From 8c6076029e01d97409ae2edfce077c86bba4f758 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 13 Aug 2021 01:16:23 +0000 Subject: [PATCH 001/143] Bump path-parse from 1.0.6 to 1.0.7 in /web-demo/frontend/custom-js Bumps [path-parse](https://github.com/jbgutierrez/path-parse) from 1.0.6 to 1.0.7. - [Release notes](https://github.com/jbgutierrez/path-parse/releases) - [Commits](https://github.com/jbgutierrez/path-parse/commits/v1.0.7) --- updated-dependencies: - dependency-name: path-parse dependency-type: indirect ... Signed-off-by: dependabot[bot] --- web-demo/frontend/custom-js/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web-demo/frontend/custom-js/yarn.lock b/web-demo/frontend/custom-js/yarn.lock index 05139550..054e2950 100644 --- a/web-demo/frontend/custom-js/yarn.lock +++ b/web-demo/frontend/custom-js/yarn.lock @@ -748,9 +748,9 @@ path-is-absolute@^1.0.0, path-is-absolute@^1.0.1: integrity sha1-F0uSaHNVNP+8es5r9TpanhtcX18= path-parse@^1.0.6: - version "1.0.6" - resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c" - integrity sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw== + version "1.0.7" + resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" + integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== path-platform@~0.11.15: version "0.11.15" From 7460dd498a4dd15dec3bbc3d5040d2666a652c14 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Mon, 13 Dec 2021 03:29:36 -0500 Subject: [PATCH 002/143] add new load reuse test --- src/dios-egraphs/Diospyros/aa.ll | 364 ++++++ src/dios-egraphs/Diospyros/clang.ll | 283 +++++ src/dios-egraphs/Diospyros/dce.ll | 854 +++++++++++++ src/dios-egraphs/Diospyros/diospyros.ll | 1056 +++++++++++++++++ .../Diospyros/llvm-tests/load_reuse.c | 70 ++ src/dios-egraphs/Diospyros/opt.ll | 364 ++++++ 6 files changed, 2991 insertions(+) create mode 100644 src/dios-egraphs/Diospyros/aa.ll create mode 100644 src/dios-egraphs/Diospyros/clang.ll create mode 100644 src/dios-egraphs/Diospyros/dce.ll create mode 100644 src/dios-egraphs/Diospyros/diospyros.ll create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/load_reuse.c create mode 100644 src/dios-egraphs/Diospyros/opt.ll diff --git a/src/dios-egraphs/Diospyros/aa.ll b/src/dios-egraphs/Diospyros/aa.ll new file mode 100644 index 00000000..df32c909 --- /dev/null +++ b/src/dios-egraphs/Diospyros/aa.ll @@ -0,0 +1,364 @@ +; ModuleID = 'opt.ll' +source_filename = "llvm-tests/load_reuse.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@__const.main.mat_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 2.000000e+00], [2 x float] [float 3.000000e+00, float 4.000000e+00]], align 16 +@__const.main.f_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 1.000000e+00], [2 x float] [float 1.000000e+00, float 1.000000e+00]], align 16 +@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 + +; Function Attrs: noinline nounwind ssp uwtable +define void @load_use_twice([2 x float]* %0, [2 x float]* %1, [3 x float]* %2, [3 x float]* %3) #0 { +.preheader7: + %4 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 
0, i64 0 + %5 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 0 + %6 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 + %7 = load float, float* %6, align 4 + %8 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 + %9 = load float, float* %8, align 4 + %10 = fmul float %7, %9 + %11 = fmul float %10, 3.000000e+00 + %12 = fadd float %11, -4.000000e+00 + %13 = load float, float* %4, align 4 + %14 = fadd float %13, %12 + store float %14, float* %4, align 4 + %15 = fmul float %10, 2.000000e+00 + %16 = fadd float %15, 1.000000e+00 + %17 = load float, float* %5, align 4 + %18 = fadd float %17, %16 + store float %18, float* %5, align 4 + %19 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 + %20 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 + %21 = load float, float* %6, align 4 + %22 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 + %23 = load float, float* %22, align 4 + %24 = fmul float %21, %23 + %25 = fmul float %24, 3.000000e+00 + %26 = fadd float %25, -4.000000e+00 + %27 = load float, float* %19, align 4 + %28 = fadd float %27, %26 + store float %28, float* %19, align 4 + %29 = fmul float %24, 2.000000e+00 + %30 = fadd float %29, 1.000000e+00 + %31 = load float, float* %20, align 4 + %32 = fadd float %31, %30 + store float %32, float* %20, align 4 + %33 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 + %34 = load float, float* %33, align 4 + %35 = load float, float* %8, align 4 + %36 = fmul float %34, %35 + %37 = fmul float %36, 3.000000e+00 + %38 = fadd float %37, -4.000000e+00 + %39 = load float, float* %19, align 4 + %40 = fadd float %39, %38 + store float %40, float* %19, align 4 + %41 = fmul float %36, 2.000000e+00 + %42 = fadd float %41, 1.000000e+00 + %43 = load float, float* %20, align 4 + %44 = fadd float %43, %42 + store float %44, float* %20, align 4 + %45 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 2 + %46 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 2 + %47 = load float, float* %33, align 4 + %48 = load float, float* %22, align 4 + %49 = fmul float %47, %48 + %50 = fmul float %49, 3.000000e+00 + %51 = fadd float %50, -4.000000e+00 + %52 = load float, float* %45, align 4 + %53 = fadd float %52, %51 + store float %53, float* %45, align 4 + %54 = fmul float %49, 2.000000e+00 + %55 = fadd float %54, 1.000000e+00 + %56 = load float, float* %46, align 4 + %57 = fadd float %56, %55 + store float %57, float* %46, align 4 + %58 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 + %59 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 + %60 = load float, float* %6, align 4 + %61 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 + %62 = load float, float* %61, align 4 + %63 = fmul float %60, %62 + %64 = fmul float %63, 3.000000e+00 + %65 = fadd float %64, -4.000000e+00 + %66 = load float, float* %58, align 4 + %67 = fadd float %66, %65 + store float %67, float* %58, align 4 + %68 = fmul float %63, 2.000000e+00 + %69 = fadd float %68, 1.000000e+00 + %70 = load float, float* %59, align 4 + %71 = fadd float %70, %69 + store float %71, float* %59, align 4 + %72 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 + %73 = load float, float* %72, align 4 + %74 = load float, float* %8, align 4 + %75 = fmul float %73, %74 + %76 = fmul float %75, 3.000000e+00 + %77 = fadd float %76, -4.000000e+00 + %78 = load float, float* %58, align 4 + %79 = fadd float 
%78, %77 + store float %79, float* %58, align 4 + %80 = fmul float %75, 2.000000e+00 + %81 = fadd float %80, 1.000000e+00 + %82 = load float, float* %59, align 4 + %83 = fadd float %82, %81 + store float %83, float* %59, align 4 + %84 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + %85 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + %86 = load float, float* %6, align 4 + %87 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 + %88 = load float, float* %87, align 4 + %89 = fmul float %86, %88 + %90 = fmul float %89, 3.000000e+00 + %91 = fadd float %90, -4.000000e+00 + %92 = load float, float* %84, align 4 + %93 = fadd float %92, %91 + store float %93, float* %84, align 4 + %94 = fmul float %89, 2.000000e+00 + %95 = fadd float %94, 1.000000e+00 + %96 = load float, float* %85, align 4 + %97 = fadd float %96, %95 + store float %97, float* %85, align 4 + %98 = load float, float* %33, align 4 + %99 = load float, float* %61, align 4 + %100 = fmul float %98, %99 + %101 = fmul float %100, 3.000000e+00 + %102 = fadd float %101, -4.000000e+00 + %103 = load float, float* %84, align 4 + %104 = fadd float %103, %102 + store float %104, float* %84, align 4 + %105 = fmul float %100, 2.000000e+00 + %106 = fadd float %105, 1.000000e+00 + %107 = load float, float* %85, align 4 + %108 = fadd float %107, %106 + store float %108, float* %85, align 4 + %109 = load float, float* %72, align 4 + %110 = load float, float* %22, align 4 + %111 = fmul float %109, %110 + %112 = fmul float %111, 3.000000e+00 + %113 = fadd float %112, -4.000000e+00 + %114 = load float, float* %84, align 4 + %115 = fadd float %114, %113 + store float %115, float* %84, align 4 + %116 = fmul float %111, 2.000000e+00 + %117 = fadd float %116, 1.000000e+00 + %118 = load float, float* %85, align 4 + %119 = fadd float %118, %117 + store float %119, float* %85, align 4 + %120 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 + %121 = load float, float* %120, align 4 + %122 = load float, float* %8, align 4 + %123 = fmul float %121, %122 + %124 = fmul float %123, 3.000000e+00 + %125 = fadd float %124, -4.000000e+00 + %126 = load float, float* %84, align 4 + %127 = fadd float %126, %125 + store float %127, float* %84, align 4 + %128 = fmul float %123, 2.000000e+00 + %129 = fadd float %128, 1.000000e+00 + %130 = load float, float* %85, align 4 + %131 = fadd float %130, %129 + store float %131, float* %85, align 4 + %132 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 + %133 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 + %134 = load float, float* %33, align 4 + %135 = load float, float* %87, align 4 + %136 = fmul float %134, %135 + %137 = fmul float %136, 3.000000e+00 + %138 = fadd float %137, -4.000000e+00 + %139 = load float, float* %132, align 4 + %140 = fadd float %139, %138 + store float %140, float* %132, align 4 + %141 = fmul float %136, 2.000000e+00 + %142 = fadd float %141, 1.000000e+00 + %143 = load float, float* %133, align 4 + %144 = fadd float %143, %142 + store float %144, float* %133, align 4 + %145 = load float, float* %120, align 4 + %146 = load float, float* %22, align 4 + %147 = fmul float %145, %146 + %148 = fmul float %147, 3.000000e+00 + %149 = fadd float %148, -4.000000e+00 + %150 = load float, float* %132, align 4 + %151 = fadd float %150, %149 + store float %151, float* %132, align 4 + %152 = fmul float %147, 2.000000e+00 + %153 = fadd float %152, 1.000000e+00 + %154 = load float, float* %133, align 4 + 
%155 = fadd float %154, %153 + store float %155, float* %133, align 4 + %156 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 0 + %157 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 0 + %158 = load float, float* %72, align 4 + %159 = load float, float* %61, align 4 + %160 = fmul float %158, %159 + %161 = fmul float %160, 3.000000e+00 + %162 = fadd float %161, -4.000000e+00 + %163 = load float, float* %156, align 4 + %164 = fadd float %163, %162 + store float %164, float* %156, align 4 + %165 = fmul float %160, 2.000000e+00 + %166 = fadd float %165, 1.000000e+00 + %167 = load float, float* %157, align 4 + %168 = fadd float %167, %166 + store float %168, float* %157, align 4 + %169 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 + %170 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 + %171 = load float, float* %72, align 4 + %172 = load float, float* %87, align 4 + %173 = fmul float %171, %172 + %174 = fmul float %173, 3.000000e+00 + %175 = fadd float %174, -4.000000e+00 + %176 = load float, float* %169, align 4 + %177 = fadd float %176, %175 + store float %177, float* %169, align 4 + %178 = fmul float %173, 2.000000e+00 + %179 = fadd float %178, 1.000000e+00 + %180 = load float, float* %170, align 4 + %181 = fadd float %180, %179 + store float %181, float* %170, align 4 + %182 = load float, float* %120, align 4 + %183 = load float, float* %61, align 4 + %184 = fmul float %182, %183 + %185 = fmul float %184, 3.000000e+00 + %186 = fadd float %185, -4.000000e+00 + %187 = load float, float* %169, align 4 + %188 = fadd float %187, %186 + store float %188, float* %169, align 4 + %189 = fmul float %184, 2.000000e+00 + %190 = fadd float %189, 1.000000e+00 + %191 = load float, float* %170, align 4 + %192 = fadd float %191, %190 + store float %192, float* %170, align 4 + %193 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 2 + %194 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 2 + %195 = load float, float* %120, align 4 + %196 = load float, float* %87, align 4 + %197 = fmul float %195, %196 + %198 = fmul float %197, 3.000000e+00 + %199 = fadd float %198, -4.000000e+00 + %200 = load float, float* %193, align 4 + %201 = fadd float %200, %199 + store float %201, float* %193, align 4 + %202 = fmul float %197, 2.000000e+00 + %203 = fadd float %202, 1.000000e+00 + %204 = load float, float* %194, align 4 + %205 = fadd float %204, %203 + store float %205, float* %194, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #0 { +.preheader: + %0 = alloca [2 x [2 x float]], align 16 + %1 = alloca [2 x [2 x float]], align 16 + %2 = alloca [3 x [3 x float]], align 16 + %3 = alloca [3 x [3 x float]], align 16 + %4 = bitcast [2 x [2 x float]]* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.mat_in to i8*), i64 16, i1 false) + %5 = bitcast [2 x [2 x float]]* %1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.f_in to i8*), i64 16, i1 false) + %6 = bitcast [3 x [3 x float]]* %2 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) + %7 = bitcast [3 x [3 x float]]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %7, i8 0, i64 
36, i1 false) + %8 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %0, i64 0, i64 0 + %9 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %1, i64 0, i64 0 + %10 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0 + %11 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0 + call void @load_use_twice([2 x float]* nonnull %8, [2 x float]* nonnull %9, [3 x float]* nonnull %10, [3 x float]* nonnull %11) + %12 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 0 + %13 = load float, float* %12, align 16 + %14 = fpext float %13 to double + %15 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %14) #4 + %16 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 0 + %17 = load float, float* %16, align 16 + %18 = fpext float %17 to double + %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #4 + %20 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 1 + %21 = load float, float* %20, align 4 + %22 = fpext float %21 to double + %23 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %22) #4 + %24 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 1 + %25 = load float, float* %24, align 4 + %26 = fpext float %25 to double + %27 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #4 + %28 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 2 + %29 = load float, float* %28, align 8 + %30 = fpext float %29 to double + %31 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %30) #4 + %32 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 2 + %33 = load float, float* %32, align 8 + %34 = fpext float %33 to double + %35 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %34) #4 + %36 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 0 + %37 = load float, float* %36, align 4 + %38 = fpext float %37 to double + %39 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %38) #4 + %40 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 0 + %41 = load float, float* %40, align 4 + %42 = fpext float %41 to double + %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %42) #4 + %44 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 1 + %45 = load float, float* %44, align 4 + %46 = fpext float %45 to double + %47 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %46) #4 + %48 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 1 + %49 = load float, float* %48, align 4 + %50 = fpext float %49 to double + %51 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %50) #4 + %52 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 2 + %53 = load float, float* %52, align 4 + %54 = fpext float %53 to double + %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #4 + %56 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 2 + %57 = load float, float* %56, align 4 + %58 = fpext float %57 to double + %59 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %58) #4 + %60 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 0 + %61 = load float, float* %60, align 8 + %62 = fpext float %61 to double + %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %62) #4 + %64 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 0 + %65 = load float, float* %64, align 8 + %66 = fpext float %65 to double + %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %66) #4 + %68 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 1 + %69 = load float, float* %68, align 4 + %70 = fpext float %69 to double + %71 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %70) #4 + %72 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 1 + %73 = load float, float* %72, align 4 + %74 = fpext float %73 to double + %75 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %74) #4 + %76 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 2 + %77 = load float, float* %76, align 8 + %78 = fpext float %77 to double + %79 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %78) #4 + %80 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 2 + %81 = load float, float* %80, align 8 + %82 = fpext float %81 to double + %83 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %82) #4 + ret i32 0 +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 + +declare i32 @printf(i8*, ...) 
#3 + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind willreturn writeonly } +attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/clang.ll b/src/dios-egraphs/Diospyros/clang.ll new file mode 100644 index 00000000..fd259ce5 --- /dev/null +++ b/src/dios-egraphs/Diospyros/clang.ll @@ -0,0 +1,283 @@ +; ModuleID = 'llvm-tests/load_reuse.c' +source_filename = "llvm-tests/load_reuse.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@__const.main.mat_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 2.000000e+00], [2 x float] [float 3.000000e+00, float 4.000000e+00]], align 16 +@__const.main.f_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 1.000000e+00], [2 x float] [float 1.000000e+00, float 1.000000e+00]], align 16 +@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 + +; Function Attrs: noinline nounwind ssp uwtable +define void @load_use_twice([2 x float]* %0, [2 x float]* %1, [3 x float]* %2, [3 x float]* %3) #0 { + %5 = alloca [2 x float]*, align 8 + %6 = alloca [2 x float]*, align 8 + %7 = alloca [3 x float]*, align 8 + %8 = alloca [3 x float]*, align 8 + %9 = alloca i32, align 4 + %10 = alloca i32, align 4 + %11 = alloca i32, align 4 + %12 = alloca i32, align 4 + %13 = alloca i32, align 4 + %14 = alloca i32, align 4 + %15 = alloca i32, align 4 + %16 = alloca i32, align 4 + %17 = alloca float, align 4 + store [2 x float]* %0, [2 x float]** %5, align 8 + store [2 x float]* %1, [2 x float]** %6, align 8 + store [3 x float]* %2, [3 x float]** %7, align 8 + store [3 x float]* %3, [3 x float]** %8, align 8 + store i32 0, i32* %9, align 4 + br label %18 + +18: ; preds = %110, %4 + %19 = load i32, i32* %9, align 4 + %20 = icmp slt i32 %19, 3 + br i1 %20, label %21, label %113 + +21: ; preds = %18 + store i32 0, i32* %10, align 4 + br label %22 + +22: ; preds = %106, %21 + %23 = load i32, i32* %10, align 4 + %24 = icmp slt i32 %23, 3 + br i1 %24, label %25, label %109 + +25: ; preds = %22 + store i32 0, i32* %11, align 4 + br label %26 + +26: ; preds = %102, %25 + %27 = load i32, i32* %11, align 4 + %28 = icmp slt i32 %27, 2 + br i1 %28, label %29, label %105 + +29: ; preds = %26 + store i32 0, i32* %12, 
align 4 + br label %30 + +30: ; preds = %98, %29 + %31 = load i32, i32* %12, align 4 + %32 = icmp slt i32 %31, 2 + br i1 %32, label %33, label %101 + +33: ; preds = %30 + %34 = load i32, i32* %11, align 4 + %35 = sub nsw i32 1, %34 + store i32 %35, i32* %13, align 4 + %36 = load i32, i32* %12, align 4 + %37 = sub nsw i32 1, %36 + store i32 %37, i32* %14, align 4 + %38 = load i32, i32* %9, align 4 + %39 = load i32, i32* %13, align 4 + %40 = sub nsw i32 %38, %39 + store i32 %40, i32* %15, align 4 + %41 = load i32, i32* %10, align 4 + %42 = load i32, i32* %14, align 4 + %43 = sub nsw i32 %41, %42 + store i32 %43, i32* %16, align 4 + %44 = load i32, i32* %15, align 4 + %45 = icmp sge i32 %44, 0 + br i1 %45, label %46, label %97 + +46: ; preds = %33 + %47 = load i32, i32* %15, align 4 + %48 = icmp slt i32 %47, 2 + br i1 %48, label %49, label %97 + +49: ; preds = %46 + %50 = load i32, i32* %16, align 4 + %51 = icmp sge i32 %50, 0 + br i1 %51, label %52, label %97 + +52: ; preds = %49 + %53 = load i32, i32* %16, align 4 + %54 = icmp slt i32 %53, 2 + br i1 %54, label %55, label %97 + +55: ; preds = %52 + %56 = load [2 x float]*, [2 x float]** %5, align 8 + %57 = load i32, i32* %15, align 4 + %58 = sext i32 %57 to i64 + %59 = getelementptr inbounds [2 x float], [2 x float]* %56, i64 %58 + %60 = load i32, i32* %16, align 4 + %61 = sext i32 %60 to i64 + %62 = getelementptr inbounds [2 x float], [2 x float]* %59, i64 0, i64 %61 + %63 = load float, float* %62, align 4 + %64 = load [2 x float]*, [2 x float]** %6, align 8 + %65 = load i32, i32* %13, align 4 + %66 = sext i32 %65 to i64 + %67 = getelementptr inbounds [2 x float], [2 x float]* %64, i64 %66 + %68 = load i32, i32* %14, align 4 + %69 = sext i32 %68 to i64 + %70 = getelementptr inbounds [2 x float], [2 x float]* %67, i64 0, i64 %69 + %71 = load float, float* %70, align 4 + %72 = fmul float %63, %71 + store float %72, float* %17, align 4 + %73 = load float, float* %17, align 4 + %74 = fmul float 3.000000e+00, %73 + %75 = fsub float %74, 4.000000e+00 + %76 = load [3 x float]*, [3 x float]** %7, align 8 + %77 = load i32, i32* %9, align 4 + %78 = sext i32 %77 to i64 + %79 = getelementptr inbounds [3 x float], [3 x float]* %76, i64 %78 + %80 = load i32, i32* %10, align 4 + %81 = sext i32 %80 to i64 + %82 = getelementptr inbounds [3 x float], [3 x float]* %79, i64 0, i64 %81 + %83 = load float, float* %82, align 4 + %84 = fadd float %83, %75 + store float %84, float* %82, align 4 + %85 = load float, float* %17, align 4 + %86 = fmul float 2.000000e+00, %85 + %87 = fadd float %86, 1.000000e+00 + %88 = load [3 x float]*, [3 x float]** %8, align 8 + %89 = load i32, i32* %9, align 4 + %90 = sext i32 %89 to i64 + %91 = getelementptr inbounds [3 x float], [3 x float]* %88, i64 %90 + %92 = load i32, i32* %10, align 4 + %93 = sext i32 %92 to i64 + %94 = getelementptr inbounds [3 x float], [3 x float]* %91, i64 0, i64 %93 + %95 = load float, float* %94, align 4 + %96 = fadd float %95, %87 + store float %96, float* %94, align 4 + br label %97 + +97: ; preds = %55, %52, %49, %46, %33 + br label %98 + +98: ; preds = %97 + %99 = load i32, i32* %12, align 4 + %100 = add nsw i32 %99, 1 + store i32 %100, i32* %12, align 4 + br label %30 + +101: ; preds = %30 + br label %102 + +102: ; preds = %101 + %103 = load i32, i32* %11, align 4 + %104 = add nsw i32 %103, 1 + store i32 %104, i32* %11, align 4 + br label %26 + +105: ; preds = %26 + br label %106 + +106: ; preds = %105 + %107 = load i32, i32* %10, align 4 + %108 = add nsw i32 %107, 1 + store i32 %108, i32* %10, 
align 4 + br label %22 + +109: ; preds = %22 + br label %110 + +110: ; preds = %109 + %111 = load i32, i32* %9, align 4 + %112 = add nsw i32 %111, 1 + store i32 %112, i32* %9, align 4 + br label %18 + +113: ; preds = %18 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #0 { + %1 = alloca i32, align 4 + %2 = alloca [2 x [2 x float]], align 16 + %3 = alloca [2 x [2 x float]], align 16 + %4 = alloca [3 x [3 x float]], align 16 + %5 = alloca [3 x [3 x float]], align 16 + %6 = alloca i32, align 4 + %7 = alloca i32, align 4 + store i32 0, i32* %1, align 4 + %8 = bitcast [2 x [2 x float]]* %2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %8, i8* align 16 bitcast ([2 x [2 x float]]* @__const.main.mat_in to i8*), i64 16, i1 false) + %9 = bitcast [2 x [2 x float]]* %3 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %9, i8* align 16 bitcast ([2 x [2 x float]]* @__const.main.f_in to i8*), i64 16, i1 false) + %10 = bitcast [3 x [3 x float]]* %4 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %10, i8 0, i64 36, i1 false) + %11 = bitcast [3 x [3 x float]]* %5 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %11, i8 0, i64 36, i1 false) + %12 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %2, i64 0, i64 0 + %13 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %3, i64 0, i64 0 + %14 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %4, i64 0, i64 0 + %15 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %5, i64 0, i64 0 + call void @load_use_twice([2 x float]* %12, [2 x float]* %13, [3 x float]* %14, [3 x float]* %15) + store i32 0, i32* %6, align 4 + br label %16 + +16: ; preds = %46, %0 + %17 = load i32, i32* %6, align 4 + %18 = icmp slt i32 %17, 3 + br i1 %18, label %19, label %49 + +19: ; preds = %16 + store i32 0, i32* %7, align 4 + br label %20 + +20: ; preds = %42, %19 + %21 = load i32, i32* %7, align 4 + %22 = icmp slt i32 %21, 3 + br i1 %22, label %23, label %45 + +23: ; preds = %20 + %24 = load i32, i32* %6, align 4 + %25 = sext i32 %24 to i64 + %26 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %4, i64 0, i64 %25 + %27 = load i32, i32* %7, align 4 + %28 = sext i32 %27 to i64 + %29 = getelementptr inbounds [3 x float], [3 x float]* %26, i64 0, i64 %28 + %30 = load float, float* %29, align 4 + %31 = fpext float %30 to double + %32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %31) + %33 = load i32, i32* %6, align 4 + %34 = sext i32 %33 to i64 + %35 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %5, i64 0, i64 %34 + %36 = load i32, i32* %7, align 4 + %37 = sext i32 %36 to i64 + %38 = getelementptr inbounds [3 x float], [3 x float]* %35, i64 0, i64 %37 + %39 = load float, float* %38, align 4 + %40 = fpext float %39 to double + %41 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %40) + br label %42 + +42: ; preds = %23 + %43 = load i32, i32* %7, align 4 + %44 = add nsw i32 %43, 1 + store i32 %44, i32* %7, align 4 + br label %20 + +45: ; preds = %20 + br label %46 + +46: ; preds = %45 + %47 = load i32, i32* %6, align 4 + %48 = add nsw i32 %47, 1 + store i32 %48, i32* %6, align 4 + br label %16 + +49: ; preds = %16 + ret i32 0 +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 + +declare i32 @printf(i8*, ...) #3 + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind willreturn writeonly } +attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/dce.ll b/src/dios-egraphs/Diospyros/dce.ll new file mode 100644 index 00000000..5a30cf2b --- /dev/null +++ b/src/dios-egraphs/Diospyros/dce.ll @@ -0,0 +1,854 @@ +; ModuleID = 'diospyros.ll' +source_filename = "llvm-tests/load_reuse.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@__const.main.mat_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 2.000000e+00], [2 x float] [float 3.000000e+00, float 4.000000e+00]], align 16 +@__const.main.f_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 1.000000e+00], [2 x float] [float 1.000000e+00, float 1.000000e+00]], align 16 +@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 + +; Function Attrs: noinline nounwind ssp uwtable +define void @load_use_twice([2 x float]* %0, [2 x float]* %1, [3 x float]* %2, [3 x float]* %3) #0 { +.preheader7: + %4 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 0 + %5 = load float, float* %4, align 4 + %6 = insertelement <4 x float> zeroinitializer, float %5, i32 0 + %7 = insertelement <4 x float> %6, float 0.000000e+00, i32 1 + %8 = insertelement <4 x float> %7, float 0.000000e+00, i32 2 + %9 = insertelement <4 x float> %8, float 0.000000e+00, i32 3 + %10 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 + 
%11 = load float, float* %10, align 4 + %12 = insertelement <4 x float> zeroinitializer, float %11, i32 0 + %13 = insertelement <4 x float> %12, float 1.000000e+00, i32 1 + %14 = insertelement <4 x float> %13, float 1.000000e+00, i32 2 + %15 = insertelement <4 x float> %14, float 1.000000e+00, i32 3 + %16 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 + %17 = load float, float* %16, align 4 + %18 = insertelement <4 x float> zeroinitializer, float %17, i32 0 + %19 = insertelement <4 x float> %18, float 1.000000e+00, i32 1 + %20 = insertelement <4 x float> %19, float 1.000000e+00, i32 2 + %21 = insertelement <4 x float> %20, float 1.000000e+00, i32 3 + %22 = fmul <4 x float> %15, %21 + %23 = fmul <4 x float> %22, + %24 = fadd <4 x float> %23, + %25 = fadd <4 x float> %9, %24 + %26 = extractelement <4 x float> %25, i32 0 + %27 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 0 + store float %26, float* %27, align 4 + %28 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 0 + %29 = load float, float* %28, align 4 + %30 = insertelement <4 x float> zeroinitializer, float %29, i32 0 + %31 = insertelement <4 x float> %30, float 0.000000e+00, i32 1 + %32 = insertelement <4 x float> %31, float 0.000000e+00, i32 2 + %33 = insertelement <4 x float> %32, float 0.000000e+00, i32 3 + %34 = insertelement <4 x float> zeroinitializer, float %11, i32 0 + %35 = insertelement <4 x float> %34, float 1.000000e+00, i32 1 + %36 = insertelement <4 x float> %35, float 1.000000e+00, i32 2 + %37 = insertelement <4 x float> %36, float 1.000000e+00, i32 3 + %38 = insertelement <4 x float> zeroinitializer, float %17, i32 0 + %39 = insertelement <4 x float> %38, float 1.000000e+00, i32 1 + %40 = insertelement <4 x float> %39, float 1.000000e+00, i32 2 + %41 = insertelement <4 x float> %40, float 1.000000e+00, i32 3 + %42 = fmul <4 x float> %37, %41 + %43 = fmul <4 x float> %42, + %44 = fadd <4 x float> %43, + %45 = fadd <4 x float> %33, %44 + %46 = extractelement <4 x float> %45, i32 0 + %47 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 0 + store float %46, float* %47, align 4 + %48 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 + %49 = load float, float* %48, align 4 + %50 = insertelement <4 x float> zeroinitializer, float %49, i32 0 + %51 = insertelement <4 x float> %50, float 0.000000e+00, i32 1 + %52 = insertelement <4 x float> %51, float 0.000000e+00, i32 2 + %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 3 + %54 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 + %55 = load float, float* %54, align 4 + %56 = insertelement <4 x float> zeroinitializer, float %55, i32 0 + %57 = insertelement <4 x float> %56, float 1.000000e+00, i32 1 + %58 = insertelement <4 x float> %57, float 1.000000e+00, i32 2 + %59 = insertelement <4 x float> %58, float 1.000000e+00, i32 3 + %60 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 + %61 = load float, float* %60, align 4 + %62 = insertelement <4 x float> zeroinitializer, float %61, i32 0 + %63 = insertelement <4 x float> %62, float 1.000000e+00, i32 1 + %64 = insertelement <4 x float> %63, float 1.000000e+00, i32 2 + %65 = insertelement <4 x float> %64, float 1.000000e+00, i32 3 + %66 = fmul <4 x float> %59, %65 + %67 = fmul <4 x float> %66, + %68 = fadd <4 x float> %67, + %69 = fadd <4 x float> %53, %68 + %70 = extractelement <4 x float> %69, i32 0 + %71 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 + store float %70, 
float* %71, align 4 + %72 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 + %73 = load float, float* %72, align 4 + %74 = insertelement <4 x float> zeroinitializer, float %73, i32 0 + %75 = insertelement <4 x float> %74, float 0.000000e+00, i32 1 + %76 = insertelement <4 x float> %75, float 0.000000e+00, i32 2 + %77 = insertelement <4 x float> %76, float 0.000000e+00, i32 3 + %78 = insertelement <4 x float> zeroinitializer, float %55, i32 0 + %79 = insertelement <4 x float> %78, float 1.000000e+00, i32 1 + %80 = insertelement <4 x float> %79, float 1.000000e+00, i32 2 + %81 = insertelement <4 x float> %80, float 1.000000e+00, i32 3 + %82 = insertelement <4 x float> zeroinitializer, float %61, i32 0 + %83 = insertelement <4 x float> %82, float 1.000000e+00, i32 1 + %84 = insertelement <4 x float> %83, float 1.000000e+00, i32 2 + %85 = insertelement <4 x float> %84, float 1.000000e+00, i32 3 + %86 = fmul <4 x float> %81, %85 + %87 = fmul <4 x float> %86, + %88 = fadd <4 x float> %87, + %89 = fadd <4 x float> %77, %88 + %90 = extractelement <4 x float> %89, i32 0 + %91 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 + store float %90, float* %91, align 4 + %92 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 + %93 = load float, float* %92, align 4 + %94 = insertelement <4 x float> zeroinitializer, float %93, i32 0 + %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 1 + %96 = insertelement <4 x float> %95, float 0.000000e+00, i32 2 + %97 = insertelement <4 x float> %96, float 0.000000e+00, i32 3 + %98 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 + %99 = load float, float* %98, align 4 + %100 = insertelement <4 x float> zeroinitializer, float %99, i32 0 + %101 = insertelement <4 x float> %100, float 1.000000e+00, i32 1 + %102 = insertelement <4 x float> %101, float 1.000000e+00, i32 2 + %103 = insertelement <4 x float> %102, float 1.000000e+00, i32 3 + %104 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 + %105 = load float, float* %104, align 4 + %106 = insertelement <4 x float> zeroinitializer, float %105, i32 0 + %107 = insertelement <4 x float> %106, float 1.000000e+00, i32 1 + %108 = insertelement <4 x float> %107, float 1.000000e+00, i32 2 + %109 = insertelement <4 x float> %108, float 1.000000e+00, i32 3 + %110 = fmul <4 x float> %103, %109 + %111 = fmul <4 x float> %110, + %112 = fadd <4 x float> %111, + %113 = fadd <4 x float> %97, %112 + %114 = extractelement <4 x float> %113, i32 0 + %115 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 + store float %114, float* %115, align 4 + %116 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 + %117 = load float, float* %116, align 4 + %118 = insertelement <4 x float> zeroinitializer, float %117, i32 0 + %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 1 + %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 2 + %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 3 + %122 = insertelement <4 x float> zeroinitializer, float %99, i32 0 + %123 = insertelement <4 x float> %122, float 1.000000e+00, i32 1 + %124 = insertelement <4 x float> %123, float 1.000000e+00, i32 2 + %125 = insertelement <4 x float> %124, float 1.000000e+00, i32 3 + %126 = insertelement <4 x float> zeroinitializer, float %105, i32 0 + %127 = insertelement <4 x float> %126, float 1.000000e+00, i32 1 + %128 = insertelement <4 x float> %127, float 1.000000e+00, i32 2 + %129 = insertelement <4 x 
float> %128, float 1.000000e+00, i32 3 + %130 = fmul <4 x float> %125, %129 + %131 = fmul <4 x float> %130, + %132 = fadd <4 x float> %131, + %133 = fadd <4 x float> %121, %132 + %134 = extractelement <4 x float> %133, i32 0 + %135 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 + store float %134, float* %135, align 4 + %136 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 2 + %137 = load float, float* %136, align 4 + %138 = insertelement <4 x float> zeroinitializer, float %137, i32 0 + %139 = insertelement <4 x float> %138, float 0.000000e+00, i32 1 + %140 = insertelement <4 x float> %139, float 0.000000e+00, i32 2 + %141 = insertelement <4 x float> %140, float 0.000000e+00, i32 3 + %142 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 + %143 = load float, float* %142, align 4 + %144 = insertelement <4 x float> zeroinitializer, float %143, i32 0 + %145 = insertelement <4 x float> %144, float 1.000000e+00, i32 1 + %146 = insertelement <4 x float> %145, float 1.000000e+00, i32 2 + %147 = insertelement <4 x float> %146, float 1.000000e+00, i32 3 + %148 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 + %149 = load float, float* %148, align 4 + %150 = insertelement <4 x float> zeroinitializer, float %149, i32 0 + %151 = insertelement <4 x float> %150, float 1.000000e+00, i32 1 + %152 = insertelement <4 x float> %151, float 1.000000e+00, i32 2 + %153 = insertelement <4 x float> %152, float 1.000000e+00, i32 3 + %154 = fmul <4 x float> %147, %153 + %155 = fmul <4 x float> %154, + %156 = fadd <4 x float> %155, + %157 = fadd <4 x float> %141, %156 + %158 = extractelement <4 x float> %157, i32 0 + %159 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 2 + store float %158, float* %159, align 4 + %160 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 2 + %161 = load float, float* %160, align 4 + %162 = insertelement <4 x float> zeroinitializer, float %161, i32 0 + %163 = insertelement <4 x float> %162, float 0.000000e+00, i32 1 + %164 = insertelement <4 x float> %163, float 0.000000e+00, i32 2 + %165 = insertelement <4 x float> %164, float 0.000000e+00, i32 3 + %166 = insertelement <4 x float> zeroinitializer, float %143, i32 0 + %167 = insertelement <4 x float> %166, float 1.000000e+00, i32 1 + %168 = insertelement <4 x float> %167, float 1.000000e+00, i32 2 + %169 = insertelement <4 x float> %168, float 1.000000e+00, i32 3 + %170 = insertelement <4 x float> zeroinitializer, float %149, i32 0 + %171 = insertelement <4 x float> %170, float 1.000000e+00, i32 1 + %172 = insertelement <4 x float> %171, float 1.000000e+00, i32 2 + %173 = insertelement <4 x float> %172, float 1.000000e+00, i32 3 + %174 = fmul <4 x float> %169, %173 + %175 = fmul <4 x float> %174, + %176 = fadd <4 x float> %175, + %177 = fadd <4 x float> %165, %176 + %178 = extractelement <4 x float> %177, i32 0 + %179 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 2 + store float %178, float* %179, align 4 + %180 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 + %181 = load float, float* %180, align 4 + %182 = insertelement <4 x float> zeroinitializer, float %181, i32 0 + %183 = insertelement <4 x float> %182, float 0.000000e+00, i32 1 + %184 = insertelement <4 x float> %183, float 0.000000e+00, i32 2 + %185 = insertelement <4 x float> %184, float 0.000000e+00, i32 3 + %186 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 + %187 = load float, float* %186, align 4 + %188 = 
insertelement <4 x float> zeroinitializer, float %187, i32 0 + %189 = insertelement <4 x float> %188, float 1.000000e+00, i32 1 + %190 = insertelement <4 x float> %189, float 1.000000e+00, i32 2 + %191 = insertelement <4 x float> %190, float 1.000000e+00, i32 3 + %192 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 + %193 = load float, float* %192, align 4 + %194 = insertelement <4 x float> zeroinitializer, float %193, i32 0 + %195 = insertelement <4 x float> %194, float 1.000000e+00, i32 1 + %196 = insertelement <4 x float> %195, float 1.000000e+00, i32 2 + %197 = insertelement <4 x float> %196, float 1.000000e+00, i32 3 + %198 = fmul <4 x float> %191, %197 + %199 = fmul <4 x float> %198, + %200 = fadd <4 x float> %199, + %201 = fadd <4 x float> %185, %200 + %202 = extractelement <4 x float> %201, i32 0 + %203 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 + store float %202, float* %203, align 4 + %204 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 + %205 = load float, float* %204, align 4 + %206 = insertelement <4 x float> zeroinitializer, float %205, i32 0 + %207 = insertelement <4 x float> %206, float 0.000000e+00, i32 1 + %208 = insertelement <4 x float> %207, float 0.000000e+00, i32 2 + %209 = insertelement <4 x float> %208, float 0.000000e+00, i32 3 + %210 = insertelement <4 x float> zeroinitializer, float %187, i32 0 + %211 = insertelement <4 x float> %210, float 1.000000e+00, i32 1 + %212 = insertelement <4 x float> %211, float 1.000000e+00, i32 2 + %213 = insertelement <4 x float> %212, float 1.000000e+00, i32 3 + %214 = insertelement <4 x float> zeroinitializer, float %193, i32 0 + %215 = insertelement <4 x float> %214, float 1.000000e+00, i32 1 + %216 = insertelement <4 x float> %215, float 1.000000e+00, i32 2 + %217 = insertelement <4 x float> %216, float 1.000000e+00, i32 3 + %218 = fmul <4 x float> %213, %217 + %219 = fmul <4 x float> %218, + %220 = fadd <4 x float> %219, + %221 = fadd <4 x float> %209, %220 + %222 = extractelement <4 x float> %221, i32 0 + %223 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 + store float %222, float* %223, align 4 + %224 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 + %225 = load float, float* %224, align 4 + %226 = insertelement <4 x float> zeroinitializer, float %225, i32 0 + %227 = insertelement <4 x float> %226, float 0.000000e+00, i32 1 + %228 = insertelement <4 x float> %227, float 0.000000e+00, i32 2 + %229 = insertelement <4 x float> %228, float 0.000000e+00, i32 3 + %230 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 + %231 = load float, float* %230, align 4 + %232 = insertelement <4 x float> zeroinitializer, float %231, i32 0 + %233 = insertelement <4 x float> %232, float 1.000000e+00, i32 1 + %234 = insertelement <4 x float> %233, float 1.000000e+00, i32 2 + %235 = insertelement <4 x float> %234, float 1.000000e+00, i32 3 + %236 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 + %237 = load float, float* %236, align 4 + %238 = insertelement <4 x float> zeroinitializer, float %237, i32 0 + %239 = insertelement <4 x float> %238, float 1.000000e+00, i32 1 + %240 = insertelement <4 x float> %239, float 1.000000e+00, i32 2 + %241 = insertelement <4 x float> %240, float 1.000000e+00, i32 3 + %242 = fmul <4 x float> %235, %241 + %243 = fmul <4 x float> %242, + %244 = fadd <4 x float> %243, + %245 = fadd <4 x float> %229, %244 + %246 = extractelement <4 x float> %245, i32 0 + %247 = 
getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 + store float %246, float* %247, align 4 + %248 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 + %249 = load float, float* %248, align 4 + %250 = insertelement <4 x float> zeroinitializer, float %249, i32 0 + %251 = insertelement <4 x float> %250, float 0.000000e+00, i32 1 + %252 = insertelement <4 x float> %251, float 0.000000e+00, i32 2 + %253 = insertelement <4 x float> %252, float 0.000000e+00, i32 3 + %254 = insertelement <4 x float> zeroinitializer, float %231, i32 0 + %255 = insertelement <4 x float> %254, float 1.000000e+00, i32 1 + %256 = insertelement <4 x float> %255, float 1.000000e+00, i32 2 + %257 = insertelement <4 x float> %256, float 1.000000e+00, i32 3 + %258 = insertelement <4 x float> zeroinitializer, float %237, i32 0 + %259 = insertelement <4 x float> %258, float 1.000000e+00, i32 1 + %260 = insertelement <4 x float> %259, float 1.000000e+00, i32 2 + %261 = insertelement <4 x float> %260, float 1.000000e+00, i32 3 + %262 = fmul <4 x float> %257, %261 + %263 = fmul <4 x float> %262, + %264 = fadd <4 x float> %263, + %265 = fadd <4 x float> %253, %264 + %266 = extractelement <4 x float> %265, i32 0 + %267 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 + store float %266, float* %267, align 4 + %268 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + %269 = load float, float* %268, align 4 + %270 = insertelement <4 x float> zeroinitializer, float %269, i32 0 + %271 = insertelement <4 x float> %270, float 0.000000e+00, i32 1 + %272 = insertelement <4 x float> %271, float 0.000000e+00, i32 2 + %273 = insertelement <4 x float> %272, float 0.000000e+00, i32 3 + %274 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 + %275 = load float, float* %274, align 4 + %276 = insertelement <4 x float> zeroinitializer, float %275, i32 0 + %277 = insertelement <4 x float> %276, float 1.000000e+00, i32 1 + %278 = insertelement <4 x float> %277, float 1.000000e+00, i32 2 + %279 = insertelement <4 x float> %278, float 1.000000e+00, i32 3 + %280 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 + %281 = load float, float* %280, align 4 + %282 = insertelement <4 x float> zeroinitializer, float %281, i32 0 + %283 = insertelement <4 x float> %282, float 1.000000e+00, i32 1 + %284 = insertelement <4 x float> %283, float 1.000000e+00, i32 2 + %285 = insertelement <4 x float> %284, float 1.000000e+00, i32 3 + %286 = fmul <4 x float> %279, %285 + %287 = fmul <4 x float> %286, + %288 = fadd <4 x float> %287, + %289 = fadd <4 x float> %273, %288 + %290 = extractelement <4 x float> %289, i32 0 + %291 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + store float %290, float* %291, align 4 + %292 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + %293 = load float, float* %292, align 4 + %294 = insertelement <4 x float> zeroinitializer, float %293, i32 0 + %295 = insertelement <4 x float> %294, float 0.000000e+00, i32 1 + %296 = insertelement <4 x float> %295, float 0.000000e+00, i32 2 + %297 = insertelement <4 x float> %296, float 0.000000e+00, i32 3 + %298 = insertelement <4 x float> zeroinitializer, float %275, i32 0 + %299 = insertelement <4 x float> %298, float 1.000000e+00, i32 1 + %300 = insertelement <4 x float> %299, float 1.000000e+00, i32 2 + %301 = insertelement <4 x float> %300, float 1.000000e+00, i32 3 + %302 = insertelement <4 x float> zeroinitializer, float %281, i32 0 + %303 = 
insertelement <4 x float> %302, float 1.000000e+00, i32 1 + %304 = insertelement <4 x float> %303, float 1.000000e+00, i32 2 + %305 = insertelement <4 x float> %304, float 1.000000e+00, i32 3 + %306 = fmul <4 x float> %301, %305 + %307 = fmul <4 x float> %306, + %308 = fadd <4 x float> %307, + %309 = fadd <4 x float> %297, %308 + %310 = extractelement <4 x float> %309, i32 0 + %311 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + store float %310, float* %311, align 4 + %312 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + %313 = load float, float* %312, align 4 + %314 = insertelement <4 x float> zeroinitializer, float %313, i32 0 + %315 = insertelement <4 x float> %314, float 0.000000e+00, i32 1 + %316 = insertelement <4 x float> %315, float 0.000000e+00, i32 2 + %317 = insertelement <4 x float> %316, float 0.000000e+00, i32 3 + %318 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 + %319 = load float, float* %318, align 4 + %320 = insertelement <4 x float> zeroinitializer, float %319, i32 0 + %321 = insertelement <4 x float> %320, float 1.000000e+00, i32 1 + %322 = insertelement <4 x float> %321, float 1.000000e+00, i32 2 + %323 = insertelement <4 x float> %322, float 1.000000e+00, i32 3 + %324 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 + %325 = load float, float* %324, align 4 + %326 = insertelement <4 x float> zeroinitializer, float %325, i32 0 + %327 = insertelement <4 x float> %326, float 1.000000e+00, i32 1 + %328 = insertelement <4 x float> %327, float 1.000000e+00, i32 2 + %329 = insertelement <4 x float> %328, float 1.000000e+00, i32 3 + %330 = fmul <4 x float> %323, %329 + %331 = fmul <4 x float> %330, + %332 = fadd <4 x float> %331, + %333 = fadd <4 x float> %317, %332 + %334 = extractelement <4 x float> %333, i32 0 + %335 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + store float %334, float* %335, align 4 + %336 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + %337 = load float, float* %336, align 4 + %338 = insertelement <4 x float> zeroinitializer, float %337, i32 0 + %339 = insertelement <4 x float> %338, float 0.000000e+00, i32 1 + %340 = insertelement <4 x float> %339, float 0.000000e+00, i32 2 + %341 = insertelement <4 x float> %340, float 0.000000e+00, i32 3 + %342 = insertelement <4 x float> zeroinitializer, float %319, i32 0 + %343 = insertelement <4 x float> %342, float 1.000000e+00, i32 1 + %344 = insertelement <4 x float> %343, float 1.000000e+00, i32 2 + %345 = insertelement <4 x float> %344, float 1.000000e+00, i32 3 + %346 = insertelement <4 x float> zeroinitializer, float %325, i32 0 + %347 = insertelement <4 x float> %346, float 1.000000e+00, i32 1 + %348 = insertelement <4 x float> %347, float 1.000000e+00, i32 2 + %349 = insertelement <4 x float> %348, float 1.000000e+00, i32 3 + %350 = fmul <4 x float> %345, %349 + %351 = fmul <4 x float> %350, + %352 = fadd <4 x float> %351, + %353 = fadd <4 x float> %341, %352 + %354 = extractelement <4 x float> %353, i32 0 + %355 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + store float %354, float* %355, align 4 + %356 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + %357 = load float, float* %356, align 4 + %358 = insertelement <4 x float> zeroinitializer, float %357, i32 0 + %359 = insertelement <4 x float> %358, float 0.000000e+00, i32 1 + %360 = insertelement <4 x float> %359, float 0.000000e+00, i32 2 + %361 = insertelement <4 x float> %360, 
float 0.000000e+00, i32 3 + %362 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 + %363 = load float, float* %362, align 4 + %364 = insertelement <4 x float> zeroinitializer, float %363, i32 0 + %365 = insertelement <4 x float> %364, float 1.000000e+00, i32 1 + %366 = insertelement <4 x float> %365, float 1.000000e+00, i32 2 + %367 = insertelement <4 x float> %366, float 1.000000e+00, i32 3 + %368 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 + %369 = load float, float* %368, align 4 + %370 = insertelement <4 x float> zeroinitializer, float %369, i32 0 + %371 = insertelement <4 x float> %370, float 1.000000e+00, i32 1 + %372 = insertelement <4 x float> %371, float 1.000000e+00, i32 2 + %373 = insertelement <4 x float> %372, float 1.000000e+00, i32 3 + %374 = fmul <4 x float> %367, %373 + %375 = fmul <4 x float> %374, + %376 = fadd <4 x float> %375, + %377 = fadd <4 x float> %361, %376 + %378 = extractelement <4 x float> %377, i32 0 + %379 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + store float %378, float* %379, align 4 + %380 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + %381 = load float, float* %380, align 4 + %382 = insertelement <4 x float> zeroinitializer, float %381, i32 0 + %383 = insertelement <4 x float> %382, float 0.000000e+00, i32 1 + %384 = insertelement <4 x float> %383, float 0.000000e+00, i32 2 + %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3 + %386 = insertelement <4 x float> zeroinitializer, float %363, i32 0 + %387 = insertelement <4 x float> %386, float 1.000000e+00, i32 1 + %388 = insertelement <4 x float> %387, float 1.000000e+00, i32 2 + %389 = insertelement <4 x float> %388, float 1.000000e+00, i32 3 + %390 = insertelement <4 x float> zeroinitializer, float %369, i32 0 + %391 = insertelement <4 x float> %390, float 1.000000e+00, i32 1 + %392 = insertelement <4 x float> %391, float 1.000000e+00, i32 2 + %393 = insertelement <4 x float> %392, float 1.000000e+00, i32 3 + %394 = fmul <4 x float> %389, %393 + %395 = fmul <4 x float> %394, + %396 = fadd <4 x float> %395, + %397 = fadd <4 x float> %385, %396 + %398 = extractelement <4 x float> %397, i32 0 + %399 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + store float %398, float* %399, align 4 + %400 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + %401 = load float, float* %400, align 4 + %402 = insertelement <4 x float> zeroinitializer, float %401, i32 0 + %403 = insertelement <4 x float> %402, float 0.000000e+00, i32 1 + %404 = insertelement <4 x float> %403, float 0.000000e+00, i32 2 + %405 = insertelement <4 x float> %404, float 0.000000e+00, i32 3 + %406 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 + %407 = load float, float* %406, align 4 + %408 = insertelement <4 x float> zeroinitializer, float %407, i32 0 + %409 = insertelement <4 x float> %408, float 1.000000e+00, i32 1 + %410 = insertelement <4 x float> %409, float 1.000000e+00, i32 2 + %411 = insertelement <4 x float> %410, float 1.000000e+00, i32 3 + %412 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 + %413 = load float, float* %412, align 4 + %414 = insertelement <4 x float> zeroinitializer, float %413, i32 0 + %415 = insertelement <4 x float> %414, float 1.000000e+00, i32 1 + %416 = insertelement <4 x float> %415, float 1.000000e+00, i32 2 + %417 = insertelement <4 x float> %416, float 1.000000e+00, i32 3 + %418 = fmul <4 x float> %411, %417 + %419 = fmul <4 
x float> %418, + %420 = fadd <4 x float> %419, + %421 = fadd <4 x float> %405, %420 + %422 = extractelement <4 x float> %421, i32 0 + %423 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + store float %422, float* %423, align 4 + %424 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + %425 = load float, float* %424, align 4 + %426 = insertelement <4 x float> zeroinitializer, float %425, i32 0 + %427 = insertelement <4 x float> %426, float 0.000000e+00, i32 1 + %428 = insertelement <4 x float> %427, float 0.000000e+00, i32 2 + %429 = insertelement <4 x float> %428, float 0.000000e+00, i32 3 + %430 = insertelement <4 x float> zeroinitializer, float %407, i32 0 + %431 = insertelement <4 x float> %430, float 1.000000e+00, i32 1 + %432 = insertelement <4 x float> %431, float 1.000000e+00, i32 2 + %433 = insertelement <4 x float> %432, float 1.000000e+00, i32 3 + %434 = insertelement <4 x float> zeroinitializer, float %413, i32 0 + %435 = insertelement <4 x float> %434, float 1.000000e+00, i32 1 + %436 = insertelement <4 x float> %435, float 1.000000e+00, i32 2 + %437 = insertelement <4 x float> %436, float 1.000000e+00, i32 3 + %438 = fmul <4 x float> %433, %437 + %439 = fmul <4 x float> %438, + %440 = fadd <4 x float> %439, + %441 = fadd <4 x float> %429, %440 + %442 = extractelement <4 x float> %441, i32 0 + %443 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + store float %442, float* %443, align 4 + %444 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 + %445 = load float, float* %444, align 4 + %446 = insertelement <4 x float> zeroinitializer, float %445, i32 0 + %447 = insertelement <4 x float> %446, float 0.000000e+00, i32 1 + %448 = insertelement <4 x float> %447, float 0.000000e+00, i32 2 + %449 = insertelement <4 x float> %448, float 0.000000e+00, i32 3 + %450 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 + %451 = load float, float* %450, align 4 + %452 = insertelement <4 x float> zeroinitializer, float %451, i32 0 + %453 = insertelement <4 x float> %452, float 1.000000e+00, i32 1 + %454 = insertelement <4 x float> %453, float 1.000000e+00, i32 2 + %455 = insertelement <4 x float> %454, float 1.000000e+00, i32 3 + %456 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 + %457 = load float, float* %456, align 4 + %458 = insertelement <4 x float> zeroinitializer, float %457, i32 0 + %459 = insertelement <4 x float> %458, float 1.000000e+00, i32 1 + %460 = insertelement <4 x float> %459, float 1.000000e+00, i32 2 + %461 = insertelement <4 x float> %460, float 1.000000e+00, i32 3 + %462 = fmul <4 x float> %455, %461 + %463 = fmul <4 x float> %462, + %464 = fadd <4 x float> %463, + %465 = fadd <4 x float> %449, %464 + %466 = extractelement <4 x float> %465, i32 0 + %467 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 + store float %466, float* %467, align 4 + %468 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 + %469 = load float, float* %468, align 4 + %470 = insertelement <4 x float> zeroinitializer, float %469, i32 0 + %471 = insertelement <4 x float> %470, float 0.000000e+00, i32 1 + %472 = insertelement <4 x float> %471, float 0.000000e+00, i32 2 + %473 = insertelement <4 x float> %472, float 0.000000e+00, i32 3 + %474 = insertelement <4 x float> zeroinitializer, float %451, i32 0 + %475 = insertelement <4 x float> %474, float 1.000000e+00, i32 1 + %476 = insertelement <4 x float> %475, float 1.000000e+00, i32 2 + %477 = 
insertelement <4 x float> %476, float 1.000000e+00, i32 3 + %478 = insertelement <4 x float> zeroinitializer, float %457, i32 0 + %479 = insertelement <4 x float> %478, float 1.000000e+00, i32 1 + %480 = insertelement <4 x float> %479, float 1.000000e+00, i32 2 + %481 = insertelement <4 x float> %480, float 1.000000e+00, i32 3 + %482 = fmul <4 x float> %477, %481 + %483 = fmul <4 x float> %482, + %484 = fadd <4 x float> %483, + %485 = fadd <4 x float> %473, %484 + %486 = extractelement <4 x float> %485, i32 0 + %487 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 + store float %486, float* %487, align 4 + %488 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 + %489 = load float, float* %488, align 4 + %490 = insertelement <4 x float> zeroinitializer, float %489, i32 0 + %491 = insertelement <4 x float> %490, float 0.000000e+00, i32 1 + %492 = insertelement <4 x float> %491, float 0.000000e+00, i32 2 + %493 = insertelement <4 x float> %492, float 0.000000e+00, i32 3 + %494 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 + %495 = load float, float* %494, align 4 + %496 = insertelement <4 x float> zeroinitializer, float %495, i32 0 + %497 = insertelement <4 x float> %496, float 1.000000e+00, i32 1 + %498 = insertelement <4 x float> %497, float 1.000000e+00, i32 2 + %499 = insertelement <4 x float> %498, float 1.000000e+00, i32 3 + %500 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 + %501 = load float, float* %500, align 4 + %502 = insertelement <4 x float> zeroinitializer, float %501, i32 0 + %503 = insertelement <4 x float> %502, float 1.000000e+00, i32 1 + %504 = insertelement <4 x float> %503, float 1.000000e+00, i32 2 + %505 = insertelement <4 x float> %504, float 1.000000e+00, i32 3 + %506 = fmul <4 x float> %499, %505 + %507 = fmul <4 x float> %506, + %508 = fadd <4 x float> %507, + %509 = fadd <4 x float> %493, %508 + %510 = extractelement <4 x float> %509, i32 0 + %511 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 + store float %510, float* %511, align 4 + %512 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 + %513 = load float, float* %512, align 4 + %514 = insertelement <4 x float> zeroinitializer, float %513, i32 0 + %515 = insertelement <4 x float> %514, float 0.000000e+00, i32 1 + %516 = insertelement <4 x float> %515, float 0.000000e+00, i32 2 + %517 = insertelement <4 x float> %516, float 0.000000e+00, i32 3 + %518 = insertelement <4 x float> zeroinitializer, float %495, i32 0 + %519 = insertelement <4 x float> %518, float 1.000000e+00, i32 1 + %520 = insertelement <4 x float> %519, float 1.000000e+00, i32 2 + %521 = insertelement <4 x float> %520, float 1.000000e+00, i32 3 + %522 = insertelement <4 x float> zeroinitializer, float %501, i32 0 + %523 = insertelement <4 x float> %522, float 1.000000e+00, i32 1 + %524 = insertelement <4 x float> %523, float 1.000000e+00, i32 2 + %525 = insertelement <4 x float> %524, float 1.000000e+00, i32 3 + %526 = fmul <4 x float> %521, %525 + %527 = fmul <4 x float> %526, + %528 = fadd <4 x float> %527, + %529 = fadd <4 x float> %517, %528 + %530 = extractelement <4 x float> %529, i32 0 + %531 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 + store float %530, float* %531, align 4 + %532 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 0 + %533 = load float, float* %532, align 4 + %534 = insertelement <4 x float> zeroinitializer, float %533, i32 0 + %535 = insertelement <4 x float> 
%534, float 0.000000e+00, i32 1 + %536 = insertelement <4 x float> %535, float 0.000000e+00, i32 2 + %537 = insertelement <4 x float> %536, float 0.000000e+00, i32 3 + %538 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 + %539 = load float, float* %538, align 4 + %540 = insertelement <4 x float> zeroinitializer, float %539, i32 0 + %541 = insertelement <4 x float> %540, float 1.000000e+00, i32 1 + %542 = insertelement <4 x float> %541, float 1.000000e+00, i32 2 + %543 = insertelement <4 x float> %542, float 1.000000e+00, i32 3 + %544 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 + %545 = load float, float* %544, align 4 + %546 = insertelement <4 x float> zeroinitializer, float %545, i32 0 + %547 = insertelement <4 x float> %546, float 1.000000e+00, i32 1 + %548 = insertelement <4 x float> %547, float 1.000000e+00, i32 2 + %549 = insertelement <4 x float> %548, float 1.000000e+00, i32 3 + %550 = fmul <4 x float> %543, %549 + %551 = fmul <4 x float> %550, + %552 = fadd <4 x float> %551, + %553 = fadd <4 x float> %537, %552 + %554 = extractelement <4 x float> %553, i32 0 + %555 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 0 + store float %554, float* %555, align 4 + %556 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 0 + %557 = load float, float* %556, align 4 + %558 = insertelement <4 x float> zeroinitializer, float %557, i32 0 + %559 = insertelement <4 x float> %558, float 0.000000e+00, i32 1 + %560 = insertelement <4 x float> %559, float 0.000000e+00, i32 2 + %561 = insertelement <4 x float> %560, float 0.000000e+00, i32 3 + %562 = insertelement <4 x float> zeroinitializer, float %539, i32 0 + %563 = insertelement <4 x float> %562, float 1.000000e+00, i32 1 + %564 = insertelement <4 x float> %563, float 1.000000e+00, i32 2 + %565 = insertelement <4 x float> %564, float 1.000000e+00, i32 3 + %566 = insertelement <4 x float> zeroinitializer, float %545, i32 0 + %567 = insertelement <4 x float> %566, float 1.000000e+00, i32 1 + %568 = insertelement <4 x float> %567, float 1.000000e+00, i32 2 + %569 = insertelement <4 x float> %568, float 1.000000e+00, i32 3 + %570 = fmul <4 x float> %565, %569 + %571 = fmul <4 x float> %570, + %572 = fadd <4 x float> %571, + %573 = fadd <4 x float> %561, %572 + %574 = extractelement <4 x float> %573, i32 0 + %575 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 0 + store float %574, float* %575, align 4 + %576 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 + %577 = load float, float* %576, align 4 + %578 = insertelement <4 x float> zeroinitializer, float %577, i32 0 + %579 = insertelement <4 x float> %578, float 0.000000e+00, i32 1 + %580 = insertelement <4 x float> %579, float 0.000000e+00, i32 2 + %581 = insertelement <4 x float> %580, float 0.000000e+00, i32 3 + %582 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 + %583 = load float, float* %582, align 4 + %584 = insertelement <4 x float> zeroinitializer, float %583, i32 0 + %585 = insertelement <4 x float> %584, float 1.000000e+00, i32 1 + %586 = insertelement <4 x float> %585, float 1.000000e+00, i32 2 + %587 = insertelement <4 x float> %586, float 1.000000e+00, i32 3 + %588 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 + %589 = load float, float* %588, align 4 + %590 = insertelement <4 x float> zeroinitializer, float %589, i32 0 + %591 = insertelement <4 x float> %590, float 1.000000e+00, i32 1 + %592 = insertelement <4 x float> %591, float 
1.000000e+00, i32 2 + %593 = insertelement <4 x float> %592, float 1.000000e+00, i32 3 + %594 = fmul <4 x float> %587, %593 + %595 = fmul <4 x float> %594, + %596 = fadd <4 x float> %595, + %597 = fadd <4 x float> %581, %596 + %598 = extractelement <4 x float> %597, i32 0 + %599 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 + store float %598, float* %599, align 4 + %600 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 + %601 = load float, float* %600, align 4 + %602 = insertelement <4 x float> zeroinitializer, float %601, i32 0 + %603 = insertelement <4 x float> %602, float 0.000000e+00, i32 1 + %604 = insertelement <4 x float> %603, float 0.000000e+00, i32 2 + %605 = insertelement <4 x float> %604, float 0.000000e+00, i32 3 + %606 = insertelement <4 x float> zeroinitializer, float %583, i32 0 + %607 = insertelement <4 x float> %606, float 1.000000e+00, i32 1 + %608 = insertelement <4 x float> %607, float 1.000000e+00, i32 2 + %609 = insertelement <4 x float> %608, float 1.000000e+00, i32 3 + %610 = insertelement <4 x float> zeroinitializer, float %589, i32 0 + %611 = insertelement <4 x float> %610, float 1.000000e+00, i32 1 + %612 = insertelement <4 x float> %611, float 1.000000e+00, i32 2 + %613 = insertelement <4 x float> %612, float 1.000000e+00, i32 3 + %614 = fmul <4 x float> %609, %613 + %615 = fmul <4 x float> %614, + %616 = fadd <4 x float> %615, + %617 = fadd <4 x float> %605, %616 + %618 = extractelement <4 x float> %617, i32 0 + %619 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 + store float %618, float* %619, align 4 + %620 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 + %621 = load float, float* %620, align 4 + %622 = insertelement <4 x float> zeroinitializer, float %621, i32 0 + %623 = insertelement <4 x float> %622, float 0.000000e+00, i32 1 + %624 = insertelement <4 x float> %623, float 0.000000e+00, i32 2 + %625 = insertelement <4 x float> %624, float 0.000000e+00, i32 3 + %626 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 + %627 = load float, float* %626, align 4 + %628 = insertelement <4 x float> zeroinitializer, float %627, i32 0 + %629 = insertelement <4 x float> %628, float 1.000000e+00, i32 1 + %630 = insertelement <4 x float> %629, float 1.000000e+00, i32 2 + %631 = insertelement <4 x float> %630, float 1.000000e+00, i32 3 + %632 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 + %633 = load float, float* %632, align 4 + %634 = insertelement <4 x float> zeroinitializer, float %633, i32 0 + %635 = insertelement <4 x float> %634, float 1.000000e+00, i32 1 + %636 = insertelement <4 x float> %635, float 1.000000e+00, i32 2 + %637 = insertelement <4 x float> %636, float 1.000000e+00, i32 3 + %638 = fmul <4 x float> %631, %637 + %639 = fmul <4 x float> %638, + %640 = fadd <4 x float> %639, + %641 = fadd <4 x float> %625, %640 + %642 = extractelement <4 x float> %641, i32 0 + %643 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 + store float %642, float* %643, align 4 + %644 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 + %645 = load float, float* %644, align 4 + %646 = insertelement <4 x float> zeroinitializer, float %645, i32 0 + %647 = insertelement <4 x float> %646, float 0.000000e+00, i32 1 + %648 = insertelement <4 x float> %647, float 0.000000e+00, i32 2 + %649 = insertelement <4 x float> %648, float 0.000000e+00, i32 3 + %650 = insertelement <4 x float> zeroinitializer, float %627, i32 0 + %651 = 
insertelement <4 x float> %650, float 1.000000e+00, i32 1 + %652 = insertelement <4 x float> %651, float 1.000000e+00, i32 2 + %653 = insertelement <4 x float> %652, float 1.000000e+00, i32 3 + %654 = insertelement <4 x float> zeroinitializer, float %633, i32 0 + %655 = insertelement <4 x float> %654, float 1.000000e+00, i32 1 + %656 = insertelement <4 x float> %655, float 1.000000e+00, i32 2 + %657 = insertelement <4 x float> %656, float 1.000000e+00, i32 3 + %658 = fmul <4 x float> %653, %657 + %659 = fmul <4 x float> %658, + %660 = fadd <4 x float> %659, + %661 = fadd <4 x float> %649, %660 + %662 = extractelement <4 x float> %661, i32 0 + %663 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 + store float %662, float* %663, align 4 + %664 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 2 + %665 = load float, float* %664, align 4 + %666 = insertelement <4 x float> zeroinitializer, float %665, i32 0 + %667 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 2 + %668 = load float, float* %667, align 4 + %669 = insertelement <4 x float> %666, float %668, i32 1 + %670 = insertelement <4 x float> %669, float 0.000000e+00, i32 2 + %671 = insertelement <4 x float> %670, float 0.000000e+00, i32 3 + %672 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 + %673 = load float, float* %672, align 4 + %674 = insertelement <4 x float> zeroinitializer, float %673, i32 0 + %675 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 + %676 = load float, float* %675, align 4 + %677 = insertelement <4 x float> %674, float %676, i32 1 + %678 = insertelement <4 x float> %677, float 1.000000e+00, i32 2 + %679 = insertelement <4 x float> %678, float 1.000000e+00, i32 3 + %680 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 + %681 = load float, float* %680, align 4 + %682 = insertelement <4 x float> zeroinitializer, float %681, i32 0 + %683 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 + %684 = load float, float* %683, align 4 + %685 = insertelement <4 x float> %682, float %684, i32 1 + %686 = insertelement <4 x float> %685, float 1.000000e+00, i32 2 + %687 = insertelement <4 x float> %686, float 1.000000e+00, i32 3 + %688 = fmul <4 x float> %679, %687 + %689 = fmul <4 x float> %688, + %690 = fadd <4 x float> %689, + %691 = fadd <4 x float> %671, %690 + %692 = extractelement <4 x float> %691, i32 0 + %693 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 2 + store float %692, float* %693, align 4 + %694 = extractelement <4 x float> %691, i32 1 + %695 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 2 + store float %694, float* %695, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #0 { +.preheader: + %0 = alloca [2 x [2 x float]], align 16 + %1 = alloca [2 x [2 x float]], align 16 + %2 = alloca [3 x [3 x float]], align 16 + %3 = alloca [3 x [3 x float]], align 16 + %4 = bitcast [2 x [2 x float]]* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.mat_in to i8*), i64 16, i1 false) + %5 = bitcast [2 x [2 x float]]* %1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.f_in to i8*), i64 16, i1 false) + %6 = bitcast [3 x [3 x float]]* %2 to i8* + call void 
@llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) + %7 = bitcast [3 x [3 x float]]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %7, i8 0, i64 36, i1 false) + %8 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %0, i64 0, i64 0 + %9 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %1, i64 0, i64 0 + %10 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0 + %11 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0 + call void @load_use_twice([2 x float]* nonnull %8, [2 x float]* nonnull %9, [3 x float]* nonnull %10, [3 x float]* nonnull %11) + %12 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 0 + %13 = load float, float* %12, align 16 + %14 = fpext float %13 to double + %15 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %14) #4 + %16 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 0 + %17 = load float, float* %16, align 16 + %18 = fpext float %17 to double + %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #4 + %20 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 1 + %21 = load float, float* %20, align 4 + %22 = fpext float %21 to double + %23 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %22) #4 + %24 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 1 + %25 = load float, float* %24, align 4 + %26 = fpext float %25 to double + %27 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #4 + %28 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 2 + %29 = load float, float* %28, align 8 + %30 = fpext float %29 to double + %31 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %30) #4 + %32 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 2 + %33 = load float, float* %32, align 8 + %34 = fpext float %33 to double + %35 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %34) #4 + %36 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 0 + %37 = load float, float* %36, align 4 + %38 = fpext float %37 to double + %39 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %38) #4 + %40 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 0 + %41 = load float, float* %40, align 4 + %42 = fpext float %41 to double + %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %42) #4 + %44 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 1 + %45 = load float, float* %44, align 4 + %46 = fpext float %45 to double + %47 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %46) #4 + %48 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 1 + %49 = load float, float* %48, align 4 + %50 = fpext float %49 to double + %51 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %50) #4 + %52 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 2 + %53 = load float, float* %52, align 4 + %54 = fpext float %53 to double + %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #4 + %56 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 2 + %57 = load float, float* %56, align 4 + %58 = fpext float %57 to double + %59 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %58) #4 + %60 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 0 + %61 = load float, float* %60, align 8 + %62 = fpext float %61 to double + %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %62) #4 + %64 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 0 + %65 = load float, float* %64, align 8 + %66 = fpext float %65 to double + %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %66) #4 + %68 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 1 + %69 = load float, float* %68, align 4 + %70 = fpext float %69 to double + %71 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %70) #4 + %72 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 1 + %73 = load float, float* %72, align 4 + %74 = fpext float %73 to double + %75 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %74) #4 + %76 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 2 + %77 = load float, float* %76, align 8 + %78 = fpext float %77 to double + %79 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %78) #4 + %80 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 2 + %81 = load float, float* %80, align 8 + %82 = fpext float %81 to double + %83 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %82) #4 + ret i32 0 +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 + +declare i32 @printf(i8*, ...) 
#3 + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind willreturn writeonly } +attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/diospyros.ll b/src/dios-egraphs/Diospyros/diospyros.ll new file mode 100644 index 00000000..f4a34c43 --- /dev/null +++ b/src/dios-egraphs/Diospyros/diospyros.ll @@ -0,0 +1,1056 @@ +; ModuleID = 'aa.ll' +source_filename = "llvm-tests/load_reuse.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@__const.main.mat_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 2.000000e+00], [2 x float] [float 3.000000e+00, float 4.000000e+00]], align 16 +@__const.main.f_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 1.000000e+00], [2 x float] [float 1.000000e+00, float 1.000000e+00]], align 16 +@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 + +; Function Attrs: noinline nounwind ssp uwtable +define void @load_use_twice([2 x float]* %0, [2 x float]* %1, [3 x float]* %2, [3 x float]* %3) #0 { +.preheader7: + %4 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 0 + %5 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 0 + %6 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 + %7 = load float, float* %6, align 4 + %8 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 + %9 = load float, float* %8, align 4 + %10 = fmul float %7, %9 + %11 = fmul float %10, 3.000000e+00 + %12 = fadd float %11, -4.000000e+00 + %13 = load float, float* %4, align 4 + %14 = fadd float %13, %12 + %15 = fmul float %10, 2.000000e+00 + %16 = fadd float %15, 1.000000e+00 + %17 = load float, float* %5, align 4 + %18 = fadd float %17, %16 + %19 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 + %20 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 + %21 = load float, float* %6, align 4 + %22 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 + %23 = load float, float* %22, align 4 + %24 = fmul float %21, %23 + %25 = fmul float %24, 3.000000e+00 + %26 = fadd float %25, -4.000000e+00 + %27 = load float, float* %19, align 4 + %28 = fadd float %27, %26 + %29 = fmul float %24, 2.000000e+00 + %30 = fadd 
float %29, 1.000000e+00 + %31 = load float, float* %20, align 4 + %32 = fadd float %31, %30 + %33 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 + %34 = load float, float* %33, align 4 + %35 = load float, float* %8, align 4 + %36 = fmul float %34, %35 + %37 = fmul float %36, 3.000000e+00 + %38 = fadd float %37, -4.000000e+00 + %39 = load float, float* %19, align 4 + %40 = fadd float %39, %38 + %41 = fmul float %36, 2.000000e+00 + %42 = fadd float %41, 1.000000e+00 + %43 = load float, float* %20, align 4 + %44 = fadd float %43, %42 + %45 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 2 + %46 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 2 + %47 = load float, float* %33, align 4 + %48 = load float, float* %22, align 4 + %49 = fmul float %47, %48 + %50 = fmul float %49, 3.000000e+00 + %51 = fadd float %50, -4.000000e+00 + %52 = load float, float* %45, align 4 + %53 = fadd float %52, %51 + %54 = fmul float %49, 2.000000e+00 + %55 = fadd float %54, 1.000000e+00 + %56 = load float, float* %46, align 4 + %57 = fadd float %56, %55 + %58 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 + %59 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 + %60 = load float, float* %6, align 4 + %61 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 + %62 = load float, float* %61, align 4 + %63 = fmul float %60, %62 + %64 = fmul float %63, 3.000000e+00 + %65 = fadd float %64, -4.000000e+00 + %66 = load float, float* %58, align 4 + %67 = fadd float %66, %65 + %68 = fmul float %63, 2.000000e+00 + %69 = fadd float %68, 1.000000e+00 + %70 = load float, float* %59, align 4 + %71 = fadd float %70, %69 + %72 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 + %73 = load float, float* %72, align 4 + %74 = load float, float* %8, align 4 + %75 = fmul float %73, %74 + %76 = fmul float %75, 3.000000e+00 + %77 = fadd float %76, -4.000000e+00 + %78 = load float, float* %58, align 4 + %79 = fadd float %78, %77 + %80 = fmul float %75, 2.000000e+00 + %81 = fadd float %80, 1.000000e+00 + %82 = load float, float* %59, align 4 + %83 = fadd float %82, %81 + %84 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + %85 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + %86 = load float, float* %6, align 4 + %87 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 + %88 = load float, float* %87, align 4 + %89 = fmul float %86, %88 + %90 = fmul float %89, 3.000000e+00 + %91 = fadd float %90, -4.000000e+00 + %92 = load float, float* %84, align 4 + %93 = fadd float %92, %91 + %94 = fmul float %89, 2.000000e+00 + %95 = fadd float %94, 1.000000e+00 + %96 = load float, float* %85, align 4 + %97 = fadd float %96, %95 + %98 = load float, float* %33, align 4 + %99 = load float, float* %61, align 4 + %100 = fmul float %98, %99 + %101 = fmul float %100, 3.000000e+00 + %102 = fadd float %101, -4.000000e+00 + %103 = load float, float* %84, align 4 + %104 = fadd float %103, %102 + %105 = fmul float %100, 2.000000e+00 + %106 = fadd float %105, 1.000000e+00 + %107 = load float, float* %85, align 4 + %108 = fadd float %107, %106 + %109 = load float, float* %72, align 4 + %110 = load float, float* %22, align 4 + %111 = fmul float %109, %110 + %112 = fmul float %111, 3.000000e+00 + %113 = fadd float %112, -4.000000e+00 + %114 = load float, float* %84, align 4 + %115 = fadd float %114, %113 + %116 = fmul float %111, 2.000000e+00 + %117 = fadd float %116, 1.000000e+00 + %118 = 
load float, float* %85, align 4 + %119 = fadd float %118, %117 + %120 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 + %121 = load float, float* %120, align 4 + %122 = load float, float* %8, align 4 + %123 = fmul float %121, %122 + %124 = fmul float %123, 3.000000e+00 + %125 = fadd float %124, -4.000000e+00 + %126 = load float, float* %84, align 4 + %127 = fadd float %126, %125 + %128 = fmul float %123, 2.000000e+00 + %129 = fadd float %128, 1.000000e+00 + %130 = load float, float* %85, align 4 + %131 = fadd float %130, %129 + %132 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 + %133 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 + %134 = load float, float* %33, align 4 + %135 = load float, float* %87, align 4 + %136 = fmul float %134, %135 + %137 = fmul float %136, 3.000000e+00 + %138 = fadd float %137, -4.000000e+00 + %139 = load float, float* %132, align 4 + %140 = fadd float %139, %138 + %141 = fmul float %136, 2.000000e+00 + %142 = fadd float %141, 1.000000e+00 + %143 = load float, float* %133, align 4 + %144 = fadd float %143, %142 + %145 = load float, float* %120, align 4 + %146 = load float, float* %22, align 4 + %147 = fmul float %145, %146 + %148 = fmul float %147, 3.000000e+00 + %149 = fadd float %148, -4.000000e+00 + %150 = load float, float* %132, align 4 + %151 = fadd float %150, %149 + %152 = fmul float %147, 2.000000e+00 + %153 = fadd float %152, 1.000000e+00 + %154 = load float, float* %133, align 4 + %155 = fadd float %154, %153 + %156 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 0 + %157 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 0 + %158 = load float, float* %72, align 4 + %159 = load float, float* %61, align 4 + %160 = fmul float %158, %159 + %161 = fmul float %160, 3.000000e+00 + %162 = fadd float %161, -4.000000e+00 + %163 = load float, float* %156, align 4 + %164 = fadd float %163, %162 + %165 = fmul float %160, 2.000000e+00 + %166 = fadd float %165, 1.000000e+00 + %167 = load float, float* %157, align 4 + %168 = fadd float %167, %166 + %169 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 + %170 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 + %171 = load float, float* %72, align 4 + %172 = load float, float* %87, align 4 + %173 = fmul float %171, %172 + %174 = fmul float %173, 3.000000e+00 + %175 = fadd float %174, -4.000000e+00 + %176 = load float, float* %169, align 4 + %177 = fadd float %176, %175 + %178 = fmul float %173, 2.000000e+00 + %179 = fadd float %178, 1.000000e+00 + %180 = load float, float* %170, align 4 + %181 = fadd float %180, %179 + %182 = load float, float* %120, align 4 + %183 = load float, float* %61, align 4 + %184 = fmul float %182, %183 + %185 = fmul float %184, 3.000000e+00 + %186 = fadd float %185, -4.000000e+00 + %187 = load float, float* %169, align 4 + %188 = fadd float %187, %186 + %189 = fmul float %184, 2.000000e+00 + %190 = fadd float %189, 1.000000e+00 + %191 = load float, float* %170, align 4 + %192 = fadd float %191, %190 + %193 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 2 + %194 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 2 + %195 = load float, float* %120, align 4 + %196 = load float, float* %87, align 4 + %197 = fmul float %195, %196 + %198 = fmul float %197, 3.000000e+00 + %199 = fadd float %198, -4.000000e+00 + %200 = load float, float* %193, align 4 + %201 = fadd float %200, %199 + %202 = fmul float %197, 2.000000e+00 + %203 = fadd 
float %202, 1.000000e+00 + %204 = load float, float* %194, align 4 + %205 = fadd float %204, %203 + %206 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 0 + %207 = load float, float* %206, align 4 + %208 = insertelement <4 x float> zeroinitializer, float %207, i32 0 + %209 = insertelement <4 x float> %208, float 0.000000e+00, i32 1 + %210 = insertelement <4 x float> %209, float 0.000000e+00, i32 2 + %211 = insertelement <4 x float> %210, float 0.000000e+00, i32 3 + %212 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 + %213 = load float, float* %212, align 4 + %214 = insertelement <4 x float> zeroinitializer, float %213, i32 0 + %215 = insertelement <4 x float> %214, float 1.000000e+00, i32 1 + %216 = insertelement <4 x float> %215, float 1.000000e+00, i32 2 + %217 = insertelement <4 x float> %216, float 1.000000e+00, i32 3 + %218 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 + %219 = load float, float* %218, align 4 + %220 = insertelement <4 x float> zeroinitializer, float %219, i32 0 + %221 = insertelement <4 x float> %220, float 1.000000e+00, i32 1 + %222 = insertelement <4 x float> %221, float 1.000000e+00, i32 2 + %223 = insertelement <4 x float> %222, float 1.000000e+00, i32 3 + %224 = fmul <4 x float> %217, %223 + %225 = fmul <4 x float> %224, + %226 = fadd <4 x float> %225, + %227 = fadd <4 x float> %211, %226 + %228 = extractelement <4 x float> %227, i32 0 + %229 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 0 + store float %228, float* %229, align 4 + %230 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 0 + %231 = load float, float* %230, align 4 + %232 = insertelement <4 x float> zeroinitializer, float %231, i32 0 + %233 = insertelement <4 x float> %232, float 0.000000e+00, i32 1 + %234 = insertelement <4 x float> %233, float 0.000000e+00, i32 2 + %235 = insertelement <4 x float> %234, float 0.000000e+00, i32 3 + %236 = insertelement <4 x float> zeroinitializer, float %213, i32 0 + %237 = insertelement <4 x float> %236, float 1.000000e+00, i32 1 + %238 = insertelement <4 x float> %237, float 1.000000e+00, i32 2 + %239 = insertelement <4 x float> %238, float 1.000000e+00, i32 3 + %240 = insertelement <4 x float> zeroinitializer, float %219, i32 0 + %241 = insertelement <4 x float> %240, float 1.000000e+00, i32 1 + %242 = insertelement <4 x float> %241, float 1.000000e+00, i32 2 + %243 = insertelement <4 x float> %242, float 1.000000e+00, i32 3 + %244 = fmul <4 x float> %239, %243 + %245 = fmul <4 x float> %244, + %246 = fadd <4 x float> %245, + %247 = fadd <4 x float> %235, %246 + %248 = extractelement <4 x float> %247, i32 0 + %249 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 0 + store float %248, float* %249, align 4 + %250 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 + %251 = load float, float* %250, align 4 + %252 = insertelement <4 x float> zeroinitializer, float %251, i32 0 + %253 = insertelement <4 x float> %252, float 0.000000e+00, i32 1 + %254 = insertelement <4 x float> %253, float 0.000000e+00, i32 2 + %255 = insertelement <4 x float> %254, float 0.000000e+00, i32 3 + %256 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 + %257 = load float, float* %256, align 4 + %258 = insertelement <4 x float> zeroinitializer, float %257, i32 0 + %259 = insertelement <4 x float> %258, float 1.000000e+00, i32 1 + %260 = insertelement <4 x float> %259, float 1.000000e+00, i32 2 + %261 = insertelement <4 x float> %260, float 
1.000000e+00, i32 3 + %262 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 + %263 = load float, float* %262, align 4 + %264 = insertelement <4 x float> zeroinitializer, float %263, i32 0 + %265 = insertelement <4 x float> %264, float 1.000000e+00, i32 1 + %266 = insertelement <4 x float> %265, float 1.000000e+00, i32 2 + %267 = insertelement <4 x float> %266, float 1.000000e+00, i32 3 + %268 = fmul <4 x float> %261, %267 + %269 = fmul <4 x float> %268, + %270 = fadd <4 x float> %269, + %271 = fadd <4 x float> %255, %270 + %272 = extractelement <4 x float> %271, i32 0 + %273 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 + store float %272, float* %273, align 4 + %274 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 + %275 = load float, float* %274, align 4 + %276 = insertelement <4 x float> zeroinitializer, float %275, i32 0 + %277 = insertelement <4 x float> %276, float 0.000000e+00, i32 1 + %278 = insertelement <4 x float> %277, float 0.000000e+00, i32 2 + %279 = insertelement <4 x float> %278, float 0.000000e+00, i32 3 + %280 = insertelement <4 x float> zeroinitializer, float %257, i32 0 + %281 = insertelement <4 x float> %280, float 1.000000e+00, i32 1 + %282 = insertelement <4 x float> %281, float 1.000000e+00, i32 2 + %283 = insertelement <4 x float> %282, float 1.000000e+00, i32 3 + %284 = insertelement <4 x float> zeroinitializer, float %263, i32 0 + %285 = insertelement <4 x float> %284, float 1.000000e+00, i32 1 + %286 = insertelement <4 x float> %285, float 1.000000e+00, i32 2 + %287 = insertelement <4 x float> %286, float 1.000000e+00, i32 3 + %288 = fmul <4 x float> %283, %287 + %289 = fmul <4 x float> %288, + %290 = fadd <4 x float> %289, + %291 = fadd <4 x float> %279, %290 + %292 = extractelement <4 x float> %291, i32 0 + %293 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 + store float %292, float* %293, align 4 + %294 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 + %295 = load float, float* %294, align 4 + %296 = insertelement <4 x float> zeroinitializer, float %295, i32 0 + %297 = insertelement <4 x float> %296, float 0.000000e+00, i32 1 + %298 = insertelement <4 x float> %297, float 0.000000e+00, i32 2 + %299 = insertelement <4 x float> %298, float 0.000000e+00, i32 3 + %300 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 + %301 = load float, float* %300, align 4 + %302 = insertelement <4 x float> zeroinitializer, float %301, i32 0 + %303 = insertelement <4 x float> %302, float 1.000000e+00, i32 1 + %304 = insertelement <4 x float> %303, float 1.000000e+00, i32 2 + %305 = insertelement <4 x float> %304, float 1.000000e+00, i32 3 + %306 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 + %307 = load float, float* %306, align 4 + %308 = insertelement <4 x float> zeroinitializer, float %307, i32 0 + %309 = insertelement <4 x float> %308, float 1.000000e+00, i32 1 + %310 = insertelement <4 x float> %309, float 1.000000e+00, i32 2 + %311 = insertelement <4 x float> %310, float 1.000000e+00, i32 3 + %312 = fmul <4 x float> %305, %311 + %313 = fmul <4 x float> %312, + %314 = fadd <4 x float> %313, + %315 = fadd <4 x float> %299, %314 + %316 = extractelement <4 x float> %315, i32 0 + %317 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 + store float %316, float* %317, align 4 + %318 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 + %319 = load float, float* %318, align 4 + %320 = insertelement <4 x 
float> zeroinitializer, float %319, i32 0 + %321 = insertelement <4 x float> %320, float 0.000000e+00, i32 1 + %322 = insertelement <4 x float> %321, float 0.000000e+00, i32 2 + %323 = insertelement <4 x float> %322, float 0.000000e+00, i32 3 + %324 = insertelement <4 x float> zeroinitializer, float %301, i32 0 + %325 = insertelement <4 x float> %324, float 1.000000e+00, i32 1 + %326 = insertelement <4 x float> %325, float 1.000000e+00, i32 2 + %327 = insertelement <4 x float> %326, float 1.000000e+00, i32 3 + %328 = insertelement <4 x float> zeroinitializer, float %307, i32 0 + %329 = insertelement <4 x float> %328, float 1.000000e+00, i32 1 + %330 = insertelement <4 x float> %329, float 1.000000e+00, i32 2 + %331 = insertelement <4 x float> %330, float 1.000000e+00, i32 3 + %332 = fmul <4 x float> %327, %331 + %333 = fmul <4 x float> %332, + %334 = fadd <4 x float> %333, + %335 = fadd <4 x float> %323, %334 + %336 = extractelement <4 x float> %335, i32 0 + %337 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 + store float %336, float* %337, align 4 + %338 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 2 + %339 = load float, float* %338, align 4 + %340 = insertelement <4 x float> zeroinitializer, float %339, i32 0 + %341 = insertelement <4 x float> %340, float 0.000000e+00, i32 1 + %342 = insertelement <4 x float> %341, float 0.000000e+00, i32 2 + %343 = insertelement <4 x float> %342, float 0.000000e+00, i32 3 + %344 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 + %345 = load float, float* %344, align 4 + %346 = insertelement <4 x float> zeroinitializer, float %345, i32 0 + %347 = insertelement <4 x float> %346, float 1.000000e+00, i32 1 + %348 = insertelement <4 x float> %347, float 1.000000e+00, i32 2 + %349 = insertelement <4 x float> %348, float 1.000000e+00, i32 3 + %350 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 + %351 = load float, float* %350, align 4 + %352 = insertelement <4 x float> zeroinitializer, float %351, i32 0 + %353 = insertelement <4 x float> %352, float 1.000000e+00, i32 1 + %354 = insertelement <4 x float> %353, float 1.000000e+00, i32 2 + %355 = insertelement <4 x float> %354, float 1.000000e+00, i32 3 + %356 = fmul <4 x float> %349, %355 + %357 = fmul <4 x float> %356, + %358 = fadd <4 x float> %357, + %359 = fadd <4 x float> %343, %358 + %360 = extractelement <4 x float> %359, i32 0 + %361 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 2 + store float %360, float* %361, align 4 + %362 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 2 + %363 = load float, float* %362, align 4 + %364 = insertelement <4 x float> zeroinitializer, float %363, i32 0 + %365 = insertelement <4 x float> %364, float 0.000000e+00, i32 1 + %366 = insertelement <4 x float> %365, float 0.000000e+00, i32 2 + %367 = insertelement <4 x float> %366, float 0.000000e+00, i32 3 + %368 = insertelement <4 x float> zeroinitializer, float %345, i32 0 + %369 = insertelement <4 x float> %368, float 1.000000e+00, i32 1 + %370 = insertelement <4 x float> %369, float 1.000000e+00, i32 2 + %371 = insertelement <4 x float> %370, float 1.000000e+00, i32 3 + %372 = insertelement <4 x float> zeroinitializer, float %351, i32 0 + %373 = insertelement <4 x float> %372, float 1.000000e+00, i32 1 + %374 = insertelement <4 x float> %373, float 1.000000e+00, i32 2 + %375 = insertelement <4 x float> %374, float 1.000000e+00, i32 3 + %376 = fmul <4 x float> %371, %375 + %377 = fmul <4 x float> %376, + 
%378 = fadd <4 x float> %377, + %379 = fadd <4 x float> %367, %378 + %380 = extractelement <4 x float> %379, i32 0 + %381 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 2 + store float %380, float* %381, align 4 + %382 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 + %383 = load float, float* %382, align 4 + %384 = insertelement <4 x float> zeroinitializer, float %383, i32 0 + %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 1 + %386 = insertelement <4 x float> %385, float 0.000000e+00, i32 2 + %387 = insertelement <4 x float> %386, float 0.000000e+00, i32 3 + %388 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 + %389 = load float, float* %388, align 4 + %390 = insertelement <4 x float> zeroinitializer, float %389, i32 0 + %391 = insertelement <4 x float> %390, float 1.000000e+00, i32 1 + %392 = insertelement <4 x float> %391, float 1.000000e+00, i32 2 + %393 = insertelement <4 x float> %392, float 1.000000e+00, i32 3 + %394 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 + %395 = load float, float* %394, align 4 + %396 = insertelement <4 x float> zeroinitializer, float %395, i32 0 + %397 = insertelement <4 x float> %396, float 1.000000e+00, i32 1 + %398 = insertelement <4 x float> %397, float 1.000000e+00, i32 2 + %399 = insertelement <4 x float> %398, float 1.000000e+00, i32 3 + %400 = fmul <4 x float> %393, %399 + %401 = fmul <4 x float> %400, + %402 = fadd <4 x float> %401, + %403 = fadd <4 x float> %387, %402 + %404 = extractelement <4 x float> %403, i32 0 + %405 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 + store float %404, float* %405, align 4 + %406 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 + %407 = load float, float* %406, align 4 + %408 = insertelement <4 x float> zeroinitializer, float %407, i32 0 + %409 = insertelement <4 x float> %408, float 0.000000e+00, i32 1 + %410 = insertelement <4 x float> %409, float 0.000000e+00, i32 2 + %411 = insertelement <4 x float> %410, float 0.000000e+00, i32 3 + %412 = insertelement <4 x float> zeroinitializer, float %389, i32 0 + %413 = insertelement <4 x float> %412, float 1.000000e+00, i32 1 + %414 = insertelement <4 x float> %413, float 1.000000e+00, i32 2 + %415 = insertelement <4 x float> %414, float 1.000000e+00, i32 3 + %416 = insertelement <4 x float> zeroinitializer, float %395, i32 0 + %417 = insertelement <4 x float> %416, float 1.000000e+00, i32 1 + %418 = insertelement <4 x float> %417, float 1.000000e+00, i32 2 + %419 = insertelement <4 x float> %418, float 1.000000e+00, i32 3 + %420 = fmul <4 x float> %415, %419 + %421 = fmul <4 x float> %420, + %422 = fadd <4 x float> %421, + %423 = fadd <4 x float> %411, %422 + %424 = extractelement <4 x float> %423, i32 0 + %425 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 + store float %424, float* %425, align 4 + %426 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 + %427 = load float, float* %426, align 4 + %428 = insertelement <4 x float> zeroinitializer, float %427, i32 0 + %429 = insertelement <4 x float> %428, float 0.000000e+00, i32 1 + %430 = insertelement <4 x float> %429, float 0.000000e+00, i32 2 + %431 = insertelement <4 x float> %430, float 0.000000e+00, i32 3 + %432 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 + %433 = load float, float* %432, align 4 + %434 = insertelement <4 x float> zeroinitializer, float %433, i32 0 + %435 = insertelement <4 x float> %434, float 
1.000000e+00, i32 1 + %436 = insertelement <4 x float> %435, float 1.000000e+00, i32 2 + %437 = insertelement <4 x float> %436, float 1.000000e+00, i32 3 + %438 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 + %439 = load float, float* %438, align 4 + %440 = insertelement <4 x float> zeroinitializer, float %439, i32 0 + %441 = insertelement <4 x float> %440, float 1.000000e+00, i32 1 + %442 = insertelement <4 x float> %441, float 1.000000e+00, i32 2 + %443 = insertelement <4 x float> %442, float 1.000000e+00, i32 3 + %444 = fmul <4 x float> %437, %443 + %445 = fmul <4 x float> %444, + %446 = fadd <4 x float> %445, + %447 = fadd <4 x float> %431, %446 + %448 = extractelement <4 x float> %447, i32 0 + %449 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 + store float %448, float* %449, align 4 + %450 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 + %451 = load float, float* %450, align 4 + %452 = insertelement <4 x float> zeroinitializer, float %451, i32 0 + %453 = insertelement <4 x float> %452, float 0.000000e+00, i32 1 + %454 = insertelement <4 x float> %453, float 0.000000e+00, i32 2 + %455 = insertelement <4 x float> %454, float 0.000000e+00, i32 3 + %456 = insertelement <4 x float> zeroinitializer, float %433, i32 0 + %457 = insertelement <4 x float> %456, float 1.000000e+00, i32 1 + %458 = insertelement <4 x float> %457, float 1.000000e+00, i32 2 + %459 = insertelement <4 x float> %458, float 1.000000e+00, i32 3 + %460 = insertelement <4 x float> zeroinitializer, float %439, i32 0 + %461 = insertelement <4 x float> %460, float 1.000000e+00, i32 1 + %462 = insertelement <4 x float> %461, float 1.000000e+00, i32 2 + %463 = insertelement <4 x float> %462, float 1.000000e+00, i32 3 + %464 = fmul <4 x float> %459, %463 + %465 = fmul <4 x float> %464, + %466 = fadd <4 x float> %465, + %467 = fadd <4 x float> %455, %466 + %468 = extractelement <4 x float> %467, i32 0 + %469 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 + store float %468, float* %469, align 4 + %470 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + %471 = load float, float* %470, align 4 + %472 = insertelement <4 x float> zeroinitializer, float %471, i32 0 + %473 = insertelement <4 x float> %472, float 0.000000e+00, i32 1 + %474 = insertelement <4 x float> %473, float 0.000000e+00, i32 2 + %475 = insertelement <4 x float> %474, float 0.000000e+00, i32 3 + %476 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 + %477 = load float, float* %476, align 4 + %478 = insertelement <4 x float> zeroinitializer, float %477, i32 0 + %479 = insertelement <4 x float> %478, float 1.000000e+00, i32 1 + %480 = insertelement <4 x float> %479, float 1.000000e+00, i32 2 + %481 = insertelement <4 x float> %480, float 1.000000e+00, i32 3 + %482 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 + %483 = load float, float* %482, align 4 + %484 = insertelement <4 x float> zeroinitializer, float %483, i32 0 + %485 = insertelement <4 x float> %484, float 1.000000e+00, i32 1 + %486 = insertelement <4 x float> %485, float 1.000000e+00, i32 2 + %487 = insertelement <4 x float> %486, float 1.000000e+00, i32 3 + %488 = fmul <4 x float> %481, %487 + %489 = fmul <4 x float> %488, + %490 = fadd <4 x float> %489, + %491 = fadd <4 x float> %475, %490 + %492 = extractelement <4 x float> %491, i32 0 + %493 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + store float %492, float* %493, align 4 + %494 = 
getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + %495 = load float, float* %494, align 4 + %496 = insertelement <4 x float> zeroinitializer, float %495, i32 0 + %497 = insertelement <4 x float> %496, float 0.000000e+00, i32 1 + %498 = insertelement <4 x float> %497, float 0.000000e+00, i32 2 + %499 = insertelement <4 x float> %498, float 0.000000e+00, i32 3 + %500 = insertelement <4 x float> zeroinitializer, float %477, i32 0 + %501 = insertelement <4 x float> %500, float 1.000000e+00, i32 1 + %502 = insertelement <4 x float> %501, float 1.000000e+00, i32 2 + %503 = insertelement <4 x float> %502, float 1.000000e+00, i32 3 + %504 = insertelement <4 x float> zeroinitializer, float %483, i32 0 + %505 = insertelement <4 x float> %504, float 1.000000e+00, i32 1 + %506 = insertelement <4 x float> %505, float 1.000000e+00, i32 2 + %507 = insertelement <4 x float> %506, float 1.000000e+00, i32 3 + %508 = fmul <4 x float> %503, %507 + %509 = fmul <4 x float> %508, + %510 = fadd <4 x float> %509, + %511 = fadd <4 x float> %499, %510 + %512 = extractelement <4 x float> %511, i32 0 + %513 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + store float %512, float* %513, align 4 + %514 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + %515 = load float, float* %514, align 4 + %516 = insertelement <4 x float> zeroinitializer, float %515, i32 0 + %517 = insertelement <4 x float> %516, float 0.000000e+00, i32 1 + %518 = insertelement <4 x float> %517, float 0.000000e+00, i32 2 + %519 = insertelement <4 x float> %518, float 0.000000e+00, i32 3 + %520 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 + %521 = load float, float* %520, align 4 + %522 = insertelement <4 x float> zeroinitializer, float %521, i32 0 + %523 = insertelement <4 x float> %522, float 1.000000e+00, i32 1 + %524 = insertelement <4 x float> %523, float 1.000000e+00, i32 2 + %525 = insertelement <4 x float> %524, float 1.000000e+00, i32 3 + %526 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 + %527 = load float, float* %526, align 4 + %528 = insertelement <4 x float> zeroinitializer, float %527, i32 0 + %529 = insertelement <4 x float> %528, float 1.000000e+00, i32 1 + %530 = insertelement <4 x float> %529, float 1.000000e+00, i32 2 + %531 = insertelement <4 x float> %530, float 1.000000e+00, i32 3 + %532 = fmul <4 x float> %525, %531 + %533 = fmul <4 x float> %532, + %534 = fadd <4 x float> %533, + %535 = fadd <4 x float> %519, %534 + %536 = extractelement <4 x float> %535, i32 0 + %537 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + store float %536, float* %537, align 4 + %538 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + %539 = load float, float* %538, align 4 + %540 = insertelement <4 x float> zeroinitializer, float %539, i32 0 + %541 = insertelement <4 x float> %540, float 0.000000e+00, i32 1 + %542 = insertelement <4 x float> %541, float 0.000000e+00, i32 2 + %543 = insertelement <4 x float> %542, float 0.000000e+00, i32 3 + %544 = insertelement <4 x float> zeroinitializer, float %521, i32 0 + %545 = insertelement <4 x float> %544, float 1.000000e+00, i32 1 + %546 = insertelement <4 x float> %545, float 1.000000e+00, i32 2 + %547 = insertelement <4 x float> %546, float 1.000000e+00, i32 3 + %548 = insertelement <4 x float> zeroinitializer, float %527, i32 0 + %549 = insertelement <4 x float> %548, float 1.000000e+00, i32 1 + %550 = insertelement <4 x float> %549, float 1.000000e+00, i32 2 + 
%551 = insertelement <4 x float> %550, float 1.000000e+00, i32 3 + %552 = fmul <4 x float> %547, %551 + %553 = fmul <4 x float> %552, + %554 = fadd <4 x float> %553, + %555 = fadd <4 x float> %543, %554 + %556 = extractelement <4 x float> %555, i32 0 + %557 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + store float %556, float* %557, align 4 + %558 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + %559 = load float, float* %558, align 4 + %560 = insertelement <4 x float> zeroinitializer, float %559, i32 0 + %561 = insertelement <4 x float> %560, float 0.000000e+00, i32 1 + %562 = insertelement <4 x float> %561, float 0.000000e+00, i32 2 + %563 = insertelement <4 x float> %562, float 0.000000e+00, i32 3 + %564 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 + %565 = load float, float* %564, align 4 + %566 = insertelement <4 x float> zeroinitializer, float %565, i32 0 + %567 = insertelement <4 x float> %566, float 1.000000e+00, i32 1 + %568 = insertelement <4 x float> %567, float 1.000000e+00, i32 2 + %569 = insertelement <4 x float> %568, float 1.000000e+00, i32 3 + %570 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 + %571 = load float, float* %570, align 4 + %572 = insertelement <4 x float> zeroinitializer, float %571, i32 0 + %573 = insertelement <4 x float> %572, float 1.000000e+00, i32 1 + %574 = insertelement <4 x float> %573, float 1.000000e+00, i32 2 + %575 = insertelement <4 x float> %574, float 1.000000e+00, i32 3 + %576 = fmul <4 x float> %569, %575 + %577 = fmul <4 x float> %576, + %578 = fadd <4 x float> %577, + %579 = fadd <4 x float> %563, %578 + %580 = extractelement <4 x float> %579, i32 0 + %581 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + store float %580, float* %581, align 4 + %582 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + %583 = load float, float* %582, align 4 + %584 = insertelement <4 x float> zeroinitializer, float %583, i32 0 + %585 = insertelement <4 x float> %584, float 0.000000e+00, i32 1 + %586 = insertelement <4 x float> %585, float 0.000000e+00, i32 2 + %587 = insertelement <4 x float> %586, float 0.000000e+00, i32 3 + %588 = insertelement <4 x float> zeroinitializer, float %565, i32 0 + %589 = insertelement <4 x float> %588, float 1.000000e+00, i32 1 + %590 = insertelement <4 x float> %589, float 1.000000e+00, i32 2 + %591 = insertelement <4 x float> %590, float 1.000000e+00, i32 3 + %592 = insertelement <4 x float> zeroinitializer, float %571, i32 0 + %593 = insertelement <4 x float> %592, float 1.000000e+00, i32 1 + %594 = insertelement <4 x float> %593, float 1.000000e+00, i32 2 + %595 = insertelement <4 x float> %594, float 1.000000e+00, i32 3 + %596 = fmul <4 x float> %591, %595 + %597 = fmul <4 x float> %596, + %598 = fadd <4 x float> %597, + %599 = fadd <4 x float> %587, %598 + %600 = extractelement <4 x float> %599, i32 0 + %601 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + store float %600, float* %601, align 4 + %602 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + %603 = load float, float* %602, align 4 + %604 = insertelement <4 x float> zeroinitializer, float %603, i32 0 + %605 = insertelement <4 x float> %604, float 0.000000e+00, i32 1 + %606 = insertelement <4 x float> %605, float 0.000000e+00, i32 2 + %607 = insertelement <4 x float> %606, float 0.000000e+00, i32 3 + %608 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 + %609 = load float, 
float* %608, align 4 + %610 = insertelement <4 x float> zeroinitializer, float %609, i32 0 + %611 = insertelement <4 x float> %610, float 1.000000e+00, i32 1 + %612 = insertelement <4 x float> %611, float 1.000000e+00, i32 2 + %613 = insertelement <4 x float> %612, float 1.000000e+00, i32 3 + %614 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 + %615 = load float, float* %614, align 4 + %616 = insertelement <4 x float> zeroinitializer, float %615, i32 0 + %617 = insertelement <4 x float> %616, float 1.000000e+00, i32 1 + %618 = insertelement <4 x float> %617, float 1.000000e+00, i32 2 + %619 = insertelement <4 x float> %618, float 1.000000e+00, i32 3 + %620 = fmul <4 x float> %613, %619 + %621 = fmul <4 x float> %620, + %622 = fadd <4 x float> %621, + %623 = fadd <4 x float> %607, %622 + %624 = extractelement <4 x float> %623, i32 0 + %625 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + store float %624, float* %625, align 4 + %626 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + %627 = load float, float* %626, align 4 + %628 = insertelement <4 x float> zeroinitializer, float %627, i32 0 + %629 = insertelement <4 x float> %628, float 0.000000e+00, i32 1 + %630 = insertelement <4 x float> %629, float 0.000000e+00, i32 2 + %631 = insertelement <4 x float> %630, float 0.000000e+00, i32 3 + %632 = insertelement <4 x float> zeroinitializer, float %609, i32 0 + %633 = insertelement <4 x float> %632, float 1.000000e+00, i32 1 + %634 = insertelement <4 x float> %633, float 1.000000e+00, i32 2 + %635 = insertelement <4 x float> %634, float 1.000000e+00, i32 3 + %636 = insertelement <4 x float> zeroinitializer, float %615, i32 0 + %637 = insertelement <4 x float> %636, float 1.000000e+00, i32 1 + %638 = insertelement <4 x float> %637, float 1.000000e+00, i32 2 + %639 = insertelement <4 x float> %638, float 1.000000e+00, i32 3 + %640 = fmul <4 x float> %635, %639 + %641 = fmul <4 x float> %640, + %642 = fadd <4 x float> %641, + %643 = fadd <4 x float> %631, %642 + %644 = extractelement <4 x float> %643, i32 0 + %645 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + store float %644, float* %645, align 4 + %646 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 + %647 = load float, float* %646, align 4 + %648 = insertelement <4 x float> zeroinitializer, float %647, i32 0 + %649 = insertelement <4 x float> %648, float 0.000000e+00, i32 1 + %650 = insertelement <4 x float> %649, float 0.000000e+00, i32 2 + %651 = insertelement <4 x float> %650, float 0.000000e+00, i32 3 + %652 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 + %653 = load float, float* %652, align 4 + %654 = insertelement <4 x float> zeroinitializer, float %653, i32 0 + %655 = insertelement <4 x float> %654, float 1.000000e+00, i32 1 + %656 = insertelement <4 x float> %655, float 1.000000e+00, i32 2 + %657 = insertelement <4 x float> %656, float 1.000000e+00, i32 3 + %658 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 + %659 = load float, float* %658, align 4 + %660 = insertelement <4 x float> zeroinitializer, float %659, i32 0 + %661 = insertelement <4 x float> %660, float 1.000000e+00, i32 1 + %662 = insertelement <4 x float> %661, float 1.000000e+00, i32 2 + %663 = insertelement <4 x float> %662, float 1.000000e+00, i32 3 + %664 = fmul <4 x float> %657, %663 + %665 = fmul <4 x float> %664, + %666 = fadd <4 x float> %665, + %667 = fadd <4 x float> %651, %666 + %668 = extractelement <4 x float> 
%667, i32 0 + %669 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 + store float %668, float* %669, align 4 + %670 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 + %671 = load float, float* %670, align 4 + %672 = insertelement <4 x float> zeroinitializer, float %671, i32 0 + %673 = insertelement <4 x float> %672, float 0.000000e+00, i32 1 + %674 = insertelement <4 x float> %673, float 0.000000e+00, i32 2 + %675 = insertelement <4 x float> %674, float 0.000000e+00, i32 3 + %676 = insertelement <4 x float> zeroinitializer, float %653, i32 0 + %677 = insertelement <4 x float> %676, float 1.000000e+00, i32 1 + %678 = insertelement <4 x float> %677, float 1.000000e+00, i32 2 + %679 = insertelement <4 x float> %678, float 1.000000e+00, i32 3 + %680 = insertelement <4 x float> zeroinitializer, float %659, i32 0 + %681 = insertelement <4 x float> %680, float 1.000000e+00, i32 1 + %682 = insertelement <4 x float> %681, float 1.000000e+00, i32 2 + %683 = insertelement <4 x float> %682, float 1.000000e+00, i32 3 + %684 = fmul <4 x float> %679, %683 + %685 = fmul <4 x float> %684, + %686 = fadd <4 x float> %685, + %687 = fadd <4 x float> %675, %686 + %688 = extractelement <4 x float> %687, i32 0 + %689 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 + store float %688, float* %689, align 4 + %690 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 + %691 = load float, float* %690, align 4 + %692 = insertelement <4 x float> zeroinitializer, float %691, i32 0 + %693 = insertelement <4 x float> %692, float 0.000000e+00, i32 1 + %694 = insertelement <4 x float> %693, float 0.000000e+00, i32 2 + %695 = insertelement <4 x float> %694, float 0.000000e+00, i32 3 + %696 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 + %697 = load float, float* %696, align 4 + %698 = insertelement <4 x float> zeroinitializer, float %697, i32 0 + %699 = insertelement <4 x float> %698, float 1.000000e+00, i32 1 + %700 = insertelement <4 x float> %699, float 1.000000e+00, i32 2 + %701 = insertelement <4 x float> %700, float 1.000000e+00, i32 3 + %702 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 + %703 = load float, float* %702, align 4 + %704 = insertelement <4 x float> zeroinitializer, float %703, i32 0 + %705 = insertelement <4 x float> %704, float 1.000000e+00, i32 1 + %706 = insertelement <4 x float> %705, float 1.000000e+00, i32 2 + %707 = insertelement <4 x float> %706, float 1.000000e+00, i32 3 + %708 = fmul <4 x float> %701, %707 + %709 = fmul <4 x float> %708, + %710 = fadd <4 x float> %709, + %711 = fadd <4 x float> %695, %710 + %712 = extractelement <4 x float> %711, i32 0 + %713 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 + store float %712, float* %713, align 4 + %714 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 + %715 = load float, float* %714, align 4 + %716 = insertelement <4 x float> zeroinitializer, float %715, i32 0 + %717 = insertelement <4 x float> %716, float 0.000000e+00, i32 1 + %718 = insertelement <4 x float> %717, float 0.000000e+00, i32 2 + %719 = insertelement <4 x float> %718, float 0.000000e+00, i32 3 + %720 = insertelement <4 x float> zeroinitializer, float %697, i32 0 + %721 = insertelement <4 x float> %720, float 1.000000e+00, i32 1 + %722 = insertelement <4 x float> %721, float 1.000000e+00, i32 2 + %723 = insertelement <4 x float> %722, float 1.000000e+00, i32 3 + %724 = insertelement <4 x float> zeroinitializer, float %703, i32 0 
+ %725 = insertelement <4 x float> %724, float 1.000000e+00, i32 1 + %726 = insertelement <4 x float> %725, float 1.000000e+00, i32 2 + %727 = insertelement <4 x float> %726, float 1.000000e+00, i32 3 + %728 = fmul <4 x float> %723, %727 + %729 = fmul <4 x float> %728, + %730 = fadd <4 x float> %729, + %731 = fadd <4 x float> %719, %730 + %732 = extractelement <4 x float> %731, i32 0 + %733 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 + store float %732, float* %733, align 4 + %734 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 0 + %735 = load float, float* %734, align 4 + %736 = insertelement <4 x float> zeroinitializer, float %735, i32 0 + %737 = insertelement <4 x float> %736, float 0.000000e+00, i32 1 + %738 = insertelement <4 x float> %737, float 0.000000e+00, i32 2 + %739 = insertelement <4 x float> %738, float 0.000000e+00, i32 3 + %740 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 + %741 = load float, float* %740, align 4 + %742 = insertelement <4 x float> zeroinitializer, float %741, i32 0 + %743 = insertelement <4 x float> %742, float 1.000000e+00, i32 1 + %744 = insertelement <4 x float> %743, float 1.000000e+00, i32 2 + %745 = insertelement <4 x float> %744, float 1.000000e+00, i32 3 + %746 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 + %747 = load float, float* %746, align 4 + %748 = insertelement <4 x float> zeroinitializer, float %747, i32 0 + %749 = insertelement <4 x float> %748, float 1.000000e+00, i32 1 + %750 = insertelement <4 x float> %749, float 1.000000e+00, i32 2 + %751 = insertelement <4 x float> %750, float 1.000000e+00, i32 3 + %752 = fmul <4 x float> %745, %751 + %753 = fmul <4 x float> %752, + %754 = fadd <4 x float> %753, + %755 = fadd <4 x float> %739, %754 + %756 = extractelement <4 x float> %755, i32 0 + %757 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 0 + store float %756, float* %757, align 4 + %758 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 0 + %759 = load float, float* %758, align 4 + %760 = insertelement <4 x float> zeroinitializer, float %759, i32 0 + %761 = insertelement <4 x float> %760, float 0.000000e+00, i32 1 + %762 = insertelement <4 x float> %761, float 0.000000e+00, i32 2 + %763 = insertelement <4 x float> %762, float 0.000000e+00, i32 3 + %764 = insertelement <4 x float> zeroinitializer, float %741, i32 0 + %765 = insertelement <4 x float> %764, float 1.000000e+00, i32 1 + %766 = insertelement <4 x float> %765, float 1.000000e+00, i32 2 + %767 = insertelement <4 x float> %766, float 1.000000e+00, i32 3 + %768 = insertelement <4 x float> zeroinitializer, float %747, i32 0 + %769 = insertelement <4 x float> %768, float 1.000000e+00, i32 1 + %770 = insertelement <4 x float> %769, float 1.000000e+00, i32 2 + %771 = insertelement <4 x float> %770, float 1.000000e+00, i32 3 + %772 = fmul <4 x float> %767, %771 + %773 = fmul <4 x float> %772, + %774 = fadd <4 x float> %773, + %775 = fadd <4 x float> %763, %774 + %776 = extractelement <4 x float> %775, i32 0 + %777 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 0 + store float %776, float* %777, align 4 + %778 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 + %779 = load float, float* %778, align 4 + %780 = insertelement <4 x float> zeroinitializer, float %779, i32 0 + %781 = insertelement <4 x float> %780, float 0.000000e+00, i32 1 + %782 = insertelement <4 x float> %781, float 0.000000e+00, i32 2 + %783 = insertelement <4 x 
float> %782, float 0.000000e+00, i32 3 + %784 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 + %785 = load float, float* %784, align 4 + %786 = insertelement <4 x float> zeroinitializer, float %785, i32 0 + %787 = insertelement <4 x float> %786, float 1.000000e+00, i32 1 + %788 = insertelement <4 x float> %787, float 1.000000e+00, i32 2 + %789 = insertelement <4 x float> %788, float 1.000000e+00, i32 3 + %790 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 + %791 = load float, float* %790, align 4 + %792 = insertelement <4 x float> zeroinitializer, float %791, i32 0 + %793 = insertelement <4 x float> %792, float 1.000000e+00, i32 1 + %794 = insertelement <4 x float> %793, float 1.000000e+00, i32 2 + %795 = insertelement <4 x float> %794, float 1.000000e+00, i32 3 + %796 = fmul <4 x float> %789, %795 + %797 = fmul <4 x float> %796, + %798 = fadd <4 x float> %797, + %799 = fadd <4 x float> %783, %798 + %800 = extractelement <4 x float> %799, i32 0 + %801 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 + store float %800, float* %801, align 4 + %802 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 + %803 = load float, float* %802, align 4 + %804 = insertelement <4 x float> zeroinitializer, float %803, i32 0 + %805 = insertelement <4 x float> %804, float 0.000000e+00, i32 1 + %806 = insertelement <4 x float> %805, float 0.000000e+00, i32 2 + %807 = insertelement <4 x float> %806, float 0.000000e+00, i32 3 + %808 = insertelement <4 x float> zeroinitializer, float %785, i32 0 + %809 = insertelement <4 x float> %808, float 1.000000e+00, i32 1 + %810 = insertelement <4 x float> %809, float 1.000000e+00, i32 2 + %811 = insertelement <4 x float> %810, float 1.000000e+00, i32 3 + %812 = insertelement <4 x float> zeroinitializer, float %791, i32 0 + %813 = insertelement <4 x float> %812, float 1.000000e+00, i32 1 + %814 = insertelement <4 x float> %813, float 1.000000e+00, i32 2 + %815 = insertelement <4 x float> %814, float 1.000000e+00, i32 3 + %816 = fmul <4 x float> %811, %815 + %817 = fmul <4 x float> %816, + %818 = fadd <4 x float> %817, + %819 = fadd <4 x float> %807, %818 + %820 = extractelement <4 x float> %819, i32 0 + %821 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 + store float %820, float* %821, align 4 + %822 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 + %823 = load float, float* %822, align 4 + %824 = insertelement <4 x float> zeroinitializer, float %823, i32 0 + %825 = insertelement <4 x float> %824, float 0.000000e+00, i32 1 + %826 = insertelement <4 x float> %825, float 0.000000e+00, i32 2 + %827 = insertelement <4 x float> %826, float 0.000000e+00, i32 3 + %828 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 + %829 = load float, float* %828, align 4 + %830 = insertelement <4 x float> zeroinitializer, float %829, i32 0 + %831 = insertelement <4 x float> %830, float 1.000000e+00, i32 1 + %832 = insertelement <4 x float> %831, float 1.000000e+00, i32 2 + %833 = insertelement <4 x float> %832, float 1.000000e+00, i32 3 + %834 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 + %835 = load float, float* %834, align 4 + %836 = insertelement <4 x float> zeroinitializer, float %835, i32 0 + %837 = insertelement <4 x float> %836, float 1.000000e+00, i32 1 + %838 = insertelement <4 x float> %837, float 1.000000e+00, i32 2 + %839 = insertelement <4 x float> %838, float 1.000000e+00, i32 3 + %840 = fmul <4 x float> %833, %839 + 
%841 = fmul <4 x float> %840, + %842 = fadd <4 x float> %841, + %843 = fadd <4 x float> %827, %842 + %844 = extractelement <4 x float> %843, i32 0 + %845 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 + store float %844, float* %845, align 4 + %846 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 + %847 = load float, float* %846, align 4 + %848 = insertelement <4 x float> zeroinitializer, float %847, i32 0 + %849 = insertelement <4 x float> %848, float 0.000000e+00, i32 1 + %850 = insertelement <4 x float> %849, float 0.000000e+00, i32 2 + %851 = insertelement <4 x float> %850, float 0.000000e+00, i32 3 + %852 = insertelement <4 x float> zeroinitializer, float %829, i32 0 + %853 = insertelement <4 x float> %852, float 1.000000e+00, i32 1 + %854 = insertelement <4 x float> %853, float 1.000000e+00, i32 2 + %855 = insertelement <4 x float> %854, float 1.000000e+00, i32 3 + %856 = insertelement <4 x float> zeroinitializer, float %835, i32 0 + %857 = insertelement <4 x float> %856, float 1.000000e+00, i32 1 + %858 = insertelement <4 x float> %857, float 1.000000e+00, i32 2 + %859 = insertelement <4 x float> %858, float 1.000000e+00, i32 3 + %860 = fmul <4 x float> %855, %859 + %861 = fmul <4 x float> %860, + %862 = fadd <4 x float> %861, + %863 = fadd <4 x float> %851, %862 + %864 = extractelement <4 x float> %863, i32 0 + %865 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 + store float %864, float* %865, align 4 + %866 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 2 + %867 = load float, float* %866, align 4 + %868 = insertelement <4 x float> zeroinitializer, float %867, i32 0 + %869 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 2 + %870 = load float, float* %869, align 4 + %871 = insertelement <4 x float> %868, float %870, i32 1 + %872 = insertelement <4 x float> %871, float 0.000000e+00, i32 2 + %873 = insertelement <4 x float> %872, float 0.000000e+00, i32 3 + %874 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 + %875 = load float, float* %874, align 4 + %876 = insertelement <4 x float> zeroinitializer, float %875, i32 0 + %877 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 + %878 = load float, float* %877, align 4 + %879 = insertelement <4 x float> %876, float %878, i32 1 + %880 = insertelement <4 x float> %879, float 1.000000e+00, i32 2 + %881 = insertelement <4 x float> %880, float 1.000000e+00, i32 3 + %882 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 + %883 = load float, float* %882, align 4 + %884 = insertelement <4 x float> zeroinitializer, float %883, i32 0 + %885 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 + %886 = load float, float* %885, align 4 + %887 = insertelement <4 x float> %884, float %886, i32 1 + %888 = insertelement <4 x float> %887, float 1.000000e+00, i32 2 + %889 = insertelement <4 x float> %888, float 1.000000e+00, i32 3 + %890 = fmul <4 x float> %881, %889 + %891 = fmul <4 x float> %890, + %892 = fadd <4 x float> %891, + %893 = fadd <4 x float> %873, %892 + %894 = extractelement <4 x float> %893, i32 0 + %895 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 2 + store float %894, float* %895, align 4 + %896 = extractelement <4 x float> %893, i32 1 + %897 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 2 + store float %896, float* %897, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #0 { +.preheader: 
+ %0 = alloca [2 x [2 x float]], align 16 + %1 = alloca [2 x [2 x float]], align 16 + %2 = alloca [3 x [3 x float]], align 16 + %3 = alloca [3 x [3 x float]], align 16 + %4 = bitcast [2 x [2 x float]]* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.mat_in to i8*), i64 16, i1 false) + %5 = bitcast [2 x [2 x float]]* %1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.f_in to i8*), i64 16, i1 false) + %6 = bitcast [3 x [3 x float]]* %2 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) + %7 = bitcast [3 x [3 x float]]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %7, i8 0, i64 36, i1 false) + %8 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %0, i64 0, i64 0 + %9 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %1, i64 0, i64 0 + %10 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0 + %11 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0 + call void @load_use_twice([2 x float]* nonnull %8, [2 x float]* nonnull %9, [3 x float]* nonnull %10, [3 x float]* nonnull %11) + %12 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 0 + %13 = load float, float* %12, align 16 + %14 = fpext float %13 to double + %15 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %14) #4 + %16 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 0 + %17 = load float, float* %16, align 16 + %18 = fpext float %17 to double + %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #4 + %20 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 1 + %21 = load float, float* %20, align 4 + %22 = fpext float %21 to double + %23 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %22) #4 + %24 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 1 + %25 = load float, float* %24, align 4 + %26 = fpext float %25 to double + %27 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #4 + %28 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 2 + %29 = load float, float* %28, align 8 + %30 = fpext float %29 to double + %31 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %30) #4 + %32 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 2 + %33 = load float, float* %32, align 8 + %34 = fpext float %33 to double + %35 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %34) #4 + %36 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 0 + %37 = load float, float* %36, align 4 + %38 = fpext float %37 to double + %39 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %38) #4 + %40 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 0 + %41 = load float, float* %40, align 4 + %42 = fpext float %41 to double + %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %42) #4 + %44 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 1 + %45 = load float, float* %44, align 4 + %46 = fpext float %45 to double + %47 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %46) #4 + %48 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 1 + %49 = load float, float* %48, align 4 + %50 = fpext float %49 to double + %51 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %50) #4 + %52 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 2 + %53 = load float, float* %52, align 4 + %54 = fpext float %53 to double + %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #4 + %56 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 2 + %57 = load float, float* %56, align 4 + %58 = fpext float %57 to double + %59 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %58) #4 + %60 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 0 + %61 = load float, float* %60, align 8 + %62 = fpext float %61 to double + %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %62) #4 + %64 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 0 + %65 = load float, float* %64, align 8 + %66 = fpext float %65 to double + %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %66) #4 + %68 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 1 + %69 = load float, float* %68, align 4 + %70 = fpext float %69 to double + %71 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %70) #4 + %72 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 1 + %73 = load float, float* %72, align 4 + %74 = fpext float %73 to double + %75 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %74) #4 + %76 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 2 + %77 = load float, float* %76, align 8 + %78 = fpext float %77 to double + %79 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %78) #4 + %80 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 2 + %81 = load float, float* %80, align 8 + %82 = fpext float %81 to double + %83 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %82) #4 + ret i32 0 +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 + +declare i32 @printf(i8*, ...) #3 + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind willreturn writeonly } +attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/llvm-tests/load_reuse.c b/src/dios-egraphs/Diospyros/llvm-tests/load_reuse.c new file mode 100644 index 00000000..b80b2403 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/load_reuse.c @@ -0,0 +1,70 @@ +#include <stdio.h> + +#define I_ROWS 2 +#define I_COLS 2 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void load_use_twice(float mat_in[I_ROWS][I_COLS], float f_in[F_ROWS][F_COLS], + float mat_out[O_ROWS][O_COLS], + float mat_out2[O_ROWS][O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = + mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + mat_out[outRow][outCol] += + 3 * v - + 4; // try something to use v in a different way + mat_out2[outRow][outCol] += + 2 * v + + 1; // try something to use v in a different way + } + } + } + } + } +} + +int main(void) { + float mat_in[I_ROWS][I_COLS] = {{1, 2}, {3, 4}}; + float f_in[F_ROWS][F_COLS] = {{1, 1}, {1, 1}}; + float mat_out[O_ROWS][O_COLS] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; + float mat_out2[O_ROWS][O_COLS] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; + load_use_twice(mat_in, f_in, mat_out, mat_out2); + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + printf("output: %f\n", mat_out[i][j]); + printf("output: %f\n", mat_out2[i][j]); + } + } +// output: -1.000000 +// output: 3.000000 +// 
output: 1.000000 +// output: 8.000000 +// output: 2.000000 +// output: 5.000000 +// output: 4.000000 +// output: 10.000000 +// output: 14.000000 +// output: 24.000000 +// output: 10.000000 +// output: 14.000000 +// output: 5.000000 +// output: 7.000000 +// output: 13.000000 +// output: 16.000000 +// output: 8.000000 +// output: 9.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/opt.ll b/src/dios-egraphs/Diospyros/opt.ll new file mode 100644 index 00000000..745e1062 --- /dev/null +++ b/src/dios-egraphs/Diospyros/opt.ll @@ -0,0 +1,364 @@ +; ModuleID = 'clang.ll' +source_filename = "llvm-tests/load_reuse.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@__const.main.mat_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 2.000000e+00], [2 x float] [float 3.000000e+00, float 4.000000e+00]], align 16 +@__const.main.f_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 1.000000e+00], [2 x float] [float 1.000000e+00, float 1.000000e+00]], align 16 +@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 + +; Function Attrs: noinline nounwind ssp uwtable +define void @load_use_twice([2 x float]* %0, [2 x float]* %1, [3 x float]* %2, [3 x float]* %3) #0 { +.preheader7: + %4 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 0 + %5 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 0 + %6 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 + %7 = load float, float* %6, align 4 + %8 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 + %9 = load float, float* %8, align 4 + %10 = fmul float %7, %9 + %11 = fmul float %10, 3.000000e+00 + %12 = fadd float %11, -4.000000e+00 + %13 = load float, float* %4, align 4 + %14 = fadd float %13, %12 + store float %14, float* %4, align 4 + %15 = fmul float %10, 2.000000e+00 + %16 = fadd float %15, 1.000000e+00 + %17 = load float, float* %5, align 4 + %18 = fadd float %17, %16 + store float %18, float* %5, align 4 + %19 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 + %20 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 + %21 = load float, float* %6, align 4 + %22 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 + %23 = load float, float* %22, align 4 + %24 = fmul float %21, %23 + %25 = fmul float %24, 3.000000e+00 + %26 = fadd float %25, -4.000000e+00 + %27 = load float, float* %19, align 4 + %28 = fadd float %27, %26 + store float %28, float* %19, align 4 + %29 = fmul float %24, 2.000000e+00 + %30 = fadd float %29, 1.000000e+00 + %31 = load float, float* %20, align 4 + %32 = fadd float %31, %30 + store float %32, float* %20, align 4 + %33 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 + %34 = load float, float* %33, align 4 + %35 = load float, float* %8, align 4 + %36 = fmul float %34, %35 + %37 = fmul float %36, 3.000000e+00 + %38 = fadd float %37, -4.000000e+00 + %39 = load float, float* %19, align 4 + %40 = fadd float %39, %38 + store float %40, float* %19, align 4 + %41 = fmul float %36, 2.000000e+00 + %42 = fadd float %41, 1.000000e+00 + %43 = load float, float* %20, align 4 + %44 = fadd float %43, %42 + store float %44, float* %20, align 4 + %45 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 2 + %46 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 
2 + %47 = load float, float* %33, align 4 + %48 = load float, float* %22, align 4 + %49 = fmul float %47, %48 + %50 = fmul float %49, 3.000000e+00 + %51 = fadd float %50, -4.000000e+00 + %52 = load float, float* %45, align 4 + %53 = fadd float %52, %51 + store float %53, float* %45, align 4 + %54 = fmul float %49, 2.000000e+00 + %55 = fadd float %54, 1.000000e+00 + %56 = load float, float* %46, align 4 + %57 = fadd float %56, %55 + store float %57, float* %46, align 4 + %58 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 + %59 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 + %60 = load float, float* %6, align 4 + %61 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 + %62 = load float, float* %61, align 4 + %63 = fmul float %60, %62 + %64 = fmul float %63, 3.000000e+00 + %65 = fadd float %64, -4.000000e+00 + %66 = load float, float* %58, align 4 + %67 = fadd float %66, %65 + store float %67, float* %58, align 4 + %68 = fmul float %63, 2.000000e+00 + %69 = fadd float %68, 1.000000e+00 + %70 = load float, float* %59, align 4 + %71 = fadd float %70, %69 + store float %71, float* %59, align 4 + %72 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 + %73 = load float, float* %72, align 4 + %74 = load float, float* %8, align 4 + %75 = fmul float %73, %74 + %76 = fmul float %75, 3.000000e+00 + %77 = fadd float %76, -4.000000e+00 + %78 = load float, float* %58, align 4 + %79 = fadd float %78, %77 + store float %79, float* %58, align 4 + %80 = fmul float %75, 2.000000e+00 + %81 = fadd float %80, 1.000000e+00 + %82 = load float, float* %59, align 4 + %83 = fadd float %82, %81 + store float %83, float* %59, align 4 + %84 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 + %85 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 + %86 = load float, float* %6, align 4 + %87 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 + %88 = load float, float* %87, align 4 + %89 = fmul float %86, %88 + %90 = fmul float %89, 3.000000e+00 + %91 = fadd float %90, -4.000000e+00 + %92 = load float, float* %84, align 4 + %93 = fadd float %92, %91 + store float %93, float* %84, align 4 + %94 = fmul float %89, 2.000000e+00 + %95 = fadd float %94, 1.000000e+00 + %96 = load float, float* %85, align 4 + %97 = fadd float %96, %95 + store float %97, float* %85, align 4 + %98 = load float, float* %33, align 4 + %99 = load float, float* %61, align 4 + %100 = fmul float %98, %99 + %101 = fmul float %100, 3.000000e+00 + %102 = fadd float %101, -4.000000e+00 + %103 = load float, float* %84, align 4 + %104 = fadd float %103, %102 + store float %104, float* %84, align 4 + %105 = fmul float %100, 2.000000e+00 + %106 = fadd float %105, 1.000000e+00 + %107 = load float, float* %85, align 4 + %108 = fadd float %107, %106 + store float %108, float* %85, align 4 + %109 = load float, float* %72, align 4 + %110 = load float, float* %22, align 4 + %111 = fmul float %109, %110 + %112 = fmul float %111, 3.000000e+00 + %113 = fadd float %112, -4.000000e+00 + %114 = load float, float* %84, align 4 + %115 = fadd float %114, %113 + store float %115, float* %84, align 4 + %116 = fmul float %111, 2.000000e+00 + %117 = fadd float %116, 1.000000e+00 + %118 = load float, float* %85, align 4 + %119 = fadd float %118, %117 + store float %119, float* %85, align 4 + %120 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 + %121 = load float, float* %120, align 4 + %122 = load float, float* %8, align 4 + %123 = 
fmul float %121, %122 + %124 = fmul float %123, 3.000000e+00 + %125 = fadd float %124, -4.000000e+00 + %126 = load float, float* %84, align 4 + %127 = fadd float %126, %125 + store float %127, float* %84, align 4 + %128 = fmul float %123, 2.000000e+00 + %129 = fadd float %128, 1.000000e+00 + %130 = load float, float* %85, align 4 + %131 = fadd float %130, %129 + store float %131, float* %85, align 4 + %132 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 + %133 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 + %134 = load float, float* %33, align 4 + %135 = load float, float* %87, align 4 + %136 = fmul float %134, %135 + %137 = fmul float %136, 3.000000e+00 + %138 = fadd float %137, -4.000000e+00 + %139 = load float, float* %132, align 4 + %140 = fadd float %139, %138 + store float %140, float* %132, align 4 + %141 = fmul float %136, 2.000000e+00 + %142 = fadd float %141, 1.000000e+00 + %143 = load float, float* %133, align 4 + %144 = fadd float %143, %142 + store float %144, float* %133, align 4 + %145 = load float, float* %120, align 4 + %146 = load float, float* %22, align 4 + %147 = fmul float %145, %146 + %148 = fmul float %147, 3.000000e+00 + %149 = fadd float %148, -4.000000e+00 + %150 = load float, float* %132, align 4 + %151 = fadd float %150, %149 + store float %151, float* %132, align 4 + %152 = fmul float %147, 2.000000e+00 + %153 = fadd float %152, 1.000000e+00 + %154 = load float, float* %133, align 4 + %155 = fadd float %154, %153 + store float %155, float* %133, align 4 + %156 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 0 + %157 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 0 + %158 = load float, float* %72, align 4 + %159 = load float, float* %61, align 4 + %160 = fmul float %158, %159 + %161 = fmul float %160, 3.000000e+00 + %162 = fadd float %161, -4.000000e+00 + %163 = load float, float* %156, align 4 + %164 = fadd float %163, %162 + store float %164, float* %156, align 4 + %165 = fmul float %160, 2.000000e+00 + %166 = fadd float %165, 1.000000e+00 + %167 = load float, float* %157, align 4 + %168 = fadd float %167, %166 + store float %168, float* %157, align 4 + %169 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 + %170 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 + %171 = load float, float* %72, align 4 + %172 = load float, float* %87, align 4 + %173 = fmul float %171, %172 + %174 = fmul float %173, 3.000000e+00 + %175 = fadd float %174, -4.000000e+00 + %176 = load float, float* %169, align 4 + %177 = fadd float %176, %175 + store float %177, float* %169, align 4 + %178 = fmul float %173, 2.000000e+00 + %179 = fadd float %178, 1.000000e+00 + %180 = load float, float* %170, align 4 + %181 = fadd float %180, %179 + store float %181, float* %170, align 4 + %182 = load float, float* %120, align 4 + %183 = load float, float* %61, align 4 + %184 = fmul float %182, %183 + %185 = fmul float %184, 3.000000e+00 + %186 = fadd float %185, -4.000000e+00 + %187 = load float, float* %169, align 4 + %188 = fadd float %187, %186 + store float %188, float* %169, align 4 + %189 = fmul float %184, 2.000000e+00 + %190 = fadd float %189, 1.000000e+00 + %191 = load float, float* %170, align 4 + %192 = fadd float %191, %190 + store float %192, float* %170, align 4 + %193 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 2 + %194 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 2 + %195 = load float, float* %120, align 4 + %196 = load 
float, float* %87, align 4 + %197 = fmul float %195, %196 + %198 = fmul float %197, 3.000000e+00 + %199 = fadd float %198, -4.000000e+00 + %200 = load float, float* %193, align 4 + %201 = fadd float %200, %199 + store float %201, float* %193, align 4 + %202 = fmul float %197, 2.000000e+00 + %203 = fadd float %202, 1.000000e+00 + %204 = load float, float* %194, align 4 + %205 = fadd float %204, %203 + store float %205, float* %194, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #0 { +.preheader: + %0 = alloca [2 x [2 x float]], align 16 + %1 = alloca [2 x [2 x float]], align 16 + %2 = alloca [3 x [3 x float]], align 16 + %3 = alloca [3 x [3 x float]], align 16 + %4 = bitcast [2 x [2 x float]]* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.mat_in to i8*), i64 16, i1 false) + %5 = bitcast [2 x [2 x float]]* %1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.f_in to i8*), i64 16, i1 false) + %6 = bitcast [3 x [3 x float]]* %2 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) + %7 = bitcast [3 x [3 x float]]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %7, i8 0, i64 36, i1 false) + %8 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %0, i64 0, i64 0 + %9 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %1, i64 0, i64 0 + %10 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0 + %11 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0 + call void @load_use_twice([2 x float]* nonnull %8, [2 x float]* nonnull %9, [3 x float]* nonnull %10, [3 x float]* nonnull %11) + %12 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 0 + %13 = load float, float* %12, align 16 + %14 = fpext float %13 to double + %15 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %14) #4 + %16 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 0 + %17 = load float, float* %16, align 16 + %18 = fpext float %17 to double + %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #4 + %20 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 1 + %21 = load float, float* %20, align 4 + %22 = fpext float %21 to double + %23 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %22) #4 + %24 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 1 + %25 = load float, float* %24, align 4 + %26 = fpext float %25 to double + %27 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #4 + %28 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 2 + %29 = load float, float* %28, align 8 + %30 = fpext float %29 to double + %31 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %30) #4 + %32 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 2 + %33 = load float, float* %32, align 8 + %34 = fpext float %33 to double + %35 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %34) #4 + %36 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 0 + %37 = load float, float* %36, align 4 + %38 = fpext float %37 to double + %39 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %38) #4 + %40 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 0 + %41 = load float, float* %40, align 4 + %42 = fpext float %41 to double + %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %42) #4 + %44 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 1 + %45 = load float, float* %44, align 4 + %46 = fpext float %45 to double + %47 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %46) #4 + %48 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 1 + %49 = load float, float* %48, align 4 + %50 = fpext float %49 to double + %51 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %50) #4 + %52 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 2 + %53 = load float, float* %52, align 4 + %54 = fpext float %53 to double + %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #4 + %56 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 2 + %57 = load float, float* %56, align 4 + %58 = fpext float %57 to double + %59 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %58) #4 + %60 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 0 + %61 = load float, float* %60, align 8 + %62 = fpext float %61 to double + %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %62) #4 + %64 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 0 + %65 = load float, float* %64, align 8 + %66 = fpext float %65 to double + %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %66) #4 + %68 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 1 + %69 = load float, float* %68, align 4 + %70 = fpext float %69 to double + %71 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %70) #4 + %72 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 1 + %73 = load float, float* %72, align 4 + %74 = fpext float %73 to double + %75 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %74) #4 + %76 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 2 + %77 = load float, float* %76, align 8 + %78 = fpext float %77 to double + %79 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %78) #4 + %80 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 2 + %81 = load float, float* %80, align 8 + %82 = fpext float %81 to double + %83 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %82) #4 + ret i32 0 +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 + +declare i32 @printf(i8*, ...) #3 + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind willreturn writeonly } +attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} From 54da95ef93574c998fdb7b1368c0c4fd7de0dec6 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Mon, 13 Dec 2021 03:29:50 -0500 Subject: [PATCH 003/143] remove ll --- src/dios-egraphs/Diospyros/aa.ll | 364 -------- src/dios-egraphs/Diospyros/clang.ll | 283 ------ src/dios-egraphs/Diospyros/dce.ll | 854 ------------------ src/dios-egraphs/Diospyros/diospyros.ll | 1056 ----------------------- src/dios-egraphs/Diospyros/opt.ll | 364 -------- 5 files changed, 2921 deletions(-) delete mode 100644 src/dios-egraphs/Diospyros/aa.ll delete mode 100644 src/dios-egraphs/Diospyros/clang.ll delete mode 100644 src/dios-egraphs/Diospyros/dce.ll delete mode 100644 src/dios-egraphs/Diospyros/diospyros.ll delete mode 100644 src/dios-egraphs/Diospyros/opt.ll diff --git a/src/dios-egraphs/Diospyros/aa.ll b/src/dios-egraphs/Diospyros/aa.ll deleted file mode 100644 index df32c909..00000000 --- a/src/dios-egraphs/Diospyros/aa.ll +++ /dev/null @@ -1,364 +0,0 @@ -; ModuleID = 'opt.ll' -source_filename = "llvm-tests/load_reuse.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target 
triple = "x86_64-apple-macosx10.14.0" - -@__const.main.mat_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 2.000000e+00], [2 x float] [float 3.000000e+00, float 4.000000e+00]], align 16 -@__const.main.f_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 1.000000e+00], [2 x float] [float 1.000000e+00, float 1.000000e+00]], align 16 -@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 - -; Function Attrs: noinline nounwind ssp uwtable -define void @load_use_twice([2 x float]* %0, [2 x float]* %1, [3 x float]* %2, [3 x float]* %3) #0 { -.preheader7: - %4 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 0 - %5 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 0 - %6 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 - %7 = load float, float* %6, align 4 - %8 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 - %9 = load float, float* %8, align 4 - %10 = fmul float %7, %9 - %11 = fmul float %10, 3.000000e+00 - %12 = fadd float %11, -4.000000e+00 - %13 = load float, float* %4, align 4 - %14 = fadd float %13, %12 - store float %14, float* %4, align 4 - %15 = fmul float %10, 2.000000e+00 - %16 = fadd float %15, 1.000000e+00 - %17 = load float, float* %5, align 4 - %18 = fadd float %17, %16 - store float %18, float* %5, align 4 - %19 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 - %20 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 - %21 = load float, float* %6, align 4 - %22 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 - %23 = load float, float* %22, align 4 - %24 = fmul float %21, %23 - %25 = fmul float %24, 3.000000e+00 - %26 = fadd float %25, -4.000000e+00 - %27 = load float, float* %19, align 4 - %28 = fadd float %27, %26 - store float %28, float* %19, align 4 - %29 = fmul float %24, 2.000000e+00 - %30 = fadd float %29, 1.000000e+00 - %31 = load float, float* %20, align 4 - %32 = fadd float %31, %30 - store float %32, float* %20, align 4 - %33 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 - %34 = load float, float* %33, align 4 - %35 = load float, float* %8, align 4 - %36 = fmul float %34, %35 - %37 = fmul float %36, 3.000000e+00 - %38 = fadd float %37, -4.000000e+00 - %39 = load float, float* %19, align 4 - %40 = fadd float %39, %38 - store float %40, float* %19, align 4 - %41 = fmul float %36, 2.000000e+00 - %42 = fadd float %41, 1.000000e+00 - %43 = load float, float* %20, align 4 - %44 = fadd float %43, %42 - store float %44, float* %20, align 4 - %45 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 2 - %46 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 2 - %47 = load float, float* %33, align 4 - %48 = load float, float* %22, align 4 - %49 = fmul float %47, %48 - %50 = fmul float %49, 3.000000e+00 - %51 = fadd float %50, -4.000000e+00 - %52 = load float, float* %45, align 4 - %53 = fadd float %52, %51 - store float %53, float* %45, align 4 - %54 = fmul float %49, 2.000000e+00 - %55 = fadd float %54, 1.000000e+00 - %56 = load float, float* %46, align 4 - %57 = fadd float %56, %55 - store float %57, float* %46, align 4 - %58 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 - %59 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 - %60 = load float, float* %6, align 4 - %61 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 - %62 = load float, 
float* %61, align 4 - %63 = fmul float %60, %62 - %64 = fmul float %63, 3.000000e+00 - %65 = fadd float %64, -4.000000e+00 - %66 = load float, float* %58, align 4 - %67 = fadd float %66, %65 - store float %67, float* %58, align 4 - %68 = fmul float %63, 2.000000e+00 - %69 = fadd float %68, 1.000000e+00 - %70 = load float, float* %59, align 4 - %71 = fadd float %70, %69 - store float %71, float* %59, align 4 - %72 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 - %73 = load float, float* %72, align 4 - %74 = load float, float* %8, align 4 - %75 = fmul float %73, %74 - %76 = fmul float %75, 3.000000e+00 - %77 = fadd float %76, -4.000000e+00 - %78 = load float, float* %58, align 4 - %79 = fadd float %78, %77 - store float %79, float* %58, align 4 - %80 = fmul float %75, 2.000000e+00 - %81 = fadd float %80, 1.000000e+00 - %82 = load float, float* %59, align 4 - %83 = fadd float %82, %81 - store float %83, float* %59, align 4 - %84 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - %85 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - %86 = load float, float* %6, align 4 - %87 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 - %88 = load float, float* %87, align 4 - %89 = fmul float %86, %88 - %90 = fmul float %89, 3.000000e+00 - %91 = fadd float %90, -4.000000e+00 - %92 = load float, float* %84, align 4 - %93 = fadd float %92, %91 - store float %93, float* %84, align 4 - %94 = fmul float %89, 2.000000e+00 - %95 = fadd float %94, 1.000000e+00 - %96 = load float, float* %85, align 4 - %97 = fadd float %96, %95 - store float %97, float* %85, align 4 - %98 = load float, float* %33, align 4 - %99 = load float, float* %61, align 4 - %100 = fmul float %98, %99 - %101 = fmul float %100, 3.000000e+00 - %102 = fadd float %101, -4.000000e+00 - %103 = load float, float* %84, align 4 - %104 = fadd float %103, %102 - store float %104, float* %84, align 4 - %105 = fmul float %100, 2.000000e+00 - %106 = fadd float %105, 1.000000e+00 - %107 = load float, float* %85, align 4 - %108 = fadd float %107, %106 - store float %108, float* %85, align 4 - %109 = load float, float* %72, align 4 - %110 = load float, float* %22, align 4 - %111 = fmul float %109, %110 - %112 = fmul float %111, 3.000000e+00 - %113 = fadd float %112, -4.000000e+00 - %114 = load float, float* %84, align 4 - %115 = fadd float %114, %113 - store float %115, float* %84, align 4 - %116 = fmul float %111, 2.000000e+00 - %117 = fadd float %116, 1.000000e+00 - %118 = load float, float* %85, align 4 - %119 = fadd float %118, %117 - store float %119, float* %85, align 4 - %120 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 - %121 = load float, float* %120, align 4 - %122 = load float, float* %8, align 4 - %123 = fmul float %121, %122 - %124 = fmul float %123, 3.000000e+00 - %125 = fadd float %124, -4.000000e+00 - %126 = load float, float* %84, align 4 - %127 = fadd float %126, %125 - store float %127, float* %84, align 4 - %128 = fmul float %123, 2.000000e+00 - %129 = fadd float %128, 1.000000e+00 - %130 = load float, float* %85, align 4 - %131 = fadd float %130, %129 - store float %131, float* %85, align 4 - %132 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 - %133 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 - %134 = load float, float* %33, align 4 - %135 = load float, float* %87, align 4 - %136 = fmul float %134, %135 - %137 = fmul float %136, 3.000000e+00 - %138 = fadd float %137, -4.000000e+00 - %139 = 
load float, float* %132, align 4 - %140 = fadd float %139, %138 - store float %140, float* %132, align 4 - %141 = fmul float %136, 2.000000e+00 - %142 = fadd float %141, 1.000000e+00 - %143 = load float, float* %133, align 4 - %144 = fadd float %143, %142 - store float %144, float* %133, align 4 - %145 = load float, float* %120, align 4 - %146 = load float, float* %22, align 4 - %147 = fmul float %145, %146 - %148 = fmul float %147, 3.000000e+00 - %149 = fadd float %148, -4.000000e+00 - %150 = load float, float* %132, align 4 - %151 = fadd float %150, %149 - store float %151, float* %132, align 4 - %152 = fmul float %147, 2.000000e+00 - %153 = fadd float %152, 1.000000e+00 - %154 = load float, float* %133, align 4 - %155 = fadd float %154, %153 - store float %155, float* %133, align 4 - %156 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 0 - %157 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 0 - %158 = load float, float* %72, align 4 - %159 = load float, float* %61, align 4 - %160 = fmul float %158, %159 - %161 = fmul float %160, 3.000000e+00 - %162 = fadd float %161, -4.000000e+00 - %163 = load float, float* %156, align 4 - %164 = fadd float %163, %162 - store float %164, float* %156, align 4 - %165 = fmul float %160, 2.000000e+00 - %166 = fadd float %165, 1.000000e+00 - %167 = load float, float* %157, align 4 - %168 = fadd float %167, %166 - store float %168, float* %157, align 4 - %169 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 - %170 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 - %171 = load float, float* %72, align 4 - %172 = load float, float* %87, align 4 - %173 = fmul float %171, %172 - %174 = fmul float %173, 3.000000e+00 - %175 = fadd float %174, -4.000000e+00 - %176 = load float, float* %169, align 4 - %177 = fadd float %176, %175 - store float %177, float* %169, align 4 - %178 = fmul float %173, 2.000000e+00 - %179 = fadd float %178, 1.000000e+00 - %180 = load float, float* %170, align 4 - %181 = fadd float %180, %179 - store float %181, float* %170, align 4 - %182 = load float, float* %120, align 4 - %183 = load float, float* %61, align 4 - %184 = fmul float %182, %183 - %185 = fmul float %184, 3.000000e+00 - %186 = fadd float %185, -4.000000e+00 - %187 = load float, float* %169, align 4 - %188 = fadd float %187, %186 - store float %188, float* %169, align 4 - %189 = fmul float %184, 2.000000e+00 - %190 = fadd float %189, 1.000000e+00 - %191 = load float, float* %170, align 4 - %192 = fadd float %191, %190 - store float %192, float* %170, align 4 - %193 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 2 - %194 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 2 - %195 = load float, float* %120, align 4 - %196 = load float, float* %87, align 4 - %197 = fmul float %195, %196 - %198 = fmul float %197, 3.000000e+00 - %199 = fadd float %198, -4.000000e+00 - %200 = load float, float* %193, align 4 - %201 = fadd float %200, %199 - store float %201, float* %193, align 4 - %202 = fmul float %197, 2.000000e+00 - %203 = fadd float %202, 1.000000e+00 - %204 = load float, float* %194, align 4 - %205 = fadd float %204, %203 - store float %205, float* %194, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #0 { -.preheader: - %0 = alloca [2 x [2 x float]], align 16 - %1 = alloca [2 x [2 x float]], align 16 - %2 = alloca [3 x [3 x float]], align 16 - %3 = alloca [3 x [3 x float]], align 16 - %4 = bitcast [2 x [2 x float]]* %0 to i8* 
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.mat_in to i8*), i64 16, i1 false) - %5 = bitcast [2 x [2 x float]]* %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.f_in to i8*), i64 16, i1 false) - %6 = bitcast [3 x [3 x float]]* %2 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) - %7 = bitcast [3 x [3 x float]]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %7, i8 0, i64 36, i1 false) - %8 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %0, i64 0, i64 0 - %9 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %1, i64 0, i64 0 - %10 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0 - %11 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0 - call void @load_use_twice([2 x float]* nonnull %8, [2 x float]* nonnull %9, [3 x float]* nonnull %10, [3 x float]* nonnull %11) - %12 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 0 - %13 = load float, float* %12, align 16 - %14 = fpext float %13 to double - %15 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %14) #4 - %16 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 0 - %17 = load float, float* %16, align 16 - %18 = fpext float %17 to double - %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #4 - %20 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 1 - %21 = load float, float* %20, align 4 - %22 = fpext float %21 to double - %23 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %22) #4 - %24 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 1 - %25 = load float, float* %24, align 4 - %26 = fpext float %25 to double - %27 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #4 - %28 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 2 - %29 = load float, float* %28, align 8 - %30 = fpext float %29 to double - %31 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %30) #4 - %32 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 2 - %33 = load float, float* %32, align 8 - %34 = fpext float %33 to double - %35 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %34) #4 - %36 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 0 - %37 = load float, float* %36, align 4 - %38 = fpext float %37 to double - %39 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %38) #4 - %40 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 0 - %41 = load float, float* %40, align 4 - %42 = fpext float %41 to double - %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %42) #4 - %44 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 1 - %45 = load float, float* %44, align 4 - %46 = fpext float %45 to double - %47 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %46) #4 - %48 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 1 - %49 = load float, float* %48, align 4 - %50 = fpext float %49 to double - %51 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %50) #4 - %52 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 2 - %53 = load float, float* %52, align 4 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #4 - %56 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 2 - %57 = load float, float* %56, align 4 - %58 = fpext float %57 to double - %59 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %58) #4 - %60 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 0 - %61 = load float, float* %60, align 8 - %62 = fpext float %61 to double - %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %62) #4 - %64 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 0 - %65 = load float, float* %64, align 8 - %66 = fpext float %65 to double - %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %66) #4 - %68 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 1 - %69 = load float, float* %68, align 4 - %70 = fpext float %69 to double - %71 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %70) #4 - %72 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 1 - %73 = load float, float* %72, align 4 - %74 = fpext float %73 to double - %75 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %74) #4 - %76 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 2 - %77 = load float, float* %76, align 8 - %78 = fpext float %77 to double - %79 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %78) #4 - %80 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 2 - %81 = load float, float* %80, align 8 - %82 = fpext float %81 to double - %83 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %82) #4 - ret i32 0 -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 - -declare i32 @printf(i8*, ...) #3 - -attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { argmemonly nounwind willreturn writeonly } -attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/clang.ll b/src/dios-egraphs/Diospyros/clang.ll deleted file mode 100644 index fd259ce5..00000000 --- a/src/dios-egraphs/Diospyros/clang.ll +++ /dev/null @@ -1,283 +0,0 @@ -; ModuleID = 'llvm-tests/load_reuse.c' -source_filename = "llvm-tests/load_reuse.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@__const.main.mat_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 2.000000e+00], [2 x float] [float 3.000000e+00, float 4.000000e+00]], align 16 -@__const.main.f_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 1.000000e+00], [2 x float] [float 1.000000e+00, float 1.000000e+00]], align 16 -@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 - -; Function Attrs: noinline nounwind ssp uwtable -define void @load_use_twice([2 x float]* %0, [2 x float]* %1, [3 x float]* %2, [3 x float]* %3) #0 { - %5 = alloca [2 x float]*, align 8 - %6 = alloca [2 x float]*, align 8 - %7 = alloca [3 x float]*, align 8 - %8 = alloca [3 x float]*, align 8 - %9 = alloca i32, align 4 - %10 = alloca i32, align 4 - %11 = alloca i32, align 4 - %12 = alloca i32, align 4 - %13 = alloca i32, align 4 - %14 = alloca i32, align 4 - %15 = alloca i32, align 4 - %16 = alloca i32, align 4 - %17 = alloca float, align 4 - store [2 x float]* %0, [2 x float]** %5, align 8 - store [2 x float]* %1, [2 x float]** %6, align 8 - store [3 x float]* %2, [3 x float]** %7, align 8 - store [3 x float]* %3, [3 x float]** %8, align 8 - store i32 0, i32* %9, align 4 - br label %18 - -18: ; preds = %110, %4 - %19 = load i32, i32* %9, align 4 - 
%20 = icmp slt i32 %19, 3 - br i1 %20, label %21, label %113 - -21: ; preds = %18 - store i32 0, i32* %10, align 4 - br label %22 - -22: ; preds = %106, %21 - %23 = load i32, i32* %10, align 4 - %24 = icmp slt i32 %23, 3 - br i1 %24, label %25, label %109 - -25: ; preds = %22 - store i32 0, i32* %11, align 4 - br label %26 - -26: ; preds = %102, %25 - %27 = load i32, i32* %11, align 4 - %28 = icmp slt i32 %27, 2 - br i1 %28, label %29, label %105 - -29: ; preds = %26 - store i32 0, i32* %12, align 4 - br label %30 - -30: ; preds = %98, %29 - %31 = load i32, i32* %12, align 4 - %32 = icmp slt i32 %31, 2 - br i1 %32, label %33, label %101 - -33: ; preds = %30 - %34 = load i32, i32* %11, align 4 - %35 = sub nsw i32 1, %34 - store i32 %35, i32* %13, align 4 - %36 = load i32, i32* %12, align 4 - %37 = sub nsw i32 1, %36 - store i32 %37, i32* %14, align 4 - %38 = load i32, i32* %9, align 4 - %39 = load i32, i32* %13, align 4 - %40 = sub nsw i32 %38, %39 - store i32 %40, i32* %15, align 4 - %41 = load i32, i32* %10, align 4 - %42 = load i32, i32* %14, align 4 - %43 = sub nsw i32 %41, %42 - store i32 %43, i32* %16, align 4 - %44 = load i32, i32* %15, align 4 - %45 = icmp sge i32 %44, 0 - br i1 %45, label %46, label %97 - -46: ; preds = %33 - %47 = load i32, i32* %15, align 4 - %48 = icmp slt i32 %47, 2 - br i1 %48, label %49, label %97 - -49: ; preds = %46 - %50 = load i32, i32* %16, align 4 - %51 = icmp sge i32 %50, 0 - br i1 %51, label %52, label %97 - -52: ; preds = %49 - %53 = load i32, i32* %16, align 4 - %54 = icmp slt i32 %53, 2 - br i1 %54, label %55, label %97 - -55: ; preds = %52 - %56 = load [2 x float]*, [2 x float]** %5, align 8 - %57 = load i32, i32* %15, align 4 - %58 = sext i32 %57 to i64 - %59 = getelementptr inbounds [2 x float], [2 x float]* %56, i64 %58 - %60 = load i32, i32* %16, align 4 - %61 = sext i32 %60 to i64 - %62 = getelementptr inbounds [2 x float], [2 x float]* %59, i64 0, i64 %61 - %63 = load float, float* %62, align 4 - %64 = load [2 x float]*, [2 x float]** %6, align 8 - %65 = load i32, i32* %13, align 4 - %66 = sext i32 %65 to i64 - %67 = getelementptr inbounds [2 x float], [2 x float]* %64, i64 %66 - %68 = load i32, i32* %14, align 4 - %69 = sext i32 %68 to i64 - %70 = getelementptr inbounds [2 x float], [2 x float]* %67, i64 0, i64 %69 - %71 = load float, float* %70, align 4 - %72 = fmul float %63, %71 - store float %72, float* %17, align 4 - %73 = load float, float* %17, align 4 - %74 = fmul float 3.000000e+00, %73 - %75 = fsub float %74, 4.000000e+00 - %76 = load [3 x float]*, [3 x float]** %7, align 8 - %77 = load i32, i32* %9, align 4 - %78 = sext i32 %77 to i64 - %79 = getelementptr inbounds [3 x float], [3 x float]* %76, i64 %78 - %80 = load i32, i32* %10, align 4 - %81 = sext i32 %80 to i64 - %82 = getelementptr inbounds [3 x float], [3 x float]* %79, i64 0, i64 %81 - %83 = load float, float* %82, align 4 - %84 = fadd float %83, %75 - store float %84, float* %82, align 4 - %85 = load float, float* %17, align 4 - %86 = fmul float 2.000000e+00, %85 - %87 = fadd float %86, 1.000000e+00 - %88 = load [3 x float]*, [3 x float]** %8, align 8 - %89 = load i32, i32* %9, align 4 - %90 = sext i32 %89 to i64 - %91 = getelementptr inbounds [3 x float], [3 x float]* %88, i64 %90 - %92 = load i32, i32* %10, align 4 - %93 = sext i32 %92 to i64 - %94 = getelementptr inbounds [3 x float], [3 x float]* %91, i64 0, i64 %93 - %95 = load float, float* %94, align 4 - %96 = fadd float %95, %87 - store float %96, float* %94, align 4 - br label %97 - -97: ; preds = %55, %52, %49, 
%46, %33 - br label %98 - -98: ; preds = %97 - %99 = load i32, i32* %12, align 4 - %100 = add nsw i32 %99, 1 - store i32 %100, i32* %12, align 4 - br label %30 - -101: ; preds = %30 - br label %102 - -102: ; preds = %101 - %103 = load i32, i32* %11, align 4 - %104 = add nsw i32 %103, 1 - store i32 %104, i32* %11, align 4 - br label %26 - -105: ; preds = %26 - br label %106 - -106: ; preds = %105 - %107 = load i32, i32* %10, align 4 - %108 = add nsw i32 %107, 1 - store i32 %108, i32* %10, align 4 - br label %22 - -109: ; preds = %22 - br label %110 - -110: ; preds = %109 - %111 = load i32, i32* %9, align 4 - %112 = add nsw i32 %111, 1 - store i32 %112, i32* %9, align 4 - br label %18 - -113: ; preds = %18 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #0 { - %1 = alloca i32, align 4 - %2 = alloca [2 x [2 x float]], align 16 - %3 = alloca [2 x [2 x float]], align 16 - %4 = alloca [3 x [3 x float]], align 16 - %5 = alloca [3 x [3 x float]], align 16 - %6 = alloca i32, align 4 - %7 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %8 = bitcast [2 x [2 x float]]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %8, i8* align 16 bitcast ([2 x [2 x float]]* @__const.main.mat_in to i8*), i64 16, i1 false) - %9 = bitcast [2 x [2 x float]]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %9, i8* align 16 bitcast ([2 x [2 x float]]* @__const.main.f_in to i8*), i64 16, i1 false) - %10 = bitcast [3 x [3 x float]]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %10, i8 0, i64 36, i1 false) - %11 = bitcast [3 x [3 x float]]* %5 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %11, i8 0, i64 36, i1 false) - %12 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %2, i64 0, i64 0 - %13 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %3, i64 0, i64 0 - %14 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %4, i64 0, i64 0 - %15 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %5, i64 0, i64 0 - call void @load_use_twice([2 x float]* %12, [2 x float]* %13, [3 x float]* %14, [3 x float]* %15) - store i32 0, i32* %6, align 4 - br label %16 - -16: ; preds = %46, %0 - %17 = load i32, i32* %6, align 4 - %18 = icmp slt i32 %17, 3 - br i1 %18, label %19, label %49 - -19: ; preds = %16 - store i32 0, i32* %7, align 4 - br label %20 - -20: ; preds = %42, %19 - %21 = load i32, i32* %7, align 4 - %22 = icmp slt i32 %21, 3 - br i1 %22, label %23, label %45 - -23: ; preds = %20 - %24 = load i32, i32* %6, align 4 - %25 = sext i32 %24 to i64 - %26 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %4, i64 0, i64 %25 - %27 = load i32, i32* %7, align 4 - %28 = sext i32 %27 to i64 - %29 = getelementptr inbounds [3 x float], [3 x float]* %26, i64 0, i64 %28 - %30 = load float, float* %29, align 4 - %31 = fpext float %30 to double - %32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %31) - %33 = load i32, i32* %6, align 4 - %34 = sext i32 %33 to i64 - %35 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %5, i64 0, i64 %34 - %36 = load i32, i32* %7, align 4 - %37 = sext i32 %36 to i64 - %38 = getelementptr inbounds [3 x float], [3 x float]* %35, i64 0, i64 %37 - %39 = load float, float* %38, align 4 - %40 = fpext float %39 to double - %41 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %40) - br label %42 - -42: ; preds = %23 - %43 = load i32, i32* %7, align 4 - %44 = add nsw i32 %43, 1 - store i32 %44, i32* %7, align 4 - br label %20 - -45: ; preds = %20 - br label %46 - -46: ; preds = %45 - %47 = load i32, i32* %6, align 4 - %48 = add nsw i32 %47, 1 - store i32 %48, i32* %6, align 4 - br label %16 - -49: ; preds = %16 - ret i32 0 -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 - -declare i32 @printf(i8*, ...) #3 - -attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { argmemonly nounwind willreturn writeonly } -attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/dce.ll b/src/dios-egraphs/Diospyros/dce.ll deleted file mode 100644 index 5a30cf2b..00000000 --- a/src/dios-egraphs/Diospyros/dce.ll +++ /dev/null @@ -1,854 +0,0 @@ -; ModuleID = 'diospyros.ll' -source_filename = "llvm-tests/load_reuse.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@__const.main.mat_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 2.000000e+00], [2 x float] [float 3.000000e+00, float 4.000000e+00]], align 16 -@__const.main.f_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 1.000000e+00], [2 x float] [float 1.000000e+00, float 1.000000e+00]], align 16 -@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 - -; Function Attrs: noinline nounwind ssp uwtable -define void @load_use_twice([2 x float]* %0, [2 x float]* %1, [3 x float]* %2, [3 x float]* %3) #0 { -.preheader7: - %4 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 0 - %5 = load float, float* %4, align 4 - %6 = insertelement <4 x float> zeroinitializer, float %5, i32 0 - %7 = insertelement <4 x float> %6, float 0.000000e+00, i32 1 - %8 = insertelement <4 x float> %7, float 0.000000e+00, i32 2 - %9 = insertelement <4 x float> %8, float 0.000000e+00, i32 3 - %10 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 
- %11 = load float, float* %10, align 4 - %12 = insertelement <4 x float> zeroinitializer, float %11, i32 0 - %13 = insertelement <4 x float> %12, float 1.000000e+00, i32 1 - %14 = insertelement <4 x float> %13, float 1.000000e+00, i32 2 - %15 = insertelement <4 x float> %14, float 1.000000e+00, i32 3 - %16 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 - %17 = load float, float* %16, align 4 - %18 = insertelement <4 x float> zeroinitializer, float %17, i32 0 - %19 = insertelement <4 x float> %18, float 1.000000e+00, i32 1 - %20 = insertelement <4 x float> %19, float 1.000000e+00, i32 2 - %21 = insertelement <4 x float> %20, float 1.000000e+00, i32 3 - %22 = fmul <4 x float> %15, %21 - %23 = fmul <4 x float> %22, - %24 = fadd <4 x float> %23, - %25 = fadd <4 x float> %9, %24 - %26 = extractelement <4 x float> %25, i32 0 - %27 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 0 - store float %26, float* %27, align 4 - %28 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 0 - %29 = load float, float* %28, align 4 - %30 = insertelement <4 x float> zeroinitializer, float %29, i32 0 - %31 = insertelement <4 x float> %30, float 0.000000e+00, i32 1 - %32 = insertelement <4 x float> %31, float 0.000000e+00, i32 2 - %33 = insertelement <4 x float> %32, float 0.000000e+00, i32 3 - %34 = insertelement <4 x float> zeroinitializer, float %11, i32 0 - %35 = insertelement <4 x float> %34, float 1.000000e+00, i32 1 - %36 = insertelement <4 x float> %35, float 1.000000e+00, i32 2 - %37 = insertelement <4 x float> %36, float 1.000000e+00, i32 3 - %38 = insertelement <4 x float> zeroinitializer, float %17, i32 0 - %39 = insertelement <4 x float> %38, float 1.000000e+00, i32 1 - %40 = insertelement <4 x float> %39, float 1.000000e+00, i32 2 - %41 = insertelement <4 x float> %40, float 1.000000e+00, i32 3 - %42 = fmul <4 x float> %37, %41 - %43 = fmul <4 x float> %42, - %44 = fadd <4 x float> %43, - %45 = fadd <4 x float> %33, %44 - %46 = extractelement <4 x float> %45, i32 0 - %47 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 0 - store float %46, float* %47, align 4 - %48 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 - %49 = load float, float* %48, align 4 - %50 = insertelement <4 x float> zeroinitializer, float %49, i32 0 - %51 = insertelement <4 x float> %50, float 0.000000e+00, i32 1 - %52 = insertelement <4 x float> %51, float 0.000000e+00, i32 2 - %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 3 - %54 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 - %55 = load float, float* %54, align 4 - %56 = insertelement <4 x float> zeroinitializer, float %55, i32 0 - %57 = insertelement <4 x float> %56, float 1.000000e+00, i32 1 - %58 = insertelement <4 x float> %57, float 1.000000e+00, i32 2 - %59 = insertelement <4 x float> %58, float 1.000000e+00, i32 3 - %60 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 - %61 = load float, float* %60, align 4 - %62 = insertelement <4 x float> zeroinitializer, float %61, i32 0 - %63 = insertelement <4 x float> %62, float 1.000000e+00, i32 1 - %64 = insertelement <4 x float> %63, float 1.000000e+00, i32 2 - %65 = insertelement <4 x float> %64, float 1.000000e+00, i32 3 - %66 = fmul <4 x float> %59, %65 - %67 = fmul <4 x float> %66, - %68 = fadd <4 x float> %67, - %69 = fadd <4 x float> %53, %68 - %70 = extractelement <4 x float> %69, i32 0 - %71 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 - store float %70, 
float* %71, align 4 - %72 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 - %73 = load float, float* %72, align 4 - %74 = insertelement <4 x float> zeroinitializer, float %73, i32 0 - %75 = insertelement <4 x float> %74, float 0.000000e+00, i32 1 - %76 = insertelement <4 x float> %75, float 0.000000e+00, i32 2 - %77 = insertelement <4 x float> %76, float 0.000000e+00, i32 3 - %78 = insertelement <4 x float> zeroinitializer, float %55, i32 0 - %79 = insertelement <4 x float> %78, float 1.000000e+00, i32 1 - %80 = insertelement <4 x float> %79, float 1.000000e+00, i32 2 - %81 = insertelement <4 x float> %80, float 1.000000e+00, i32 3 - %82 = insertelement <4 x float> zeroinitializer, float %61, i32 0 - %83 = insertelement <4 x float> %82, float 1.000000e+00, i32 1 - %84 = insertelement <4 x float> %83, float 1.000000e+00, i32 2 - %85 = insertelement <4 x float> %84, float 1.000000e+00, i32 3 - %86 = fmul <4 x float> %81, %85 - %87 = fmul <4 x float> %86, - %88 = fadd <4 x float> %87, - %89 = fadd <4 x float> %77, %88 - %90 = extractelement <4 x float> %89, i32 0 - %91 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 - store float %90, float* %91, align 4 - %92 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 - %93 = load float, float* %92, align 4 - %94 = insertelement <4 x float> zeroinitializer, float %93, i32 0 - %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 1 - %96 = insertelement <4 x float> %95, float 0.000000e+00, i32 2 - %97 = insertelement <4 x float> %96, float 0.000000e+00, i32 3 - %98 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 - %99 = load float, float* %98, align 4 - %100 = insertelement <4 x float> zeroinitializer, float %99, i32 0 - %101 = insertelement <4 x float> %100, float 1.000000e+00, i32 1 - %102 = insertelement <4 x float> %101, float 1.000000e+00, i32 2 - %103 = insertelement <4 x float> %102, float 1.000000e+00, i32 3 - %104 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 - %105 = load float, float* %104, align 4 - %106 = insertelement <4 x float> zeroinitializer, float %105, i32 0 - %107 = insertelement <4 x float> %106, float 1.000000e+00, i32 1 - %108 = insertelement <4 x float> %107, float 1.000000e+00, i32 2 - %109 = insertelement <4 x float> %108, float 1.000000e+00, i32 3 - %110 = fmul <4 x float> %103, %109 - %111 = fmul <4 x float> %110, - %112 = fadd <4 x float> %111, - %113 = fadd <4 x float> %97, %112 - %114 = extractelement <4 x float> %113, i32 0 - %115 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 - store float %114, float* %115, align 4 - %116 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 - %117 = load float, float* %116, align 4 - %118 = insertelement <4 x float> zeroinitializer, float %117, i32 0 - %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 1 - %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 2 - %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 3 - %122 = insertelement <4 x float> zeroinitializer, float %99, i32 0 - %123 = insertelement <4 x float> %122, float 1.000000e+00, i32 1 - %124 = insertelement <4 x float> %123, float 1.000000e+00, i32 2 - %125 = insertelement <4 x float> %124, float 1.000000e+00, i32 3 - %126 = insertelement <4 x float> zeroinitializer, float %105, i32 0 - %127 = insertelement <4 x float> %126, float 1.000000e+00, i32 1 - %128 = insertelement <4 x float> %127, float 1.000000e+00, i32 2 - %129 = insertelement <4 x 
float> %128, float 1.000000e+00, i32 3 - %130 = fmul <4 x float> %125, %129 - %131 = fmul <4 x float> %130, - %132 = fadd <4 x float> %131, - %133 = fadd <4 x float> %121, %132 - %134 = extractelement <4 x float> %133, i32 0 - %135 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 - store float %134, float* %135, align 4 - %136 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 2 - %137 = load float, float* %136, align 4 - %138 = insertelement <4 x float> zeroinitializer, float %137, i32 0 - %139 = insertelement <4 x float> %138, float 0.000000e+00, i32 1 - %140 = insertelement <4 x float> %139, float 0.000000e+00, i32 2 - %141 = insertelement <4 x float> %140, float 0.000000e+00, i32 3 - %142 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 - %143 = load float, float* %142, align 4 - %144 = insertelement <4 x float> zeroinitializer, float %143, i32 0 - %145 = insertelement <4 x float> %144, float 1.000000e+00, i32 1 - %146 = insertelement <4 x float> %145, float 1.000000e+00, i32 2 - %147 = insertelement <4 x float> %146, float 1.000000e+00, i32 3 - %148 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 - %149 = load float, float* %148, align 4 - %150 = insertelement <4 x float> zeroinitializer, float %149, i32 0 - %151 = insertelement <4 x float> %150, float 1.000000e+00, i32 1 - %152 = insertelement <4 x float> %151, float 1.000000e+00, i32 2 - %153 = insertelement <4 x float> %152, float 1.000000e+00, i32 3 - %154 = fmul <4 x float> %147, %153 - %155 = fmul <4 x float> %154, - %156 = fadd <4 x float> %155, - %157 = fadd <4 x float> %141, %156 - %158 = extractelement <4 x float> %157, i32 0 - %159 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 2 - store float %158, float* %159, align 4 - %160 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 2 - %161 = load float, float* %160, align 4 - %162 = insertelement <4 x float> zeroinitializer, float %161, i32 0 - %163 = insertelement <4 x float> %162, float 0.000000e+00, i32 1 - %164 = insertelement <4 x float> %163, float 0.000000e+00, i32 2 - %165 = insertelement <4 x float> %164, float 0.000000e+00, i32 3 - %166 = insertelement <4 x float> zeroinitializer, float %143, i32 0 - %167 = insertelement <4 x float> %166, float 1.000000e+00, i32 1 - %168 = insertelement <4 x float> %167, float 1.000000e+00, i32 2 - %169 = insertelement <4 x float> %168, float 1.000000e+00, i32 3 - %170 = insertelement <4 x float> zeroinitializer, float %149, i32 0 - %171 = insertelement <4 x float> %170, float 1.000000e+00, i32 1 - %172 = insertelement <4 x float> %171, float 1.000000e+00, i32 2 - %173 = insertelement <4 x float> %172, float 1.000000e+00, i32 3 - %174 = fmul <4 x float> %169, %173 - %175 = fmul <4 x float> %174, - %176 = fadd <4 x float> %175, - %177 = fadd <4 x float> %165, %176 - %178 = extractelement <4 x float> %177, i32 0 - %179 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 2 - store float %178, float* %179, align 4 - %180 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 - %181 = load float, float* %180, align 4 - %182 = insertelement <4 x float> zeroinitializer, float %181, i32 0 - %183 = insertelement <4 x float> %182, float 0.000000e+00, i32 1 - %184 = insertelement <4 x float> %183, float 0.000000e+00, i32 2 - %185 = insertelement <4 x float> %184, float 0.000000e+00, i32 3 - %186 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 - %187 = load float, float* %186, align 4 - %188 = 
insertelement <4 x float> zeroinitializer, float %187, i32 0 - %189 = insertelement <4 x float> %188, float 1.000000e+00, i32 1 - %190 = insertelement <4 x float> %189, float 1.000000e+00, i32 2 - %191 = insertelement <4 x float> %190, float 1.000000e+00, i32 3 - %192 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 - %193 = load float, float* %192, align 4 - %194 = insertelement <4 x float> zeroinitializer, float %193, i32 0 - %195 = insertelement <4 x float> %194, float 1.000000e+00, i32 1 - %196 = insertelement <4 x float> %195, float 1.000000e+00, i32 2 - %197 = insertelement <4 x float> %196, float 1.000000e+00, i32 3 - %198 = fmul <4 x float> %191, %197 - %199 = fmul <4 x float> %198, - %200 = fadd <4 x float> %199, - %201 = fadd <4 x float> %185, %200 - %202 = extractelement <4 x float> %201, i32 0 - %203 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 - store float %202, float* %203, align 4 - %204 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 - %205 = load float, float* %204, align 4 - %206 = insertelement <4 x float> zeroinitializer, float %205, i32 0 - %207 = insertelement <4 x float> %206, float 0.000000e+00, i32 1 - %208 = insertelement <4 x float> %207, float 0.000000e+00, i32 2 - %209 = insertelement <4 x float> %208, float 0.000000e+00, i32 3 - %210 = insertelement <4 x float> zeroinitializer, float %187, i32 0 - %211 = insertelement <4 x float> %210, float 1.000000e+00, i32 1 - %212 = insertelement <4 x float> %211, float 1.000000e+00, i32 2 - %213 = insertelement <4 x float> %212, float 1.000000e+00, i32 3 - %214 = insertelement <4 x float> zeroinitializer, float %193, i32 0 - %215 = insertelement <4 x float> %214, float 1.000000e+00, i32 1 - %216 = insertelement <4 x float> %215, float 1.000000e+00, i32 2 - %217 = insertelement <4 x float> %216, float 1.000000e+00, i32 3 - %218 = fmul <4 x float> %213, %217 - %219 = fmul <4 x float> %218, - %220 = fadd <4 x float> %219, - %221 = fadd <4 x float> %209, %220 - %222 = extractelement <4 x float> %221, i32 0 - %223 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 - store float %222, float* %223, align 4 - %224 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 - %225 = load float, float* %224, align 4 - %226 = insertelement <4 x float> zeroinitializer, float %225, i32 0 - %227 = insertelement <4 x float> %226, float 0.000000e+00, i32 1 - %228 = insertelement <4 x float> %227, float 0.000000e+00, i32 2 - %229 = insertelement <4 x float> %228, float 0.000000e+00, i32 3 - %230 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 - %231 = load float, float* %230, align 4 - %232 = insertelement <4 x float> zeroinitializer, float %231, i32 0 - %233 = insertelement <4 x float> %232, float 1.000000e+00, i32 1 - %234 = insertelement <4 x float> %233, float 1.000000e+00, i32 2 - %235 = insertelement <4 x float> %234, float 1.000000e+00, i32 3 - %236 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 - %237 = load float, float* %236, align 4 - %238 = insertelement <4 x float> zeroinitializer, float %237, i32 0 - %239 = insertelement <4 x float> %238, float 1.000000e+00, i32 1 - %240 = insertelement <4 x float> %239, float 1.000000e+00, i32 2 - %241 = insertelement <4 x float> %240, float 1.000000e+00, i32 3 - %242 = fmul <4 x float> %235, %241 - %243 = fmul <4 x float> %242, - %244 = fadd <4 x float> %243, - %245 = fadd <4 x float> %229, %244 - %246 = extractelement <4 x float> %245, i32 0 - %247 = 
getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 - store float %246, float* %247, align 4 - %248 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 - %249 = load float, float* %248, align 4 - %250 = insertelement <4 x float> zeroinitializer, float %249, i32 0 - %251 = insertelement <4 x float> %250, float 0.000000e+00, i32 1 - %252 = insertelement <4 x float> %251, float 0.000000e+00, i32 2 - %253 = insertelement <4 x float> %252, float 0.000000e+00, i32 3 - %254 = insertelement <4 x float> zeroinitializer, float %231, i32 0 - %255 = insertelement <4 x float> %254, float 1.000000e+00, i32 1 - %256 = insertelement <4 x float> %255, float 1.000000e+00, i32 2 - %257 = insertelement <4 x float> %256, float 1.000000e+00, i32 3 - %258 = insertelement <4 x float> zeroinitializer, float %237, i32 0 - %259 = insertelement <4 x float> %258, float 1.000000e+00, i32 1 - %260 = insertelement <4 x float> %259, float 1.000000e+00, i32 2 - %261 = insertelement <4 x float> %260, float 1.000000e+00, i32 3 - %262 = fmul <4 x float> %257, %261 - %263 = fmul <4 x float> %262, - %264 = fadd <4 x float> %263, - %265 = fadd <4 x float> %253, %264 - %266 = extractelement <4 x float> %265, i32 0 - %267 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 - store float %266, float* %267, align 4 - %268 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - %269 = load float, float* %268, align 4 - %270 = insertelement <4 x float> zeroinitializer, float %269, i32 0 - %271 = insertelement <4 x float> %270, float 0.000000e+00, i32 1 - %272 = insertelement <4 x float> %271, float 0.000000e+00, i32 2 - %273 = insertelement <4 x float> %272, float 0.000000e+00, i32 3 - %274 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 - %275 = load float, float* %274, align 4 - %276 = insertelement <4 x float> zeroinitializer, float %275, i32 0 - %277 = insertelement <4 x float> %276, float 1.000000e+00, i32 1 - %278 = insertelement <4 x float> %277, float 1.000000e+00, i32 2 - %279 = insertelement <4 x float> %278, float 1.000000e+00, i32 3 - %280 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 - %281 = load float, float* %280, align 4 - %282 = insertelement <4 x float> zeroinitializer, float %281, i32 0 - %283 = insertelement <4 x float> %282, float 1.000000e+00, i32 1 - %284 = insertelement <4 x float> %283, float 1.000000e+00, i32 2 - %285 = insertelement <4 x float> %284, float 1.000000e+00, i32 3 - %286 = fmul <4 x float> %279, %285 - %287 = fmul <4 x float> %286, - %288 = fadd <4 x float> %287, - %289 = fadd <4 x float> %273, %288 - %290 = extractelement <4 x float> %289, i32 0 - %291 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - store float %290, float* %291, align 4 - %292 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - %293 = load float, float* %292, align 4 - %294 = insertelement <4 x float> zeroinitializer, float %293, i32 0 - %295 = insertelement <4 x float> %294, float 0.000000e+00, i32 1 - %296 = insertelement <4 x float> %295, float 0.000000e+00, i32 2 - %297 = insertelement <4 x float> %296, float 0.000000e+00, i32 3 - %298 = insertelement <4 x float> zeroinitializer, float %275, i32 0 - %299 = insertelement <4 x float> %298, float 1.000000e+00, i32 1 - %300 = insertelement <4 x float> %299, float 1.000000e+00, i32 2 - %301 = insertelement <4 x float> %300, float 1.000000e+00, i32 3 - %302 = insertelement <4 x float> zeroinitializer, float %281, i32 0 - %303 = 
insertelement <4 x float> %302, float 1.000000e+00, i32 1 - %304 = insertelement <4 x float> %303, float 1.000000e+00, i32 2 - %305 = insertelement <4 x float> %304, float 1.000000e+00, i32 3 - %306 = fmul <4 x float> %301, %305 - %307 = fmul <4 x float> %306, - %308 = fadd <4 x float> %307, - %309 = fadd <4 x float> %297, %308 - %310 = extractelement <4 x float> %309, i32 0 - %311 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - store float %310, float* %311, align 4 - %312 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - %313 = load float, float* %312, align 4 - %314 = insertelement <4 x float> zeroinitializer, float %313, i32 0 - %315 = insertelement <4 x float> %314, float 0.000000e+00, i32 1 - %316 = insertelement <4 x float> %315, float 0.000000e+00, i32 2 - %317 = insertelement <4 x float> %316, float 0.000000e+00, i32 3 - %318 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 - %319 = load float, float* %318, align 4 - %320 = insertelement <4 x float> zeroinitializer, float %319, i32 0 - %321 = insertelement <4 x float> %320, float 1.000000e+00, i32 1 - %322 = insertelement <4 x float> %321, float 1.000000e+00, i32 2 - %323 = insertelement <4 x float> %322, float 1.000000e+00, i32 3 - %324 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 - %325 = load float, float* %324, align 4 - %326 = insertelement <4 x float> zeroinitializer, float %325, i32 0 - %327 = insertelement <4 x float> %326, float 1.000000e+00, i32 1 - %328 = insertelement <4 x float> %327, float 1.000000e+00, i32 2 - %329 = insertelement <4 x float> %328, float 1.000000e+00, i32 3 - %330 = fmul <4 x float> %323, %329 - %331 = fmul <4 x float> %330, - %332 = fadd <4 x float> %331, - %333 = fadd <4 x float> %317, %332 - %334 = extractelement <4 x float> %333, i32 0 - %335 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - store float %334, float* %335, align 4 - %336 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - %337 = load float, float* %336, align 4 - %338 = insertelement <4 x float> zeroinitializer, float %337, i32 0 - %339 = insertelement <4 x float> %338, float 0.000000e+00, i32 1 - %340 = insertelement <4 x float> %339, float 0.000000e+00, i32 2 - %341 = insertelement <4 x float> %340, float 0.000000e+00, i32 3 - %342 = insertelement <4 x float> zeroinitializer, float %319, i32 0 - %343 = insertelement <4 x float> %342, float 1.000000e+00, i32 1 - %344 = insertelement <4 x float> %343, float 1.000000e+00, i32 2 - %345 = insertelement <4 x float> %344, float 1.000000e+00, i32 3 - %346 = insertelement <4 x float> zeroinitializer, float %325, i32 0 - %347 = insertelement <4 x float> %346, float 1.000000e+00, i32 1 - %348 = insertelement <4 x float> %347, float 1.000000e+00, i32 2 - %349 = insertelement <4 x float> %348, float 1.000000e+00, i32 3 - %350 = fmul <4 x float> %345, %349 - %351 = fmul <4 x float> %350, - %352 = fadd <4 x float> %351, - %353 = fadd <4 x float> %341, %352 - %354 = extractelement <4 x float> %353, i32 0 - %355 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - store float %354, float* %355, align 4 - %356 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - %357 = load float, float* %356, align 4 - %358 = insertelement <4 x float> zeroinitializer, float %357, i32 0 - %359 = insertelement <4 x float> %358, float 0.000000e+00, i32 1 - %360 = insertelement <4 x float> %359, float 0.000000e+00, i32 2 - %361 = insertelement <4 x float> %360, 
float 0.000000e+00, i32 3 - %362 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 - %363 = load float, float* %362, align 4 - %364 = insertelement <4 x float> zeroinitializer, float %363, i32 0 - %365 = insertelement <4 x float> %364, float 1.000000e+00, i32 1 - %366 = insertelement <4 x float> %365, float 1.000000e+00, i32 2 - %367 = insertelement <4 x float> %366, float 1.000000e+00, i32 3 - %368 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 - %369 = load float, float* %368, align 4 - %370 = insertelement <4 x float> zeroinitializer, float %369, i32 0 - %371 = insertelement <4 x float> %370, float 1.000000e+00, i32 1 - %372 = insertelement <4 x float> %371, float 1.000000e+00, i32 2 - %373 = insertelement <4 x float> %372, float 1.000000e+00, i32 3 - %374 = fmul <4 x float> %367, %373 - %375 = fmul <4 x float> %374, - %376 = fadd <4 x float> %375, - %377 = fadd <4 x float> %361, %376 - %378 = extractelement <4 x float> %377, i32 0 - %379 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - store float %378, float* %379, align 4 - %380 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - %381 = load float, float* %380, align 4 - %382 = insertelement <4 x float> zeroinitializer, float %381, i32 0 - %383 = insertelement <4 x float> %382, float 0.000000e+00, i32 1 - %384 = insertelement <4 x float> %383, float 0.000000e+00, i32 2 - %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3 - %386 = insertelement <4 x float> zeroinitializer, float %363, i32 0 - %387 = insertelement <4 x float> %386, float 1.000000e+00, i32 1 - %388 = insertelement <4 x float> %387, float 1.000000e+00, i32 2 - %389 = insertelement <4 x float> %388, float 1.000000e+00, i32 3 - %390 = insertelement <4 x float> zeroinitializer, float %369, i32 0 - %391 = insertelement <4 x float> %390, float 1.000000e+00, i32 1 - %392 = insertelement <4 x float> %391, float 1.000000e+00, i32 2 - %393 = insertelement <4 x float> %392, float 1.000000e+00, i32 3 - %394 = fmul <4 x float> %389, %393 - %395 = fmul <4 x float> %394, - %396 = fadd <4 x float> %395, - %397 = fadd <4 x float> %385, %396 - %398 = extractelement <4 x float> %397, i32 0 - %399 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - store float %398, float* %399, align 4 - %400 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - %401 = load float, float* %400, align 4 - %402 = insertelement <4 x float> zeroinitializer, float %401, i32 0 - %403 = insertelement <4 x float> %402, float 0.000000e+00, i32 1 - %404 = insertelement <4 x float> %403, float 0.000000e+00, i32 2 - %405 = insertelement <4 x float> %404, float 0.000000e+00, i32 3 - %406 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 - %407 = load float, float* %406, align 4 - %408 = insertelement <4 x float> zeroinitializer, float %407, i32 0 - %409 = insertelement <4 x float> %408, float 1.000000e+00, i32 1 - %410 = insertelement <4 x float> %409, float 1.000000e+00, i32 2 - %411 = insertelement <4 x float> %410, float 1.000000e+00, i32 3 - %412 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 - %413 = load float, float* %412, align 4 - %414 = insertelement <4 x float> zeroinitializer, float %413, i32 0 - %415 = insertelement <4 x float> %414, float 1.000000e+00, i32 1 - %416 = insertelement <4 x float> %415, float 1.000000e+00, i32 2 - %417 = insertelement <4 x float> %416, float 1.000000e+00, i32 3 - %418 = fmul <4 x float> %411, %417 - %419 = fmul <4 
x float> %418, - %420 = fadd <4 x float> %419, - %421 = fadd <4 x float> %405, %420 - %422 = extractelement <4 x float> %421, i32 0 - %423 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - store float %422, float* %423, align 4 - %424 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - %425 = load float, float* %424, align 4 - %426 = insertelement <4 x float> zeroinitializer, float %425, i32 0 - %427 = insertelement <4 x float> %426, float 0.000000e+00, i32 1 - %428 = insertelement <4 x float> %427, float 0.000000e+00, i32 2 - %429 = insertelement <4 x float> %428, float 0.000000e+00, i32 3 - %430 = insertelement <4 x float> zeroinitializer, float %407, i32 0 - %431 = insertelement <4 x float> %430, float 1.000000e+00, i32 1 - %432 = insertelement <4 x float> %431, float 1.000000e+00, i32 2 - %433 = insertelement <4 x float> %432, float 1.000000e+00, i32 3 - %434 = insertelement <4 x float> zeroinitializer, float %413, i32 0 - %435 = insertelement <4 x float> %434, float 1.000000e+00, i32 1 - %436 = insertelement <4 x float> %435, float 1.000000e+00, i32 2 - %437 = insertelement <4 x float> %436, float 1.000000e+00, i32 3 - %438 = fmul <4 x float> %433, %437 - %439 = fmul <4 x float> %438, - %440 = fadd <4 x float> %439, - %441 = fadd <4 x float> %429, %440 - %442 = extractelement <4 x float> %441, i32 0 - %443 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - store float %442, float* %443, align 4 - %444 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 - %445 = load float, float* %444, align 4 - %446 = insertelement <4 x float> zeroinitializer, float %445, i32 0 - %447 = insertelement <4 x float> %446, float 0.000000e+00, i32 1 - %448 = insertelement <4 x float> %447, float 0.000000e+00, i32 2 - %449 = insertelement <4 x float> %448, float 0.000000e+00, i32 3 - %450 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 - %451 = load float, float* %450, align 4 - %452 = insertelement <4 x float> zeroinitializer, float %451, i32 0 - %453 = insertelement <4 x float> %452, float 1.000000e+00, i32 1 - %454 = insertelement <4 x float> %453, float 1.000000e+00, i32 2 - %455 = insertelement <4 x float> %454, float 1.000000e+00, i32 3 - %456 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 - %457 = load float, float* %456, align 4 - %458 = insertelement <4 x float> zeroinitializer, float %457, i32 0 - %459 = insertelement <4 x float> %458, float 1.000000e+00, i32 1 - %460 = insertelement <4 x float> %459, float 1.000000e+00, i32 2 - %461 = insertelement <4 x float> %460, float 1.000000e+00, i32 3 - %462 = fmul <4 x float> %455, %461 - %463 = fmul <4 x float> %462, - %464 = fadd <4 x float> %463, - %465 = fadd <4 x float> %449, %464 - %466 = extractelement <4 x float> %465, i32 0 - %467 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 - store float %466, float* %467, align 4 - %468 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 - %469 = load float, float* %468, align 4 - %470 = insertelement <4 x float> zeroinitializer, float %469, i32 0 - %471 = insertelement <4 x float> %470, float 0.000000e+00, i32 1 - %472 = insertelement <4 x float> %471, float 0.000000e+00, i32 2 - %473 = insertelement <4 x float> %472, float 0.000000e+00, i32 3 - %474 = insertelement <4 x float> zeroinitializer, float %451, i32 0 - %475 = insertelement <4 x float> %474, float 1.000000e+00, i32 1 - %476 = insertelement <4 x float> %475, float 1.000000e+00, i32 2 - %477 = 
insertelement <4 x float> %476, float 1.000000e+00, i32 3 - %478 = insertelement <4 x float> zeroinitializer, float %457, i32 0 - %479 = insertelement <4 x float> %478, float 1.000000e+00, i32 1 - %480 = insertelement <4 x float> %479, float 1.000000e+00, i32 2 - %481 = insertelement <4 x float> %480, float 1.000000e+00, i32 3 - %482 = fmul <4 x float> %477, %481 - %483 = fmul <4 x float> %482, - %484 = fadd <4 x float> %483, - %485 = fadd <4 x float> %473, %484 - %486 = extractelement <4 x float> %485, i32 0 - %487 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 - store float %486, float* %487, align 4 - %488 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 - %489 = load float, float* %488, align 4 - %490 = insertelement <4 x float> zeroinitializer, float %489, i32 0 - %491 = insertelement <4 x float> %490, float 0.000000e+00, i32 1 - %492 = insertelement <4 x float> %491, float 0.000000e+00, i32 2 - %493 = insertelement <4 x float> %492, float 0.000000e+00, i32 3 - %494 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 - %495 = load float, float* %494, align 4 - %496 = insertelement <4 x float> zeroinitializer, float %495, i32 0 - %497 = insertelement <4 x float> %496, float 1.000000e+00, i32 1 - %498 = insertelement <4 x float> %497, float 1.000000e+00, i32 2 - %499 = insertelement <4 x float> %498, float 1.000000e+00, i32 3 - %500 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 - %501 = load float, float* %500, align 4 - %502 = insertelement <4 x float> zeroinitializer, float %501, i32 0 - %503 = insertelement <4 x float> %502, float 1.000000e+00, i32 1 - %504 = insertelement <4 x float> %503, float 1.000000e+00, i32 2 - %505 = insertelement <4 x float> %504, float 1.000000e+00, i32 3 - %506 = fmul <4 x float> %499, %505 - %507 = fmul <4 x float> %506, - %508 = fadd <4 x float> %507, - %509 = fadd <4 x float> %493, %508 - %510 = extractelement <4 x float> %509, i32 0 - %511 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 - store float %510, float* %511, align 4 - %512 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 - %513 = load float, float* %512, align 4 - %514 = insertelement <4 x float> zeroinitializer, float %513, i32 0 - %515 = insertelement <4 x float> %514, float 0.000000e+00, i32 1 - %516 = insertelement <4 x float> %515, float 0.000000e+00, i32 2 - %517 = insertelement <4 x float> %516, float 0.000000e+00, i32 3 - %518 = insertelement <4 x float> zeroinitializer, float %495, i32 0 - %519 = insertelement <4 x float> %518, float 1.000000e+00, i32 1 - %520 = insertelement <4 x float> %519, float 1.000000e+00, i32 2 - %521 = insertelement <4 x float> %520, float 1.000000e+00, i32 3 - %522 = insertelement <4 x float> zeroinitializer, float %501, i32 0 - %523 = insertelement <4 x float> %522, float 1.000000e+00, i32 1 - %524 = insertelement <4 x float> %523, float 1.000000e+00, i32 2 - %525 = insertelement <4 x float> %524, float 1.000000e+00, i32 3 - %526 = fmul <4 x float> %521, %525 - %527 = fmul <4 x float> %526, - %528 = fadd <4 x float> %527, - %529 = fadd <4 x float> %517, %528 - %530 = extractelement <4 x float> %529, i32 0 - %531 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 - store float %530, float* %531, align 4 - %532 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 0 - %533 = load float, float* %532, align 4 - %534 = insertelement <4 x float> zeroinitializer, float %533, i32 0 - %535 = insertelement <4 x float> 
%534, float 0.000000e+00, i32 1 - %536 = insertelement <4 x float> %535, float 0.000000e+00, i32 2 - %537 = insertelement <4 x float> %536, float 0.000000e+00, i32 3 - %538 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 - %539 = load float, float* %538, align 4 - %540 = insertelement <4 x float> zeroinitializer, float %539, i32 0 - %541 = insertelement <4 x float> %540, float 1.000000e+00, i32 1 - %542 = insertelement <4 x float> %541, float 1.000000e+00, i32 2 - %543 = insertelement <4 x float> %542, float 1.000000e+00, i32 3 - %544 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 - %545 = load float, float* %544, align 4 - %546 = insertelement <4 x float> zeroinitializer, float %545, i32 0 - %547 = insertelement <4 x float> %546, float 1.000000e+00, i32 1 - %548 = insertelement <4 x float> %547, float 1.000000e+00, i32 2 - %549 = insertelement <4 x float> %548, float 1.000000e+00, i32 3 - %550 = fmul <4 x float> %543, %549 - %551 = fmul <4 x float> %550, - %552 = fadd <4 x float> %551, - %553 = fadd <4 x float> %537, %552 - %554 = extractelement <4 x float> %553, i32 0 - %555 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 0 - store float %554, float* %555, align 4 - %556 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 0 - %557 = load float, float* %556, align 4 - %558 = insertelement <4 x float> zeroinitializer, float %557, i32 0 - %559 = insertelement <4 x float> %558, float 0.000000e+00, i32 1 - %560 = insertelement <4 x float> %559, float 0.000000e+00, i32 2 - %561 = insertelement <4 x float> %560, float 0.000000e+00, i32 3 - %562 = insertelement <4 x float> zeroinitializer, float %539, i32 0 - %563 = insertelement <4 x float> %562, float 1.000000e+00, i32 1 - %564 = insertelement <4 x float> %563, float 1.000000e+00, i32 2 - %565 = insertelement <4 x float> %564, float 1.000000e+00, i32 3 - %566 = insertelement <4 x float> zeroinitializer, float %545, i32 0 - %567 = insertelement <4 x float> %566, float 1.000000e+00, i32 1 - %568 = insertelement <4 x float> %567, float 1.000000e+00, i32 2 - %569 = insertelement <4 x float> %568, float 1.000000e+00, i32 3 - %570 = fmul <4 x float> %565, %569 - %571 = fmul <4 x float> %570, - %572 = fadd <4 x float> %571, - %573 = fadd <4 x float> %561, %572 - %574 = extractelement <4 x float> %573, i32 0 - %575 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 0 - store float %574, float* %575, align 4 - %576 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 - %577 = load float, float* %576, align 4 - %578 = insertelement <4 x float> zeroinitializer, float %577, i32 0 - %579 = insertelement <4 x float> %578, float 0.000000e+00, i32 1 - %580 = insertelement <4 x float> %579, float 0.000000e+00, i32 2 - %581 = insertelement <4 x float> %580, float 0.000000e+00, i32 3 - %582 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 - %583 = load float, float* %582, align 4 - %584 = insertelement <4 x float> zeroinitializer, float %583, i32 0 - %585 = insertelement <4 x float> %584, float 1.000000e+00, i32 1 - %586 = insertelement <4 x float> %585, float 1.000000e+00, i32 2 - %587 = insertelement <4 x float> %586, float 1.000000e+00, i32 3 - %588 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 - %589 = load float, float* %588, align 4 - %590 = insertelement <4 x float> zeroinitializer, float %589, i32 0 - %591 = insertelement <4 x float> %590, float 1.000000e+00, i32 1 - %592 = insertelement <4 x float> %591, float 
1.000000e+00, i32 2 - %593 = insertelement <4 x float> %592, float 1.000000e+00, i32 3 - %594 = fmul <4 x float> %587, %593 - %595 = fmul <4 x float> %594, - %596 = fadd <4 x float> %595, - %597 = fadd <4 x float> %581, %596 - %598 = extractelement <4 x float> %597, i32 0 - %599 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 - store float %598, float* %599, align 4 - %600 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 - %601 = load float, float* %600, align 4 - %602 = insertelement <4 x float> zeroinitializer, float %601, i32 0 - %603 = insertelement <4 x float> %602, float 0.000000e+00, i32 1 - %604 = insertelement <4 x float> %603, float 0.000000e+00, i32 2 - %605 = insertelement <4 x float> %604, float 0.000000e+00, i32 3 - %606 = insertelement <4 x float> zeroinitializer, float %583, i32 0 - %607 = insertelement <4 x float> %606, float 1.000000e+00, i32 1 - %608 = insertelement <4 x float> %607, float 1.000000e+00, i32 2 - %609 = insertelement <4 x float> %608, float 1.000000e+00, i32 3 - %610 = insertelement <4 x float> zeroinitializer, float %589, i32 0 - %611 = insertelement <4 x float> %610, float 1.000000e+00, i32 1 - %612 = insertelement <4 x float> %611, float 1.000000e+00, i32 2 - %613 = insertelement <4 x float> %612, float 1.000000e+00, i32 3 - %614 = fmul <4 x float> %609, %613 - %615 = fmul <4 x float> %614, - %616 = fadd <4 x float> %615, - %617 = fadd <4 x float> %605, %616 - %618 = extractelement <4 x float> %617, i32 0 - %619 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 - store float %618, float* %619, align 4 - %620 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 - %621 = load float, float* %620, align 4 - %622 = insertelement <4 x float> zeroinitializer, float %621, i32 0 - %623 = insertelement <4 x float> %622, float 0.000000e+00, i32 1 - %624 = insertelement <4 x float> %623, float 0.000000e+00, i32 2 - %625 = insertelement <4 x float> %624, float 0.000000e+00, i32 3 - %626 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 - %627 = load float, float* %626, align 4 - %628 = insertelement <4 x float> zeroinitializer, float %627, i32 0 - %629 = insertelement <4 x float> %628, float 1.000000e+00, i32 1 - %630 = insertelement <4 x float> %629, float 1.000000e+00, i32 2 - %631 = insertelement <4 x float> %630, float 1.000000e+00, i32 3 - %632 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 - %633 = load float, float* %632, align 4 - %634 = insertelement <4 x float> zeroinitializer, float %633, i32 0 - %635 = insertelement <4 x float> %634, float 1.000000e+00, i32 1 - %636 = insertelement <4 x float> %635, float 1.000000e+00, i32 2 - %637 = insertelement <4 x float> %636, float 1.000000e+00, i32 3 - %638 = fmul <4 x float> %631, %637 - %639 = fmul <4 x float> %638, - %640 = fadd <4 x float> %639, - %641 = fadd <4 x float> %625, %640 - %642 = extractelement <4 x float> %641, i32 0 - %643 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 - store float %642, float* %643, align 4 - %644 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 - %645 = load float, float* %644, align 4 - %646 = insertelement <4 x float> zeroinitializer, float %645, i32 0 - %647 = insertelement <4 x float> %646, float 0.000000e+00, i32 1 - %648 = insertelement <4 x float> %647, float 0.000000e+00, i32 2 - %649 = insertelement <4 x float> %648, float 0.000000e+00, i32 3 - %650 = insertelement <4 x float> zeroinitializer, float %627, i32 0 - %651 = 
insertelement <4 x float> %650, float 1.000000e+00, i32 1 - %652 = insertelement <4 x float> %651, float 1.000000e+00, i32 2 - %653 = insertelement <4 x float> %652, float 1.000000e+00, i32 3 - %654 = insertelement <4 x float> zeroinitializer, float %633, i32 0 - %655 = insertelement <4 x float> %654, float 1.000000e+00, i32 1 - %656 = insertelement <4 x float> %655, float 1.000000e+00, i32 2 - %657 = insertelement <4 x float> %656, float 1.000000e+00, i32 3 - %658 = fmul <4 x float> %653, %657 - %659 = fmul <4 x float> %658, - %660 = fadd <4 x float> %659, - %661 = fadd <4 x float> %649, %660 - %662 = extractelement <4 x float> %661, i32 0 - %663 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 - store float %662, float* %663, align 4 - %664 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 2 - %665 = load float, float* %664, align 4 - %666 = insertelement <4 x float> zeroinitializer, float %665, i32 0 - %667 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 2 - %668 = load float, float* %667, align 4 - %669 = insertelement <4 x float> %666, float %668, i32 1 - %670 = insertelement <4 x float> %669, float 0.000000e+00, i32 2 - %671 = insertelement <4 x float> %670, float 0.000000e+00, i32 3 - %672 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 - %673 = load float, float* %672, align 4 - %674 = insertelement <4 x float> zeroinitializer, float %673, i32 0 - %675 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 - %676 = load float, float* %675, align 4 - %677 = insertelement <4 x float> %674, float %676, i32 1 - %678 = insertelement <4 x float> %677, float 1.000000e+00, i32 2 - %679 = insertelement <4 x float> %678, float 1.000000e+00, i32 3 - %680 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 - %681 = load float, float* %680, align 4 - %682 = insertelement <4 x float> zeroinitializer, float %681, i32 0 - %683 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 - %684 = load float, float* %683, align 4 - %685 = insertelement <4 x float> %682, float %684, i32 1 - %686 = insertelement <4 x float> %685, float 1.000000e+00, i32 2 - %687 = insertelement <4 x float> %686, float 1.000000e+00, i32 3 - %688 = fmul <4 x float> %679, %687 - %689 = fmul <4 x float> %688, - %690 = fadd <4 x float> %689, - %691 = fadd <4 x float> %671, %690 - %692 = extractelement <4 x float> %691, i32 0 - %693 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 2 - store float %692, float* %693, align 4 - %694 = extractelement <4 x float> %691, i32 1 - %695 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 2 - store float %694, float* %695, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #0 { -.preheader: - %0 = alloca [2 x [2 x float]], align 16 - %1 = alloca [2 x [2 x float]], align 16 - %2 = alloca [3 x [3 x float]], align 16 - %3 = alloca [3 x [3 x float]], align 16 - %4 = bitcast [2 x [2 x float]]* %0 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.mat_in to i8*), i64 16, i1 false) - %5 = bitcast [2 x [2 x float]]* %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.f_in to i8*), i64 16, i1 false) - %6 = bitcast [3 x [3 x float]]* %2 to i8* - call void 
@llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) - %7 = bitcast [3 x [3 x float]]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %7, i8 0, i64 36, i1 false) - %8 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %0, i64 0, i64 0 - %9 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %1, i64 0, i64 0 - %10 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0 - %11 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0 - call void @load_use_twice([2 x float]* nonnull %8, [2 x float]* nonnull %9, [3 x float]* nonnull %10, [3 x float]* nonnull %11) - %12 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 0 - %13 = load float, float* %12, align 16 - %14 = fpext float %13 to double - %15 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %14) #4 - %16 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 0 - %17 = load float, float* %16, align 16 - %18 = fpext float %17 to double - %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #4 - %20 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 1 - %21 = load float, float* %20, align 4 - %22 = fpext float %21 to double - %23 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %22) #4 - %24 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 1 - %25 = load float, float* %24, align 4 - %26 = fpext float %25 to double - %27 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #4 - %28 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 2 - %29 = load float, float* %28, align 8 - %30 = fpext float %29 to double - %31 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %30) #4 - %32 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 2 - %33 = load float, float* %32, align 8 - %34 = fpext float %33 to double - %35 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %34) #4 - %36 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 0 - %37 = load float, float* %36, align 4 - %38 = fpext float %37 to double - %39 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %38) #4 - %40 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 0 - %41 = load float, float* %40, align 4 - %42 = fpext float %41 to double - %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %42) #4 - %44 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 1 - %45 = load float, float* %44, align 4 - %46 = fpext float %45 to double - %47 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %46) #4 - %48 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 1 - %49 = load float, float* %48, align 4 - %50 = fpext float %49 to double - %51 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %50) #4 - %52 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 2 - %53 = load float, float* %52, align 4 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #4 - %56 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 2 - %57 = load float, float* %56, align 4 - %58 = fpext float %57 to double - %59 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %58) #4 - %60 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 0 - %61 = load float, float* %60, align 8 - %62 = fpext float %61 to double - %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %62) #4 - %64 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 0 - %65 = load float, float* %64, align 8 - %66 = fpext float %65 to double - %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %66) #4 - %68 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 1 - %69 = load float, float* %68, align 4 - %70 = fpext float %69 to double - %71 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %70) #4 - %72 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 1 - %73 = load float, float* %72, align 4 - %74 = fpext float %73 to double - %75 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %74) #4 - %76 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 2 - %77 = load float, float* %76, align 8 - %78 = fpext float %77 to double - %79 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %78) #4 - %80 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 2 - %81 = load float, float* %80, align 8 - %82 = fpext float %81 to double - %83 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %82) #4 - ret i32 0 -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 - -declare i32 @printf(i8*, ...) 
#3 - -attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { argmemonly nounwind willreturn writeonly } -attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/diospyros.ll b/src/dios-egraphs/Diospyros/diospyros.ll deleted file mode 100644 index f4a34c43..00000000 --- a/src/dios-egraphs/Diospyros/diospyros.ll +++ /dev/null @@ -1,1056 +0,0 @@ -; ModuleID = 'aa.ll' -source_filename = "llvm-tests/load_reuse.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@__const.main.mat_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 2.000000e+00], [2 x float] [float 3.000000e+00, float 4.000000e+00]], align 16 -@__const.main.f_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 1.000000e+00], [2 x float] [float 1.000000e+00, float 1.000000e+00]], align 16 -@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 - -; Function Attrs: noinline nounwind ssp uwtable -define void @load_use_twice([2 x float]* %0, [2 x float]* %1, [3 x float]* %2, [3 x float]* %3) #0 { -.preheader7: - %4 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 0 - %5 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 0 - %6 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 - %7 = load float, float* %6, align 4 - %8 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 - %9 = load float, float* %8, align 4 - %10 = fmul float %7, %9 - %11 = fmul float %10, 3.000000e+00 - %12 = fadd float %11, -4.000000e+00 - %13 = load float, float* %4, align 4 - %14 = fadd float %13, %12 - %15 = fmul float %10, 2.000000e+00 - %16 = fadd float %15, 1.000000e+00 - %17 = load float, float* %5, align 4 - %18 = fadd float %17, %16 - %19 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 - %20 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 - %21 = load float, float* %6, align 4 - %22 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 - %23 = load float, float* %22, align 4 - %24 = fmul float %21, %23 - %25 = fmul float %24, 3.000000e+00 - %26 = fadd float %25, -4.000000e+00 - %27 = load float, float* %19, align 4 - %28 = fadd float %27, %26 - %29 = fmul float %24, 2.000000e+00 - %30 = 
fadd float %29, 1.000000e+00 - %31 = load float, float* %20, align 4 - %32 = fadd float %31, %30 - %33 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 - %34 = load float, float* %33, align 4 - %35 = load float, float* %8, align 4 - %36 = fmul float %34, %35 - %37 = fmul float %36, 3.000000e+00 - %38 = fadd float %37, -4.000000e+00 - %39 = load float, float* %19, align 4 - %40 = fadd float %39, %38 - %41 = fmul float %36, 2.000000e+00 - %42 = fadd float %41, 1.000000e+00 - %43 = load float, float* %20, align 4 - %44 = fadd float %43, %42 - %45 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 2 - %46 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 2 - %47 = load float, float* %33, align 4 - %48 = load float, float* %22, align 4 - %49 = fmul float %47, %48 - %50 = fmul float %49, 3.000000e+00 - %51 = fadd float %50, -4.000000e+00 - %52 = load float, float* %45, align 4 - %53 = fadd float %52, %51 - %54 = fmul float %49, 2.000000e+00 - %55 = fadd float %54, 1.000000e+00 - %56 = load float, float* %46, align 4 - %57 = fadd float %56, %55 - %58 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 - %59 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 - %60 = load float, float* %6, align 4 - %61 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 - %62 = load float, float* %61, align 4 - %63 = fmul float %60, %62 - %64 = fmul float %63, 3.000000e+00 - %65 = fadd float %64, -4.000000e+00 - %66 = load float, float* %58, align 4 - %67 = fadd float %66, %65 - %68 = fmul float %63, 2.000000e+00 - %69 = fadd float %68, 1.000000e+00 - %70 = load float, float* %59, align 4 - %71 = fadd float %70, %69 - %72 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 - %73 = load float, float* %72, align 4 - %74 = load float, float* %8, align 4 - %75 = fmul float %73, %74 - %76 = fmul float %75, 3.000000e+00 - %77 = fadd float %76, -4.000000e+00 - %78 = load float, float* %58, align 4 - %79 = fadd float %78, %77 - %80 = fmul float %75, 2.000000e+00 - %81 = fadd float %80, 1.000000e+00 - %82 = load float, float* %59, align 4 - %83 = fadd float %82, %81 - %84 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - %85 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - %86 = load float, float* %6, align 4 - %87 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 - %88 = load float, float* %87, align 4 - %89 = fmul float %86, %88 - %90 = fmul float %89, 3.000000e+00 - %91 = fadd float %90, -4.000000e+00 - %92 = load float, float* %84, align 4 - %93 = fadd float %92, %91 - %94 = fmul float %89, 2.000000e+00 - %95 = fadd float %94, 1.000000e+00 - %96 = load float, float* %85, align 4 - %97 = fadd float %96, %95 - %98 = load float, float* %33, align 4 - %99 = load float, float* %61, align 4 - %100 = fmul float %98, %99 - %101 = fmul float %100, 3.000000e+00 - %102 = fadd float %101, -4.000000e+00 - %103 = load float, float* %84, align 4 - %104 = fadd float %103, %102 - %105 = fmul float %100, 2.000000e+00 - %106 = fadd float %105, 1.000000e+00 - %107 = load float, float* %85, align 4 - %108 = fadd float %107, %106 - %109 = load float, float* %72, align 4 - %110 = load float, float* %22, align 4 - %111 = fmul float %109, %110 - %112 = fmul float %111, 3.000000e+00 - %113 = fadd float %112, -4.000000e+00 - %114 = load float, float* %84, align 4 - %115 = fadd float %114, %113 - %116 = fmul float %111, 2.000000e+00 - %117 = fadd float %116, 1.000000e+00 - 
%118 = load float, float* %85, align 4 - %119 = fadd float %118, %117 - %120 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 - %121 = load float, float* %120, align 4 - %122 = load float, float* %8, align 4 - %123 = fmul float %121, %122 - %124 = fmul float %123, 3.000000e+00 - %125 = fadd float %124, -4.000000e+00 - %126 = load float, float* %84, align 4 - %127 = fadd float %126, %125 - %128 = fmul float %123, 2.000000e+00 - %129 = fadd float %128, 1.000000e+00 - %130 = load float, float* %85, align 4 - %131 = fadd float %130, %129 - %132 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 - %133 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 - %134 = load float, float* %33, align 4 - %135 = load float, float* %87, align 4 - %136 = fmul float %134, %135 - %137 = fmul float %136, 3.000000e+00 - %138 = fadd float %137, -4.000000e+00 - %139 = load float, float* %132, align 4 - %140 = fadd float %139, %138 - %141 = fmul float %136, 2.000000e+00 - %142 = fadd float %141, 1.000000e+00 - %143 = load float, float* %133, align 4 - %144 = fadd float %143, %142 - %145 = load float, float* %120, align 4 - %146 = load float, float* %22, align 4 - %147 = fmul float %145, %146 - %148 = fmul float %147, 3.000000e+00 - %149 = fadd float %148, -4.000000e+00 - %150 = load float, float* %132, align 4 - %151 = fadd float %150, %149 - %152 = fmul float %147, 2.000000e+00 - %153 = fadd float %152, 1.000000e+00 - %154 = load float, float* %133, align 4 - %155 = fadd float %154, %153 - %156 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 0 - %157 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 0 - %158 = load float, float* %72, align 4 - %159 = load float, float* %61, align 4 - %160 = fmul float %158, %159 - %161 = fmul float %160, 3.000000e+00 - %162 = fadd float %161, -4.000000e+00 - %163 = load float, float* %156, align 4 - %164 = fadd float %163, %162 - %165 = fmul float %160, 2.000000e+00 - %166 = fadd float %165, 1.000000e+00 - %167 = load float, float* %157, align 4 - %168 = fadd float %167, %166 - %169 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 - %170 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 - %171 = load float, float* %72, align 4 - %172 = load float, float* %87, align 4 - %173 = fmul float %171, %172 - %174 = fmul float %173, 3.000000e+00 - %175 = fadd float %174, -4.000000e+00 - %176 = load float, float* %169, align 4 - %177 = fadd float %176, %175 - %178 = fmul float %173, 2.000000e+00 - %179 = fadd float %178, 1.000000e+00 - %180 = load float, float* %170, align 4 - %181 = fadd float %180, %179 - %182 = load float, float* %120, align 4 - %183 = load float, float* %61, align 4 - %184 = fmul float %182, %183 - %185 = fmul float %184, 3.000000e+00 - %186 = fadd float %185, -4.000000e+00 - %187 = load float, float* %169, align 4 - %188 = fadd float %187, %186 - %189 = fmul float %184, 2.000000e+00 - %190 = fadd float %189, 1.000000e+00 - %191 = load float, float* %170, align 4 - %192 = fadd float %191, %190 - %193 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 2 - %194 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 2 - %195 = load float, float* %120, align 4 - %196 = load float, float* %87, align 4 - %197 = fmul float %195, %196 - %198 = fmul float %197, 3.000000e+00 - %199 = fadd float %198, -4.000000e+00 - %200 = load float, float* %193, align 4 - %201 = fadd float %200, %199 - %202 = fmul float %197, 2.000000e+00 - %203 = 
fadd float %202, 1.000000e+00 - %204 = load float, float* %194, align 4 - %205 = fadd float %204, %203 - %206 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 0 - %207 = load float, float* %206, align 4 - %208 = insertelement <4 x float> zeroinitializer, float %207, i32 0 - %209 = insertelement <4 x float> %208, float 0.000000e+00, i32 1 - %210 = insertelement <4 x float> %209, float 0.000000e+00, i32 2 - %211 = insertelement <4 x float> %210, float 0.000000e+00, i32 3 - %212 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 - %213 = load float, float* %212, align 4 - %214 = insertelement <4 x float> zeroinitializer, float %213, i32 0 - %215 = insertelement <4 x float> %214, float 1.000000e+00, i32 1 - %216 = insertelement <4 x float> %215, float 1.000000e+00, i32 2 - %217 = insertelement <4 x float> %216, float 1.000000e+00, i32 3 - %218 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 - %219 = load float, float* %218, align 4 - %220 = insertelement <4 x float> zeroinitializer, float %219, i32 0 - %221 = insertelement <4 x float> %220, float 1.000000e+00, i32 1 - %222 = insertelement <4 x float> %221, float 1.000000e+00, i32 2 - %223 = insertelement <4 x float> %222, float 1.000000e+00, i32 3 - %224 = fmul <4 x float> %217, %223 - %225 = fmul <4 x float> %224, - %226 = fadd <4 x float> %225, - %227 = fadd <4 x float> %211, %226 - %228 = extractelement <4 x float> %227, i32 0 - %229 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 0 - store float %228, float* %229, align 4 - %230 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 0 - %231 = load float, float* %230, align 4 - %232 = insertelement <4 x float> zeroinitializer, float %231, i32 0 - %233 = insertelement <4 x float> %232, float 0.000000e+00, i32 1 - %234 = insertelement <4 x float> %233, float 0.000000e+00, i32 2 - %235 = insertelement <4 x float> %234, float 0.000000e+00, i32 3 - %236 = insertelement <4 x float> zeroinitializer, float %213, i32 0 - %237 = insertelement <4 x float> %236, float 1.000000e+00, i32 1 - %238 = insertelement <4 x float> %237, float 1.000000e+00, i32 2 - %239 = insertelement <4 x float> %238, float 1.000000e+00, i32 3 - %240 = insertelement <4 x float> zeroinitializer, float %219, i32 0 - %241 = insertelement <4 x float> %240, float 1.000000e+00, i32 1 - %242 = insertelement <4 x float> %241, float 1.000000e+00, i32 2 - %243 = insertelement <4 x float> %242, float 1.000000e+00, i32 3 - %244 = fmul <4 x float> %239, %243 - %245 = fmul <4 x float> %244, - %246 = fadd <4 x float> %245, - %247 = fadd <4 x float> %235, %246 - %248 = extractelement <4 x float> %247, i32 0 - %249 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 0 - store float %248, float* %249, align 4 - %250 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 - %251 = load float, float* %250, align 4 - %252 = insertelement <4 x float> zeroinitializer, float %251, i32 0 - %253 = insertelement <4 x float> %252, float 0.000000e+00, i32 1 - %254 = insertelement <4 x float> %253, float 0.000000e+00, i32 2 - %255 = insertelement <4 x float> %254, float 0.000000e+00, i32 3 - %256 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 - %257 = load float, float* %256, align 4 - %258 = insertelement <4 x float> zeroinitializer, float %257, i32 0 - %259 = insertelement <4 x float> %258, float 1.000000e+00, i32 1 - %260 = insertelement <4 x float> %259, float 1.000000e+00, i32 2 - %261 = insertelement <4 x float> %260, 
float 1.000000e+00, i32 3 - %262 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 - %263 = load float, float* %262, align 4 - %264 = insertelement <4 x float> zeroinitializer, float %263, i32 0 - %265 = insertelement <4 x float> %264, float 1.000000e+00, i32 1 - %266 = insertelement <4 x float> %265, float 1.000000e+00, i32 2 - %267 = insertelement <4 x float> %266, float 1.000000e+00, i32 3 - %268 = fmul <4 x float> %261, %267 - %269 = fmul <4 x float> %268, - %270 = fadd <4 x float> %269, - %271 = fadd <4 x float> %255, %270 - %272 = extractelement <4 x float> %271, i32 0 - %273 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 - store float %272, float* %273, align 4 - %274 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 - %275 = load float, float* %274, align 4 - %276 = insertelement <4 x float> zeroinitializer, float %275, i32 0 - %277 = insertelement <4 x float> %276, float 0.000000e+00, i32 1 - %278 = insertelement <4 x float> %277, float 0.000000e+00, i32 2 - %279 = insertelement <4 x float> %278, float 0.000000e+00, i32 3 - %280 = insertelement <4 x float> zeroinitializer, float %257, i32 0 - %281 = insertelement <4 x float> %280, float 1.000000e+00, i32 1 - %282 = insertelement <4 x float> %281, float 1.000000e+00, i32 2 - %283 = insertelement <4 x float> %282, float 1.000000e+00, i32 3 - %284 = insertelement <4 x float> zeroinitializer, float %263, i32 0 - %285 = insertelement <4 x float> %284, float 1.000000e+00, i32 1 - %286 = insertelement <4 x float> %285, float 1.000000e+00, i32 2 - %287 = insertelement <4 x float> %286, float 1.000000e+00, i32 3 - %288 = fmul <4 x float> %283, %287 - %289 = fmul <4 x float> %288, - %290 = fadd <4 x float> %289, - %291 = fadd <4 x float> %279, %290 - %292 = extractelement <4 x float> %291, i32 0 - %293 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 - store float %292, float* %293, align 4 - %294 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 - %295 = load float, float* %294, align 4 - %296 = insertelement <4 x float> zeroinitializer, float %295, i32 0 - %297 = insertelement <4 x float> %296, float 0.000000e+00, i32 1 - %298 = insertelement <4 x float> %297, float 0.000000e+00, i32 2 - %299 = insertelement <4 x float> %298, float 0.000000e+00, i32 3 - %300 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 - %301 = load float, float* %300, align 4 - %302 = insertelement <4 x float> zeroinitializer, float %301, i32 0 - %303 = insertelement <4 x float> %302, float 1.000000e+00, i32 1 - %304 = insertelement <4 x float> %303, float 1.000000e+00, i32 2 - %305 = insertelement <4 x float> %304, float 1.000000e+00, i32 3 - %306 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 - %307 = load float, float* %306, align 4 - %308 = insertelement <4 x float> zeroinitializer, float %307, i32 0 - %309 = insertelement <4 x float> %308, float 1.000000e+00, i32 1 - %310 = insertelement <4 x float> %309, float 1.000000e+00, i32 2 - %311 = insertelement <4 x float> %310, float 1.000000e+00, i32 3 - %312 = fmul <4 x float> %305, %311 - %313 = fmul <4 x float> %312, - %314 = fadd <4 x float> %313, - %315 = fadd <4 x float> %299, %314 - %316 = extractelement <4 x float> %315, i32 0 - %317 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 - store float %316, float* %317, align 4 - %318 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 - %319 = load float, float* %318, align 4 - %320 = insertelement 
<4 x float> zeroinitializer, float %319, i32 0 - %321 = insertelement <4 x float> %320, float 0.000000e+00, i32 1 - %322 = insertelement <4 x float> %321, float 0.000000e+00, i32 2 - %323 = insertelement <4 x float> %322, float 0.000000e+00, i32 3 - %324 = insertelement <4 x float> zeroinitializer, float %301, i32 0 - %325 = insertelement <4 x float> %324, float 1.000000e+00, i32 1 - %326 = insertelement <4 x float> %325, float 1.000000e+00, i32 2 - %327 = insertelement <4 x float> %326, float 1.000000e+00, i32 3 - %328 = insertelement <4 x float> zeroinitializer, float %307, i32 0 - %329 = insertelement <4 x float> %328, float 1.000000e+00, i32 1 - %330 = insertelement <4 x float> %329, float 1.000000e+00, i32 2 - %331 = insertelement <4 x float> %330, float 1.000000e+00, i32 3 - %332 = fmul <4 x float> %327, %331 - %333 = fmul <4 x float> %332, - %334 = fadd <4 x float> %333, - %335 = fadd <4 x float> %323, %334 - %336 = extractelement <4 x float> %335, i32 0 - %337 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 - store float %336, float* %337, align 4 - %338 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 2 - %339 = load float, float* %338, align 4 - %340 = insertelement <4 x float> zeroinitializer, float %339, i32 0 - %341 = insertelement <4 x float> %340, float 0.000000e+00, i32 1 - %342 = insertelement <4 x float> %341, float 0.000000e+00, i32 2 - %343 = insertelement <4 x float> %342, float 0.000000e+00, i32 3 - %344 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 - %345 = load float, float* %344, align 4 - %346 = insertelement <4 x float> zeroinitializer, float %345, i32 0 - %347 = insertelement <4 x float> %346, float 1.000000e+00, i32 1 - %348 = insertelement <4 x float> %347, float 1.000000e+00, i32 2 - %349 = insertelement <4 x float> %348, float 1.000000e+00, i32 3 - %350 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 - %351 = load float, float* %350, align 4 - %352 = insertelement <4 x float> zeroinitializer, float %351, i32 0 - %353 = insertelement <4 x float> %352, float 1.000000e+00, i32 1 - %354 = insertelement <4 x float> %353, float 1.000000e+00, i32 2 - %355 = insertelement <4 x float> %354, float 1.000000e+00, i32 3 - %356 = fmul <4 x float> %349, %355 - %357 = fmul <4 x float> %356, - %358 = fadd <4 x float> %357, - %359 = fadd <4 x float> %343, %358 - %360 = extractelement <4 x float> %359, i32 0 - %361 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 2 - store float %360, float* %361, align 4 - %362 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 2 - %363 = load float, float* %362, align 4 - %364 = insertelement <4 x float> zeroinitializer, float %363, i32 0 - %365 = insertelement <4 x float> %364, float 0.000000e+00, i32 1 - %366 = insertelement <4 x float> %365, float 0.000000e+00, i32 2 - %367 = insertelement <4 x float> %366, float 0.000000e+00, i32 3 - %368 = insertelement <4 x float> zeroinitializer, float %345, i32 0 - %369 = insertelement <4 x float> %368, float 1.000000e+00, i32 1 - %370 = insertelement <4 x float> %369, float 1.000000e+00, i32 2 - %371 = insertelement <4 x float> %370, float 1.000000e+00, i32 3 - %372 = insertelement <4 x float> zeroinitializer, float %351, i32 0 - %373 = insertelement <4 x float> %372, float 1.000000e+00, i32 1 - %374 = insertelement <4 x float> %373, float 1.000000e+00, i32 2 - %375 = insertelement <4 x float> %374, float 1.000000e+00, i32 3 - %376 = fmul <4 x float> %371, %375 - %377 = fmul <4 x float> 
%376, - %378 = fadd <4 x float> %377, - %379 = fadd <4 x float> %367, %378 - %380 = extractelement <4 x float> %379, i32 0 - %381 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 2 - store float %380, float* %381, align 4 - %382 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 - %383 = load float, float* %382, align 4 - %384 = insertelement <4 x float> zeroinitializer, float %383, i32 0 - %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 1 - %386 = insertelement <4 x float> %385, float 0.000000e+00, i32 2 - %387 = insertelement <4 x float> %386, float 0.000000e+00, i32 3 - %388 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 - %389 = load float, float* %388, align 4 - %390 = insertelement <4 x float> zeroinitializer, float %389, i32 0 - %391 = insertelement <4 x float> %390, float 1.000000e+00, i32 1 - %392 = insertelement <4 x float> %391, float 1.000000e+00, i32 2 - %393 = insertelement <4 x float> %392, float 1.000000e+00, i32 3 - %394 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 - %395 = load float, float* %394, align 4 - %396 = insertelement <4 x float> zeroinitializer, float %395, i32 0 - %397 = insertelement <4 x float> %396, float 1.000000e+00, i32 1 - %398 = insertelement <4 x float> %397, float 1.000000e+00, i32 2 - %399 = insertelement <4 x float> %398, float 1.000000e+00, i32 3 - %400 = fmul <4 x float> %393, %399 - %401 = fmul <4 x float> %400, - %402 = fadd <4 x float> %401, - %403 = fadd <4 x float> %387, %402 - %404 = extractelement <4 x float> %403, i32 0 - %405 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 - store float %404, float* %405, align 4 - %406 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 - %407 = load float, float* %406, align 4 - %408 = insertelement <4 x float> zeroinitializer, float %407, i32 0 - %409 = insertelement <4 x float> %408, float 0.000000e+00, i32 1 - %410 = insertelement <4 x float> %409, float 0.000000e+00, i32 2 - %411 = insertelement <4 x float> %410, float 0.000000e+00, i32 3 - %412 = insertelement <4 x float> zeroinitializer, float %389, i32 0 - %413 = insertelement <4 x float> %412, float 1.000000e+00, i32 1 - %414 = insertelement <4 x float> %413, float 1.000000e+00, i32 2 - %415 = insertelement <4 x float> %414, float 1.000000e+00, i32 3 - %416 = insertelement <4 x float> zeroinitializer, float %395, i32 0 - %417 = insertelement <4 x float> %416, float 1.000000e+00, i32 1 - %418 = insertelement <4 x float> %417, float 1.000000e+00, i32 2 - %419 = insertelement <4 x float> %418, float 1.000000e+00, i32 3 - %420 = fmul <4 x float> %415, %419 - %421 = fmul <4 x float> %420, - %422 = fadd <4 x float> %421, - %423 = fadd <4 x float> %411, %422 - %424 = extractelement <4 x float> %423, i32 0 - %425 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 - store float %424, float* %425, align 4 - %426 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 - %427 = load float, float* %426, align 4 - %428 = insertelement <4 x float> zeroinitializer, float %427, i32 0 - %429 = insertelement <4 x float> %428, float 0.000000e+00, i32 1 - %430 = insertelement <4 x float> %429, float 0.000000e+00, i32 2 - %431 = insertelement <4 x float> %430, float 0.000000e+00, i32 3 - %432 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 - %433 = load float, float* %432, align 4 - %434 = insertelement <4 x float> zeroinitializer, float %433, i32 0 - %435 = insertelement <4 x float> %434, 
float 1.000000e+00, i32 1 - %436 = insertelement <4 x float> %435, float 1.000000e+00, i32 2 - %437 = insertelement <4 x float> %436, float 1.000000e+00, i32 3 - %438 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 - %439 = load float, float* %438, align 4 - %440 = insertelement <4 x float> zeroinitializer, float %439, i32 0 - %441 = insertelement <4 x float> %440, float 1.000000e+00, i32 1 - %442 = insertelement <4 x float> %441, float 1.000000e+00, i32 2 - %443 = insertelement <4 x float> %442, float 1.000000e+00, i32 3 - %444 = fmul <4 x float> %437, %443 - %445 = fmul <4 x float> %444, - %446 = fadd <4 x float> %445, - %447 = fadd <4 x float> %431, %446 - %448 = extractelement <4 x float> %447, i32 0 - %449 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 - store float %448, float* %449, align 4 - %450 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 - %451 = load float, float* %450, align 4 - %452 = insertelement <4 x float> zeroinitializer, float %451, i32 0 - %453 = insertelement <4 x float> %452, float 0.000000e+00, i32 1 - %454 = insertelement <4 x float> %453, float 0.000000e+00, i32 2 - %455 = insertelement <4 x float> %454, float 0.000000e+00, i32 3 - %456 = insertelement <4 x float> zeroinitializer, float %433, i32 0 - %457 = insertelement <4 x float> %456, float 1.000000e+00, i32 1 - %458 = insertelement <4 x float> %457, float 1.000000e+00, i32 2 - %459 = insertelement <4 x float> %458, float 1.000000e+00, i32 3 - %460 = insertelement <4 x float> zeroinitializer, float %439, i32 0 - %461 = insertelement <4 x float> %460, float 1.000000e+00, i32 1 - %462 = insertelement <4 x float> %461, float 1.000000e+00, i32 2 - %463 = insertelement <4 x float> %462, float 1.000000e+00, i32 3 - %464 = fmul <4 x float> %459, %463 - %465 = fmul <4 x float> %464, - %466 = fadd <4 x float> %465, - %467 = fadd <4 x float> %455, %466 - %468 = extractelement <4 x float> %467, i32 0 - %469 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 - store float %468, float* %469, align 4 - %470 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - %471 = load float, float* %470, align 4 - %472 = insertelement <4 x float> zeroinitializer, float %471, i32 0 - %473 = insertelement <4 x float> %472, float 0.000000e+00, i32 1 - %474 = insertelement <4 x float> %473, float 0.000000e+00, i32 2 - %475 = insertelement <4 x float> %474, float 0.000000e+00, i32 3 - %476 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 - %477 = load float, float* %476, align 4 - %478 = insertelement <4 x float> zeroinitializer, float %477, i32 0 - %479 = insertelement <4 x float> %478, float 1.000000e+00, i32 1 - %480 = insertelement <4 x float> %479, float 1.000000e+00, i32 2 - %481 = insertelement <4 x float> %480, float 1.000000e+00, i32 3 - %482 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 - %483 = load float, float* %482, align 4 - %484 = insertelement <4 x float> zeroinitializer, float %483, i32 0 - %485 = insertelement <4 x float> %484, float 1.000000e+00, i32 1 - %486 = insertelement <4 x float> %485, float 1.000000e+00, i32 2 - %487 = insertelement <4 x float> %486, float 1.000000e+00, i32 3 - %488 = fmul <4 x float> %481, %487 - %489 = fmul <4 x float> %488, - %490 = fadd <4 x float> %489, - %491 = fadd <4 x float> %475, %490 - %492 = extractelement <4 x float> %491, i32 0 - %493 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - store float %492, float* %493, align 4 - %494 
= getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - %495 = load float, float* %494, align 4 - %496 = insertelement <4 x float> zeroinitializer, float %495, i32 0 - %497 = insertelement <4 x float> %496, float 0.000000e+00, i32 1 - %498 = insertelement <4 x float> %497, float 0.000000e+00, i32 2 - %499 = insertelement <4 x float> %498, float 0.000000e+00, i32 3 - %500 = insertelement <4 x float> zeroinitializer, float %477, i32 0 - %501 = insertelement <4 x float> %500, float 1.000000e+00, i32 1 - %502 = insertelement <4 x float> %501, float 1.000000e+00, i32 2 - %503 = insertelement <4 x float> %502, float 1.000000e+00, i32 3 - %504 = insertelement <4 x float> zeroinitializer, float %483, i32 0 - %505 = insertelement <4 x float> %504, float 1.000000e+00, i32 1 - %506 = insertelement <4 x float> %505, float 1.000000e+00, i32 2 - %507 = insertelement <4 x float> %506, float 1.000000e+00, i32 3 - %508 = fmul <4 x float> %503, %507 - %509 = fmul <4 x float> %508, - %510 = fadd <4 x float> %509, - %511 = fadd <4 x float> %499, %510 - %512 = extractelement <4 x float> %511, i32 0 - %513 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - store float %512, float* %513, align 4 - %514 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - %515 = load float, float* %514, align 4 - %516 = insertelement <4 x float> zeroinitializer, float %515, i32 0 - %517 = insertelement <4 x float> %516, float 0.000000e+00, i32 1 - %518 = insertelement <4 x float> %517, float 0.000000e+00, i32 2 - %519 = insertelement <4 x float> %518, float 0.000000e+00, i32 3 - %520 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 - %521 = load float, float* %520, align 4 - %522 = insertelement <4 x float> zeroinitializer, float %521, i32 0 - %523 = insertelement <4 x float> %522, float 1.000000e+00, i32 1 - %524 = insertelement <4 x float> %523, float 1.000000e+00, i32 2 - %525 = insertelement <4 x float> %524, float 1.000000e+00, i32 3 - %526 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 - %527 = load float, float* %526, align 4 - %528 = insertelement <4 x float> zeroinitializer, float %527, i32 0 - %529 = insertelement <4 x float> %528, float 1.000000e+00, i32 1 - %530 = insertelement <4 x float> %529, float 1.000000e+00, i32 2 - %531 = insertelement <4 x float> %530, float 1.000000e+00, i32 3 - %532 = fmul <4 x float> %525, %531 - %533 = fmul <4 x float> %532, - %534 = fadd <4 x float> %533, - %535 = fadd <4 x float> %519, %534 - %536 = extractelement <4 x float> %535, i32 0 - %537 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - store float %536, float* %537, align 4 - %538 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - %539 = load float, float* %538, align 4 - %540 = insertelement <4 x float> zeroinitializer, float %539, i32 0 - %541 = insertelement <4 x float> %540, float 0.000000e+00, i32 1 - %542 = insertelement <4 x float> %541, float 0.000000e+00, i32 2 - %543 = insertelement <4 x float> %542, float 0.000000e+00, i32 3 - %544 = insertelement <4 x float> zeroinitializer, float %521, i32 0 - %545 = insertelement <4 x float> %544, float 1.000000e+00, i32 1 - %546 = insertelement <4 x float> %545, float 1.000000e+00, i32 2 - %547 = insertelement <4 x float> %546, float 1.000000e+00, i32 3 - %548 = insertelement <4 x float> zeroinitializer, float %527, i32 0 - %549 = insertelement <4 x float> %548, float 1.000000e+00, i32 1 - %550 = insertelement <4 x float> %549, float 1.000000e+00, i32 2 
- %551 = insertelement <4 x float> %550, float 1.000000e+00, i32 3 - %552 = fmul <4 x float> %547, %551 - %553 = fmul <4 x float> %552, - %554 = fadd <4 x float> %553, - %555 = fadd <4 x float> %543, %554 - %556 = extractelement <4 x float> %555, i32 0 - %557 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - store float %556, float* %557, align 4 - %558 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - %559 = load float, float* %558, align 4 - %560 = insertelement <4 x float> zeroinitializer, float %559, i32 0 - %561 = insertelement <4 x float> %560, float 0.000000e+00, i32 1 - %562 = insertelement <4 x float> %561, float 0.000000e+00, i32 2 - %563 = insertelement <4 x float> %562, float 0.000000e+00, i32 3 - %564 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 - %565 = load float, float* %564, align 4 - %566 = insertelement <4 x float> zeroinitializer, float %565, i32 0 - %567 = insertelement <4 x float> %566, float 1.000000e+00, i32 1 - %568 = insertelement <4 x float> %567, float 1.000000e+00, i32 2 - %569 = insertelement <4 x float> %568, float 1.000000e+00, i32 3 - %570 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 - %571 = load float, float* %570, align 4 - %572 = insertelement <4 x float> zeroinitializer, float %571, i32 0 - %573 = insertelement <4 x float> %572, float 1.000000e+00, i32 1 - %574 = insertelement <4 x float> %573, float 1.000000e+00, i32 2 - %575 = insertelement <4 x float> %574, float 1.000000e+00, i32 3 - %576 = fmul <4 x float> %569, %575 - %577 = fmul <4 x float> %576, - %578 = fadd <4 x float> %577, - %579 = fadd <4 x float> %563, %578 - %580 = extractelement <4 x float> %579, i32 0 - %581 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - store float %580, float* %581, align 4 - %582 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - %583 = load float, float* %582, align 4 - %584 = insertelement <4 x float> zeroinitializer, float %583, i32 0 - %585 = insertelement <4 x float> %584, float 0.000000e+00, i32 1 - %586 = insertelement <4 x float> %585, float 0.000000e+00, i32 2 - %587 = insertelement <4 x float> %586, float 0.000000e+00, i32 3 - %588 = insertelement <4 x float> zeroinitializer, float %565, i32 0 - %589 = insertelement <4 x float> %588, float 1.000000e+00, i32 1 - %590 = insertelement <4 x float> %589, float 1.000000e+00, i32 2 - %591 = insertelement <4 x float> %590, float 1.000000e+00, i32 3 - %592 = insertelement <4 x float> zeroinitializer, float %571, i32 0 - %593 = insertelement <4 x float> %592, float 1.000000e+00, i32 1 - %594 = insertelement <4 x float> %593, float 1.000000e+00, i32 2 - %595 = insertelement <4 x float> %594, float 1.000000e+00, i32 3 - %596 = fmul <4 x float> %591, %595 - %597 = fmul <4 x float> %596, - %598 = fadd <4 x float> %597, - %599 = fadd <4 x float> %587, %598 - %600 = extractelement <4 x float> %599, i32 0 - %601 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - store float %600, float* %601, align 4 - %602 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - %603 = load float, float* %602, align 4 - %604 = insertelement <4 x float> zeroinitializer, float %603, i32 0 - %605 = insertelement <4 x float> %604, float 0.000000e+00, i32 1 - %606 = insertelement <4 x float> %605, float 0.000000e+00, i32 2 - %607 = insertelement <4 x float> %606, float 0.000000e+00, i32 3 - %608 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 - %609 = load float, 
float* %608, align 4 - %610 = insertelement <4 x float> zeroinitializer, float %609, i32 0 - %611 = insertelement <4 x float> %610, float 1.000000e+00, i32 1 - %612 = insertelement <4 x float> %611, float 1.000000e+00, i32 2 - %613 = insertelement <4 x float> %612, float 1.000000e+00, i32 3 - %614 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 - %615 = load float, float* %614, align 4 - %616 = insertelement <4 x float> zeroinitializer, float %615, i32 0 - %617 = insertelement <4 x float> %616, float 1.000000e+00, i32 1 - %618 = insertelement <4 x float> %617, float 1.000000e+00, i32 2 - %619 = insertelement <4 x float> %618, float 1.000000e+00, i32 3 - %620 = fmul <4 x float> %613, %619 - %621 = fmul <4 x float> %620, - %622 = fadd <4 x float> %621, - %623 = fadd <4 x float> %607, %622 - %624 = extractelement <4 x float> %623, i32 0 - %625 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - store float %624, float* %625, align 4 - %626 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - %627 = load float, float* %626, align 4 - %628 = insertelement <4 x float> zeroinitializer, float %627, i32 0 - %629 = insertelement <4 x float> %628, float 0.000000e+00, i32 1 - %630 = insertelement <4 x float> %629, float 0.000000e+00, i32 2 - %631 = insertelement <4 x float> %630, float 0.000000e+00, i32 3 - %632 = insertelement <4 x float> zeroinitializer, float %609, i32 0 - %633 = insertelement <4 x float> %632, float 1.000000e+00, i32 1 - %634 = insertelement <4 x float> %633, float 1.000000e+00, i32 2 - %635 = insertelement <4 x float> %634, float 1.000000e+00, i32 3 - %636 = insertelement <4 x float> zeroinitializer, float %615, i32 0 - %637 = insertelement <4 x float> %636, float 1.000000e+00, i32 1 - %638 = insertelement <4 x float> %637, float 1.000000e+00, i32 2 - %639 = insertelement <4 x float> %638, float 1.000000e+00, i32 3 - %640 = fmul <4 x float> %635, %639 - %641 = fmul <4 x float> %640, - %642 = fadd <4 x float> %641, - %643 = fadd <4 x float> %631, %642 - %644 = extractelement <4 x float> %643, i32 0 - %645 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - store float %644, float* %645, align 4 - %646 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 - %647 = load float, float* %646, align 4 - %648 = insertelement <4 x float> zeroinitializer, float %647, i32 0 - %649 = insertelement <4 x float> %648, float 0.000000e+00, i32 1 - %650 = insertelement <4 x float> %649, float 0.000000e+00, i32 2 - %651 = insertelement <4 x float> %650, float 0.000000e+00, i32 3 - %652 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 - %653 = load float, float* %652, align 4 - %654 = insertelement <4 x float> zeroinitializer, float %653, i32 0 - %655 = insertelement <4 x float> %654, float 1.000000e+00, i32 1 - %656 = insertelement <4 x float> %655, float 1.000000e+00, i32 2 - %657 = insertelement <4 x float> %656, float 1.000000e+00, i32 3 - %658 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 - %659 = load float, float* %658, align 4 - %660 = insertelement <4 x float> zeroinitializer, float %659, i32 0 - %661 = insertelement <4 x float> %660, float 1.000000e+00, i32 1 - %662 = insertelement <4 x float> %661, float 1.000000e+00, i32 2 - %663 = insertelement <4 x float> %662, float 1.000000e+00, i32 3 - %664 = fmul <4 x float> %657, %663 - %665 = fmul <4 x float> %664, - %666 = fadd <4 x float> %665, - %667 = fadd <4 x float> %651, %666 - %668 = extractelement <4 x float> 
%667, i32 0 - %669 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 - store float %668, float* %669, align 4 - %670 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 - %671 = load float, float* %670, align 4 - %672 = insertelement <4 x float> zeroinitializer, float %671, i32 0 - %673 = insertelement <4 x float> %672, float 0.000000e+00, i32 1 - %674 = insertelement <4 x float> %673, float 0.000000e+00, i32 2 - %675 = insertelement <4 x float> %674, float 0.000000e+00, i32 3 - %676 = insertelement <4 x float> zeroinitializer, float %653, i32 0 - %677 = insertelement <4 x float> %676, float 1.000000e+00, i32 1 - %678 = insertelement <4 x float> %677, float 1.000000e+00, i32 2 - %679 = insertelement <4 x float> %678, float 1.000000e+00, i32 3 - %680 = insertelement <4 x float> zeroinitializer, float %659, i32 0 - %681 = insertelement <4 x float> %680, float 1.000000e+00, i32 1 - %682 = insertelement <4 x float> %681, float 1.000000e+00, i32 2 - %683 = insertelement <4 x float> %682, float 1.000000e+00, i32 3 - %684 = fmul <4 x float> %679, %683 - %685 = fmul <4 x float> %684, - %686 = fadd <4 x float> %685, - %687 = fadd <4 x float> %675, %686 - %688 = extractelement <4 x float> %687, i32 0 - %689 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 - store float %688, float* %689, align 4 - %690 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 - %691 = load float, float* %690, align 4 - %692 = insertelement <4 x float> zeroinitializer, float %691, i32 0 - %693 = insertelement <4 x float> %692, float 0.000000e+00, i32 1 - %694 = insertelement <4 x float> %693, float 0.000000e+00, i32 2 - %695 = insertelement <4 x float> %694, float 0.000000e+00, i32 3 - %696 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 - %697 = load float, float* %696, align 4 - %698 = insertelement <4 x float> zeroinitializer, float %697, i32 0 - %699 = insertelement <4 x float> %698, float 1.000000e+00, i32 1 - %700 = insertelement <4 x float> %699, float 1.000000e+00, i32 2 - %701 = insertelement <4 x float> %700, float 1.000000e+00, i32 3 - %702 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 - %703 = load float, float* %702, align 4 - %704 = insertelement <4 x float> zeroinitializer, float %703, i32 0 - %705 = insertelement <4 x float> %704, float 1.000000e+00, i32 1 - %706 = insertelement <4 x float> %705, float 1.000000e+00, i32 2 - %707 = insertelement <4 x float> %706, float 1.000000e+00, i32 3 - %708 = fmul <4 x float> %701, %707 - %709 = fmul <4 x float> %708, - %710 = fadd <4 x float> %709, - %711 = fadd <4 x float> %695, %710 - %712 = extractelement <4 x float> %711, i32 0 - %713 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 - store float %712, float* %713, align 4 - %714 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 - %715 = load float, float* %714, align 4 - %716 = insertelement <4 x float> zeroinitializer, float %715, i32 0 - %717 = insertelement <4 x float> %716, float 0.000000e+00, i32 1 - %718 = insertelement <4 x float> %717, float 0.000000e+00, i32 2 - %719 = insertelement <4 x float> %718, float 0.000000e+00, i32 3 - %720 = insertelement <4 x float> zeroinitializer, float %697, i32 0 - %721 = insertelement <4 x float> %720, float 1.000000e+00, i32 1 - %722 = insertelement <4 x float> %721, float 1.000000e+00, i32 2 - %723 = insertelement <4 x float> %722, float 1.000000e+00, i32 3 - %724 = insertelement <4 x float> zeroinitializer, float %703, i32 0 
- %725 = insertelement <4 x float> %724, float 1.000000e+00, i32 1 - %726 = insertelement <4 x float> %725, float 1.000000e+00, i32 2 - %727 = insertelement <4 x float> %726, float 1.000000e+00, i32 3 - %728 = fmul <4 x float> %723, %727 - %729 = fmul <4 x float> %728, - %730 = fadd <4 x float> %729, - %731 = fadd <4 x float> %719, %730 - %732 = extractelement <4 x float> %731, i32 0 - %733 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 - store float %732, float* %733, align 4 - %734 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 0 - %735 = load float, float* %734, align 4 - %736 = insertelement <4 x float> zeroinitializer, float %735, i32 0 - %737 = insertelement <4 x float> %736, float 0.000000e+00, i32 1 - %738 = insertelement <4 x float> %737, float 0.000000e+00, i32 2 - %739 = insertelement <4 x float> %738, float 0.000000e+00, i32 3 - %740 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 - %741 = load float, float* %740, align 4 - %742 = insertelement <4 x float> zeroinitializer, float %741, i32 0 - %743 = insertelement <4 x float> %742, float 1.000000e+00, i32 1 - %744 = insertelement <4 x float> %743, float 1.000000e+00, i32 2 - %745 = insertelement <4 x float> %744, float 1.000000e+00, i32 3 - %746 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 - %747 = load float, float* %746, align 4 - %748 = insertelement <4 x float> zeroinitializer, float %747, i32 0 - %749 = insertelement <4 x float> %748, float 1.000000e+00, i32 1 - %750 = insertelement <4 x float> %749, float 1.000000e+00, i32 2 - %751 = insertelement <4 x float> %750, float 1.000000e+00, i32 3 - %752 = fmul <4 x float> %745, %751 - %753 = fmul <4 x float> %752, - %754 = fadd <4 x float> %753, - %755 = fadd <4 x float> %739, %754 - %756 = extractelement <4 x float> %755, i32 0 - %757 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 0 - store float %756, float* %757, align 4 - %758 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 0 - %759 = load float, float* %758, align 4 - %760 = insertelement <4 x float> zeroinitializer, float %759, i32 0 - %761 = insertelement <4 x float> %760, float 0.000000e+00, i32 1 - %762 = insertelement <4 x float> %761, float 0.000000e+00, i32 2 - %763 = insertelement <4 x float> %762, float 0.000000e+00, i32 3 - %764 = insertelement <4 x float> zeroinitializer, float %741, i32 0 - %765 = insertelement <4 x float> %764, float 1.000000e+00, i32 1 - %766 = insertelement <4 x float> %765, float 1.000000e+00, i32 2 - %767 = insertelement <4 x float> %766, float 1.000000e+00, i32 3 - %768 = insertelement <4 x float> zeroinitializer, float %747, i32 0 - %769 = insertelement <4 x float> %768, float 1.000000e+00, i32 1 - %770 = insertelement <4 x float> %769, float 1.000000e+00, i32 2 - %771 = insertelement <4 x float> %770, float 1.000000e+00, i32 3 - %772 = fmul <4 x float> %767, %771 - %773 = fmul <4 x float> %772, - %774 = fadd <4 x float> %773, - %775 = fadd <4 x float> %763, %774 - %776 = extractelement <4 x float> %775, i32 0 - %777 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 0 - store float %776, float* %777, align 4 - %778 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 - %779 = load float, float* %778, align 4 - %780 = insertelement <4 x float> zeroinitializer, float %779, i32 0 - %781 = insertelement <4 x float> %780, float 0.000000e+00, i32 1 - %782 = insertelement <4 x float> %781, float 0.000000e+00, i32 2 - %783 = insertelement <4 x 
float> %782, float 0.000000e+00, i32 3 - %784 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 - %785 = load float, float* %784, align 4 - %786 = insertelement <4 x float> zeroinitializer, float %785, i32 0 - %787 = insertelement <4 x float> %786, float 1.000000e+00, i32 1 - %788 = insertelement <4 x float> %787, float 1.000000e+00, i32 2 - %789 = insertelement <4 x float> %788, float 1.000000e+00, i32 3 - %790 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 - %791 = load float, float* %790, align 4 - %792 = insertelement <4 x float> zeroinitializer, float %791, i32 0 - %793 = insertelement <4 x float> %792, float 1.000000e+00, i32 1 - %794 = insertelement <4 x float> %793, float 1.000000e+00, i32 2 - %795 = insertelement <4 x float> %794, float 1.000000e+00, i32 3 - %796 = fmul <4 x float> %789, %795 - %797 = fmul <4 x float> %796, - %798 = fadd <4 x float> %797, - %799 = fadd <4 x float> %783, %798 - %800 = extractelement <4 x float> %799, i32 0 - %801 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 - store float %800, float* %801, align 4 - %802 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 - %803 = load float, float* %802, align 4 - %804 = insertelement <4 x float> zeroinitializer, float %803, i32 0 - %805 = insertelement <4 x float> %804, float 0.000000e+00, i32 1 - %806 = insertelement <4 x float> %805, float 0.000000e+00, i32 2 - %807 = insertelement <4 x float> %806, float 0.000000e+00, i32 3 - %808 = insertelement <4 x float> zeroinitializer, float %785, i32 0 - %809 = insertelement <4 x float> %808, float 1.000000e+00, i32 1 - %810 = insertelement <4 x float> %809, float 1.000000e+00, i32 2 - %811 = insertelement <4 x float> %810, float 1.000000e+00, i32 3 - %812 = insertelement <4 x float> zeroinitializer, float %791, i32 0 - %813 = insertelement <4 x float> %812, float 1.000000e+00, i32 1 - %814 = insertelement <4 x float> %813, float 1.000000e+00, i32 2 - %815 = insertelement <4 x float> %814, float 1.000000e+00, i32 3 - %816 = fmul <4 x float> %811, %815 - %817 = fmul <4 x float> %816, - %818 = fadd <4 x float> %817, - %819 = fadd <4 x float> %807, %818 - %820 = extractelement <4 x float> %819, i32 0 - %821 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 - store float %820, float* %821, align 4 - %822 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 - %823 = load float, float* %822, align 4 - %824 = insertelement <4 x float> zeroinitializer, float %823, i32 0 - %825 = insertelement <4 x float> %824, float 0.000000e+00, i32 1 - %826 = insertelement <4 x float> %825, float 0.000000e+00, i32 2 - %827 = insertelement <4 x float> %826, float 0.000000e+00, i32 3 - %828 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 - %829 = load float, float* %828, align 4 - %830 = insertelement <4 x float> zeroinitializer, float %829, i32 0 - %831 = insertelement <4 x float> %830, float 1.000000e+00, i32 1 - %832 = insertelement <4 x float> %831, float 1.000000e+00, i32 2 - %833 = insertelement <4 x float> %832, float 1.000000e+00, i32 3 - %834 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 - %835 = load float, float* %834, align 4 - %836 = insertelement <4 x float> zeroinitializer, float %835, i32 0 - %837 = insertelement <4 x float> %836, float 1.000000e+00, i32 1 - %838 = insertelement <4 x float> %837, float 1.000000e+00, i32 2 - %839 = insertelement <4 x float> %838, float 1.000000e+00, i32 3 - %840 = fmul <4 x float> %833, %839 - 
%841 = fmul <4 x float> %840, - %842 = fadd <4 x float> %841, - %843 = fadd <4 x float> %827, %842 - %844 = extractelement <4 x float> %843, i32 0 - %845 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 - store float %844, float* %845, align 4 - %846 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 - %847 = load float, float* %846, align 4 - %848 = insertelement <4 x float> zeroinitializer, float %847, i32 0 - %849 = insertelement <4 x float> %848, float 0.000000e+00, i32 1 - %850 = insertelement <4 x float> %849, float 0.000000e+00, i32 2 - %851 = insertelement <4 x float> %850, float 0.000000e+00, i32 3 - %852 = insertelement <4 x float> zeroinitializer, float %829, i32 0 - %853 = insertelement <4 x float> %852, float 1.000000e+00, i32 1 - %854 = insertelement <4 x float> %853, float 1.000000e+00, i32 2 - %855 = insertelement <4 x float> %854, float 1.000000e+00, i32 3 - %856 = insertelement <4 x float> zeroinitializer, float %835, i32 0 - %857 = insertelement <4 x float> %856, float 1.000000e+00, i32 1 - %858 = insertelement <4 x float> %857, float 1.000000e+00, i32 2 - %859 = insertelement <4 x float> %858, float 1.000000e+00, i32 3 - %860 = fmul <4 x float> %855, %859 - %861 = fmul <4 x float> %860, - %862 = fadd <4 x float> %861, - %863 = fadd <4 x float> %851, %862 - %864 = extractelement <4 x float> %863, i32 0 - %865 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 - store float %864, float* %865, align 4 - %866 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 2 - %867 = load float, float* %866, align 4 - %868 = insertelement <4 x float> zeroinitializer, float %867, i32 0 - %869 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 2 - %870 = load float, float* %869, align 4 - %871 = insertelement <4 x float> %868, float %870, i32 1 - %872 = insertelement <4 x float> %871, float 0.000000e+00, i32 2 - %873 = insertelement <4 x float> %872, float 0.000000e+00, i32 3 - %874 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 - %875 = load float, float* %874, align 4 - %876 = insertelement <4 x float> zeroinitializer, float %875, i32 0 - %877 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 - %878 = load float, float* %877, align 4 - %879 = insertelement <4 x float> %876, float %878, i32 1 - %880 = insertelement <4 x float> %879, float 1.000000e+00, i32 2 - %881 = insertelement <4 x float> %880, float 1.000000e+00, i32 3 - %882 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 - %883 = load float, float* %882, align 4 - %884 = insertelement <4 x float> zeroinitializer, float %883, i32 0 - %885 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 - %886 = load float, float* %885, align 4 - %887 = insertelement <4 x float> %884, float %886, i32 1 - %888 = insertelement <4 x float> %887, float 1.000000e+00, i32 2 - %889 = insertelement <4 x float> %888, float 1.000000e+00, i32 3 - %890 = fmul <4 x float> %881, %889 - %891 = fmul <4 x float> %890, - %892 = fadd <4 x float> %891, - %893 = fadd <4 x float> %873, %892 - %894 = extractelement <4 x float> %893, i32 0 - %895 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 2 - store float %894, float* %895, align 4 - %896 = extractelement <4 x float> %893, i32 1 - %897 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 2 - store float %896, float* %897, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #0 { -.preheader: 
- %0 = alloca [2 x [2 x float]], align 16 - %1 = alloca [2 x [2 x float]], align 16 - %2 = alloca [3 x [3 x float]], align 16 - %3 = alloca [3 x [3 x float]], align 16 - %4 = bitcast [2 x [2 x float]]* %0 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.mat_in to i8*), i64 16, i1 false) - %5 = bitcast [2 x [2 x float]]* %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.f_in to i8*), i64 16, i1 false) - %6 = bitcast [3 x [3 x float]]* %2 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) - %7 = bitcast [3 x [3 x float]]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %7, i8 0, i64 36, i1 false) - %8 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %0, i64 0, i64 0 - %9 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %1, i64 0, i64 0 - %10 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0 - %11 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0 - call void @load_use_twice([2 x float]* nonnull %8, [2 x float]* nonnull %9, [3 x float]* nonnull %10, [3 x float]* nonnull %11) - %12 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 0 - %13 = load float, float* %12, align 16 - %14 = fpext float %13 to double - %15 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %14) #4 - %16 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 0 - %17 = load float, float* %16, align 16 - %18 = fpext float %17 to double - %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #4 - %20 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 1 - %21 = load float, float* %20, align 4 - %22 = fpext float %21 to double - %23 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %22) #4 - %24 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 1 - %25 = load float, float* %24, align 4 - %26 = fpext float %25 to double - %27 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #4 - %28 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 2 - %29 = load float, float* %28, align 8 - %30 = fpext float %29 to double - %31 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %30) #4 - %32 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 2 - %33 = load float, float* %32, align 8 - %34 = fpext float %33 to double - %35 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %34) #4 - %36 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 0 - %37 = load float, float* %36, align 4 - %38 = fpext float %37 to double - %39 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %38) #4 - %40 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 0 - %41 = load float, float* %40, align 4 - %42 = fpext float %41 to double - %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %42) #4 - %44 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 1 - %45 = load float, float* %44, align 4 - %46 = fpext float %45 to double - %47 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %46) #4 - %48 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 1 - %49 = load float, float* %48, align 4 - %50 = fpext float %49 to double - %51 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %50) #4 - %52 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 2 - %53 = load float, float* %52, align 4 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #4 - %56 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 2 - %57 = load float, float* %56, align 4 - %58 = fpext float %57 to double - %59 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %58) #4 - %60 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 0 - %61 = load float, float* %60, align 8 - %62 = fpext float %61 to double - %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %62) #4 - %64 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 0 - %65 = load float, float* %64, align 8 - %66 = fpext float %65 to double - %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %66) #4 - %68 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 1 - %69 = load float, float* %68, align 4 - %70 = fpext float %69 to double - %71 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %70) #4 - %72 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 1 - %73 = load float, float* %72, align 4 - %74 = fpext float %73 to double - %75 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %74) #4 - %76 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 2 - %77 = load float, float* %76, align 8 - %78 = fpext float %77 to double - %79 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %78) #4 - %80 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 2 - %81 = load float, float* %80, align 8 - %82 = fpext float %81 to double - %83 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %82) #4 - ret i32 0 -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 - -declare i32 @printf(i8*, ...) #3 - -attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { argmemonly nounwind willreturn writeonly } -attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/opt.ll b/src/dios-egraphs/Diospyros/opt.ll deleted file mode 100644 index 745e1062..00000000 --- a/src/dios-egraphs/Diospyros/opt.ll +++ /dev/null @@ -1,364 +0,0 @@ -; ModuleID = 'clang.ll' -source_filename = "llvm-tests/load_reuse.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@__const.main.mat_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 2.000000e+00], [2 x float] [float 3.000000e+00, float 4.000000e+00]], align 16 -@__const.main.f_in = private unnamed_addr constant [2 x [2 x float]] [[2 x float] [float 1.000000e+00, float 1.000000e+00], [2 x float] [float 1.000000e+00, float 1.000000e+00]], align 16 -@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 - -; Function Attrs: noinline nounwind ssp uwtable -define void @load_use_twice([2 x float]* %0, [2 x float]* %1, [3 x float]* %2, [3 x float]* %3) #0 { -.preheader7: - %4 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 0 - %5 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 0 - %6 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 0 - %7 = load float, float* %6, align 4 - %8 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 0 - %9 = load float, float* %8, align 4 - %10 = fmul float %7, %9 - %11 = fmul float %10, 3.000000e+00 - %12 = fadd float %11, -4.000000e+00 - %13 = load float, float* %4, align 4 - %14 = fadd float %13, %12 - store float %14, float* %4, align 4 - %15 = fmul float %10, 2.000000e+00 - %16 = fadd float %15, 1.000000e+00 - %17 = load float, float* %5, align 4 - %18 = fadd float %17, %16 - 
store float %18, float* %5, align 4 - %19 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 1 - %20 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 1 - %21 = load float, float* %6, align 4 - %22 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 0, i64 1 - %23 = load float, float* %22, align 4 - %24 = fmul float %21, %23 - %25 = fmul float %24, 3.000000e+00 - %26 = fadd float %25, -4.000000e+00 - %27 = load float, float* %19, align 4 - %28 = fadd float %27, %26 - store float %28, float* %19, align 4 - %29 = fmul float %24, 2.000000e+00 - %30 = fadd float %29, 1.000000e+00 - %31 = load float, float* %20, align 4 - %32 = fadd float %31, %30 - store float %32, float* %20, align 4 - %33 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 0, i64 1 - %34 = load float, float* %33, align 4 - %35 = load float, float* %8, align 4 - %36 = fmul float %34, %35 - %37 = fmul float %36, 3.000000e+00 - %38 = fadd float %37, -4.000000e+00 - %39 = load float, float* %19, align 4 - %40 = fadd float %39, %38 - store float %40, float* %19, align 4 - %41 = fmul float %36, 2.000000e+00 - %42 = fadd float %41, 1.000000e+00 - %43 = load float, float* %20, align 4 - %44 = fadd float %43, %42 - store float %44, float* %20, align 4 - %45 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 0, i64 2 - %46 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 0, i64 2 - %47 = load float, float* %33, align 4 - %48 = load float, float* %22, align 4 - %49 = fmul float %47, %48 - %50 = fmul float %49, 3.000000e+00 - %51 = fadd float %50, -4.000000e+00 - %52 = load float, float* %45, align 4 - %53 = fadd float %52, %51 - store float %53, float* %45, align 4 - %54 = fmul float %49, 2.000000e+00 - %55 = fadd float %54, 1.000000e+00 - %56 = load float, float* %46, align 4 - %57 = fadd float %56, %55 - store float %57, float* %46, align 4 - %58 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 0 - %59 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 0 - %60 = load float, float* %6, align 4 - %61 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 0 - %62 = load float, float* %61, align 4 - %63 = fmul float %60, %62 - %64 = fmul float %63, 3.000000e+00 - %65 = fadd float %64, -4.000000e+00 - %66 = load float, float* %58, align 4 - %67 = fadd float %66, %65 - store float %67, float* %58, align 4 - %68 = fmul float %63, 2.000000e+00 - %69 = fadd float %68, 1.000000e+00 - %70 = load float, float* %59, align 4 - %71 = fadd float %70, %69 - store float %71, float* %59, align 4 - %72 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 0 - %73 = load float, float* %72, align 4 - %74 = load float, float* %8, align 4 - %75 = fmul float %73, %74 - %76 = fmul float %75, 3.000000e+00 - %77 = fadd float %76, -4.000000e+00 - %78 = load float, float* %58, align 4 - %79 = fadd float %78, %77 - store float %79, float* %58, align 4 - %80 = fmul float %75, 2.000000e+00 - %81 = fadd float %80, 1.000000e+00 - %82 = load float, float* %59, align 4 - %83 = fadd float %82, %81 - store float %83, float* %59, align 4 - %84 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 1 - %85 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 1 - %86 = load float, float* %6, align 4 - %87 = getelementptr inbounds [2 x float], [2 x float]* %1, i64 1, i64 1 - %88 = load float, float* %87, align 4 - %89 = fmul float %86, %88 - %90 = fmul float %89, 3.000000e+00 - %91 = fadd float %90, -4.000000e+00 - %92 = load float, 
float* %84, align 4 - %93 = fadd float %92, %91 - store float %93, float* %84, align 4 - %94 = fmul float %89, 2.000000e+00 - %95 = fadd float %94, 1.000000e+00 - %96 = load float, float* %85, align 4 - %97 = fadd float %96, %95 - store float %97, float* %85, align 4 - %98 = load float, float* %33, align 4 - %99 = load float, float* %61, align 4 - %100 = fmul float %98, %99 - %101 = fmul float %100, 3.000000e+00 - %102 = fadd float %101, -4.000000e+00 - %103 = load float, float* %84, align 4 - %104 = fadd float %103, %102 - store float %104, float* %84, align 4 - %105 = fmul float %100, 2.000000e+00 - %106 = fadd float %105, 1.000000e+00 - %107 = load float, float* %85, align 4 - %108 = fadd float %107, %106 - store float %108, float* %85, align 4 - %109 = load float, float* %72, align 4 - %110 = load float, float* %22, align 4 - %111 = fmul float %109, %110 - %112 = fmul float %111, 3.000000e+00 - %113 = fadd float %112, -4.000000e+00 - %114 = load float, float* %84, align 4 - %115 = fadd float %114, %113 - store float %115, float* %84, align 4 - %116 = fmul float %111, 2.000000e+00 - %117 = fadd float %116, 1.000000e+00 - %118 = load float, float* %85, align 4 - %119 = fadd float %118, %117 - store float %119, float* %85, align 4 - %120 = getelementptr inbounds [2 x float], [2 x float]* %0, i64 1, i64 1 - %121 = load float, float* %120, align 4 - %122 = load float, float* %8, align 4 - %123 = fmul float %121, %122 - %124 = fmul float %123, 3.000000e+00 - %125 = fadd float %124, -4.000000e+00 - %126 = load float, float* %84, align 4 - %127 = fadd float %126, %125 - store float %127, float* %84, align 4 - %128 = fmul float %123, 2.000000e+00 - %129 = fadd float %128, 1.000000e+00 - %130 = load float, float* %85, align 4 - %131 = fadd float %130, %129 - store float %131, float* %85, align 4 - %132 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 1, i64 2 - %133 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 1, i64 2 - %134 = load float, float* %33, align 4 - %135 = load float, float* %87, align 4 - %136 = fmul float %134, %135 - %137 = fmul float %136, 3.000000e+00 - %138 = fadd float %137, -4.000000e+00 - %139 = load float, float* %132, align 4 - %140 = fadd float %139, %138 - store float %140, float* %132, align 4 - %141 = fmul float %136, 2.000000e+00 - %142 = fadd float %141, 1.000000e+00 - %143 = load float, float* %133, align 4 - %144 = fadd float %143, %142 - store float %144, float* %133, align 4 - %145 = load float, float* %120, align 4 - %146 = load float, float* %22, align 4 - %147 = fmul float %145, %146 - %148 = fmul float %147, 3.000000e+00 - %149 = fadd float %148, -4.000000e+00 - %150 = load float, float* %132, align 4 - %151 = fadd float %150, %149 - store float %151, float* %132, align 4 - %152 = fmul float %147, 2.000000e+00 - %153 = fadd float %152, 1.000000e+00 - %154 = load float, float* %133, align 4 - %155 = fadd float %154, %153 - store float %155, float* %133, align 4 - %156 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 0 - %157 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 0 - %158 = load float, float* %72, align 4 - %159 = load float, float* %61, align 4 - %160 = fmul float %158, %159 - %161 = fmul float %160, 3.000000e+00 - %162 = fadd float %161, -4.000000e+00 - %163 = load float, float* %156, align 4 - %164 = fadd float %163, %162 - store float %164, float* %156, align 4 - %165 = fmul float %160, 2.000000e+00 - %166 = fadd float %165, 1.000000e+00 - %167 = load float, float* %157, align 4 - %168 = 
fadd float %167, %166 - store float %168, float* %157, align 4 - %169 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 1 - %170 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 1 - %171 = load float, float* %72, align 4 - %172 = load float, float* %87, align 4 - %173 = fmul float %171, %172 - %174 = fmul float %173, 3.000000e+00 - %175 = fadd float %174, -4.000000e+00 - %176 = load float, float* %169, align 4 - %177 = fadd float %176, %175 - store float %177, float* %169, align 4 - %178 = fmul float %173, 2.000000e+00 - %179 = fadd float %178, 1.000000e+00 - %180 = load float, float* %170, align 4 - %181 = fadd float %180, %179 - store float %181, float* %170, align 4 - %182 = load float, float* %120, align 4 - %183 = load float, float* %61, align 4 - %184 = fmul float %182, %183 - %185 = fmul float %184, 3.000000e+00 - %186 = fadd float %185, -4.000000e+00 - %187 = load float, float* %169, align 4 - %188 = fadd float %187, %186 - store float %188, float* %169, align 4 - %189 = fmul float %184, 2.000000e+00 - %190 = fadd float %189, 1.000000e+00 - %191 = load float, float* %170, align 4 - %192 = fadd float %191, %190 - store float %192, float* %170, align 4 - %193 = getelementptr inbounds [3 x float], [3 x float]* %2, i64 2, i64 2 - %194 = getelementptr inbounds [3 x float], [3 x float]* %3, i64 2, i64 2 - %195 = load float, float* %120, align 4 - %196 = load float, float* %87, align 4 - %197 = fmul float %195, %196 - %198 = fmul float %197, 3.000000e+00 - %199 = fadd float %198, -4.000000e+00 - %200 = load float, float* %193, align 4 - %201 = fadd float %200, %199 - store float %201, float* %193, align 4 - %202 = fmul float %197, 2.000000e+00 - %203 = fadd float %202, 1.000000e+00 - %204 = load float, float* %194, align 4 - %205 = fadd float %204, %203 - store float %205, float* %194, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #0 { -.preheader: - %0 = alloca [2 x [2 x float]], align 16 - %1 = alloca [2 x [2 x float]], align 16 - %2 = alloca [3 x [3 x float]], align 16 - %3 = alloca [3 x [3 x float]], align 16 - %4 = bitcast [2 x [2 x float]]* %0 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.mat_in to i8*), i64 16, i1 false) - %5 = bitcast [2 x [2 x float]]* %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([2 x [2 x float]]* @__const.main.f_in to i8*), i64 16, i1 false) - %6 = bitcast [3 x [3 x float]]* %2 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) - %7 = bitcast [3 x [3 x float]]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %7, i8 0, i64 36, i1 false) - %8 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %0, i64 0, i64 0 - %9 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %1, i64 0, i64 0 - %10 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0 - %11 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0 - call void @load_use_twice([2 x float]* nonnull %8, [2 x float]* nonnull %9, [3 x float]* nonnull %10, [3 x float]* nonnull %11) - %12 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 0 - %13 = load float, float* %12, align 16 - %14 = fpext float %13 to double - 
%15 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %14) #4 - %16 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 0 - %17 = load float, float* %16, align 16 - %18 = fpext float %17 to double - %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #4 - %20 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 1 - %21 = load float, float* %20, align 4 - %22 = fpext float %21 to double - %23 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %22) #4 - %24 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 1 - %25 = load float, float* %24, align 4 - %26 = fpext float %25 to double - %27 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #4 - %28 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 0, i64 2 - %29 = load float, float* %28, align 8 - %30 = fpext float %29 to double - %31 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %30) #4 - %32 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 0, i64 2 - %33 = load float, float* %32, align 8 - %34 = fpext float %33 to double - %35 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %34) #4 - %36 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 0 - %37 = load float, float* %36, align 4 - %38 = fpext float %37 to double - %39 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %38) #4 - %40 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 0 - %41 = load float, float* %40, align 4 - %42 = fpext float %41 to double - %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %42) #4 - %44 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 1 - %45 = load float, float* %44, align 4 - %46 = fpext float %45 to double - %47 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %46) #4 - %48 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 1 - %49 = load float, float* %48, align 4 - %50 = fpext float %49 to double - %51 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %50) #4 - %52 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 1, i64 2 - %53 = load float, float* %52, align 4 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #4 - %56 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 1, i64 2 - %57 = load float, float* %56, align 4 - %58 = fpext float %57 to double - %59 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %58) #4 - %60 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 0 - %61 = load float, float* %60, align 8 - %62 = fpext float %61 to double - %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %62) #4 - %64 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 0 - %65 = load float, float* %64, align 8 - %66 = fpext float %65 to double - %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %66) #4 - %68 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 1 - %69 = load float, float* %68, align 4 - %70 = fpext float %69 to double - %71 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %70) #4 - %72 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 1 - %73 = load float, float* %72, align 4 - %74 = fpext float %73 to double - %75 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %74) #4 - %76 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %2, i64 0, i64 2, i64 2 - %77 = load float, float* %76, align 8 - %78 = fpext float %77 to double - %79 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %78) #4 - %80 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %3, i64 0, i64 2, i64 2 - %81 = load float, float* %80, align 8 - %82 = fpext float %81 to double - %83 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %82) #4 - ret i32 0 -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 - -declare i32 @printf(i8*, ...) 
#3 - -attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { argmemonly nounwind willreturn writeonly } -attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} From d6d335b1cf016ca82f59678a3c92ed1fad579b9f Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Mon, 13 Dec 2021 03:37:21 -0500 Subject: [PATCH 004/143] add conv assertions --- src/dios-egraphs/Diospyros/aa.ll | 249 ++++++ src/dios-egraphs/Diospyros/clang.ll | 269 +++++++ src/dios-egraphs/Diospyros/dce.ll | 559 ++++++++++++++ src/dios-egraphs/Diospyros/diospyros.ll | 721 ++++++++++++++++++ .../Diospyros/llvm-tests/2d-2d-conv.c | 3 + .../Diospyros/llvm-tests/2d-conv.c | 3 + .../Diospyros/llvm-tests/load_reuse.c | 17 +- src/dios-egraphs/Diospyros/opt.ll | 249 ++++++ 8 files changed, 2067 insertions(+), 3 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/aa.ll create mode 100644 src/dios-egraphs/Diospyros/clang.ll create mode 100644 src/dios-egraphs/Diospyros/dce.ll create mode 100644 src/dios-egraphs/Diospyros/diospyros.ll create mode 100644 src/dios-egraphs/Diospyros/opt.ll diff --git a/src/dios-egraphs/Diospyros/aa.ll b/src/dios-egraphs/Diospyros/aa.ll new file mode 100644 index 00000000..b4d21e53 --- /dev/null +++ b/src/dios-egraphs/Diospyros/aa.ll @@ -0,0 +1,249 @@ +; ModuleID = 'opt.ll' +source_filename = "llvm-tests/2d-conv.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@__const.main.mat_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00], align 16 +@__const.main.f_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 +@__const.main.expected = private unnamed_addr constant [9 x float] [float 1.000000e+00, float 3.000000e+00, float 2.000000e+00, float 4.000000e+00, float 1.000000e+01, float 6.000000e+00, float 3.000000e+00, float 7.000000e+00, float 4.000000e+00], align 16 +@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 +@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 +@.str.1 = private unnamed_addr constant [21 x i8] c"llvm-tests/2d-conv.c\00", align 1 +@.str.2 = private unnamed_addr constant [26 x i8] c"mat_out[i] == expected[i]\00", align 1 + +; Function Attrs: noinline nounwind ssp 
uwtable +define void @convolution(float* %0, float* %1, float* %2) #0 { +.preheader7: + %3 = load float, float* %0, align 4 + %4 = load float, float* %1, align 4 + %5 = fmul float %3, %4 + %6 = load float, float* %2, align 4 + %7 = fadd float %6, %5 + store float %7, float* %2, align 4 + %8 = getelementptr inbounds float, float* %2, i64 1 + %9 = load float, float* %0, align 4 + %10 = getelementptr inbounds float, float* %1, i64 1 + %11 = load float, float* %10, align 4 + %12 = fmul float %9, %11 + %13 = load float, float* %8, align 4 + %14 = fadd float %13, %12 + store float %14, float* %8, align 4 + %15 = getelementptr inbounds float, float* %0, i64 1 + %16 = load float, float* %15, align 4 + %17 = load float, float* %1, align 4 + %18 = fmul float %16, %17 + %19 = fadd float %14, %18 + store float %19, float* %8, align 4 + %20 = getelementptr inbounds float, float* %2, i64 2 + %21 = load float, float* %15, align 4 + %22 = load float, float* %10, align 4 + %23 = fmul float %21, %22 + %24 = load float, float* %20, align 4 + %25 = fadd float %24, %23 + store float %25, float* %20, align 4 + %26 = getelementptr inbounds float, float* %2, i64 3 + %27 = load float, float* %0, align 4 + %28 = getelementptr inbounds float, float* %1, i64 2 + %29 = load float, float* %28, align 4 + %30 = fmul float %27, %29 + %31 = load float, float* %26, align 4 + %32 = fadd float %31, %30 + store float %32, float* %26, align 4 + %33 = getelementptr inbounds float, float* %0, i64 2 + %34 = load float, float* %33, align 4 + %35 = load float, float* %1, align 4 + %36 = fmul float %34, %35 + %37 = fadd float %32, %36 + store float %37, float* %26, align 4 + %38 = getelementptr inbounds float, float* %2, i64 4 + %39 = load float, float* %0, align 4 + %40 = getelementptr inbounds float, float* %1, i64 3 + %41 = load float, float* %40, align 4 + %42 = fmul float %39, %41 + %43 = load float, float* %38, align 4 + %44 = fadd float %43, %42 + store float %44, float* %38, align 4 + %45 = load float, float* %15, align 4 + %46 = load float, float* %28, align 4 + %47 = fmul float %45, %46 + %48 = fadd float %44, %47 + store float %48, float* %38, align 4 + %49 = load float, float* %33, align 4 + %50 = load float, float* %10, align 4 + %51 = fmul float %49, %50 + %52 = fadd float %48, %51 + store float %52, float* %38, align 4 + %53 = getelementptr inbounds float, float* %0, i64 3 + %54 = load float, float* %53, align 4 + %55 = load float, float* %1, align 4 + %56 = fmul float %54, %55 + %57 = fadd float %52, %56 + store float %57, float* %38, align 4 + %58 = getelementptr inbounds float, float* %2, i64 5 + %59 = load float, float* %15, align 4 + %60 = load float, float* %40, align 4 + %61 = fmul float %59, %60 + %62 = load float, float* %58, align 4 + %63 = fadd float %62, %61 + store float %63, float* %58, align 4 + %64 = load float, float* %53, align 4 + %65 = load float, float* %10, align 4 + %66 = fmul float %64, %65 + %67 = fadd float %63, %66 + store float %67, float* %58, align 4 + %68 = getelementptr inbounds float, float* %2, i64 6 + %69 = load float, float* %33, align 4 + %70 = load float, float* %28, align 4 + %71 = fmul float %69, %70 + %72 = load float, float* %68, align 4 + %73 = fadd float %72, %71 + store float %73, float* %68, align 4 + %74 = getelementptr inbounds float, float* %2, i64 7 + %75 = load float, float* %33, align 4 + %76 = load float, float* %40, align 4 + %77 = fmul float %75, %76 + %78 = load float, float* %74, align 4 + %79 = fadd float %78, %77 + store float %79, float* %74, align 4 + %80 = 
load float, float* %53, align 4 + %81 = load float, float* %28, align 4 + %82 = fmul float %80, %81 + %83 = fadd float %79, %82 + store float %83, float* %74, align 4 + %84 = getelementptr inbounds float, float* %2, i64 8 + %85 = load float, float* %53, align 4 + %86 = load float, float* %40, align 4 + %87 = fmul float %85, %86 + %88 = load float, float* %84, align 4 + %89 = fadd float %88, %87 + store float %89, float* %84, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #0 { + %1 = alloca [4 x float], align 16 + %2 = alloca [4 x float], align 16 + %3 = alloca [9 x float], align 16 + %4 = bitcast [4 x float]* %1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.mat_in to i8*), i64 16, i1 false) + %5 = bitcast [4 x float]* %2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.f_in to i8*), i64 16, i1 false) + %6 = bitcast [9 x float]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) + %7 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 + %8 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 + %9 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 0 + call void @convolution(float* nonnull %7, float* nonnull %8, float* nonnull %9) + %10 = load float, float* %9, align 16 + %11 = fpext float %10 to double + %12 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %11) #5 + %13 = load float, float* %9, align 16 + %14 = fcmp une float %13, 1.000000e+00 + br i1 %14, label %22, label %15 + +15: ; preds = %0 + %16 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 1 + %17 = load float, float* %16, align 4 + %18 = fpext float %17 to double + %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #5 + %20 = load float, float* %16, align 4 + %21 = fcmp une float %20, 3.000000e+00 + br i1 %21, label %22, label %23 + +22: ; preds = %65, %58, %51, %44, %37, %30, %23, %15, %0 + call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0), i32 46, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.2, i64 0, i64 0)) #6 + unreachable + +23: ; preds = %15 + %24 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 2 + %25 = load float, float* %24, align 8 + %26 = fpext float %25 to double + %27 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #5 + %28 = load float, float* %24, align 8 + %29 = fcmp une float %28, 2.000000e+00 + br i1 %29, label %22, label %30 + +30: ; preds = %23 + %31 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 3 + %32 = load float, float* %31, align 4 + %33 = fpext float %32 to double + %34 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %33) #5 + %35 = load float, float* %31, align 4 + %36 = fcmp une float %35, 4.000000e+00 + br i1 %36, label %22, label %37 + +37: ; preds = %30 + %38 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 4 + %39 = load float, float* %38, align 16 + %40 = fpext float %39 to double + %41 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %40) #5 + %42 = load float, float* %38, align 16 + %43 = fcmp une float %42, 1.000000e+01 + br i1 %43, label %22, label %44 + +44: ; preds = %37 + %45 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 5 + %46 = load float, float* %45, align 4 + %47 = fpext float %46 to double + %48 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %47) #5 + %49 = load float, float* %45, align 4 + %50 = fcmp une float %49, 6.000000e+00 + br i1 %50, label %22, label %51 + +51: ; preds = %44 + %52 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 6 + %53 = load float, float* %52, align 8 + %54 = fpext float %53 to double + %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #5 + %56 = load float, float* %52, align 8 + %57 = fcmp une float %56, 3.000000e+00 + br i1 %57, label %22, label %58 + +58: ; preds = %51 + %59 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 7 + %60 = load float, float* %59, align 4 + %61 = fpext float %60 to double + %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %61) #5 + %63 = load float, float* %59, align 4 + %64 = fcmp une float %63, 7.000000e+00 + br i1 %64, label %22, label %65 + +65: ; preds = %58 + %66 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 8 + %67 = load float, float* %66, align 16 + %68 = fpext float %67 to double + %69 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %68) #5 + %70 = load float, float* %66, align 16 + %71 = fcmp une float %70, 4.000000e+00 + br i1 %71, label %22, label %72 + +72: ; preds = %65 + ret i32 0 +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 + +declare i32 @printf(i8*, ...) 
#3 + +; Function Attrs: noreturn +declare void @__assert_rtn(i8*, i8*, i32, i8*) #4 + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind willreturn writeonly } +attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { nounwind } +attributes #6 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/clang.ll b/src/dios-egraphs/Diospyros/clang.ll new file mode 100644 index 00000000..ad3269fd --- /dev/null +++ b/src/dios-egraphs/Diospyros/clang.ll @@ -0,0 +1,269 @@ +; ModuleID = 'llvm-tests/2d-conv.c' +source_filename = "llvm-tests/2d-conv.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@__const.main.mat_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00], align 16 +@__const.main.f_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 +@__const.main.expected = private unnamed_addr constant [9 x float] [float 1.000000e+00, float 3.000000e+00, float 2.000000e+00, float 4.000000e+00, float 1.000000e+01, float 6.000000e+00, float 3.000000e+00, float 7.000000e+00, float 4.000000e+00], align 16 +@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 +@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 +@.str.1 = private unnamed_addr constant [21 x i8] c"llvm-tests/2d-conv.c\00", align 1 +@.str.2 = private unnamed_addr constant [26 x i8] c"mat_out[i] == expected[i]\00", align 1 + +; Function Attrs: noinline nounwind ssp uwtable +define void @convolution(float* %0, float* %1, float* %2) #0 { + %4 = alloca float*, align 8 + %5 = alloca float*, align 8 + %6 = alloca float*, align 8 + %7 = alloca i32, align 4 + %8 = alloca i32, align 4 + %9 = alloca i32, align 4 + %10 = alloca i32, align 4 + %11 = alloca i32, align 4 
+ %12 = alloca i32, align 4 + %13 = alloca i32, align 4 + %14 = alloca i32, align 4 + %15 = alloca float, align 4 + store float* %0, float** %4, align 8 + store float* %1, float** %5, align 8 + store float* %2, float** %6, align 8 + store i32 0, i32* %7, align 4 + br label %16 + +16: ; preds = %94, %3 + %17 = load i32, i32* %7, align 4 + %18 = icmp slt i32 %17, 3 + br i1 %18, label %19, label %97 + +19: ; preds = %16 + store i32 0, i32* %8, align 4 + br label %20 + +20: ; preds = %90, %19 + %21 = load i32, i32* %8, align 4 + %22 = icmp slt i32 %21, 3 + br i1 %22, label %23, label %93 + +23: ; preds = %20 + store i32 0, i32* %9, align 4 + br label %24 + +24: ; preds = %86, %23 + %25 = load i32, i32* %9, align 4 + %26 = icmp slt i32 %25, 2 + br i1 %26, label %27, label %89 + +27: ; preds = %24 + store i32 0, i32* %10, align 4 + br label %28 + +28: ; preds = %82, %27 + %29 = load i32, i32* %10, align 4 + %30 = icmp slt i32 %29, 2 + br i1 %30, label %31, label %85 + +31: ; preds = %28 + %32 = load i32, i32* %9, align 4 + %33 = sub nsw i32 1, %32 + store i32 %33, i32* %11, align 4 + %34 = load i32, i32* %10, align 4 + %35 = sub nsw i32 1, %34 + store i32 %35, i32* %12, align 4 + %36 = load i32, i32* %7, align 4 + %37 = load i32, i32* %11, align 4 + %38 = sub nsw i32 %36, %37 + store i32 %38, i32* %13, align 4 + %39 = load i32, i32* %8, align 4 + %40 = load i32, i32* %12, align 4 + %41 = sub nsw i32 %39, %40 + store i32 %41, i32* %14, align 4 + %42 = load i32, i32* %13, align 4 + %43 = icmp sge i32 %42, 0 + br i1 %43, label %44, label %81 + +44: ; preds = %31 + %45 = load i32, i32* %13, align 4 + %46 = icmp slt i32 %45, 2 + br i1 %46, label %47, label %81 + +47: ; preds = %44 + %48 = load i32, i32* %14, align 4 + %49 = icmp sge i32 %48, 0 + br i1 %49, label %50, label %81 + +50: ; preds = %47 + %51 = load i32, i32* %14, align 4 + %52 = icmp slt i32 %51, 2 + br i1 %52, label %53, label %81 + +53: ; preds = %50 + %54 = load float*, float** %4, align 8 + %55 = load i32, i32* %13, align 4 + %56 = mul nsw i32 %55, 2 + %57 = load i32, i32* %14, align 4 + %58 = add nsw i32 %56, %57 + %59 = sext i32 %58 to i64 + %60 = getelementptr inbounds float, float* %54, i64 %59 + %61 = load float, float* %60, align 4 + %62 = load float*, float** %5, align 8 + %63 = load i32, i32* %11, align 4 + %64 = mul nsw i32 %63, 2 + %65 = load i32, i32* %12, align 4 + %66 = add nsw i32 %64, %65 + %67 = sext i32 %66 to i64 + %68 = getelementptr inbounds float, float* %62, i64 %67 + %69 = load float, float* %68, align 4 + %70 = fmul float %61, %69 + store float %70, float* %15, align 4 + %71 = load float, float* %15, align 4 + %72 = load float*, float** %6, align 8 + %73 = load i32, i32* %7, align 4 + %74 = mul nsw i32 %73, 3 + %75 = load i32, i32* %8, align 4 + %76 = add nsw i32 %74, %75 + %77 = sext i32 %76 to i64 + %78 = getelementptr inbounds float, float* %72, i64 %77 + %79 = load float, float* %78, align 4 + %80 = fadd float %79, %71 + store float %80, float* %78, align 4 + br label %81 + +81: ; preds = %53, %50, %47, %44, %31 + br label %82 + +82: ; preds = %81 + %83 = load i32, i32* %10, align 4 + %84 = add nsw i32 %83, 1 + store i32 %84, i32* %10, align 4 + br label %28 + +85: ; preds = %28 + br label %86 + +86: ; preds = %85 + %87 = load i32, i32* %9, align 4 + %88 = add nsw i32 %87, 1 + store i32 %88, i32* %9, align 4 + br label %24 + +89: ; preds = %24 + br label %90 + +90: ; preds = %89 + %91 = load i32, i32* %8, align 4 + %92 = add nsw i32 %91, 1 + store i32 %92, i32* %8, align 4 + br label %20 + +93: ; preds = 
%20 + br label %94 + +94: ; preds = %93 + %95 = load i32, i32* %7, align 4 + %96 = add nsw i32 %95, 1 + store i32 %96, i32* %7, align 4 + br label %16 + +97: ; preds = %16 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #0 { + %1 = alloca i32, align 4 + %2 = alloca [4 x float], align 16 + %3 = alloca [4 x float], align 16 + %4 = alloca [9 x float], align 16 + %5 = alloca [9 x float], align 16 + %6 = alloca i32, align 4 + store i32 0, i32* %1, align 4 + %7 = bitcast [4 x float]* %2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %7, i8* align 16 bitcast ([4 x float]* @__const.main.mat_in to i8*), i64 16, i1 false) + %8 = bitcast [4 x float]* %3 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %8, i8* align 16 bitcast ([4 x float]* @__const.main.f_in to i8*), i64 16, i1 false) + %9 = bitcast [9 x float]* %4 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %9, i8 0, i64 36, i1 false) + %10 = bitcast [9 x float]* %5 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %10, i8* align 16 bitcast ([9 x float]* @__const.main.expected to i8*), i64 36, i1 false) + %11 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 + %12 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 + %13 = getelementptr inbounds [9 x float], [9 x float]* %4, i64 0, i64 0 + call void @convolution(float* %11, float* %12, float* %13) + store i32 0, i32* %6, align 4 + br label %14 + +14: ; preds = %41, %0 + %15 = load i32, i32* %6, align 4 + %16 = icmp slt i32 %15, 9 + br i1 %16, label %17, label %44 + +17: ; preds = %14 + %18 = load i32, i32* %6, align 4 + %19 = sext i32 %18 to i64 + %20 = getelementptr inbounds [9 x float], [9 x float]* %4, i64 0, i64 %19 + %21 = load float, float* %20, align 4 + %22 = fpext float %21 to double + %23 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %22) + %24 = load i32, i32* %6, align 4 + %25 = sext i32 %24 to i64 + %26 = getelementptr inbounds [9 x float], [9 x float]* %4, i64 0, i64 %25 + %27 = load float, float* %26, align 4 + %28 = load i32, i32* %6, align 4 + %29 = sext i32 %28 to i64 + %30 = getelementptr inbounds [9 x float], [9 x float]* %5, i64 0, i64 %29 + %31 = load float, float* %30, align 4 + %32 = fcmp oeq float %27, %31 + %33 = xor i1 %32, true + %34 = zext i1 %33 to i32 + %35 = sext i32 %34 to i64 + %36 = icmp ne i64 %35, 0 + br i1 %36, label %37, label %39 + +37: ; preds = %17 + call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0), i32 46, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.2, i64 0, i64 0)) #5 + unreachable + +38: ; No predecessors! + br label %40 + +39: ; preds = %17 + br label %40 + +40: ; preds = %39, %38 + br label %41 + +41: ; preds = %40 + %42 = load i32, i32* %6, align 4 + %43 = add nsw i32 %42, 1 + store i32 %43, i32* %6, align 4 + br label %14 + +44: ; preds = %14 + ret i32 0 +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 + +declare i32 @printf(i8*, ...) 
#3 + +; Function Attrs: noreturn +declare void @__assert_rtn(i8*, i8*, i32, i8*) #4 + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind willreturn writeonly } +attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { noreturn } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/dce.ll b/src/dios-egraphs/Diospyros/dce.ll new file mode 100644 index 00000000..ab878f8e --- /dev/null +++ b/src/dios-egraphs/Diospyros/dce.ll @@ -0,0 +1,559 @@ +; ModuleID = 'diospyros.ll' +source_filename = "llvm-tests/2d-conv.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@__const.main.mat_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00], align 16 +@__const.main.f_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 +@__const.main.expected = private unnamed_addr constant [9 x float] [float 1.000000e+00, float 3.000000e+00, float 2.000000e+00, float 4.000000e+00, float 1.000000e+01, float 6.000000e+00, float 3.000000e+00, float 7.000000e+00, float 4.000000e+00], align 16 +@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 +@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 +@.str.1 = private unnamed_addr constant [21 x i8] c"llvm-tests/2d-conv.c\00", align 1 +@.str.2 = private unnamed_addr constant [26 x i8] c"mat_out[i] == expected[i]\00", align 1 + +; Function Attrs: noinline nounwind ssp uwtable +define void @convolution(float* %0, float* %1, float* %2) #0 { +.preheader7: + %3 = load float, float* %2, align 4 + %4 = insertelement <4 x float> zeroinitializer, float %3, i32 0 + %5 = insertelement <4 x float> %4, float 0.000000e+00, i32 1 + %6 = insertelement <4 x float> %5, float 0.000000e+00, i32 2 + %7 = insertelement <4 x float> 
%6, float 0.000000e+00, i32 3 + %8 = load float, float* %0, align 4 + %9 = insertelement <4 x float> zeroinitializer, float %8, i32 0 + %10 = insertelement <4 x float> %9, float 0.000000e+00, i32 1 + %11 = insertelement <4 x float> %10, float 0.000000e+00, i32 2 + %12 = insertelement <4 x float> %11, float 0.000000e+00, i32 3 + %13 = load float, float* %1, align 4 + %14 = insertelement <4 x float> zeroinitializer, float %13, i32 0 + %15 = insertelement <4 x float> %14, float 0.000000e+00, i32 1 + %16 = insertelement <4 x float> %15, float 0.000000e+00, i32 2 + %17 = insertelement <4 x float> %16, float 0.000000e+00, i32 3 + %18 = call <4 x float> @llvm.fma.v4f32(<4 x float> %12, <4 x float> %17, <4 x float> %7) + %19 = extractelement <4 x float> %18, i32 0 + store float %19, float* %2, align 4 + %20 = getelementptr inbounds float, float* %2, i64 1 + %21 = load float, float* %20, align 4 + %22 = insertelement <4 x float> zeroinitializer, float %21, i32 0 + %23 = insertelement <4 x float> %22, float 0.000000e+00, i32 1 + %24 = insertelement <4 x float> %23, float 0.000000e+00, i32 2 + %25 = insertelement <4 x float> %24, float 0.000000e+00, i32 3 + %26 = load float, float* %0, align 4 + %27 = insertelement <4 x float> zeroinitializer, float %26, i32 0 + %28 = insertelement <4 x float> %27, float 0.000000e+00, i32 1 + %29 = insertelement <4 x float> %28, float 0.000000e+00, i32 2 + %30 = insertelement <4 x float> %29, float 0.000000e+00, i32 3 + %31 = getelementptr inbounds float, float* %1, i64 1 + %32 = load float, float* %31, align 4 + %33 = insertelement <4 x float> zeroinitializer, float %32, i32 0 + %34 = insertelement <4 x float> %33, float 0.000000e+00, i32 1 + %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 2 + %36 = insertelement <4 x float> %35, float 0.000000e+00, i32 3 + %37 = call <4 x float> @llvm.fma.v4f32(<4 x float> %30, <4 x float> %36, <4 x float> %25) + %38 = extractelement <4 x float> %37, i32 0 + %39 = getelementptr inbounds float, float* %2, i64 1 + store float %38, float* %39, align 4 + %40 = insertelement <4 x float> zeroinitializer, float %21, i32 0 + %41 = insertelement <4 x float> %40, float 0.000000e+00, i32 1 + %42 = insertelement <4 x float> %41, float 0.000000e+00, i32 2 + %43 = insertelement <4 x float> %42, float 0.000000e+00, i32 3 + %44 = load float, float* %0, align 4 + %45 = insertelement <4 x float> zeroinitializer, float %44, i32 0 + %46 = insertelement <4 x float> %45, float 1.000000e+00, i32 1 + %47 = insertelement <4 x float> %46, float 1.000000e+00, i32 2 + %48 = insertelement <4 x float> %47, float 1.000000e+00, i32 3 + %49 = insertelement <4 x float> zeroinitializer, float %32, i32 0 + %50 = insertelement <4 x float> %49, float 0.000000e+00, i32 1 + %51 = insertelement <4 x float> %50, float 0.000000e+00, i32 2 + %52 = insertelement <4 x float> %51, float 0.000000e+00, i32 3 + %53 = call <4 x float> @llvm.fma.v4f32(<4 x float> %48, <4 x float> %52, <4 x float> %43) + %54 = getelementptr inbounds float, float* %0, i64 1 + %55 = load float, float* %54, align 4 + %56 = insertelement <4 x float> zeroinitializer, float %55, i32 0 + %57 = insertelement <4 x float> %56, float 0.000000e+00, i32 1 + %58 = insertelement <4 x float> %57, float 0.000000e+00, i32 2 + %59 = insertelement <4 x float> %58, float 0.000000e+00, i32 3 + %60 = load float, float* %1, align 4 + %61 = insertelement <4 x float> zeroinitializer, float %60, i32 0 + %62 = insertelement <4 x float> %61, float 0.000000e+00, i32 1 + %63 = insertelement <4 x float> %62, float 
0.000000e+00, i32 2 + %64 = insertelement <4 x float> %63, float 0.000000e+00, i32 3 + %65 = call <4 x float> @llvm.fma.v4f32(<4 x float> %59, <4 x float> %64, <4 x float> %53) + %66 = extractelement <4 x float> %65, i32 0 + %67 = getelementptr inbounds float, float* %2, i64 1 + store float %66, float* %67, align 4 + %68 = getelementptr inbounds float, float* %2, i64 2 + %69 = load float, float* %68, align 4 + %70 = insertelement <4 x float> zeroinitializer, float %69, i32 0 + %71 = insertelement <4 x float> %70, float 0.000000e+00, i32 1 + %72 = insertelement <4 x float> %71, float 0.000000e+00, i32 2 + %73 = insertelement <4 x float> %72, float 0.000000e+00, i32 3 + %74 = getelementptr inbounds float, float* %0, i64 1 + %75 = load float, float* %74, align 4 + %76 = insertelement <4 x float> zeroinitializer, float %75, i32 0 + %77 = insertelement <4 x float> %76, float 0.000000e+00, i32 1 + %78 = insertelement <4 x float> %77, float 0.000000e+00, i32 2 + %79 = insertelement <4 x float> %78, float 0.000000e+00, i32 3 + %80 = getelementptr inbounds float, float* %1, i64 1 + %81 = load float, float* %80, align 4 + %82 = insertelement <4 x float> zeroinitializer, float %81, i32 0 + %83 = insertelement <4 x float> %82, float 0.000000e+00, i32 1 + %84 = insertelement <4 x float> %83, float 0.000000e+00, i32 2 + %85 = insertelement <4 x float> %84, float 0.000000e+00, i32 3 + %86 = call <4 x float> @llvm.fma.v4f32(<4 x float> %79, <4 x float> %85, <4 x float> %73) + %87 = extractelement <4 x float> %86, i32 0 + %88 = getelementptr inbounds float, float* %2, i64 2 + store float %87, float* %88, align 4 + %89 = getelementptr inbounds float, float* %2, i64 3 + %90 = load float, float* %89, align 4 + %91 = insertelement <4 x float> zeroinitializer, float %90, i32 1 + %92 = insertelement <4 x float> %91, float 0.000000e+00, i32 2 + %93 = insertelement <4 x float> %92, float 0.000000e+00, i32 3 + %94 = load float, float* %0, align 4 + %95 = insertelement <4 x float> , float %94, i32 1 + %96 = insertelement <4 x float> %95, float 1.000000e+00, i32 2 + %97 = insertelement <4 x float> %96, float 1.000000e+00, i32 3 + %98 = getelementptr inbounds float, float* %2, i64 3 + %99 = load float, float* %98, align 4 + %100 = insertelement <4 x float> zeroinitializer, float %99, i32 0 + %101 = getelementptr inbounds float, float* %1, i64 2 + %102 = load float, float* %101, align 4 + %103 = insertelement <4 x float> %100, float %102, i32 1 + %104 = insertelement <4 x float> %103, float 0.000000e+00, i32 2 + %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 3 + %106 = call <4 x float> @llvm.fma.v4f32(<4 x float> %97, <4 x float> %105, <4 x float> %93) + %107 = load float, float* %0, align 4 + %108 = insertelement <4 x float> zeroinitializer, float %107, i32 0 + %109 = getelementptr inbounds float, float* %0, i64 2 + %110 = load float, float* %109, align 4 + %111 = insertelement <4 x float> %108, float %110, i32 1 + %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 2 + %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 3 + %114 = getelementptr inbounds float, float* %1, i64 2 + %115 = load float, float* %114, align 4 + %116 = insertelement <4 x float> zeroinitializer, float %115, i32 0 + %117 = load float, float* %1, align 4 + %118 = insertelement <4 x float> %116, float %117, i32 1 + %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 2 + %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 3 + %121 = call <4 x float> @llvm.fma.v4f32(<4 x float> %113, <4 x 
float> %120, <4 x float> %106) + %122 = extractelement <4 x float> %121, i32 1 + %123 = getelementptr inbounds float, float* %2, i64 3 + store float %122, float* %123, align 4 + %124 = getelementptr inbounds float, float* %2, i64 4 + %125 = load float, float* %124, align 4 + %126 = insertelement <4 x float> zeroinitializer, float %125, i32 0 + %127 = insertelement <4 x float> %126, float 0.000000e+00, i32 1 + %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 2 + %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 3 + %130 = load float, float* %0, align 4 + %131 = insertelement <4 x float> zeroinitializer, float %130, i32 0 + %132 = insertelement <4 x float> %131, float 0.000000e+00, i32 1 + %133 = insertelement <4 x float> %132, float 0.000000e+00, i32 2 + %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 3 + %135 = getelementptr inbounds float, float* %1, i64 3 + %136 = load float, float* %135, align 4 + %137 = insertelement <4 x float> zeroinitializer, float %136, i32 0 + %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 1 + %139 = insertelement <4 x float> %138, float 0.000000e+00, i32 2 + %140 = insertelement <4 x float> %139, float 0.000000e+00, i32 3 + %141 = call <4 x float> @llvm.fma.v4f32(<4 x float> %134, <4 x float> %140, <4 x float> %129) + %142 = extractelement <4 x float> %141, i32 0 + %143 = getelementptr inbounds float, float* %2, i64 4 + store float %142, float* %143, align 4 + %144 = insertelement <4 x float> zeroinitializer, float %125, i32 0 + %145 = insertelement <4 x float> %144, float 0.000000e+00, i32 1 + %146 = insertelement <4 x float> %145, float 0.000000e+00, i32 2 + %147 = insertelement <4 x float> %146, float 0.000000e+00, i32 3 + %148 = load float, float* %0, align 4 + %149 = insertelement <4 x float> zeroinitializer, float %148, i32 0 + %150 = insertelement <4 x float> %149, float 1.000000e+00, i32 1 + %151 = insertelement <4 x float> %150, float 1.000000e+00, i32 2 + %152 = insertelement <4 x float> %151, float 1.000000e+00, i32 3 + %153 = insertelement <4 x float> zeroinitializer, float %136, i32 0 + %154 = insertelement <4 x float> %153, float 0.000000e+00, i32 1 + %155 = insertelement <4 x float> %154, float 0.000000e+00, i32 2 + %156 = insertelement <4 x float> %155, float 0.000000e+00, i32 3 + %157 = call <4 x float> @llvm.fma.v4f32(<4 x float> %152, <4 x float> %156, <4 x float> %147) + %158 = getelementptr inbounds float, float* %0, i64 1 + %159 = load float, float* %158, align 4 + %160 = insertelement <4 x float> zeroinitializer, float %159, i32 0 + %161 = insertelement <4 x float> %160, float 0.000000e+00, i32 1 + %162 = insertelement <4 x float> %161, float 0.000000e+00, i32 2 + %163 = insertelement <4 x float> %162, float 0.000000e+00, i32 3 + %164 = getelementptr inbounds float, float* %1, i64 2 + %165 = load float, float* %164, align 4 + %166 = insertelement <4 x float> zeroinitializer, float %165, i32 0 + %167 = insertelement <4 x float> %166, float 0.000000e+00, i32 1 + %168 = insertelement <4 x float> %167, float 0.000000e+00, i32 2 + %169 = insertelement <4 x float> %168, float 0.000000e+00, i32 3 + %170 = call <4 x float> @llvm.fma.v4f32(<4 x float> %163, <4 x float> %169, <4 x float> %157) + %171 = extractelement <4 x float> %170, i32 0 + %172 = getelementptr inbounds float, float* %2, i64 4 + store float %171, float* %172, align 4 + %173 = insertelement <4 x float> zeroinitializer, float %125, i32 0 + %174 = insertelement <4 x float> %173, float 0.000000e+00, i32 1 + %175 = insertelement 
<4 x float> %174, float 0.000000e+00, i32 2 + %176 = insertelement <4 x float> %175, float 0.000000e+00, i32 3 + %177 = load float, float* %0, align 4 + %178 = insertelement <4 x float> zeroinitializer, float %177, i32 0 + %179 = insertelement <4 x float> %178, float 1.000000e+00, i32 1 + %180 = insertelement <4 x float> %179, float 1.000000e+00, i32 2 + %181 = insertelement <4 x float> %180, float 1.000000e+00, i32 3 + %182 = insertelement <4 x float> zeroinitializer, float %136, i32 0 + %183 = insertelement <4 x float> %182, float 0.000000e+00, i32 1 + %184 = insertelement <4 x float> %183, float 0.000000e+00, i32 2 + %185 = insertelement <4 x float> %184, float 0.000000e+00, i32 3 + %186 = call <4 x float> @llvm.fma.v4f32(<4 x float> %181, <4 x float> %185, <4 x float> %176) + %187 = insertelement <4 x float> zeroinitializer, float %159, i32 0 + %188 = insertelement <4 x float> %187, float 1.000000e+00, i32 1 + %189 = insertelement <4 x float> %188, float 1.000000e+00, i32 2 + %190 = insertelement <4 x float> %189, float 1.000000e+00, i32 3 + %191 = insertelement <4 x float> zeroinitializer, float %165, i32 0 + %192 = insertelement <4 x float> %191, float 0.000000e+00, i32 1 + %193 = insertelement <4 x float> %192, float 0.000000e+00, i32 2 + %194 = insertelement <4 x float> %193, float 0.000000e+00, i32 3 + %195 = call <4 x float> @llvm.fma.v4f32(<4 x float> %190, <4 x float> %194, <4 x float> %186) + %196 = getelementptr inbounds float, float* %0, i64 2 + %197 = load float, float* %196, align 4 + %198 = insertelement <4 x float> zeroinitializer, float %197, i32 0 + %199 = insertelement <4 x float> %198, float 0.000000e+00, i32 1 + %200 = insertelement <4 x float> %199, float 0.000000e+00, i32 2 + %201 = insertelement <4 x float> %200, float 0.000000e+00, i32 3 + %202 = getelementptr inbounds float, float* %1, i64 1 + %203 = load float, float* %202, align 4 + %204 = insertelement <4 x float> zeroinitializer, float %203, i32 0 + %205 = insertelement <4 x float> %204, float 0.000000e+00, i32 1 + %206 = insertelement <4 x float> %205, float 0.000000e+00, i32 2 + %207 = insertelement <4 x float> %206, float 0.000000e+00, i32 3 + %208 = call <4 x float> @llvm.fma.v4f32(<4 x float> %201, <4 x float> %207, <4 x float> %195) + %209 = extractelement <4 x float> %208, i32 0 + %210 = getelementptr inbounds float, float* %2, i64 4 + store float %209, float* %210, align 4 + %211 = insertelement <4 x float> zeroinitializer, float %125, i32 0 + %212 = insertelement <4 x float> %211, float 0.000000e+00, i32 1 + %213 = insertelement <4 x float> %212, float 0.000000e+00, i32 2 + %214 = insertelement <4 x float> %213, float 0.000000e+00, i32 3 + %215 = load float, float* %0, align 4 + %216 = insertelement <4 x float> zeroinitializer, float %215, i32 0 + %217 = insertelement <4 x float> %216, float 1.000000e+00, i32 1 + %218 = insertelement <4 x float> %217, float 1.000000e+00, i32 2 + %219 = insertelement <4 x float> %218, float 1.000000e+00, i32 3 + %220 = insertelement <4 x float> zeroinitializer, float %136, i32 0 + %221 = insertelement <4 x float> %220, float 0.000000e+00, i32 1 + %222 = insertelement <4 x float> %221, float 0.000000e+00, i32 2 + %223 = insertelement <4 x float> %222, float 0.000000e+00, i32 3 + %224 = call <4 x float> @llvm.fma.v4f32(<4 x float> %219, <4 x float> %223, <4 x float> %214) + %225 = insertelement <4 x float> zeroinitializer, float %159, i32 0 + %226 = insertelement <4 x float> %225, float 1.000000e+00, i32 1 + %227 = insertelement <4 x float> %226, float 1.000000e+00, 
i32 2 + %228 = insertelement <4 x float> %227, float 1.000000e+00, i32 3 + %229 = insertelement <4 x float> zeroinitializer, float %165, i32 0 + %230 = insertelement <4 x float> %229, float 0.000000e+00, i32 1 + %231 = insertelement <4 x float> %230, float 0.000000e+00, i32 2 + %232 = insertelement <4 x float> %231, float 0.000000e+00, i32 3 + %233 = call <4 x float> @llvm.fma.v4f32(<4 x float> %228, <4 x float> %232, <4 x float> %224) + %234 = insertelement <4 x float> zeroinitializer, float %197, i32 0 + %235 = insertelement <4 x float> %234, float 1.000000e+00, i32 1 + %236 = insertelement <4 x float> %235, float 1.000000e+00, i32 2 + %237 = insertelement <4 x float> %236, float 1.000000e+00, i32 3 + %238 = insertelement <4 x float> zeroinitializer, float %203, i32 0 + %239 = insertelement <4 x float> %238, float 0.000000e+00, i32 1 + %240 = insertelement <4 x float> %239, float 0.000000e+00, i32 2 + %241 = insertelement <4 x float> %240, float 0.000000e+00, i32 3 + %242 = call <4 x float> @llvm.fma.v4f32(<4 x float> %237, <4 x float> %241, <4 x float> %233) + %243 = getelementptr inbounds float, float* %0, i64 3 + %244 = load float, float* %243, align 4 + %245 = insertelement <4 x float> zeroinitializer, float %244, i32 0 + %246 = insertelement <4 x float> %245, float 0.000000e+00, i32 1 + %247 = insertelement <4 x float> %246, float 0.000000e+00, i32 2 + %248 = insertelement <4 x float> %247, float 0.000000e+00, i32 3 + %249 = load float, float* %1, align 4 + %250 = insertelement <4 x float> zeroinitializer, float %249, i32 0 + %251 = insertelement <4 x float> %250, float 0.000000e+00, i32 1 + %252 = insertelement <4 x float> %251, float 0.000000e+00, i32 2 + %253 = insertelement <4 x float> %252, float 0.000000e+00, i32 3 + %254 = call <4 x float> @llvm.fma.v4f32(<4 x float> %248, <4 x float> %253, <4 x float> %242) + %255 = extractelement <4 x float> %254, i32 0 + %256 = getelementptr inbounds float, float* %2, i64 4 + store float %255, float* %256, align 4 + %257 = getelementptr inbounds float, float* %2, i64 5 + %258 = load float, float* %257, align 4 + %259 = insertelement <4 x float> zeroinitializer, float %258, i32 0 + %260 = insertelement <4 x float> %259, float 0.000000e+00, i32 1 + %261 = insertelement <4 x float> %260, float 0.000000e+00, i32 2 + %262 = insertelement <4 x float> %261, float 0.000000e+00, i32 3 + %263 = getelementptr inbounds float, float* %0, i64 1 + %264 = load float, float* %263, align 4 + %265 = insertelement <4 x float> zeroinitializer, float %264, i32 0 + %266 = insertelement <4 x float> %265, float 0.000000e+00, i32 1 + %267 = insertelement <4 x float> %266, float 0.000000e+00, i32 2 + %268 = insertelement <4 x float> %267, float 0.000000e+00, i32 3 + %269 = getelementptr inbounds float, float* %1, i64 3 + %270 = load float, float* %269, align 4 + %271 = insertelement <4 x float> zeroinitializer, float %270, i32 0 + %272 = insertelement <4 x float> %271, float 0.000000e+00, i32 1 + %273 = insertelement <4 x float> %272, float 0.000000e+00, i32 2 + %274 = insertelement <4 x float> %273, float 0.000000e+00, i32 3 + %275 = call <4 x float> @llvm.fma.v4f32(<4 x float> %268, <4 x float> %274, <4 x float> %262) + %276 = extractelement <4 x float> %275, i32 0 + %277 = getelementptr inbounds float, float* %2, i64 5 + store float %276, float* %277, align 4 + %278 = insertelement <4 x float> zeroinitializer, float %258, i32 0 + %279 = insertelement <4 x float> %278, float 0.000000e+00, i32 1 + %280 = insertelement <4 x float> %279, float 0.000000e+00, i32 2 + 
%281 = insertelement <4 x float> %280, float 0.000000e+00, i32 3 + %282 = insertelement <4 x float> zeroinitializer, float %264, i32 0 + %283 = insertelement <4 x float> %282, float 1.000000e+00, i32 1 + %284 = insertelement <4 x float> %283, float 1.000000e+00, i32 2 + %285 = insertelement <4 x float> %284, float 1.000000e+00, i32 3 + %286 = insertelement <4 x float> zeroinitializer, float %270, i32 0 + %287 = insertelement <4 x float> %286, float 0.000000e+00, i32 1 + %288 = insertelement <4 x float> %287, float 0.000000e+00, i32 2 + %289 = insertelement <4 x float> %288, float 0.000000e+00, i32 3 + %290 = call <4 x float> @llvm.fma.v4f32(<4 x float> %285, <4 x float> %289, <4 x float> %281) + %291 = getelementptr inbounds float, float* %0, i64 3 + %292 = load float, float* %291, align 4 + %293 = insertelement <4 x float> zeroinitializer, float %292, i32 0 + %294 = insertelement <4 x float> %293, float 0.000000e+00, i32 1 + %295 = insertelement <4 x float> %294, float 0.000000e+00, i32 2 + %296 = insertelement <4 x float> %295, float 0.000000e+00, i32 3 + %297 = getelementptr inbounds float, float* %1, i64 1 + %298 = load float, float* %297, align 4 + %299 = insertelement <4 x float> zeroinitializer, float %298, i32 0 + %300 = insertelement <4 x float> %299, float 0.000000e+00, i32 1 + %301 = insertelement <4 x float> %300, float 0.000000e+00, i32 2 + %302 = insertelement <4 x float> %301, float 0.000000e+00, i32 3 + %303 = call <4 x float> @llvm.fma.v4f32(<4 x float> %296, <4 x float> %302, <4 x float> %290) + %304 = extractelement <4 x float> %303, i32 0 + %305 = getelementptr inbounds float, float* %2, i64 5 + store float %304, float* %305, align 4 + %306 = getelementptr inbounds float, float* %2, i64 6 + %307 = load float, float* %306, align 4 + %308 = insertelement <4 x float> zeroinitializer, float %307, i32 0 + %309 = insertelement <4 x float> %308, float 0.000000e+00, i32 1 + %310 = insertelement <4 x float> %309, float 0.000000e+00, i32 2 + %311 = insertelement <4 x float> %310, float 0.000000e+00, i32 3 + %312 = getelementptr inbounds float, float* %0, i64 2 + %313 = load float, float* %312, align 4 + %314 = insertelement <4 x float> zeroinitializer, float %313, i32 0 + %315 = insertelement <4 x float> %314, float 0.000000e+00, i32 1 + %316 = insertelement <4 x float> %315, float 0.000000e+00, i32 2 + %317 = insertelement <4 x float> %316, float 0.000000e+00, i32 3 + %318 = getelementptr inbounds float, float* %1, i64 2 + %319 = load float, float* %318, align 4 + %320 = insertelement <4 x float> zeroinitializer, float %319, i32 0 + %321 = insertelement <4 x float> %320, float 0.000000e+00, i32 1 + %322 = insertelement <4 x float> %321, float 0.000000e+00, i32 2 + %323 = insertelement <4 x float> %322, float 0.000000e+00, i32 3 + %324 = call <4 x float> @llvm.fma.v4f32(<4 x float> %317, <4 x float> %323, <4 x float> %311) + %325 = extractelement <4 x float> %324, i32 0 + %326 = getelementptr inbounds float, float* %2, i64 6 + store float %325, float* %326, align 4 + %327 = getelementptr inbounds float, float* %2, i64 7 + %328 = load float, float* %327, align 4 + %329 = insertelement <4 x float> zeroinitializer, float %328, i32 0 + %330 = insertelement <4 x float> %329, float 0.000000e+00, i32 1 + %331 = insertelement <4 x float> %330, float 0.000000e+00, i32 2 + %332 = insertelement <4 x float> %331, float 0.000000e+00, i32 3 + %333 = getelementptr inbounds float, float* %0, i64 2 + %334 = load float, float* %333, align 4 + %335 = insertelement <4 x float> zeroinitializer, float 
%334, i32 0 + %336 = insertelement <4 x float> %335, float 0.000000e+00, i32 1 + %337 = insertelement <4 x float> %336, float 0.000000e+00, i32 2 + %338 = insertelement <4 x float> %337, float 0.000000e+00, i32 3 + %339 = getelementptr inbounds float, float* %1, i64 3 + %340 = load float, float* %339, align 4 + %341 = insertelement <4 x float> zeroinitializer, float %340, i32 0 + %342 = insertelement <4 x float> %341, float 0.000000e+00, i32 1 + %343 = insertelement <4 x float> %342, float 0.000000e+00, i32 2 + %344 = insertelement <4 x float> %343, float 0.000000e+00, i32 3 + %345 = call <4 x float> @llvm.fma.v4f32(<4 x float> %338, <4 x float> %344, <4 x float> %332) + %346 = extractelement <4 x float> %345, i32 0 + %347 = getelementptr inbounds float, float* %2, i64 7 + store float %346, float* %347, align 4 + %348 = insertelement <4 x float> zeroinitializer, float %328, i32 0 + %349 = insertelement <4 x float> %348, float 0.000000e+00, i32 1 + %350 = insertelement <4 x float> %349, float 0.000000e+00, i32 2 + %351 = insertelement <4 x float> %350, float 0.000000e+00, i32 3 + %352 = insertelement <4 x float> zeroinitializer, float %334, i32 0 + %353 = insertelement <4 x float> %352, float 1.000000e+00, i32 1 + %354 = insertelement <4 x float> %353, float 1.000000e+00, i32 2 + %355 = insertelement <4 x float> %354, float 1.000000e+00, i32 3 + %356 = insertelement <4 x float> zeroinitializer, float %340, i32 0 + %357 = insertelement <4 x float> %356, float 0.000000e+00, i32 1 + %358 = insertelement <4 x float> %357, float 0.000000e+00, i32 2 + %359 = insertelement <4 x float> %358, float 0.000000e+00, i32 3 + %360 = call <4 x float> @llvm.fma.v4f32(<4 x float> %355, <4 x float> %359, <4 x float> %351) + %361 = getelementptr inbounds float, float* %0, i64 3 + %362 = load float, float* %361, align 4 + %363 = insertelement <4 x float> zeroinitializer, float %362, i32 0 + %364 = insertelement <4 x float> %363, float 0.000000e+00, i32 1 + %365 = insertelement <4 x float> %364, float 0.000000e+00, i32 2 + %366 = insertelement <4 x float> %365, float 0.000000e+00, i32 3 + %367 = getelementptr inbounds float, float* %1, i64 2 + %368 = load float, float* %367, align 4 + %369 = insertelement <4 x float> zeroinitializer, float %368, i32 0 + %370 = insertelement <4 x float> %369, float 0.000000e+00, i32 1 + %371 = insertelement <4 x float> %370, float 0.000000e+00, i32 2 + %372 = insertelement <4 x float> %371, float 0.000000e+00, i32 3 + %373 = call <4 x float> @llvm.fma.v4f32(<4 x float> %366, <4 x float> %372, <4 x float> %360) + %374 = extractelement <4 x float> %373, i32 0 + %375 = getelementptr inbounds float, float* %2, i64 7 + store float %374, float* %375, align 4 + %376 = getelementptr inbounds float, float* %2, i64 8 + %377 = load float, float* %376, align 4 + %378 = insertelement <4 x float> zeroinitializer, float %377, i32 0 + %379 = insertelement <4 x float> %378, float 0.000000e+00, i32 1 + %380 = insertelement <4 x float> %379, float 0.000000e+00, i32 2 + %381 = insertelement <4 x float> %380, float 0.000000e+00, i32 3 + %382 = getelementptr inbounds float, float* %0, i64 3 + %383 = load float, float* %382, align 4 + %384 = insertelement <4 x float> zeroinitializer, float %383, i32 0 + %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 1 + %386 = insertelement <4 x float> %385, float 0.000000e+00, i32 2 + %387 = insertelement <4 x float> %386, float 0.000000e+00, i32 3 + %388 = getelementptr inbounds float, float* %1, i64 3 + %389 = load float, float* %388, align 4 + %390 
= insertelement <4 x float> zeroinitializer, float %389, i32 0 + %391 = insertelement <4 x float> %390, float 0.000000e+00, i32 1 + %392 = insertelement <4 x float> %391, float 0.000000e+00, i32 2 + %393 = insertelement <4 x float> %392, float 0.000000e+00, i32 3 + %394 = call <4 x float> @llvm.fma.v4f32(<4 x float> %387, <4 x float> %393, <4 x float> %381) + %395 = extractelement <4 x float> %394, i32 0 + %396 = getelementptr inbounds float, float* %2, i64 8 + store float %395, float* %396, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #0 { + %1 = alloca [4 x float], align 16 + %2 = alloca [4 x float], align 16 + %3 = alloca [9 x float], align 16 + %4 = bitcast [4 x float]* %1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.mat_in to i8*), i64 16, i1 false) + %5 = bitcast [4 x float]* %2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.f_in to i8*), i64 16, i1 false) + %6 = bitcast [9 x float]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) + %7 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 + %8 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 + %9 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 0 + call void @convolution(float* nonnull %7, float* nonnull %8, float* nonnull %9) + %10 = load float, float* %9, align 16 + %11 = fpext float %10 to double + %12 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %11) #6 + %13 = load float, float* %9, align 16 + %14 = fcmp une float %13, 1.000000e+00 + br i1 %14, label %22, label %15 + +15: ; preds = %0 + %16 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 1 + %17 = load float, float* %16, align 4 + %18 = fpext float %17 to double + %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #6 + %20 = load float, float* %16, align 4 + %21 = fcmp une float %20, 3.000000e+00 + br i1 %21, label %22, label %23 + +22: ; preds = %65, %58, %51, %44, %37, %30, %23, %15, %0 + call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0), i32 46, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.2, i64 0, i64 0)) #7 + unreachable + +23: ; preds = %15 + %24 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 2 + %25 = load float, float* %24, align 8 + %26 = fpext float %25 to double + %27 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #6 + %28 = load float, float* %24, align 8 + %29 = fcmp une float %28, 2.000000e+00 + br i1 %29, label %22, label %30 + +30: ; preds = %23 + %31 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 3 + %32 = load float, float* %31, align 4 + %33 = fpext float %32 to double + %34 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %33) #6 + %35 = load float, float* %31, align 4 + %36 = fcmp une float %35, 4.000000e+00 + br i1 %36, label %22, label %37 + +37: ; preds = %30 + %38 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 4 + %39 = load float, float* %38, align 16 + %40 = fpext float %39 to double + %41 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %40) #6 + %42 = load float, float* %38, align 16 + %43 = fcmp une float %42, 1.000000e+01 + br i1 %43, label %22, label %44 + +44: ; preds = %37 + %45 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 5 + %46 = load float, float* %45, align 4 + %47 = fpext float %46 to double + %48 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %47) #6 + %49 = load float, float* %45, align 4 + %50 = fcmp une float %49, 6.000000e+00 + br i1 %50, label %22, label %51 + +51: ; preds = %44 + %52 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 6 + %53 = load float, float* %52, align 8 + %54 = fpext float %53 to double + %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #6 + %56 = load float, float* %52, align 8 + %57 = fcmp une float %56, 3.000000e+00 + br i1 %57, label %22, label %58 + +58: ; preds = %51 + %59 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 7 + %60 = load float, float* %59, align 4 + %61 = fpext float %60 to double + %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %61) #6 + %63 = load float, float* %59, align 4 + %64 = fcmp une float %63, 7.000000e+00 + br i1 %64, label %22, label %65 + +65: ; preds = %58 + %66 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 8 + %67 = load float, float* %66, align 16 + %68 = fpext float %67 to double + %69 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %68) #6 + %70 = load float, float* %66, align 16 + %71 = fcmp une float %70, 4.000000e+00 + br i1 %71, label %22, label %72 + +72: ; preds = %65 + ret i32 0 +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 + +declare i32 @printf(i8*, ...) 
#3 + +; Function Attrs: noreturn +declare void @__assert_rtn(i8*, i8*, i32, i8*) #4 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #5 + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind willreturn writeonly } +attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { nounwind readnone speculatable willreturn } +attributes #6 = { nounwind } +attributes #7 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/diospyros.ll b/src/dios-egraphs/Diospyros/diospyros.ll new file mode 100644 index 00000000..36af488c --- /dev/null +++ b/src/dios-egraphs/Diospyros/diospyros.ll @@ -0,0 +1,721 @@ +; ModuleID = 'aa.ll' +source_filename = "llvm-tests/2d-conv.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@__const.main.mat_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00], align 16 +@__const.main.f_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 +@__const.main.expected = private unnamed_addr constant [9 x float] [float 1.000000e+00, float 3.000000e+00, float 2.000000e+00, float 4.000000e+00, float 1.000000e+01, float 6.000000e+00, float 3.000000e+00, float 7.000000e+00, float 4.000000e+00], align 16 +@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 +@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 +@.str.1 = private unnamed_addr constant [21 x i8] c"llvm-tests/2d-conv.c\00", align 1 +@.str.2 = private unnamed_addr constant [26 x i8] c"mat_out[i] == expected[i]\00", align 1 + +; Function Attrs: noinline nounwind ssp uwtable +define void @convolution(float* %0, float* %1, float* %2) #0 { +.preheader7: + %3 = 
load float, float* %0, align 4 + %4 = load float, float* %2, align 4 + %5 = load float, float* %1, align 4 + %6 = load float, float* %0, align 4 + %7 = fmul float %6, %5 + %8 = fadd float %4, %7 + %9 = getelementptr inbounds float, float* %2, i64 1 + %10 = getelementptr inbounds float, float* %1, i64 1 + %11 = load float, float* %10, align 4 + %12 = fmul float %3, %11 + %13 = load float, float* %9, align 4 + %14 = fadd float %13, %12 + %15 = getelementptr inbounds float, float* %0, i64 1 + %16 = load float, float* %15, align 4 + %17 = load float, float* %1, align 4 + %18 = fmul float %16, %17 + %19 = fadd float %14, %18 + %20 = getelementptr inbounds float, float* %2, i64 2 + %21 = load float, float* %15, align 4 + %22 = load float, float* %0, align 4 + %23 = load float, float* %10, align 4 + %24 = fmul float %21, %23 + %25 = load float, float* %20, align 4 + %26 = fadd float %25, %24 + %27 = getelementptr inbounds float, float* %2, i64 3 + %28 = getelementptr inbounds float, float* %1, i64 2 + %29 = load float, float* %28, align 4 + %30 = fmul float %22, %29 + %31 = load float, float* %27, align 4 + %32 = fadd float %31, %30 + %33 = getelementptr inbounds float, float* %0, i64 2 + %34 = load float, float* %33, align 4 + %35 = load float, float* %1, align 4 + %36 = load float, float* %0, align 4 + %37 = fmul float %34, %35 + %38 = fadd float %32, %37 + %39 = getelementptr inbounds float, float* %2, i64 4 + %40 = getelementptr inbounds float, float* %1, i64 3 + %41 = load float, float* %40, align 4 + %42 = fmul float %36, %41 + %43 = load float, float* %39, align 4 + %44 = fadd float %43, %42 + %45 = load float, float* %15, align 4 + %46 = load float, float* %28, align 4 + %47 = fmul float %45, %46 + %48 = fadd float %44, %47 + %49 = load float, float* %33, align 4 + %50 = load float, float* %10, align 4 + %51 = fmul float %49, %50 + %52 = fadd float %48, %51 + %53 = getelementptr inbounds float, float* %0, i64 3 + %54 = load float, float* %53, align 4 + %55 = load float, float* %1, align 4 + %56 = fmul float %54, %55 + %57 = fadd float %52, %56 + %58 = getelementptr inbounds float, float* %2, i64 5 + %59 = load float, float* %15, align 4 + %60 = load float, float* %40, align 4 + %61 = fmul float %59, %60 + %62 = load float, float* %58, align 4 + %63 = fadd float %62, %61 + %64 = load float, float* %53, align 4 + %65 = load float, float* %10, align 4 + %66 = fmul float %64, %65 + %67 = fadd float %63, %66 + %68 = getelementptr inbounds float, float* %2, i64 6 + %69 = load float, float* %33, align 4 + %70 = load float, float* %28, align 4 + %71 = fmul float %69, %70 + %72 = load float, float* %68, align 4 + %73 = fadd float %72, %71 + %74 = getelementptr inbounds float, float* %2, i64 7 + %75 = load float, float* %33, align 4 + %76 = load float, float* %40, align 4 + %77 = fmul float %75, %76 + %78 = load float, float* %74, align 4 + %79 = fadd float %78, %77 + %80 = load float, float* %53, align 4 + %81 = load float, float* %28, align 4 + %82 = fmul float %80, %81 + %83 = fadd float %79, %82 + %84 = getelementptr inbounds float, float* %2, i64 8 + %85 = load float, float* %53, align 4 + %86 = load float, float* %40, align 4 + %87 = fmul float %85, %86 + %88 = load float, float* %84, align 4 + %89 = fadd float %88, %87 + %90 = load float, float* %2, align 4 + %91 = insertelement <4 x float> zeroinitializer, float %90, i32 0 + %92 = insertelement <4 x float> %91, float 0.000000e+00, i32 1 + %93 = insertelement <4 x float> %92, float 0.000000e+00, i32 2 + %94 = insertelement <4 x float> %93, 
float 0.000000e+00, i32 3 + %95 = load float, float* %0, align 4 + %96 = insertelement <4 x float> zeroinitializer, float %95, i32 0 + %97 = insertelement <4 x float> %96, float 0.000000e+00, i32 1 + %98 = insertelement <4 x float> %97, float 0.000000e+00, i32 2 + %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 3 + %100 = load float, float* %1, align 4 + %101 = insertelement <4 x float> zeroinitializer, float %100, i32 0 + %102 = insertelement <4 x float> %101, float 0.000000e+00, i32 1 + %103 = insertelement <4 x float> %102, float 0.000000e+00, i32 2 + %104 = insertelement <4 x float> %103, float 0.000000e+00, i32 3 + %105 = call <4 x float> @llvm.fma.f32(<4 x float> %99, <4 x float> %104, <4 x float> %94) + %106 = extractelement <4 x float> %105, i32 0 + store float %106, float* %2, align 4 + %107 = getelementptr inbounds float, float* %2, i64 1 + %108 = load float, float* %107, align 4 + %109 = insertelement <4 x float> zeroinitializer, float %108, i32 0 + %110 = insertelement <4 x float> %109, float 0.000000e+00, i32 1 + %111 = insertelement <4 x float> %110, float 0.000000e+00, i32 2 + %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 3 + %113 = load float, float* %0, align 4 + %114 = insertelement <4 x float> zeroinitializer, float %113, i32 0 + %115 = insertelement <4 x float> %114, float 0.000000e+00, i32 1 + %116 = insertelement <4 x float> %115, float 0.000000e+00, i32 2 + %117 = insertelement <4 x float> %116, float 0.000000e+00, i32 3 + %118 = getelementptr inbounds float, float* %1, i64 1 + %119 = load float, float* %118, align 4 + %120 = insertelement <4 x float> zeroinitializer, float %119, i32 0 + %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 1 + %122 = insertelement <4 x float> %121, float 0.000000e+00, i32 2 + %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3 + %124 = call <4 x float> @llvm.fma.f32.1(<4 x float> %117, <4 x float> %123, <4 x float> %112) + %125 = extractelement <4 x float> %124, i32 0 + %126 = getelementptr inbounds float, float* %2, i64 1 + store float %125, float* %126, align 4 + %127 = insertelement <4 x float> zeroinitializer, float %108, i32 0 + %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 1 + %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 2 + %130 = insertelement <4 x float> %129, float 0.000000e+00, i32 3 + %131 = load float, float* %0, align 4 + %132 = insertelement <4 x float> zeroinitializer, float %131, i32 0 + %133 = insertelement <4 x float> %132, float 1.000000e+00, i32 1 + %134 = insertelement <4 x float> %133, float 1.000000e+00, i32 2 + %135 = insertelement <4 x float> %134, float 1.000000e+00, i32 3 + %136 = insertelement <4 x float> zeroinitializer, float %119, i32 0 + %137 = insertelement <4 x float> %136, float 0.000000e+00, i32 1 + %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 2 + %139 = insertelement <4 x float> %138, float 0.000000e+00, i32 3 + %140 = call <4 x float> @llvm.fma.f32.2(<4 x float> %135, <4 x float> %139, <4 x float> %130) + %141 = getelementptr inbounds float, float* %0, i64 1 + %142 = load float, float* %141, align 4 + %143 = insertelement <4 x float> zeroinitializer, float %142, i32 0 + %144 = insertelement <4 x float> %143, float 0.000000e+00, i32 1 + %145 = insertelement <4 x float> %144, float 0.000000e+00, i32 2 + %146 = insertelement <4 x float> %145, float 0.000000e+00, i32 3 + %147 = load float, float* %1, align 4 + %148 = insertelement <4 x float> zeroinitializer, float %147, i32 0 + %149 = 
insertelement <4 x float> %148, float 0.000000e+00, i32 1 + %150 = insertelement <4 x float> %149, float 0.000000e+00, i32 2 + %151 = insertelement <4 x float> %150, float 0.000000e+00, i32 3 + %152 = call <4 x float> @llvm.fma.f32.3(<4 x float> %146, <4 x float> %151, <4 x float> %140) + %153 = extractelement <4 x float> %152, i32 0 + %154 = getelementptr inbounds float, float* %2, i64 1 + store float %153, float* %154, align 4 + %155 = getelementptr inbounds float, float* %2, i64 2 + %156 = load float, float* %155, align 4 + %157 = insertelement <4 x float> zeroinitializer, float %156, i32 0 + %158 = insertelement <4 x float> %157, float 0.000000e+00, i32 1 + %159 = insertelement <4 x float> %158, float 0.000000e+00, i32 2 + %160 = insertelement <4 x float> %159, float 0.000000e+00, i32 3 + %161 = getelementptr inbounds float, float* %0, i64 1 + %162 = load float, float* %161, align 4 + %163 = insertelement <4 x float> zeroinitializer, float %162, i32 0 + %164 = insertelement <4 x float> %163, float 0.000000e+00, i32 1 + %165 = insertelement <4 x float> %164, float 0.000000e+00, i32 2 + %166 = insertelement <4 x float> %165, float 0.000000e+00, i32 3 + %167 = getelementptr inbounds float, float* %1, i64 1 + %168 = load float, float* %167, align 4 + %169 = insertelement <4 x float> zeroinitializer, float %168, i32 0 + %170 = insertelement <4 x float> %169, float 0.000000e+00, i32 1 + %171 = insertelement <4 x float> %170, float 0.000000e+00, i32 2 + %172 = insertelement <4 x float> %171, float 0.000000e+00, i32 3 + %173 = call <4 x float> @llvm.fma.f32.4(<4 x float> %166, <4 x float> %172, <4 x float> %160) + %174 = extractelement <4 x float> %173, i32 0 + %175 = getelementptr inbounds float, float* %2, i64 2 + store float %174, float* %175, align 4 + %176 = getelementptr inbounds float, float* %2, i64 3 + %177 = load float, float* %176, align 4 + %178 = insertelement <4 x float> zeroinitializer, float %177, i32 1 + %179 = insertelement <4 x float> %178, float 0.000000e+00, i32 2 + %180 = insertelement <4 x float> %179, float 0.000000e+00, i32 3 + %181 = load float, float* %0, align 4 + %182 = insertelement <4 x float> , float %181, i32 1 + %183 = insertelement <4 x float> %182, float 1.000000e+00, i32 2 + %184 = insertelement <4 x float> %183, float 1.000000e+00, i32 3 + %185 = getelementptr inbounds float, float* %2, i64 3 + %186 = load float, float* %185, align 4 + %187 = insertelement <4 x float> zeroinitializer, float %186, i32 0 + %188 = getelementptr inbounds float, float* %1, i64 2 + %189 = load float, float* %188, align 4 + %190 = insertelement <4 x float> %187, float %189, i32 1 + %191 = insertelement <4 x float> %190, float 0.000000e+00, i32 2 + %192 = insertelement <4 x float> %191, float 0.000000e+00, i32 3 + %193 = call <4 x float> @llvm.fma.f32.5(<4 x float> %184, <4 x float> %192, <4 x float> %180) + %194 = load float, float* %0, align 4 + %195 = insertelement <4 x float> zeroinitializer, float %194, i32 0 + %196 = getelementptr inbounds float, float* %0, i64 2 + %197 = load float, float* %196, align 4 + %198 = insertelement <4 x float> %195, float %197, i32 1 + %199 = insertelement <4 x float> %198, float 0.000000e+00, i32 2 + %200 = insertelement <4 x float> %199, float 0.000000e+00, i32 3 + %201 = getelementptr inbounds float, float* %1, i64 2 + %202 = load float, float* %201, align 4 + %203 = insertelement <4 x float> zeroinitializer, float %202, i32 0 + %204 = load float, float* %1, align 4 + %205 = insertelement <4 x float> %203, float %204, i32 1 + %206 = 
insertelement <4 x float> %205, float 0.000000e+00, i32 2 + %207 = insertelement <4 x float> %206, float 0.000000e+00, i32 3 + %208 = call <4 x float> @llvm.fma.f32.6(<4 x float> %200, <4 x float> %207, <4 x float> %193) + %209 = extractelement <4 x float> %208, i32 0 + %210 = getelementptr inbounds float, float* %2, i64 3 + store float %209, float* %210, align 4 + %211 = extractelement <4 x float> %208, i32 1 + %212 = getelementptr inbounds float, float* %2, i64 3 + store float %211, float* %212, align 4 + %213 = getelementptr inbounds float, float* %2, i64 4 + %214 = load float, float* %213, align 4 + %215 = insertelement <4 x float> zeroinitializer, float %214, i32 0 + %216 = insertelement <4 x float> %215, float 0.000000e+00, i32 1 + %217 = insertelement <4 x float> %216, float 0.000000e+00, i32 2 + %218 = insertelement <4 x float> %217, float 0.000000e+00, i32 3 + %219 = load float, float* %0, align 4 + %220 = insertelement <4 x float> zeroinitializer, float %219, i32 0 + %221 = insertelement <4 x float> %220, float 0.000000e+00, i32 1 + %222 = insertelement <4 x float> %221, float 0.000000e+00, i32 2 + %223 = insertelement <4 x float> %222, float 0.000000e+00, i32 3 + %224 = getelementptr inbounds float, float* %1, i64 3 + %225 = load float, float* %224, align 4 + %226 = insertelement <4 x float> zeroinitializer, float %225, i32 0 + %227 = insertelement <4 x float> %226, float 0.000000e+00, i32 1 + %228 = insertelement <4 x float> %227, float 0.000000e+00, i32 2 + %229 = insertelement <4 x float> %228, float 0.000000e+00, i32 3 + %230 = call <4 x float> @llvm.fma.f32.7(<4 x float> %223, <4 x float> %229, <4 x float> %218) + %231 = extractelement <4 x float> %230, i32 0 + %232 = getelementptr inbounds float, float* %2, i64 4 + store float %231, float* %232, align 4 + %233 = insertelement <4 x float> zeroinitializer, float %214, i32 0 + %234 = insertelement <4 x float> %233, float 0.000000e+00, i32 1 + %235 = insertelement <4 x float> %234, float 0.000000e+00, i32 2 + %236 = insertelement <4 x float> %235, float 0.000000e+00, i32 3 + %237 = load float, float* %0, align 4 + %238 = insertelement <4 x float> zeroinitializer, float %237, i32 0 + %239 = insertelement <4 x float> %238, float 1.000000e+00, i32 1 + %240 = insertelement <4 x float> %239, float 1.000000e+00, i32 2 + %241 = insertelement <4 x float> %240, float 1.000000e+00, i32 3 + %242 = insertelement <4 x float> zeroinitializer, float %225, i32 0 + %243 = insertelement <4 x float> %242, float 0.000000e+00, i32 1 + %244 = insertelement <4 x float> %243, float 0.000000e+00, i32 2 + %245 = insertelement <4 x float> %244, float 0.000000e+00, i32 3 + %246 = call <4 x float> @llvm.fma.f32.8(<4 x float> %241, <4 x float> %245, <4 x float> %236) + %247 = getelementptr inbounds float, float* %0, i64 1 + %248 = load float, float* %247, align 4 + %249 = insertelement <4 x float> zeroinitializer, float %248, i32 0 + %250 = insertelement <4 x float> %249, float 0.000000e+00, i32 1 + %251 = insertelement <4 x float> %250, float 0.000000e+00, i32 2 + %252 = insertelement <4 x float> %251, float 0.000000e+00, i32 3 + %253 = getelementptr inbounds float, float* %1, i64 2 + %254 = load float, float* %253, align 4 + %255 = insertelement <4 x float> zeroinitializer, float %254, i32 0 + %256 = insertelement <4 x float> %255, float 0.000000e+00, i32 1 + %257 = insertelement <4 x float> %256, float 0.000000e+00, i32 2 + %258 = insertelement <4 x float> %257, float 0.000000e+00, i32 3 + %259 = call <4 x float> @llvm.fma.f32.9(<4 x float> %252, <4 x 
float> %258, <4 x float> %246) + %260 = extractelement <4 x float> %259, i32 0 + %261 = getelementptr inbounds float, float* %2, i64 4 + store float %260, float* %261, align 4 + %262 = insertelement <4 x float> zeroinitializer, float %214, i32 0 + %263 = insertelement <4 x float> %262, float 0.000000e+00, i32 1 + %264 = insertelement <4 x float> %263, float 0.000000e+00, i32 2 + %265 = insertelement <4 x float> %264, float 0.000000e+00, i32 3 + %266 = load float, float* %0, align 4 + %267 = insertelement <4 x float> zeroinitializer, float %266, i32 0 + %268 = insertelement <4 x float> %267, float 1.000000e+00, i32 1 + %269 = insertelement <4 x float> %268, float 1.000000e+00, i32 2 + %270 = insertelement <4 x float> %269, float 1.000000e+00, i32 3 + %271 = insertelement <4 x float> zeroinitializer, float %225, i32 0 + %272 = insertelement <4 x float> %271, float 0.000000e+00, i32 1 + %273 = insertelement <4 x float> %272, float 0.000000e+00, i32 2 + %274 = insertelement <4 x float> %273, float 0.000000e+00, i32 3 + %275 = call <4 x float> @llvm.fma.f32.10(<4 x float> %270, <4 x float> %274, <4 x float> %265) + %276 = insertelement <4 x float> zeroinitializer, float %248, i32 0 + %277 = insertelement <4 x float> %276, float 1.000000e+00, i32 1 + %278 = insertelement <4 x float> %277, float 1.000000e+00, i32 2 + %279 = insertelement <4 x float> %278, float 1.000000e+00, i32 3 + %280 = insertelement <4 x float> zeroinitializer, float %254, i32 0 + %281 = insertelement <4 x float> %280, float 0.000000e+00, i32 1 + %282 = insertelement <4 x float> %281, float 0.000000e+00, i32 2 + %283 = insertelement <4 x float> %282, float 0.000000e+00, i32 3 + %284 = call <4 x float> @llvm.fma.f32.11(<4 x float> %279, <4 x float> %283, <4 x float> %275) + %285 = getelementptr inbounds float, float* %0, i64 2 + %286 = load float, float* %285, align 4 + %287 = insertelement <4 x float> zeroinitializer, float %286, i32 0 + %288 = insertelement <4 x float> %287, float 0.000000e+00, i32 1 + %289 = insertelement <4 x float> %288, float 0.000000e+00, i32 2 + %290 = insertelement <4 x float> %289, float 0.000000e+00, i32 3 + %291 = getelementptr inbounds float, float* %1, i64 1 + %292 = load float, float* %291, align 4 + %293 = insertelement <4 x float> zeroinitializer, float %292, i32 0 + %294 = insertelement <4 x float> %293, float 0.000000e+00, i32 1 + %295 = insertelement <4 x float> %294, float 0.000000e+00, i32 2 + %296 = insertelement <4 x float> %295, float 0.000000e+00, i32 3 + %297 = call <4 x float> @llvm.fma.f32.12(<4 x float> %290, <4 x float> %296, <4 x float> %284) + %298 = extractelement <4 x float> %297, i32 0 + %299 = getelementptr inbounds float, float* %2, i64 4 + store float %298, float* %299, align 4 + %300 = insertelement <4 x float> zeroinitializer, float %214, i32 0 + %301 = insertelement <4 x float> %300, float 0.000000e+00, i32 1 + %302 = insertelement <4 x float> %301, float 0.000000e+00, i32 2 + %303 = insertelement <4 x float> %302, float 0.000000e+00, i32 3 + %304 = load float, float* %0, align 4 + %305 = insertelement <4 x float> zeroinitializer, float %304, i32 0 + %306 = insertelement <4 x float> %305, float 1.000000e+00, i32 1 + %307 = insertelement <4 x float> %306, float 1.000000e+00, i32 2 + %308 = insertelement <4 x float> %307, float 1.000000e+00, i32 3 + %309 = insertelement <4 x float> zeroinitializer, float %225, i32 0 + %310 = insertelement <4 x float> %309, float 0.000000e+00, i32 1 + %311 = insertelement <4 x float> %310, float 0.000000e+00, i32 2 + %312 = insertelement 
<4 x float> %311, float 0.000000e+00, i32 3 + %313 = call <4 x float> @llvm.fma.f32.13(<4 x float> %308, <4 x float> %312, <4 x float> %303) + %314 = insertelement <4 x float> zeroinitializer, float %248, i32 0 + %315 = insertelement <4 x float> %314, float 1.000000e+00, i32 1 + %316 = insertelement <4 x float> %315, float 1.000000e+00, i32 2 + %317 = insertelement <4 x float> %316, float 1.000000e+00, i32 3 + %318 = insertelement <4 x float> zeroinitializer, float %254, i32 0 + %319 = insertelement <4 x float> %318, float 0.000000e+00, i32 1 + %320 = insertelement <4 x float> %319, float 0.000000e+00, i32 2 + %321 = insertelement <4 x float> %320, float 0.000000e+00, i32 3 + %322 = call <4 x float> @llvm.fma.f32.14(<4 x float> %317, <4 x float> %321, <4 x float> %313) + %323 = insertelement <4 x float> zeroinitializer, float %286, i32 0 + %324 = insertelement <4 x float> %323, float 1.000000e+00, i32 1 + %325 = insertelement <4 x float> %324, float 1.000000e+00, i32 2 + %326 = insertelement <4 x float> %325, float 1.000000e+00, i32 3 + %327 = insertelement <4 x float> zeroinitializer, float %292, i32 0 + %328 = insertelement <4 x float> %327, float 0.000000e+00, i32 1 + %329 = insertelement <4 x float> %328, float 0.000000e+00, i32 2 + %330 = insertelement <4 x float> %329, float 0.000000e+00, i32 3 + %331 = call <4 x float> @llvm.fma.f32.15(<4 x float> %326, <4 x float> %330, <4 x float> %322) + %332 = getelementptr inbounds float, float* %0, i64 3 + %333 = load float, float* %332, align 4 + %334 = insertelement <4 x float> zeroinitializer, float %333, i32 0 + %335 = insertelement <4 x float> %334, float 0.000000e+00, i32 1 + %336 = insertelement <4 x float> %335, float 0.000000e+00, i32 2 + %337 = insertelement <4 x float> %336, float 0.000000e+00, i32 3 + %338 = load float, float* %1, align 4 + %339 = insertelement <4 x float> zeroinitializer, float %338, i32 0 + %340 = insertelement <4 x float> %339, float 0.000000e+00, i32 1 + %341 = insertelement <4 x float> %340, float 0.000000e+00, i32 2 + %342 = insertelement <4 x float> %341, float 0.000000e+00, i32 3 + %343 = call <4 x float> @llvm.fma.f32.16(<4 x float> %337, <4 x float> %342, <4 x float> %331) + %344 = extractelement <4 x float> %343, i32 0 + %345 = getelementptr inbounds float, float* %2, i64 4 + store float %344, float* %345, align 4 + %346 = getelementptr inbounds float, float* %2, i64 5 + %347 = load float, float* %346, align 4 + %348 = insertelement <4 x float> zeroinitializer, float %347, i32 0 + %349 = insertelement <4 x float> %348, float 0.000000e+00, i32 1 + %350 = insertelement <4 x float> %349, float 0.000000e+00, i32 2 + %351 = insertelement <4 x float> %350, float 0.000000e+00, i32 3 + %352 = getelementptr inbounds float, float* %0, i64 1 + %353 = load float, float* %352, align 4 + %354 = insertelement <4 x float> zeroinitializer, float %353, i32 0 + %355 = insertelement <4 x float> %354, float 0.000000e+00, i32 1 + %356 = insertelement <4 x float> %355, float 0.000000e+00, i32 2 + %357 = insertelement <4 x float> %356, float 0.000000e+00, i32 3 + %358 = getelementptr inbounds float, float* %1, i64 3 + %359 = load float, float* %358, align 4 + %360 = insertelement <4 x float> zeroinitializer, float %359, i32 0 + %361 = insertelement <4 x float> %360, float 0.000000e+00, i32 1 + %362 = insertelement <4 x float> %361, float 0.000000e+00, i32 2 + %363 = insertelement <4 x float> %362, float 0.000000e+00, i32 3 + %364 = call <4 x float> @llvm.fma.f32.17(<4 x float> %357, <4 x float> %363, <4 x float> %351) + %365 = 
extractelement <4 x float> %364, i32 0 + %366 = getelementptr inbounds float, float* %2, i64 5 + store float %365, float* %366, align 4 + %367 = insertelement <4 x float> zeroinitializer, float %347, i32 0 + %368 = insertelement <4 x float> %367, float 0.000000e+00, i32 1 + %369 = insertelement <4 x float> %368, float 0.000000e+00, i32 2 + %370 = insertelement <4 x float> %369, float 0.000000e+00, i32 3 + %371 = insertelement <4 x float> zeroinitializer, float %353, i32 0 + %372 = insertelement <4 x float> %371, float 1.000000e+00, i32 1 + %373 = insertelement <4 x float> %372, float 1.000000e+00, i32 2 + %374 = insertelement <4 x float> %373, float 1.000000e+00, i32 3 + %375 = insertelement <4 x float> zeroinitializer, float %359, i32 0 + %376 = insertelement <4 x float> %375, float 0.000000e+00, i32 1 + %377 = insertelement <4 x float> %376, float 0.000000e+00, i32 2 + %378 = insertelement <4 x float> %377, float 0.000000e+00, i32 3 + %379 = call <4 x float> @llvm.fma.f32.18(<4 x float> %374, <4 x float> %378, <4 x float> %370) + %380 = getelementptr inbounds float, float* %0, i64 3 + %381 = load float, float* %380, align 4 + %382 = insertelement <4 x float> zeroinitializer, float %381, i32 0 + %383 = insertelement <4 x float> %382, float 0.000000e+00, i32 1 + %384 = insertelement <4 x float> %383, float 0.000000e+00, i32 2 + %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3 + %386 = getelementptr inbounds float, float* %1, i64 1 + %387 = load float, float* %386, align 4 + %388 = insertelement <4 x float> zeroinitializer, float %387, i32 0 + %389 = insertelement <4 x float> %388, float 0.000000e+00, i32 1 + %390 = insertelement <4 x float> %389, float 0.000000e+00, i32 2 + %391 = insertelement <4 x float> %390, float 0.000000e+00, i32 3 + %392 = call <4 x float> @llvm.fma.f32.19(<4 x float> %385, <4 x float> %391, <4 x float> %379) + %393 = extractelement <4 x float> %392, i32 0 + %394 = getelementptr inbounds float, float* %2, i64 5 + store float %393, float* %394, align 4 + %395 = getelementptr inbounds float, float* %2, i64 6 + %396 = load float, float* %395, align 4 + %397 = insertelement <4 x float> zeroinitializer, float %396, i32 0 + %398 = insertelement <4 x float> %397, float 0.000000e+00, i32 1 + %399 = insertelement <4 x float> %398, float 0.000000e+00, i32 2 + %400 = insertelement <4 x float> %399, float 0.000000e+00, i32 3 + %401 = getelementptr inbounds float, float* %0, i64 2 + %402 = load float, float* %401, align 4 + %403 = insertelement <4 x float> zeroinitializer, float %402, i32 0 + %404 = insertelement <4 x float> %403, float 0.000000e+00, i32 1 + %405 = insertelement <4 x float> %404, float 0.000000e+00, i32 2 + %406 = insertelement <4 x float> %405, float 0.000000e+00, i32 3 + %407 = getelementptr inbounds float, float* %1, i64 2 + %408 = load float, float* %407, align 4 + %409 = insertelement <4 x float> zeroinitializer, float %408, i32 0 + %410 = insertelement <4 x float> %409, float 0.000000e+00, i32 1 + %411 = insertelement <4 x float> %410, float 0.000000e+00, i32 2 + %412 = insertelement <4 x float> %411, float 0.000000e+00, i32 3 + %413 = call <4 x float> @llvm.fma.f32.20(<4 x float> %406, <4 x float> %412, <4 x float> %400) + %414 = extractelement <4 x float> %413, i32 0 + %415 = getelementptr inbounds float, float* %2, i64 6 + store float %414, float* %415, align 4 + %416 = getelementptr inbounds float, float* %2, i64 7 + %417 = load float, float* %416, align 4 + %418 = insertelement <4 x float> zeroinitializer, float %417, i32 0 + %419 = 
insertelement <4 x float> %418, float 0.000000e+00, i32 1 + %420 = insertelement <4 x float> %419, float 0.000000e+00, i32 2 + %421 = insertelement <4 x float> %420, float 0.000000e+00, i32 3 + %422 = getelementptr inbounds float, float* %0, i64 2 + %423 = load float, float* %422, align 4 + %424 = insertelement <4 x float> zeroinitializer, float %423, i32 0 + %425 = insertelement <4 x float> %424, float 0.000000e+00, i32 1 + %426 = insertelement <4 x float> %425, float 0.000000e+00, i32 2 + %427 = insertelement <4 x float> %426, float 0.000000e+00, i32 3 + %428 = getelementptr inbounds float, float* %1, i64 3 + %429 = load float, float* %428, align 4 + %430 = insertelement <4 x float> zeroinitializer, float %429, i32 0 + %431 = insertelement <4 x float> %430, float 0.000000e+00, i32 1 + %432 = insertelement <4 x float> %431, float 0.000000e+00, i32 2 + %433 = insertelement <4 x float> %432, float 0.000000e+00, i32 3 + %434 = call <4 x float> @llvm.fma.f32.21(<4 x float> %427, <4 x float> %433, <4 x float> %421) + %435 = extractelement <4 x float> %434, i32 0 + %436 = getelementptr inbounds float, float* %2, i64 7 + store float %435, float* %436, align 4 + %437 = insertelement <4 x float> zeroinitializer, float %417, i32 0 + %438 = insertelement <4 x float> %437, float 0.000000e+00, i32 1 + %439 = insertelement <4 x float> %438, float 0.000000e+00, i32 2 + %440 = insertelement <4 x float> %439, float 0.000000e+00, i32 3 + %441 = insertelement <4 x float> zeroinitializer, float %423, i32 0 + %442 = insertelement <4 x float> %441, float 1.000000e+00, i32 1 + %443 = insertelement <4 x float> %442, float 1.000000e+00, i32 2 + %444 = insertelement <4 x float> %443, float 1.000000e+00, i32 3 + %445 = insertelement <4 x float> zeroinitializer, float %429, i32 0 + %446 = insertelement <4 x float> %445, float 0.000000e+00, i32 1 + %447 = insertelement <4 x float> %446, float 0.000000e+00, i32 2 + %448 = insertelement <4 x float> %447, float 0.000000e+00, i32 3 + %449 = call <4 x float> @llvm.fma.f32.22(<4 x float> %444, <4 x float> %448, <4 x float> %440) + %450 = getelementptr inbounds float, float* %0, i64 3 + %451 = load float, float* %450, align 4 + %452 = insertelement <4 x float> zeroinitializer, float %451, i32 0 + %453 = insertelement <4 x float> %452, float 0.000000e+00, i32 1 + %454 = insertelement <4 x float> %453, float 0.000000e+00, i32 2 + %455 = insertelement <4 x float> %454, float 0.000000e+00, i32 3 + %456 = getelementptr inbounds float, float* %1, i64 2 + %457 = load float, float* %456, align 4 + %458 = insertelement <4 x float> zeroinitializer, float %457, i32 0 + %459 = insertelement <4 x float> %458, float 0.000000e+00, i32 1 + %460 = insertelement <4 x float> %459, float 0.000000e+00, i32 2 + %461 = insertelement <4 x float> %460, float 0.000000e+00, i32 3 + %462 = call <4 x float> @llvm.fma.f32.23(<4 x float> %455, <4 x float> %461, <4 x float> %449) + %463 = extractelement <4 x float> %462, i32 0 + %464 = getelementptr inbounds float, float* %2, i64 7 + store float %463, float* %464, align 4 + %465 = getelementptr inbounds float, float* %2, i64 8 + %466 = load float, float* %465, align 4 + %467 = insertelement <4 x float> zeroinitializer, float %466, i32 0 + %468 = insertelement <4 x float> %467, float 0.000000e+00, i32 1 + %469 = insertelement <4 x float> %468, float 0.000000e+00, i32 2 + %470 = insertelement <4 x float> %469, float 0.000000e+00, i32 3 + %471 = getelementptr inbounds float, float* %0, i64 3 + %472 = load float, float* %471, align 4 + %473 = insertelement <4 
x float> zeroinitializer, float %472, i32 0 + %474 = insertelement <4 x float> %473, float 0.000000e+00, i32 1 + %475 = insertelement <4 x float> %474, float 0.000000e+00, i32 2 + %476 = insertelement <4 x float> %475, float 0.000000e+00, i32 3 + %477 = getelementptr inbounds float, float* %1, i64 3 + %478 = load float, float* %477, align 4 + %479 = insertelement <4 x float> zeroinitializer, float %478, i32 0 + %480 = insertelement <4 x float> %479, float 0.000000e+00, i32 1 + %481 = insertelement <4 x float> %480, float 0.000000e+00, i32 2 + %482 = insertelement <4 x float> %481, float 0.000000e+00, i32 3 + %483 = call <4 x float> @llvm.fma.f32.24(<4 x float> %476, <4 x float> %482, <4 x float> %470) + %484 = extractelement <4 x float> %483, i32 0 + %485 = getelementptr inbounds float, float* %2, i64 8 + store float %484, float* %485, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #0 { + %1 = alloca [4 x float], align 16 + %2 = alloca [4 x float], align 16 + %3 = alloca [9 x float], align 16 + %4 = bitcast [4 x float]* %1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.mat_in to i8*), i64 16, i1 false) + %5 = bitcast [4 x float]* %2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.f_in to i8*), i64 16, i1 false) + %6 = bitcast [9 x float]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) + %7 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 + %8 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 + %9 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 0 + call void @convolution(float* nonnull %7, float* nonnull %8, float* nonnull %9) + %10 = load float, float* %9, align 16 + %11 = fpext float %10 to double + %12 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %11) #6 + %13 = load float, float* %9, align 16 + %14 = fcmp une float %13, 1.000000e+00 + br i1 %14, label %22, label %15 + +15: ; preds = %0 + %16 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 1 + %17 = load float, float* %16, align 4 + %18 = fpext float %17 to double + %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #6 + %20 = load float, float* %16, align 4 + %21 = fcmp une float %20, 3.000000e+00 + br i1 %21, label %22, label %23 + +22: ; preds = %65, %58, %51, %44, %37, %30, %23, %15, %0 + call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0), i32 46, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.2, i64 0, i64 0)) #7 + unreachable + +23: ; preds = %15 + %24 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 2 + %25 = load float, float* %24, align 8 + %26 = fpext float %25 to double + %27 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #6 + %28 = load float, float* %24, align 8 + %29 = fcmp une float %28, 2.000000e+00 + br i1 %29, label %22, label %30 + +30: ; preds = %23 + %31 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 3 + %32 = load float, float* %31, align 4 + %33 = fpext float %32 to double + %34 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %33) #6 + %35 = load float, float* %31, align 4 + %36 = fcmp une float %35, 4.000000e+00 + br i1 %36, label %22, label %37 + +37: ; preds = %30 + %38 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 4 + %39 = load float, float* %38, align 16 + %40 = fpext float %39 to double + %41 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %40) #6 + %42 = load float, float* %38, align 16 + %43 = fcmp une float %42, 1.000000e+01 + br i1 %43, label %22, label %44 + +44: ; preds = %37 + %45 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 5 + %46 = load float, float* %45, align 4 + %47 = fpext float %46 to double + %48 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %47) #6 + %49 = load float, float* %45, align 4 + %50 = fcmp une float %49, 6.000000e+00 + br i1 %50, label %22, label %51 + +51: ; preds = %44 + %52 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 6 + %53 = load float, float* %52, align 8 + %54 = fpext float %53 to double + %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #6 + %56 = load float, float* %52, align 8 + %57 = fcmp une float %56, 3.000000e+00 + br i1 %57, label %22, label %58 + +58: ; preds = %51 + %59 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 7 + %60 = load float, float* %59, align 4 + %61 = fpext float %60 to double + %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %61) #6 + %63 = load float, float* %59, align 4 + %64 = fcmp une float %63, 7.000000e+00 + br i1 %64, label %22, label %65 + +65: ; preds = %58 + %66 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 8 + %67 = load float, float* %66, align 16 + %68 = fpext float %67 to double + %69 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %68) #6 + %70 = load float, float* %66, align 16 + %71 = fcmp une float %70, 4.000000e+00 + br i1 %71, label %22, label %72 + +72: ; preds = %65 + ret i32 0 +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 + +declare i32 @printf(i8*, ...) 
#3 + +; Function Attrs: noreturn +declare void @__assert_rtn(i8*, i8*, i32, i8*) #4 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.1(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.2(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.3(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.4(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.5(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.6(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.7(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.8(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.9(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.10(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.11(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.12(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.13(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.14(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.15(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.16(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.17(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.18(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.19(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.20(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.21(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.22(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.23(<4 x float>, <4 x float>, <4 x float>) #5 + +; Function Attrs: nounwind readnone 
speculatable willreturn +declare <4 x float> @llvm.fma.f32.24(<4 x float>, <4 x float>, <4 x float>) #5 + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind willreturn writeonly } +attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { nounwind readnone speculatable willreturn } +attributes #6 = { nounwind } +attributes #7 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.c b/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.c index 68888c97..4653f9c1 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.c @@ -1,4 +1,5 @@ #include +#include #define I_ROWS 2 #define I_COLS 2 @@ -37,9 +38,11 @@ int main(void) { float f_in[F_ROWS][F_COLS] = {{1, 1}, {1, 1}}; float mat_out[O_ROWS][O_COLS] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; convolution(mat_in, f_in, mat_out); + float expected[O_ROWS][O_COLS] = {{1, 3, 2}, {4, 10, 6}, {3, 7, 4}}; for (int i = 0; i < O_ROWS; i++) { for (int j = 0; j < O_COLS; j++) { printf("output: %f\n", mat_out[i][j]); + assert(mat_out[i][j] == expected[i][j]); } } // output: 1.000000 diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-conv.c b/src/dios-egraphs/Diospyros/llvm-tests/2d-conv.c index e1614e05..46d4d15c 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/2d-conv.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/2d-conv.c @@ -1,4 +1,5 @@ #include +#include #define I_ROWS 2 #define I_COLS 2 @@ -38,9 +39,11 @@ int main(void) { float mat_in[I_ROWS * I_COLS] = {1, 2, 3, 4}; float f_in[F_ROWS * F_COLS] = {1, 1, 1, 1}; float mat_out[O_ROWS * O_COLS] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; + float expected[O_ROWS * O_COLS] = {1, 3, 2, 4, 10, 6, 3, 7, 4}; convolution(mat_in, f_in, mat_out); for (int i = 0; i < O_ROWS * O_COLS; i++) { printf("output: %f\n", mat_out[i]); + assert(mat_out[i] == expected[i]); } // output: 1.000000 // output: 3.000000 diff --git 
a/src/dios-egraphs/Diospyros/llvm-tests/load_reuse.c b/src/dios-egraphs/Diospyros/llvm-tests/load_reuse.c index b80b2403..3a447db4 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/load_reuse.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/load_reuse.c @@ -1,4 +1,5 @@ #include +#include #define I_ROWS 2 #define I_COLS 2 @@ -39,27 +40,37 @@ void load_use_twice(float mat_in[I_ROWS][I_COLS], float f_in[F_ROWS][F_COLS], int main(void) { float mat_in[I_ROWS][I_COLS] = {{1, 2}, {3, 4}}; float f_in[F_ROWS][F_COLS] = {{1, 1}, {1, 1}}; - float mat_out[O_ROWS][O_COLS] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; + float mat_out1[O_ROWS][O_COLS] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; float mat_out2[O_ROWS][O_COLS] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; - load_use_twice(mat_in, f_in, mat_out, mat_out2); + load_use_twice(mat_in, f_in, mat_out1, mat_out2); for (int i = 0; i < O_ROWS; i++) { for (int j = 0; j < O_COLS; j++) { - printf("output: %f\n", mat_out[i][j]); + printf("output: %f\n", mat_out1[i][j]); printf("output: %f\n", mat_out2[i][j]); } } + float output1[O_ROWS][O_COLS] = {{-1, 1, 2}, {4, 14, 10}, {5, 13, 8}}; + float output2[O_ROWS][O_COLS] = {{3, 8, 5}, {10, 24, 14}, {7, 16, 9}}; + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + assert(output1[i][j] == mat_out1[i][j]); + assert(output2[i][j] == mat_out2[i][j]); + } + } // output: -1.000000 // output: 3.000000 // output: 1.000000 // output: 8.000000 // output: 2.000000 // output: 5.000000 + // output: 4.000000 // output: 10.000000 // output: 14.000000 // output: 24.000000 // output: 10.000000 // output: 14.000000 + // output: 5.000000 // output: 7.000000 // output: 13.000000 diff --git a/src/dios-egraphs/Diospyros/opt.ll b/src/dios-egraphs/Diospyros/opt.ll new file mode 100644 index 00000000..62f67344 --- /dev/null +++ b/src/dios-egraphs/Diospyros/opt.ll @@ -0,0 +1,249 @@ +; ModuleID = 'clang.ll' +source_filename = "llvm-tests/2d-conv.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@__const.main.mat_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00], align 16 +@__const.main.f_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 +@__const.main.expected = private unnamed_addr constant [9 x float] [float 1.000000e+00, float 3.000000e+00, float 2.000000e+00, float 4.000000e+00, float 1.000000e+01, float 6.000000e+00, float 3.000000e+00, float 7.000000e+00, float 4.000000e+00], align 16 +@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 +@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 +@.str.1 = private unnamed_addr constant [21 x i8] c"llvm-tests/2d-conv.c\00", align 1 +@.str.2 = private unnamed_addr constant [26 x i8] c"mat_out[i] == expected[i]\00", align 1 + +; Function Attrs: noinline nounwind ssp uwtable +define void @convolution(float* %0, float* %1, float* %2) #0 { +.preheader7: + %3 = load float, float* %0, align 4 + %4 = load float, float* %1, align 4 + %5 = fmul float %3, %4 + %6 = load float, float* %2, align 4 + %7 = fadd float %6, %5 + store float %7, float* %2, align 4 + %8 = getelementptr inbounds float, float* %2, i64 1 + %9 = load float, float* %0, align 4 + %10 = getelementptr inbounds float, float* %1, i64 1 + %11 = load float, float* %10, align 4 + %12 = fmul float %9, %11 + %13 = load float, float* %8, 
align 4 + %14 = fadd float %13, %12 + store float %14, float* %8, align 4 + %15 = getelementptr inbounds float, float* %0, i64 1 + %16 = load float, float* %15, align 4 + %17 = load float, float* %1, align 4 + %18 = fmul float %16, %17 + %19 = fadd float %14, %18 + store float %19, float* %8, align 4 + %20 = getelementptr inbounds float, float* %2, i64 2 + %21 = load float, float* %15, align 4 + %22 = load float, float* %10, align 4 + %23 = fmul float %21, %22 + %24 = load float, float* %20, align 4 + %25 = fadd float %24, %23 + store float %25, float* %20, align 4 + %26 = getelementptr inbounds float, float* %2, i64 3 + %27 = load float, float* %0, align 4 + %28 = getelementptr inbounds float, float* %1, i64 2 + %29 = load float, float* %28, align 4 + %30 = fmul float %27, %29 + %31 = load float, float* %26, align 4 + %32 = fadd float %31, %30 + store float %32, float* %26, align 4 + %33 = getelementptr inbounds float, float* %0, i64 2 + %34 = load float, float* %33, align 4 + %35 = load float, float* %1, align 4 + %36 = fmul float %34, %35 + %37 = fadd float %32, %36 + store float %37, float* %26, align 4 + %38 = getelementptr inbounds float, float* %2, i64 4 + %39 = load float, float* %0, align 4 + %40 = getelementptr inbounds float, float* %1, i64 3 + %41 = load float, float* %40, align 4 + %42 = fmul float %39, %41 + %43 = load float, float* %38, align 4 + %44 = fadd float %43, %42 + store float %44, float* %38, align 4 + %45 = load float, float* %15, align 4 + %46 = load float, float* %28, align 4 + %47 = fmul float %45, %46 + %48 = fadd float %44, %47 + store float %48, float* %38, align 4 + %49 = load float, float* %33, align 4 + %50 = load float, float* %10, align 4 + %51 = fmul float %49, %50 + %52 = fadd float %48, %51 + store float %52, float* %38, align 4 + %53 = getelementptr inbounds float, float* %0, i64 3 + %54 = load float, float* %53, align 4 + %55 = load float, float* %1, align 4 + %56 = fmul float %54, %55 + %57 = fadd float %52, %56 + store float %57, float* %38, align 4 + %58 = getelementptr inbounds float, float* %2, i64 5 + %59 = load float, float* %15, align 4 + %60 = load float, float* %40, align 4 + %61 = fmul float %59, %60 + %62 = load float, float* %58, align 4 + %63 = fadd float %62, %61 + store float %63, float* %58, align 4 + %64 = load float, float* %53, align 4 + %65 = load float, float* %10, align 4 + %66 = fmul float %64, %65 + %67 = fadd float %63, %66 + store float %67, float* %58, align 4 + %68 = getelementptr inbounds float, float* %2, i64 6 + %69 = load float, float* %33, align 4 + %70 = load float, float* %28, align 4 + %71 = fmul float %69, %70 + %72 = load float, float* %68, align 4 + %73 = fadd float %72, %71 + store float %73, float* %68, align 4 + %74 = getelementptr inbounds float, float* %2, i64 7 + %75 = load float, float* %33, align 4 + %76 = load float, float* %40, align 4 + %77 = fmul float %75, %76 + %78 = load float, float* %74, align 4 + %79 = fadd float %78, %77 + store float %79, float* %74, align 4 + %80 = load float, float* %53, align 4 + %81 = load float, float* %28, align 4 + %82 = fmul float %80, %81 + %83 = fadd float %79, %82 + store float %83, float* %74, align 4 + %84 = getelementptr inbounds float, float* %2, i64 8 + %85 = load float, float* %53, align 4 + %86 = load float, float* %40, align 4 + %87 = fmul float %85, %86 + %88 = load float, float* %84, align 4 + %89 = fadd float %88, %87 + store float %89, float* %84, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #0 { + %1 = 
alloca [4 x float], align 16 + %2 = alloca [4 x float], align 16 + %3 = alloca [9 x float], align 16 + %4 = bitcast [4 x float]* %1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.mat_in to i8*), i64 16, i1 false) + %5 = bitcast [4 x float]* %2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.f_in to i8*), i64 16, i1 false) + %6 = bitcast [9 x float]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) + %7 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 + %8 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 + %9 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 0 + call void @convolution(float* nonnull %7, float* nonnull %8, float* nonnull %9) + %10 = load float, float* %9, align 16 + %11 = fpext float %10 to double + %12 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %11) #5 + %13 = load float, float* %9, align 16 + %14 = fcmp une float %13, 1.000000e+00 + br i1 %14, label %22, label %15 + +15: ; preds = %0 + %16 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 1 + %17 = load float, float* %16, align 4 + %18 = fpext float %17 to double + %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #5 + %20 = load float, float* %16, align 4 + %21 = fcmp une float %20, 3.000000e+00 + br i1 %21, label %22, label %23 + +22: ; preds = %65, %58, %51, %44, %37, %30, %23, %15, %0 + call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0), i32 46, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.2, i64 0, i64 0)) #6 + unreachable + +23: ; preds = %15 + %24 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 2 + %25 = load float, float* %24, align 8 + %26 = fpext float %25 to double + %27 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #5 + %28 = load float, float* %24, align 8 + %29 = fcmp une float %28, 2.000000e+00 + br i1 %29, label %22, label %30 + +30: ; preds = %23 + %31 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 3 + %32 = load float, float* %31, align 4 + %33 = fpext float %32 to double + %34 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %33) #5 + %35 = load float, float* %31, align 4 + %36 = fcmp une float %35, 4.000000e+00 + br i1 %36, label %22, label %37 + +37: ; preds = %30 + %38 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 4 + %39 = load float, float* %38, align 16 + %40 = fpext float %39 to double + %41 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %40) #5 + %42 = load float, float* %38, align 16 + %43 = fcmp une float %42, 1.000000e+01 + br i1 %43, label %22, label %44 + +44: ; preds = %37 + %45 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 5 + %46 = load float, float* %45, align 4 + %47 = fpext float %46 to double + %48 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %47) #5 + %49 = load float, float* %45, align 4 + %50 = fcmp une float %49, 6.000000e+00 + br i1 %50, label %22, label %51 + +51: ; preds = %44 + %52 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 6 + %53 = load float, float* %52, align 8 + %54 = fpext float %53 to double + %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #5 + %56 = load float, float* %52, align 8 + %57 = fcmp une float %56, 3.000000e+00 + br i1 %57, label %22, label %58 + +58: ; preds = %51 + %59 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 7 + %60 = load float, float* %59, align 4 + %61 = fpext float %60 to double + %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %61) #5 + %63 = load float, float* %59, align 4 + %64 = fcmp une float %63, 7.000000e+00 + br i1 %64, label %22, label %65 + +65: ; preds = %58 + %66 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 8 + %67 = load float, float* %66, align 16 + %68 = fpext float %67 to double + %69 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %68) #5 + %70 = load float, float* %66, align 16 + %71 = fcmp une float %70, 4.000000e+00 + br i1 %71, label %22, label %72 + +72: ; preds = %65 + ret i32 0 +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 + +declare i32 @printf(i8*, ...) 
#3 + +; Function Attrs: noreturn +declare void @__assert_rtn(i8*, i8*, i32, i8*) #4 + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind willreturn writeonly } +attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { nounwind } +attributes #6 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} From fa7d916c996e7a42036315ddeccbe88b72f50519 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Mon, 13 Dec 2021 03:37:33 -0500 Subject: [PATCH 005/143] remove ll --- src/dios-egraphs/Diospyros/aa.ll | 249 -------- src/dios-egraphs/Diospyros/clang.ll | 269 --------- src/dios-egraphs/Diospyros/dce.ll | 559 ------------------ src/dios-egraphs/Diospyros/diospyros.ll | 721 ------------------------ src/dios-egraphs/Diospyros/opt.ll | 249 -------- 5 files changed, 2047 deletions(-) delete mode 100644 src/dios-egraphs/Diospyros/aa.ll delete mode 100644 src/dios-egraphs/Diospyros/clang.ll delete mode 100644 src/dios-egraphs/Diospyros/dce.ll delete mode 100644 src/dios-egraphs/Diospyros/diospyros.ll delete mode 100644 src/dios-egraphs/Diospyros/opt.ll diff --git a/src/dios-egraphs/Diospyros/aa.ll b/src/dios-egraphs/Diospyros/aa.ll deleted file mode 100644 index b4d21e53..00000000 --- a/src/dios-egraphs/Diospyros/aa.ll +++ /dev/null @@ -1,249 +0,0 @@ -; ModuleID = 'opt.ll' -source_filename = "llvm-tests/2d-conv.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@__const.main.mat_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00], align 16 -@__const.main.f_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 -@__const.main.expected = private unnamed_addr constant [9 x float] [float 1.000000e+00, float 3.000000e+00, float 2.000000e+00, float 4.000000e+00, float 1.000000e+01, float 6.000000e+00, float 
3.000000e+00, float 7.000000e+00, float 4.000000e+00], align 16 -@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 -@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 -@.str.1 = private unnamed_addr constant [21 x i8] c"llvm-tests/2d-conv.c\00", align 1 -@.str.2 = private unnamed_addr constant [26 x i8] c"mat_out[i] == expected[i]\00", align 1 - -; Function Attrs: noinline nounwind ssp uwtable -define void @convolution(float* %0, float* %1, float* %2) #0 { -.preheader7: - %3 = load float, float* %0, align 4 - %4 = load float, float* %1, align 4 - %5 = fmul float %3, %4 - %6 = load float, float* %2, align 4 - %7 = fadd float %6, %5 - store float %7, float* %2, align 4 - %8 = getelementptr inbounds float, float* %2, i64 1 - %9 = load float, float* %0, align 4 - %10 = getelementptr inbounds float, float* %1, i64 1 - %11 = load float, float* %10, align 4 - %12 = fmul float %9, %11 - %13 = load float, float* %8, align 4 - %14 = fadd float %13, %12 - store float %14, float* %8, align 4 - %15 = getelementptr inbounds float, float* %0, i64 1 - %16 = load float, float* %15, align 4 - %17 = load float, float* %1, align 4 - %18 = fmul float %16, %17 - %19 = fadd float %14, %18 - store float %19, float* %8, align 4 - %20 = getelementptr inbounds float, float* %2, i64 2 - %21 = load float, float* %15, align 4 - %22 = load float, float* %10, align 4 - %23 = fmul float %21, %22 - %24 = load float, float* %20, align 4 - %25 = fadd float %24, %23 - store float %25, float* %20, align 4 - %26 = getelementptr inbounds float, float* %2, i64 3 - %27 = load float, float* %0, align 4 - %28 = getelementptr inbounds float, float* %1, i64 2 - %29 = load float, float* %28, align 4 - %30 = fmul float %27, %29 - %31 = load float, float* %26, align 4 - %32 = fadd float %31, %30 - store float %32, float* %26, align 4 - %33 = getelementptr inbounds float, float* %0, i64 2 - %34 = load float, float* %33, align 4 - %35 = load float, float* %1, align 4 - %36 = fmul float %34, %35 - %37 = fadd float %32, %36 - store float %37, float* %26, align 4 - %38 = getelementptr inbounds float, float* %2, i64 4 - %39 = load float, float* %0, align 4 - %40 = getelementptr inbounds float, float* %1, i64 3 - %41 = load float, float* %40, align 4 - %42 = fmul float %39, %41 - %43 = load float, float* %38, align 4 - %44 = fadd float %43, %42 - store float %44, float* %38, align 4 - %45 = load float, float* %15, align 4 - %46 = load float, float* %28, align 4 - %47 = fmul float %45, %46 - %48 = fadd float %44, %47 - store float %48, float* %38, align 4 - %49 = load float, float* %33, align 4 - %50 = load float, float* %10, align 4 - %51 = fmul float %49, %50 - %52 = fadd float %48, %51 - store float %52, float* %38, align 4 - %53 = getelementptr inbounds float, float* %0, i64 3 - %54 = load float, float* %53, align 4 - %55 = load float, float* %1, align 4 - %56 = fmul float %54, %55 - %57 = fadd float %52, %56 - store float %57, float* %38, align 4 - %58 = getelementptr inbounds float, float* %2, i64 5 - %59 = load float, float* %15, align 4 - %60 = load float, float* %40, align 4 - %61 = fmul float %59, %60 - %62 = load float, float* %58, align 4 - %63 = fadd float %62, %61 - store float %63, float* %58, align 4 - %64 = load float, float* %53, align 4 - %65 = load float, float* %10, align 4 - %66 = fmul float %64, %65 - %67 = fadd float %63, %66 - store float %67, float* %58, align 4 - %68 = getelementptr inbounds float, float* %2, i64 6 - %69 = load float, float* %33, align 4 - %70 = load 
float, float* %28, align 4 - %71 = fmul float %69, %70 - %72 = load float, float* %68, align 4 - %73 = fadd float %72, %71 - store float %73, float* %68, align 4 - %74 = getelementptr inbounds float, float* %2, i64 7 - %75 = load float, float* %33, align 4 - %76 = load float, float* %40, align 4 - %77 = fmul float %75, %76 - %78 = load float, float* %74, align 4 - %79 = fadd float %78, %77 - store float %79, float* %74, align 4 - %80 = load float, float* %53, align 4 - %81 = load float, float* %28, align 4 - %82 = fmul float %80, %81 - %83 = fadd float %79, %82 - store float %83, float* %74, align 4 - %84 = getelementptr inbounds float, float* %2, i64 8 - %85 = load float, float* %53, align 4 - %86 = load float, float* %40, align 4 - %87 = fmul float %85, %86 - %88 = load float, float* %84, align 4 - %89 = fadd float %88, %87 - store float %89, float* %84, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #0 { - %1 = alloca [4 x float], align 16 - %2 = alloca [4 x float], align 16 - %3 = alloca [9 x float], align 16 - %4 = bitcast [4 x float]* %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.mat_in to i8*), i64 16, i1 false) - %5 = bitcast [4 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.f_in to i8*), i64 16, i1 false) - %6 = bitcast [9 x float]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) - %7 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 - %8 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %9 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 0 - call void @convolution(float* nonnull %7, float* nonnull %8, float* nonnull %9) - %10 = load float, float* %9, align 16 - %11 = fpext float %10 to double - %12 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %11) #5 - %13 = load float, float* %9, align 16 - %14 = fcmp une float %13, 1.000000e+00 - br i1 %14, label %22, label %15 - -15: ; preds = %0 - %16 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 1 - %17 = load float, float* %16, align 4 - %18 = fpext float %17 to double - %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #5 - %20 = load float, float* %16, align 4 - %21 = fcmp une float %20, 3.000000e+00 - br i1 %21, label %22, label %23 - -22: ; preds = %65, %58, %51, %44, %37, %30, %23, %15, %0 - call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0), i32 46, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.2, i64 0, i64 0)) #6 - unreachable - -23: ; preds = %15 - %24 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 2 - %25 = load float, float* %24, align 8 - %26 = fpext float %25 to double - %27 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #5 - %28 = load float, float* %24, align 8 - %29 = fcmp une float %28, 2.000000e+00 - br i1 %29, label %22, label %30 - -30: ; preds = %23 - %31 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 3 - %32 = load float, float* %31, align 4 - %33 = fpext float %32 to double - %34 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %33) #5 - %35 = load float, float* %31, align 4 - %36 = fcmp une float %35, 4.000000e+00 - br i1 %36, label %22, label %37 - -37: ; preds = %30 - %38 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 4 - %39 = load float, float* %38, align 16 - %40 = fpext float %39 to double - %41 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %40) #5 - %42 = load float, float* %38, align 16 - %43 = fcmp une float %42, 1.000000e+01 - br i1 %43, label %22, label %44 - -44: ; preds = %37 - %45 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 5 - %46 = load float, float* %45, align 4 - %47 = fpext float %46 to double - %48 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %47) #5 - %49 = load float, float* %45, align 4 - %50 = fcmp une float %49, 6.000000e+00 - br i1 %50, label %22, label %51 - -51: ; preds = %44 - %52 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 6 - %53 = load float, float* %52, align 8 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #5 - %56 = load float, float* %52, align 8 - %57 = fcmp une float %56, 3.000000e+00 - br i1 %57, label %22, label %58 - -58: ; preds = %51 - %59 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 7 - %60 = load float, float* %59, align 4 - %61 = fpext float %60 to double - %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %61) #5 - %63 = load float, float* %59, align 4 - %64 = fcmp une float %63, 7.000000e+00 - br i1 %64, label %22, label %65 - -65: ; preds = %58 - %66 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 8 - %67 = load float, float* %66, align 16 - %68 = fpext float %67 to double - %69 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %68) #5 - %70 = load float, float* %66, align 16 - %71 = fcmp une float %70, 4.000000e+00 - br i1 %71, label %22, label %72 - -72: ; preds = %65 - ret i32 0 -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 - -declare i32 @printf(i8*, ...) 
#3 - -; Function Attrs: noreturn -declare void @__assert_rtn(i8*, i8*, i32, i8*) #4 - -attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { argmemonly nounwind willreturn writeonly } -attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { nounwind } -attributes #6 = { noreturn nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/clang.ll b/src/dios-egraphs/Diospyros/clang.ll deleted file mode 100644 index ad3269fd..00000000 --- a/src/dios-egraphs/Diospyros/clang.ll +++ /dev/null @@ -1,269 +0,0 @@ -; ModuleID = 'llvm-tests/2d-conv.c' -source_filename = "llvm-tests/2d-conv.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@__const.main.mat_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00], align 16 -@__const.main.f_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 -@__const.main.expected = private unnamed_addr constant [9 x float] [float 1.000000e+00, float 3.000000e+00, float 2.000000e+00, float 4.000000e+00, float 1.000000e+01, float 6.000000e+00, float 3.000000e+00, float 7.000000e+00, float 4.000000e+00], align 16 -@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 -@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 -@.str.1 = private unnamed_addr constant [21 x i8] c"llvm-tests/2d-conv.c\00", align 1 -@.str.2 = private unnamed_addr constant [26 x i8] c"mat_out[i] == expected[i]\00", align 1 - -; Function Attrs: noinline nounwind ssp uwtable -define void @convolution(float* %0, float* %1, float* %2) #0 { - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - %7 = alloca i32, align 4 - %8 = alloca i32, align 4 - %9 = alloca i32, align 4 - %10 = alloca i32, align 4 - %11 = alloca i32, 
align 4 - %12 = alloca i32, align 4 - %13 = alloca i32, align 4 - %14 = alloca i32, align 4 - %15 = alloca float, align 4 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - store i32 0, i32* %7, align 4 - br label %16 - -16: ; preds = %94, %3 - %17 = load i32, i32* %7, align 4 - %18 = icmp slt i32 %17, 3 - br i1 %18, label %19, label %97 - -19: ; preds = %16 - store i32 0, i32* %8, align 4 - br label %20 - -20: ; preds = %90, %19 - %21 = load i32, i32* %8, align 4 - %22 = icmp slt i32 %21, 3 - br i1 %22, label %23, label %93 - -23: ; preds = %20 - store i32 0, i32* %9, align 4 - br label %24 - -24: ; preds = %86, %23 - %25 = load i32, i32* %9, align 4 - %26 = icmp slt i32 %25, 2 - br i1 %26, label %27, label %89 - -27: ; preds = %24 - store i32 0, i32* %10, align 4 - br label %28 - -28: ; preds = %82, %27 - %29 = load i32, i32* %10, align 4 - %30 = icmp slt i32 %29, 2 - br i1 %30, label %31, label %85 - -31: ; preds = %28 - %32 = load i32, i32* %9, align 4 - %33 = sub nsw i32 1, %32 - store i32 %33, i32* %11, align 4 - %34 = load i32, i32* %10, align 4 - %35 = sub nsw i32 1, %34 - store i32 %35, i32* %12, align 4 - %36 = load i32, i32* %7, align 4 - %37 = load i32, i32* %11, align 4 - %38 = sub nsw i32 %36, %37 - store i32 %38, i32* %13, align 4 - %39 = load i32, i32* %8, align 4 - %40 = load i32, i32* %12, align 4 - %41 = sub nsw i32 %39, %40 - store i32 %41, i32* %14, align 4 - %42 = load i32, i32* %13, align 4 - %43 = icmp sge i32 %42, 0 - br i1 %43, label %44, label %81 - -44: ; preds = %31 - %45 = load i32, i32* %13, align 4 - %46 = icmp slt i32 %45, 2 - br i1 %46, label %47, label %81 - -47: ; preds = %44 - %48 = load i32, i32* %14, align 4 - %49 = icmp sge i32 %48, 0 - br i1 %49, label %50, label %81 - -50: ; preds = %47 - %51 = load i32, i32* %14, align 4 - %52 = icmp slt i32 %51, 2 - br i1 %52, label %53, label %81 - -53: ; preds = %50 - %54 = load float*, float** %4, align 8 - %55 = load i32, i32* %13, align 4 - %56 = mul nsw i32 %55, 2 - %57 = load i32, i32* %14, align 4 - %58 = add nsw i32 %56, %57 - %59 = sext i32 %58 to i64 - %60 = getelementptr inbounds float, float* %54, i64 %59 - %61 = load float, float* %60, align 4 - %62 = load float*, float** %5, align 8 - %63 = load i32, i32* %11, align 4 - %64 = mul nsw i32 %63, 2 - %65 = load i32, i32* %12, align 4 - %66 = add nsw i32 %64, %65 - %67 = sext i32 %66 to i64 - %68 = getelementptr inbounds float, float* %62, i64 %67 - %69 = load float, float* %68, align 4 - %70 = fmul float %61, %69 - store float %70, float* %15, align 4 - %71 = load float, float* %15, align 4 - %72 = load float*, float** %6, align 8 - %73 = load i32, i32* %7, align 4 - %74 = mul nsw i32 %73, 3 - %75 = load i32, i32* %8, align 4 - %76 = add nsw i32 %74, %75 - %77 = sext i32 %76 to i64 - %78 = getelementptr inbounds float, float* %72, i64 %77 - %79 = load float, float* %78, align 4 - %80 = fadd float %79, %71 - store float %80, float* %78, align 4 - br label %81 - -81: ; preds = %53, %50, %47, %44, %31 - br label %82 - -82: ; preds = %81 - %83 = load i32, i32* %10, align 4 - %84 = add nsw i32 %83, 1 - store i32 %84, i32* %10, align 4 - br label %28 - -85: ; preds = %28 - br label %86 - -86: ; preds = %85 - %87 = load i32, i32* %9, align 4 - %88 = add nsw i32 %87, 1 - store i32 %88, i32* %9, align 4 - br label %24 - -89: ; preds = %24 - br label %90 - -90: ; preds = %89 - %91 = load i32, i32* %8, align 4 - %92 = add nsw i32 %91, 1 - store i32 %92, i32* %8, align 4 - br label %20 - -93: ; 
preds = %20 - br label %94 - -94: ; preds = %93 - %95 = load i32, i32* %7, align 4 - %96 = add nsw i32 %95, 1 - store i32 %96, i32* %7, align 4 - br label %16 - -97: ; preds = %16 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #0 { - %1 = alloca i32, align 4 - %2 = alloca [4 x float], align 16 - %3 = alloca [4 x float], align 16 - %4 = alloca [9 x float], align 16 - %5 = alloca [9 x float], align 16 - %6 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %7 = bitcast [4 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %7, i8* align 16 bitcast ([4 x float]* @__const.main.mat_in to i8*), i64 16, i1 false) - %8 = bitcast [4 x float]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %8, i8* align 16 bitcast ([4 x float]* @__const.main.f_in to i8*), i64 16, i1 false) - %9 = bitcast [9 x float]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %9, i8 0, i64 36, i1 false) - %10 = bitcast [9 x float]* %5 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %10, i8* align 16 bitcast ([9 x float]* @__const.main.expected to i8*), i64 36, i1 false) - %11 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %12 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - %13 = getelementptr inbounds [9 x float], [9 x float]* %4, i64 0, i64 0 - call void @convolution(float* %11, float* %12, float* %13) - store i32 0, i32* %6, align 4 - br label %14 - -14: ; preds = %41, %0 - %15 = load i32, i32* %6, align 4 - %16 = icmp slt i32 %15, 9 - br i1 %16, label %17, label %44 - -17: ; preds = %14 - %18 = load i32, i32* %6, align 4 - %19 = sext i32 %18 to i64 - %20 = getelementptr inbounds [9 x float], [9 x float]* %4, i64 0, i64 %19 - %21 = load float, float* %20, align 4 - %22 = fpext float %21 to double - %23 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %22) - %24 = load i32, i32* %6, align 4 - %25 = sext i32 %24 to i64 - %26 = getelementptr inbounds [9 x float], [9 x float]* %4, i64 0, i64 %25 - %27 = load float, float* %26, align 4 - %28 = load i32, i32* %6, align 4 - %29 = sext i32 %28 to i64 - %30 = getelementptr inbounds [9 x float], [9 x float]* %5, i64 0, i64 %29 - %31 = load float, float* %30, align 4 - %32 = fcmp oeq float %27, %31 - %33 = xor i1 %32, true - %34 = zext i1 %33 to i32 - %35 = sext i32 %34 to i64 - %36 = icmp ne i64 %35, 0 - br i1 %36, label %37, label %39 - -37: ; preds = %17 - call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0), i32 46, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.2, i64 0, i64 0)) #5 - unreachable - -38: ; No predecessors! - br label %40 - -39: ; preds = %17 - br label %40 - -40: ; preds = %39, %38 - br label %41 - -41: ; preds = %40 - %42 = load i32, i32* %6, align 4 - %43 = add nsw i32 %42, 1 - store i32 %43, i32* %6, align 4 - br label %14 - -44: ; preds = %14 - ret i32 0 -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 - -declare i32 @printf(i8*, ...) 
#3 - -; Function Attrs: noreturn -declare void @__assert_rtn(i8*, i8*, i32, i8*) #4 - -attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { argmemonly nounwind willreturn writeonly } -attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { noreturn } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/dce.ll b/src/dios-egraphs/Diospyros/dce.ll deleted file mode 100644 index ab878f8e..00000000 --- a/src/dios-egraphs/Diospyros/dce.ll +++ /dev/null @@ -1,559 +0,0 @@ -; ModuleID = 'diospyros.ll' -source_filename = "llvm-tests/2d-conv.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@__const.main.mat_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00], align 16 -@__const.main.f_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 -@__const.main.expected = private unnamed_addr constant [9 x float] [float 1.000000e+00, float 3.000000e+00, float 2.000000e+00, float 4.000000e+00, float 1.000000e+01, float 6.000000e+00, float 3.000000e+00, float 7.000000e+00, float 4.000000e+00], align 16 -@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 -@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 -@.str.1 = private unnamed_addr constant [21 x i8] c"llvm-tests/2d-conv.c\00", align 1 -@.str.2 = private unnamed_addr constant [26 x i8] c"mat_out[i] == expected[i]\00", align 1 - -; Function Attrs: noinline nounwind ssp uwtable -define void @convolution(float* %0, float* %1, float* %2) #0 { -.preheader7: - %3 = load float, float* %2, align 4 - %4 = insertelement <4 x float> zeroinitializer, float %3, i32 0 - %5 = insertelement <4 x float> %4, float 0.000000e+00, i32 1 - %6 = insertelement <4 x float> %5, float 0.000000e+00, i32 2 - %7 = insertelement <4 x 
float> %6, float 0.000000e+00, i32 3 - %8 = load float, float* %0, align 4 - %9 = insertelement <4 x float> zeroinitializer, float %8, i32 0 - %10 = insertelement <4 x float> %9, float 0.000000e+00, i32 1 - %11 = insertelement <4 x float> %10, float 0.000000e+00, i32 2 - %12 = insertelement <4 x float> %11, float 0.000000e+00, i32 3 - %13 = load float, float* %1, align 4 - %14 = insertelement <4 x float> zeroinitializer, float %13, i32 0 - %15 = insertelement <4 x float> %14, float 0.000000e+00, i32 1 - %16 = insertelement <4 x float> %15, float 0.000000e+00, i32 2 - %17 = insertelement <4 x float> %16, float 0.000000e+00, i32 3 - %18 = call <4 x float> @llvm.fma.v4f32(<4 x float> %12, <4 x float> %17, <4 x float> %7) - %19 = extractelement <4 x float> %18, i32 0 - store float %19, float* %2, align 4 - %20 = getelementptr inbounds float, float* %2, i64 1 - %21 = load float, float* %20, align 4 - %22 = insertelement <4 x float> zeroinitializer, float %21, i32 0 - %23 = insertelement <4 x float> %22, float 0.000000e+00, i32 1 - %24 = insertelement <4 x float> %23, float 0.000000e+00, i32 2 - %25 = insertelement <4 x float> %24, float 0.000000e+00, i32 3 - %26 = load float, float* %0, align 4 - %27 = insertelement <4 x float> zeroinitializer, float %26, i32 0 - %28 = insertelement <4 x float> %27, float 0.000000e+00, i32 1 - %29 = insertelement <4 x float> %28, float 0.000000e+00, i32 2 - %30 = insertelement <4 x float> %29, float 0.000000e+00, i32 3 - %31 = getelementptr inbounds float, float* %1, i64 1 - %32 = load float, float* %31, align 4 - %33 = insertelement <4 x float> zeroinitializer, float %32, i32 0 - %34 = insertelement <4 x float> %33, float 0.000000e+00, i32 1 - %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 2 - %36 = insertelement <4 x float> %35, float 0.000000e+00, i32 3 - %37 = call <4 x float> @llvm.fma.v4f32(<4 x float> %30, <4 x float> %36, <4 x float> %25) - %38 = extractelement <4 x float> %37, i32 0 - %39 = getelementptr inbounds float, float* %2, i64 1 - store float %38, float* %39, align 4 - %40 = insertelement <4 x float> zeroinitializer, float %21, i32 0 - %41 = insertelement <4 x float> %40, float 0.000000e+00, i32 1 - %42 = insertelement <4 x float> %41, float 0.000000e+00, i32 2 - %43 = insertelement <4 x float> %42, float 0.000000e+00, i32 3 - %44 = load float, float* %0, align 4 - %45 = insertelement <4 x float> zeroinitializer, float %44, i32 0 - %46 = insertelement <4 x float> %45, float 1.000000e+00, i32 1 - %47 = insertelement <4 x float> %46, float 1.000000e+00, i32 2 - %48 = insertelement <4 x float> %47, float 1.000000e+00, i32 3 - %49 = insertelement <4 x float> zeroinitializer, float %32, i32 0 - %50 = insertelement <4 x float> %49, float 0.000000e+00, i32 1 - %51 = insertelement <4 x float> %50, float 0.000000e+00, i32 2 - %52 = insertelement <4 x float> %51, float 0.000000e+00, i32 3 - %53 = call <4 x float> @llvm.fma.v4f32(<4 x float> %48, <4 x float> %52, <4 x float> %43) - %54 = getelementptr inbounds float, float* %0, i64 1 - %55 = load float, float* %54, align 4 - %56 = insertelement <4 x float> zeroinitializer, float %55, i32 0 - %57 = insertelement <4 x float> %56, float 0.000000e+00, i32 1 - %58 = insertelement <4 x float> %57, float 0.000000e+00, i32 2 - %59 = insertelement <4 x float> %58, float 0.000000e+00, i32 3 - %60 = load float, float* %1, align 4 - %61 = insertelement <4 x float> zeroinitializer, float %60, i32 0 - %62 = insertelement <4 x float> %61, float 0.000000e+00, i32 1 - %63 = insertelement <4 x float> %62, 
float 0.000000e+00, i32 2 - %64 = insertelement <4 x float> %63, float 0.000000e+00, i32 3 - %65 = call <4 x float> @llvm.fma.v4f32(<4 x float> %59, <4 x float> %64, <4 x float> %53) - %66 = extractelement <4 x float> %65, i32 0 - %67 = getelementptr inbounds float, float* %2, i64 1 - store float %66, float* %67, align 4 - %68 = getelementptr inbounds float, float* %2, i64 2 - %69 = load float, float* %68, align 4 - %70 = insertelement <4 x float> zeroinitializer, float %69, i32 0 - %71 = insertelement <4 x float> %70, float 0.000000e+00, i32 1 - %72 = insertelement <4 x float> %71, float 0.000000e+00, i32 2 - %73 = insertelement <4 x float> %72, float 0.000000e+00, i32 3 - %74 = getelementptr inbounds float, float* %0, i64 1 - %75 = load float, float* %74, align 4 - %76 = insertelement <4 x float> zeroinitializer, float %75, i32 0 - %77 = insertelement <4 x float> %76, float 0.000000e+00, i32 1 - %78 = insertelement <4 x float> %77, float 0.000000e+00, i32 2 - %79 = insertelement <4 x float> %78, float 0.000000e+00, i32 3 - %80 = getelementptr inbounds float, float* %1, i64 1 - %81 = load float, float* %80, align 4 - %82 = insertelement <4 x float> zeroinitializer, float %81, i32 0 - %83 = insertelement <4 x float> %82, float 0.000000e+00, i32 1 - %84 = insertelement <4 x float> %83, float 0.000000e+00, i32 2 - %85 = insertelement <4 x float> %84, float 0.000000e+00, i32 3 - %86 = call <4 x float> @llvm.fma.v4f32(<4 x float> %79, <4 x float> %85, <4 x float> %73) - %87 = extractelement <4 x float> %86, i32 0 - %88 = getelementptr inbounds float, float* %2, i64 2 - store float %87, float* %88, align 4 - %89 = getelementptr inbounds float, float* %2, i64 3 - %90 = load float, float* %89, align 4 - %91 = insertelement <4 x float> zeroinitializer, float %90, i32 1 - %92 = insertelement <4 x float> %91, float 0.000000e+00, i32 2 - %93 = insertelement <4 x float> %92, float 0.000000e+00, i32 3 - %94 = load float, float* %0, align 4 - %95 = insertelement <4 x float> , float %94, i32 1 - %96 = insertelement <4 x float> %95, float 1.000000e+00, i32 2 - %97 = insertelement <4 x float> %96, float 1.000000e+00, i32 3 - %98 = getelementptr inbounds float, float* %2, i64 3 - %99 = load float, float* %98, align 4 - %100 = insertelement <4 x float> zeroinitializer, float %99, i32 0 - %101 = getelementptr inbounds float, float* %1, i64 2 - %102 = load float, float* %101, align 4 - %103 = insertelement <4 x float> %100, float %102, i32 1 - %104 = insertelement <4 x float> %103, float 0.000000e+00, i32 2 - %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 3 - %106 = call <4 x float> @llvm.fma.v4f32(<4 x float> %97, <4 x float> %105, <4 x float> %93) - %107 = load float, float* %0, align 4 - %108 = insertelement <4 x float> zeroinitializer, float %107, i32 0 - %109 = getelementptr inbounds float, float* %0, i64 2 - %110 = load float, float* %109, align 4 - %111 = insertelement <4 x float> %108, float %110, i32 1 - %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 2 - %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 3 - %114 = getelementptr inbounds float, float* %1, i64 2 - %115 = load float, float* %114, align 4 - %116 = insertelement <4 x float> zeroinitializer, float %115, i32 0 - %117 = load float, float* %1, align 4 - %118 = insertelement <4 x float> %116, float %117, i32 1 - %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 2 - %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 3 - %121 = call <4 x float> @llvm.fma.v4f32(<4 x float> %113, 
<4 x float> %120, <4 x float> %106) - %122 = extractelement <4 x float> %121, i32 1 - %123 = getelementptr inbounds float, float* %2, i64 3 - store float %122, float* %123, align 4 - %124 = getelementptr inbounds float, float* %2, i64 4 - %125 = load float, float* %124, align 4 - %126 = insertelement <4 x float> zeroinitializer, float %125, i32 0 - %127 = insertelement <4 x float> %126, float 0.000000e+00, i32 1 - %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 2 - %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 3 - %130 = load float, float* %0, align 4 - %131 = insertelement <4 x float> zeroinitializer, float %130, i32 0 - %132 = insertelement <4 x float> %131, float 0.000000e+00, i32 1 - %133 = insertelement <4 x float> %132, float 0.000000e+00, i32 2 - %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 3 - %135 = getelementptr inbounds float, float* %1, i64 3 - %136 = load float, float* %135, align 4 - %137 = insertelement <4 x float> zeroinitializer, float %136, i32 0 - %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 1 - %139 = insertelement <4 x float> %138, float 0.000000e+00, i32 2 - %140 = insertelement <4 x float> %139, float 0.000000e+00, i32 3 - %141 = call <4 x float> @llvm.fma.v4f32(<4 x float> %134, <4 x float> %140, <4 x float> %129) - %142 = extractelement <4 x float> %141, i32 0 - %143 = getelementptr inbounds float, float* %2, i64 4 - store float %142, float* %143, align 4 - %144 = insertelement <4 x float> zeroinitializer, float %125, i32 0 - %145 = insertelement <4 x float> %144, float 0.000000e+00, i32 1 - %146 = insertelement <4 x float> %145, float 0.000000e+00, i32 2 - %147 = insertelement <4 x float> %146, float 0.000000e+00, i32 3 - %148 = load float, float* %0, align 4 - %149 = insertelement <4 x float> zeroinitializer, float %148, i32 0 - %150 = insertelement <4 x float> %149, float 1.000000e+00, i32 1 - %151 = insertelement <4 x float> %150, float 1.000000e+00, i32 2 - %152 = insertelement <4 x float> %151, float 1.000000e+00, i32 3 - %153 = insertelement <4 x float> zeroinitializer, float %136, i32 0 - %154 = insertelement <4 x float> %153, float 0.000000e+00, i32 1 - %155 = insertelement <4 x float> %154, float 0.000000e+00, i32 2 - %156 = insertelement <4 x float> %155, float 0.000000e+00, i32 3 - %157 = call <4 x float> @llvm.fma.v4f32(<4 x float> %152, <4 x float> %156, <4 x float> %147) - %158 = getelementptr inbounds float, float* %0, i64 1 - %159 = load float, float* %158, align 4 - %160 = insertelement <4 x float> zeroinitializer, float %159, i32 0 - %161 = insertelement <4 x float> %160, float 0.000000e+00, i32 1 - %162 = insertelement <4 x float> %161, float 0.000000e+00, i32 2 - %163 = insertelement <4 x float> %162, float 0.000000e+00, i32 3 - %164 = getelementptr inbounds float, float* %1, i64 2 - %165 = load float, float* %164, align 4 - %166 = insertelement <4 x float> zeroinitializer, float %165, i32 0 - %167 = insertelement <4 x float> %166, float 0.000000e+00, i32 1 - %168 = insertelement <4 x float> %167, float 0.000000e+00, i32 2 - %169 = insertelement <4 x float> %168, float 0.000000e+00, i32 3 - %170 = call <4 x float> @llvm.fma.v4f32(<4 x float> %163, <4 x float> %169, <4 x float> %157) - %171 = extractelement <4 x float> %170, i32 0 - %172 = getelementptr inbounds float, float* %2, i64 4 - store float %171, float* %172, align 4 - %173 = insertelement <4 x float> zeroinitializer, float %125, i32 0 - %174 = insertelement <4 x float> %173, float 0.000000e+00, i32 1 - %175 = 
insertelement <4 x float> %174, float 0.000000e+00, i32 2 - %176 = insertelement <4 x float> %175, float 0.000000e+00, i32 3 - %177 = load float, float* %0, align 4 - %178 = insertelement <4 x float> zeroinitializer, float %177, i32 0 - %179 = insertelement <4 x float> %178, float 1.000000e+00, i32 1 - %180 = insertelement <4 x float> %179, float 1.000000e+00, i32 2 - %181 = insertelement <4 x float> %180, float 1.000000e+00, i32 3 - %182 = insertelement <4 x float> zeroinitializer, float %136, i32 0 - %183 = insertelement <4 x float> %182, float 0.000000e+00, i32 1 - %184 = insertelement <4 x float> %183, float 0.000000e+00, i32 2 - %185 = insertelement <4 x float> %184, float 0.000000e+00, i32 3 - %186 = call <4 x float> @llvm.fma.v4f32(<4 x float> %181, <4 x float> %185, <4 x float> %176) - %187 = insertelement <4 x float> zeroinitializer, float %159, i32 0 - %188 = insertelement <4 x float> %187, float 1.000000e+00, i32 1 - %189 = insertelement <4 x float> %188, float 1.000000e+00, i32 2 - %190 = insertelement <4 x float> %189, float 1.000000e+00, i32 3 - %191 = insertelement <4 x float> zeroinitializer, float %165, i32 0 - %192 = insertelement <4 x float> %191, float 0.000000e+00, i32 1 - %193 = insertelement <4 x float> %192, float 0.000000e+00, i32 2 - %194 = insertelement <4 x float> %193, float 0.000000e+00, i32 3 - %195 = call <4 x float> @llvm.fma.v4f32(<4 x float> %190, <4 x float> %194, <4 x float> %186) - %196 = getelementptr inbounds float, float* %0, i64 2 - %197 = load float, float* %196, align 4 - %198 = insertelement <4 x float> zeroinitializer, float %197, i32 0 - %199 = insertelement <4 x float> %198, float 0.000000e+00, i32 1 - %200 = insertelement <4 x float> %199, float 0.000000e+00, i32 2 - %201 = insertelement <4 x float> %200, float 0.000000e+00, i32 3 - %202 = getelementptr inbounds float, float* %1, i64 1 - %203 = load float, float* %202, align 4 - %204 = insertelement <4 x float> zeroinitializer, float %203, i32 0 - %205 = insertelement <4 x float> %204, float 0.000000e+00, i32 1 - %206 = insertelement <4 x float> %205, float 0.000000e+00, i32 2 - %207 = insertelement <4 x float> %206, float 0.000000e+00, i32 3 - %208 = call <4 x float> @llvm.fma.v4f32(<4 x float> %201, <4 x float> %207, <4 x float> %195) - %209 = extractelement <4 x float> %208, i32 0 - %210 = getelementptr inbounds float, float* %2, i64 4 - store float %209, float* %210, align 4 - %211 = insertelement <4 x float> zeroinitializer, float %125, i32 0 - %212 = insertelement <4 x float> %211, float 0.000000e+00, i32 1 - %213 = insertelement <4 x float> %212, float 0.000000e+00, i32 2 - %214 = insertelement <4 x float> %213, float 0.000000e+00, i32 3 - %215 = load float, float* %0, align 4 - %216 = insertelement <4 x float> zeroinitializer, float %215, i32 0 - %217 = insertelement <4 x float> %216, float 1.000000e+00, i32 1 - %218 = insertelement <4 x float> %217, float 1.000000e+00, i32 2 - %219 = insertelement <4 x float> %218, float 1.000000e+00, i32 3 - %220 = insertelement <4 x float> zeroinitializer, float %136, i32 0 - %221 = insertelement <4 x float> %220, float 0.000000e+00, i32 1 - %222 = insertelement <4 x float> %221, float 0.000000e+00, i32 2 - %223 = insertelement <4 x float> %222, float 0.000000e+00, i32 3 - %224 = call <4 x float> @llvm.fma.v4f32(<4 x float> %219, <4 x float> %223, <4 x float> %214) - %225 = insertelement <4 x float> zeroinitializer, float %159, i32 0 - %226 = insertelement <4 x float> %225, float 1.000000e+00, i32 1 - %227 = insertelement <4 x float> %226, float 
1.000000e+00, i32 2 - %228 = insertelement <4 x float> %227, float 1.000000e+00, i32 3 - %229 = insertelement <4 x float> zeroinitializer, float %165, i32 0 - %230 = insertelement <4 x float> %229, float 0.000000e+00, i32 1 - %231 = insertelement <4 x float> %230, float 0.000000e+00, i32 2 - %232 = insertelement <4 x float> %231, float 0.000000e+00, i32 3 - %233 = call <4 x float> @llvm.fma.v4f32(<4 x float> %228, <4 x float> %232, <4 x float> %224) - %234 = insertelement <4 x float> zeroinitializer, float %197, i32 0 - %235 = insertelement <4 x float> %234, float 1.000000e+00, i32 1 - %236 = insertelement <4 x float> %235, float 1.000000e+00, i32 2 - %237 = insertelement <4 x float> %236, float 1.000000e+00, i32 3 - %238 = insertelement <4 x float> zeroinitializer, float %203, i32 0 - %239 = insertelement <4 x float> %238, float 0.000000e+00, i32 1 - %240 = insertelement <4 x float> %239, float 0.000000e+00, i32 2 - %241 = insertelement <4 x float> %240, float 0.000000e+00, i32 3 - %242 = call <4 x float> @llvm.fma.v4f32(<4 x float> %237, <4 x float> %241, <4 x float> %233) - %243 = getelementptr inbounds float, float* %0, i64 3 - %244 = load float, float* %243, align 4 - %245 = insertelement <4 x float> zeroinitializer, float %244, i32 0 - %246 = insertelement <4 x float> %245, float 0.000000e+00, i32 1 - %247 = insertelement <4 x float> %246, float 0.000000e+00, i32 2 - %248 = insertelement <4 x float> %247, float 0.000000e+00, i32 3 - %249 = load float, float* %1, align 4 - %250 = insertelement <4 x float> zeroinitializer, float %249, i32 0 - %251 = insertelement <4 x float> %250, float 0.000000e+00, i32 1 - %252 = insertelement <4 x float> %251, float 0.000000e+00, i32 2 - %253 = insertelement <4 x float> %252, float 0.000000e+00, i32 3 - %254 = call <4 x float> @llvm.fma.v4f32(<4 x float> %248, <4 x float> %253, <4 x float> %242) - %255 = extractelement <4 x float> %254, i32 0 - %256 = getelementptr inbounds float, float* %2, i64 4 - store float %255, float* %256, align 4 - %257 = getelementptr inbounds float, float* %2, i64 5 - %258 = load float, float* %257, align 4 - %259 = insertelement <4 x float> zeroinitializer, float %258, i32 0 - %260 = insertelement <4 x float> %259, float 0.000000e+00, i32 1 - %261 = insertelement <4 x float> %260, float 0.000000e+00, i32 2 - %262 = insertelement <4 x float> %261, float 0.000000e+00, i32 3 - %263 = getelementptr inbounds float, float* %0, i64 1 - %264 = load float, float* %263, align 4 - %265 = insertelement <4 x float> zeroinitializer, float %264, i32 0 - %266 = insertelement <4 x float> %265, float 0.000000e+00, i32 1 - %267 = insertelement <4 x float> %266, float 0.000000e+00, i32 2 - %268 = insertelement <4 x float> %267, float 0.000000e+00, i32 3 - %269 = getelementptr inbounds float, float* %1, i64 3 - %270 = load float, float* %269, align 4 - %271 = insertelement <4 x float> zeroinitializer, float %270, i32 0 - %272 = insertelement <4 x float> %271, float 0.000000e+00, i32 1 - %273 = insertelement <4 x float> %272, float 0.000000e+00, i32 2 - %274 = insertelement <4 x float> %273, float 0.000000e+00, i32 3 - %275 = call <4 x float> @llvm.fma.v4f32(<4 x float> %268, <4 x float> %274, <4 x float> %262) - %276 = extractelement <4 x float> %275, i32 0 - %277 = getelementptr inbounds float, float* %2, i64 5 - store float %276, float* %277, align 4 - %278 = insertelement <4 x float> zeroinitializer, float %258, i32 0 - %279 = insertelement <4 x float> %278, float 0.000000e+00, i32 1 - %280 = insertelement <4 x float> %279, float 
0.000000e+00, i32 2 - %281 = insertelement <4 x float> %280, float 0.000000e+00, i32 3 - %282 = insertelement <4 x float> zeroinitializer, float %264, i32 0 - %283 = insertelement <4 x float> %282, float 1.000000e+00, i32 1 - %284 = insertelement <4 x float> %283, float 1.000000e+00, i32 2 - %285 = insertelement <4 x float> %284, float 1.000000e+00, i32 3 - %286 = insertelement <4 x float> zeroinitializer, float %270, i32 0 - %287 = insertelement <4 x float> %286, float 0.000000e+00, i32 1 - %288 = insertelement <4 x float> %287, float 0.000000e+00, i32 2 - %289 = insertelement <4 x float> %288, float 0.000000e+00, i32 3 - %290 = call <4 x float> @llvm.fma.v4f32(<4 x float> %285, <4 x float> %289, <4 x float> %281) - %291 = getelementptr inbounds float, float* %0, i64 3 - %292 = load float, float* %291, align 4 - %293 = insertelement <4 x float> zeroinitializer, float %292, i32 0 - %294 = insertelement <4 x float> %293, float 0.000000e+00, i32 1 - %295 = insertelement <4 x float> %294, float 0.000000e+00, i32 2 - %296 = insertelement <4 x float> %295, float 0.000000e+00, i32 3 - %297 = getelementptr inbounds float, float* %1, i64 1 - %298 = load float, float* %297, align 4 - %299 = insertelement <4 x float> zeroinitializer, float %298, i32 0 - %300 = insertelement <4 x float> %299, float 0.000000e+00, i32 1 - %301 = insertelement <4 x float> %300, float 0.000000e+00, i32 2 - %302 = insertelement <4 x float> %301, float 0.000000e+00, i32 3 - %303 = call <4 x float> @llvm.fma.v4f32(<4 x float> %296, <4 x float> %302, <4 x float> %290) - %304 = extractelement <4 x float> %303, i32 0 - %305 = getelementptr inbounds float, float* %2, i64 5 - store float %304, float* %305, align 4 - %306 = getelementptr inbounds float, float* %2, i64 6 - %307 = load float, float* %306, align 4 - %308 = insertelement <4 x float> zeroinitializer, float %307, i32 0 - %309 = insertelement <4 x float> %308, float 0.000000e+00, i32 1 - %310 = insertelement <4 x float> %309, float 0.000000e+00, i32 2 - %311 = insertelement <4 x float> %310, float 0.000000e+00, i32 3 - %312 = getelementptr inbounds float, float* %0, i64 2 - %313 = load float, float* %312, align 4 - %314 = insertelement <4 x float> zeroinitializer, float %313, i32 0 - %315 = insertelement <4 x float> %314, float 0.000000e+00, i32 1 - %316 = insertelement <4 x float> %315, float 0.000000e+00, i32 2 - %317 = insertelement <4 x float> %316, float 0.000000e+00, i32 3 - %318 = getelementptr inbounds float, float* %1, i64 2 - %319 = load float, float* %318, align 4 - %320 = insertelement <4 x float> zeroinitializer, float %319, i32 0 - %321 = insertelement <4 x float> %320, float 0.000000e+00, i32 1 - %322 = insertelement <4 x float> %321, float 0.000000e+00, i32 2 - %323 = insertelement <4 x float> %322, float 0.000000e+00, i32 3 - %324 = call <4 x float> @llvm.fma.v4f32(<4 x float> %317, <4 x float> %323, <4 x float> %311) - %325 = extractelement <4 x float> %324, i32 0 - %326 = getelementptr inbounds float, float* %2, i64 6 - store float %325, float* %326, align 4 - %327 = getelementptr inbounds float, float* %2, i64 7 - %328 = load float, float* %327, align 4 - %329 = insertelement <4 x float> zeroinitializer, float %328, i32 0 - %330 = insertelement <4 x float> %329, float 0.000000e+00, i32 1 - %331 = insertelement <4 x float> %330, float 0.000000e+00, i32 2 - %332 = insertelement <4 x float> %331, float 0.000000e+00, i32 3 - %333 = getelementptr inbounds float, float* %0, i64 2 - %334 = load float, float* %333, align 4 - %335 = insertelement <4 x float> 
zeroinitializer, float %334, i32 0 - %336 = insertelement <4 x float> %335, float 0.000000e+00, i32 1 - %337 = insertelement <4 x float> %336, float 0.000000e+00, i32 2 - %338 = insertelement <4 x float> %337, float 0.000000e+00, i32 3 - %339 = getelementptr inbounds float, float* %1, i64 3 - %340 = load float, float* %339, align 4 - %341 = insertelement <4 x float> zeroinitializer, float %340, i32 0 - %342 = insertelement <4 x float> %341, float 0.000000e+00, i32 1 - %343 = insertelement <4 x float> %342, float 0.000000e+00, i32 2 - %344 = insertelement <4 x float> %343, float 0.000000e+00, i32 3 - %345 = call <4 x float> @llvm.fma.v4f32(<4 x float> %338, <4 x float> %344, <4 x float> %332) - %346 = extractelement <4 x float> %345, i32 0 - %347 = getelementptr inbounds float, float* %2, i64 7 - store float %346, float* %347, align 4 - %348 = insertelement <4 x float> zeroinitializer, float %328, i32 0 - %349 = insertelement <4 x float> %348, float 0.000000e+00, i32 1 - %350 = insertelement <4 x float> %349, float 0.000000e+00, i32 2 - %351 = insertelement <4 x float> %350, float 0.000000e+00, i32 3 - %352 = insertelement <4 x float> zeroinitializer, float %334, i32 0 - %353 = insertelement <4 x float> %352, float 1.000000e+00, i32 1 - %354 = insertelement <4 x float> %353, float 1.000000e+00, i32 2 - %355 = insertelement <4 x float> %354, float 1.000000e+00, i32 3 - %356 = insertelement <4 x float> zeroinitializer, float %340, i32 0 - %357 = insertelement <4 x float> %356, float 0.000000e+00, i32 1 - %358 = insertelement <4 x float> %357, float 0.000000e+00, i32 2 - %359 = insertelement <4 x float> %358, float 0.000000e+00, i32 3 - %360 = call <4 x float> @llvm.fma.v4f32(<4 x float> %355, <4 x float> %359, <4 x float> %351) - %361 = getelementptr inbounds float, float* %0, i64 3 - %362 = load float, float* %361, align 4 - %363 = insertelement <4 x float> zeroinitializer, float %362, i32 0 - %364 = insertelement <4 x float> %363, float 0.000000e+00, i32 1 - %365 = insertelement <4 x float> %364, float 0.000000e+00, i32 2 - %366 = insertelement <4 x float> %365, float 0.000000e+00, i32 3 - %367 = getelementptr inbounds float, float* %1, i64 2 - %368 = load float, float* %367, align 4 - %369 = insertelement <4 x float> zeroinitializer, float %368, i32 0 - %370 = insertelement <4 x float> %369, float 0.000000e+00, i32 1 - %371 = insertelement <4 x float> %370, float 0.000000e+00, i32 2 - %372 = insertelement <4 x float> %371, float 0.000000e+00, i32 3 - %373 = call <4 x float> @llvm.fma.v4f32(<4 x float> %366, <4 x float> %372, <4 x float> %360) - %374 = extractelement <4 x float> %373, i32 0 - %375 = getelementptr inbounds float, float* %2, i64 7 - store float %374, float* %375, align 4 - %376 = getelementptr inbounds float, float* %2, i64 8 - %377 = load float, float* %376, align 4 - %378 = insertelement <4 x float> zeroinitializer, float %377, i32 0 - %379 = insertelement <4 x float> %378, float 0.000000e+00, i32 1 - %380 = insertelement <4 x float> %379, float 0.000000e+00, i32 2 - %381 = insertelement <4 x float> %380, float 0.000000e+00, i32 3 - %382 = getelementptr inbounds float, float* %0, i64 3 - %383 = load float, float* %382, align 4 - %384 = insertelement <4 x float> zeroinitializer, float %383, i32 0 - %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 1 - %386 = insertelement <4 x float> %385, float 0.000000e+00, i32 2 - %387 = insertelement <4 x float> %386, float 0.000000e+00, i32 3 - %388 = getelementptr inbounds float, float* %1, i64 3 - %389 = load float, 
float* %388, align 4 - %390 = insertelement <4 x float> zeroinitializer, float %389, i32 0 - %391 = insertelement <4 x float> %390, float 0.000000e+00, i32 1 - %392 = insertelement <4 x float> %391, float 0.000000e+00, i32 2 - %393 = insertelement <4 x float> %392, float 0.000000e+00, i32 3 - %394 = call <4 x float> @llvm.fma.v4f32(<4 x float> %387, <4 x float> %393, <4 x float> %381) - %395 = extractelement <4 x float> %394, i32 0 - %396 = getelementptr inbounds float, float* %2, i64 8 - store float %395, float* %396, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #0 { - %1 = alloca [4 x float], align 16 - %2 = alloca [4 x float], align 16 - %3 = alloca [9 x float], align 16 - %4 = bitcast [4 x float]* %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.mat_in to i8*), i64 16, i1 false) - %5 = bitcast [4 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.f_in to i8*), i64 16, i1 false) - %6 = bitcast [9 x float]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) - %7 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 - %8 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %9 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 0 - call void @convolution(float* nonnull %7, float* nonnull %8, float* nonnull %9) - %10 = load float, float* %9, align 16 - %11 = fpext float %10 to double - %12 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %11) #6 - %13 = load float, float* %9, align 16 - %14 = fcmp une float %13, 1.000000e+00 - br i1 %14, label %22, label %15 - -15: ; preds = %0 - %16 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 1 - %17 = load float, float* %16, align 4 - %18 = fpext float %17 to double - %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #6 - %20 = load float, float* %16, align 4 - %21 = fcmp une float %20, 3.000000e+00 - br i1 %21, label %22, label %23 - -22: ; preds = %65, %58, %51, %44, %37, %30, %23, %15, %0 - call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0), i32 46, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.2, i64 0, i64 0)) #7 - unreachable - -23: ; preds = %15 - %24 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 2 - %25 = load float, float* %24, align 8 - %26 = fpext float %25 to double - %27 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #6 - %28 = load float, float* %24, align 8 - %29 = fcmp une float %28, 2.000000e+00 - br i1 %29, label %22, label %30 - -30: ; preds = %23 - %31 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 3 - %32 = load float, float* %31, align 4 - %33 = fpext float %32 to double - %34 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %33) #6 - %35 = load float, float* %31, align 4 - %36 = fcmp une float %35, 4.000000e+00 - br i1 %36, label %22, label %37 - -37: ; preds = %30 - %38 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 4 - %39 = load float, float* %38, align 16 - %40 = fpext float %39 to double - %41 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %40) #6 - %42 = load float, float* %38, align 16 - %43 = fcmp une float %42, 1.000000e+01 - br i1 %43, label %22, label %44 - -44: ; preds = %37 - %45 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 5 - %46 = load float, float* %45, align 4 - %47 = fpext float %46 to double - %48 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %47) #6 - %49 = load float, float* %45, align 4 - %50 = fcmp une float %49, 6.000000e+00 - br i1 %50, label %22, label %51 - -51: ; preds = %44 - %52 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 6 - %53 = load float, float* %52, align 8 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #6 - %56 = load float, float* %52, align 8 - %57 = fcmp une float %56, 3.000000e+00 - br i1 %57, label %22, label %58 - -58: ; preds = %51 - %59 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 7 - %60 = load float, float* %59, align 4 - %61 = fpext float %60 to double - %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %61) #6 - %63 = load float, float* %59, align 4 - %64 = fcmp une float %63, 7.000000e+00 - br i1 %64, label %22, label %65 - -65: ; preds = %58 - %66 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 8 - %67 = load float, float* %66, align 16 - %68 = fpext float %67 to double - %69 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %68) #6 - %70 = load float, float* %66, align 16 - %71 = fcmp une float %70, 4.000000e+00 - br i1 %71, label %22, label %72 - -72: ; preds = %65 - ret i32 0 -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 - -declare i32 @printf(i8*, ...) 
#3 - -; Function Attrs: noreturn -declare void @__assert_rtn(i8*, i8*, i32, i8*) #4 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #5 - -attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { argmemonly nounwind willreturn writeonly } -attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { nounwind readnone speculatable willreturn } -attributes #6 = { nounwind } -attributes #7 = { noreturn nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/diospyros.ll b/src/dios-egraphs/Diospyros/diospyros.ll deleted file mode 100644 index 36af488c..00000000 --- a/src/dios-egraphs/Diospyros/diospyros.ll +++ /dev/null @@ -1,721 +0,0 @@ -; ModuleID = 'aa.ll' -source_filename = "llvm-tests/2d-conv.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@__const.main.mat_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00], align 16 -@__const.main.f_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 -@__const.main.expected = private unnamed_addr constant [9 x float] [float 1.000000e+00, float 3.000000e+00, float 2.000000e+00, float 4.000000e+00, float 1.000000e+01, float 6.000000e+00, float 3.000000e+00, float 7.000000e+00, float 4.000000e+00], align 16 -@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 -@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 -@.str.1 = private unnamed_addr constant [21 x i8] c"llvm-tests/2d-conv.c\00", align 1 -@.str.2 = private unnamed_addr constant [26 x i8] c"mat_out[i] == expected[i]\00", align 1 - -; Function Attrs: noinline nounwind ssp uwtable -define void @convolution(float* %0, float* %1, float* %2) #0 { -.preheader7: - %3 = 
load float, float* %0, align 4 - %4 = load float, float* %2, align 4 - %5 = load float, float* %1, align 4 - %6 = load float, float* %0, align 4 - %7 = fmul float %6, %5 - %8 = fadd float %4, %7 - %9 = getelementptr inbounds float, float* %2, i64 1 - %10 = getelementptr inbounds float, float* %1, i64 1 - %11 = load float, float* %10, align 4 - %12 = fmul float %3, %11 - %13 = load float, float* %9, align 4 - %14 = fadd float %13, %12 - %15 = getelementptr inbounds float, float* %0, i64 1 - %16 = load float, float* %15, align 4 - %17 = load float, float* %1, align 4 - %18 = fmul float %16, %17 - %19 = fadd float %14, %18 - %20 = getelementptr inbounds float, float* %2, i64 2 - %21 = load float, float* %15, align 4 - %22 = load float, float* %0, align 4 - %23 = load float, float* %10, align 4 - %24 = fmul float %21, %23 - %25 = load float, float* %20, align 4 - %26 = fadd float %25, %24 - %27 = getelementptr inbounds float, float* %2, i64 3 - %28 = getelementptr inbounds float, float* %1, i64 2 - %29 = load float, float* %28, align 4 - %30 = fmul float %22, %29 - %31 = load float, float* %27, align 4 - %32 = fadd float %31, %30 - %33 = getelementptr inbounds float, float* %0, i64 2 - %34 = load float, float* %33, align 4 - %35 = load float, float* %1, align 4 - %36 = load float, float* %0, align 4 - %37 = fmul float %34, %35 - %38 = fadd float %32, %37 - %39 = getelementptr inbounds float, float* %2, i64 4 - %40 = getelementptr inbounds float, float* %1, i64 3 - %41 = load float, float* %40, align 4 - %42 = fmul float %36, %41 - %43 = load float, float* %39, align 4 - %44 = fadd float %43, %42 - %45 = load float, float* %15, align 4 - %46 = load float, float* %28, align 4 - %47 = fmul float %45, %46 - %48 = fadd float %44, %47 - %49 = load float, float* %33, align 4 - %50 = load float, float* %10, align 4 - %51 = fmul float %49, %50 - %52 = fadd float %48, %51 - %53 = getelementptr inbounds float, float* %0, i64 3 - %54 = load float, float* %53, align 4 - %55 = load float, float* %1, align 4 - %56 = fmul float %54, %55 - %57 = fadd float %52, %56 - %58 = getelementptr inbounds float, float* %2, i64 5 - %59 = load float, float* %15, align 4 - %60 = load float, float* %40, align 4 - %61 = fmul float %59, %60 - %62 = load float, float* %58, align 4 - %63 = fadd float %62, %61 - %64 = load float, float* %53, align 4 - %65 = load float, float* %10, align 4 - %66 = fmul float %64, %65 - %67 = fadd float %63, %66 - %68 = getelementptr inbounds float, float* %2, i64 6 - %69 = load float, float* %33, align 4 - %70 = load float, float* %28, align 4 - %71 = fmul float %69, %70 - %72 = load float, float* %68, align 4 - %73 = fadd float %72, %71 - %74 = getelementptr inbounds float, float* %2, i64 7 - %75 = load float, float* %33, align 4 - %76 = load float, float* %40, align 4 - %77 = fmul float %75, %76 - %78 = load float, float* %74, align 4 - %79 = fadd float %78, %77 - %80 = load float, float* %53, align 4 - %81 = load float, float* %28, align 4 - %82 = fmul float %80, %81 - %83 = fadd float %79, %82 - %84 = getelementptr inbounds float, float* %2, i64 8 - %85 = load float, float* %53, align 4 - %86 = load float, float* %40, align 4 - %87 = fmul float %85, %86 - %88 = load float, float* %84, align 4 - %89 = fadd float %88, %87 - %90 = load float, float* %2, align 4 - %91 = insertelement <4 x float> zeroinitializer, float %90, i32 0 - %92 = insertelement <4 x float> %91, float 0.000000e+00, i32 1 - %93 = insertelement <4 x float> %92, float 0.000000e+00, i32 2 - %94 = insertelement <4 x float> %93, 
float 0.000000e+00, i32 3 - %95 = load float, float* %0, align 4 - %96 = insertelement <4 x float> zeroinitializer, float %95, i32 0 - %97 = insertelement <4 x float> %96, float 0.000000e+00, i32 1 - %98 = insertelement <4 x float> %97, float 0.000000e+00, i32 2 - %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 3 - %100 = load float, float* %1, align 4 - %101 = insertelement <4 x float> zeroinitializer, float %100, i32 0 - %102 = insertelement <4 x float> %101, float 0.000000e+00, i32 1 - %103 = insertelement <4 x float> %102, float 0.000000e+00, i32 2 - %104 = insertelement <4 x float> %103, float 0.000000e+00, i32 3 - %105 = call <4 x float> @llvm.fma.f32(<4 x float> %99, <4 x float> %104, <4 x float> %94) - %106 = extractelement <4 x float> %105, i32 0 - store float %106, float* %2, align 4 - %107 = getelementptr inbounds float, float* %2, i64 1 - %108 = load float, float* %107, align 4 - %109 = insertelement <4 x float> zeroinitializer, float %108, i32 0 - %110 = insertelement <4 x float> %109, float 0.000000e+00, i32 1 - %111 = insertelement <4 x float> %110, float 0.000000e+00, i32 2 - %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 3 - %113 = load float, float* %0, align 4 - %114 = insertelement <4 x float> zeroinitializer, float %113, i32 0 - %115 = insertelement <4 x float> %114, float 0.000000e+00, i32 1 - %116 = insertelement <4 x float> %115, float 0.000000e+00, i32 2 - %117 = insertelement <4 x float> %116, float 0.000000e+00, i32 3 - %118 = getelementptr inbounds float, float* %1, i64 1 - %119 = load float, float* %118, align 4 - %120 = insertelement <4 x float> zeroinitializer, float %119, i32 0 - %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 1 - %122 = insertelement <4 x float> %121, float 0.000000e+00, i32 2 - %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3 - %124 = call <4 x float> @llvm.fma.f32.1(<4 x float> %117, <4 x float> %123, <4 x float> %112) - %125 = extractelement <4 x float> %124, i32 0 - %126 = getelementptr inbounds float, float* %2, i64 1 - store float %125, float* %126, align 4 - %127 = insertelement <4 x float> zeroinitializer, float %108, i32 0 - %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 1 - %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 2 - %130 = insertelement <4 x float> %129, float 0.000000e+00, i32 3 - %131 = load float, float* %0, align 4 - %132 = insertelement <4 x float> zeroinitializer, float %131, i32 0 - %133 = insertelement <4 x float> %132, float 1.000000e+00, i32 1 - %134 = insertelement <4 x float> %133, float 1.000000e+00, i32 2 - %135 = insertelement <4 x float> %134, float 1.000000e+00, i32 3 - %136 = insertelement <4 x float> zeroinitializer, float %119, i32 0 - %137 = insertelement <4 x float> %136, float 0.000000e+00, i32 1 - %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 2 - %139 = insertelement <4 x float> %138, float 0.000000e+00, i32 3 - %140 = call <4 x float> @llvm.fma.f32.2(<4 x float> %135, <4 x float> %139, <4 x float> %130) - %141 = getelementptr inbounds float, float* %0, i64 1 - %142 = load float, float* %141, align 4 - %143 = insertelement <4 x float> zeroinitializer, float %142, i32 0 - %144 = insertelement <4 x float> %143, float 0.000000e+00, i32 1 - %145 = insertelement <4 x float> %144, float 0.000000e+00, i32 2 - %146 = insertelement <4 x float> %145, float 0.000000e+00, i32 3 - %147 = load float, float* %1, align 4 - %148 = insertelement <4 x float> zeroinitializer, float %147, i32 0 - %149 = 
insertelement <4 x float> %148, float 0.000000e+00, i32 1 - %150 = insertelement <4 x float> %149, float 0.000000e+00, i32 2 - %151 = insertelement <4 x float> %150, float 0.000000e+00, i32 3 - %152 = call <4 x float> @llvm.fma.f32.3(<4 x float> %146, <4 x float> %151, <4 x float> %140) - %153 = extractelement <4 x float> %152, i32 0 - %154 = getelementptr inbounds float, float* %2, i64 1 - store float %153, float* %154, align 4 - %155 = getelementptr inbounds float, float* %2, i64 2 - %156 = load float, float* %155, align 4 - %157 = insertelement <4 x float> zeroinitializer, float %156, i32 0 - %158 = insertelement <4 x float> %157, float 0.000000e+00, i32 1 - %159 = insertelement <4 x float> %158, float 0.000000e+00, i32 2 - %160 = insertelement <4 x float> %159, float 0.000000e+00, i32 3 - %161 = getelementptr inbounds float, float* %0, i64 1 - %162 = load float, float* %161, align 4 - %163 = insertelement <4 x float> zeroinitializer, float %162, i32 0 - %164 = insertelement <4 x float> %163, float 0.000000e+00, i32 1 - %165 = insertelement <4 x float> %164, float 0.000000e+00, i32 2 - %166 = insertelement <4 x float> %165, float 0.000000e+00, i32 3 - %167 = getelementptr inbounds float, float* %1, i64 1 - %168 = load float, float* %167, align 4 - %169 = insertelement <4 x float> zeroinitializer, float %168, i32 0 - %170 = insertelement <4 x float> %169, float 0.000000e+00, i32 1 - %171 = insertelement <4 x float> %170, float 0.000000e+00, i32 2 - %172 = insertelement <4 x float> %171, float 0.000000e+00, i32 3 - %173 = call <4 x float> @llvm.fma.f32.4(<4 x float> %166, <4 x float> %172, <4 x float> %160) - %174 = extractelement <4 x float> %173, i32 0 - %175 = getelementptr inbounds float, float* %2, i64 2 - store float %174, float* %175, align 4 - %176 = getelementptr inbounds float, float* %2, i64 3 - %177 = load float, float* %176, align 4 - %178 = insertelement <4 x float> zeroinitializer, float %177, i32 1 - %179 = insertelement <4 x float> %178, float 0.000000e+00, i32 2 - %180 = insertelement <4 x float> %179, float 0.000000e+00, i32 3 - %181 = load float, float* %0, align 4 - %182 = insertelement <4 x float> , float %181, i32 1 - %183 = insertelement <4 x float> %182, float 1.000000e+00, i32 2 - %184 = insertelement <4 x float> %183, float 1.000000e+00, i32 3 - %185 = getelementptr inbounds float, float* %2, i64 3 - %186 = load float, float* %185, align 4 - %187 = insertelement <4 x float> zeroinitializer, float %186, i32 0 - %188 = getelementptr inbounds float, float* %1, i64 2 - %189 = load float, float* %188, align 4 - %190 = insertelement <4 x float> %187, float %189, i32 1 - %191 = insertelement <4 x float> %190, float 0.000000e+00, i32 2 - %192 = insertelement <4 x float> %191, float 0.000000e+00, i32 3 - %193 = call <4 x float> @llvm.fma.f32.5(<4 x float> %184, <4 x float> %192, <4 x float> %180) - %194 = load float, float* %0, align 4 - %195 = insertelement <4 x float> zeroinitializer, float %194, i32 0 - %196 = getelementptr inbounds float, float* %0, i64 2 - %197 = load float, float* %196, align 4 - %198 = insertelement <4 x float> %195, float %197, i32 1 - %199 = insertelement <4 x float> %198, float 0.000000e+00, i32 2 - %200 = insertelement <4 x float> %199, float 0.000000e+00, i32 3 - %201 = getelementptr inbounds float, float* %1, i64 2 - %202 = load float, float* %201, align 4 - %203 = insertelement <4 x float> zeroinitializer, float %202, i32 0 - %204 = load float, float* %1, align 4 - %205 = insertelement <4 x float> %203, float %204, i32 1 - %206 = 
insertelement <4 x float> %205, float 0.000000e+00, i32 2 - %207 = insertelement <4 x float> %206, float 0.000000e+00, i32 3 - %208 = call <4 x float> @llvm.fma.f32.6(<4 x float> %200, <4 x float> %207, <4 x float> %193) - %209 = extractelement <4 x float> %208, i32 0 - %210 = getelementptr inbounds float, float* %2, i64 3 - store float %209, float* %210, align 4 - %211 = extractelement <4 x float> %208, i32 1 - %212 = getelementptr inbounds float, float* %2, i64 3 - store float %211, float* %212, align 4 - %213 = getelementptr inbounds float, float* %2, i64 4 - %214 = load float, float* %213, align 4 - %215 = insertelement <4 x float> zeroinitializer, float %214, i32 0 - %216 = insertelement <4 x float> %215, float 0.000000e+00, i32 1 - %217 = insertelement <4 x float> %216, float 0.000000e+00, i32 2 - %218 = insertelement <4 x float> %217, float 0.000000e+00, i32 3 - %219 = load float, float* %0, align 4 - %220 = insertelement <4 x float> zeroinitializer, float %219, i32 0 - %221 = insertelement <4 x float> %220, float 0.000000e+00, i32 1 - %222 = insertelement <4 x float> %221, float 0.000000e+00, i32 2 - %223 = insertelement <4 x float> %222, float 0.000000e+00, i32 3 - %224 = getelementptr inbounds float, float* %1, i64 3 - %225 = load float, float* %224, align 4 - %226 = insertelement <4 x float> zeroinitializer, float %225, i32 0 - %227 = insertelement <4 x float> %226, float 0.000000e+00, i32 1 - %228 = insertelement <4 x float> %227, float 0.000000e+00, i32 2 - %229 = insertelement <4 x float> %228, float 0.000000e+00, i32 3 - %230 = call <4 x float> @llvm.fma.f32.7(<4 x float> %223, <4 x float> %229, <4 x float> %218) - %231 = extractelement <4 x float> %230, i32 0 - %232 = getelementptr inbounds float, float* %2, i64 4 - store float %231, float* %232, align 4 - %233 = insertelement <4 x float> zeroinitializer, float %214, i32 0 - %234 = insertelement <4 x float> %233, float 0.000000e+00, i32 1 - %235 = insertelement <4 x float> %234, float 0.000000e+00, i32 2 - %236 = insertelement <4 x float> %235, float 0.000000e+00, i32 3 - %237 = load float, float* %0, align 4 - %238 = insertelement <4 x float> zeroinitializer, float %237, i32 0 - %239 = insertelement <4 x float> %238, float 1.000000e+00, i32 1 - %240 = insertelement <4 x float> %239, float 1.000000e+00, i32 2 - %241 = insertelement <4 x float> %240, float 1.000000e+00, i32 3 - %242 = insertelement <4 x float> zeroinitializer, float %225, i32 0 - %243 = insertelement <4 x float> %242, float 0.000000e+00, i32 1 - %244 = insertelement <4 x float> %243, float 0.000000e+00, i32 2 - %245 = insertelement <4 x float> %244, float 0.000000e+00, i32 3 - %246 = call <4 x float> @llvm.fma.f32.8(<4 x float> %241, <4 x float> %245, <4 x float> %236) - %247 = getelementptr inbounds float, float* %0, i64 1 - %248 = load float, float* %247, align 4 - %249 = insertelement <4 x float> zeroinitializer, float %248, i32 0 - %250 = insertelement <4 x float> %249, float 0.000000e+00, i32 1 - %251 = insertelement <4 x float> %250, float 0.000000e+00, i32 2 - %252 = insertelement <4 x float> %251, float 0.000000e+00, i32 3 - %253 = getelementptr inbounds float, float* %1, i64 2 - %254 = load float, float* %253, align 4 - %255 = insertelement <4 x float> zeroinitializer, float %254, i32 0 - %256 = insertelement <4 x float> %255, float 0.000000e+00, i32 1 - %257 = insertelement <4 x float> %256, float 0.000000e+00, i32 2 - %258 = insertelement <4 x float> %257, float 0.000000e+00, i32 3 - %259 = call <4 x float> @llvm.fma.f32.9(<4 x float> %252, <4 x 
float> %258, <4 x float> %246) - %260 = extractelement <4 x float> %259, i32 0 - %261 = getelementptr inbounds float, float* %2, i64 4 - store float %260, float* %261, align 4 - %262 = insertelement <4 x float> zeroinitializer, float %214, i32 0 - %263 = insertelement <4 x float> %262, float 0.000000e+00, i32 1 - %264 = insertelement <4 x float> %263, float 0.000000e+00, i32 2 - %265 = insertelement <4 x float> %264, float 0.000000e+00, i32 3 - %266 = load float, float* %0, align 4 - %267 = insertelement <4 x float> zeroinitializer, float %266, i32 0 - %268 = insertelement <4 x float> %267, float 1.000000e+00, i32 1 - %269 = insertelement <4 x float> %268, float 1.000000e+00, i32 2 - %270 = insertelement <4 x float> %269, float 1.000000e+00, i32 3 - %271 = insertelement <4 x float> zeroinitializer, float %225, i32 0 - %272 = insertelement <4 x float> %271, float 0.000000e+00, i32 1 - %273 = insertelement <4 x float> %272, float 0.000000e+00, i32 2 - %274 = insertelement <4 x float> %273, float 0.000000e+00, i32 3 - %275 = call <4 x float> @llvm.fma.f32.10(<4 x float> %270, <4 x float> %274, <4 x float> %265) - %276 = insertelement <4 x float> zeroinitializer, float %248, i32 0 - %277 = insertelement <4 x float> %276, float 1.000000e+00, i32 1 - %278 = insertelement <4 x float> %277, float 1.000000e+00, i32 2 - %279 = insertelement <4 x float> %278, float 1.000000e+00, i32 3 - %280 = insertelement <4 x float> zeroinitializer, float %254, i32 0 - %281 = insertelement <4 x float> %280, float 0.000000e+00, i32 1 - %282 = insertelement <4 x float> %281, float 0.000000e+00, i32 2 - %283 = insertelement <4 x float> %282, float 0.000000e+00, i32 3 - %284 = call <4 x float> @llvm.fma.f32.11(<4 x float> %279, <4 x float> %283, <4 x float> %275) - %285 = getelementptr inbounds float, float* %0, i64 2 - %286 = load float, float* %285, align 4 - %287 = insertelement <4 x float> zeroinitializer, float %286, i32 0 - %288 = insertelement <4 x float> %287, float 0.000000e+00, i32 1 - %289 = insertelement <4 x float> %288, float 0.000000e+00, i32 2 - %290 = insertelement <4 x float> %289, float 0.000000e+00, i32 3 - %291 = getelementptr inbounds float, float* %1, i64 1 - %292 = load float, float* %291, align 4 - %293 = insertelement <4 x float> zeroinitializer, float %292, i32 0 - %294 = insertelement <4 x float> %293, float 0.000000e+00, i32 1 - %295 = insertelement <4 x float> %294, float 0.000000e+00, i32 2 - %296 = insertelement <4 x float> %295, float 0.000000e+00, i32 3 - %297 = call <4 x float> @llvm.fma.f32.12(<4 x float> %290, <4 x float> %296, <4 x float> %284) - %298 = extractelement <4 x float> %297, i32 0 - %299 = getelementptr inbounds float, float* %2, i64 4 - store float %298, float* %299, align 4 - %300 = insertelement <4 x float> zeroinitializer, float %214, i32 0 - %301 = insertelement <4 x float> %300, float 0.000000e+00, i32 1 - %302 = insertelement <4 x float> %301, float 0.000000e+00, i32 2 - %303 = insertelement <4 x float> %302, float 0.000000e+00, i32 3 - %304 = load float, float* %0, align 4 - %305 = insertelement <4 x float> zeroinitializer, float %304, i32 0 - %306 = insertelement <4 x float> %305, float 1.000000e+00, i32 1 - %307 = insertelement <4 x float> %306, float 1.000000e+00, i32 2 - %308 = insertelement <4 x float> %307, float 1.000000e+00, i32 3 - %309 = insertelement <4 x float> zeroinitializer, float %225, i32 0 - %310 = insertelement <4 x float> %309, float 0.000000e+00, i32 1 - %311 = insertelement <4 x float> %310, float 0.000000e+00, i32 2 - %312 = insertelement 
<4 x float> %311, float 0.000000e+00, i32 3 - %313 = call <4 x float> @llvm.fma.f32.13(<4 x float> %308, <4 x float> %312, <4 x float> %303) - %314 = insertelement <4 x float> zeroinitializer, float %248, i32 0 - %315 = insertelement <4 x float> %314, float 1.000000e+00, i32 1 - %316 = insertelement <4 x float> %315, float 1.000000e+00, i32 2 - %317 = insertelement <4 x float> %316, float 1.000000e+00, i32 3 - %318 = insertelement <4 x float> zeroinitializer, float %254, i32 0 - %319 = insertelement <4 x float> %318, float 0.000000e+00, i32 1 - %320 = insertelement <4 x float> %319, float 0.000000e+00, i32 2 - %321 = insertelement <4 x float> %320, float 0.000000e+00, i32 3 - %322 = call <4 x float> @llvm.fma.f32.14(<4 x float> %317, <4 x float> %321, <4 x float> %313) - %323 = insertelement <4 x float> zeroinitializer, float %286, i32 0 - %324 = insertelement <4 x float> %323, float 1.000000e+00, i32 1 - %325 = insertelement <4 x float> %324, float 1.000000e+00, i32 2 - %326 = insertelement <4 x float> %325, float 1.000000e+00, i32 3 - %327 = insertelement <4 x float> zeroinitializer, float %292, i32 0 - %328 = insertelement <4 x float> %327, float 0.000000e+00, i32 1 - %329 = insertelement <4 x float> %328, float 0.000000e+00, i32 2 - %330 = insertelement <4 x float> %329, float 0.000000e+00, i32 3 - %331 = call <4 x float> @llvm.fma.f32.15(<4 x float> %326, <4 x float> %330, <4 x float> %322) - %332 = getelementptr inbounds float, float* %0, i64 3 - %333 = load float, float* %332, align 4 - %334 = insertelement <4 x float> zeroinitializer, float %333, i32 0 - %335 = insertelement <4 x float> %334, float 0.000000e+00, i32 1 - %336 = insertelement <4 x float> %335, float 0.000000e+00, i32 2 - %337 = insertelement <4 x float> %336, float 0.000000e+00, i32 3 - %338 = load float, float* %1, align 4 - %339 = insertelement <4 x float> zeroinitializer, float %338, i32 0 - %340 = insertelement <4 x float> %339, float 0.000000e+00, i32 1 - %341 = insertelement <4 x float> %340, float 0.000000e+00, i32 2 - %342 = insertelement <4 x float> %341, float 0.000000e+00, i32 3 - %343 = call <4 x float> @llvm.fma.f32.16(<4 x float> %337, <4 x float> %342, <4 x float> %331) - %344 = extractelement <4 x float> %343, i32 0 - %345 = getelementptr inbounds float, float* %2, i64 4 - store float %344, float* %345, align 4 - %346 = getelementptr inbounds float, float* %2, i64 5 - %347 = load float, float* %346, align 4 - %348 = insertelement <4 x float> zeroinitializer, float %347, i32 0 - %349 = insertelement <4 x float> %348, float 0.000000e+00, i32 1 - %350 = insertelement <4 x float> %349, float 0.000000e+00, i32 2 - %351 = insertelement <4 x float> %350, float 0.000000e+00, i32 3 - %352 = getelementptr inbounds float, float* %0, i64 1 - %353 = load float, float* %352, align 4 - %354 = insertelement <4 x float> zeroinitializer, float %353, i32 0 - %355 = insertelement <4 x float> %354, float 0.000000e+00, i32 1 - %356 = insertelement <4 x float> %355, float 0.000000e+00, i32 2 - %357 = insertelement <4 x float> %356, float 0.000000e+00, i32 3 - %358 = getelementptr inbounds float, float* %1, i64 3 - %359 = load float, float* %358, align 4 - %360 = insertelement <4 x float> zeroinitializer, float %359, i32 0 - %361 = insertelement <4 x float> %360, float 0.000000e+00, i32 1 - %362 = insertelement <4 x float> %361, float 0.000000e+00, i32 2 - %363 = insertelement <4 x float> %362, float 0.000000e+00, i32 3 - %364 = call <4 x float> @llvm.fma.f32.17(<4 x float> %357, <4 x float> %363, <4 x float> %351) - %365 = 
extractelement <4 x float> %364, i32 0 - %366 = getelementptr inbounds float, float* %2, i64 5 - store float %365, float* %366, align 4 - %367 = insertelement <4 x float> zeroinitializer, float %347, i32 0 - %368 = insertelement <4 x float> %367, float 0.000000e+00, i32 1 - %369 = insertelement <4 x float> %368, float 0.000000e+00, i32 2 - %370 = insertelement <4 x float> %369, float 0.000000e+00, i32 3 - %371 = insertelement <4 x float> zeroinitializer, float %353, i32 0 - %372 = insertelement <4 x float> %371, float 1.000000e+00, i32 1 - %373 = insertelement <4 x float> %372, float 1.000000e+00, i32 2 - %374 = insertelement <4 x float> %373, float 1.000000e+00, i32 3 - %375 = insertelement <4 x float> zeroinitializer, float %359, i32 0 - %376 = insertelement <4 x float> %375, float 0.000000e+00, i32 1 - %377 = insertelement <4 x float> %376, float 0.000000e+00, i32 2 - %378 = insertelement <4 x float> %377, float 0.000000e+00, i32 3 - %379 = call <4 x float> @llvm.fma.f32.18(<4 x float> %374, <4 x float> %378, <4 x float> %370) - %380 = getelementptr inbounds float, float* %0, i64 3 - %381 = load float, float* %380, align 4 - %382 = insertelement <4 x float> zeroinitializer, float %381, i32 0 - %383 = insertelement <4 x float> %382, float 0.000000e+00, i32 1 - %384 = insertelement <4 x float> %383, float 0.000000e+00, i32 2 - %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3 - %386 = getelementptr inbounds float, float* %1, i64 1 - %387 = load float, float* %386, align 4 - %388 = insertelement <4 x float> zeroinitializer, float %387, i32 0 - %389 = insertelement <4 x float> %388, float 0.000000e+00, i32 1 - %390 = insertelement <4 x float> %389, float 0.000000e+00, i32 2 - %391 = insertelement <4 x float> %390, float 0.000000e+00, i32 3 - %392 = call <4 x float> @llvm.fma.f32.19(<4 x float> %385, <4 x float> %391, <4 x float> %379) - %393 = extractelement <4 x float> %392, i32 0 - %394 = getelementptr inbounds float, float* %2, i64 5 - store float %393, float* %394, align 4 - %395 = getelementptr inbounds float, float* %2, i64 6 - %396 = load float, float* %395, align 4 - %397 = insertelement <4 x float> zeroinitializer, float %396, i32 0 - %398 = insertelement <4 x float> %397, float 0.000000e+00, i32 1 - %399 = insertelement <4 x float> %398, float 0.000000e+00, i32 2 - %400 = insertelement <4 x float> %399, float 0.000000e+00, i32 3 - %401 = getelementptr inbounds float, float* %0, i64 2 - %402 = load float, float* %401, align 4 - %403 = insertelement <4 x float> zeroinitializer, float %402, i32 0 - %404 = insertelement <4 x float> %403, float 0.000000e+00, i32 1 - %405 = insertelement <4 x float> %404, float 0.000000e+00, i32 2 - %406 = insertelement <4 x float> %405, float 0.000000e+00, i32 3 - %407 = getelementptr inbounds float, float* %1, i64 2 - %408 = load float, float* %407, align 4 - %409 = insertelement <4 x float> zeroinitializer, float %408, i32 0 - %410 = insertelement <4 x float> %409, float 0.000000e+00, i32 1 - %411 = insertelement <4 x float> %410, float 0.000000e+00, i32 2 - %412 = insertelement <4 x float> %411, float 0.000000e+00, i32 3 - %413 = call <4 x float> @llvm.fma.f32.20(<4 x float> %406, <4 x float> %412, <4 x float> %400) - %414 = extractelement <4 x float> %413, i32 0 - %415 = getelementptr inbounds float, float* %2, i64 6 - store float %414, float* %415, align 4 - %416 = getelementptr inbounds float, float* %2, i64 7 - %417 = load float, float* %416, align 4 - %418 = insertelement <4 x float> zeroinitializer, float %417, i32 0 - %419 = 
insertelement <4 x float> %418, float 0.000000e+00, i32 1 - %420 = insertelement <4 x float> %419, float 0.000000e+00, i32 2 - %421 = insertelement <4 x float> %420, float 0.000000e+00, i32 3 - %422 = getelementptr inbounds float, float* %0, i64 2 - %423 = load float, float* %422, align 4 - %424 = insertelement <4 x float> zeroinitializer, float %423, i32 0 - %425 = insertelement <4 x float> %424, float 0.000000e+00, i32 1 - %426 = insertelement <4 x float> %425, float 0.000000e+00, i32 2 - %427 = insertelement <4 x float> %426, float 0.000000e+00, i32 3 - %428 = getelementptr inbounds float, float* %1, i64 3 - %429 = load float, float* %428, align 4 - %430 = insertelement <4 x float> zeroinitializer, float %429, i32 0 - %431 = insertelement <4 x float> %430, float 0.000000e+00, i32 1 - %432 = insertelement <4 x float> %431, float 0.000000e+00, i32 2 - %433 = insertelement <4 x float> %432, float 0.000000e+00, i32 3 - %434 = call <4 x float> @llvm.fma.f32.21(<4 x float> %427, <4 x float> %433, <4 x float> %421) - %435 = extractelement <4 x float> %434, i32 0 - %436 = getelementptr inbounds float, float* %2, i64 7 - store float %435, float* %436, align 4 - %437 = insertelement <4 x float> zeroinitializer, float %417, i32 0 - %438 = insertelement <4 x float> %437, float 0.000000e+00, i32 1 - %439 = insertelement <4 x float> %438, float 0.000000e+00, i32 2 - %440 = insertelement <4 x float> %439, float 0.000000e+00, i32 3 - %441 = insertelement <4 x float> zeroinitializer, float %423, i32 0 - %442 = insertelement <4 x float> %441, float 1.000000e+00, i32 1 - %443 = insertelement <4 x float> %442, float 1.000000e+00, i32 2 - %444 = insertelement <4 x float> %443, float 1.000000e+00, i32 3 - %445 = insertelement <4 x float> zeroinitializer, float %429, i32 0 - %446 = insertelement <4 x float> %445, float 0.000000e+00, i32 1 - %447 = insertelement <4 x float> %446, float 0.000000e+00, i32 2 - %448 = insertelement <4 x float> %447, float 0.000000e+00, i32 3 - %449 = call <4 x float> @llvm.fma.f32.22(<4 x float> %444, <4 x float> %448, <4 x float> %440) - %450 = getelementptr inbounds float, float* %0, i64 3 - %451 = load float, float* %450, align 4 - %452 = insertelement <4 x float> zeroinitializer, float %451, i32 0 - %453 = insertelement <4 x float> %452, float 0.000000e+00, i32 1 - %454 = insertelement <4 x float> %453, float 0.000000e+00, i32 2 - %455 = insertelement <4 x float> %454, float 0.000000e+00, i32 3 - %456 = getelementptr inbounds float, float* %1, i64 2 - %457 = load float, float* %456, align 4 - %458 = insertelement <4 x float> zeroinitializer, float %457, i32 0 - %459 = insertelement <4 x float> %458, float 0.000000e+00, i32 1 - %460 = insertelement <4 x float> %459, float 0.000000e+00, i32 2 - %461 = insertelement <4 x float> %460, float 0.000000e+00, i32 3 - %462 = call <4 x float> @llvm.fma.f32.23(<4 x float> %455, <4 x float> %461, <4 x float> %449) - %463 = extractelement <4 x float> %462, i32 0 - %464 = getelementptr inbounds float, float* %2, i64 7 - store float %463, float* %464, align 4 - %465 = getelementptr inbounds float, float* %2, i64 8 - %466 = load float, float* %465, align 4 - %467 = insertelement <4 x float> zeroinitializer, float %466, i32 0 - %468 = insertelement <4 x float> %467, float 0.000000e+00, i32 1 - %469 = insertelement <4 x float> %468, float 0.000000e+00, i32 2 - %470 = insertelement <4 x float> %469, float 0.000000e+00, i32 3 - %471 = getelementptr inbounds float, float* %0, i64 3 - %472 = load float, float* %471, align 4 - %473 = insertelement <4 
x float> zeroinitializer, float %472, i32 0 - %474 = insertelement <4 x float> %473, float 0.000000e+00, i32 1 - %475 = insertelement <4 x float> %474, float 0.000000e+00, i32 2 - %476 = insertelement <4 x float> %475, float 0.000000e+00, i32 3 - %477 = getelementptr inbounds float, float* %1, i64 3 - %478 = load float, float* %477, align 4 - %479 = insertelement <4 x float> zeroinitializer, float %478, i32 0 - %480 = insertelement <4 x float> %479, float 0.000000e+00, i32 1 - %481 = insertelement <4 x float> %480, float 0.000000e+00, i32 2 - %482 = insertelement <4 x float> %481, float 0.000000e+00, i32 3 - %483 = call <4 x float> @llvm.fma.f32.24(<4 x float> %476, <4 x float> %482, <4 x float> %470) - %484 = extractelement <4 x float> %483, i32 0 - %485 = getelementptr inbounds float, float* %2, i64 8 - store float %484, float* %485, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #0 { - %1 = alloca [4 x float], align 16 - %2 = alloca [4 x float], align 16 - %3 = alloca [9 x float], align 16 - %4 = bitcast [4 x float]* %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.mat_in to i8*), i64 16, i1 false) - %5 = bitcast [4 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.f_in to i8*), i64 16, i1 false) - %6 = bitcast [9 x float]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) - %7 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 - %8 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %9 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 0 - call void @convolution(float* nonnull %7, float* nonnull %8, float* nonnull %9) - %10 = load float, float* %9, align 16 - %11 = fpext float %10 to double - %12 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %11) #6 - %13 = load float, float* %9, align 16 - %14 = fcmp une float %13, 1.000000e+00 - br i1 %14, label %22, label %15 - -15: ; preds = %0 - %16 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 1 - %17 = load float, float* %16, align 4 - %18 = fpext float %17 to double - %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #6 - %20 = load float, float* %16, align 4 - %21 = fcmp une float %20, 3.000000e+00 - br i1 %21, label %22, label %23 - -22: ; preds = %65, %58, %51, %44, %37, %30, %23, %15, %0 - call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0), i32 46, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.2, i64 0, i64 0)) #7 - unreachable - -23: ; preds = %15 - %24 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 2 - %25 = load float, float* %24, align 8 - %26 = fpext float %25 to double - %27 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #6 - %28 = load float, float* %24, align 8 - %29 = fcmp une float %28, 2.000000e+00 - br i1 %29, label %22, label %30 - -30: ; preds = %23 - %31 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 3 - %32 = load float, float* %31, align 4 - %33 = fpext float %32 to double - %34 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %33) #6 - %35 = load float, float* %31, align 4 - %36 = fcmp une float %35, 4.000000e+00 - br i1 %36, label %22, label %37 - -37: ; preds = %30 - %38 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 4 - %39 = load float, float* %38, align 16 - %40 = fpext float %39 to double - %41 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %40) #6 - %42 = load float, float* %38, align 16 - %43 = fcmp une float %42, 1.000000e+01 - br i1 %43, label %22, label %44 - -44: ; preds = %37 - %45 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 5 - %46 = load float, float* %45, align 4 - %47 = fpext float %46 to double - %48 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %47) #6 - %49 = load float, float* %45, align 4 - %50 = fcmp une float %49, 6.000000e+00 - br i1 %50, label %22, label %51 - -51: ; preds = %44 - %52 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 6 - %53 = load float, float* %52, align 8 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #6 - %56 = load float, float* %52, align 8 - %57 = fcmp une float %56, 3.000000e+00 - br i1 %57, label %22, label %58 - -58: ; preds = %51 - %59 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 7 - %60 = load float, float* %59, align 4 - %61 = fpext float %60 to double - %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %61) #6 - %63 = load float, float* %59, align 4 - %64 = fcmp une float %63, 7.000000e+00 - br i1 %64, label %22, label %65 - -65: ; preds = %58 - %66 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 8 - %67 = load float, float* %66, align 16 - %68 = fpext float %67 to double - %69 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %68) #6 - %70 = load float, float* %66, align 16 - %71 = fcmp une float %70, 4.000000e+00 - br i1 %71, label %22, label %72 - -72: ; preds = %65 - ret i32 0 -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 - -declare i32 @printf(i8*, ...) 
#3 - -; Function Attrs: noreturn -declare void @__assert_rtn(i8*, i8*, i32, i8*) #4 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.1(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.2(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.3(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.4(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.5(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.6(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.7(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.8(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.9(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.10(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.11(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.12(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.13(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.14(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.15(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.16(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.17(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.18(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.19(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.20(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.21(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.22(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.23(<4 x float>, <4 x float>, <4 x float>) #5 - -; Function Attrs: nounwind readnone 
speculatable willreturn -declare <4 x float> @llvm.fma.f32.24(<4 x float>, <4 x float>, <4 x float>) #5 - -attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { argmemonly nounwind willreturn writeonly } -attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { nounwind readnone speculatable willreturn } -attributes #6 = { nounwind } -attributes #7 = { noreturn nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/opt.ll b/src/dios-egraphs/Diospyros/opt.ll deleted file mode 100644 index 62f67344..00000000 --- a/src/dios-egraphs/Diospyros/opt.ll +++ /dev/null @@ -1,249 +0,0 @@ -; ModuleID = 'clang.ll' -source_filename = "llvm-tests/2d-conv.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@__const.main.mat_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00], align 16 -@__const.main.f_in = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 -@__const.main.expected = private unnamed_addr constant [9 x float] [float 1.000000e+00, float 3.000000e+00, float 2.000000e+00, float 4.000000e+00, float 1.000000e+01, float 6.000000e+00, float 3.000000e+00, float 7.000000e+00, float 4.000000e+00], align 16 -@.str = private unnamed_addr constant [12 x i8] c"output: %f\0A\00", align 1 -@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 -@.str.1 = private unnamed_addr constant [21 x i8] c"llvm-tests/2d-conv.c\00", align 1 -@.str.2 = private unnamed_addr constant [26 x i8] c"mat_out[i] == expected[i]\00", align 1 - -; Function Attrs: noinline nounwind ssp uwtable -define void @convolution(float* %0, float* %1, float* %2) #0 { -.preheader7: - %3 = load float, float* %0, align 4 - %4 = load float, float* %1, align 4 - %5 = fmul float %3, %4 - %6 = load float, float* %2, align 4 - %7 
= fadd float %6, %5 - store float %7, float* %2, align 4 - %8 = getelementptr inbounds float, float* %2, i64 1 - %9 = load float, float* %0, align 4 - %10 = getelementptr inbounds float, float* %1, i64 1 - %11 = load float, float* %10, align 4 - %12 = fmul float %9, %11 - %13 = load float, float* %8, align 4 - %14 = fadd float %13, %12 - store float %14, float* %8, align 4 - %15 = getelementptr inbounds float, float* %0, i64 1 - %16 = load float, float* %15, align 4 - %17 = load float, float* %1, align 4 - %18 = fmul float %16, %17 - %19 = fadd float %14, %18 - store float %19, float* %8, align 4 - %20 = getelementptr inbounds float, float* %2, i64 2 - %21 = load float, float* %15, align 4 - %22 = load float, float* %10, align 4 - %23 = fmul float %21, %22 - %24 = load float, float* %20, align 4 - %25 = fadd float %24, %23 - store float %25, float* %20, align 4 - %26 = getelementptr inbounds float, float* %2, i64 3 - %27 = load float, float* %0, align 4 - %28 = getelementptr inbounds float, float* %1, i64 2 - %29 = load float, float* %28, align 4 - %30 = fmul float %27, %29 - %31 = load float, float* %26, align 4 - %32 = fadd float %31, %30 - store float %32, float* %26, align 4 - %33 = getelementptr inbounds float, float* %0, i64 2 - %34 = load float, float* %33, align 4 - %35 = load float, float* %1, align 4 - %36 = fmul float %34, %35 - %37 = fadd float %32, %36 - store float %37, float* %26, align 4 - %38 = getelementptr inbounds float, float* %2, i64 4 - %39 = load float, float* %0, align 4 - %40 = getelementptr inbounds float, float* %1, i64 3 - %41 = load float, float* %40, align 4 - %42 = fmul float %39, %41 - %43 = load float, float* %38, align 4 - %44 = fadd float %43, %42 - store float %44, float* %38, align 4 - %45 = load float, float* %15, align 4 - %46 = load float, float* %28, align 4 - %47 = fmul float %45, %46 - %48 = fadd float %44, %47 - store float %48, float* %38, align 4 - %49 = load float, float* %33, align 4 - %50 = load float, float* %10, align 4 - %51 = fmul float %49, %50 - %52 = fadd float %48, %51 - store float %52, float* %38, align 4 - %53 = getelementptr inbounds float, float* %0, i64 3 - %54 = load float, float* %53, align 4 - %55 = load float, float* %1, align 4 - %56 = fmul float %54, %55 - %57 = fadd float %52, %56 - store float %57, float* %38, align 4 - %58 = getelementptr inbounds float, float* %2, i64 5 - %59 = load float, float* %15, align 4 - %60 = load float, float* %40, align 4 - %61 = fmul float %59, %60 - %62 = load float, float* %58, align 4 - %63 = fadd float %62, %61 - store float %63, float* %58, align 4 - %64 = load float, float* %53, align 4 - %65 = load float, float* %10, align 4 - %66 = fmul float %64, %65 - %67 = fadd float %63, %66 - store float %67, float* %58, align 4 - %68 = getelementptr inbounds float, float* %2, i64 6 - %69 = load float, float* %33, align 4 - %70 = load float, float* %28, align 4 - %71 = fmul float %69, %70 - %72 = load float, float* %68, align 4 - %73 = fadd float %72, %71 - store float %73, float* %68, align 4 - %74 = getelementptr inbounds float, float* %2, i64 7 - %75 = load float, float* %33, align 4 - %76 = load float, float* %40, align 4 - %77 = fmul float %75, %76 - %78 = load float, float* %74, align 4 - %79 = fadd float %78, %77 - store float %79, float* %74, align 4 - %80 = load float, float* %53, align 4 - %81 = load float, float* %28, align 4 - %82 = fmul float %80, %81 - %83 = fadd float %79, %82 - store float %83, float* %74, align 4 - %84 = getelementptr inbounds float, float* %2, i64 8 - %85 = 
load float, float* %53, align 4 - %86 = load float, float* %40, align 4 - %87 = fmul float %85, %86 - %88 = load float, float* %84, align 4 - %89 = fadd float %88, %87 - store float %89, float* %84, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #0 { - %1 = alloca [4 x float], align 16 - %2 = alloca [4 x float], align 16 - %3 = alloca [9 x float], align 16 - %4 = bitcast [4 x float]* %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %4, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.mat_in to i8*), i64 16, i1 false) - %5 = bitcast [4 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %5, i8* nonnull align 16 dereferenceable(16) bitcast ([4 x float]* @__const.main.f_in to i8*), i64 16, i1 false) - %6 = bitcast [9 x float]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(36) %6, i8 0, i64 36, i1 false) - %7 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 - %8 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %9 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 0 - call void @convolution(float* nonnull %7, float* nonnull %8, float* nonnull %9) - %10 = load float, float* %9, align 16 - %11 = fpext float %10 to double - %12 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %11) #5 - %13 = load float, float* %9, align 16 - %14 = fcmp une float %13, 1.000000e+00 - br i1 %14, label %22, label %15 - -15: ; preds = %0 - %16 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 1 - %17 = load float, float* %16, align 4 - %18 = fpext float %17 to double - %19 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %18) #5 - %20 = load float, float* %16, align 4 - %21 = fcmp une float %20, 3.000000e+00 - br i1 %21, label %22, label %23 - -22: ; preds = %65, %58, %51, %44, %37, %30, %23, %15, %0 - call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0), i32 46, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.2, i64 0, i64 0)) #6 - unreachable - -23: ; preds = %15 - %24 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 2 - %25 = load float, float* %24, align 8 - %26 = fpext float %25 to double - %27 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %26) #5 - %28 = load float, float* %24, align 8 - %29 = fcmp une float %28, 2.000000e+00 - br i1 %29, label %22, label %30 - -30: ; preds = %23 - %31 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 3 - %32 = load float, float* %31, align 4 - %33 = fpext float %32 to double - %34 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %33) #5 - %35 = load float, float* %31, align 4 - %36 = fcmp une float %35, 4.000000e+00 - br i1 %36, label %22, label %37 - -37: ; preds = %30 - %38 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 4 - %39 = load float, float* %38, align 16 - %40 = fpext float %39 to double - %41 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %40) #5 - %42 = load float, float* %38, align 16 - %43 = fcmp une float %42, 1.000000e+01 - br i1 %43, label %22, label %44 - -44: ; preds = %37 - %45 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 5 - %46 = load float, float* %45, align 4 - %47 = fpext float %46 to double - %48 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %47) #5 - %49 = load float, float* %45, align 4 - %50 = fcmp une float %49, 6.000000e+00 - br i1 %50, label %22, label %51 - -51: ; preds = %44 - %52 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 6 - %53 = load float, float* %52, align 8 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %54) #5 - %56 = load float, float* %52, align 8 - %57 = fcmp une float %56, 3.000000e+00 - br i1 %57, label %22, label %58 - -58: ; preds = %51 - %59 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 7 - %60 = load float, float* %59, align 4 - %61 = fpext float %60 to double - %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %61) #5 - %63 = load float, float* %59, align 4 - %64 = fcmp une float %63, 7.000000e+00 - br i1 %64, label %22, label %65 - -65: ; preds = %58 - %66 = getelementptr inbounds [9 x float], [9 x float]* %3, i64 0, i64 8 - %67 = load float, float* %66, align 16 - %68 = fpext float %67 to double - %69 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %68) #5 - %70 = load float, float* %66, align 16 - %71 = fcmp une float %70, 4.000000e+00 - br i1 %71, label %22, label %72 - -72: ; preds = %65 - ret i32 0 -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 - -declare i32 @printf(i8*, ...) 
#3 - -; Function Attrs: noreturn -declare void @__assert_rtn(i8*, i8*, i32, i8*) #4 - -attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { argmemonly nounwind willreturn writeonly } -attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { nounwind } -attributes #6 = { noreturn nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} From 1df35e56870956f3014c91e279c50014f281c75f Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Mon, 13 Dec 2021 04:17:02 -0500 Subject: [PATCH 006/143] add asserts to all tests except qr-decomp. qr-decomp-fixed-size is broken: I previously used the wrong assertion values, so I was testing something that was not actually changing. The current behavior is that all the values written to Q and R are incorrect. Assertions in all other tests appear correct. Also add 3-by-3 and 5-by-5 matrix multiplication tests.
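The assertion pattern added across these tests is the same everywhere: print each output element, then compare it against a hand-written expected array, using exact equality where the results are integer-valued and a small tolerance (as in sqrt.c) where floating-point rounding matters. A minimal stand-alone sketch of that pattern follows; the array contents, SIZE, and DELTA here are illustrative placeholders rather than values taken from any one test.

#include <assert.h>
#include <math.h>
#include <stdio.h>

#define SIZE 4
#define DELTA 0.00001f

int main(void) {
    /* stand-ins for a kernel's output and its hand-computed reference */
    float out[SIZE]      = {1.0f, 2.0f, 3.0f, 4.0f};
    float expected[SIZE] = {1.0f, 2.0f, 3.0f, 4.0f};
    for (int i = 0; i < SIZE; i++) {
        printf("output: %f\n", out[i]);
        /* exact == works for integer-valued floats; a tolerance check
           like this one is used where rounding error is expected */
        assert(fabsf(expected[i] - out[i]) < DELTA);
    }
    return 0;
}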
--- .../llvm-tests/3-by-3-matrix-multiply.c | 43 ++++++++++++++ .../llvm-tests/5-by-5-matrix-multiply.c | 59 +++++++++++++++++++ .../Diospyros/llvm-tests/break-w.c | 3 + src/dios-egraphs/Diospyros/llvm-tests/break.c | 3 + .../Diospyros/llvm-tests/continue-w.c | 3 + .../Diospyros/llvm-tests/continue.c | 3 + .../Diospyros/llvm-tests/multi-mat-mul.c | 3 + .../Diospyros/llvm-tests/point-product.c | 5 +- .../Diospyros/llvm-tests/q-prod.c | 5 ++ ...xed-size.c => qr-decomp-fixed-size-FAIl.c} | 46 ++++++++++----- src/dios-egraphs/Diospyros/llvm-tests/sqrt.c | 6 +- .../Diospyros/llvm-tests/stencil-2d.c | 41 +++++++++++++ 12 files changed, 203 insertions(+), 17 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/3-by-3-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/5-by-5-matrix-multiply.c rename src/dios-egraphs/Diospyros/llvm-tests/{qr-decomp-fixed-size.c => qr-decomp-fixed-size-FAIl.c} (84%) diff --git a/src/dios-egraphs/Diospyros/llvm-tests/3-by-3-matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-tests/3-by-3-matrix-multiply.c new file mode 100644 index 00000000..1fde8829 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/3-by-3-matrix-multiply.c @@ -0,0 +1,43 @@ +#include +#include + +#define A_ROWS 3 +#define A_COLS 3 +#define B_COLS 3 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + float a_in[A_ROWS][A_COLS] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + float b_in[A_COLS][B_COLS] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + float c_out[A_ROWS][B_COLS] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + matrix_multiply(a_in, b_in, c_out); + float expected[A_ROWS][B_COLS] = {{30, 36, 42}, {66, 81, 96}, {102, 126, 150}}; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + assert(expected[i][j] == c_out[i][j]); + } + } + // output: 30.000000 + // output: 36.000000 + // output: 42.000000 + // output: 66.000000 + // output: 81.000000 + // output: 96.000000 + // output: 102.000000 + // output: 126.000000 + // output: 150.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/5-by-5-matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-tests/5-by-5-matrix-multiply.c new file mode 100644 index 00000000..bab5bad9 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/5-by-5-matrix-multiply.c @@ -0,0 +1,59 @@ +#include +#include + +#define A_ROWS 5 +#define A_COLS 5 +#define B_COLS 5 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + float a_in[A_ROWS][A_COLS] = {{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}}; + float b_in[A_COLS][B_COLS] = {{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}}; + float c_out[A_ROWS][B_COLS] = {{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}}; + matrix_multiply(a_in, b_in, c_out); + float expected[A_ROWS][B_COLS] = {{45, 60, 75, 90, 105}, {120, 160, 200, 240, 280}, 
{45, 60, 75, 90, 105}, {120, 160, 200, 240, 280},{45, 60, 75, 90, 105}}; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + assert(expected[i][j] == c_out[i][j]); + } + } + // output: 45.000000 + // output: 60.000000 + // output: 75.000000 + // output: 90.000000 + // output: 105.000000 + // output: 120.000000 + // output: 160.000000 + // output: 200.000000 + // output: 240.000000 + // output: 280.000000 + // output: 45.000000 + // output: 60.000000 + // output: 75.000000 + // output: 90.000000 + // output: 105.000000 + // output: 120.000000 + // output: 160.000000 + // output: 200.000000 + // output: 240.000000 + // output: 280.000000 + // output: 45.000000 + // output: 60.000000 + // output: 75.000000 + // output: 90.000000 + // output: 105.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/break-w.c b/src/dios-egraphs/Diospyros/llvm-tests/break-w.c index 26581410..8afaed68 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/break-w.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/break-w.c @@ -1,4 +1,5 @@ #include +#include #define SIZE 8 void break_w_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { @@ -16,8 +17,10 @@ int main(void) { float scalar_in = 10; float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; break_w_test(a_in, scalar_in, b_out); + float expected[SIZE] = {10, 0, 0, 0, 50, 40, 30, 20}; for (int i = 0; i < SIZE; i++) { printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); } // 10.000000 // 0.000000 diff --git a/src/dios-egraphs/Diospyros/llvm-tests/break.c b/src/dios-egraphs/Diospyros/llvm-tests/break.c index e9e7f628..1790765f 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/break.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/break.c @@ -1,4 +1,5 @@ #include +#include #define SIZE 8 void break_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { @@ -14,8 +15,10 @@ int main(void) { float scalar_in = 10; float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; break_test(a_in, scalar_in, b_out); + float expected[SIZE] = {10, 0, 0, 0, 50, 40, 30, 20}; for (int i = 0; i < SIZE; i++) { printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); } // 10.000000 // 0.000000 diff --git a/src/dios-egraphs/Diospyros/llvm-tests/continue-w.c b/src/dios-egraphs/Diospyros/llvm-tests/continue-w.c index eef0cd58..d7b42a84 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/continue-w.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/continue-w.c @@ -1,3 +1,4 @@ +#include #include #define SIZE 8 @@ -18,8 +19,10 @@ int main(void) { float scalar_in = 10; float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; continue_w_test(a_in, scalar_in, b_out); + float expected[SIZE] = {0, 0, 0, 0, 50, 40, 30, 20}; for (int i = 0; i < SIZE; i++) { printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); } // 0.000000 // 0.000000 diff --git a/src/dios-egraphs/Diospyros/llvm-tests/continue.c b/src/dios-egraphs/Diospyros/llvm-tests/continue.c index 37206464..40bc13b8 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/continue.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/continue.c @@ -1,3 +1,4 @@ +#include #include #define SIZE 8 @@ -13,8 +14,10 @@ int main(void) { float scalar_in = 10; float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; continue_test(a_in, scalar_in, b_out); + float expected[SIZE] = {0, 0, 0, 0, 50, 40, 30, 20}; for (int i = 0; i < SIZE; i++) { printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); } // 0.000000 // 0.000000 diff --git 
a/src/dios-egraphs/Diospyros/llvm-tests/multi-mat-mul.c b/src/dios-egraphs/Diospyros/llvm-tests/multi-mat-mul.c index 973ef8f9..af5da2f1 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/multi-mat-mul.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/multi-mat-mul.c @@ -1,3 +1,4 @@ +#include #include #define ROWS 3 #define COLS 3 @@ -28,8 +29,10 @@ int main(void) { float c_in[ROWS * COLS] = {9, 8, 7, 6, 5, 4, 3, 2, 1}; float d_out[ROWS * COLS] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; multimatrix_multiply(a_in, b_in, c_in, d_out); + float expected[ROWS * COLS] = {160, 200, 240, 100, 125, 150, 40, 50, 60}; for (int i = 0; i < ROWS * COLS; i++) { printf("output: %f\n", d_out[i]); + assert(expected[i] == d_out[i]); } // output: 160.000000 // output: 200.000000 diff --git a/src/dios-egraphs/Diospyros/llvm-tests/point-product.c b/src/dios-egraphs/Diospyros/llvm-tests/point-product.c index 48698dd1..1d1b41e5 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/point-product.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/point-product.c @@ -1,3 +1,4 @@ +#include #include void cross_product(float lhs[3], float rhs[3], float result[3]) @@ -38,7 +39,9 @@ int main(void) { float p_in[4] = {0, 1, 2, 3}; float result_out[4] = {0, 0, 0, 0}; point_product(q_in, p_in, result_out); - for (int i = 0; i < 3; i++) { + float expected[4] = {0, 1, 2, 0}; + for (int i = 0; i < 4; i++) { printf("%f\n", result_out[i]); + assert(expected[i] == result_out[i]); } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/q-prod.c b/src/dios-egraphs/Diospyros/llvm-tests/q-prod.c index c795c3ac..2ad30a6a 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/q-prod.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/q-prod.c @@ -3,6 +3,7 @@ #include #include #include +#include #define SIZE 4 @@ -58,10 +59,14 @@ int main(void) { float r_q[SIZE] = {0, 0, 0, 0}; float r_t[SIZE] = {0, 0, 0, 0}; naive_quaternion_product(a_q, a_t, b_q, b_t, r_q, r_t); + float expectedq[SIZE] = {0, 0, 0, 0}; for (int i = 0; i < SIZE; i++) { printf("%f\n", r_q[i]); + assert(expectedq[i] == r_q[i]); } + float expectedt[SIZE] = {2, 4, 6, 0}; for (int i = 0; i < SIZE; i++) { printf("%f\n", r_t[i]); + assert(expectedt[i] == r_t[i]); } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-fixed-size.c b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-fixed-size-FAIl.c similarity index 84% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-fixed-size.c rename to src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-fixed-size-FAIl.c index 85807197..6b8e4b72 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-fixed-size.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-fixed-size-FAIl.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -18,7 +19,7 @@ float sgn(float v) { return (v > 0) - (v < 0); } float naive_norm(float *x, int m) { float sum = 0; for (int i = 0; i < m; i++) { - sum += pow(x[i], 2); + sum += x[i] * x[i]; } return sqrtf(sum); } @@ -77,7 +78,7 @@ void naive_fixed_qr_decomp(float *A, float *Q, float *R) { } float norm_u = naive_norm(u, m); for (int i = 0; i < m; i++) { - v[i] = u[i] / norm_u; + v[i] = u[i] / (norm_u + 0.00001f); } float *q_min = (float *)calloc(sizeof(float), m * m); @@ -126,21 +127,36 @@ int main(void) { float Q[SIZE * SIZE] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; float R[SIZE * SIZE] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = { + 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, + 
}; for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { - printf("%f\n", A[i * SIZE + j]); + printf("Q Output: %f\n", Q[i * SIZE + j]); } } - // naive_fixed_matrix_multiply(A, Q, R); - // for (int i = 0; i < SIZE; i++) { - // for (int j = 0; j < SIZE; j++) { - // printf("%f\n", A[i * SIZE + j]); - // } - // } - // naive_fixed_transpose(A); - // for (int i = 0; i < SIZE; i++) { - // for (int j = 0; j < SIZE; j++) { - // printf("%f\n", A[i * SIZE + j]); - // } - // } + float expectedR[SIZE * SIZE] = { + 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, + }; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + } + } + // 1.000000 + // 2.000000 + // 3.000000 + // 4.000000 + // 1.000000 + // 2.000000 + // 3.000000 + // 4.000000 + // 1.000000 + // 2.000000 + // 3.000000 + // 4.000000 + // 1.000000 + // 2.000000 + // 3.000000 + // 4.000000 } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/sqrt.c b/src/dios-egraphs/Diospyros/llvm-tests/sqrt.c index 6cbcd0c9..a753d824 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/sqrt.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/sqrt.c @@ -1,3 +1,4 @@ +#include #include #include #define SIZE 8 @@ -14,9 +15,12 @@ int main(void) { float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; float c_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; vsqrt(a_in, b_out, c_out); + float delta = 0.00001f; + float expected[SIZE] = {3.000000f, 2.828427f, 2.645751f, 2.449490f, + 2.236068f, 2.000000f, 1.732051f, 1.414214f}; for (int i = 0; i < SIZE; i++) { - printf("%f\n", b_out[i]); printf("%f\n", c_out[i]); + assert(fabs(expected[i] - c_out[i]) < delta); } // 3.000000 // 2.828427 diff --git a/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.c b/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.c index c4fc9f97..38362c79 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.c @@ -1,3 +1,4 @@ +#include #include #define ROW_SIZE 8 #define COL_SIZE 4 @@ -31,7 +32,47 @@ int main(void) { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; float filter_in[F_SIZE] = {1, 1, 1, 1, 1, 1, 1, 1, 1}; stencil(orig_in, sol_out, filter_in); + float expected[ROW_SIZE * COL_SIZE] = { + 9, 9, 1, 1, 9, 9, 1, 1, 9, 9, 1, 1, 9, 9, 1, 1, + 9, 9, 1, 1, 9, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }; for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { printf("%f\n", sol_out[i]); + assert(expected[i] == sol_out[i]); } + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 } \ No newline at end of file From 56a3961d1741548e66732916423405767e7dc73a Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 14 Dec 2021 03:08:37 -0500 Subject: [PATCH 007/143] random matrix multiply tests --- src/dios-egraphs/Diospyros/Makefile | 8 ++ .../100-by-100-random-matrix-multiply.c | 76 ++++++++++++++++++ .../12-by-12-random-matrix-multiply.c | 78 +++++++++++++++++++ .../25-by-25-random-matrix-multiply.c | 76 ++++++++++++++++++ .../50-by-50-random-matrix-multiply.c | 76 ++++++++++++++++++ .../75-by-75-random-matrix-multiply.c | 76 ++++++++++++++++++ 6 files changed, 390 insertions(+) create mode 100644 
src/dios-egraphs/Diospyros/llvm-tests/100-by-100-random-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/12-by-12-random-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/25-by-25-random-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/50-by-50-random-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/75-by-75-random-matrix-multiply.c diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index 1495bdcc..9ca373ed 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -58,6 +58,14 @@ target/debug/libllvmlib.$(EXT): $(LIB) set-up-mac: $(SETUP) + +clean-ll: + rm *.ll + +clean-tmp: + rm *.tmp clean: rm -rf target + rm *.ll + rm *.tmp diff --git a/src/dios-egraphs/Diospyros/llvm-tests/100-by-100-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-tests/100-by-100-random-matrix-multiply.c new file mode 100644 index 00000000..bb69fa0b --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/100-by-100-random-matrix-multiply.c @@ -0,0 +1,76 @@ +#include +#include +#include +#include +#include + +#define A_ROWS 100 +#define A_COLS 100 +#define B_COLS 100 +#define MAX_FLOAT 100.00f +#define DELTA 0.00001f + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + // load in b_in + float b_in[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + // set up c_out + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = 0.0f; + } + } + // prep expected + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = 0.0f; + } + } + // calculate up c_out + matrix_multiply(a_in, b_in, c_out); + // calculate expected + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + expected[i][j] = sum; + } + } + // check expected == output + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + assert(fabs(expected[i][j] - c_out[i][j]) < DELTA); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/12-by-12-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-tests/12-by-12-random-matrix-multiply.c new file mode 100644 index 00000000..2963ed91 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/12-by-12-random-matrix-multiply.c @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include + +#define A_ROWS 12 +#define A_COLS 12 +#define B_COLS 12 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for 
(int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + // load in b_in + float b_in[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + // set up c_out + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = 0.0f; + } + } + // prep expected + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = 0.0f; + } + } + // calculate up c_out + matrix_multiply(a_in, b_in, c_out); + // calculate expected + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + expected[i][j] = sum; + } + } + // check expected == output + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("calculated: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(fabs(expected[i][j] - c_out[i][j]) < DELTA); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/25-by-25-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-tests/25-by-25-random-matrix-multiply.c new file mode 100644 index 00000000..2cec8fd5 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/25-by-25-random-matrix-multiply.c @@ -0,0 +1,76 @@ +#include +#include +#include +#include +#include + +#define A_ROWS 25 +#define A_COLS 25 +#define B_COLS 25 +#define MAX_FLOAT 100.00f +#define DELTA 0.00001f + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + // load in b_in + float b_in[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + // set up c_out + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = 0.0f; + } + } + // prep expected + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = 0.0f; + } + } + // calculate up c_out + matrix_multiply(a_in, b_in, c_out); + // calculate expected + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + expected[i][j] = sum; + } + } + // check expected == output + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + assert(fabs(expected[i][j] - c_out[i][j]) < DELTA); + } + } + return 0; +} \ No newline at end of file diff --git 
a/src/dios-egraphs/Diospyros/llvm-tests/50-by-50-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-tests/50-by-50-random-matrix-multiply.c new file mode 100644 index 00000000..bdee3b52 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/50-by-50-random-matrix-multiply.c @@ -0,0 +1,76 @@ +#include +#include +#include +#include +#include + +#define A_ROWS 50 +#define A_COLS 50 +#define B_COLS 50 +#define MAX_FLOAT 100.00f +#define DELTA 0.00001f + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + // load in b_in + float b_in[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + // set up c_out + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = 0.0f; + } + } + // prep expected + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = 0.0f; + } + } + // calculate up c_out + matrix_multiply(a_in, b_in, c_out); + // calculate expected + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + expected[i][j] = sum; + } + } + // check expected == output + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + assert(fabs(expected[i][j] - c_out[i][j]) < DELTA); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/75-by-75-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-tests/75-by-75-random-matrix-multiply.c new file mode 100644 index 00000000..bacfa238 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/75-by-75-random-matrix-multiply.c @@ -0,0 +1,76 @@ +#include +#include +#include +#include +#include + +#define A_ROWS 75 +#define A_COLS 75 +#define B_COLS 75 +#define MAX_FLOAT 100.00f +#define DELTA 0.00001f + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + // load in b_in + float b_in[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + // set up c_out + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = 0.0f; + } + } + // prep expected + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < 
A_COLS; j++) { + expected[i][j] = 0.0f; + } + } + // calculate up c_out + matrix_multiply(a_in, b_in, c_out); + // calculate expected + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + expected[i][j] = sum; + } + } + // check expected == output + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + assert(fabs(expected[i][j] - c_out[i][j]) < DELTA); + } + } + return 0; +} \ No newline at end of file From 4cf5c563667fc36443863e590eeeea7a5e040b1d Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 14 Dec 2021 03:27:05 -0500 Subject: [PATCH 008/143] fix random tests error bounds, 25 by 25 works --- .../Diospyros/llvm-tests/100-by-100-random-matrix-multiply.c | 2 +- .../Diospyros/llvm-tests/25-by-25-random-matrix-multiply.c | 2 +- .../Diospyros/llvm-tests/50-by-50-random-matrix-multiply.c | 2 +- .../Diospyros/llvm-tests/75-by-75-random-matrix-multiply.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dios-egraphs/Diospyros/llvm-tests/100-by-100-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-tests/100-by-100-random-matrix-multiply.c index bb69fa0b..bdacc4fe 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/100-by-100-random-matrix-multiply.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/100-by-100-random-matrix-multiply.c @@ -8,7 +8,7 @@ #define A_COLS 100 #define B_COLS 100 #define MAX_FLOAT 100.00f -#define DELTA 0.00001f +#define DELTA 0.1f void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], float c_out[A_ROWS][B_COLS]) { diff --git a/src/dios-egraphs/Diospyros/llvm-tests/25-by-25-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-tests/25-by-25-random-matrix-multiply.c index 2cec8fd5..e50590ee 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/25-by-25-random-matrix-multiply.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/25-by-25-random-matrix-multiply.c @@ -8,7 +8,7 @@ #define A_COLS 25 #define B_COLS 25 #define MAX_FLOAT 100.00f -#define DELTA 0.00001f +#define DELTA 0.1f void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], float c_out[A_ROWS][B_COLS]) { diff --git a/src/dios-egraphs/Diospyros/llvm-tests/50-by-50-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-tests/50-by-50-random-matrix-multiply.c index bdee3b52..036c7a79 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/50-by-50-random-matrix-multiply.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/50-by-50-random-matrix-multiply.c @@ -8,7 +8,7 @@ #define A_COLS 50 #define B_COLS 50 #define MAX_FLOAT 100.00f -#define DELTA 0.00001f +#define DELTA 0.1f void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], float c_out[A_ROWS][B_COLS]) { diff --git a/src/dios-egraphs/Diospyros/llvm-tests/75-by-75-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-tests/75-by-75-random-matrix-multiply.c index bacfa238..afefac24 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/75-by-75-random-matrix-multiply.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/75-by-75-random-matrix-multiply.c @@ -8,7 +8,7 @@ #define A_COLS 75 #define B_COLS 75 #define MAX_FLOAT 100.00f -#define DELTA 0.00001f +#define DELTA 0.1f void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], float c_out[A_ROWS][B_COLS]) { From 6f40ce5edc0261eeb089bbd0e8892f1dd10ae2db Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 14 Dec 2021 03:37:09 -0500 Subject: [PATCH 009/143] begin working on 
flag for no-optimization --- src/dios-egraphs/Diospyros/diospyros.cpp | 5 +++-- src/dios-egraphs/Diospyros/src/lib.rs | 16 +++++++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index a59f0852..3bb32fa9 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -44,7 +44,8 @@ extern "C" VectorPointerSize optimize(LLVMModuleRef mod, LLVMContextRef context, LLVMBuilderRef builder, LLVMValueRef const *bb, std::size_t size, LLVMPair const *past_instrs, - std::size_t past_size); + std::size_t past_size, + bool run_egg); const string ARRAY_NAME = "no-array-name"; const string TEMP_NAME = "no-temp-name"; @@ -571,7 +572,7 @@ struct DiospyrosPass : public FunctionPass { VectorPointerSize pair = optimize( wrap(mod), wrap(&context), wrap(&builder), vec.data(), vec.size(), translated_exprs.data(), - translated_exprs.size()); + translated_exprs.size(), true); int size = pair.llvm_pointer_size; LLVMPair const *expr_array = pair.llvm_pointer; diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index a83ff639..e49bc399 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -715,6 +715,7 @@ pub fn optimize( size: size_t, past_instrs: *const LLVMPair, past_size: size_t, + run_egg: bool, ) -> VectorPointerSize { unsafe { // llvm to egg @@ -732,14 +733,19 @@ pub fn optimize( let (expr, gep_map, store_map, symbol_map) = llvm_to_egg(llvm_instrs, &mut llvm_arg_pairs, &mut node_to_arg); - // optimization pass - eprintln!("{}", expr.pretty(10)); - let (_, best) = rules::run(&expr, 180, true, false); - eprintln!("{}", best.pretty(10)); + let mut result = expr.clone(); + if run_egg { + // optimization pass + eprintln!("{}", expr.pretty(10)); + let (_, best) = rules::run(&expr, 180, true, false); + eprintln!("{}", best.pretty(10)); + + result = best; + } // egg to llvm egg_to_llvm( - best, + result, &gep_map, &store_map, &symbol_map, From 8e981bfdb43fa6203ac2d5dad5e6f766c5324793 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 14 Dec 2021 04:09:11 -0500 Subject: [PATCH 010/143] add no opt and no print flags to llvm, makefile commands updated --- src/dios-egraphs/Diospyros/Makefile | 20 +++++++++++++++++++- src/dios-egraphs/Diospyros/diospyros.cpp | 19 ++++++++++++++++--- src/dios-egraphs/Diospyros/src/lib.rs | 9 +++++++-- 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index 9ca373ed..e48bb6d4 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -45,7 +45,25 @@ run-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o clang.ll $(test) opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce clang.ll -o opt.ll opt -S --cfl-steens-aa opt.ll -o aa.ll - $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) aa.ll -o diospyros.ll + $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=false aa.ll -o diospyros.ll + opt -S --adce --dse diospyros.ll -o dce.ll + $(CLANG) dce.ll + ./a.out + +print-opt: set-up-mac 
target/debug/libllvmlib.$(EXT) diospyros.cpp + $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o clang.ll $(test) + opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce clang.ll -o opt.ll + opt -S --cfl-steens-aa opt.ll -o aa.ll + $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=true aa.ll -o diospyros.ll + opt -S --adce --dse diospyros.ll -o dce.ll + $(CLANG) dce.ll + ./a.out + +no-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp + $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o clang.ll $(test) + opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce clang.ll -o opt.ll + opt -S --cfl-steens-aa opt.ll -o aa.ll + $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt=false -mllvm -print=false aa.ll -o diospyros.ll opt -S --adce --dse diospyros.ll -o dce.ll $(CLANG) dce.ll ./a.out diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 3bb32fa9..730636ba 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Scalar/LoopUnrollPass.h" @@ -25,6 +26,18 @@ using namespace llvm; using namespace std; +int main(int argc, char **argv) { + llvm::cl::ParseCommandLineOptions(argc, argv); +} + +llvm::cl::opt RunOpt("r", llvm::cl::desc("Enable Egg Optimization.")); +llvm::cl::alias RunOptAlias("opt", llvm::cl::desc("Alias for -r"), + llvm::cl::aliasopt(RunOpt)); + +llvm::cl::opt PrintOpt("p", llvm::cl::desc("Print Egg Optimization.")); +llvm::cl::alias PrintOptAlias("print", llvm::cl::desc("Alias for -p"), + llvm::cl::aliasopt(PrintOpt)); + typedef struct IntLLVMPair { uint32_t node_int; LLVMValueRef arg; @@ -44,8 +57,8 @@ extern "C" VectorPointerSize optimize(LLVMModuleRef mod, LLVMContextRef context, LLVMBuilderRef builder, LLVMValueRef const *bb, std::size_t size, LLVMPair const *past_instrs, - std::size_t past_size, - bool run_egg); + std::size_t past_size, bool run_egg, + bool print_opt); const string ARRAY_NAME = "no-array-name"; const string TEMP_NAME = "no-temp-name"; @@ -572,7 +585,7 @@ struct DiospyrosPass : public FunctionPass { VectorPointerSize pair = optimize( wrap(mod), wrap(&context), wrap(&builder), vec.data(), vec.size(), translated_exprs.data(), - translated_exprs.size(), true); + translated_exprs.size(), RunOpt, PrintOpt); int size = pair.llvm_pointer_size; LLVMPair const *expr_array = pair.llvm_pointer; diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index e49bc399..8ae98ce8 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -716,6 +716,7 @@ pub fn optimize( past_instrs: *const LLVMPair, past_size: size_t, run_egg: bool, + print_opt: bool, ) -> VectorPointerSize { unsafe { // llvm to egg @@ -736,9 +737,13 @@ pub fn optimize( let mut result = 
expr.clone(); if run_egg { // optimization pass - eprintln!("{}", expr.pretty(10)); + if print_opt { + eprintln!("{}", expr.pretty(10)); + } let (_, best) = rules::run(&expr, 180, true, false); - eprintln!("{}", best.pretty(10)); + if print_opt { + eprintln!("{}", best.pretty(10)); + } result = best; } From 43e43268455aad9a0ff65dfa2d6b76492dac7e7d Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 14 Dec 2021 22:50:22 -0500 Subject: [PATCH 011/143] clean lib.rs no vec, add 2d-2d-conv random tests at 5 and 10: --- .../llvm-tests/2d-2d-conv-random-10.c | 95 +++++++++++++++++++ .../llvm-tests/2d-2d-conv-random-5.c | 95 +++++++++++++++++++ src/dios-egraphs/Diospyros/src/lib.rs | 21 ++-- 3 files changed, 198 insertions(+), 13 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-10.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-5.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-10.c b/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-10.c new file mode 100644 index 00000000..69eeec29 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-10.c @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include + +#define I_ROWS 10 +#define I_COLS 10 +#define F_ROWS 5 +#define F_COLS 5 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void convolution(float mat_in[I_ROWS][I_COLS], float f_in[F_ROWS][F_COLS], + float mat_out[O_ROWS][O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = + mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + mat_out[outRow][outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS][I_COLS]; + for (int i = 0; i < I_ROWS; i++) { + for (int j = 0; j < I_ROWS; j++) { + mat_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float f_in[F_ROWS][F_COLS]; + for (int i = 0; i < F_ROWS; i++) { + for (int j = 0; j < F_COLS; j++) { + f_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float mat_out[O_ROWS][O_COLS]; + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + mat_out[i][j] = 0; + } + } + float expected[O_ROWS][O_COLS]; + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + expected[i][j] = 0; + } + } + convolution(mat_in, f_in, mat_out); + // calculate expected + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = + mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + expected[outRow][outCol] += v; + } + } + } + } + } + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + printf("calculated: %f\n", mat_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(fabs(expected[i][j] 
- mat_out[i][j]) < DELTA); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-5.c b/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-5.c new file mode 100644 index 00000000..968dfb43 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-5.c @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include + +#define I_ROWS 5 +#define I_COLS 5 +#define F_ROWS 5 +#define F_COLS 5 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void convolution(float mat_in[I_ROWS][I_COLS], float f_in[F_ROWS][F_COLS], + float mat_out[O_ROWS][O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = + mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + mat_out[outRow][outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS][I_COLS]; + for (int i = 0; i < I_ROWS; i++) { + for (int j = 0; j < I_ROWS; j++) { + mat_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float f_in[F_ROWS][F_COLS]; + for (int i = 0; i < F_ROWS; i++) { + for (int j = 0; j < F_COLS; j++) { + f_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float mat_out[O_ROWS][O_COLS]; + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + mat_out[i][j] = 0; + } + } + float expected[O_ROWS][O_COLS]; + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + expected[i][j] = 0; + } + } + convolution(mat_in, f_in, mat_out); + // calculate expected + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = + mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + expected[outRow][outCol] += v; + } + } + } + } + } + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + printf("calculated: %f\n", mat_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(fabs(expected[i][j] - mat_out[i][j]) < DELTA); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 8ae98ce8..46874b27 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -734,23 +734,18 @@ pub fn optimize( let (expr, gep_map, store_map, symbol_map) = llvm_to_egg(llvm_instrs, &mut llvm_arg_pairs, &mut node_to_arg); - let mut result = expr.clone(); - if run_egg { - // optimization pass - if print_opt { - eprintln!("{}", expr.pretty(10)); - } - let (_, best) = rules::run(&expr, 180, true, false); - if print_opt { - eprintln!("{}", best.pretty(10)); - } - - result = best; + // optimization pass + if print_opt { + eprintln!("{}", expr.pretty(10)); + } + let (_, best) = rules::run(&expr, 180, true, !run_egg); 
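A minimal sketch of the pattern this lib.rs hunk adopts — forwarding the run_egg flag into rules::run instead of cloning expr and branching at the call site. The names below are invented for illustration, and the assumption that the fourth argument of rules::run suppresses rewriting when true is exactly that, an assumption:

    // Hypothetical stand-in for rules::run(&expr, 180, true, !run_egg):
    // when `skip_rewrites` is true the input is returned unchanged.
    fn run(expr: &str, skip_rewrites: bool) -> String {
        if skip_rewrites {
            expr.to_string()
        } else {
            format!("optimized({})", expr)
        }
    }

    // Forward the flag rather than cloning and conditionally replacing the result.
    fn optimize_once(expr: &str, run_egg: bool) -> String {
        run(expr, !run_egg)
    }

    fn main() {
        assert_eq!(optimize_once("expr", true), "optimized(expr)");
        assert_eq!(optimize_once("expr", false), "expr");
    }
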
+ if print_opt { + eprintln!("{}", best.pretty(10)); } // egg to llvm egg_to_llvm( - result, + best, &gep_map, &store_map, &symbol_map, From c98b00cee91f6ddd057127b3cfcd269454052feb Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 14 Dec 2021 23:17:21 -0500 Subject: [PATCH 012/143] add new 1d-conv random 5 and 10 tests --- .../llvm-tests/2d-2d-conv-random-5.c | 4 +- .../Diospyros/llvm-tests/2d-conv-random-10.c | 88 ++++++++++++++++++ .../Diospyros/llvm-tests/2d-conv-random-5.c | 89 +++++++++++++++++++ 3 files changed, 179 insertions(+), 2 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/2d-conv-random-10.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/2d-conv-random-5.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-5.c b/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-5.c index 968dfb43..89ed97f0 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-5.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-5.c @@ -6,8 +6,8 @@ #define I_ROWS 5 #define I_COLS 5 -#define F_ROWS 5 -#define F_COLS 5 +#define F_ROWS 3 +#define F_COLS 3 #define O_ROWS ((I_ROWS + F_ROWS) - 1) #define O_COLS ((I_COLS + F_COLS) - 1) #define MAX_FLOAT 100.00f diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-conv-random-10.c b/src/dios-egraphs/Diospyros/llvm-tests/2d-conv-random-10.c new file mode 100644 index 00000000..c3f5c3a6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/2d-conv-random-10.c @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include + +#define I_ROWS 10 +#define I_COLS 10 +#define F_ROWS 5 +#define F_COLS 5 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = 0; + } + convolution(mat_in, f_in, mat_out); + // calculate expected + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + 
expected[outRow * O_COLS + outCol] += v; + } + } + } + } + } + for (int i = 0; i < O_ROWS * O_COLS; i++) { + printf("calculated: %f\n", mat_out[i]); + printf("expected: %f\n", expected[i]); + printf("difference: %f\n", expected[i] - mat_out[i]); + } + for (int i = 0; i < O_ROWS * O_COLS; i++) { + assert(fabs(expected[i] - mat_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-conv-random-5.c b/src/dios-egraphs/Diospyros/llvm-tests/2d-conv-random-5.c new file mode 100644 index 00000000..a47953a0 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/2d-conv-random-5.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include + +#define I_ROWS 5 +#define I_COLS 5 +#define F_ROWS 3 +#define F_COLS 3 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = 0; + } + convolution(mat_in, f_in, mat_out); + // calculate expected + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + expected[outRow * O_COLS + outCol] += v; + } + } + } + } + } + for (int i = 0; i < O_ROWS * O_COLS; i++) { + printf("--------------------------\n"); + printf("calculated: %f\n", mat_out[i]); + printf("expected: %f\n", expected[i]); + printf("difference: %f\n", expected[i] - mat_out[i]); + } + for (int i = 0; i < O_ROWS * O_COLS; i++) { + assert(fabs(expected[i] - mat_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file From 4de12703426c8a40a6075120eb34e94bff96b6fc Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 14 Dec 2021 23:51:25 -0500 Subject: [PATCH 013/143] add random stencil tests at 15, 21, 50 --- .../llvm-tests/stencil-2d-random-15-16.c | 71 +++++++++++++++++++ .../llvm-tests/stencil-2d-random-22-21.c | 71 +++++++++++++++++++ .../llvm-tests/stencil-2d-random-50-50.c | 71 +++++++++++++++++++ 3 files changed, 213 
insertions(+) create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-15-16.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-22-21.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-50-50.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-15-16.c b/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-15-16.c new file mode 100644 index 00000000..8432ebd7 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-15-16.c @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 15 +#define COL_SIZE 16 +#define F_SIZE 9 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + stencil(orig_in, sol_out, filter_in); + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + expected[(r * COL_SIZE) + c] = temp; + } + } + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("calculated: %f\n", sol_out[i]); + printf("expected: %f\n", expected[i]); + printf("difference: %f\n", expected[i] - sol_out[i]); + } + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + assert(fabs(expected[i] - sol_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-22-21.c b/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-22-21.c new file mode 100644 index 00000000..019fd41b --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-22-21.c @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 22 +#define COL_SIZE 21 +#define F_SIZE 11 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for 
(int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + stencil(orig_in, sol_out, filter_in); + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + expected[(r * COL_SIZE) + c] = temp; + } + } + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("calculated: %f\n", sol_out[i]); + printf("expected: %f\n", expected[i]); + printf("difference: %f\n", expected[i] - sol_out[i]); + } + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + assert(fabs(expected[i] - sol_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-50-50.c b/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-50-50.c new file mode 100644 index 00000000..c91b842e --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-50-50.c @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 50 +#define COL_SIZE 50 +#define F_SIZE 25 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + stencil(orig_in, sol_out, filter_in); + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + expected[(r * COL_SIZE) + c] = temp; + } + } + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("calculated: %f\n", sol_out[i]); + printf("expected: %f\n", expected[i]); + printf("difference: %f\n", expected[i] - sol_out[i]); + } + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + assert(fabs(expected[i] - sol_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file From cefbf5f9c867434d2beaf342f8f87b9fba968708 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 15 Dec 2021 00:10:06 -0500 Subject: [PATCH 014/143] move all building to build directory --- src/dios-egraphs/Diospyros/Makefile | 54 +++++++++++++---------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index 
e48bb6d4..dd5bc618 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -42,31 +42,31 @@ test-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp opt -S --adce finish.ll -o final.ll run-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o clang.ll $(test) - opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce clang.ll -o opt.ll - opt -S --cfl-steens-aa opt.ll -o aa.ll - $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=false aa.ll -o diospyros.ll - opt -S --adce --dse diospyros.ll -o dce.ll - $(CLANG) dce.ll - ./a.out + $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) + opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll + opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll + $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=false build/aa.ll -o build/diospyros.ll + opt -S --adce --dse build/diospyros.ll -o build/dce.ll + $(CLANG) build/dce.ll -o build/final + build/final print-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o clang.ll $(test) - opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce clang.ll -o opt.ll - opt -S --cfl-steens-aa opt.ll -o aa.ll - $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=true aa.ll -o diospyros.ll - opt -S --adce --dse diospyros.ll -o dce.ll - $(CLANG) dce.ll - ./a.out + $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) + opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll + opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll + $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=true build/aa.ll -o build/diospyros.ll + opt -S --adce --dse build/diospyros.ll -o build/dce.ll + $(CLANG) build/dce.ll -o build/final + build/final no-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o clang.ll $(test) - opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce clang.ll -o opt.ll - opt -S --cfl-steens-aa opt.ll -o aa.ll - $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt=false -mllvm -print=false aa.ll -o diospyros.ll - opt -S --adce --dse 
diospyros.ll -o dce.ll - $(CLANG) dce.ll - ./a.out + $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) + opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll + opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll + $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt=false -mllvm -print=false build/aa.ll -o build/diospyros.ll + opt -S --adce --dse build/diospyros.ll -o build/dce.ll + $(CLANG) build/dce.ll -o build/final + build/final test: set-up-mac runt.sh runt.toml target/debug/libllvmlib.$(EXT) runt @@ -77,13 +77,9 @@ target/debug/libllvmlib.$(EXT): $(LIB) set-up-mac: $(SETUP) -clean-ll: - rm *.ll - -clean-tmp: - rm *.tmp - clean: + rm -r build/* + +clean-all: rm -rf target - rm *.ll - rm *.tmp + rm -r build/* From a5dfebfc17dba777aece254e1a16ab979a0831e6 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 15 Dec 2021 00:14:12 -0500 Subject: [PATCH 015/143] restructure with random tests directory --- .../100-by-100-random-matrix-multiply.c | 0 .../12-by-12-random-matrix-multiply.c | 0 .../25-by-25-random-matrix-multiply.c | 0 .../{llvm-tests => randomized-tests}/2d-2d-conv-random-10.c | 0 .../{llvm-tests => randomized-tests}/2d-2d-conv-random-5.c | 0 .../{llvm-tests => randomized-tests}/2d-conv-random-10.c | 0 .../Diospyros/{llvm-tests => randomized-tests}/2d-conv-random-5.c | 0 .../50-by-50-random-matrix-multiply.c | 0 .../75-by-75-random-matrix-multiply.c | 0 .../{llvm-tests => randomized-tests}/stencil-2d-random-15-16.c | 0 .../{llvm-tests => randomized-tests}/stencil-2d-random-22-21.c | 0 .../{llvm-tests => randomized-tests}/stencil-2d-random-50-50.c | 0 12 files changed, 0 insertions(+), 0 deletions(-) rename src/dios-egraphs/Diospyros/{llvm-tests => randomized-tests}/100-by-100-random-matrix-multiply.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => randomized-tests}/12-by-12-random-matrix-multiply.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => randomized-tests}/25-by-25-random-matrix-multiply.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => randomized-tests}/2d-2d-conv-random-10.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => randomized-tests}/2d-2d-conv-random-5.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => randomized-tests}/2d-conv-random-10.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => randomized-tests}/2d-conv-random-5.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => randomized-tests}/50-by-50-random-matrix-multiply.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => randomized-tests}/75-by-75-random-matrix-multiply.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => randomized-tests}/stencil-2d-random-15-16.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => randomized-tests}/stencil-2d-random-22-21.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => randomized-tests}/stencil-2d-random-50-50.c (100%) diff --git a/src/dios-egraphs/Diospyros/llvm-tests/100-by-100-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/randomized-tests/100-by-100-random-matrix-multiply.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/100-by-100-random-matrix-multiply.c rename to src/dios-egraphs/Diospyros/randomized-tests/100-by-100-random-matrix-multiply.c diff --git 
a/src/dios-egraphs/Diospyros/llvm-tests/12-by-12-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/randomized-tests/12-by-12-random-matrix-multiply.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/12-by-12-random-matrix-multiply.c rename to src/dios-egraphs/Diospyros/randomized-tests/12-by-12-random-matrix-multiply.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/25-by-25-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/randomized-tests/25-by-25-random-matrix-multiply.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/25-by-25-random-matrix-multiply.c rename to src/dios-egraphs/Diospyros/randomized-tests/25-by-25-random-matrix-multiply.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-10.c b/src/dios-egraphs/Diospyros/randomized-tests/2d-2d-conv-random-10.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-10.c rename to src/dios-egraphs/Diospyros/randomized-tests/2d-2d-conv-random-10.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-5.c b/src/dios-egraphs/Diospyros/randomized-tests/2d-2d-conv-random-5.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv-random-5.c rename to src/dios-egraphs/Diospyros/randomized-tests/2d-2d-conv-random-5.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-conv-random-10.c b/src/dios-egraphs/Diospyros/randomized-tests/2d-conv-random-10.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/2d-conv-random-10.c rename to src/dios-egraphs/Diospyros/randomized-tests/2d-conv-random-10.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-conv-random-5.c b/src/dios-egraphs/Diospyros/randomized-tests/2d-conv-random-5.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/2d-conv-random-5.c rename to src/dios-egraphs/Diospyros/randomized-tests/2d-conv-random-5.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/50-by-50-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/randomized-tests/50-by-50-random-matrix-multiply.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/50-by-50-random-matrix-multiply.c rename to src/dios-egraphs/Diospyros/randomized-tests/50-by-50-random-matrix-multiply.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/75-by-75-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/randomized-tests/75-by-75-random-matrix-multiply.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/75-by-75-random-matrix-multiply.c rename to src/dios-egraphs/Diospyros/randomized-tests/75-by-75-random-matrix-multiply.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-15-16.c b/src/dios-egraphs/Diospyros/randomized-tests/stencil-2d-random-15-16.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-15-16.c rename to src/dios-egraphs/Diospyros/randomized-tests/stencil-2d-random-15-16.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-22-21.c b/src/dios-egraphs/Diospyros/randomized-tests/stencil-2d-random-22-21.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-22-21.c rename to src/dios-egraphs/Diospyros/randomized-tests/stencil-2d-random-22-21.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-50-50.c b/src/dios-egraphs/Diospyros/randomized-tests/stencil-2d-random-50-50.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/stencil-2d-random-50-50.c 
rename to src/dios-egraphs/Diospyros/randomized-tests/stencil-2d-random-50-50.c From c8ae4d0cfe4148a9380bc0fb7d26ddddbe44e1ea Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 15 Dec 2021 00:21:23 -0500 Subject: [PATCH 016/143] remove all 'new' tests --- .../Diospyros/llvm-tests/2d-2d-conv.expect | 249 ----------- .../Diospyros/llvm-tests/2d-conv.expect | 227 ---------- .../llvm-tests/2d-matrix-multiply.expect | 152 ------- src/dios-egraphs/Diospyros/llvm-tests/2d.c | 20 +- .../Diospyros/llvm-tests/2d.expect | 70 ---- .../Diospyros/llvm-tests/2d_new.c | 28 -- src/dios-egraphs/Diospyros/llvm-tests/add.c | 19 +- .../Diospyros/llvm-tests/add.expect | 70 ---- .../Diospyros/llvm-tests/add_mult.c | 31 +- .../Diospyros/llvm-tests/add_mult.expect | 73 ---- .../Diospyros/llvm-tests/add_mult_new.c | 27 -- .../Diospyros/llvm-tests/add_new.c | 27 -- .../Diospyros/llvm-tests/cube-new.c | 34 -- src/dios-egraphs/Diospyros/llvm-tests/cube.c | 9 + .../Diospyros/llvm-tests/fft.expect | 388 ------------------ .../Diospyros/llvm-tests/five_binops.c | 27 +- .../Diospyros/llvm-tests/five_binops.expect | 121 ------ .../Diospyros/llvm-tests/five_binops_new.c | 31 -- .../Diospyros/llvm-tests/if-else-new.c | 32 -- .../Diospyros/llvm-tests/if-else.c | 9 + src/dios-egraphs/Diospyros/llvm-tests/mac.c | 22 +- .../Diospyros/llvm-tests/mac.expect | 86 ---- .../Diospyros/llvm-tests/mac_new.c | 29 -- .../Diospyros/llvm-tests/mat_mul.c | 22 +- .../Diospyros/llvm-tests/mat_mul.expect | 103 ----- .../Diospyros/llvm-tests/mat_mul_new.c | 27 -- .../llvm-tests/matrix-multiply-new.c | 36 -- .../Diospyros/llvm-tests/matrix-multiply.c | 5 + .../llvm-tests/matrix-multiply.expect | 153 ------- src/dios-egraphs/Diospyros/llvm-tests/mixed.c | 19 +- .../Diospyros/llvm-tests/mixed.expect | 63 --- .../Diospyros/llvm-tests/mixed_new.c | 27 -- src/dios-egraphs/Diospyros/llvm-tests/mult.c | 19 +- .../Diospyros/llvm-tests/mult.expect | 70 ---- .../Diospyros/llvm-tests/mult_new.c | 27 -- .../Diospyros/llvm-tests/multiple_adds.c | 24 +- .../Diospyros/llvm-tests/multiple_adds.expect | 87 ---- .../Diospyros/llvm-tests/multiple_adds_new.c | 29 -- .../Diospyros/llvm-tests/out_of_order.c | 19 +- .../Diospyros/llvm-tests/out_of_order.expect | 70 ---- .../Diospyros/llvm-tests/out_of_order_new.c | 27 -- .../Diospyros/llvm-tests/point-product.expect | 284 ------------- .../Diospyros/llvm-tests/return-new.c | 37 -- .../Diospyros/llvm-tests/return.c | 9 + .../Diospyros/llvm-tests/scalar-new.c | 27 -- .../Diospyros/llvm-tests/scalar.c | 9 + .../Diospyros/llvm-tests/scalar.expect | 95 ----- .../Diospyros/llvm-tests/stencil-2d.expect | 181 -------- .../Diospyros/llvm-tests/ternary-new.c | 35 -- .../Diospyros/llvm-tests/ternary.c | 9 + src/dios-egraphs/Diospyros/llvm-tests/var.c | 23 +- .../Diospyros/llvm-tests/var.expect | 66 --- .../Diospyros/llvm-tests/var_new.c | 29 -- .../Diospyros/llvm-tests/width5.c | 22 +- .../Diospyros/llvm-tests/width5.expect | 92 ----- .../Diospyros/llvm-tests/width5_new.c | 30 -- .../Diospyros/llvm-tests/width9.c | 26 +- .../Diospyros/llvm-tests/width9.expect | 150 ------- .../Diospyros/llvm-tests/width9_new.c | 42 -- 59 files changed, 259 insertions(+), 3515 deletions(-) delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/2d-conv.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/2d.expect delete mode 100644 
src/dios-egraphs/Diospyros/llvm-tests/2d_new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/add.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/add_mult.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/add_mult_new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/add_new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/cube-new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/fft.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/five_binops.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/five_binops_new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/if-else-new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/mac.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/mac_new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/mat_mul.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/mat_mul_new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply-new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/mixed.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/mixed_new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/mult.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/mult_new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/multiple_adds_new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/out_of_order.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/out_of_order_new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/point-product.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/return-new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/scalar-new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/scalar.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/ternary-new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/var.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/var_new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/width5.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/width5_new.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/width9.expect delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/width9_new.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.expect b/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.expect deleted file mode 100644 index ecbca2f2..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.expect +++ /dev/null @@ -1,249 +0,0 @@ - %4 = alloca [2 x float]*, align 8 - %5 = alloca [2 x float]*, align 8 - %6 = alloca [3 x float]*, align 8 - %7 = alloca i32, align 4 - %8 = alloca i32, align 4 - %9 = alloca i32, align 4 - %10 = alloca i32, align 4 - %11 = alloca i32, align 4 - %12 = alloca i32, align 4 - %13 = alloca i32, align 4 - %14 = alloca i32, align 4 - %15 = alloca float, align 4 - store [2 x float]* %0, [2 x float]** %4, align 8 - store [2 x float]* %1, [2 x float]** %5, align 8 - store [3 x float]* %2, [3 x float]** %6, align 8 - store i32 0, i32* %7, align 4 - br label %16 - -16: ; preds = %117, %3 - %17 = load i32, i32* %7, align 4 - %18 = icmp slt i32 %17, 3 - br i1 %18, label %19, label %120 - -19: ; preds = %16 - store i32 0, i32* 
%8, align 4 - br label %20 - -20: ; preds = %113, %19 - %21 = load i32, i32* %8, align 4 - %22 = icmp slt i32 %21, 3 - br i1 %22, label %23, label %116 - -23: ; preds = %20 - store i32 0, i32* %9, align 4 - br label %24 - -24: ; preds = %109, %23 - %25 = load i32, i32* %9, align 4 - %26 = icmp slt i32 %25, 2 - br i1 %26, label %27, label %112 - -27: ; preds = %24 - store i32 0, i32* %10, align 4 - br label %28 - -28: ; preds = %105, %27 - %29 = load i32, i32* %10, align 4 - %30 = icmp slt i32 %29, 2 - br i1 %30, label %31, label %108 - -31: ; preds = %28 - %32 = load i32, i32* %9, align 4 - %33 = sub nsw i32 1, %32 - store i32 %33, i32* %11, align 4 - %34 = load i32, i32* %10, align 4 - %35 = sub nsw i32 1, %34 - store i32 %35, i32* %12, align 4 - %36 = load i32, i32* %7, align 4 - %37 = load i32, i32* %11, align 4 - %38 = sub nsw i32 %36, %37 - store i32 %38, i32* %13, align 4 - %39 = load i32, i32* %8, align 4 - %40 = load i32, i32* %12, align 4 - %41 = sub nsw i32 %39, %40 - store i32 %41, i32* %14, align 4 - %42 = load i32, i32* %13, align 4 - %43 = icmp sge i32 %42, 0 - br i1 %43, label %44, label %104 - -44: ; preds = %31 - %45 = load i32, i32* %13, align 4 - %46 = icmp slt i32 %45, 2 - br i1 %46, label %47, label %104 - -47: ; preds = %44 - %48 = load i32, i32* %14, align 4 - %49 = icmp sge i32 %48, 0 - br i1 %49, label %50, label %104 - -50: ; preds = %47 - %51 = load i32, i32* %14, align 4 - %52 = icmp slt i32 %51, 2 - br i1 %52, label %53, label %104 - -53: ; preds = %50 - %54 = load [2 x float]*, [2 x float]** %4, align 8 - %55 = load i32, i32* %13, align 4 - %56 = sext i32 %55 to i64 - %57 = getelementptr inbounds [2 x float], [2 x float]* %54, i64 %56 - %58 = load i32, i32* %14, align 4 - %59 = sext i32 %58 to i64 - %60 = getelementptr inbounds [2 x float], [2 x float]* %57, i64 0, i64 %59 - %61 = load float, float* %60, align 4 - %62 = load [2 x float]*, [2 x float]** %5, align 8 - %63 = load i32, i32* %11, align 4 - %64 = sext i32 %63 to i64 - %65 = getelementptr inbounds [2 x float], [2 x float]* %62, i64 %64 - %66 = load i32, i32* %12, align 4 - %67 = sext i32 %66 to i64 - %68 = getelementptr inbounds [2 x float], [2 x float]* %65, i64 0, i64 %67 - %69 = load float, float* %68, align 4 - %70 = fmul float %61, %69 - %71 = load float, float* %60, align 4 - %72 = insertelement <4 x float> zeroinitializer, float %71, i32 0 - %73 = insertelement <4 x float> %72, float 0.000000e+00, i32 1 - %74 = insertelement <4 x float> %73, float 0.000000e+00, i32 2 - %75 = insertelement <4 x float> %74, float 0.000000e+00, i32 3 - %76 = load float, float* %68, align 4 - %77 = insertelement <4 x float> zeroinitializer, float %76, i32 0 - %78 = insertelement <4 x float> %77, float 0.000000e+00, i32 1 - %79 = insertelement <4 x float> %78, float 0.000000e+00, i32 2 - %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 3 - %81 = fmul <4 x float> %75, %80 - %82 = extractelement <4 x float> %81, i32 0 - store float %82, float* %15, align 4 - %83 = load float, float* %15, align 4 - %84 = load [3 x float]*, [3 x float]** %6, align 8 - %85 = load i32, i32* %7, align 4 - %86 = sext i32 %85 to i64 - %87 = getelementptr inbounds [3 x float], [3 x float]* %84, i64 %86 - %88 = load i32, i32* %8, align 4 - %89 = sext i32 %88 to i64 - %90 = getelementptr inbounds [3 x float], [3 x float]* %87, i64 0, i64 %89 - %91 = load float, float* %90, align 4 - %92 = fadd float %91, %83 - %93 = load float, float* %90, align 4 - %94 = insertelement <4 x float> zeroinitializer, float %93, i32 0 - %95 = 
insertelement <4 x float> %94, float 0.000000e+00, i32 1 - %96 = insertelement <4 x float> %95, float 0.000000e+00, i32 2 - %97 = insertelement <4 x float> %96, float 0.000000e+00, i32 3 - %98 = insertelement <4 x float> zeroinitializer, float %83, i32 0 - %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 1 - %100 = insertelement <4 x float> %99, float 0.000000e+00, i32 2 - %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 3 - %102 = fadd <4 x float> %97, %101 - %103 = extractelement <4 x float> %102, i32 0 - store float %103, float* %90, align 4 - br label %104 - -104: ; preds = %53, %50, %47, %44, %31 - br label %105 - -105: ; preds = %104 - %106 = load i32, i32* %10, align 4 - %107 = add nsw i32 %106, 1 - store i32 %107, i32* %10, align 4 - br label %28 - -108: ; preds = %28 - br label %109 - -109: ; preds = %108 - %110 = load i32, i32* %9, align 4 - %111 = add nsw i32 %110, 1 - store i32 %111, i32* %9, align 4 - br label %24 - -112: ; preds = %24 - br label %113 - -113: ; preds = %112 - %114 = load i32, i32* %8, align 4 - %115 = add nsw i32 %114, 1 - store i32 %115, i32* %8, align 4 - br label %20 - -116: ; preds = %20 - br label %117 - -117: ; preds = %116 - %118 = load i32, i32* %7, align 4 - %119 = add nsw i32 %118, 1 - store i32 %119, i32* %7, align 4 - br label %16 - -120: ; preds = %16 - ret void - %1 = alloca i32, align 4 - %2 = alloca [2 x [2 x float]], align 16 - %3 = alloca [2 x [2 x float]], align 16 - %4 = alloca [3 x [3 x float]], align 16 - %5 = alloca i32, align 4 - %6 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %7 = bitcast [2 x [2 x float]]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %7, i8* align 16 bitcast ([2 x [2 x float]]* @__const.main.mat_in to i8*), i64 16, i1 false) - %8 = bitcast [2 x [2 x float]]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %8, i8* align 16 bitcast ([2 x [2 x float]]* @__const.main.f_in to i8*), i64 16, i1 false) - %9 = bitcast [3 x [3 x float]]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %9, i8 0, i64 36, i1 false) - %10 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %2, i64 0, i64 0 - %11 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %3, i64 0, i64 0 - %12 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %4, i64 0, i64 0 - call void @convolution([2 x float]* %10, [2 x float]* %11, [3 x float]* %12) - store i32 0, i32* %5, align 4 - br label %13 - -13: ; preds = %34, %0 - %14 = load i32, i32* %5, align 4 - %15 = icmp slt i32 %14, 3 - br i1 %15, label %16, label %37 - -16: ; preds = %13 - store i32 0, i32* %6, align 4 - br label %17 - -17: ; preds = %30, %16 - %18 = load i32, i32* %6, align 4 - %19 = icmp slt i32 %18, 3 - br i1 %19, label %20, label %33 - -20: ; preds = %17 - %21 = load i32, i32* %5, align 4 - %22 = sext i32 %21 to i64 - %23 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %4, i64 0, i64 %22 - %24 = load i32, i32* %6, align 4 - %25 = sext i32 %24 to i64 - %26 = getelementptr inbounds [3 x float], [3 x float]* %23, i64 0, i64 %25 - %27 = load float, float* %26, align 4 - %28 = fpext float %27 to double - %29 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %28) - br label %30 - -30: ; preds = %20 - %31 = load i32, i32* %6, align 4 - %32 = add nsw i32 %31, 1 - store i32 %32, i32* %6, align 4 - br label %17 - -33: ; preds = %17 - br label %34 - -34: ; preds = %33 - %35 = load i32, i32* %5, align 4 - %36 = add nsw i32 %35, 1 - store i32 %36, i32* %5, align 4 - br label %13 - -37: ; preds = %13 - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("0,-1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("0,-2,"), Get([3, 4]), Mul([2, 5]), Num(0), Num(0), Num(0), Vec([6, 7, 8, 9])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name1"), Symbol("0,-1,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Symbol("no-array-name2"), Symbol("0,-2,"), Get([7, 8]), Num(0), Num(0), Num(0), LitVec([9, 10, 11, 12]), VecMul([6, 13])] -RecExpr { nodes: [Symbol("no-array-name3"), Symbol("0,-3,"), Get([0, 1]), Symbol("no-temp-name1"), Add([2, 3]), Num(0), Num(0), Num(0), Vec([4, 5, 6, 7])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name3"), Symbol("0,-3,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Symbol("no-temp-name1"), Num(0), Num(0), Num(0), Vec([7, 8, 9, 10]), VecAdd([6, 11])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-conv.expect b/src/dios-egraphs/Diospyros/llvm-tests/2d-conv.expect deleted file mode 100644 index 93130f82..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/2d-conv.expect +++ /dev/null @@ -1,227 +0,0 @@ - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - %7 = alloca i32, align 4 - %8 = alloca i32, align 4 - %9 = alloca i32, align 4 - %10 = alloca i32, align 4 - %11 = alloca i32, align 4 - %12 = alloca i32, align 4 - %13 = alloca i32, align 4 - %14 = alloca i32, align 4 - %15 = alloca float, align 4 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - store i32 0, i32* %7, align 4 - br label %16 - -16: ; preds = %117, %3 - %17 = load i32, i32* %7, align 4 - %18 = icmp slt i32 %17, 3 - br i1 %18, label %19, label %120 - -19: ; preds = %16 - store i32 0, i32* %8, align 4 - br label %20 - -20: ; preds = %113, %19 - %21 = load i32, i32* %8, align 4 - %22 = icmp slt i32 %21, 3 - br i1 %22, label %23, label %116 - -23: ; preds = %20 - store i32 0, i32* %9, align 4 - br label %24 - -24: ; preds = %109, %23 - %25 = load i32, i32* %9, align 4 - %26 = icmp slt i32 %25, 2 - br i1 %26, label %27, label %112 - -27: ; preds = %24 - store i32 0, i32* %10, align 4 - br label %28 - -28: ; preds = %105, %27 - %29 = load i32, i32* %10, align 4 - %30 = icmp slt i32 %29, 2 - br i1 %30, label %31, label %108 - -31: ; preds = %28 - %32 = load i32, i32* %9, align 4 - %33 = sub nsw i32 1, %32 - store i32 %33, i32* %11, align 4 - %34 = load i32, i32* %10, align 4 - %35 = sub nsw i32 1, %34 - store i32 %35, i32* %12, align 4 - %36 = load i32, i32* %7, align 4 - %37 = load i32, i32* %11, align 4 - %38 = sub nsw 
i32 %36, %37 - store i32 %38, i32* %13, align 4 - %39 = load i32, i32* %8, align 4 - %40 = load i32, i32* %12, align 4 - %41 = sub nsw i32 %39, %40 - store i32 %41, i32* %14, align 4 - %42 = load i32, i32* %13, align 4 - %43 = icmp sge i32 %42, 0 - br i1 %43, label %44, label %104 - -44: ; preds = %31 - %45 = load i32, i32* %13, align 4 - %46 = icmp slt i32 %45, 2 - br i1 %46, label %47, label %104 - -47: ; preds = %44 - %48 = load i32, i32* %14, align 4 - %49 = icmp sge i32 %48, 0 - br i1 %49, label %50, label %104 - -50: ; preds = %47 - %51 = load i32, i32* %14, align 4 - %52 = icmp slt i32 %51, 2 - br i1 %52, label %53, label %104 - -53: ; preds = %50 - %54 = load float*, float** %4, align 8 - %55 = load i32, i32* %13, align 4 - %56 = mul nsw i32 %55, 2 - %57 = load i32, i32* %14, align 4 - %58 = add nsw i32 %56, %57 - %59 = sext i32 %58 to i64 - %60 = getelementptr inbounds float, float* %54, i64 %59 - %61 = load float, float* %60, align 4 - %62 = load float*, float** %5, align 8 - %63 = load i32, i32* %11, align 4 - %64 = mul nsw i32 %63, 2 - %65 = load i32, i32* %12, align 4 - %66 = add nsw i32 %64, %65 - %67 = sext i32 %66 to i64 - %68 = getelementptr inbounds float, float* %62, i64 %67 - %69 = load float, float* %68, align 4 - %70 = fmul float %61, %69 - %71 = load float, float* %60, align 4 - %72 = insertelement <4 x float> zeroinitializer, float %71, i32 0 - %73 = insertelement <4 x float> %72, float 0.000000e+00, i32 1 - %74 = insertelement <4 x float> %73, float 0.000000e+00, i32 2 - %75 = insertelement <4 x float> %74, float 0.000000e+00, i32 3 - %76 = load float, float* %68, align 4 - %77 = insertelement <4 x float> zeroinitializer, float %76, i32 0 - %78 = insertelement <4 x float> %77, float 0.000000e+00, i32 1 - %79 = insertelement <4 x float> %78, float 0.000000e+00, i32 2 - %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 3 - %81 = fmul <4 x float> %75, %80 - %82 = extractelement <4 x float> %81, i32 0 - store float %82, float* %15, align 4 - %83 = load float, float* %15, align 4 - %84 = load float*, float** %6, align 8 - %85 = load i32, i32* %7, align 4 - %86 = mul nsw i32 %85, 3 - %87 = load i32, i32* %8, align 4 - %88 = add nsw i32 %86, %87 - %89 = sext i32 %88 to i64 - %90 = getelementptr inbounds float, float* %84, i64 %89 - %91 = load float, float* %90, align 4 - %92 = fadd float %91, %83 - %93 = load float, float* %90, align 4 - %94 = insertelement <4 x float> zeroinitializer, float %93, i32 0 - %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 1 - %96 = insertelement <4 x float> %95, float 0.000000e+00, i32 2 - %97 = insertelement <4 x float> %96, float 0.000000e+00, i32 3 - %98 = insertelement <4 x float> zeroinitializer, float %83, i32 0 - %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 1 - %100 = insertelement <4 x float> %99, float 0.000000e+00, i32 2 - %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 3 - %102 = fadd <4 x float> %97, %101 - %103 = extractelement <4 x float> %102, i32 0 - store float %103, float* %90, align 4 - br label %104 - -104: ; preds = %53, %50, %47, %44, %31 - br label %105 - -105: ; preds = %104 - %106 = load i32, i32* %10, align 4 - %107 = add nsw i32 %106, 1 - store i32 %107, i32* %10, align 4 - br label %28 - -108: ; preds = %28 - br label %109 - -109: ; preds = %108 - %110 = load i32, i32* %9, align 4 - %111 = add nsw i32 %110, 1 - store i32 %111, i32* %9, align 4 - br label %24 - -112: ; preds = %24 - br label %113 - -113: ; preds = %112 - %114 = load i32, i32* %8, align 4 - %115 
= add nsw i32 %114, 1 - store i32 %115, i32* %8, align 4 - br label %20 - -116: ; preds = %20 - br label %117 - -117: ; preds = %116 - %118 = load i32, i32* %7, align 4 - %119 = add nsw i32 %118, 1 - store i32 %119, i32* %7, align 4 - br label %16 - -120: ; preds = %16 - ret void - %1 = alloca i32, align 4 - %2 = alloca [4 x float], align 16 - %3 = alloca [4 x float], align 16 - %4 = alloca [9 x float], align 16 - %5 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %6 = bitcast [4 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %6, i8* align 16 bitcast ([4 x float]* @__const.main.mat_in to i8*), i64 16, i1 false) - %7 = bitcast [4 x float]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %7, i8* align 16 bitcast ([4 x float]* @__const.main.f_in to i8*), i64 16, i1 false) - %8 = bitcast [9 x float]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %8, i8 0, i64 36, i1 false) - %9 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %10 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - %11 = getelementptr inbounds [9 x float], [9 x float]* %4, i64 0, i64 0 - call void @convolution(float* %9, float* %10, float* %11) - store i32 0, i32* %5, align 4 - br label %12 - -12: ; preds = %22, %0 - %13 = load i32, i32* %5, align 4 - %14 = icmp slt i32 %13, 9 - br i1 %14, label %15, label %25 - -15: ; preds = %12 - %16 = load i32, i32* %5, align 4 - %17 = sext i32 %16 to i64 - %18 = getelementptr inbounds [9 x float], [9 x float]* %4, i64 0, i64 %17 - %19 = load float, float* %18, align 4 - %20 = fpext float %19 to double - %21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %20) - br label %22 - -22: ; preds = %15 - %23 = load i32, i32* %5, align 4 - %24 = add nsw i32 %23, 1 - store i32 %24, i32* %5, align 4 - br label %12 - -25: ; preds = %12 - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("-2,"), Get([3, 4]), Mul([2, 5]), Num(0), Num(0), Num(0), Vec([6, 7, 8, 9])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Symbol("no-array-name2"), Symbol("-2,"), Get([7, 8]), Num(0), Num(0), Num(0), LitVec([9, 10, 11, 12]), VecMul([6, 13])] -RecExpr { nodes: [Symbol("no-array-name3"), Symbol("-3,"), Get([0, 1]), Symbol("no-temp-name1"), Add([2, 3]), Num(0), Num(0), Num(0), Vec([4, 5, 6, 7])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name3"), Symbol("-3,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Symbol("no-temp-name1"), Num(0), Num(0), Num(0), Vec([7, 8, 9, 10]), VecAdd([6, 11])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply.expect b/src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply.expect deleted file mode 100644 index 45c63123..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply.expect +++ /dev/null @@ -1,152 +0,0 @@ - %4 = 
alloca [2 x float]*, align 8 - %5 = alloca [2 x float]*, align 8 - %6 = alloca [2 x float]*, align 8 - %7 = alloca i32, align 4 - %8 = alloca i32, align 4 - %9 = alloca float, align 4 - %10 = alloca i32, align 4 - store [2 x float]* %0, [2 x float]** %4, align 8 - store [2 x float]* %1, [2 x float]** %5, align 8 - store [2 x float]* %2, [2 x float]** %6, align 8 - store i32 0, i32* %7, align 4 - br label %11 - -11: ; preds = %74, %3 - %12 = load i32, i32* %7, align 4 - %13 = icmp slt i32 %12, 2 - br i1 %13, label %14, label %77 - -14: ; preds = %11 - store i32 0, i32* %8, align 4 - br label %15 - -15: ; preds = %70, %14 - %16 = load i32, i32* %8, align 4 - %17 = icmp slt i32 %16, 2 - br i1 %17, label %18, label %73 - -18: ; preds = %15 - store float 0.000000e+00, float* %9, align 4 - store i32 0, i32* %10, align 4 - br label %19 - -19: ; preds = %58, %18 - %20 = load i32, i32* %10, align 4 - %21 = icmp slt i32 %20, 2 - br i1 %21, label %22, label %61 - -22: ; preds = %19 - %23 = load [2 x float]*, [2 x float]** %4, align 8 - %24 = load i32, i32* %7, align 4 - %25 = sext i32 %24 to i64 - %26 = getelementptr inbounds [2 x float], [2 x float]* %23, i64 %25 - %27 = load i32, i32* %10, align 4 - %28 = sext i32 %27 to i64 - %29 = getelementptr inbounds [2 x float], [2 x float]* %26, i64 0, i64 %28 - %30 = load float, float* %29, align 4 - %31 = load [2 x float]*, [2 x float]** %5, align 8 - %32 = load i32, i32* %10, align 4 - %33 = sext i32 %32 to i64 - %34 = getelementptr inbounds [2 x float], [2 x float]* %31, i64 %33 - %35 = load i32, i32* %8, align 4 - %36 = sext i32 %35 to i64 - %37 = getelementptr inbounds [2 x float], [2 x float]* %34, i64 0, i64 %36 - %38 = load float, float* %37, align 4 - %39 = fmul float %30, %38 - %40 = load float, float* %9, align 4 - %41 = fadd float %40, %39 - %42 = insertelement <4 x float> zeroinitializer, float %40, i32 0 - %43 = insertelement <4 x float> %42, float 0.000000e+00, i32 1 - %44 = insertelement <4 x float> %43, float 0.000000e+00, i32 2 - %45 = insertelement <4 x float> %44, float 0.000000e+00, i32 3 - %46 = load float, float* %29, align 4 - %47 = insertelement <4 x float> zeroinitializer, float %46, i32 0 - %48 = insertelement <4 x float> %47, float 0.000000e+00, i32 1 - %49 = insertelement <4 x float> %48, float 0.000000e+00, i32 2 - %50 = insertelement <4 x float> %49, float 0.000000e+00, i32 3 - %51 = load float, float* %37, align 4 - %52 = insertelement <4 x float> zeroinitializer, float %51, i32 0 - %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 1 - %54 = insertelement <4 x float> %53, float 0.000000e+00, i32 2 - %55 = insertelement <4 x float> %54, float 0.000000e+00, i32 3 - %56 = call <4 x float> @llvm.fma.f32(<4 x float> %50, <4 x float> %55, <4 x float> %45) - %57 = extractelement <4 x float> %56, i32 0 - store float %57, float* %9, align 4 - br label %58 - -58: ; preds = %22 - %59 = load i32, i32* %10, align 4 - %60 = add nsw i32 %59, 1 - store i32 %60, i32* %10, align 4 - br label %19 - -61: ; preds = %19 - %62 = load float, float* %9, align 4 - %63 = load [2 x float]*, [2 x float]** %6, align 8 - %64 = load i32, i32* %7, align 4 - %65 = sext i32 %64 to i64 - %66 = getelementptr inbounds [2 x float], [2 x float]* %63, i64 %65 - %67 = load i32, i32* %8, align 4 - %68 = sext i32 %67 to i64 - %69 = getelementptr inbounds [2 x float], [2 x float]* %66, i64 0, i64 %68 - store float %62, float* %69, align 4 - br label %70 - -70: ; preds = %61 - %71 = load i32, i32* %8, align 4 - %72 = add nsw i32 %71, 1 - store i32 %72, i32* 
%8, align 4 - br label %15 - -73: ; preds = %15 - br label %74 - -74: ; preds = %73 - %75 = load i32, i32* %7, align 4 - %76 = add nsw i32 %75, 1 - store i32 %76, i32* %7, align 4 - br label %11 - -77: ; preds = %11 - ret void - %1 = alloca i32, align 4 - %2 = alloca [2 x [2 x float]], align 16 - %3 = alloca [2 x [2 x float]], align 16 - %4 = alloca [2 x [2 x float]], align 16 - store i32 0, i32* %1, align 4 - %5 = bitcast [2 x [2 x float]]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %5, i8* align 16 bitcast ([2 x [2 x float]]* @__const.main.a_in to i8*), i64 16, i1 false) - %6 = bitcast [2 x [2 x float]]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %6, i8* align 16 bitcast ([2 x [2 x float]]* @__const.main.b_in to i8*), i64 16, i1 false) - %7 = bitcast [2 x [2 x float]]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %7, i8 0, i64 16, i1 false) - %8 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %2, i64 0, i64 0 - %9 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %3, i64 0, i64 0 - %10 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %4, i64 0, i64 0 - call void @matrix_multiply([2 x float]* %8, [2 x float]* %9, [2 x float]* %10) - %11 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %4, i64 0, i64 0 - %12 = getelementptr inbounds [2 x float], [2 x float]* %11, i64 0, i64 0 - %13 = load float, float* %12, align 16 - %14 = fpext float %13 to double - %15 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %14) - %16 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %4, i64 0, i64 0 - %17 = getelementptr inbounds [2 x float], [2 x float]* %16, i64 0, i64 1 - %18 = load float, float* %17, align 4 - %19 = fpext float %18 to double - %20 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %19) - %21 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %4, i64 0, i64 1 - %22 = getelementptr inbounds [2 x float], [2 x float]* %21, i64 0, i64 0 - %23 = load float, float* %22, align 8 - %24 = fpext float %23 to double - %25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %24) - %26 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %4, i64 0, i64 1 - %27 = getelementptr inbounds [2 x float], [2 x float]* %26, i64 0, i64 1 - %28 = load float, float* %27, align 4 - %29 = fpext float %28 to double - %30 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %29) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("0,-1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("0,-2,"), Get([3, 4]), Mul([2, 5]), Symbol("no-temp-name1"), Add([7, 6]), Num(0), Num(0), Num(0), Vec([8, 9, 10, 11])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-temp-name1"), Num(0), Num(0), Num(0), Vec([0, 1, 2, 3]), Symbol("no-array-name1"), Symbol("0,-1,"), Get([5, 6]), Num(0), Num(0), Num(0), LitVec([7, 8, 9, 10]), Symbol("no-array-name2"), Symbol("0,-2,"), Get([12, 13]), Num(0), Num(0), Num(0), LitVec([14, 15, 16, 17]), VecMAC([4, 11, 18])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d.c b/src/dios-egraphs/Diospyros/llvm-tests/2d.c index a022be0f..a794d75a 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/2d.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/2d.c @@ -1,16 +1,24 @@ #include #include +#define SIZE 4 -float a_in[4][4] = { - {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}; -float b_in[] = {5, 6, 7, 8}; - -int main(int argc, char **argv) { - float c_out[4]; +void sum(float a_in[SIZE][SIZE], float b_in[SIZE], float c_out[SIZE]) { c_out[0] = a_in[0][3] + b_in[0]; c_out[1] = a_in[1][3] + b_in[1]; c_out[2] = a_in[2][3] + b_in[2]; c_out[3] = a_in[3][3] + a_in[0][1]; +} + +int main(int argc, char **argv) { + float a_in[SIZE][SIZE] = { + {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 9); + assert(c_out[1] == 14); + assert(c_out[2] == 19); + assert(c_out[3] == 18); printf("first: %f\n", c_out[0]); printf("second: %f\n", c_out[1]); printf("third: %f\n", c_out[2]); diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d.expect b/src/dios-egraphs/Diospyros/llvm-tests/2d.expect deleted file mode 100644 index e3ddf47d..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/2d.expect +++ /dev/null @@ -1,70 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 0, i64 3), align 4 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %11 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 1, i64 3), align 4 - %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %13 = fadd float %11, %12 - %14 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %15 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 2, i64 3), align 4 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %19 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x 
[4 x float]]* @a_in, i64 0, i64 3, i64 3), align 4 - %20 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 0, i64 1), align 4 - %21 = fadd float %19, %20 - %22 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %23 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 0, i64 3), align 4 - %24 = insertelement <4 x float> zeroinitializer, float %23, i32 0 - %25 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 1, i64 3), align 4 - %26 = insertelement <4 x float> %24, float %25, i32 1 - %27 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 2, i64 3), align 4 - %28 = insertelement <4 x float> %26, float %27, i32 2 - %29 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 3, i64 3), align 4 - %30 = insertelement <4 x float> %28, float %29, i32 3 - %31 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 - %33 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %34 = insertelement <4 x float> %32, float %33, i32 1 - %35 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %36 = insertelement <4 x float> %34, float %35, i32 2 - %37 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 0, i64 1), align 4 - %38 = insertelement <4 x float> %36, float %37, i32 3 - %39 = fadd <4 x float> %30, %38 - %40 = extractelement <4 x float> %39, i32 0 - store float %40, float* %10, align 16 - %41 = extractelement <4 x float> %39, i32 1 - store float %41, float* %14, align 4 - %42 = extractelement <4 x float> %39, i32 2 - store float %42, float* %18, align 8 - %43 = extractelement <4 x float> %39, i32 3 - store float %43, float* %22, align 4 - %44 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %45 = load float, float* %44, align 16 - %46 = fpext float %45 to double - %47 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %46) - %48 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %49 = load float, float* %48, align 4 - %50 = fpext float %49 to double - %51 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %50) - %52 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %53 = load float, float* %52, align 8 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %54) - %56 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %57 = load float, float* %56, align 4 - %58 = fpext float %57 to double - %59 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %58)
- ret i32 0
----STDERR---
-RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,3,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("a_in"), Symbol("0,1,3,"), Get([7, 8]), Symbol("b_in"), Symbol("0,1,"), Get([10, 11]), Add([9, 12]), Symbol("a_in"), Symbol("0,2,3,"), Get([14, 15]), Symbol("b_in"), Symbol("0,2,"), Get([17, 18]), Add([16, 19]), Symbol("a_in"), Symbol("0,3,3,"), Get([21, 22]), Symbol("a_in"), Symbol("0,0,1,"), Get([24, 25]), Add([23, 26]), Vec([6, 13, 20, 27])] }
-Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop
-Stopped after 5 iterations, reason: Some(Saturated)
-[Symbol("a_in"), Symbol("0,0,3,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,3,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,3,"), Get([6, 7]), Symbol("a_in"), Symbol("0,3,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("a_in"), Symbol("0,0,1,"), Get([22, 23]), Vec([15, 18, 21, 24]), VecAdd([12, 25])]
diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d_new.c b/src/dios-egraphs/Diospyros/llvm-tests/2d_new.c
deleted file mode 100644
index a794d75a..00000000
--- a/src/dios-egraphs/Diospyros/llvm-tests/2d_new.c
+++ /dev/null
@@ -1,28 +0,0 @@
-#include
-#include
-#define SIZE 4
-
-void sum(float a_in[SIZE][SIZE], float b_in[SIZE], float c_out[SIZE]) {
-    c_out[0] = a_in[0][3] + b_in[0];
-    c_out[1] = a_in[1][3] + b_in[1];
-    c_out[2] = a_in[2][3] + b_in[2];
-    c_out[3] = a_in[3][3] + a_in[0][1];
-}
-
-int main(int argc, char **argv) {
-    float a_in[SIZE][SIZE] = {
-        {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}};
-    float b_in[SIZE] = {5, 6, 7, 8};
-    float c_out[SIZE];
-    sum(a_in, b_in, c_out);
-    assert(c_out[0] == 9);
-    assert(c_out[1] == 14);
-    assert(c_out[2] == 19);
-    assert(c_out[3] == 18);
-    printf("first: %f\n", c_out[0]);
-    printf("second: %f\n", c_out[1]);
-    printf("third: %f\n", c_out[2]);
-    printf("fourth: %f\n", c_out[3]);
-    // expected: 9, 14, 19, 18
-    return 0;
-}
\ No newline at end of file
diff --git a/src/dios-egraphs/Diospyros/llvm-tests/add.c b/src/dios-egraphs/Diospyros/llvm-tests/add.c
index 54b1ef0b..ebbcdf35 100644
--- a/src/dios-egraphs/Diospyros/llvm-tests/add.c
+++ b/src/dios-egraphs/Diospyros/llvm-tests/add.c
@@ -1,14 +1,23 @@
+#include
 #include
+#define SIZE 4
 
-float a_in[] = {1, 2, 3, 4};
-float b_in[] = {5, 6, 7, 8};
-
-int main(int argc, char **argv) {
-    float c_out[4];
+void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) {
     c_out[0] = a_in[0] + b_in[0];
     c_out[1] = a_in[1] + b_in[1];
     c_out[2] = a_in[2] + b_in[2];
     c_out[3] = a_in[3] + b_in[3];
+}
+
+int main(int argc, char **argv) {
+    float a_in[SIZE] = {1, 2, 3, 4};
+    float b_in[SIZE] = {5, 6, 7, 8};
+    float c_out[SIZE];
+    sum(a_in, b_in, c_out);
+    assert(c_out[0] == 6);
+    assert(c_out[1] == 8);
+    assert(c_out[2] == 10);
+    assert(c_out[3] == 12);
     printf("first: %f\n", c_out[0]);
     printf("second: %f\n", c_out[1]);
     printf("third: %f\n", c_out[2]);
diff --git a/src/dios-egraphs/Diospyros/llvm-tests/add.expect b/src/dios-egraphs/Diospyros/llvm-tests/add.expect
deleted file mode 100644
index 9fd209fb..00000000
--- a/src/dios-egraphs/Diospyros/llvm-tests/add.expect
+++ /dev/null
@@ -1,70 +0,0 @@
- %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %13 = fadd float %11, %12 - %14 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %15 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %19 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %21 = fadd float %19, %20 - %22 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %23 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %24 = insertelement <4 x float> zeroinitializer, float %23, i32 0 - %25 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %26 = insertelement <4 x float> %24, float %25, i32 1 - %27 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %28 = insertelement <4 x float> %26, float %27, i32 2 - %29 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %30 = insertelement <4 x float> %28, float %29, i32 3 - %31 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 - %33 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %34 = insertelement <4 x float> %32, float %33, i32 1 - %35 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %36 = insertelement <4 x float> %34, float %35, i32 2 - %37 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %38 = insertelement <4 x float> %36, float %37, i32 3 - %39 = fadd <4 x float> %30, %38 - %40 = extractelement <4 x float> %39, i32 0 - store float %40, float* %10, align 16 - %41 = extractelement <4 x float> %39, i32 1 - store float %41, float* %14, align 4 - %42 = extractelement <4 x float> %39, i32 2 - store float %42, float* %18, align 8 - %43 = extractelement <4 x float> %39, i32 3 - store float %43, float* %22, align 4 - %44 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %45 = load float, float* %44, align 16 - %46 = fpext float %45 to double - %47 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %46) - %48 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %49 = load float, float* %48, align 4 - %50 = fpext float %49 to double - %51 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %50) - %52 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %53 = load float, float* %52, align 8 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %54) - %56 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %57 = load float, float* %56, align 4 - %58 = fpext float %57 to double - %59 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %58) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("a_in"), Symbol("0,1,"), Get([7, 8]), Symbol("b_in"), Symbol("0,1,"), Get([10, 11]), Add([9, 12]), Symbol("a_in"), Symbol("0,2,"), Get([14, 15]), Symbol("b_in"), Symbol("0,2,"), Get([17, 18]), Add([16, 19]), Symbol("a_in"), Symbol("0,3,"), Get([21, 22]), Symbol("b_in"), Symbol("0,3,"), Get([24, 25]), Add([23, 26]), Vec([6, 13, 20, 27])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,3,"), Get([22, 23]), LitVec([15, 18, 21, 24]), VecAdd([12, 25])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/add_mult.c b/src/dios-egraphs/Diospyros/llvm-tests/add_mult.c index e2c07b00..732ba987 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/add_mult.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/add_mult.c @@ -1,18 +1,27 @@ +#include #include +#define SIZE 4 -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {2, 3, 4, 5}; +void add_mult(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] + b_in[0]; + c_out[1] = a_in[1] * b_in[1]; + c_out[2] = a_in[2] + b_in[2]; + c_out[3] = a_in[3] * b_in[3]; +} int main(int argc, char **argv) { - float d_out[4]; - d_out[0] = a_in[0] + b_in[0]; - d_out[1] = a_in[1] * b_in[1]; - d_out[2] = a_in[2] + b_in[2]; - d_out[3] = a_in[3] * b_in[3]; - printf("first: %f\n", d_out[0]); - printf("second: %f\n", d_out[1]); - printf("third: %f\n", d_out[2]); - printf("fourth: %f\n", d_out[3]); + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {2, 3, 4, 5}; + float c_out[SIZE]; + add_mult(a_in, b_in, c_out); + assert(c_out[0] == 3); + assert(c_out[1] == 6); + assert(c_out[2] == 7); + assert(c_out[3] == 20); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); // expected:3, 6, 7, 20 return 0; } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/add_mult.expect 
b/src/dios-egraphs/Diospyros/llvm-tests/add_mult.expect deleted file mode 100644 index 7ab7e34b..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/add_mult.expect +++ /dev/null @@ -1,73 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %13 = fmul float %11, %12 - %14 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %15 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %19 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %21 = fmul float %19, %20 - %22 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %23 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %24 = insertelement <4 x float> zeroinitializer, float %23, i32 0 - %25 = insertelement <4 x float> %24, float 0.000000e+00, i32 1 - %26 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %27 = insertelement <4 x float> %25, float %26, i32 2 - %28 = insertelement <4 x float> %27, float 0.000000e+00, i32 3 - %29 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %30 = insertelement <4 x float> , float %29, i32 1 - %31 = insertelement <4 x float> %30, float 1.000000e+00, i32 2 - %32 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %33 = insertelement <4 x float> %31, float %32, i32 3 - %34 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %35 = insertelement <4 x float> zeroinitializer, float %34, i32 0 - %36 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %37 = insertelement <4 x float> %35, float %36, i32 1 - %38 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %39 = insertelement <4 x float> %37, float %38, i32 2 - %40 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %41 = insertelement <4 x float> %39, float %40, i32 3 - %42 = call <4 x float> @llvm.fma.f32(<4 x float> %33, <4 x float> %41, <4 x float> %28) - %43 = extractelement <4 x float> %42, i32 0 - store float %43, float* %10, align 16 - %44 = extractelement <4 x float> %42, i32 1 - store float %44, float* %14, align 4 - %45 = extractelement <4 x float> %42, i32 2 - store float %45, float* %18, align 8 - %46 = 
extractelement <4 x float> %42, i32 3 - store float %46, float* %22, align 4 - %47 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %48 = load float, float* %47, align 16 - %49 = fpext float %48 to double - %50 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %49) - %51 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %52 = load float, float* %51, align 4 - %53 = fpext float %52 to double - %54 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %53) - %55 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %56 = load float, float* %55, align 8 - %57 = fpext float %56 to double - %58 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %57) - %59 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %60 = load float, float* %59, align 4 - %61 = fpext float %60 to double - %62 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %61) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("a_in"), Symbol("0,1,"), Get([7, 8]), Symbol("b_in"), Symbol("0,1,"), Get([10, 11]), Mul([9, 12]), Symbol("a_in"), Symbol("0,2,"), Get([14, 15]), Symbol("b_in"), Symbol("0,2,"), Get([17, 18]), Add([16, 19]), Symbol("a_in"), Symbol("0,3,"), Get([21, 22]), Symbol("b_in"), Symbol("0,3,"), Get([24, 25]), Mul([23, 26]), Vec([6, 13, 20, 27])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Num(0), Symbol("a_in"), Symbol("0,2,"), Get([4, 5]), Num(0), LitVec([2, 3, 6, 7]), Num(1), Symbol("a_in"), Symbol("0,1,"), Get([10, 11]), Num(1), Symbol("a_in"), Symbol("0,3,"), Get([14, 15]), Vec([9, 12, 13, 16]), Symbol("b_in"), Symbol("0,0,"), Get([18, 19]), Symbol("b_in"), Symbol("0,1,"), Get([21, 22]), Symbol("b_in"), Symbol("0,2,"), Get([24, 25]), Symbol("b_in"), Symbol("0,3,"), Get([27, 28]), LitVec([20, 23, 26, 29]), VecMAC([8, 17, 30])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/add_mult_new.c b/src/dios-egraphs/Diospyros/llvm-tests/add_mult_new.c deleted file mode 100644 index 732ba987..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/add_mult_new.c +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#define SIZE 4 - -void add_mult(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { - c_out[0] = a_in[0] + b_in[0]; - c_out[1] = a_in[1] * b_in[1]; - c_out[2] = a_in[2] + b_in[2]; - c_out[3] = a_in[3] * b_in[3]; -} - -int main(int argc, char **argv) { - float a_in[SIZE] = {1, 2, 3, 4}; - float b_in[SIZE] = {2, 3, 4, 5}; - float c_out[SIZE]; - add_mult(a_in, b_in, c_out); - assert(c_out[0] == 3); - assert(c_out[1] == 6); - assert(c_out[2] == 7); - assert(c_out[3] == 20); - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - // expected:3, 6, 7, 20 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/add_new.c b/src/dios-egraphs/Diospyros/llvm-tests/add_new.c deleted file mode 100644 index 
ebbcdf35..00000000
--- a/src/dios-egraphs/Diospyros/llvm-tests/add_new.c
+++ /dev/null
@@ -1,27 +0,0 @@
-#include
-#include
-#define SIZE 4
-
-void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) {
-    c_out[0] = a_in[0] + b_in[0];
-    c_out[1] = a_in[1] + b_in[1];
-    c_out[2] = a_in[2] + b_in[2];
-    c_out[3] = a_in[3] + b_in[3];
-}
-
-int main(int argc, char **argv) {
-    float a_in[SIZE] = {1, 2, 3, 4};
-    float b_in[SIZE] = {5, 6, 7, 8};
-    float c_out[SIZE];
-    sum(a_in, b_in, c_out);
-    assert(c_out[0] == 6);
-    assert(c_out[1] == 8);
-    assert(c_out[2] == 10);
-    assert(c_out[3] == 12);
-    printf("first: %f\n", c_out[0]);
-    printf("second: %f\n", c_out[1]);
-    printf("third: %f\n", c_out[2]);
-    printf("fourth: %f\n", c_out[3]);
-    // expected: 6, 8, 10, 12
-    return 0;
-}
\ No newline at end of file
diff --git a/src/dios-egraphs/Diospyros/llvm-tests/cube-new.c b/src/dios-egraphs/Diospyros/llvm-tests/cube-new.c
deleted file mode 100644
index 38de21a5..00000000
--- a/src/dios-egraphs/Diospyros/llvm-tests/cube-new.c
+++ /dev/null
@@ -1,34 +0,0 @@
-#include
-#include
-#include
-#define SIZE 8
-
-void cube(float a_in[SIZE], float b_out[SIZE]) {
-    for (int i = 0; i < SIZE; i++) {
-        b_out[i] = powf(a_in[i], 3);
-    }
-}
-int main(void) {
-    float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2};
-    float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0};
-    cube(a_in, b_out);
-    for (int i = 0; i < SIZE; i++) {
-        printf("%f\n", b_out[i]);
-    }
-    assert(b_out[0] == 729);
-    assert(b_out[1] == 512);
-    assert(b_out[2] == 343);
-    assert(b_out[3] == 216);
-    assert(b_out[4] == 125);
-    assert(b_out[5] == 64);
-    assert(b_out[6] == 27);
-    assert(b_out[7] == 8);
-    // 729.000000
-    // 512.000000
-    // 343.000000
-    // 216.000000
-    // 125.000000
-    // 64.000000
-    // 27.000000
-    // 8.000000
-}
diff --git a/src/dios-egraphs/Diospyros/llvm-tests/cube.c b/src/dios-egraphs/Diospyros/llvm-tests/cube.c
index 25847462..38de21a5 100644
--- a/src/dios-egraphs/Diospyros/llvm-tests/cube.c
+++ b/src/dios-egraphs/Diospyros/llvm-tests/cube.c
@@ -1,3 +1,4 @@
+#include
 #include
 #include
 #define SIZE 8
@@ -14,6 +15,14 @@ int main(void) {
     for (int i = 0; i < SIZE; i++) {
         printf("%f\n", b_out[i]);
     }
+    assert(b_out[0] == 729);
+    assert(b_out[1] == 512);
+    assert(b_out[2] == 343);
+    assert(b_out[3] == 216);
+    assert(b_out[4] == 125);
+    assert(b_out[5] == 64);
+    assert(b_out[6] == 27);
+    assert(b_out[7] == 8);
     // 729.000000
     // 512.000000
     // 343.000000
diff --git a/src/dios-egraphs/Diospyros/llvm-tests/fft.expect b/src/dios-egraphs/Diospyros/llvm-tests/fft.expect
deleted file mode 100644
index 916abfc9..00000000
--- a/src/dios-egraphs/Diospyros/llvm-tests/fft.expect
+++ /dev/null
@@ -1,388 +0,0 @@
- %7 = alloca float*, align 8
- %8 = alloca float*, align 8
- %9 = alloca float*, align 8
- %10 = alloca float*, align 8
- %11 = alloca float*, align 8
- %12 = alloca float*, align 8
- %13 = alloca i32, align 4
- %14 = alloca i32, align 4
- %15 = alloca i32, align 4
- %16 = alloca i32, align 4
- %17 = alloca i32, align 4
- %18 = alloca float, align 4
- %19 = alloca i32, align 4
- store float* %0, float** %7, align 8
- store float* %1, float** %8, align 8
- store float* %2, float** %9, align 8
- store float* %3, float** %10, align 8
- store float* %4, float** %11, align 8
- store float* %5, float** %12, align 8
- store i32 0, i32* %13, align 4
- store i32 0, i32* %14, align 4
- store i32 0, i32* %15, align 4
- store i32 0, i32* %16, align 4
- store i32 4, i32* %17, align 4
- store float 0.000000e+00, float* %18, align 4
- store i32 0, i32* %19, align 4
- br label %20
-
-20:
; preds = %42, %6 - %21 = load i32, i32* %19, align 4 - %22 = icmp slt i32 %21, 8 - br i1 %22, label %23, label %45 - -23: ; preds = %20 - %24 = load float*, float** %7, align 8 - %25 = load i32, i32* %19, align 4 - %26 = sext i32 %25 to i64 - %27 = getelementptr inbounds float, float* %24, i64 %26 - %28 = load float, float* %27, align 4 - %29 = load float*, float** %11, align 8 - %30 = load i32, i32* %19, align 4 - %31 = sext i32 %30 to i64 - %32 = getelementptr inbounds float, float* %29, i64 %31 - store float %28, float* %32, align 4 - %33 = load float*, float** %8, align 8 - %34 = load i32, i32* %19, align 4 - %35 = sext i32 %34 to i64 - %36 = getelementptr inbounds float, float* %33, i64 %35 - %37 = load float, float* %36, align 4 - %38 = load float*, float** %12, align 8 - %39 = load i32, i32* %19, align 4 - %40 = sext i32 %39 to i64 - %41 = getelementptr inbounds float, float* %38, i64 %40 - store float %37, float* %41, align 4 - br label %42 - -42: ; preds = %23 - %43 = load i32, i32* %19, align 4 - %44 = add nsw i32 %43, 1 - store i32 %44, i32* %19, align 4 - br label %20 - -45: ; preds = %20 - br label %46 - -46: ; preds = %240, %45 - %47 = load i32, i32* %17, align 4 - %48 = icmp ne i32 %47, 0 - br i1 %48, label %49, label %245 - -49: ; preds = %46 - %50 = load i32, i32* %17, align 4 - store i32 %50, i32* %14, align 4 - br label %51 - -51: ; preds = %237, %49 - %52 = load i32, i32* %14, align 4 - %53 = icmp slt i32 %52, 8 - br i1 %53, label %54, label %240 - -54: ; preds = %51 - %55 = load i32, i32* %14, align 4 - %56 = load i32, i32* %17, align 4 - %57 = or i32 %55, %56 - store i32 %57, i32* %14, align 4 - %58 = load i32, i32* %14, align 4 - %59 = load i32, i32* %17, align 4 - %60 = xor i32 %58, %59 - store i32 %60, i32* %13, align 4 - %61 = load float*, float** %11, align 8 - %62 = load i32, i32* %13, align 4 - %63 = sext i32 %62 to i64 - %64 = getelementptr inbounds float, float* %61, i64 %63 - %65 = load float, float* %64, align 4 - %66 = load float*, float** %11, align 8 - %67 = load i32, i32* %14, align 4 - %68 = sext i32 %67 to i64 - %69 = getelementptr inbounds float, float* %66, i64 %68 - %70 = load float, float* %69, align 4 - %71 = fadd float %65, %70 - %72 = load float*, float** %11, align 8 - %73 = load i32, i32* %13, align 4 - %74 = sext i32 %73 to i64 - %75 = getelementptr inbounds float, float* %72, i64 %74 - %76 = load float, float* %75, align 4 - %77 = load float*, float** %11, align 8 - %78 = load i32, i32* %14, align 4 - %79 = sext i32 %78 to i64 - %80 = getelementptr inbounds float, float* %77, i64 %79 - %81 = load float, float* %80, align 4 - %82 = fsub float %76, %81 - %83 = load float*, float** %11, align 8 - %84 = load i32, i32* %14, align 4 - %85 = sext i32 %84 to i64 - %86 = getelementptr inbounds float, float* %83, i64 %85 - %87 = load float, float* %64, align 4 - %88 = load float, float* %69, align 4 - %89 = fadd float %87, %88 - %90 = insertelement <4 x float> zeroinitializer, float %89, i32 0 - %91 = load float, float* %75, align 4 - %92 = load float, float* %80, align 4 - %93 = fsub float %91, %92 - %94 = insertelement <4 x float> %90, float %93, i32 1 - %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 2 - %96 = insertelement <4 x float> %95, float 0.000000e+00, i32 3 - %97 = extractelement <4 x float> %96, i32 0 - store float %97, float* %18, align 4 - %98 = extractelement <4 x float> %96, i32 1 - store float %98, float* %86, align 4 - %99 = load float, float* %18, align 4 - %100 = load float*, float** %11, align 8 - %101 = load i32, 
i32* %13, align 4 - %102 = sext i32 %101 to i64 - %103 = getelementptr inbounds float, float* %100, i64 %102 - store float %99, float* %103, align 4 - %104 = load float*, float** %12, align 8 - %105 = load i32, i32* %13, align 4 - %106 = sext i32 %105 to i64 - %107 = getelementptr inbounds float, float* %104, i64 %106 - %108 = load float, float* %107, align 4 - %109 = load float*, float** %12, align 8 - %110 = load i32, i32* %14, align 4 - %111 = sext i32 %110 to i64 - %112 = getelementptr inbounds float, float* %109, i64 %111 - %113 = load float, float* %112, align 4 - %114 = fadd float %108, %113 - %115 = load float*, float** %12, align 8 - %116 = load i32, i32* %13, align 4 - %117 = sext i32 %116 to i64 - %118 = getelementptr inbounds float, float* %115, i64 %117 - %119 = load float, float* %118, align 4 - %120 = load float*, float** %12, align 8 - %121 = load i32, i32* %14, align 4 - %122 = sext i32 %121 to i64 - %123 = getelementptr inbounds float, float* %120, i64 %122 - %124 = load float, float* %123, align 4 - %125 = fsub float %119, %124 - %126 = load float*, float** %12, align 8 - %127 = load i32, i32* %14, align 4 - %128 = sext i32 %127 to i64 - %129 = getelementptr inbounds float, float* %126, i64 %128 - %130 = load float, float* %107, align 4 - %131 = load float, float* %112, align 4 - %132 = fadd float %130, %131 - %133 = insertelement <4 x float> zeroinitializer, float %132, i32 0 - %134 = load float, float* %118, align 4 - %135 = load float, float* %123, align 4 - %136 = fsub float %134, %135 - %137 = insertelement <4 x float> %133, float %136, i32 1 - %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 2 - %139 = insertelement <4 x float> %138, float 0.000000e+00, i32 3 - %140 = extractelement <4 x float> %139, i32 0 - store float %140, float* %18, align 4 - %141 = extractelement <4 x float> %139, i32 1 - store float %141, float* %129, align 4 - %142 = load float, float* %18, align 4 - %143 = load float*, float** %12, align 8 - %144 = load i32, i32* %13, align 4 - %145 = sext i32 %144 to i64 - %146 = getelementptr inbounds float, float* %143, i64 %145 - store float %142, float* %146, align 4 - %147 = load i32, i32* %13, align 4 - %148 = load i32, i32* %15, align 4 - %149 = shl i32 %147, %148 - %150 = and i32 %149, 7 - store i32 %150, i32* %16, align 4 - %151 = load i32, i32* %16, align 4 - %152 = icmp sgt i32 %151, 0 - br i1 %152, label %153, label %237 - -153: ; preds = %54 - %154 = load float*, float** %9, align 8 - %155 = load i32, i32* %16, align 4 - %156 = sext i32 %155 to i64 - %157 = getelementptr inbounds float, float* %154, i64 %156 - %158 = load float, float* %157, align 4 - %159 = load float*, float** %11, align 8 - %160 = load i32, i32* %14, align 4 - %161 = sext i32 %160 to i64 - %162 = getelementptr inbounds float, float* %159, i64 %161 - %163 = load float, float* %162, align 4 - %164 = fmul float %158, %163 - %165 = load float*, float** %10, align 8 - %166 = load i32, i32* %16, align 4 - %167 = sext i32 %166 to i64 - %168 = getelementptr inbounds float, float* %165, i64 %167 - %169 = load float, float* %168, align 4 - %170 = load float*, float** %12, align 8 - %171 = load i32, i32* %14, align 4 - %172 = sext i32 %171 to i64 - %173 = getelementptr inbounds float, float* %170, i64 %172 - %174 = load float, float* %173, align 4 - %175 = fmul float %169, %174 - %176 = fsub float %164, %175 - %177 = load float*, float** %9, align 8 - %178 = load i32, i32* %16, align 4 - %179 = sext i32 %178 to i64 - %180 = getelementptr inbounds float, float* %177, i64 
%179 - %181 = load float, float* %180, align 4 - %182 = load float*, float** %12, align 8 - %183 = load i32, i32* %14, align 4 - %184 = sext i32 %183 to i64 - %185 = getelementptr inbounds float, float* %182, i64 %184 - %186 = load float, float* %185, align 4 - %187 = fmul float %181, %186 - %188 = load float*, float** %10, align 8 - %189 = load i32, i32* %16, align 4 - %190 = sext i32 %189 to i64 - %191 = getelementptr inbounds float, float* %188, i64 %190 - %192 = load float, float* %191, align 4 - %193 = load float*, float** %11, align 8 - %194 = load i32, i32* %14, align 4 - %195 = sext i32 %194 to i64 - %196 = getelementptr inbounds float, float* %193, i64 %195 - %197 = load float, float* %196, align 4 - %198 = fmul float %192, %197 - %199 = fadd float %187, %198 - %200 = load float*, float** %12, align 8 - %201 = load i32, i32* %14, align 4 - %202 = sext i32 %201 to i64 - %203 = getelementptr inbounds float, float* %200, i64 %202 - %204 = load float, float* %180, align 4 - %205 = insertelement <4 x float> , float %204, i32 1 - %206 = insertelement <4 x float> %205, float 1.000000e+00, i32 2 - %207 = insertelement <4 x float> %206, float 1.000000e+00, i32 3 - %208 = load float, float* %185, align 4 - %209 = insertelement <4 x float> zeroinitializer, float %208, i32 1 - %210 = insertelement <4 x float> %209, float 0.000000e+00, i32 2 - %211 = insertelement <4 x float> %210, float 0.000000e+00, i32 3 - %212 = fmul <4 x float> %207, %211 - %213 = load float, float* %191, align 4 - %214 = insertelement <4 x float> , float %213, i32 1 - %215 = insertelement <4 x float> %214, float 1.000000e+00, i32 2 - %216 = insertelement <4 x float> %215, float 1.000000e+00, i32 3 - %217 = load float, float* %157, align 4 - %218 = load float, float* %162, align 4 - %219 = fmul float %217, %218 - %220 = load float, float* %168, align 4 - %221 = load float, float* %173, align 4 - %222 = fmul float %220, %221 - %223 = fsub float %219, %222 - %224 = insertelement <4 x float> zeroinitializer, float %223, i32 0 - %225 = load float, float* %196, align 4 - %226 = insertelement <4 x float> %224, float %225, i32 1 - %227 = insertelement <4 x float> %226, float 0.000000e+00, i32 2 - %228 = insertelement <4 x float> %227, float 0.000000e+00, i32 3 - %229 = call <4 x float> @llvm.fma.f32(<4 x float> %216, <4 x float> %228, <4 x float> %212) - %230 = extractelement <4 x float> %229, i32 0 - store float %230, float* %18, align 4 - %231 = extractelement <4 x float> %229, i32 1 - store float %231, float* %203, align 4 - %232 = load float, float* %18, align 4 - %233 = load float*, float** %11, align 8 - %234 = load i32, i32* %14, align 4 - %235 = sext i32 %234 to i64 - %236 = getelementptr inbounds float, float* %233, i64 %235 - store float %232, float* %236, align 4 - br label %237 - -237: ; preds = %153, %54 - %238 = load i32, i32* %14, align 4 - %239 = add nsw i32 %238, 1 - store i32 %239, i32* %14, align 4 - br label %51 - -240: ; preds = %51 - %241 = load i32, i32* %17, align 4 - %242 = ashr i32 %241, 1 - store i32 %242, i32* %17, align 4 - %243 = load i32, i32* %15, align 4 - %244 = add nsw i32 %243, 1 - store i32 %244, i32* %15, align 4 - br label %46 - -245: ; preds = %46 - ret void - %1 = alloca i32, align 4 - %2 = alloca [8 x float], align 16 - %3 = alloca [8 x float], align 16 - %4 = alloca [4 x float], align 16 - %5 = alloca [4 x float], align 16 - %6 = alloca [8 x float], align 16 - %7 = alloca [8 x float], align 16 - %8 = alloca i32, align 4 - %9 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %10 = 
bitcast [8 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %10, i8* align 16 bitcast ([8 x float]* @__const.main.real_in to i8*), i64 32, i1 false) - %11 = bitcast [8 x float]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %11, i8* align 16 bitcast ([8 x float]* @__const.main.img_in to i8*), i64 32, i1 false) - %12 = bitcast [4 x float]* %4 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %12, i8* align 16 bitcast ([4 x float]* @__const.main.real_twid_in to i8*), i64 16, i1 false) - %13 = bitcast [4 x float]* %5 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %13, i8* align 16 bitcast ([4 x float]* @__const.main.img_twid_in to i8*), i64 16, i1 false) - %14 = bitcast [8 x float]* %6 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %14, i8* align 16 bitcast ([8 x float]* @__const.main.real_out to i8*), i64 32, i1 false) - %15 = bitcast [8 x float]* %7 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %15, i8* align 16 bitcast ([8 x float]* @__const.main.img_out to i8*), i64 32, i1 false) - %16 = getelementptr inbounds [8 x float], [8 x float]* %2, i64 0, i64 0 - %17 = getelementptr inbounds [8 x float], [8 x float]* %3, i64 0, i64 0 - %18 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - %19 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 - %20 = getelementptr inbounds [8 x float], [8 x float]* %6, i64 0, i64 0 - %21 = getelementptr inbounds [8 x float], [8 x float]* %7, i64 0, i64 0 - call void @fft(float* %16, float* %17, float* %18, float* %19, float* %20, float* %21) - store i32 0, i32* %8, align 4 - br label %22 - -22: ; preds = %32, %0 - %23 = load i32, i32* %8, align 4 - %24 = icmp slt i32 %23, 8 - br i1 %24, label %25, label %35 - -25: ; preds = %22 - %26 = load i32, i32* %8, align 4 - %27 = sext i32 %26 to i64 - %28 = getelementptr inbounds [8 x float], [8 x float]* %6, i64 0, i64 %27 - %29 = load float, float* %28, align 4 - %30 = fpext float %29 to double - %31 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %30) - br label %32 - -32: ; preds = %25 - %33 = load i32, i32* %8, align 4 - %34 = add nsw i32 %33, 1 - store i32 %34, i32* %8, align 4 - br label %22 - -35: ; preds = %22 - store i32 0, i32* %9, align 4 - br label %36 - -36: ; preds = %46, %35 - %37 = load i32, i32* %9, align 4 - %38 = icmp slt i32 %37, 8 - br i1 %38, label %39, label %49 - -39: ; preds = %36 - %40 = load i32, i32* %9, align 4 - %41 = sext i32 %40 to i64 - %42 = getelementptr inbounds [8 x float], [8 x float]* %7, i64 0, i64 %41 - %43 = load float, float* %42, align 4 - %44 = fpext float %43 to double - %45 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %44) - br label %46 - -46: ; preds = %39 - %47 = load i32, i32* %9, align 4 - %48 = add nsw i32 %47, 1 - store i32 %48, i32* %9, align 4 - br label %36 - -49: ; preds = %36 - %50 = load i32, i32* %1, align 4 - ret i32 %50 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("-2,"), Get([3, 4]), Add([2, 5]), Symbol("no-array-name3"), Symbol("-3,"), Get([7, 8]), Symbol("no-array-name4"), Symbol("-4,"), Get([10, 11]), Minus([9, 12]), Num(0), Num(0), Vec([6, 13, 14, 15])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("-2,"), Get([3, 4]), Add([2, 5]), Symbol("no-array-name3"), Symbol("-3,"), Get([7, 8]), Symbol("no-array-name4"), Symbol("-4,"), Get([10, 11]), Minus([9, 12]), Num(0), Num(0), Vec([6, 13, 14, 15])] -RecExpr { nodes: [Symbol("no-array-name5"), Symbol("-5,"), Get([0, 1]), Symbol("no-array-name6"), Symbol("-6,"), Get([3, 4]), Add([2, 5]), Symbol("no-array-name7"), Symbol("-7,"), Get([7, 8]), Symbol("no-array-name8"), Symbol("-8,"), Get([10, 11]), Minus([9, 12]), Num(0), Num(0), Vec([6, 13, 14, 15])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name5"), Symbol("-5,"), Get([0, 1]), Symbol("no-array-name6"), Symbol("-6,"), Get([3, 4]), Add([2, 5]), Symbol("no-array-name7"), Symbol("-7,"), Get([7, 8]), Symbol("no-array-name8"), Symbol("-8,"), Get([10, 11]), Minus([9, 12]), Num(0), Num(0), Vec([6, 13, 14, 15])] -RecExpr { nodes: [Symbol("no-array-name9"), Symbol("-9,"), Get([0, 1]), Symbol("no-array-name10"), Symbol("-10,"), Get([3, 4]), Mul([2, 5]), Symbol("no-array-name11"), Symbol("-11,"), Get([7, 8]), Symbol("no-array-name12"), Symbol("-12,"), Get([10, 11]), Mul([9, 12]), Minus([6, 13]), Symbol("no-array-name13"), Symbol("-13,"), Get([15, 16]), Symbol("no-array-name14"), Symbol("-14,"), Get([18, 19]), Mul([17, 20]), Symbol("no-array-name15"), Symbol("-15,"), Get([22, 23]), Symbol("no-array-name16"), Symbol("-16,"), Get([25, 26]), Mul([24, 27]), Add([21, 28]), Num(0), Num(0), Vec([14, 29, 30, 31])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 6 iterations, reason: Some(Saturated) -[Num(1), Symbol("no-array-name13"), Symbol("-13,"), Get([1, 2]), Num(1), Num(1), Vec([0, 3, 4, 5]), Num(0), Symbol("no-array-name14"), Symbol("-14,"), Get([8, 9]), Num(0), Num(0), LitVec([7, 10, 11, 12]), VecMul([6, 13]), Num(1), Symbol("no-array-name15"), Symbol("-15,"), Get([16, 17]), Num(1), Num(1), Vec([15, 18, 19, 20]), Symbol("no-array-name9"), Symbol("-9,"), Get([22, 23]), Symbol("no-array-name10"), Symbol("-10,"), Get([25, 26]), Mul([24, 27]), Symbol("no-array-name11"), Symbol("-11,"), Get([29, 30]), Symbol("no-array-name12"), Symbol("-12,"), Get([32, 33]), Mul([31, 34]), Minus([28, 
35]), Symbol("no-array-name16"), Symbol("-16,"), Get([37, 38]), Num(0), Num(0), Vec([36, 39, 40, 41]), VecMAC([14, 21, 42])]
diff --git a/src/dios-egraphs/Diospyros/llvm-tests/five_binops.c b/src/dios-egraphs/Diospyros/llvm-tests/five_binops.c
index 0d03f65f..6a33c896 100644
--- a/src/dios-egraphs/Diospyros/llvm-tests/five_binops.c
+++ b/src/dios-egraphs/Diospyros/llvm-tests/five_binops.c
@@ -1,22 +1,31 @@
+#include
 #include
+#define SIZE 4
 
-float a_in[] = {1, 2, 3, 4};
-float b_in[] = {5, 6, 7, 8};
-float c_in[] = {1, 2, 3, 4};
-float d_in[] = {5, 6, 7, 8};
-float e_in[] = {1, 2, 3, 4};
-
-int main(int argc, char **argv) {
-    // return argc + 5;
-    float c_out[4];
+void add5(float a_in[SIZE], float b_in[SIZE], float c_in[SIZE],
+          float d_in[SIZE], float e_in[SIZE], float c_out[SIZE]) {
     c_out[0] = a_in[0] + b_in[0] + c_in[0] + d_in[0] + e_in[0];
     c_out[1] = a_in[1] + b_in[1] + c_in[1] + d_in[1] + e_in[1];
     c_out[2] = a_in[2] + b_in[2] + c_in[2] + d_in[2] + e_in[2];
     c_out[3] = a_in[3] + b_in[3] + c_in[3] + d_in[3] + e_in[3];
+}
+
+int main(int argc, char **argv) {
+    float a_in[SIZE] = {1, 2, 3, 4};
+    float b_in[SIZE] = {5, 6, 7, 8};
+    float c_in[SIZE] = {1, 2, 3, 4};
+    float d_in[SIZE] = {5, 6, 7, 8};
+    float e_in[SIZE] = {1, 2, 3, 4};
+    float c_out[SIZE];
+    add5(a_in, b_in, c_in, d_in, e_in, c_out);
     printf("first: %f\n", c_out[0]);
     printf("second: %f\n", c_out[1]);
     printf("third: %f\n", c_out[2]);
     printf("fourth: %f\n", c_out[3]);
+    assert(c_out[0] == 13);
+    assert(c_out[1] == 18);
+    assert(c_out[2] == 23);
+    assert(c_out[3] == 28);
     // expected: 13, 18, 23, 28
     return 0;
 }
diff --git a/src/dios-egraphs/Diospyros/llvm-tests/five_binops.expect b/src/dios-egraphs/Diospyros/llvm-tests/five_binops.expect
deleted file mode 100644
index b76605ed..00000000
--- a/src/dios-egraphs/Diospyros/llvm-tests/five_binops.expect
+++ /dev/null
@@ -1,121 +0,0 @@
- %3 = alloca i32, align 4
- %4 = alloca i32, align 4
- %5 = alloca i8**, align 8
- %6 = alloca [4 x float], align 16
- store i32 0, i32* %3, align 4
- store i32 %0, i32* %4, align 4
- store i8** %1, i8*** %5, align 8
- %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16
- %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16
- %9 = fadd float %7, %8
- %10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 0), align 16
- %11 = fadd float %9, %10
- %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @d_in, i64 0, i64 0), align 16
- %13 = fadd float %11, %12
- %14 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 0), align 16
- %15 = fadd float %13, %14
- %16 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0
- %17 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4
- %18 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4
- %19 = fadd float %17, %18
- %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 1), align 4
- %21 = fadd float %19, %20
- %22 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @d_in, i64 0, i64 1), align 4
- %23 = fadd float %21, %22
- %24 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 1), align 4
- %25 = fadd float %23, %24
- %26 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1
- %27 = load float, float*
getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %28 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %29 = fadd float %27, %28 - %30 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 2), align 8 - %31 = fadd float %29, %30 - %32 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @d_in, i64 0, i64 2), align 8 - %33 = fadd float %31, %32 - %34 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 2), align 8 - %35 = fadd float %33, %34 - %36 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %37 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %38 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %39 = fadd float %37, %38 - %40 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 3), align 4 - %41 = fadd float %39, %40 - %42 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @d_in, i64 0, i64 3), align 4 - %43 = fadd float %41, %42 - %44 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 3), align 4 - %45 = fadd float %43, %44 - %46 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %47 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %48 = insertelement <4 x float> zeroinitializer, float %47, i32 0 - %49 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %50 = insertelement <4 x float> %48, float %49, i32 1 - %51 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %52 = insertelement <4 x float> %50, float %51, i32 2 - %53 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %54 = insertelement <4 x float> %52, float %53, i32 3 - %55 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %56 = insertelement <4 x float> zeroinitializer, float %55, i32 0 - %57 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %58 = insertelement <4 x float> %56, float %57, i32 1 - %59 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %60 = insertelement <4 x float> %58, float %59, i32 2 - %61 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %62 = insertelement <4 x float> %60, float %61, i32 3 - %63 = fadd <4 x float> %54, %62 - %64 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 0), align 4 - %65 = insertelement <4 x float> zeroinitializer, float %64, i32 0 - %66 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 1), align 4 - %67 = insertelement <4 x float> %65, float %66, i32 1 - %68 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 2), align 4 - %69 = insertelement <4 x float> %67, float %68, i32 2 - %70 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 3), align 4 - %71 = insertelement <4 x float> %69, float %70, i32 3 - %72 = fadd <4 x float> %63, %71 - %73 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* 
@d_in, i64 0, i64 0), align 4 - %74 = insertelement <4 x float> zeroinitializer, float %73, i32 0 - %75 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @d_in, i64 0, i64 1), align 4 - %76 = insertelement <4 x float> %74, float %75, i32 1 - %77 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @d_in, i64 0, i64 2), align 4 - %78 = insertelement <4 x float> %76, float %77, i32 2 - %79 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @d_in, i64 0, i64 3), align 4 - %80 = insertelement <4 x float> %78, float %79, i32 3 - %81 = fadd <4 x float> %72, %80 - %82 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 0), align 4 - %83 = insertelement <4 x float> zeroinitializer, float %82, i32 0 - %84 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 1), align 4 - %85 = insertelement <4 x float> %83, float %84, i32 1 - %86 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 2), align 4 - %87 = insertelement <4 x float> %85, float %86, i32 2 - %88 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 3), align 4 - %89 = insertelement <4 x float> %87, float %88, i32 3 - %90 = fadd <4 x float> %81, %89 - %91 = extractelement <4 x float> %90, i32 0 - store float %91, float* %16, align 16 - %92 = extractelement <4 x float> %90, i32 1 - store float %92, float* %26, align 4 - %93 = extractelement <4 x float> %90, i32 2 - store float %93, float* %36, align 8 - %94 = extractelement <4 x float> %90, i32 3 - store float %94, float* %46, align 4 - %95 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %96 = load float, float* %95, align 16 - %97 = fpext float %96 to double - %98 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %97) - %99 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %100 = load float, float* %99, align 4 - %101 = fpext float %100 to double - %102 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %101) - %103 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %104 = load float, float* %103, align 8 - %105 = fpext float %104 to double - %106 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %105) - %107 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %108 = load float, float* %107, align 4 - %109 = fpext float %108 to double - %110 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %109) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("c_in"), Symbol("0,0,"), Get([7, 8]), Add([6, 9]), Symbol("d_in"), Symbol("0,0,"), Get([11, 12]), Add([10, 13]), Symbol("e_in"), Symbol("0,0,"), Get([15, 16]), Add([14, 17]), Symbol("a_in"), Symbol("0,1,"), Get([19, 20]), Symbol("b_in"), Symbol("0,1,"), Get([22, 23]), Add([21, 24]), Symbol("c_in"), Symbol("0,1,"), Get([26, 27]), Add([25, 28]), Symbol("d_in"), Symbol("0,1,"), Get([30, 31]), Add([29, 32]), Symbol("e_in"), Symbol("0,1,"), Get([34, 35]), Add([33, 36]), Symbol("a_in"), Symbol("0,2,"), Get([38, 39]), Symbol("b_in"), Symbol("0,2,"), Get([41, 42]), Add([40, 43]), Symbol("c_in"), Symbol("0,2,"), Get([45, 46]), Add([44, 47]), Symbol("d_in"), Symbol("0,2,"), Get([49, 50]), Add([48, 51]), Symbol("e_in"), Symbol("0,2,"), Get([53, 54]), Add([52, 55]), Symbol("a_in"), Symbol("0,3,"), Get([57, 58]), Symbol("b_in"), Symbol("0,3,"), Get([60, 61]), Add([59, 62]), Symbol("c_in"), Symbol("0,3,"), Get([64, 65]), Add([63, 66]), Symbol("d_in"), Symbol("0,3,"), Get([68, 69]), Add([67, 70]), Symbol("e_in"), Symbol("0,3,"), Get([72, 73]), Add([71, 74]), Vec([18, 37, 56, 75])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 7 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,3,"), Get([22, 23]), LitVec([15, 18, 21, 24]), VecAdd([12, 25]), Symbol("c_in"), Symbol("0,0,"), Get([27, 28]), Symbol("c_in"), Symbol("0,1,"), Get([30, 31]), Symbol("c_in"), Symbol("0,2,"), Get([33, 34]), Symbol("c_in"), Symbol("0,3,"), Get([36, 37]), LitVec([29, 32, 35, 38]), VecAdd([26, 39]), Symbol("d_in"), Symbol("0,0,"), Get([41, 42]), Symbol("d_in"), Symbol("0,1,"), Get([44, 45]), Symbol("d_in"), Symbol("0,2,"), Get([47, 48]), Symbol("d_in"), Symbol("0,3,"), Get([50, 51]), LitVec([43, 46, 49, 52]), VecAdd([40, 53]), Symbol("e_in"), Symbol("0,0,"), Get([55, 56]), Symbol("e_in"), Symbol("0,1,"), Get([58, 59]), Symbol("e_in"), Symbol("0,2,"), Get([61, 62]), Symbol("e_in"), Symbol("0,3,"), Get([64, 65]), LitVec([57, 60, 63, 66]), VecAdd([54, 67])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/five_binops_new.c b/src/dios-egraphs/Diospyros/llvm-tests/five_binops_new.c deleted file mode 100644 index 6a33c896..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/five_binops_new.c +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include -#define SIZE 4 - -void add5(float a_in[SIZE], float b_in[SIZE], float c_in[SIZE], - float d_in[SIZE], float e_in[SIZE], float c_out[SIZE]) { - c_out[0] = a_in[0] + b_in[0] + c_in[0] + d_in[0] + e_in[0]; - c_out[1] = a_in[1] + b_in[1] + c_in[1] + d_in[1] + e_in[1]; - c_out[2] = a_in[2] + b_in[2] + c_in[2] + d_in[2] + e_in[2]; - c_out[3] = a_in[3] + b_in[3] + c_in[3] + d_in[3] + e_in[3]; -} - -int main(int argc, char **argv) { - float a_in[SIZE] = {1, 2, 3, 4}; - float b_in[SIZE] = {5, 6, 7, 8}; - float 
c_in[SIZE] = {1, 2, 3, 4}; - float d_in[SIZE] = {5, 6, 7, 8}; - float e_in[SIZE] = {1, 2, 3, 4}; - float c_out[SIZE]; - add5(a_in, b_in, c_in, d_in, e_in, c_out); - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - assert(c_out[0] == 13); - assert(c_out[1] == 18); - assert(c_out[2] == 23); - assert(c_out[3] == 28); - // expected: 13, 18, 23, 28 - return 0; -} diff --git a/src/dios-egraphs/Diospyros/llvm-tests/if-else-new.c b/src/dios-egraphs/Diospyros/llvm-tests/if-else-new.c deleted file mode 100644 index 3cd9c502..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/if-else-new.c +++ /dev/null @@ -1,32 +0,0 @@ -#include -#include -#define SIZE 8 - -void if_else(float a_in[SIZE], float b_out[SIZE]) { - for (int i = 0; i < SIZE; i++) { - if (i < SIZE / 2) { - b_out[i] = a_in[i]; - } else { - b_out[i] = a_in[i] + 1; - } - } -} - -int main(int argc, char **argv) { - float a_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; - float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; - if_else(a_in, b_out); - for (int i = 0; i < SIZE; i++) { - printf("%f\n", b_out[i]); - } - assert(b_out[0] == 1); - assert(b_out[1] == 2); - assert(b_out[2] == 3); - assert(b_out[3] == 4); - assert(b_out[4] == 6); - assert(b_out[5] == 7); - assert(b_out[6] == 8); - assert(b_out[7] == 9); - // expected: 1, 2, 3, 4, 6, 7, 8, 9 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/if-else.c b/src/dios-egraphs/Diospyros/llvm-tests/if-else.c index 7e5858fd..3cd9c502 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/if-else.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/if-else.c @@ -1,3 +1,4 @@ +#include #include #define SIZE 8 @@ -18,6 +19,14 @@ int main(int argc, char **argv) { for (int i = 0; i < SIZE; i++) { printf("%f\n", b_out[i]); } + assert(b_out[0] == 1); + assert(b_out[1] == 2); + assert(b_out[2] == 3); + assert(b_out[3] == 4); + assert(b_out[4] == 6); + assert(b_out[5] == 7); + assert(b_out[6] == 8); + assert(b_out[7] == 9); // expected: 1, 2, 3, 4, 6, 7, 8, 9 return 0; } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mac.c b/src/dios-egraphs/Diospyros/llvm-tests/mac.c index 787b5ad0..475ba09f 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/mac.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/mac.c @@ -1,15 +1,25 @@ +#include #include +#define SIZE 4 -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {2, 3, 4, 5}; -float c_in[] = {3, 4, 5, 6}; - -int main(int argc, char **argv) { - float d_out[4]; +void mac(float a_in[SIZE], float b_in[SIZE], float c_in[SIZE], + float d_out[SIZE]) { d_out[0] = a_in[0] + (b_in[0] * c_in[0]); d_out[1] = a_in[1] + (b_in[1] * c_in[1]); d_out[2] = a_in[2] + (b_in[2] * c_in[2]); d_out[3] = a_in[3] + (b_in[3] * c_in[3]); +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {2, 3, 4, 5}; + float c_in[SIZE] = {3, 4, 5, 6}; + float d_out[SIZE]; + mac(a_in, b_in, c_in, d_out); + assert(d_out[0] == 7); + assert(d_out[1] == 14); + assert(d_out[2] == 23); + assert(d_out[3] == 34); printf("first: %f\n", d_out[0]); printf("second: %f\n", d_out[1]); printf("third: %f\n", d_out[2]); diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mac.expect b/src/dios-egraphs/Diospyros/llvm-tests/mac.expect deleted file mode 100644 index e0e798a8..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mac.expect +++ /dev/null @@ -1,86 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = 
alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 0), align 16 - %10 = fmul float %8, %9 - %11 = fadd float %7, %10 - %12 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %13 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %14 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %15 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 1), align 4 - %16 = fmul float %14, %15 - %17 = fadd float %13, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %19 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %21 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 2), align 8 - %22 = fmul float %20, %21 - %23 = fadd float %19, %22 - %24 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %25 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %26 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %27 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 3), align 4 - %28 = fmul float %26, %27 - %29 = fadd float %25, %28 - %30 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %31 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 - %33 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %34 = insertelement <4 x float> %32, float %33, i32 1 - %35 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %36 = insertelement <4 x float> %34, float %35, i32 2 - %37 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %38 = insertelement <4 x float> %36, float %37, i32 3 - %39 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %40 = insertelement <4 x float> zeroinitializer, float %39, i32 0 - %41 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %42 = insertelement <4 x float> %40, float %41, i32 1 - %43 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %44 = insertelement <4 x float> %42, float %43, i32 2 - %45 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %46 = insertelement <4 x float> %44, float %45, i32 3 - %47 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 0), align 4 - %48 = insertelement <4 x float> zeroinitializer, float %47, i32 0 - %49 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 1), align 4 - %50 = 
insertelement <4 x float> %48, float %49, i32 1 - %51 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 2), align 4 - %52 = insertelement <4 x float> %50, float %51, i32 2 - %53 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 3), align 4 - %54 = insertelement <4 x float> %52, float %53, i32 3 - %55 = call <4 x float> @llvm.fma.f32(<4 x float> %46, <4 x float> %54, <4 x float> %38) - %56 = extractelement <4 x float> %55, i32 0 - store float %56, float* %12, align 16 - %57 = extractelement <4 x float> %55, i32 1 - store float %57, float* %18, align 4 - %58 = extractelement <4 x float> %55, i32 2 - store float %58, float* %24, align 8 - %59 = extractelement <4 x float> %55, i32 3 - store float %59, float* %30, align 4 - %60 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %61 = load float, float* %60, align 16 - %62 = fpext float %61 to double - %63 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %62) - %64 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %65 = load float, float* %64, align 4 - %66 = fpext float %65 to double - %67 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %66) - %68 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %69 = load float, float* %68, align 8 - %70 = fpext float %69 to double - %71 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %70) - %72 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %73 = load float, float* %72, align 4 - %74 = fpext float %73 to double - %75 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %74) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("b_in"), Symbol("0,0,"), Get([0, 1]), Symbol("c_in"), Symbol("0,0,"), Get([3, 4]), Mul([2, 5]), Symbol("a_in"), Symbol("0,0,"), Get([7, 8]), Add([9, 6]), Symbol("b_in"), Symbol("0,1,"), Get([11, 12]), Symbol("c_in"), Symbol("0,1,"), Get([14, 15]), Mul([13, 16]), Symbol("a_in"), Symbol("0,1,"), Get([18, 19]), Add([20, 17]), Symbol("b_in"), Symbol("0,2,"), Get([22, 23]), Symbol("c_in"), Symbol("0,2,"), Get([25, 26]), Mul([24, 27]), Symbol("a_in"), Symbol("0,2,"), Get([29, 30]), Add([31, 28]), Symbol("b_in"), Symbol("0,3,"), Get([33, 34]), Symbol("c_in"), Symbol("0,3,"), Get([36, 37]), Mul([35, 38]), Symbol("a_in"), Symbol("0,3,"), Get([40, 41]), Add([42, 39]), Vec([10, 21, 32, 43])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 7 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,3,"), Get([22, 23]), LitVec([15, 18, 21, 24]), Symbol("c_in"), Symbol("0,0,"), Get([26, 27]), Symbol("c_in"), Symbol("0,1,"), Get([29, 30]), Symbol("c_in"), Symbol("0,2,"), Get([32, 33]), Symbol("c_in"), Symbol("0,3,"), Get([35, 36]), LitVec([28, 31, 34, 37]), VecMAC([12, 25, 38])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mac_new.c b/src/dios-egraphs/Diospyros/llvm-tests/mac_new.c deleted file mode 100644 index 475ba09f..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mac_new.c +++ /dev/null @@ -1,29 +0,0 @@ -#include -#include -#define SIZE 4 - -void mac(float a_in[SIZE], float b_in[SIZE], float c_in[SIZE], - float d_out[SIZE]) { - d_out[0] = a_in[0] + (b_in[0] * c_in[0]); - d_out[1] = a_in[1] + (b_in[1] * c_in[1]); - d_out[2] = a_in[2] + (b_in[2] * c_in[2]); - d_out[3] = a_in[3] + (b_in[3] * c_in[3]); -} - -int main(int argc, char **argv) { - float a_in[SIZE] = {1, 2, 3, 4}; - float b_in[SIZE] = {2, 3, 4, 5}; - float c_in[SIZE] = {3, 4, 5, 6}; - float d_out[SIZE]; - mac(a_in, b_in, c_in, d_out); - assert(d_out[0] == 7); - assert(d_out[1] == 14); - assert(d_out[2] == 23); - assert(d_out[3] == 34); - printf("first: %f\n", d_out[0]); - printf("second: %f\n", d_out[1]); - printf("third: %f\n", d_out[2]); - printf("fourth: %f\n", d_out[3]); - // expected: 7, 14, 23, 34 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mat_mul.c b/src/dios-egraphs/Diospyros/llvm-tests/mat_mul.c index 344fce3d..58c9208d 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/mat_mul.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/mat_mul.c @@ -1,13 +1,23 @@ +#include #include +#define SIZE 4 -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {5, 6, 7, 8}; +void mat_mul(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] * b_in[0] + a_in[1] * b_in[2]; + c_out[1] = a_in[0] * b_in[1] + a_in[1] * b_in[3]; + c_out[2] = a_in[2] * b_in[0] + a_in[3] * b_in[2]; + c_out[3] = a_in[2] * b_in[1] + a_in[3] * b_in[3]; +} int main(int argc, char **argv) { - float c_out[4] = {a_in[0] * b_in[0] 
+ a_in[1] * b_in[2], - a_in[0] * b_in[1] + a_in[1] * b_in[3], - a_in[2] * b_in[0] + a_in[3] * b_in[2], - a_in[2] * b_in[1] + a_in[3] * b_in[3]}; + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + mat_mul(a_in, b_in, c_out); + assert(c_out[0] == 19); + assert(c_out[1] == 22); + assert(c_out[2] == 43); + assert(c_out[3] == 50); printf("first: %f\n", c_out[0]); printf("second: %f\n", c_out[1]); printf("third: %f\n", c_out[2]); diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mat_mul.expect b/src/dios-egraphs/Diospyros/llvm-tests/mat_mul.expect deleted file mode 100644 index 35745a9e..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mat_mul.expect +++ /dev/null @@ -1,103 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %9 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %10 = fmul float %8, %9 - %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %13 = fmul float %11, %12 - %14 = fadd float %10, %13 - %15 = getelementptr inbounds float, float* %7, i64 1 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %17 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %18 = fmul float %16, %17 - %19 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %21 = fmul float %19, %20 - %22 = fadd float %18, %21 - %23 = getelementptr inbounds float, float* %15, i64 1 - %24 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %25 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %26 = fmul float %24, %25 - %27 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %28 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %29 = fmul float %27, %28 - %30 = fadd float %26, %29 - %31 = getelementptr inbounds float, float* %23, i64 1 - %32 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %33 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %34 = fmul float %32, %33 - %35 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %36 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %37 = fmul float %35, %36 - %38 = fadd float %34, %37 - %39 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %40 = insertelement <4 x float> zeroinitializer, float %39, i32 0 - %41 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %42 = insertelement <4 x float> 
%40, float %41, i32 1 - %43 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %44 = insertelement <4 x float> %42, float %43, i32 2 - %45 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %46 = insertelement <4 x float> %44, float %45, i32 3 - %47 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %48 = insertelement <4 x float> zeroinitializer, float %47, i32 0 - %49 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %50 = insertelement <4 x float> %48, float %49, i32 1 - %51 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %52 = insertelement <4 x float> %50, float %51, i32 2 - %53 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %54 = insertelement <4 x float> %52, float %53, i32 3 - %55 = fmul <4 x float> %46, %54 - %56 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %57 = insertelement <4 x float> zeroinitializer, float %56, i32 0 - %58 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %59 = insertelement <4 x float> %57, float %58, i32 1 - %60 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %61 = insertelement <4 x float> %59, float %60, i32 2 - %62 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %63 = insertelement <4 x float> %61, float %62, i32 3 - %64 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %65 = insertelement <4 x float> zeroinitializer, float %64, i32 0 - %66 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %67 = insertelement <4 x float> %65, float %66, i32 1 - %68 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %69 = insertelement <4 x float> %67, float %68, i32 2 - %70 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %71 = insertelement <4 x float> %69, float %70, i32 3 - %72 = call <4 x float> @llvm.fma.f32(<4 x float> %63, <4 x float> %71, <4 x float> %55) - %73 = extractelement <4 x float> %72, i32 0 - store float %73, float* %7, align 4 - %74 = extractelement <4 x float> %72, i32 1 - store float %74, float* %15, align 4 - %75 = extractelement <4 x float> %72, i32 2 - store float %75, float* %23, align 4 - %76 = extractelement <4 x float> %72, i32 3 - store float %76, float* %31, align 4 - %77 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %78 = load float, float* %77, align 16 - %79 = fpext float %78 to double - %80 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %79) - %81 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %82 = load float, float* %81, align 4 - %83 = fpext float %82 to double - %84 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %83) - %85 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %86 = load float, float* %85, align 8 - %87 = fpext float %86 to double - %88 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %87) - %89 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %90 = load float, float* %89, align 4 - %91 = fpext float %90 to double - %92 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %91) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Mul([2, 5]), Symbol("a_in"), Symbol("0,1,"), Get([7, 8]), Symbol("b_in"), Symbol("0,2,"), Get([10, 11]), Mul([9, 12]), Add([6, 13]), Symbol("a_in"), Symbol("0,0,"), Get([15, 16]), Symbol("b_in"), Symbol("0,1,"), Get([18, 19]), Mul([17, 20]), Symbol("a_in"), Symbol("0,1,"), Get([22, 23]), Symbol("b_in"), Symbol("0,3,"), Get([25, 26]), Mul([24, 27]), Add([21, 28]), Symbol("a_in"), Symbol("0,2,"), Get([30, 31]), Symbol("b_in"), Symbol("0,0,"), Get([33, 34]), Mul([32, 35]), Symbol("a_in"), Symbol("0,3,"), Get([37, 38]), Symbol("b_in"), Symbol("0,2,"), Get([40, 41]), Mul([39, 42]), Add([36, 43]), Symbol("a_in"), Symbol("0,2,"), Get([45, 46]), Symbol("b_in"), Symbol("0,1,"), Get([48, 49]), Mul([47, 50]), Symbol("a_in"), Symbol("0,3,"), Get([52, 53]), Symbol("b_in"), Symbol("0,3,"), Get([55, 56]), Mul([54, 57]), Add([51, 58]), Vec([14, 29, 44, 59])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 8 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,0,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,2,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,0,"), Get([19, 20]), Symbol("b_in"), Symbol("0,1,"), Get([22, 23]), LitVec([15, 18, 21, 24]), VecMul([12, 25]), Symbol("a_in"), Symbol("0,1,"), Get([27, 28]), Symbol("a_in"), Symbol("0,1,"), Get([30, 31]), Symbol("a_in"), Symbol("0,3,"), Get([33, 34]), Symbol("a_in"), Symbol("0,3,"), Get([36, 37]), LitVec([29, 32, 35, 38]), Symbol("b_in"), Symbol("0,2,"), Get([40, 41]), Symbol("b_in"), Symbol("0,3,"), Get([43, 44]), Symbol("b_in"), Symbol("0,2,"), Get([46, 47]), Symbol("b_in"), Symbol("0,3,"), Get([49, 50]), LitVec([42, 45, 48, 51]), VecMAC([26, 39, 52])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mat_mul_new.c b/src/dios-egraphs/Diospyros/llvm-tests/mat_mul_new.c deleted file mode 100644 index 58c9208d..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mat_mul_new.c +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#define SIZE 4 - -void mat_mul(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { - c_out[0] = a_in[0] * b_in[0] + a_in[1] * b_in[2]; - c_out[1] = a_in[0] * b_in[1] + a_in[1] * b_in[3]; - c_out[2] = a_in[2] * b_in[0] + a_in[3] * b_in[2]; - c_out[3] = a_in[2] * b_in[1] + a_in[3] * b_in[3]; -} - -int main(int argc, char **argv) { - float a_in[SIZE] = {1, 2, 3, 4}; - float b_in[SIZE] = {5, 6, 7, 8}; - float c_out[SIZE]; - mat_mul(a_in, b_in, c_out); - assert(c_out[0] == 19); - assert(c_out[1] == 22); - assert(c_out[2] == 43); - assert(c_out[3] == 50); - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - // expected: 19, 22, 43, 50 - return 0; -} \ 
No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply-new.c b/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply-new.c deleted file mode 100644 index 5da28448..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply-new.c +++ /dev/null @@ -1,36 +0,0 @@ -#include -#include - -#define A_ROWS 2 -#define A_COLS 2 -#define B_COLS 2 - -void matrix_multiply(float a_in[A_ROWS * A_COLS], float b_in[A_COLS * B_COLS], - float c_out[A_ROWS * B_COLS]) { - for (int y = 0; y < A_ROWS; y++) { - for (int x = 0; x < B_COLS; x++) { - c_out[B_COLS * y + x] = 0; - for (int k = 0; k < A_COLS; k++) { - c_out[B_COLS * y + x] += - a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; - } - } - } -} - -int main(void) { - float a_in[A_ROWS * A_COLS] = {1, 2, 3, 4}; - float b_in[A_COLS * B_COLS] = {1, 2, 3, 4}; - float c_out[A_ROWS * B_COLS] = {0, 0, 0, 0}; - matrix_multiply(a_in, b_in, c_out); - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - assert(c_out[0] == 7); - assert(c_out[1] == 10); - assert(c_out[2] == 15); - assert(c_out[3] == 22); - // expected (7, 10, 15, 22) - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.c index 4165ae39..5da28448 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.c @@ -1,3 +1,4 @@ +#include #include #define A_ROWS 2 @@ -26,6 +27,10 @@ int main(void) { printf("second: %f\n", c_out[1]); printf("third: %f\n", c_out[2]); printf("fourth: %f\n", c_out[3]); + assert(c_out[0] == 7); + assert(c_out[1] == 10); + assert(c_out[2] == 15); + assert(c_out[3] == 22); // expected (7, 10, 15, 22) return 0; } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.expect b/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.expect deleted file mode 100644 index 8c0a7cf0..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.expect +++ /dev/null @@ -1,153 +0,0 @@ - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - %7 = alloca i32, align 4 - %8 = alloca i32, align 4 - %9 = alloca i32, align 4 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - store i32 0, i32* %7, align 4 - br label %10 - -10: ; preds = %80, %3 - %11 = load i32, i32* %7, align 4 - %12 = icmp slt i32 %11, 2 - br i1 %12, label %13, label %83 - -13: ; preds = %10 - store i32 0, i32* %8, align 4 - br label %14 - -14: ; preds = %76, %13 - %15 = load i32, i32* %8, align 4 - %16 = icmp slt i32 %15, 2 - br i1 %16, label %17, label %79 - -17: ; preds = %14 - %18 = load float*, float** %6, align 8 - %19 = load i32, i32* %7, align 4 - %20 = mul nsw i32 2, %19 - %21 = load i32, i32* %8, align 4 - %22 = add nsw i32 %20, %21 - %23 = sext i32 %22 to i64 - %24 = getelementptr inbounds float, float* %18, i64 %23 - store float 0.000000e+00, float* %24, align 4 - store i32 0, i32* %9, align 4 - br label %25 - -25: ; preds = %72, %17 - %26 = load i32, i32* %9, align 4 - %27 = icmp slt i32 %26, 2 - br i1 %27, label %28, label %75 - -28: ; preds = %25 - %29 = load float*, float** %4, align 8 - %30 = load i32, i32* %7, align 4 - %31 = mul nsw i32 2, %30 - %32 = load i32, i32* %9, align 4 - %33 = add nsw i32 %31, %32 - %34 = sext i32 %33 to i64 - %35 = getelementptr 
inbounds float, float* %29, i64 %34 - %36 = load float, float* %35, align 4 - %37 = load float*, float** %5, align 8 - %38 = load i32, i32* %9, align 4 - %39 = mul nsw i32 2, %38 - %40 = load i32, i32* %8, align 4 - %41 = add nsw i32 %39, %40 - %42 = sext i32 %41 to i64 - %43 = getelementptr inbounds float, float* %37, i64 %42 - %44 = load float, float* %43, align 4 - %45 = fmul float %36, %44 - %46 = load float*, float** %6, align 8 - %47 = load i32, i32* %7, align 4 - %48 = mul nsw i32 2, %47 - %49 = load i32, i32* %8, align 4 - %50 = add nsw i32 %48, %49 - %51 = sext i32 %50 to i64 - %52 = getelementptr inbounds float, float* %46, i64 %51 - %53 = load float, float* %52, align 4 - %54 = fadd float %53, %45 - %55 = load float, float* %52, align 4 - %56 = insertelement <4 x float> zeroinitializer, float %55, i32 0 - %57 = insertelement <4 x float> %56, float 0.000000e+00, i32 1 - %58 = insertelement <4 x float> %57, float 0.000000e+00, i32 2 - %59 = insertelement <4 x float> %58, float 0.000000e+00, i32 3 - %60 = load float, float* %35, align 4 - %61 = insertelement <4 x float> zeroinitializer, float %60, i32 0 - %62 = insertelement <4 x float> %61, float 0.000000e+00, i32 1 - %63 = insertelement <4 x float> %62, float 0.000000e+00, i32 2 - %64 = insertelement <4 x float> %63, float 0.000000e+00, i32 3 - %65 = load float, float* %43, align 4 - %66 = insertelement <4 x float> zeroinitializer, float %65, i32 0 - %67 = insertelement <4 x float> %66, float 0.000000e+00, i32 1 - %68 = insertelement <4 x float> %67, float 0.000000e+00, i32 2 - %69 = insertelement <4 x float> %68, float 0.000000e+00, i32 3 - %70 = call <4 x float> @llvm.fma.f32(<4 x float> %64, <4 x float> %69, <4 x float> %59) - %71 = extractelement <4 x float> %70, i32 0 - store float %71, float* %52, align 4 - br label %72 - -72: ; preds = %28 - %73 = load i32, i32* %9, align 4 - %74 = add nsw i32 %73, 1 - store i32 %74, i32* %9, align 4 - br label %25 - -75: ; preds = %25 - br label %76 - -76: ; preds = %75 - %77 = load i32, i32* %8, align 4 - %78 = add nsw i32 %77, 1 - store i32 %78, i32* %8, align 4 - br label %14 - -79: ; preds = %14 - br label %80 - -80: ; preds = %79 - %81 = load i32, i32* %7, align 4 - %82 = add nsw i32 %81, 1 - store i32 %82, i32* %7, align 4 - br label %10 - -83: ; preds = %10 - ret void - %1 = alloca i32, align 4 - %2 = alloca [4 x float], align 16 - %3 = alloca [4 x float], align 16 - %4 = alloca [4 x float], align 16 - store i32 0, i32* %1, align 4 - %5 = bitcast [4 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %5, i8* align 16 bitcast ([4 x float]* @__const.main.a_in to i8*), i64 16, i1 false) - %6 = bitcast [4 x float]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %6, i8* align 16 bitcast ([4 x float]* @__const.main.b_in to i8*), i64 16, i1 false) - %7 = bitcast [4 x float]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %7, i8 0, i64 16, i1 false) - %8 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %9 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - %10 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - call void @matrix_multiply(float* %8, float* %9, float* %10) - %11 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - %12 = load float, float* %11, align 16 - %13 = fpext float %12 to double - %14 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %13) - %15 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 - %16 = load float, float* %15, align 4 - %17 = fpext float %16 to double - %18 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %17) - %19 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 - %20 = load float, float* %19, align 8 - %21 = fpext float %20 to double - %22 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %21) - %23 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 - %24 = load float, float* %23, align 4 - %25 = fpext float %24 to double - %26 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %25) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("-2,"), Get([3, 4]), Mul([2, 5]), Symbol("no-array-name3"), Symbol("-3,"), Get([7, 8]), Add([9, 6]), Num(0), Num(0), Num(0), Vec([10, 11, 12, 13])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name3"), Symbol("-3,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Symbol("no-array-name1"), Symbol("-1,"), Get([7, 8]), Num(0), Num(0), Num(0), LitVec([9, 10, 11, 12]), Symbol("no-array-name2"), Symbol("-2,"), Get([14, 15]), Num(0), Num(0), Num(0), LitVec([16, 17, 18, 19]), VecMAC([6, 13, 20])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mixed.c b/src/dios-egraphs/Diospyros/llvm-tests/mixed.c index 3cf28606..90c97d64 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/mixed.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/mixed.c @@ -1,14 +1,23 @@ +#include #include +#define SIZE 4 -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {5, 6, 7, 8}; - -int main(int argc, char **argv) { - float c_out[4]; +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { c_out[0] = a_in[0] + 3; c_out[1] = 5 + b_in[1]; c_out[2] = 3 + b_in[2]; c_out[3] = a_in[3] + b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 4); + assert(c_out[1] == 11); + assert(c_out[2] == 10); + assert(c_out[3] == 12); printf("first: %f\n", c_out[0]); printf("second: %f\n", c_out[1]); printf("third: %f\n", c_out[2]); diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mixed.expect b/src/dios-egraphs/Diospyros/llvm-tests/mixed.expect deleted file mode 100644 index b743cb56..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mixed.expect +++ /dev/null @@ -1,63 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = fadd float %7, 3.000000e+00 - %9 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %11 = fadd float 
5.000000e+00, %10 - %12 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %13 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %14 = fadd float 3.000000e+00, %13 - %15 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %17 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %18 = fadd float %16, %17 - %19 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %21 = insertelement <4 x float> zeroinitializer, float %20, i32 0 - %22 = insertelement <4 x float> %21, float 5.000000e+00, i32 1 - %23 = insertelement <4 x float> %22, float 3.000000e+00, i32 2 - %24 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %25 = insertelement <4 x float> %23, float %24, i32 3 - %26 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %27 = insertelement <4 x float> , float %26, i32 1 - %28 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %29 = insertelement <4 x float> %27, float %28, i32 2 - %30 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %31 = insertelement <4 x float> %29, float %30, i32 3 - %32 = fadd <4 x float> %25, %31 - %33 = extractelement <4 x float> %32, i32 0 - store float %33, float* %9, align 16 - %34 = extractelement <4 x float> %32, i32 1 - store float %34, float* %12, align 4 - %35 = extractelement <4 x float> %32, i32 2 - store float %35, float* %15, align 8 - %36 = extractelement <4 x float> %32, i32 3 - store float %36, float* %19, align 4 - %37 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %38 = load float, float* %37, align 16 - %39 = fpext float %38 to double - %40 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %39) - %41 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %42 = load float, float* %41, align 4 - %43 = fpext float %42 to double - %44 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %43) - %45 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %46 = load float, float* %45, align 8 - %47 = fpext float %46 to double - %48 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %47) - %49 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %50 = load float, float* %49, align 4 - %51 = fpext float %50 to double - %52 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %51) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Num(3), Add([2, 3]), Num(5), Symbol("b_in"), Symbol("0,1,"), Get([6, 7]), Add([5, 8]), Num(3), Symbol("b_in"), Symbol("0,2,"), Get([11, 12]), Add([10, 13]), Symbol("a_in"), Symbol("0,3,"), Get([15, 16]), Symbol("b_in"), Symbol("0,3,"), Get([18, 19]), Add([17, 20]), Vec([4, 9, 14, 21])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Num(5), Num(3), Symbol("a_in"), Symbol("0,3,"), Get([5, 6]), Vec([2, 3, 4, 7]), Num(3), Symbol("b_in"), Symbol("0,1,"), Get([10, 11]), Symbol("b_in"), Symbol("0,2,"), Get([13, 14]), Symbol("b_in"), Symbol("0,3,"), Get([16, 17]), Vec([9, 12, 15, 18]), VecAdd([8, 19])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mixed_new.c b/src/dios-egraphs/Diospyros/llvm-tests/mixed_new.c deleted file mode 100644 index 90c97d64..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mixed_new.c +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#define SIZE 4 - -void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { - c_out[0] = a_in[0] + 3; - c_out[1] = 5 + b_in[1]; - c_out[2] = 3 + b_in[2]; - c_out[3] = a_in[3] + b_in[3]; -} - -int main(int argc, char **argv) { - float a_in[SIZE] = {1, 2, 3, 4}; - float b_in[SIZE] = {5, 6, 7, 8}; - float c_out[SIZE]; - sum(a_in, b_in, c_out); - assert(c_out[0] == 4); - assert(c_out[1] == 11); - assert(c_out[2] == 10); - assert(c_out[3] == 12); - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - // expected: 4, 11, 10, 12 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mult.c b/src/dios-egraphs/Diospyros/llvm-tests/mult.c index aebe69f7..f72d5d39 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/mult.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/mult.c @@ -1,18 +1,27 @@ +#include #include +#define SIZE 4 -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {5, 6, 7, 8}; - -int main(int argc, char **argv) { - float c_out[4]; +void prod(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { c_out[0] = a_in[0] * b_in[0]; c_out[1] = a_in[1] * b_in[1]; c_out[2] = a_in[2] * b_in[2]; c_out[3] = a_in[3] * b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + prod(a_in, b_in, c_out); printf("first: %f\n", c_out[0]); printf("second: %f\n", c_out[1]); printf("third: %f\n", c_out[2]); printf("fourth: %f\n", c_out[3]); + assert(c_out[0] == 5); + assert(c_out[1] == 12); + assert(c_out[2] == 21); + assert(c_out[3] == 32); // expected: 5, 12, 21, 32 return 0; } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mult.expect b/src/dios-egraphs/Diospyros/llvm-tests/mult.expect deleted file mode 100644 index ed6d2873..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mult.expect +++ /dev/null @@ -1,70 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, 
float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fmul float %7, %8 - %10 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %13 = fmul float %11, %12 - %14 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %15 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %17 = fmul float %15, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %19 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %21 = fmul float %19, %20 - %22 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %23 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %24 = insertelement <4 x float> zeroinitializer, float %23, i32 0 - %25 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %26 = insertelement <4 x float> %24, float %25, i32 1 - %27 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %28 = insertelement <4 x float> %26, float %27, i32 2 - %29 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %30 = insertelement <4 x float> %28, float %29, i32 3 - %31 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 - %33 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %34 = insertelement <4 x float> %32, float %33, i32 1 - %35 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %36 = insertelement <4 x float> %34, float %35, i32 2 - %37 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %38 = insertelement <4 x float> %36, float %37, i32 3 - %39 = fmul <4 x float> %30, %38 - %40 = extractelement <4 x float> %39, i32 0 - store float %40, float* %10, align 16 - %41 = extractelement <4 x float> %39, i32 1 - store float %41, float* %14, align 4 - %42 = extractelement <4 x float> %39, i32 2 - store float %42, float* %18, align 8 - %43 = extractelement <4 x float> %39, i32 3 - store float %43, float* %22, align 4 - %44 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %45 = load float, float* %44, align 16 - %46 = fpext float %45 to double - %47 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %46) - %48 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %49 = load float, float* %48, align 4 - %50 = fpext float %49 to double - %51 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %50) - %52 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %53 = load float, float* %52, align 8 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %54) - %56 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %57 = load float, float* %56, align 4 - %58 = fpext float %57 to double - %59 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %58) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Mul([2, 5]), Symbol("a_in"), Symbol("0,1,"), Get([7, 8]), Symbol("b_in"), Symbol("0,1,"), Get([10, 11]), Mul([9, 12]), Symbol("a_in"), Symbol("0,2,"), Get([14, 15]), Symbol("b_in"), Symbol("0,2,"), Get([17, 18]), Mul([16, 19]), Symbol("a_in"), Symbol("0,3,"), Get([21, 22]), Symbol("b_in"), Symbol("0,3,"), Get([24, 25]), Mul([23, 26]), Vec([6, 13, 20, 27])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,3,"), Get([22, 23]), LitVec([15, 18, 21, 24]), VecMul([12, 25])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mult_new.c b/src/dios-egraphs/Diospyros/llvm-tests/mult_new.c deleted file mode 100644 index f72d5d39..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mult_new.c +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#define SIZE 4 - -void prod(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { - c_out[0] = a_in[0] * b_in[0]; - c_out[1] = a_in[1] * b_in[1]; - c_out[2] = a_in[2] * b_in[2]; - c_out[3] = a_in[3] * b_in[3]; -} - -int main(int argc, char **argv) { - float a_in[SIZE] = {1, 2, 3, 4}; - float b_in[SIZE] = {5, 6, 7, 8}; - float c_out[SIZE]; - prod(a_in, b_in, c_out); - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - assert(c_out[0] == 5); - assert(c_out[1] == 12); - assert(c_out[2] == 21); - assert(c_out[3] == 32); - // expected: 5, 12, 21, 32 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.c b/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.c index c474e2f2..33ac2dc3 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.c @@ -1,19 +1,29 @@ +#include #include +#define SIZE 4 -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {5, 6, 7, 8}; -float c_in[] = {9, 10, 11, 12}; - -int main(int argc, char **argv) { - float d_out[4]; +void madd(float a_in[SIZE], float b_in[SIZE], float c_in[SIZE], + float d_out[SIZE]) { d_out[0] = a_in[0] + b_in[0] + c_in[0]; d_out[1] = a_in[1] + b_in[1] + c_in[1]; d_out[2] = a_in[2] + b_in[2] + c_in[2]; d_out[3] = a_in[3] + b_in[3] + c_in[3]; +} + +int main(int argc, 
char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_in[SIZE] = {9, 10, 11, 12}; + float d_out[4]; + madd(a_in, b_in, c_in, d_out); printf("first: %f\n", d_out[0]); printf("second: %f\n", d_out[1]); printf("third: %f\n", d_out[2]); printf("fourth: %f\n", d_out[3]); - // expected: 15, 18, 21, 24 + assert(d_out[0] == 15); + assert(d_out[1] == 18); + assert(d_out[2] == 21); + assert(d_out[3] == 24); + // expected: 15, 18, 21, 24 return 0; } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.expect b/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.expect deleted file mode 100644 index 622025c3..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.expect +++ /dev/null @@ -1,87 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 0), align 16 - %11 = fadd float %9, %10 - %12 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %13 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %14 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %15 = fadd float %13, %14 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 1), align 4 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %19 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %21 = fadd float %19, %20 - %22 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 2), align 8 - %23 = fadd float %21, %22 - %24 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %25 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %26 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %27 = fadd float %25, %26 - %28 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 3), align 4 - %29 = fadd float %27, %28 - %30 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %31 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 - %33 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %34 = insertelement <4 x float> %32, float %33, i32 1 - %35 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %36 = insertelement <4 x float> %34, float %35, i32 2 - %37 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %38 = insertelement <4 x float> %36, float %37, i32 3 - %39 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* 
@b_in, i64 0, i64 0), align 4 - %40 = insertelement <4 x float> zeroinitializer, float %39, i32 0 - %41 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %42 = insertelement <4 x float> %40, float %41, i32 1 - %43 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %44 = insertelement <4 x float> %42, float %43, i32 2 - %45 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %46 = insertelement <4 x float> %44, float %45, i32 3 - %47 = fadd <4 x float> %38, %46 - %48 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 0), align 4 - %49 = insertelement <4 x float> zeroinitializer, float %48, i32 0 - %50 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 1), align 4 - %51 = insertelement <4 x float> %49, float %50, i32 1 - %52 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 2), align 4 - %53 = insertelement <4 x float> %51, float %52, i32 2 - %54 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 3), align 4 - %55 = insertelement <4 x float> %53, float %54, i32 3 - %56 = fadd <4 x float> %47, %55 - %57 = extractelement <4 x float> %56, i32 0 - store float %57, float* %12, align 16 - %58 = extractelement <4 x float> %56, i32 1 - store float %58, float* %18, align 4 - %59 = extractelement <4 x float> %56, i32 2 - store float %59, float* %24, align 8 - %60 = extractelement <4 x float> %56, i32 3 - store float %60, float* %30, align 4 - %61 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %62 = load float, float* %61, align 16 - %63 = fpext float %62 to double - %64 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %63) - %65 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %66 = load float, float* %65, align 4 - %67 = fpext float %66 to double - %68 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %67) - %69 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %70 = load float, float* %69, align 8 - %71 = fpext float %70 to double - %72 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %71) - %73 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %74 = load float, float* %73, align 4 - %75 = fpext float %74 to double - %76 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %75) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("c_in"), Symbol("0,0,"), Get([7, 8]), Add([6, 9]), Symbol("a_in"), Symbol("0,1,"), Get([11, 12]), Symbol("b_in"), Symbol("0,1,"), Get([14, 15]), Add([13, 16]), Symbol("c_in"), Symbol("0,1,"), Get([18, 19]), Add([17, 20]), Symbol("a_in"), Symbol("0,2,"), Get([22, 23]), Symbol("b_in"), Symbol("0,2,"), Get([25, 26]), Add([24, 27]), Symbol("c_in"), Symbol("0,2,"), Get([29, 30]), Add([28, 31]), Symbol("a_in"), Symbol("0,3,"), Get([33, 34]), Symbol("b_in"), Symbol("0,3,"), Get([36, 37]), Add([35, 38]), Symbol("c_in"), Symbol("0,3,"), Get([40, 41]), Add([39, 42]), Vec([10, 21, 32, 43])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 6 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,3,"), Get([22, 23]), LitVec([15, 18, 21, 24]), VecAdd([12, 25]), Symbol("c_in"), Symbol("0,0,"), Get([27, 28]), Symbol("c_in"), Symbol("0,1,"), Get([30, 31]), Symbol("c_in"), Symbol("0,2,"), Get([33, 34]), Symbol("c_in"), Symbol("0,3,"), Get([36, 37]), LitVec([29, 32, 35, 38]), VecAdd([26, 39])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds_new.c b/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds_new.c deleted file mode 100644 index 33ac2dc3..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds_new.c +++ /dev/null @@ -1,29 +0,0 @@ -#include -#include -#define SIZE 4 - -void madd(float a_in[SIZE], float b_in[SIZE], float c_in[SIZE], - float d_out[SIZE]) { - d_out[0] = a_in[0] + b_in[0] + c_in[0]; - d_out[1] = a_in[1] + b_in[1] + c_in[1]; - d_out[2] = a_in[2] + b_in[2] + c_in[2]; - d_out[3] = a_in[3] + b_in[3] + c_in[3]; -} - -int main(int argc, char **argv) { - float a_in[SIZE] = {1, 2, 3, 4}; - float b_in[SIZE] = {5, 6, 7, 8}; - float c_in[SIZE] = {9, 10, 11, 12}; - float d_out[4]; - madd(a_in, b_in, c_in, d_out); - printf("first: %f\n", d_out[0]); - printf("second: %f\n", d_out[1]); - printf("third: %f\n", d_out[2]); - printf("fourth: %f\n", d_out[3]); - assert(d_out[0] == 15); - assert(d_out[1] == 18); - assert(d_out[2] == 21); - assert(d_out[3] == 24); - // expected: 6, 8, 10, 12 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/out_of_order.c b/src/dios-egraphs/Diospyros/llvm-tests/out_of_order.c index 5469e3df..6c59dd20 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/out_of_order.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/out_of_order.c @@ -1,14 +1,23 @@ +#include #include +#define SIZE 4 -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {5, 6, 7, 8}; - -int main(int argc, char **argv) { - float c_out[4]; +void oo(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { c_out[0] = a_in[0] + b_in[0]; c_out[3] = a_in[3] + b_in[3]; c_out[2] = a_in[2] + b_in[2]; c_out[1] = a_in[1] + b_in[1]; +} + +int main(int argc, char **argv) { + float 
a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + oo(a_in, b_in, c_out); + assert(c_out[0] == 6); + assert(c_out[1] == 8); + assert(c_out[2] == 10); + assert(c_out[3] == 12); printf("first: %f\n", c_out[0]); printf("second: %f\n", c_out[1]); printf("third: %f\n", c_out[2]); diff --git a/src/dios-egraphs/Diospyros/llvm-tests/out_of_order.expect b/src/dios-egraphs/Diospyros/llvm-tests/out_of_order.expect deleted file mode 100644 index 58450e4a..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/out_of_order.expect +++ /dev/null @@ -1,70 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %13 = fadd float %11, %12 - %14 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %15 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %19 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %21 = fadd float %19, %20 - %22 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %23 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %24 = insertelement <4 x float> zeroinitializer, float %23, i32 0 - %25 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %26 = insertelement <4 x float> %24, float %25, i32 1 - %27 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %28 = insertelement <4 x float> %26, float %27, i32 2 - %29 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %30 = insertelement <4 x float> %28, float %29, i32 3 - %31 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 - %33 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %34 = insertelement <4 x float> %32, float %33, i32 1 - %35 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %36 = insertelement <4 x float> %34, float %35, i32 2 - %37 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %38 = insertelement <4 x float> %36, float %37, i32 3 - %39 = fadd <4 x float> %30, %38 - %40 = extractelement <4 x float> %39, i32 0 - store float %40, float* %10, align 16 - %41 = extractelement <4 x float> %39, 
i32 1 - store float %41, float* %14, align 4 - %42 = extractelement <4 x float> %39, i32 2 - store float %42, float* %18, align 8 - %43 = extractelement <4 x float> %39, i32 3 - store float %43, float* %22, align 4 - %44 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %45 = load float, float* %44, align 16 - %46 = fpext float %45 to double - %47 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %46) - %48 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %49 = load float, float* %48, align 4 - %50 = fpext float %49 to double - %51 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %50) - %52 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %53 = load float, float* %52, align 8 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %54) - %56 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %57 = load float, float* %56, align 4 - %58 = fpext float %57 to double - %59 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %58) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("a_in"), Symbol("0,3,"), Get([7, 8]), Symbol("b_in"), Symbol("0,3,"), Get([10, 11]), Add([9, 12]), Symbol("a_in"), Symbol("0,2,"), Get([14, 15]), Symbol("b_in"), Symbol("0,2,"), Get([17, 18]), Add([16, 19]), Symbol("a_in"), Symbol("0,1,"), Get([21, 22]), Symbol("b_in"), Symbol("0,1,"), Get([24, 25]), Add([23, 26]), Vec([6, 13, 20, 27])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,3,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,1,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,3,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,1,"), Get([22, 23]), LitVec([15, 18, 21, 24]), VecAdd([12, 25])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/out_of_order_new.c b/src/dios-egraphs/Diospyros/llvm-tests/out_of_order_new.c deleted file mode 100644 index 6c59dd20..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/out_of_order_new.c +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#define SIZE 4 - -void oo(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { - c_out[0] = a_in[0] + b_in[0]; - c_out[3] = a_in[3] + b_in[3]; - c_out[2] = a_in[2] + b_in[2]; - c_out[1] = a_in[1] + b_in[1]; -} - -int main(int argc, char **argv) { - float a_in[SIZE] = {1, 2, 3, 4}; - float b_in[SIZE] = {5, 6, 7, 8}; - float c_out[SIZE]; - oo(a_in, b_in, c_out); - assert(c_out[0] == 6); - assert(c_out[1] == 8); - assert(c_out[2] == 10); - assert(c_out[3] == 12); - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - // 6, 8, 10, 12 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/point-product.expect 
b/src/dios-egraphs/Diospyros/llvm-tests/point-product.expect deleted file mode 100644 index ef741a40..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/point-product.expect +++ /dev/null @@ -1,284 +0,0 @@ - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - %7 = load float*, float** %4, align 8 - %8 = getelementptr inbounds float, float* %7, i64 1 - %9 = load float, float* %8, align 4 - %10 = load float*, float** %5, align 8 - %11 = getelementptr inbounds float, float* %10, i64 2 - %12 = load float, float* %11, align 4 - %13 = fmul float %9, %12 - %14 = load float*, float** %4, align 8 - %15 = getelementptr inbounds float, float* %14, i64 2 - %16 = load float, float* %15, align 4 - %17 = load float*, float** %5, align 8 - %18 = getelementptr inbounds float, float* %17, i64 1 - %19 = load float, float* %18, align 4 - %20 = fmul float %16, %19 - %21 = fsub float %13, %20 - %22 = load float*, float** %6, align 8 - %23 = getelementptr inbounds float, float* %22, i64 0 - %24 = load float*, float** %4, align 8 - %25 = getelementptr inbounds float, float* %24, i64 2 - %26 = load float, float* %25, align 4 - %27 = load float*, float** %5, align 8 - %28 = getelementptr inbounds float, float* %27, i64 0 - %29 = load float, float* %28, align 4 - %30 = fmul float %26, %29 - %31 = load float*, float** %4, align 8 - %32 = getelementptr inbounds float, float* %31, i64 0 - %33 = load float, float* %32, align 4 - %34 = load float*, float** %5, align 8 - %35 = getelementptr inbounds float, float* %34, i64 2 - %36 = load float, float* %35, align 4 - %37 = fmul float %33, %36 - %38 = fsub float %30, %37 - %39 = load float*, float** %6, align 8 - %40 = getelementptr inbounds float, float* %39, i64 1 - %41 = load float*, float** %4, align 8 - %42 = getelementptr inbounds float, float* %41, i64 0 - %43 = load float, float* %42, align 4 - %44 = load float*, float** %5, align 8 - %45 = getelementptr inbounds float, float* %44, i64 1 - %46 = load float, float* %45, align 4 - %47 = fmul float %43, %46 - %48 = load float*, float** %4, align 8 - %49 = getelementptr inbounds float, float* %48, i64 1 - %50 = load float, float* %49, align 4 - %51 = load float*, float** %5, align 8 - %52 = getelementptr inbounds float, float* %51, i64 0 - %53 = load float, float* %52, align 4 - %54 = fmul float %50, %53 - %55 = fsub float %47, %54 - %56 = load float*, float** %6, align 8 - %57 = getelementptr inbounds float, float* %56, i64 2 - %58 = load float, float* %8, align 4 - %59 = insertelement <4 x float> zeroinitializer, float %58, i32 0 - %60 = load float, float* %25, align 4 - %61 = insertelement <4 x float> %59, float %60, i32 1 - %62 = load float, float* %42, align 4 - %63 = insertelement <4 x float> %61, float %62, i32 2 - %64 = insertelement <4 x float> %63, float 1.000000e+00, i32 3 - %65 = load float, float* %11, align 4 - %66 = insertelement <4 x float> zeroinitializer, float %65, i32 0 - %67 = load float, float* %28, align 4 - %68 = insertelement <4 x float> %66, float %67, i32 1 - %69 = load float, float* %45, align 4 - %70 = insertelement <4 x float> %68, float %69, i32 2 - %71 = insertelement <4 x float> %70, float 0.000000e+00, i32 3 - %72 = fmul <4 x float> %64, %71 - %73 = load float, float* %15, align 4 - %74 = insertelement <4 x float> zeroinitializer, float %73, i32 0 - %75 = load float, float* %32, align 4 - %76 = insertelement <4 x float> %74, float 
%75, i32 1 - %77 = load float, float* %49, align 4 - %78 = insertelement <4 x float> %76, float %77, i32 2 - %79 = insertelement <4 x float> %78, float 1.000000e+00, i32 3 - %80 = load float, float* %18, align 4 - %81 = insertelement <4 x float> zeroinitializer, float %80, i32 0 - %82 = load float, float* %35, align 4 - %83 = insertelement <4 x float> %81, float %82, i32 1 - %84 = load float, float* %52, align 4 - %85 = insertelement <4 x float> %83, float %84, i32 2 - %86 = insertelement <4 x float> %85, float 0.000000e+00, i32 3 - %87 = fmul <4 x float> %79, %86 - %88 = fsub <4 x float> %72, %87 - %89 = extractelement <4 x float> %88, i32 0 - store float %89, float* %23, align 4 - %90 = extractelement <4 x float> %88, i32 1 - store float %90, float* %40, align 4 - %91 = extractelement <4 x float> %88, i32 2 - store float %91, float* %57, align 4 - ret void - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - %7 = alloca [3 x float], align 4 - %8 = alloca [3 x float], align 4 - %9 = alloca i32, align 4 - %10 = alloca [3 x float], align 4 - %11 = alloca i32, align 4 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - %12 = getelementptr inbounds [3 x float], [3 x float]* %7, i64 0, i64 0 - %13 = load float*, float** %4, align 8 - %14 = getelementptr inbounds float, float* %13, i64 0 - %15 = load float, float* %14, align 4 - store float %15, float* %12, align 4 - %16 = getelementptr inbounds float, float* %12, i64 1 - %17 = load float*, float** %4, align 8 - %18 = getelementptr inbounds float, float* %17, i64 1 - %19 = load float, float* %18, align 4 - store float %19, float* %16, align 4 - %20 = getelementptr inbounds float, float* %16, i64 1 - %21 = load float*, float** %4, align 8 - %22 = getelementptr inbounds float, float* %21, i64 2 - %23 = load float, float* %22, align 4 - store float %23, float* %20, align 4 - %24 = getelementptr inbounds [3 x float], [3 x float]* %7, i64 0, i64 0 - %25 = load float*, float** %5, align 8 - %26 = getelementptr inbounds [3 x float], [3 x float]* %8, i64 0, i64 0 - call void @cross_product(float* %24, float* %25, float* %26) - store i32 0, i32* %9, align 4 - br label %27 - -27: ; preds = %46, %3 - %28 = load i32, i32* %9, align 4 - %29 = icmp slt i32 %28, 3 - br i1 %29, label %30, label %49 - -30: ; preds = %27 - %31 = load i32, i32* %9, align 4 - %32 = sext i32 %31 to i64 - %33 = getelementptr inbounds [3 x float], [3 x float]* %8, i64 0, i64 %32 - %34 = load float, float* %33, align 4 - %35 = fmul float %34, 2.000000e+00 - %36 = load i32, i32* %9, align 4 - %37 = sext i32 %36 to i64 - %38 = getelementptr inbounds [3 x float], [3 x float]* %8, i64 0, i64 %37 - %39 = load float, float* %33, align 4 - %40 = insertelement <4 x float> zeroinitializer, float %39, i32 0 - %41 = insertelement <4 x float> %40, float 0.000000e+00, i32 1 - %42 = insertelement <4 x float> %41, float 0.000000e+00, i32 2 - %43 = insertelement <4 x float> %42, float 0.000000e+00, i32 3 - %44 = fmul <4 x float> %43, - %45 = extractelement <4 x float> %44, i32 0 - store float %45, float* %38, align 4 - br label %46 - -46: ; preds = %30 - %47 = load i32, i32* %9, align 4 - %48 = add nsw i32 %47, 1 - store i32 %48, i32* %9, align 4 - br label %27 - -49: ; preds = %27 - %50 = getelementptr inbounds [3 x float], [3 x float]* %7, i64 0, i64 0 - %51 = getelementptr inbounds [3 x float], [3 x float]* %8, i64 0, i64 0 - %52 = getelementptr inbounds [3 x float], [3 x float]* %10, i64 0, 
i64 0 - call void @cross_product(float* %50, float* %51, float* %52) - store i32 0, i32* %11, align 4 - br label %53 - -53: ; preds = %103, %49 - %54 = load i32, i32* %11, align 4 - %55 = icmp slt i32 %54, 3 - br i1 %55, label %56, label %106 - -56: ; preds = %53 - %57 = load float*, float** %5, align 8 - %58 = load i32, i32* %11, align 4 - %59 = sext i32 %58 to i64 - %60 = getelementptr inbounds float, float* %57, i64 %59 - %61 = load float, float* %60, align 4 - %62 = load float*, float** %4, align 8 - %63 = getelementptr inbounds float, float* %62, i64 3 - %64 = load float, float* %63, align 4 - %65 = load i32, i32* %11, align 4 - %66 = sext i32 %65 to i64 - %67 = getelementptr inbounds [3 x float], [3 x float]* %8, i64 0, i64 %66 - %68 = load float, float* %67, align 4 - %69 = fmul float %64, %68 - %70 = fadd float %61, %69 - %71 = load i32, i32* %11, align 4 - %72 = sext i32 %71 to i64 - %73 = getelementptr inbounds [3 x float], [3 x float]* %10, i64 0, i64 %72 - %74 = load float, float* %73, align 4 - %75 = fadd float %70, %74 - %76 = load float*, float** %6, align 8 - %77 = load i32, i32* %11, align 4 - %78 = sext i32 %77 to i64 - %79 = getelementptr inbounds float, float* %76, i64 %78 - %80 = load float, float* %60, align 4 - %81 = insertelement <4 x float> zeroinitializer, float %80, i32 0 - %82 = insertelement <4 x float> %81, float 0.000000e+00, i32 1 - %83 = insertelement <4 x float> %82, float 0.000000e+00, i32 2 - %84 = insertelement <4 x float> %83, float 0.000000e+00, i32 3 - %85 = load float, float* %63, align 4 - %86 = insertelement <4 x float> zeroinitializer, float %85, i32 0 - %87 = insertelement <4 x float> %86, float 1.000000e+00, i32 1 - %88 = insertelement <4 x float> %87, float 1.000000e+00, i32 2 - %89 = insertelement <4 x float> %88, float 1.000000e+00, i32 3 - %90 = load float, float* %67, align 4 - %91 = insertelement <4 x float> zeroinitializer, float %90, i32 0 - %92 = insertelement <4 x float> %91, float 0.000000e+00, i32 1 - %93 = insertelement <4 x float> %92, float 0.000000e+00, i32 2 - %94 = insertelement <4 x float> %93, float 0.000000e+00, i32 3 - %95 = call <4 x float> @llvm.fma.f32(<4 x float> %89, <4 x float> %94, <4 x float> %84) - %96 = load float, float* %73, align 4 - %97 = insertelement <4 x float> zeroinitializer, float %96, i32 0 - %98 = insertelement <4 x float> %97, float 0.000000e+00, i32 1 - %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 2 - %100 = insertelement <4 x float> %99, float 0.000000e+00, i32 3 - %101 = fadd <4 x float> %95, %100 - %102 = extractelement <4 x float> %101, i32 0 - store float %102, float* %79, align 4 - br label %103 - -103: ; preds = %56 - %104 = load i32, i32* %11, align 4 - %105 = add nsw i32 %104, 1 - store i32 %105, i32* %11, align 4 - br label %53 - -106: ; preds = %53 - ret void - %1 = alloca i32, align 4 - %2 = alloca [4 x float], align 16 - %3 = alloca [4 x float], align 16 - %4 = alloca [4 x float], align 16 - %5 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %6 = bitcast [4 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %6, i8* align 16 bitcast ([4 x float]* @__const.main.q_in to i8*), i64 16, i1 false) - %7 = bitcast [4 x float]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %7, i8* align 16 bitcast ([4 x float]* @__const.main.p_in to i8*), i64 16, i1 false) - %8 = bitcast [4 x float]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %8, i8 0, i64 16, i1 false) - %9 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 
0 - %10 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - %11 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - call void @point_product(float* %9, float* %10, float* %11) - store i32 0, i32* %5, align 4 - br label %12 - -12: ; preds = %22, %0 - %13 = load i32, i32* %5, align 4 - %14 = icmp slt i32 %13, 3 - br i1 %14, label %15, label %25 - -15: ; preds = %12 - %16 = load i32, i32* %5, align 4 - %17 = sext i32 %16 to i64 - %18 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 %17 - %19 = load float, float* %18, align 4 - %20 = fpext float %19 to double - %21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %20) - br label %22 - -22: ; preds = %15 - %23 = load i32, i32* %5, align 4 - %24 = add nsw i32 %23, 1 - store i32 %24, i32* %5, align 4 - br label %12 - -25: ; preds = %12 - %26 = load i32, i32* %1, align 4 - ret i32 %26 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("2,"), Get([3, 4]), Mul([2, 5]), Symbol("no-array-name3"), Symbol("2,"), Get([7, 8]), Symbol("no-array-name4"), Symbol("1,"), Get([10, 11]), Mul([9, 12]), Minus([6, 13]), Symbol("no-array-name5"), Symbol("2,"), Get([15, 16]), Symbol("no-array-name6"), Symbol("0,"), Get([18, 19]), Mul([17, 20]), Symbol("no-array-name7"), Symbol("0,"), Get([22, 23]), Symbol("no-array-name8"), Symbol("2,"), Get([25, 26]), Mul([24, 27]), Minus([21, 28]), Symbol("no-array-name9"), Symbol("0,"), Get([30, 31]), Symbol("no-array-name10"), Symbol("1,"), Get([33, 34]), Mul([32, 35]), Symbol("no-array-name11"), Symbol("1,"), Get([37, 38]), Symbol("no-array-name12"), Symbol("0,"), Get([40, 41]), Mul([39, 42]), Minus([36, 43]), Num(0), Vec([14, 29, 44, 45])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name1"), Symbol("1,"), Get([0, 1]), Symbol("no-array-name5"), Symbol("2,"), Get([3, 4]), Symbol("no-array-name9"), Symbol("0,"), Get([6, 7]), Num(1), Vec([2, 5, 8, 9]), Symbol("no-array-name2"), Symbol("2,"), Get([11, 12]), Symbol("no-array-name6"), Symbol("0,"), Get([14, 15]), Symbol("no-array-name10"), Symbol("1,"), Get([17, 18]), Num(0), Vec([13, 16, 19, 20]), VecMul([10, 21]), Symbol("no-array-name3"), Symbol("2,"), Get([23, 24]), Symbol("no-array-name7"), Symbol("0,"), Get([26, 27]), Symbol("no-array-name11"), Symbol("1,"), Get([29, 30]), Num(1), Vec([25, 28, 31, 32]), Symbol("no-array-name4"), Symbol("1,"), Get([34, 35]), Symbol("no-array-name8"), Symbol("2,"), Get([37, 38]), Symbol("no-array-name12"), Symbol("0,"), Get([40, 41]), Num(0), Vec([36, 39, 42, 43]), VecMul([33, 44]), VecMinus([22, 45])] -RecExpr { nodes: [Symbol("no-array-name13"), Symbol("0,-1,"), Get([0, 1]), Num(2), Mul([2, 3]), Num(0), Num(0), Num(0), Vec([4, 5, 6, 7])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name13"), Symbol("0,-1,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Num(2), Num(0), Num(0), Num(0), Vec([7, 8, 9, 10]), VecMul([6, 11])] -RecExpr { nodes: 
[Symbol("no-array-name14"), Symbol("3,"), Get([0, 1]), Symbol("no-array-name15"), Symbol("0,-2,"), Get([3, 4]), Mul([2, 5]), Symbol("no-array-name16"), Symbol("-3,"), Get([7, 8]), Add([9, 6]), Symbol("no-array-name17"), Symbol("0,-4,"), Get([11, 12]), Add([10, 13]), Num(0), Num(0), Num(0), Vec([14, 15, 16, 17])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name16"), Symbol("-3,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Symbol("no-array-name14"), Symbol("3,"), Get([7, 8]), Num(1), Num(1), Num(1), Vec([9, 10, 11, 12]), Symbol("no-array-name15"), Symbol("0,-2,"), Get([14, 15]), Num(0), Num(0), Num(0), LitVec([16, 17, 18, 19]), VecMAC([6, 13, 20]), Symbol("no-array-name17"), Symbol("0,-4,"), Get([22, 23]), Num(0), Num(0), Num(0), LitVec([24, 25, 26, 27]), VecAdd([21, 28])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/return-new.c b/src/dios-egraphs/Diospyros/llvm-tests/return-new.c deleted file mode 100644 index 02a7a6f1..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/return-new.c +++ /dev/null @@ -1,37 +0,0 @@ -#include -#include -#define SIZE 8 - -void return_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { - for (int i = 0; i < SIZE; i++) { - if (i == SIZE / 2) return; - b_out[i] = a_in[i] * scalar_in; - } - b_out[SIZE / 2] = a_in[SIZE / 2] * scalar_in; // shouldn't run -} - -int main(void) { - float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; - float scalar_in = 10; - float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; - return_test(a_in, scalar_in, b_out); - for (int i = 0; i < SIZE; i++) { - printf("%f\n", b_out[i]); - } - assert(b_out[0] == 90); - assert(b_out[1] == 80); - assert(b_out[2] == 70); - assert(b_out[3] == 60); - assert(b_out[4] == 0); - assert(b_out[5] == 0); - assert(b_out[6] == 0); - assert(b_out[7] == 0); - // 90.000000 - // 80.000000 - // 70.000000 - // 60.000000 - // 0.000000 - // 0.000000 - // 0.000000 - // 0.000000 -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/return.c b/src/dios-egraphs/Diospyros/llvm-tests/return.c index b9c73033..02a7a6f1 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/return.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/return.c @@ -1,3 +1,4 @@ +#include #include #define SIZE 8 @@ -17,6 +18,14 @@ int main(void) { for (int i = 0; i < SIZE; i++) { printf("%f\n", b_out[i]); } + assert(b_out[0] == 90); + assert(b_out[1] == 80); + assert(b_out[2] == 70); + assert(b_out[3] == 60); + assert(b_out[4] == 0); + assert(b_out[5] == 0); + assert(b_out[6] == 0); + assert(b_out[7] == 0); // 90.000000 // 80.000000 // 70.000000 diff --git a/src/dios-egraphs/Diospyros/llvm-tests/scalar-new.c b/src/dios-egraphs/Diospyros/llvm-tests/scalar-new.c deleted file mode 100644 index f5ca5592..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/scalar-new.c +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#define SIZE 8 - -void matrix_multiply(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { - for (int i = 0; i < SIZE; i++) { - b_out[i] = a_in[i] * scalar_in; - } -} - -int main(void) { - float a_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; - float scalar_in = 10; - float b_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; - matrix_multiply(a_in, scalar_in, b_in); - for (int i = 0; i < SIZE; i++) { - printf("%f\n", b_in[i]); - } - assert(b_in[0] == 10); - 
assert(b_in[1] == 20); - assert(b_in[2] == 30); - assert(b_in[3] == 40); - assert(b_in[4] == 50); - assert(b_in[5] == 60); - assert(b_in[6] == 70); - assert(b_in[7] == 80); -} diff --git a/src/dios-egraphs/Diospyros/llvm-tests/scalar.c b/src/dios-egraphs/Diospyros/llvm-tests/scalar.c index 773b9233..f5ca5592 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/scalar.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/scalar.c @@ -1,3 +1,4 @@ +#include #include #define SIZE 8 @@ -15,4 +16,12 @@ int main(void) { for (int i = 0; i < SIZE; i++) { printf("%f\n", b_in[i]); } + assert(b_in[0] == 10); + assert(b_in[1] == 20); + assert(b_in[2] == 30); + assert(b_in[3] == 40); + assert(b_in[4] == 50); + assert(b_in[5] == 60); + assert(b_in[6] == 70); + assert(b_in[7] == 80); } diff --git a/src/dios-egraphs/Diospyros/llvm-tests/scalar.expect b/src/dios-egraphs/Diospyros/llvm-tests/scalar.expect deleted file mode 100644 index 5dc6cfb5..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/scalar.expect +++ /dev/null @@ -1,95 +0,0 @@ - %4 = alloca float*, align 8 - %5 = alloca float, align 4 - %6 = alloca float*, align 8 - %7 = alloca i32, align 4 - store float* %0, float** %4, align 8 - store float %1, float* %5, align 4 - store float* %2, float** %6, align 8 - store i32 0, i32* %7, align 4 - br label %8 - -8: ; preds = %34, %3 - %9 = load i32, i32* %7, align 4 - %10 = icmp slt i32 %9, 8 - br i1 %10, label %11, label %37 - -11: ; preds = %8 - %12 = load float*, float** %4, align 8 - %13 = load i32, i32* %7, align 4 - %14 = sext i32 %13 to i64 - %15 = getelementptr inbounds float, float* %12, i64 %14 - %16 = load float, float* %15, align 4 - %17 = load float, float* %5, align 4 - %18 = fmul float %16, %17 - %19 = load float*, float** %6, align 8 - %20 = load i32, i32* %7, align 4 - %21 = sext i32 %20 to i64 - %22 = getelementptr inbounds float, float* %19, i64 %21 - %23 = load float, float* %15, align 4 - %24 = insertelement <4 x float> zeroinitializer, float %23, i32 0 - %25 = insertelement <4 x float> %24, float 0.000000e+00, i32 1 - %26 = insertelement <4 x float> %25, float 0.000000e+00, i32 2 - %27 = insertelement <4 x float> %26, float 0.000000e+00, i32 3 - %28 = insertelement <4 x float> zeroinitializer, float %17, i32 0 - %29 = insertelement <4 x float> %28, float 0.000000e+00, i32 1 - %30 = insertelement <4 x float> %29, float 0.000000e+00, i32 2 - %31 = insertelement <4 x float> %30, float 0.000000e+00, i32 3 - %32 = fmul <4 x float> %27, %31 - %33 = extractelement <4 x float> %32, i32 0 - store float %33, float* %22, align 4 - br label %34 - -34: ; preds = %11 - %35 = load i32, i32* %7, align 4 - %36 = add nsw i32 %35, 1 - store i32 %36, i32* %7, align 4 - br label %8 - -37: ; preds = %8 - ret void - %1 = alloca i32, align 4 - %2 = alloca [8 x float], align 16 - %3 = alloca float, align 4 - %4 = alloca [8 x float], align 16 - %5 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %6 = bitcast [8 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %6, i8* align 16 bitcast ([8 x float]* @__const.main.a_in to i8*), i64 32, i1 false) - store float 1.000000e+01, float* %3, align 4 - %7 = bitcast [8 x float]* %4 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %7, i8* align 16 bitcast ([8 x float]* @__const.main.b_in to i8*), i64 32, i1 false) - %8 = getelementptr inbounds [8 x float], [8 x float]* %2, i64 0, i64 0 - %9 = load float, float* %3, align 4 - %10 = getelementptr inbounds [8 x float], [8 x float]* %4, i64 0, i64 0 - call void @matrix_multiply(float* %8, float 
%9, float* %10) - store i32 0, i32* %5, align 4 - br label %11 - -11: ; preds = %21, %0 - %12 = load i32, i32* %5, align 4 - %13 = icmp slt i32 %12, 8 - br i1 %13, label %14, label %24 - -14: ; preds = %11 - %15 = load i32, i32* %5, align 4 - %16 = sext i32 %15 to i64 - %17 = getelementptr inbounds [8 x float], [8 x float]* %4, i64 0, i64 %16 - %18 = load float, float* %17, align 4 - %19 = fpext float %18 to double - %20 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %19) - br label %21 - -21: ; preds = %14 - %22 = load i32, i32* %5, align 4 - %23 = add nsw i32 %22, 1 - store i32 %23, i32* %5, align 4 - br label %11 - -24: ; preds = %11 - %25 = load i32, i32* %1, align 4 - ret i32 %25 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Symbol("no-temp-name1"), Mul([2, 3]), Num(0), Num(0), Num(0), Vec([4, 5, 6, 7])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Symbol("no-temp-name1"), Num(0), Num(0), Num(0), Vec([7, 8, 9, 10]), VecMul([6, 11])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.expect b/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.expect deleted file mode 100644 index 019bdfc8..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.expect +++ /dev/null @@ -1,181 +0,0 @@ - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - %7 = alloca i32, align 4 - %8 = alloca i32, align 4 - %9 = alloca float, align 4 - %10 = alloca i32, align 4 - %11 = alloca i32, align 4 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - store i32 0, i32* %7, align 4 - br label %12 - -12: ; preds = %87, %3 - %13 = load i32, i32* %7, align 4 - %14 = icmp slt i32 %13, 6 - br i1 %14, label %15, label %90 - -15: ; preds = %12 - store i32 0, i32* %8, align 4 - br label %16 - -16: ; preds = %83, %15 - %17 = load i32, i32* %8, align 4 - %18 = icmp slt i32 %17, 2 - br i1 %18, label %19, label %86 - -19: ; preds = %16 - store float 0.000000e+00, float* %9, align 4 - store i32 0, i32* %10, align 4 - br label %20 - -20: ; preds = %71, %19 - %21 = load i32, i32* %10, align 4 - %22 = icmp slt i32 %21, 3 - br i1 %22, label %23, label %74 - -23: ; preds = %20 - store i32 0, i32* %11, align 4 - br label %24 - -24: ; preds = %67, %23 - %25 = load i32, i32* %11, align 4 - %26 = icmp slt i32 %25, 3 - br i1 %26, label %27, label %70 - -27: ; preds = %24 - %28 = load float*, float** %6, align 8 - %29 = load i32, i32* %10, align 4 - %30 = mul nsw i32 %29, 3 - %31 = load i32, i32* %11, align 4 - %32 = add nsw i32 %30, %31 - %33 = sext i32 %32 to i64 - %34 = getelementptr inbounds float, float* %28, i64 %33 - %35 = load float, float* %34, align 4 - %36 = load float*, float** %4, align 8 - %37 = load i32, i32* %7, align 4 - %38 = load i32, i32* %10, align 4 - %39 = add nsw i32 %37, %38 - %40 = mul nsw i32 %39, 4 - %41 = load i32, i32* %8, align 4 - %42 = add nsw i32 %40, %41 - %43 = load i32, i32* %11, align 4 - %44 = add nsw i32 %42, %43 - %45 = sext i32 %44 to i64 - %46 = getelementptr inbounds float, float* %36, i64 %45 - %47 = load float, float* %46, align 4 - %48 = fmul 
float %35, %47 - %49 = load float, float* %9, align 4 - %50 = fadd float %49, %48 - %51 = insertelement <4 x float> zeroinitializer, float %49, i32 0 - %52 = insertelement <4 x float> %51, float 0.000000e+00, i32 1 - %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 2 - %54 = insertelement <4 x float> %53, float 0.000000e+00, i32 3 - %55 = load float, float* %34, align 4 - %56 = insertelement <4 x float> zeroinitializer, float %55, i32 0 - %57 = insertelement <4 x float> %56, float 0.000000e+00, i32 1 - %58 = insertelement <4 x float> %57, float 0.000000e+00, i32 2 - %59 = insertelement <4 x float> %58, float 0.000000e+00, i32 3 - %60 = load float, float* %46, align 4 - %61 = insertelement <4 x float> zeroinitializer, float %60, i32 0 - %62 = insertelement <4 x float> %61, float 0.000000e+00, i32 1 - %63 = insertelement <4 x float> %62, float 0.000000e+00, i32 2 - %64 = insertelement <4 x float> %63, float 0.000000e+00, i32 3 - %65 = call <4 x float> @llvm.fma.f32(<4 x float> %59, <4 x float> %64, <4 x float> %54) - %66 = extractelement <4 x float> %65, i32 0 - store float %66, float* %9, align 4 - br label %67 - -67: ; preds = %27 - %68 = load i32, i32* %11, align 4 - %69 = add nsw i32 %68, 1 - store i32 %69, i32* %11, align 4 - br label %24 - -70: ; preds = %24 - br label %71 - -71: ; preds = %70 - %72 = load i32, i32* %10, align 4 - %73 = add nsw i32 %72, 1 - store i32 %73, i32* %10, align 4 - br label %20 - -74: ; preds = %20 - %75 = load float, float* %9, align 4 - %76 = load float*, float** %5, align 8 - %77 = load i32, i32* %7, align 4 - %78 = mul nsw i32 %77, 4 - %79 = load i32, i32* %8, align 4 - %80 = add nsw i32 %78, %79 - %81 = sext i32 %80 to i64 - %82 = getelementptr inbounds float, float* %76, i64 %81 - store float %75, float* %82, align 4 - br label %83 - -83: ; preds = %74 - %84 = load i32, i32* %8, align 4 - %85 = add nsw i32 %84, 1 - store i32 %85, i32* %8, align 4 - br label %16 - -86: ; preds = %16 - br label %87 - -87: ; preds = %86 - %88 = load i32, i32* %7, align 4 - %89 = add nsw i32 %88, 1 - store i32 %89, i32* %7, align 4 - br label %12 - -90: ; preds = %12 - ret void - %1 = alloca i32, align 4 - %2 = alloca [32 x float], align 16 - %3 = alloca [32 x float], align 16 - %4 = alloca [9 x float], align 16 - %5 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %6 = bitcast [32 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %6, i8* align 16 bitcast ([32 x float]* @__const.main.orig_in to i8*), i64 128, i1 false) - %7 = bitcast [32 x float]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %7, i8* align 16 bitcast ([32 x float]* @__const.main.sol_out to i8*), i64 128, i1 false) - %8 = bitcast [9 x float]* %4 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %8, i8* align 16 bitcast ([9 x float]* @__const.main.filter_in to i8*), i64 36, i1 false) - %9 = getelementptr inbounds [32 x float], [32 x float]* %2, i64 0, i64 0 - %10 = getelementptr inbounds [32 x float], [32 x float]* %3, i64 0, i64 0 - %11 = getelementptr inbounds [9 x float], [9 x float]* %4, i64 0, i64 0 - call void @stencil(float* %9, float* %10, float* %11) - store i32 0, i32* %5, align 4 - br label %12 - -12: ; preds = %22, %0 - %13 = load i32, i32* %5, align 4 - %14 = icmp slt i32 %13, 32 - br i1 %14, label %15, label %25 - -15: ; preds = %12 - %16 = load i32, i32* %5, align 4 - %17 = sext i32 %16 to i64 - %18 = getelementptr inbounds [32 x float], [32 x float]* %3, i64 0, i64 %17 - %19 = load float, float* %18, align 4 - %20 = fpext float 
%19 to double - %21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %20) - br label %22 - -22: ; preds = %15 - %23 = load i32, i32* %5, align 4 - %24 = add nsw i32 %23, 1 - store i32 %24, i32* %5, align 4 - br label %12 - -25: ; preds = %12 - %26 = load i32, i32* %1, align 4 - ret i32 %26 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("-2,"), Get([3, 4]), Mul([2, 5]), Symbol("no-temp-name1"), Add([7, 6]), Num(0), Num(0), Num(0), Vec([8, 9, 10, 11])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-temp-name1"), Num(0), Num(0), Num(0), Vec([0, 1, 2, 3]), Symbol("no-array-name1"), Symbol("-1,"), Get([5, 6]), Num(0), Num(0), Num(0), LitVec([7, 8, 9, 10]), Symbol("no-array-name2"), Symbol("-2,"), Get([12, 13]), Num(0), Num(0), Num(0), LitVec([14, 15, 16, 17]), VecMAC([4, 11, 18])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/ternary-new.c b/src/dios-egraphs/Diospyros/llvm-tests/ternary-new.c deleted file mode 100644 index 04acb85e..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/ternary-new.c +++ /dev/null @@ -1,35 +0,0 @@ -#include -#include -#define SIZE 8 - -void tern(float a_in[SIZE], float b_out[SIZE]) { - for (int i = 0; i < SIZE; i++) { - b_out[i] = (i < SIZE / 2) ? a_in[i] : 0; - } -} - -int main(int argc, char **argv) { - float a_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; - float b_out[SIZE] = {5, 6, 7, 8, 1, 2, 3, 4}; - tern(a_in, b_out); - for (int i = 0; i < SIZE; i++) { - printf("%f\n", b_out[i]); - } - assert(b_out[0] == 1); - assert(b_out[1] == 2); - assert(b_out[2] == 3); - assert(b_out[3] == 4); - assert(b_out[4] == 0); - assert(b_out[5] == 0); - assert(b_out[6] == 0); - assert(b_out[7] == 0); - // 1.000000 - // 2.000000 - // 3.000000 - // 4.000000 - // 0.000000 - // 0.000000 - // 0.000000 - // 0.000000 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/ternary.c b/src/dios-egraphs/Diospyros/llvm-tests/ternary.c index bc667fe2..04acb85e 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/ternary.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/ternary.c @@ -1,3 +1,4 @@ +#include #include #define SIZE 8 @@ -14,6 +15,14 @@ int main(int argc, char **argv) { for (int i = 0; i < SIZE; i++) { printf("%f\n", b_out[i]); } + assert(b_out[0] == 1); + assert(b_out[1] == 2); + assert(b_out[2] == 3); + assert(b_out[3] == 4); + assert(b_out[4] == 0); + assert(b_out[5] == 0); + assert(b_out[6] == 0); + assert(b_out[7] == 0); // 1.000000 // 2.000000 // 3.000000 diff --git a/src/dios-egraphs/Diospyros/llvm-tests/var.c b/src/dios-egraphs/Diospyros/llvm-tests/var.c index de55a2e9..486903c0 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/var.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/var.c @@ -1,16 +1,25 @@ +#include #include +#define SIZE 4 -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {5, 6, 7, 8}; -float t1 = 10; -float t2 = 20; - -int main(int argc, char **argv) { - float c_out[4]; +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + float t1 = 10; + float t2 = 20; c_out[0] = a_in[0] + b_in[0]; c_out[1] = t1 + b_in[1]; c_out[2] = a_in[2] + t2; c_out[3] = t2 + t1; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = 
{5, 6, 7, 8}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 6); + assert(c_out[1] == 16); + assert(c_out[2] == 23); + assert(c_out[3] == 30); printf("first: %f\n", c_out[0]); printf("second: %f\n", c_out[1]); printf("third: %f\n", c_out[2]); diff --git a/src/dios-egraphs/Diospyros/llvm-tests/var.expect b/src/dios-egraphs/Diospyros/llvm-tests/var.expect deleted file mode 100644 index 9d1ce417..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/var.expect +++ /dev/null @@ -1,66 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %11 = load float, float* @t1, align 4 - %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %13 = fadd float %11, %12 - %14 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %15 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %16 = load float, float* @t2, align 4 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %19 = load float, float* @t2, align 4 - %20 = load float, float* @t1, align 4 - %21 = fadd float %19, %20 - %22 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %23 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %24 = insertelement <4 x float> zeroinitializer, float %23, i32 0 - %25 = insertelement <4 x float> %24, float %20, i32 1 - %26 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %27 = insertelement <4 x float> %25, float %26, i32 2 - %28 = insertelement <4 x float> %27, float %19, i32 3 - %29 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %30 = insertelement <4 x float> zeroinitializer, float %29, i32 0 - %31 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %32 = insertelement <4 x float> %30, float %31, i32 1 - %33 = insertelement <4 x float> %32, float %19, i32 2 - %34 = insertelement <4 x float> %33, float %20, i32 3 - %35 = fadd <4 x float> %28, %34 - %36 = extractelement <4 x float> %35, i32 0 - store float %36, float* %10, align 16 - %37 = extractelement <4 x float> %35, i32 1 - store float %37, float* %14, align 4 - %38 = extractelement <4 x float> %35, i32 2 - store float %38, float* %18, align 8 - %39 = extractelement <4 x float> %35, i32 3 - store float %39, float* %22, align 4 - %40 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %41 = load float, float* %40, align 16 - %42 = fpext float %41 to double - %43 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %42) - %44 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %45 = load float, float* %44, align 4 - %46 = fpext float %45 to double - %47 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %46) - %48 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %49 = load float, float* %48, align 8 - %50 = fpext float %49 to double - %51 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %50) - %52 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %53 = load float, float* %52, align 4 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %54) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("t1"), Symbol("b_in"), Symbol("0,1,"), Get([8, 9]), Add([7, 10]), Symbol("a_in"), Symbol("0,2,"), Get([12, 13]), Symbol("t2"), Add([14, 15]), Symbol("t2"), Symbol("t1"), Add([17, 18]), Vec([6, 11, 16, 19])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("t1"), Symbol("a_in"), Symbol("0,2,"), Get([4, 5]), Symbol("t2"), Vec([2, 3, 6, 7]), Symbol("b_in"), Symbol("0,0,"), Get([9, 10]), Symbol("b_in"), Symbol("0,1,"), Get([12, 13]), Symbol("t2"), Symbol("t1"), Vec([11, 14, 15, 16]), VecAdd([8, 17])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/var_new.c b/src/dios-egraphs/Diospyros/llvm-tests/var_new.c deleted file mode 100644 index 486903c0..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/var_new.c +++ /dev/null @@ -1,29 +0,0 @@ -#include -#include -#define SIZE 4 - -void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { - float t1 = 10; - float t2 = 20; - c_out[0] = a_in[0] + b_in[0]; - c_out[1] = t1 + b_in[1]; - c_out[2] = a_in[2] + t2; - c_out[3] = t2 + t1; -} - -int main(int argc, char **argv) { - float a_in[SIZE] = {1, 2, 3, 4}; - float b_in[SIZE] = {5, 6, 7, 8}; - float c_out[SIZE]; - sum(a_in, b_in, c_out); - assert(c_out[0] == 6); - assert(c_out[1] == 16); - assert(c_out[2] == 23); - assert(c_out[3] == 30); - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - // expected: 6, 16, 23, 30 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/width5.c b/src/dios-egraphs/Diospyros/llvm-tests/width5.c index 06560b25..a50c3e0b 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/width5.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/width5.c @@ -1,15 +1,25 @@ +#include #include +#define SIZE 5 -float a_in[] = {1, 2, 3, 4, 5}; -float b_in[] = {6, 7, 8, 9, 10}; - -int main(int argc, char **argv) { - float c_out[5]; +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { c_out[0] = a_in[0] + b_in[0]; c_out[1] = a_in[1] + b_in[1]; c_out[2] = a_in[2] + b_in[2]; c_out[3] = a_in[3] + b_in[3]; c_out[4] = a_in[4] + b_in[4]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4, 5}; + float b_in[SIZE] = {6, 7, 8, 9, 10}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 7); + assert(c_out[1] == 9); + assert(c_out[2] == 11); + assert(c_out[3] == 13); + assert(c_out[4] == 15); printf("first: %f\n", c_out[0]); printf("second: %f\n", c_out[1]); 
printf("third: %f\n", c_out[2]); @@ -17,4 +27,4 @@ int main(int argc, char **argv) { printf("fifth: %f\n", c_out[4]); // expected: 7, 9, 11, 13, 15 return 0; -} \ No newline at end of file +} diff --git a/src/dios-egraphs/Diospyros/llvm-tests/width5.expect b/src/dios-egraphs/Diospyros/llvm-tests/width5.expect deleted file mode 100644 index fc2dfab9..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/width5.expect +++ /dev/null @@ -1,92 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [5 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 0 - %11 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 1), align 4 - %12 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 1), align 4 - %13 = fadd float %11, %12 - %14 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 1 - %15 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 2), align 8 - %16 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 2), align 8 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 2 - %19 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 3), align 4 - %20 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 3), align 4 - %21 = fadd float %19, %20 - %22 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 3 - %23 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 4), align 16 - %24 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 4), align 16 - %25 = fadd float %23, %24 - %26 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 4 - %27 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 0), align 4 - %28 = insertelement <4 x float> zeroinitializer, float %27, i32 0 - %29 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 1), align 4 - %30 = insertelement <4 x float> %28, float %29, i32 1 - %31 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 2), align 4 - %32 = insertelement <4 x float> %30, float %31, i32 2 - %33 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 3), align 4 - %34 = insertelement <4 x float> %32, float %33, i32 3 - %35 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 0), align 4 - %36 = insertelement <4 x float> zeroinitializer, float %35, i32 0 - %37 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 1), align 4 - %38 = insertelement <4 x float> %36, float %37, i32 1 - %39 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 2), align 4 - %40 = insertelement <4 x float> %38, float %39, i32 2 - %41 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 3), align 4 - %42 = insertelement <4 x float> 
%40, float %41, i32 3 - %43 = fadd <4 x float> %34, %42 - %44 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 4), align 4 - %45 = insertelement <4 x float> zeroinitializer, float %44, i32 0 - %46 = insertelement <4 x float> %45, float 0.000000e+00, i32 1 - %47 = insertelement <4 x float> %46, float 0.000000e+00, i32 2 - %48 = insertelement <4 x float> %47, float 0.000000e+00, i32 3 - %49 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 4), align 4 - %50 = insertelement <4 x float> zeroinitializer, float %49, i32 0 - %51 = insertelement <4 x float> %50, float 0.000000e+00, i32 1 - %52 = insertelement <4 x float> %51, float 0.000000e+00, i32 2 - %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 3 - %54 = fadd <4 x float> %48, %53 - %55 = shufflevector <4 x float> %43, <4 x float> %54, <8 x i32> - %56 = extractelement <8 x float> %55, i32 0 - store float %56, float* %10, align 16 - %57 = extractelement <8 x float> %55, i32 1 - store float %57, float* %14, align 4 - %58 = extractelement <8 x float> %55, i32 2 - store float %58, float* %18, align 8 - %59 = extractelement <8 x float> %55, i32 3 - store float %59, float* %22, align 4 - %60 = extractelement <8 x float> %55, i32 4 - store float %60, float* %26, align 16 - %61 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 0 - %62 = load float, float* %61, align 16 - %63 = fpext float %62 to double - %64 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %63) - %65 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 1 - %66 = load float, float* %65, align 4 - %67 = fpext float %66 to double - %68 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %67) - %69 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 2 - %70 = load float, float* %69, align 8 - %71 = fpext float %70 to double - %72 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %71) - %73 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 3 - %74 = load float, float* %73, align 4 - %75 = fpext float %74 to double - %76 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %75) - %77 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 4 - %78 = load float, float* %77, align 16 - %79 = fpext float %78 to double - %80 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.4, i64 0, i64 0), double %79) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("a_in"), Symbol("0,1,"), Get([7, 8]), Symbol("b_in"), Symbol("0,1,"), Get([10, 11]), Add([9, 12]), Symbol("a_in"), Symbol("0,2,"), Get([14, 15]), Symbol("b_in"), Symbol("0,2,"), Get([17, 18]), Add([16, 19]), Symbol("a_in"), Symbol("0,3,"), Get([21, 22]), Symbol("b_in"), Symbol("0,3,"), Get([24, 25]), Add([23, 26]), Symbol("a_in"), Symbol("0,4,"), Get([28, 29]), Symbol("b_in"), Symbol("0,4,"), Get([31, 32]), Add([30, 33]), Vec([6, 13, 20, 27]), Num(0), Num(0), Num(0), Vec([34, 36, 37, 38]), Concat([35, 39])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,3,"), Get([22, 23]), LitVec([15, 18, 21, 24]), VecAdd([12, 25]), Symbol("a_in"), Symbol("0,4,"), Get([27, 28]), Num(0), Num(0), Num(0), LitVec([29, 30, 31, 32]), Symbol("b_in"), Symbol("0,4,"), Get([34, 35]), Num(0), Num(0), Num(0), LitVec([36, 37, 38, 39]), VecAdd([33, 40]), Concat([26, 41])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/width5_new.c b/src/dios-egraphs/Diospyros/llvm-tests/width5_new.c deleted file mode 100644 index a50c3e0b..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/width5_new.c +++ /dev/null @@ -1,30 +0,0 @@ -#include -#include -#define SIZE 5 - -void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { - c_out[0] = a_in[0] + b_in[0]; - c_out[1] = a_in[1] + b_in[1]; - c_out[2] = a_in[2] + b_in[2]; - c_out[3] = a_in[3] + b_in[3]; - c_out[4] = a_in[4] + b_in[4]; -} - -int main(int argc, char **argv) { - float a_in[SIZE] = {1, 2, 3, 4, 5}; - float b_in[SIZE] = {6, 7, 8, 9, 10}; - float c_out[SIZE]; - sum(a_in, b_in, c_out); - assert(c_out[0] == 7); - assert(c_out[1] == 9); - assert(c_out[2] == 11); - assert(c_out[3] == 13); - assert(c_out[4] == 15); - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - printf("fifth: %f\n", c_out[4]); - // expected: 7, 9, 11, 13, 15 - return 0; -} diff --git a/src/dios-egraphs/Diospyros/llvm-tests/width9.c b/src/dios-egraphs/Diospyros/llvm-tests/width9.c index 536ff7c3..7b7b2e96 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/width9.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/width9.c @@ -1,10 +1,8 @@ +#include #include +#define SIZE 9 -float a_in[] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; -float b_in[] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - -int main(int argc, char **argv) { - float c_out[9]; +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { c_out[0] = a_in[0] + b_in[0]; c_out[1] = a_in[1] + b_in[1]; c_out[2] = a_in[2] + b_in[2]; @@ -14,6 +12,22 @@ int main(int argc, char **argv) { c_out[6] = a_in[6] + b_in[6]; c_out[7] = a_in[7] + b_in[7]; c_out[8] = a_in[8] + b_in[8]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = 
{1, 2, 3, 4, 5, 6, 7, 8, 9}; + float b_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 2); + assert(c_out[1] == 4); + assert(c_out[2] == 6); + assert(c_out[3] == 8); + assert(c_out[4] == 10); + assert(c_out[5] == 12); + assert(c_out[6] == 14); + assert(c_out[7] == 16); + assert(c_out[8] == 18); printf("first: %f\n", c_out[0]); printf("second: %f\n", c_out[1]); printf("third: %f\n", c_out[2]); @@ -25,4 +39,4 @@ int main(int argc, char **argv) { printf("ninth: %f\n", c_out[8]); // expected: 2, 4, 6, 8, 10, 12, 14, 16, 18 return 0; -} \ No newline at end of file +} diff --git a/src/dios-egraphs/Diospyros/llvm-tests/width9.expect b/src/dios-egraphs/Diospyros/llvm-tests/width9.expect deleted file mode 100644 index 887b8fd1..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/width9.expect +++ /dev/null @@ -1,150 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [9 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 0 - %11 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 1), align 4 - %12 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 1), align 4 - %13 = fadd float %11, %12 - %14 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 1 - %15 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 2), align 8 - %16 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 2), align 8 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 2 - %19 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 3), align 4 - %20 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 3), align 4 - %21 = fadd float %19, %20 - %22 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 3 - %23 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 4), align 16 - %24 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 4), align 16 - %25 = fadd float %23, %24 - %26 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 4 - %27 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 5), align 4 - %28 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 5), align 4 - %29 = fadd float %27, %28 - %30 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 5 - %31 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 6), align 8 - %32 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 6), align 8 - %33 = fadd float %31, %32 - %34 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 6 - %35 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 7), align 4 - %36 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 7), align 4 - %37 = fadd float %35, 
%36 - %38 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 7 - %39 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 8), align 16 - %40 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 8), align 16 - %41 = fadd float %39, %40 - %42 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 8 - %43 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 0), align 4 - %44 = insertelement <4 x float> zeroinitializer, float %43, i32 0 - %45 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 1), align 4 - %46 = insertelement <4 x float> %44, float %45, i32 1 - %47 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 2), align 4 - %48 = insertelement <4 x float> %46, float %47, i32 2 - %49 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 3), align 4 - %50 = insertelement <4 x float> %48, float %49, i32 3 - %51 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 0), align 4 - %52 = insertelement <4 x float> zeroinitializer, float %51, i32 0 - %53 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 1), align 4 - %54 = insertelement <4 x float> %52, float %53, i32 1 - %55 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 2), align 4 - %56 = insertelement <4 x float> %54, float %55, i32 2 - %57 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 3), align 4 - %58 = insertelement <4 x float> %56, float %57, i32 3 - %59 = fadd <4 x float> %50, %58 - %60 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 4), align 4 - %61 = insertelement <4 x float> zeroinitializer, float %60, i32 0 - %62 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 5), align 4 - %63 = insertelement <4 x float> %61, float %62, i32 1 - %64 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 6), align 4 - %65 = insertelement <4 x float> %63, float %64, i32 2 - %66 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 7), align 4 - %67 = insertelement <4 x float> %65, float %66, i32 3 - %68 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 4), align 4 - %69 = insertelement <4 x float> zeroinitializer, float %68, i32 0 - %70 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 5), align 4 - %71 = insertelement <4 x float> %69, float %70, i32 1 - %72 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 6), align 4 - %73 = insertelement <4 x float> %71, float %72, i32 2 - %74 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 7), align 4 - %75 = insertelement <4 x float> %73, float %74, i32 3 - %76 = fadd <4 x float> %67, %75 - %77 = shufflevector <4 x float> %59, <4 x float> %76, <8 x i32> - %78 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 8), align 4 - %79 = insertelement <4 x float> zeroinitializer, float %78, i32 0 - %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 1 - %81 = insertelement <4 x float> %80, float 0.000000e+00, i32 2 - %82 = insertelement <4 x float> %81, float 0.000000e+00, i32 
3 - %83 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 8), align 4 - %84 = insertelement <4 x float> zeroinitializer, float %83, i32 0 - %85 = insertelement <4 x float> %84, float 0.000000e+00, i32 1 - %86 = insertelement <4 x float> %85, float 0.000000e+00, i32 2 - %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 3 - %88 = fadd <4 x float> %82, %87 - %89 = shufflevector <8 x float> %77, <4 x float> %88, <12 x i32> - %90 = extractelement <12 x float> %89, i32 0 - store float %90, float* %10, align 16 - %91 = extractelement <12 x float> %89, i32 1 - store float %91, float* %14, align 4 - %92 = extractelement <12 x float> %89, i32 2 - store float %92, float* %18, align 8 - %93 = extractelement <12 x float> %89, i32 3 - store float %93, float* %22, align 4 - %94 = extractelement <12 x float> %89, i32 4 - store float %94, float* %26, align 16 - %95 = extractelement <12 x float> %89, i32 5 - store float %95, float* %30, align 4 - %96 = extractelement <12 x float> %89, i32 6 - store float %96, float* %34, align 8 - %97 = extractelement <12 x float> %89, i32 7 - store float %97, float* %38, align 4 - %98 = extractelement <12 x float> %89, i32 8 - store float %98, float* %42, align 16 - %99 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 0 - %100 = load float, float* %99, align 16 - %101 = fpext float %100 to double - %102 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %101) - %103 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 1 - %104 = load float, float* %103, align 4 - %105 = fpext float %104 to double - %106 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %105) - %107 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 2 - %108 = load float, float* %107, align 8 - %109 = fpext float %108 to double - %110 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %109) - %111 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 3 - %112 = load float, float* %111, align 4 - %113 = fpext float %112 to double - %114 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %113) - %115 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 4 - %116 = load float, float* %115, align 16 - %117 = fpext float %116 to double - %118 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.4, i64 0, i64 0), double %117) - %119 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 5 - %120 = load float, float* %119, align 4 - %121 = fpext float %120 to double - %122 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.5, i64 0, i64 0), double %121) - %123 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 6 - %124 = load float, float* %123, align 8 - %125 = fpext float %124 to double - %126 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.6, i64 0, i64 0), double %125) - %127 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 7 - %128 = load float, float* %127, align 4 - %129 = fpext float %128 to double - %130 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.7, i64 0, i64 0), double %129) - %131 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 8 - %132 = load float, float* %131, align 16 - %133 = fpext float %132 to double - %134 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.8, i64 0, i64 0), double %133) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("a_in"), Symbol("0,1,"), Get([7, 8]), Symbol("b_in"), Symbol("0,1,"), Get([10, 11]), Add([9, 12]), Symbol("a_in"), Symbol("0,2,"), Get([14, 15]), Symbol("b_in"), Symbol("0,2,"), Get([17, 18]), Add([16, 19]), Symbol("a_in"), Symbol("0,3,"), Get([21, 22]), Symbol("b_in"), Symbol("0,3,"), Get([24, 25]), Add([23, 26]), Symbol("a_in"), Symbol("0,4,"), Get([28, 29]), Symbol("b_in"), Symbol("0,4,"), Get([31, 32]), Add([30, 33]), Symbol("a_in"), Symbol("0,5,"), Get([35, 36]), Symbol("b_in"), Symbol("0,5,"), Get([38, 39]), Add([37, 40]), Symbol("a_in"), Symbol("0,6,"), Get([42, 43]), Symbol("b_in"), Symbol("0,6,"), Get([45, 46]), Add([44, 47]), Symbol("a_in"), Symbol("0,7,"), Get([49, 50]), Symbol("b_in"), Symbol("0,7,"), Get([52, 53]), Add([51, 54]), Symbol("a_in"), Symbol("0,8,"), Get([56, 57]), Symbol("b_in"), Symbol("0,8,"), Get([59, 60]), Add([58, 61]), Vec([6, 13, 20, 27]), Vec([34, 41, 48, 55]), Num(0), Num(0), Num(0), Vec([62, 65, 66, 67]), Concat([63, 64]), Concat([69, 68])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,3,"), Get([22, 23]), LitVec([15, 18, 21, 24]), VecAdd([12, 25]), Symbol("a_in"), Symbol("0,4,"), Get([27, 28]), Symbol("a_in"), Symbol("0,5,"), Get([30, 31]), Symbol("a_in"), Symbol("0,6,"), Get([33, 34]), Symbol("a_in"), Symbol("0,7,"), Get([36, 37]), LitVec([29, 32, 35, 38]), Symbol("b_in"), Symbol("0,4,"), Get([40, 41]), Symbol("b_in"), Symbol("0,5,"), Get([43, 44]), Symbol("b_in"), Symbol("0,6,"), Get([46, 47]), Symbol("b_in"), Symbol("0,7,"), Get([49, 50]), LitVec([42, 45, 48, 51]), VecAdd([39, 52]), Concat([26, 53]), Symbol("a_in"), Symbol("0,8,"), Get([55, 56]), Num(0), Num(0), Num(0), LitVec([57, 58, 59, 60]), Symbol("b_in"), Symbol("0,8,"), Get([62, 63]), Num(0), Num(0), Num(0), LitVec([64, 65, 66, 67]), VecAdd([61, 68]), Concat([54, 69])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/width9_new.c b/src/dios-egraphs/Diospyros/llvm-tests/width9_new.c deleted file mode 100644 index 7b7b2e96..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/width9_new.c +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include -#define SIZE 9 - -void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { - c_out[0] = a_in[0] + b_in[0]; - c_out[1] = a_in[1] + b_in[1]; - c_out[2] = a_in[2] + b_in[2]; - c_out[3] = a_in[3] + b_in[3]; - c_out[4] = a_in[4] + b_in[4]; - c_out[5] = a_in[5] + b_in[5]; - c_out[6] = a_in[6] + b_in[6]; - c_out[7] = a_in[7] + b_in[7]; - 
c_out[8] = a_in[8] + b_in[8];
-}
-
-int main(int argc, char **argv) {
-    float a_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
-    float b_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
-    float c_out[SIZE];
-    sum(a_in, b_in, c_out);
-    assert(c_out[0] == 2);
-    assert(c_out[1] == 4);
-    assert(c_out[2] == 6);
-    assert(c_out[3] == 8);
-    assert(c_out[4] == 10);
-    assert(c_out[5] == 12);
-    assert(c_out[6] == 14);
-    assert(c_out[7] == 16);
-    assert(c_out[8] == 18);
-    printf("first: %f\n", c_out[0]);
-    printf("second: %f\n", c_out[1]);
-    printf("third: %f\n", c_out[2]);
-    printf("fourth: %f\n", c_out[3]);
-    printf("fifth: %f\n", c_out[4]);
-    printf("sixth: %f\n", c_out[5]);
-    printf("seventh: %f\n", c_out[6]);
-    printf("eight: %f\n", c_out[7]);
-    printf("ninth: %f\n", c_out[8]);
-    // expected: 2, 4, 6, 8, 10, 12, 14, 16, 18
-    return 0;
-}

From 87951be4891959cd829b2e1f8bf4a79a9c9529c6 Mon Sep 17 00:00:00 2001
From: Jonathan Tran
Date: Wed, 15 Dec 2021 00:22:26 -0500
Subject: [PATCH 017/143] add failed test directory

---
 src/dios-egraphs/Diospyros/{llvm-tests => fail-tests}/fft-FAIL.c | 0
 .../Diospyros/{llvm-tests => fail-tests}/qr-decomp-FAIL.c | 0
 .../qr-decomp-fixed-size-FAIL.c} | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename src/dios-egraphs/Diospyros/{llvm-tests => fail-tests}/fft-FAIL.c (100%)
 rename src/dios-egraphs/Diospyros/{llvm-tests => fail-tests}/qr-decomp-FAIL.c (100%)
 rename src/dios-egraphs/Diospyros/{llvm-tests/qr-decomp-fixed-size-FAIl.c => fail-tests/qr-decomp-fixed-size-FAIL.c} (100%)

diff --git a/src/dios-egraphs/Diospyros/llvm-tests/fft-FAIL.c b/src/dios-egraphs/Diospyros/fail-tests/fft-FAIL.c
similarity index 100%
rename from src/dios-egraphs/Diospyros/llvm-tests/fft-FAIL.c
rename to src/dios-egraphs/Diospyros/fail-tests/fft-FAIL.c
diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-FAIL.c b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-FAIL.c
similarity index 100%
rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-FAIL.c
rename to src/dios-egraphs/Diospyros/fail-tests/qr-decomp-FAIL.c
diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-fixed-size-FAIl.c b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-FAIL.c
similarity index 100%
rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-fixed-size-FAIl.c
rename to src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-FAIL.c

From 0f1bdcceffef3a0a34587f735f2e5558e3cbeb4a Mon Sep 17 00:00:00 2001
From: Jonathan Tran
Date: Wed, 15 Dec 2021 00:32:24 -0500
Subject: [PATCH 018/143] fix run all

---
 src/dios-egraphs/Diospyros/run_all.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/dios-egraphs/Diospyros/run_all.sh b/src/dios-egraphs/Diospyros/run_all.sh
index e9992c70..241b3ed5 100644
--- a/src/dios-egraphs/Diospyros/run_all.sh
+++ b/src/dios-egraphs/Diospyros/run_all.sh
@@ -1,5 +1,4 @@
 for file in llvm-tests/*.c
 do
     make run test="$file"
-    ./a.out
 done
\ No newline at end of file

From 2f273b60316fcc97dc36302cfa4d065684becac6 Mon Sep 17 00:00:00 2001
From: Jonathan Tran
Date: Fri, 17 Dec 2021 01:50:30 -0800
Subject: [PATCH 019/143] add 1d matrix multiply, add random q-prod and point-product; random tests both fail on fourth index

---
 .../fail-tests/point-product-random-FAIL.c | 90 ++++++++++++
 .../Diospyros/fail-tests/q-prod-random-FAIL.c | 117 ++++++++++++++++
 .../1d-12-by-12-random-matrix-multiply.c | 68 ++++++++++
 .../1d-25-by-25-random-matrix-multiply.c | 68 ++++++++++
 4 files changed, 343 insertions(+)
 create mode 100644
src/dios-egraphs/Diospyros/fail-tests/point-product-random-FAIL.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/q-prod-random-FAIL.c create mode 100644 src/dios-egraphs/Diospyros/randomized-tests/1d-12-by-12-random-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/randomized-tests/1d-25-by-25-random-matrix-multiply.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/point-product-random-FAIL.c b/src/dios-egraphs/Diospyros/fail-tests/point-product-random-FAIL.c new file mode 100644 index 00000000..cf056fe4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/point-product-random-FAIL.c @@ -0,0 +1,90 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 4 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void cross_product(float lhs[3], float rhs[3], float result[3]) + __attribute__((always_inline)); + +void cross_product(float lhs[3], float rhs[3], float result[3]) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +void point_product(float q_in[4], float p_in[4], float result_out[4]) { + float qvec[3] = {q_in[0], q_in[1], q_in[2]}; + // qvec = {0, 1, 2} + + float uv[3]; + cross_product(qvec, p_in, uv); + // uv = {1 * 2 - 2 * 1, 2 * 0 - 0 * 2, 0 * 1 - 1 * 0} = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + // uv = {0, 0 , 0} + float qxuv[3]; + cross_product(qvec, uv, qxuv); + // qxuv = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + result_out[i] = p_in[i] + q_in[3] * uv[i] + qxuv[i]; + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float q_in[SIZE]; + for (int i = 0; i < SIZE; i++) { + q_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float p_in[SIZE]; + for (int i = 0; i < SIZE; i++) { + p_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float result_out[SIZE]; + for (int i = 0; i < SIZE; i++) { + result_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[SIZE]; + for (int i = 0; i < SIZE; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + point_product(q_in, p_in, result_out); + float qvec[3] = {q_in[0], q_in[1], q_in[2]}; + // qvec = {0, 1, 2} + + float uv[3]; + cross_product(qvec, p_in, uv); + // uv = {1 * 2 - 2 * 1, 2 * 0 - 0 * 2, 0 * 1 - 1 * 0} = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + // uv = {0, 0 , 0} + float qxuv[3]; + cross_product(qvec, uv, qxuv); + // qxuv = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + expected[i] = p_in[i] + q_in[3] * uv[i] + qxuv[i]; + } + for (int i = 0; i < 4; i++) { + printf("Calculated: %f\n", result_out[i]); + printf("Expected: %f\n", expected[i]); + assert(fabs(expected[i] - result_out[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/q-prod-random-FAIL.c b/src/dios-egraphs/Diospyros/fail-tests/q-prod-random-FAIL.c new file mode 100644 index 00000000..89879252 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/q-prod-random-FAIL.c @@ -0,0 +1,117 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 4 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +__attribute__((always_inline)) void naive_cross_product(float *lhs, float *rhs, + float *result) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + 
+/* + Computes the point product +*/ +__attribute__((always_inline)) void naive_point_product(float *q, float *p, + float *result) { + float qvec[3] = {q[0], q[1], q[2]}; + float uv[3]; + naive_cross_product(qvec, p, uv); + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + float qxuv[3]; + naive_cross_product(qvec, uv, qxuv); + + for (int i = 0; i < 3; i++) { + result[i] = p[i] + q[3] * uv[i] + qxuv[i]; + } +} + +void naive_quaternion_product(float *a_q, float *a_t, float *b_q, float *b_t, + float *r_q, float *r_t) { + r_q[3] = + a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * b_q[2]; + r_q[0] = + a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - a_q[2] * b_q[1]; + r_q[1] = + a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * b_q[2]; + r_q[2] = + a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * b_q[0]; + + naive_point_product(a_q, b_t, r_t); + for (int i = 0; i < 3; i++) { + r_t[i] += a_t[i]; + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float a_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + a_q[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float a_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + a_t[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float b_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + b_q[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float b_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + b_t[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float r_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + r_q[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float r_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + r_t[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expectedq[SIZE]; + for (int i = 0; i < SIZE; i++) { + expectedq[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expectedt[SIZE]; + for (int i = 0; i < SIZE; i++) { + expectedt[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + naive_quaternion_product(a_q, a_t, b_q, b_t, r_q, r_t); + expectedq[3] = + a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * b_q[2]; + expectedq[0] = + a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - a_q[2] * b_q[1]; + expectedq[1] = + a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * b_q[2]; + expectedq[2] = + a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * b_q[0]; + + naive_point_product(a_q, b_t, expectedt); + for (int i = 0; i < 3; i++) { + expectedt[i] += a_t[i]; + } + for (int i = 0; i < SIZE; i++) { + printf("Calculated r: %f\n", r_q[i]); + printf("Expected r: %f\n", expectedq[i]); + assert(fabs(expectedq[i] - r_q[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Calculated t: %f\n", r_t[i]); + printf("Expected t: %f\n", expectedt[i]); + assert(fabs(expectedt[i] - r_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/1d-12-by-12-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/randomized-tests/1d-12-by-12-random-matrix-multiply.c new file mode 100644 index 00000000..aae33dda --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/1d-12-by-12-random-matrix-multiply.c @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include + +#define A_ROWS 12 +#define A_COLS 12 +#define B_COLS 12 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void matrix_multiply(float a_in[A_ROWS * A_COLS], float b_in[A_COLS * B_COLS], + float c_out[A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 
0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // calculate up c_out + matrix_multiply(a_in, b_in, c_out); + // calculate expected + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + expected[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + expected[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } + // check expected == output + for (int i = 0; i < A_ROWS * B_COLS; i++) { + printf("calculated: %f\n", c_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - c_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/1d-25-by-25-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/randomized-tests/1d-25-by-25-random-matrix-multiply.c new file mode 100644 index 00000000..cd678956 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/1d-25-by-25-random-matrix-multiply.c @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include + +#define A_ROWS 25 +#define A_COLS 25 +#define B_COLS 25 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void matrix_multiply(float a_in[A_ROWS * A_COLS], float b_in[A_COLS * B_COLS], + float c_out[A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // calculate up c_out + matrix_multiply(a_in, b_in, c_out); + // calculate expected + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + expected[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + expected[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } + // check expected == output + for (int i = 0; i < A_ROWS * B_COLS; i++) { + printf("calculated: %f\n", c_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - c_out[i]) < DELTA); + } + 
return 0; +} \ No newline at end of file From 248050523896108a5219581556e6ad7d5668e70d Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 19 Jan 2022 17:34:39 -0500 Subject: [PATCH 020/143] change bounds for point-product-random and q-prod-random Why were the vectors length 4 but only the first 3 entries being filled? Only check first 3 entries now; with random filling, 4th entries will highly likely differ --- .../Diospyros/fail-tests/point-product-random-FAIL.c | 2 +- src/dios-egraphs/Diospyros/fail-tests/q-prod-random-FAIL.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dios-egraphs/Diospyros/fail-tests/point-product-random-FAIL.c b/src/dios-egraphs/Diospyros/fail-tests/point-product-random-FAIL.c index cf056fe4..a41838e3 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/point-product-random-FAIL.c +++ b/src/dios-egraphs/Diospyros/fail-tests/point-product-random-FAIL.c @@ -82,7 +82,7 @@ int main(void) { for (int i = 0; i < 3; i++) { expected[i] = p_in[i] + q_in[3] * uv[i] + qxuv[i]; } - for (int i = 0; i < 4; i++) { + for (int i = 0; i < 3; i++) { printf("Calculated: %f\n", result_out[i]); printf("Expected: %f\n", expected[i]); assert(fabs(expected[i] - result_out[i]) < DELTA); diff --git a/src/dios-egraphs/Diospyros/fail-tests/q-prod-random-FAIL.c b/src/dios-egraphs/Diospyros/fail-tests/q-prod-random-FAIL.c index 89879252..403efae3 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/q-prod-random-FAIL.c +++ b/src/dios-egraphs/Diospyros/fail-tests/q-prod-random-FAIL.c @@ -105,11 +105,11 @@ int main(void) { expectedt[i] += a_t[i]; } for (int i = 0; i < SIZE; i++) { - printf("Calculated r: %f\n", r_q[i]); - printf("Expected r: %f\n", expectedq[i]); + printf("Calculated q: %f\n", r_q[i]); + printf("Expected q: %f\n", expectedq[i]); assert(fabs(expectedq[i] - r_q[i]) < DELTA); } - for (int i = 0; i < SIZE; i++) { + for (int i = 0; i < 3; i++) { printf("Calculated t: %f\n", r_t[i]); printf("Expected t: %f\n", expectedt[i]); assert(fabs(expectedt[i] - r_t[i]) < DELTA); From 68ad7c400ccaf39a751f9ea3b3182956c50913b9 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 19 Jan 2022 23:50:15 -0500 Subject: [PATCH 021/143] move non failing tests Reorganize point product, q-prod to random tests --- .../point-product-random.c} | 0 .../q-prod-random-FAIL.c => randomized-tests/q-prod-random.c} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/dios-egraphs/Diospyros/{fail-tests/point-product-random-FAIL.c => randomized-tests/point-product-random.c} (100%) rename src/dios-egraphs/Diospyros/{fail-tests/q-prod-random-FAIL.c => randomized-tests/q-prod-random.c} (100%) diff --git a/src/dios-egraphs/Diospyros/fail-tests/point-product-random-FAIL.c b/src/dios-egraphs/Diospyros/randomized-tests/point-product-random.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/point-product-random-FAIL.c rename to src/dios-egraphs/Diospyros/randomized-tests/point-product-random.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/q-prod-random-FAIL.c b/src/dios-egraphs/Diospyros/randomized-tests/q-prod-random.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/q-prod-random-FAIL.c rename to src/dios-egraphs/Diospyros/randomized-tests/q-prod-random.c From 1d85649f5f03044197801e56fe565c7171415669 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 20 Jan 2022 19:30:42 -0500 Subject: [PATCH 022/143] malloc and calloc simple tests work --- .../Diospyros/llvm-tests/calloc.c | 30 +++++++++++++++++++ 
.../Diospyros/llvm-tests/malloc.c | 30 +++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/calloc.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/malloc.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/calloc.c b/src/dios-egraphs/Diospyros/llvm-tests/calloc.c new file mode 100644 index 00000000..d8e0f10a --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/calloc.c @@ -0,0 +1,30 @@ +#include +#include +#define SIZE 4 + +void calloc_func(int m, float q_out[SIZE][SIZE]) { + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + q_min[i * m + j] = 10.0f; + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + q_out[i][j] = q_min[i * m + j]; + } + } +} + +int main(int argc, char **argv) { + float q_out[SIZE][SIZE] = { + {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}; + calloc_func(SIZE, q_out); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("q_out: %f\n", q_out[i][j]); + assert(q_out[i][j] == 10); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/malloc.c b/src/dios-egraphs/Diospyros/llvm-tests/malloc.c new file mode 100644 index 00000000..fb91400c --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/malloc.c @@ -0,0 +1,30 @@ +#include +#include +#define SIZE 4 + +void malloc_func(int m, float q_out[SIZE][SIZE]) { + float *q_min = (float *)malloc(sizeof(float) * m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + q_min[i * m + j] = 10.0f; + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + q_out[i][j] = q_min[i * m + j]; + } + } +} + +int main(int argc, char **argv) { + float q_out[SIZE][SIZE] = { + {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}; + malloc_func(SIZE, q_out); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("q_out: %f\n", q_out[i][j]); + assert(q_out[i][j] == 10); + } + } + return 0; +} \ No newline at end of file From eb7404a7e361fc16dedc48a795042caee8ab13c4 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 28 Jan 2022 17:49:24 -0500 Subject: [PATCH 023/143] checkpoint for debugging qr-decomp fixed size --- src/dios-egraphs/Diospyros/Makefile | 4 ++++ src/dios-egraphs/Diospyros/runt.py | 18 ++++++++++++++++++ src/dios-egraphs/Diospyros/runt.toml | 2 +- src/dios-egraphs/Diospyros/test.sh | 16 ++++++++++++++++ 4 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 src/dios-egraphs/Diospyros/runt.py create mode 100644 src/dios-egraphs/Diospyros/test.sh diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index dd5bc618..aad22710 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -68,6 +68,10 @@ no-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp $(CLANG) build/dce.ll -o build/final build/final +test-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp + $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone $(test) | opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce | opt -S --cfl-steens-aa | $(CLANG) -x ir -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm 
-print=false - | opt -S --adce --dse | $(CLANG) -x ir -o build/final - + build/final + test: set-up-mac runt.sh runt.toml target/debug/libllvmlib.$(EXT) runt diff --git a/src/dios-egraphs/Diospyros/runt.py b/src/dios-egraphs/Diospyros/runt.py new file mode 100644 index 00000000..8d7bb4ae --- /dev/null +++ b/src/dios-egraphs/Diospyros/runt.py @@ -0,0 +1,18 @@ +import click +import subprocess +import sys + + +@click.command() +@click.argument('test_file', + type=click.Path(exists=True), + metavar='') +def run(test_file): + test_path = [f"test={test_file}"] + cmd = subprocess.run(["make", "run-opt"] + test_path, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + sys.stdout.write(cmd.stdout.decode('utf-8')) + + +if __name__ == "__main__": + run() diff --git a/src/dios-egraphs/Diospyros/runt.toml b/src/dios-egraphs/Diospyros/runt.toml index 19995b09..f8580c4a 100644 --- a/src/dios-egraphs/Diospyros/runt.toml +++ b/src/dios-egraphs/Diospyros/runt.toml @@ -3,4 +3,4 @@ ver = "0.3.1" [[tests]] name = "llvm-diospyros tests" paths = [ "llvm-tests/*.c" ] -cmd = "bash runt.sh {}" \ No newline at end of file +cmd = "python3 runt.py {}" \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/test.sh b/src/dios-egraphs/Diospyros/test.sh new file mode 100644 index 00000000..39e18e85 --- /dev/null +++ b/src/dios-egraphs/Diospyros/test.sh @@ -0,0 +1,16 @@ +FILE=target/debug/libllvmlib.so + +if ! [ -f $FILE ]; then + FILE=target/debug/libllvmlib.dylib +fi + +if [[ "$OSTYPE" == "darwin"* ]]; then + CLANG=/usr/local/opt/llvm/bin/clang +else + CLANG=clang +fi + +TEST=./llvm-tests/add.c + +$CLANG -emit-llvm -S -Xclang -disable-O0-optnone $TEST \ +| opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce -o build/sample - From 4e79e07cec798037c4fb93a456df5d1753122b96 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 28 Jan 2022 19:10:47 -0500 Subject: [PATCH 024/143] fix up no-opt annotated functions --- .../Diospyros/LoadStoreMovement.cpp | 3 +- src/dios-egraphs/Diospyros/diospyros.cpp | 5 +- .../fail-tests/qr-decomp-fixed-size-debug.c | 254 ++++++++++++++++++ 3 files changed, 259 insertions(+), 3 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-debug.c diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index bd98073a..6735e435 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -256,7 +256,8 @@ struct LoadStoreMovementPass : public FunctionPass { * * We gr */ - if (F.getName() == "main") { + if (F.getName() == "main" || + (F.getName().size() > 7 && F.getName().substr(0, 7) == "no_opt_")) { return false; } rewrite_stores(F); diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 730636ba..120dc8c9 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -511,8 +511,9 @@ struct DiospyrosPass : public FunctionPass { DiospyrosPass() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F) override { - // do not optimize on main function. - if (F.getName() == "main") { + // do not optimize on main function or no_opt functions. 
+ if (F.getName() == "main" || + (F.getName().size() > 7 && F.getName().substr(0, 7) == "no_opt_")) { return false; } bool has_changes = false; diff --git a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-debug.c b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-debug.c new file mode 100644 index 00000000..889076ac --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-debug.c @@ -0,0 +1,254 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], + float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1, 2, 3, 4}; + float Q[SIZE * SIZE] = {0, 0, 0, 0}; + float R[SIZE * SIZE] = {0, 0, 0, 0}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0, 0, 0, 0}; + float expectedR[SIZE * SIZE] = {0, 0, 0, 0}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + } + } +} \ No newline at end of file From c0fea25d8c25ffa9f3bf0de8154047761783e147 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 28 Jan 2022 19:16:07 -0500 Subject: [PATCH 025/143] revert all runt failures with FAIL marker --- src/dios-egraphs/Diospyros/Makefile | 4 ---- src/dios-egraphs/Diospyros/{runt.py => runt_FAIL.py} | 0 src/dios-egraphs/Diospyros/{test.sh => test_FAIL.sh} | 0 3 files changed, 4 deletions(-) rename src/dios-egraphs/Diospyros/{runt.py => runt_FAIL.py} (100%) rename src/dios-egraphs/Diospyros/{test.sh => test_FAIL.sh} (100%) diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index aad22710..dd5bc618 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -68,10 +68,6 @@ no-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp $(CLANG) build/dce.ll -o build/final build/final -test-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone $(test) | opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce | opt -S --cfl-steens-aa | $(CLANG) -x ir -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=false - | opt -S --adce --dse | $(CLANG) -x ir -o build/final - - build/final - test: set-up-mac runt.sh runt.toml target/debug/libllvmlib.$(EXT) runt diff --git a/src/dios-egraphs/Diospyros/runt.py b/src/dios-egraphs/Diospyros/runt_FAIL.py similarity index 100% rename from src/dios-egraphs/Diospyros/runt.py rename to src/dios-egraphs/Diospyros/runt_FAIL.py diff --git a/src/dios-egraphs/Diospyros/test.sh b/src/dios-egraphs/Diospyros/test_FAIL.sh similarity index 100% rename from src/dios-egraphs/Diospyros/test.sh rename to src/dios-egraphs/Diospyros/test_FAIL.sh From 0fae9b83c16e2e0ac9ef124c242e6ce6deab5b3a Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 28 Jan 2022 19:17:33 -0500 Subject: [PATCH 026/143] revert runt.toml --- src/dios-egraphs/Diospyros/runt.toml | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dios-egraphs/Diospyros/runt.toml b/src/dios-egraphs/Diospyros/runt.toml index f8580c4a..19995b09 100644 --- a/src/dios-egraphs/Diospyros/runt.toml +++ b/src/dios-egraphs/Diospyros/runt.toml @@ -3,4 +3,4 @@ ver = "0.3.1" [[tests]] name = "llvm-diospyros tests" paths = [ "llvm-tests/*.c" ] -cmd = "python3 runt.py {}" \ No newline at end of file +cmd = "bash runt.sh {}" \ No newline at end of file From 5977c0d67da1e8cc50e742e6c06fe8375ef79138 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 3 Feb 2022 19:53:32 -0500 Subject: [PATCH 027/143] add regression test --- .../fail-tests/modified_qr_1_regress_PASS.c | 181 ++++++++++++ .../fail-tests/simplified_qr_decomp.c | 276 ++++++++++++++++++ .../Diospyros/llvm-tests/identity_matrix.c | 10 + 3 files changed, 467 insertions(+) create mode 100644 src/dios-egraphs/Diospyros/fail-tests/modified_qr_1_regress_PASS.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/identity_matrix.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/modified_qr_1_regress_PASS.c b/src/dios-egraphs/Diospyros/fail-tests/modified_qr_1_regress_PASS.c new file mode 100644 index 00000000..04841b87 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/modified_qr_1_regress_PASS.c @@ -0,0 +1,181 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], + float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], 
float Q[SIZE], float R[SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // No Calloc is used here. + float I[SIZE * SIZE] = {0}; + // float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + float q_t[SIZE * SIZE] = {1}; + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float res[SIZE * SIZE] = {0}; + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + float I[SIZE * SIZE] = {0}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + float q_t[SIZE * SIZE] = {1}; + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float res[SIZE * SIZE] = {0}; + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + } + naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1, 2, 3, 4}; + float Q[SIZE * SIZE] = {0, 0, 0, 0}; + float R[SIZE * SIZE] = {0, 0, 0, 0}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0, 0, 0, 0}; + float expectedR[SIZE * SIZE] = {0, 0, 0, 0}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c b/src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c new file mode 100644 index 00000000..a8c75e3c --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c @@ -0,0 +1,276 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], + float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // No Calloc is used here. + float I[SIZE * SIZE] = {0}; + // float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + // int m = SIZE - k; + + // float x[SIZE] = {0}; + // float e[SIZE] = {0}; + // for (int i = 0; i < m; i++) { + // int row = k + i; + // x[i] = R[row * SIZE + k]; + // e[i] = I[row * SIZE + k]; + // } + + // float alpha = -sgn(x[0]) * naive_norm(x, m); + + // float u[SIZE] = {0}; + // float v[SIZE] = {0}; + // for (int i = 0; i < m; i++) { + // u[i] = x[i] + alpha * e[i]; + // } + // float norm_u = naive_norm(u, m); + // for (int i = 0; i < m; i++) { + // v[i] = u[i] / (norm_u + 0.00001f); + // } + + // float q_min[m * m]; + // for (int i = 0; i < m; i++) { + // for (int j = 0; j < m; j++) { + // q_min[i * m + j] = 0.0f; + // } + // } + // for (int i = 0; i < m; i++) { + // for (int j = 0; j < m; j++) { + // float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + // q_min[i * m + j] = q_min_i; + // } + // } + + // float q_t[SIZE * SIZE] = {0}; + // for (int i = 0; i < SIZE; i++) { + // for (int j = 0; j < SIZE; j++) { + // float q_t_i; + // if ((i < k) || (j < k)) { + // q_t_i = (i == j) ? 
1.0f : 0.0f; + // } else { + // q_t_i = q_min[(i - k) * m + (j - k)]; + // } + // q_t[i * SIZE + j] = q_t_i; + // } + // } + float q_t[SIZE * SIZE] = {1}; + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float res[SIZE * SIZE] = {0}; + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + float I[SIZE * SIZE] = {0}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + // int m = SIZE - k; + + // float x[SIZE] = {0}; + // float e[SIZE] = {0}; + // for (int i = 0; i < m; i++) { + // int row = k + i; + // x[i] = R[row * SIZE + k]; + // e[i] = I[row * SIZE + k]; + // } + + // float alpha = -sgn(x[0]) * naive_norm(x, m); + + // float u[SIZE] = {0}; + // float v[SIZE] = {0}; + // for (int i = 0; i < m; i++) { + // u[i] = x[i] + alpha * e[i]; + // } + // float norm_u = naive_norm(u, m); + // for (int i = 0; i < m; i++) { + // v[i] = u[i] / (norm_u + 0.00001f); + // } + + // float q_min[m * m]; + // for (int i = 0; i < m; i++) { + // for (int j = 0; j < m; j++) { + // q_min[i * m + j] = 0.0f; + // } + // } + // for (int i = 0; i < m; i++) { + // for (int j = 0; j < m; j++) { + // float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + // q_min[i * m + j] = q_min_i; + // } + // } + + // float q_t[SIZE * SIZE] = {0}; + // for (int i = 0; i < SIZE; i++) { + // for (int j = 0; j < SIZE; j++) { + // float q_t_i; + // if ((i < k) || (j < k)) { + // q_t_i = (i == j) ? 
1.0f : 0.0f; + // } else { + // q_t_i = q_min[(i - k) * m + (j - k)]; + // } + // q_t[i * SIZE + j] = q_t_i; + // } + // } + + float q_t[SIZE * SIZE] = {1}; + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float res[SIZE * SIZE] = {0}; + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + } + naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1, 2, 3, 4}; + float Q[SIZE * SIZE] = {0, 0, 0, 0}; + float R[SIZE * SIZE] = {0, 0, 0, 0}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0, 0, 0, 0}; + float expectedR[SIZE * SIZE] = {0, 0, 0, 0}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/identity_matrix.c b/src/dios-egraphs/Diospyros/llvm-tests/identity_matrix.c new file mode 100644 index 00000000..35f63b3e --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/identity_matrix.c @@ -0,0 +1,10 @@ +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } From 0043d42f81cb43630491229d69c9d06895e206ba Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 8 Feb 2022 21:30:02 -0500 Subject: [PATCH 028/143] add regression test for type error --- .../fail-tests/simplified_qr_decomp.c | 60 +++++++------ src/dios-egraphs/Diospyros/fail-tests/test1.c | 84 +++++++++++++++++++ src/dios-egraphs/Diospyros/src/lib.rs | 5 ++ 3 files changed, 125 insertions(+), 24 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/fail-tests/test1.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c b/src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c index a8c75e3c..da3cf046 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c +++ b/src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c @@ -91,7 +91,7 @@ void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { // Build identity matrix of size SIZE * SIZE // No Calloc is used here. 
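// The replacement lines that follow swap the integer aggregate initializers
// ({0} and, further down, {1}) for float expressions ({0.0f} and {alpha}), and
// reinstate the x/e scratch buffers as variable-length arrays bounded by an
// int32_t m. That combination appears to be what exposes the type error that
// the new fail-tests/test1.c, added later in this patch, isolates.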
- float I[SIZE * SIZE] = {0}; + float I[SIZE * SIZE] = {0.0f}; // float *I = (float *)calloc(sizeof(float), SIZE * SIZE); for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { @@ -101,17 +101,23 @@ void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { // Householder for (int k = 0; k < SIZE - 1; k++) { - // int m = SIZE - k; + int32_t m = SIZE - k; - // float x[SIZE] = {0}; - // float e[SIZE] = {0}; - // for (int i = 0; i < m; i++) { - // int row = k + i; - // x[i] = R[row * SIZE + k]; - // e[i] = I[row * SIZE + k]; - // } + float x[m]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + float e[m]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = 1.0f; + e[i] = 2.0f; + } - // float alpha = -sgn(x[0]) * naive_norm(x, m); + float alpha = -sgn(x[0]) * naive_norm(x, m); // float u[SIZE] = {0}; // float v[SIZE] = {0}; @@ -148,14 +154,14 @@ void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { // q_t[i * SIZE + j] = q_t_i; // } // } - float q_t[SIZE * SIZE] = {1}; + float q_t[SIZE * SIZE] = {alpha}; if (k == 0) { for (int i = 0; i < SIZE * SIZE; i++) { Q[i] = q_t[i]; } no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A } else { - float res[SIZE * SIZE] = {0}; + float res[SIZE * SIZE] = {0.0f}; no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A for (int i = 0; i < SIZE * SIZE; i++) { Q[i] = res[i]; @@ -174,7 +180,7 @@ void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { R[i] = A[i]; } - float I[SIZE * SIZE] = {0}; + float I[SIZE * SIZE] = {0.0f}; for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { I[i * SIZE + j] = (i == j); @@ -183,17 +189,23 @@ void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { // Householder for (int k = 0; k < SIZE - 1; k++) { - // int m = SIZE - k; + int32_t m = SIZE - k; - // float x[SIZE] = {0}; - // float e[SIZE] = {0}; - // for (int i = 0; i < m; i++) { - // int row = k + i; - // x[i] = R[row * SIZE + k]; - // e[i] = I[row * SIZE + k]; - // } + float x[m]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + float e[m]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = 1.0f; + e[i] = 2.0f; + } - // float alpha = -sgn(x[0]) * naive_norm(x, m); + float alpha = -sgn(x[0]) * naive_norm(x, m); // float u[SIZE] = {0}; // float v[SIZE] = {0}; @@ -231,14 +243,14 @@ void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { // } // } - float q_t[SIZE * SIZE] = {1}; + float q_t[SIZE * SIZE] = {alpha}; if (k == 0) { for (int i = 0; i < SIZE * SIZE; i++) { Q[i] = q_t[i]; } no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A } else { - float res[SIZE * SIZE] = {0}; + float res[SIZE * SIZE] = {0.0f}; no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A for (int i = 0; i < SIZE * SIZE; i++) { Q[i] = res[i]; diff --git a/src/dios-egraphs/Diospyros/fail-tests/test1.c b/src/dios-egraphs/Diospyros/fail-tests/test1.c new file mode 100644 index 00000000..e7b03401 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/test1.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + 
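// The regression this file isolates: when the Diospyros pass rebuilds a store,
// the value extracted from the e-graph can come back with a narrower integer
// type than the element type of the destination address. The follow-up patch
// adds a gen_type_cast helper in src/lib.rs that zero-extends i16/i32 values
// up to the wider integer type before the store and panics on any other
// mismatch. A minimal C analogue of that widening is sketched below; the
// helper name is hypothetical and the function is an illustration only, not
// code from the patch.
static inline unsigned long long zext_i32_to_i64(unsigned int v) {
    // Zero-extend, never truncate -- the same effect as an LLVM zext to i64.
    return (unsigned long long)v;
}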
+float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float x[m]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + float e[m]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float x[m]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + float e[m]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + sample_test(A); + float expectedA[SIZE] = {0}; + no_opt_sample_test(expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 46874b27..5ba7d793 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -2183,6 +2183,11 @@ unsafe fn egg_to_llvm( } else { let cloned_addr = LLVMInstructionClone(*addr); let new_addr = llvm_recursive_add(builder, cloned_addr); + LLVMDumpType(LLVMTypeOf(extracted_value)); + println!(); + LLVMDumpType(LLVMTypeOf(new_addr)); + println!(); + assert!(LLVMTypeOf(extracted_value) == LLVMGetElementType(LLVMTypeOf(new_addr))); // LLVMReplaceAllUsesWith(*addr, new_addr); LLVMBuildStore(builder, extracted_value, new_addr); } From 0cda76cdf31162aa8b78ab0ea97d1ef4972832a3 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 9 Feb 2022 21:21:43 -0500 Subject: [PATCH 029/143] test 1 works, but have to manually insert type conversions in code TODO: check whether dynamically sized arrays can be used --- src/dios-egraphs/Diospyros/fail-tests/test1.c | 8 ++-- src/dios-egraphs/Diospyros/src/lib.rs | 42 ++++++++++++++++--- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/src/dios-egraphs/Diospyros/fail-tests/test1.c b/src/dios-egraphs/Diospyros/fail-tests/test1.c index e7b03401..53be1a54 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test1.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test1.c @@ -32,11 +32,11 @@ void sample_test(float A[SIZE]) { for (int k = 0; k < SIZE - 1; k++) { int m = SIZE - k; - float x[m]; + float x[SIZE]; for (int i = 0; i < m; i++) { x[i] = 0.0f; } - float e[m]; + float e[SIZE]; for (int i = 0; i < m; i++) { e[i] = 0.0f; } @@ -54,11 +54,11 @@ void no_opt_sample_test(float A[SIZE]) { for (int k = 0; k < SIZE - 1; k++) { int m = SIZE - k; - float x[m]; + float x[SIZE]; for (int i = 0; i < m; i++) { x[i] = 0.0f; } - float e[m]; + float e[SIZE]; for (int i = 0; i < m; i++) { e[i] = 0.0f; } diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 5ba7d793..23af4dd2 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -2126,6 +2126,23 @@ unsafe fn translate_egg( return instr; } +unsafe fn gen_type_cast( + val: LLVMValueRef, + typ1: LLVMTypeRef, + typ2: LLVMTypeRef, + context: LLVMContextRef, + builder: LLVMBuilderRef, +) -> LLVMValueRef { + if typ1 == LLVMInt32TypeInContext(context) && typ2 == 
LLVMInt64TypeInContext(context) { + return LLVMBuildZExt(builder, val, typ2, b"\0".as_ptr() as *const _); + } else if typ1 == LLVMInt16TypeInContext(context) && typ2 == LLVMInt64TypeInContext(context) { + return LLVMBuildZExt(builder, val, typ2, b"\0".as_ptr() as *const _); + } else if typ1 == LLVMInt16TypeInContext(context) && typ2 == LLVMInt32TypeInContext(context) { + return LLVMBuildZExt(builder, val, typ2, b"\0".as_ptr() as *const _); + } + panic!("Cannot convert between {:?} {:?}\n.", typ1, typ2); +} + unsafe fn egg_to_llvm( expr: RecExpr, gep_map: &GEPMap, @@ -2179,16 +2196,31 @@ unsafe fn egg_to_llvm( ); } if isa_argument(*addr) { + if LLVMTypeOf(extracted_value) != LLVMGetElementType(LLVMTypeOf(*addr)) { + extracted_value = gen_type_cast( + extracted_value, + LLVMTypeOf(extracted_value), + LLVMGetElementType(LLVMTypeOf(*addr)), + context, + builder, + ); + } + assert!(LLVMTypeOf(extracted_value) == LLVMGetElementType(LLVMTypeOf(*addr))); LLVMBuildStore(builder, extracted_value, *addr); } else { let cloned_addr = LLVMInstructionClone(*addr); let new_addr = llvm_recursive_add(builder, cloned_addr); - LLVMDumpType(LLVMTypeOf(extracted_value)); - println!(); - LLVMDumpType(LLVMTypeOf(new_addr)); - println!(); - assert!(LLVMTypeOf(extracted_value) == LLVMGetElementType(LLVMTypeOf(new_addr))); + if LLVMTypeOf(extracted_value) != LLVMGetElementType(LLVMTypeOf(new_addr)) { + extracted_value = gen_type_cast( + extracted_value, + LLVMTypeOf(extracted_value), + LLVMGetElementType(LLVMTypeOf(new_addr)), + context, + builder, + ); + } // LLVMReplaceAllUsesWith(*addr, new_addr); + assert!(LLVMTypeOf(extracted_value) == LLVMGetElementType(LLVMTypeOf(new_addr))); LLVMBuildStore(builder, extracted_value, new_addr); } } From 8243d8736c7a7aed768de370a603311a3563d467 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 9 Feb 2022 21:25:04 -0500 Subject: [PATCH 030/143] dynamically sized arrays in test2 work --- src/dios-egraphs/Diospyros/fail-tests/test2.c | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 src/dios-egraphs/Diospyros/fail-tests/test2.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/test2.c b/src/dios-egraphs/Diospyros/fail-tests/test2.c new file mode 100644 index 00000000..f55594ac --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/test2.c @@ -0,0 +1,86 @@ +// Modification of test 1, with dynamically sized arrays. 
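// "Dynamically sized" here means the scratch buffers are C99 VLAs whose length
// is a loop-dependent int (m = SIZE - k), in contrast to the fixed-size
// float x[SIZE] form that test1.c was switched to in the previous patch. This
// follows up on that patch's TODO about whether dynamically sized arrays can
// be used. A side-by-side sketch of the two shapes (illustrative helper names,
// not part of this test file):
static void fixed_buffer_sketch(float *out) {
    float x[2] = {0.0f, 0.0f};  // constant-length local, as in test1.c
    out[0] = x[0] + x[1];
}
static void vla_buffer_sketch(float *out, int m) {  // assumes m > 0
    float x[m];  // length known only at run time -- the case this test exercises
    for (int i = 0; i < m; i++) x[i] = 0.0f;
    out[0] = x[0];
}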
+ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float x[m]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + float e[m]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + sample_test(A); + float expectedA[SIZE] = {0}; + no_opt_sample_test(expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + } +} \ No newline at end of file From cdd3441477b47f94a4776441d73203800b5ecc94 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 9 Feb 2022 21:27:51 -0500 Subject: [PATCH 031/143] add test1/test2 asserts --- src/dios-egraphs/Diospyros/fail-tests/test1.c | 2 ++ src/dios-egraphs/Diospyros/fail-tests/test2.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/dios-egraphs/Diospyros/fail-tests/test1.c b/src/dios-egraphs/Diospyros/fail-tests/test1.c index 53be1a54..717dcd71 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test1.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test1.c @@ -7,6 +7,7 @@ #include #define SIZE 2 +#define DELTA 0.1f float sgn(float v) { return (v > 0) - (v < 0); } @@ -80,5 +81,6 @@ int main(void) { for (int i = 0; i < SIZE; i++) { printf("A Output: %f\n", A[i]); printf("Expected A Output: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test2.c b/src/dios-egraphs/Diospyros/fail-tests/test2.c index f55594ac..44866628 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test2.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test2.c @@ -9,6 +9,7 @@ #include #define SIZE 2 +#define DELTA 0.1f float sgn(float v) { return (v > 0) - (v < 0); } @@ -82,5 +83,6 @@ int main(void) { for (int i = 0; i < SIZE; i++) { printf("A Output: %f\n", A[i]); printf("Expected A Output: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); } } \ No newline at end of file From fa81f9366e7b6276296fde1c948a844869d1792e Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 9 Feb 2022 23:22:02 -0500 Subject: [PATCH 032/143] add test 3 Storage to Q is not working --- .../fail-tests/simplified_qr_decomp.c | 48 ++++---- src/dios-egraphs/Diospyros/fail-tests/test3.c | 108 ++++++++++++++++++ 2 files changed, 133 insertions(+), 23 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/fail-tests/test3.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c 
b/src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c index da3cf046..e20b96ac 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c +++ b/src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c @@ -159,18 +159,19 @@ void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { for (int i = 0; i < SIZE * SIZE; i++) { Q[i] = q_t[i]; } - no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A - } else { - float res[SIZE * SIZE] = {0.0f}; - no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A - for (int i = 0; i < SIZE * SIZE; i++) { - Q[i] = res[i]; - } - no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A - for (int i = 0; i < SIZE * SIZE; i++) { - R[i] = res[i]; - } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A } + // else { + // float res[SIZE * SIZE] = {0.0f}; + // naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // for (int i = 0; i < SIZE * SIZE; i++) { + // Q[i] = res[i]; + // } + // naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // for (int i = 0; i < SIZE * SIZE; i++) { + // R[i] = res[i]; + // } + // } } naive_fixed_transpose(Q); } @@ -205,7 +206,7 @@ void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { e[i] = 2.0f; } - float alpha = -sgn(x[0]) * naive_norm(x, m); + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); // float u[SIZE] = {0}; // float v[SIZE] = {0}; @@ -249,19 +250,20 @@ void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { Q[i] = q_t[i]; } no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A - } else { - float res[SIZE * SIZE] = {0.0f}; - no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A - for (int i = 0; i < SIZE * SIZE; i++) { - Q[i] = res[i]; - } - no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A - for (int i = 0; i < SIZE * SIZE; i++) { - R[i] = res[i]; - } } + // else { + // float res[SIZE * SIZE] = {0.0f}; + // no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // for (int i = 0; i < SIZE * SIZE; i++) { + // Q[i] = res[i]; + // } + // no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // for (int i = 0; i < SIZE * SIZE; i++) { + // R[i] = res[i]; + // } + // } } - naive_fixed_transpose(Q); + no_opt_naive_fixed_transpose(Q); } int main(void) { diff --git a/src/dios-egraphs/Diospyros/fail-tests/test3.c b/src/dios-egraphs/Diospyros/fail-tests/test3.c new file mode 100644 index 00000000..825a92d1 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/test3.c @@ -0,0 +1,108 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float Q[SIZE * SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + + float q_t[SIZE * SIZE] = {alpha}; + if (k == 0) { + for (int i = 0; i < SIZE 
* SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +void no_opt_sample_test(float A[SIZE], float Q[SIZE * SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + + float q_t[SIZE * SIZE] = {alpha}; + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +int main(void) { + float A[SIZE] = {0}; + float Q[SIZE * SIZE] = {0}; + sample_test(A, Q); + float expectedA[SIZE] = {0}; + float expectedQ[SIZE * SIZE] = {0}; + no_opt_sample_test(expectedA, expectedQ); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + // assert(fabs(expectedA[i] - A[i]) < DELTA); + } + + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + // assert(fabs(expectedA[i] - A[i]) < DELTA); + } +} \ No newline at end of file From cf2154e28f8850a5c547570e696b14def6b47808 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 11 Feb 2022 13:05:10 -0500 Subject: [PATCH 033/143] add naive-norm and sgn inlining passes --- .../fail-tests/naive-norm-inline-PASS.c | 50 +++++++++++++++++++ .../Diospyros/fail-tests/sgn-inline-PASS.c | 42 ++++++++++++++++ src/dios-egraphs/Diospyros/fail-tests/test1.c | 3 ++ src/dios-egraphs/Diospyros/fail-tests/test2.c | 3 ++ src/dios-egraphs/Diospyros/fail-tests/test3.c | 14 +++--- 5 files changed, 106 insertions(+), 6 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/fail-tests/naive-norm-inline-PASS.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/sgn-inline-PASS.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/naive-norm-inline-PASS.c b/src/dios-egraphs/Diospyros/fail-tests/naive-norm-inline-PASS.c new file mode 100644 index 00000000..66605c05 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/naive-norm-inline-PASS.c @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float B[SIZE - 1]) { + B[0] = naive_norm(A, SIZE); +} + +void no_opt_sample_test(float A[SIZE], float B[SIZE - 1]) { + B[0] = no_opt_naive_norm(A, SIZE); +} + +int main(void) { + float A[SIZE] = {1, 2, 3, 4, 5}; + float B[SIZE - 1] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, 2, 3, 4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE - 1; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/sgn-inline-PASS.c b/src/dios-egraphs/Diospyros/fail-tests/sgn-inline-PASS.c new file mode 100644 index 00000000..e982b0eb --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/sgn-inline-PASS.c @@ -0,0 +1,42 @@ +#include +#include +#include +#include +#include 
+#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +void sample_test(float A[SIZE], float B[SIZE]) { + for (int i = 0; i < SIZE; i++) { + B[i] = sgn(A[i]); + } +} + +void no_opt_sample_test(float A[SIZE], float B[SIZE]) { + for (int i = 0; i < SIZE; i++) { + B[i] = no_opt_sgn(A[i]); + } +} + +int main(void) { + float A[SIZE] = {1, -2, 0, -4, 5}; + float B[SIZE] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, -2, 0, -4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test1.c b/src/dios-egraphs/Diospyros/fail-tests/test1.c index 717dcd71..05b53f3d 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test1.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test1.c @@ -9,6 +9,9 @@ #define SIZE 2 #define DELTA 0.1f +// float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + float sgn(float v) { return (v > 0) - (v < 0); } float no_opt_sgn(float v) { return (v > 0) - (v < 0); } diff --git a/src/dios-egraphs/Diospyros/fail-tests/test2.c b/src/dios-egraphs/Diospyros/fail-tests/test2.c index 44866628..48370b89 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test2.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test2.c @@ -11,6 +11,9 @@ #define SIZE 2 #define DELTA 0.1f +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + float sgn(float v) { return (v > 0) - (v < 0); } float no_opt_sgn(float v) { return (v > 0) - (v < 0); } diff --git a/src/dios-egraphs/Diospyros/fail-tests/test3.c b/src/dios-egraphs/Diospyros/fail-tests/test3.c index 825a92d1..ec219b7a 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test3.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test3.c @@ -9,6 +9,9 @@ #define SIZE 2 #define DELTA 0.1f +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + float sgn(float v) { return (v > 0) - (v < 0); } float no_opt_sgn(float v) { return (v > 0) - (v < 0); } @@ -78,12 +81,11 @@ void no_opt_sample_test(float A[SIZE], float Q[SIZE * SIZE]) { float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); A[k] = alpha; - float q_t[SIZE * SIZE] = {alpha}; - if (k == 0) { - for (int i = 0; i < SIZE * SIZE; i++) { - Q[i] = q_t[i]; - } - } + // float q_t[SIZE * SIZE] = {alpha}; + + // for (int i = 0; i < SIZE * SIZE; i++) { + // Q[i] = q_t[i]; + // } } } From 626fd1b9f54e3c5a97d61d470f039ad2cff711cd Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 11 Feb 2022 13:14:35 -0500 Subject: [PATCH 034/143] sgn and naive norm inlining tests both pass No Loops Just use sgn and naive norm together Issue in test1 may be due to a nested loop : --- .../fail-tests/sgn-naive-norm-inline-PASS.c | 61 ++++++++++++++++++ .../fail-tests/sgn-naive-norm-inline2-PASS.c | 63 +++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 src/dios-egraphs/Diospyros/fail-tests/sgn-naive-norm-inline-PASS.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/sgn-naive-norm-inline2-PASS.c diff --git 
a/src/dios-egraphs/Diospyros/fail-tests/sgn-naive-norm-inline-PASS.c b/src/dios-egraphs/Diospyros/fail-tests/sgn-naive-norm-inline-PASS.c new file mode 100644 index 00000000..c04592b6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/sgn-naive-norm-inline-PASS.c @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float naive_norm(float *x, int m) __attribute__((always_inline)); +float sgn(float v) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float B[SIZE]) { + B[0] = naive_norm(A, SIZE); + for (int i = 1; i < SIZE; i++) { + B[i] = sgn(A[i]); + } +} + +void no_opt_sample_test(float A[SIZE], float B[SIZE]) { + B[0] = no_opt_naive_norm(A, SIZE); + for (int i = 1; i < SIZE; i++) { + B[i] = no_opt_sgn(A[i]); + } +} + +int main(void) { + float A[SIZE] = {1, -2, 0, -4, 5}; + float B[SIZE] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, -2, 0, -4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/sgn-naive-norm-inline2-PASS.c b/src/dios-egraphs/Diospyros/fail-tests/sgn-naive-norm-inline2-PASS.c new file mode 100644 index 00000000..9613c3e4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/sgn-naive-norm-inline2-PASS.c @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float naive_norm(float *x, int m) __attribute__((always_inline)); +float sgn(float v) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float B[SIZE]) { + float alpha = -sgn(A[0]) * naive_norm(A, SIZE); + B[0] = alpha; + for (int i = 1; i < SIZE; i++) { + B[i] = sgn(A[i]); + } +} + +void no_opt_sample_test(float A[SIZE], float B[SIZE]) { + float alpha = -no_opt_sgn(A[0]) * no_opt_naive_norm(A, SIZE); + B[0] = alpha; + for (int i = 1; i < SIZE; i++) { + B[i] = no_opt_sgn(A[i]); + } +} + +int main(void) { + float A[SIZE] = {1, -2, 0, -4, 5}; + float B[SIZE] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, -2, 0, -4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file From 72a83f8999e02f3b051e3be75d85dae5fb2fdb17 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 11 Feb 2022 13:28:50 -0500 Subject: [PATCH 035/143] simple loop sgn/naive 
norm inline Loop with indices read and written Uses sgn and naive_norm, both inlined --- .../Diospyros/fail-tests/loop-inline.c | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 src/dios-egraphs/Diospyros/fail-tests/loop-inline.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/loop-inline.c b/src/dios-egraphs/Diospyros/fail-tests/loop-inline.c new file mode 100644 index 00000000..c7a9ea22 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/loop-inline.c @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +// float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float x[SIZE], float A[SIZE]) { + for (int k = 0; k < SIZE; k++) { + float alpha = -sgn(x[k]) * naive_norm(x, k); + A[k] = alpha; + } +} + +void no_opt_sample_test(float x[SIZE], float A[SIZE]) { + for (int k = 0; k < SIZE; k++) { + float alpha = -no_opt_sgn(x[k]) * no_opt_naive_norm(x, k); + A[k] = alpha; + } +} + +int main(void) { + float x[SIZE] = {1, -1, 2, 3, 5}; + float A[SIZE] = {0}; + sample_test(x, A); + float expectedA[SIZE] = {0}; + no_opt_sample_test(x, expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } +} \ No newline at end of file From 296167f1a627d0b760bbaf73f178cff21703adf0 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 11 Feb 2022 13:29:15 -0500 Subject: [PATCH 036/143] fix sgn to be inlined --- src/dios-egraphs/Diospyros/fail-tests/loop-inline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dios-egraphs/Diospyros/fail-tests/loop-inline.c b/src/dios-egraphs/Diospyros/fail-tests/loop-inline.c index c7a9ea22..5a817002 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/loop-inline.c +++ b/src/dios-egraphs/Diospyros/fail-tests/loop-inline.c @@ -9,7 +9,7 @@ #define SIZE 5 #define DELTA 0.1f -// float sgn(float v) __attribute__((always_inline)); +float sgn(float v) __attribute__((always_inline)); float naive_norm(float *x, int m) __attribute__((always_inline)); float sgn(float v) { return (v > 0) - (v < 0); } From 556225418e8ef28c2f89ff54e7c9d6021ccf8df1 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sat, 12 Feb 2022 02:37:40 -0500 Subject: [PATCH 037/143] fix no-opt flag --- .../{loop-inline.c => loop-inline-PASS.c} | 0 src/dios-egraphs/Diospyros/fail-tests/test1.c | 26 ++++++++++++------- src/dios-egraphs/Diospyros/src/lib.rs | 6 ++++- 3 files changed, 22 insertions(+), 10 deletions(-) rename src/dios-egraphs/Diospyros/fail-tests/{loop-inline.c => loop-inline-PASS.c} (100%) diff --git a/src/dios-egraphs/Diospyros/fail-tests/loop-inline.c b/src/dios-egraphs/Diospyros/fail-tests/loop-inline-PASS.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/loop-inline.c rename to src/dios-egraphs/Diospyros/fail-tests/loop-inline-PASS.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/test1.c b/src/dios-egraphs/Diospyros/fail-tests/test1.c index 05b53f3d..d3070f6b 
100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test1.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test1.c @@ -32,15 +32,15 @@ float no_opt_naive_norm(float *x, int m) { return sqrtf(sum); } -void sample_test(float A[SIZE]) { +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { for (int k = 0; k < SIZE - 1; k++) { int m = SIZE - k; - float x[SIZE]; + // float x[SIZE]; for (int i = 0; i < m; i++) { x[i] = 0.0f; } - float e[SIZE]; + // float e[SIZE]; for (int i = 0; i < m; i++) { e[i] = 0.0f; } @@ -54,15 +54,15 @@ void sample_test(float A[SIZE]) { } } -void no_opt_sample_test(float A[SIZE]) { +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { for (int k = 0; k < SIZE - 1; k++) { int m = SIZE - k; - float x[SIZE]; + // float x[SIZE]; for (int i = 0; i < m; i++) { x[i] = 0.0f; } - float e[SIZE]; + // float e[SIZE]; for (int i = 0; i < m; i++) { e[i] = 0.0f; } @@ -78,12 +78,20 @@ void no_opt_sample_test(float A[SIZE]) { int main(void) { float A[SIZE] = {0}; - sample_test(A); + float x[SIZE] = {0}; + float e[SIZE] = {0}; + sample_test(A, x, e); float expectedA[SIZE] = {0}; - no_opt_sample_test(expectedA); + float expectedx[SIZE] = {0}; + float expectede[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedx, expectede); for (int i = 0; i < SIZE; i++) { printf("A Output: %f\n", A[i]); printf("Expected A Output: %f\n", expectedA[i]); - assert(fabs(expectedA[i] - A[i]) < DELTA); + printf("X Output: %f\n", x[i]); + printf("Expected X Output: %f\n", expectedx[i]); + printf("E Output: %f\n", e[i]); + printf("Expected E Output: %f\n", expectede[i]); + // assert(fabs(expectedA[i] - A[i]) < DELTA); } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 23af4dd2..bb138dd0 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -738,7 +738,11 @@ pub fn optimize( if print_opt { eprintln!("{}", expr.pretty(10)); } - let (_, best) = rules::run(&expr, 180, true, !run_egg); + let mut best = expr.clone(); + if run_egg { + let pair = rules::run(&expr, 180, true, !run_egg); + best = pair.1; + } if print_opt { eprintln!("{}", best.pretty(10)); } From 8b338451db4146b7d0e73edeab4c7077231f5696 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Mon, 14 Feb 2022 23:44:21 -0500 Subject: [PATCH 038/143] fix call issue, test1 pass Fix VecLang::Symbol branch in egg2llvm by recursively adding in the llvm instruction Fix llvm_recursive_add by commenting out copy/recurse_add on call in else if os_call --- src/dios-egraphs/Diospyros/Makefile | 9 +++++++++ src/dios-egraphs/Diospyros/src/lib.rs | 11 ++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index dd5bc618..47f32a71 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -41,6 +41,15 @@ test-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) opt.ll -o finish.ll opt -S --adce finish.ll -o final.ll +min-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp + $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) + opt -S -always-inline --inline --mem2reg --scev-aa --unroll-threshold=1000000 --loop-unroll --simplifycfg --mem2reg --dse --adce build/clang.ll -o build/opt.ll + opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll + $(CLANG) -emit-llvm -S -Xclang -load -Xclang 
target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=false build/aa.ll -o build/diospyros.ll + opt -S --adce --dse build/diospyros.ll -o build/dce.ll + $(CLANG) build/dce.ll -o build/final + build/final + run-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index bb138dd0..3add325c 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -903,11 +903,12 @@ unsafe fn llvm_recursive_add(builder: LLVMBuilderRef, inst: LLVMValueRef) -> LLV let cloned_inst = LLVMInstructionClone(inst); LLVMInsertIntoBuilder(builder, cloned_inst); return cloned_inst; - } else if isa_call(inst) { - let cloned_inst = LLVMInstructionClone(inst); - LLVMInsertIntoBuilder(builder, cloned_inst); - return cloned_inst; } + // else if isa_call(inst) { + // let cloned_inst = LLVMInstructionClone(inst); + // LLVMInsertIntoBuilder(builder, cloned_inst); + // return cloned_inst; + // } let cloned_inst = LLVMInstructionClone(inst); let num_ops = LLVMGetNumOperands(inst); for i in 0..num_ops { @@ -1707,7 +1708,7 @@ unsafe fn translate_egg( let instr = match enode { VecLang::Symbol(symbol) => { match symbol_map.get(enode) { - Some(llvm_instr) => *llvm_instr, + Some(llvm_instr) => llvm_recursive_add(builder, *llvm_instr), None => { let mut matched = false; let mut ret_value = LLVMBuildAdd( From fec0a524ccd47b606fa7cc8f4bc8fdead3bee907 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 17 Feb 2022 15:48:44 -0500 Subject: [PATCH 039/143] test1, 2 and 3 work Problem is that static or dynamically sized arrays cause problems with where a memset is placed I do not know a sound technique to choose these memset sequences to be recopied Hence, for now, I ban all static or dynamic sized arrays in optimized functions --- src/dios-egraphs/Diospyros/fail-tests/test2.c | 17 +++++---- src/dios-egraphs/Diospyros/fail-tests/test3.c | 38 ++++++++++++------- 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/src/dios-egraphs/Diospyros/fail-tests/test2.c b/src/dios-egraphs/Diospyros/fail-tests/test2.c index 48370b89..367d75a4 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test2.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test2.c @@ -34,15 +34,14 @@ float no_opt_naive_norm(float *x, int m) { return sqrtf(sum); } -void sample_test(float A[SIZE]) { +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { for (int k = 0; k < SIZE - 1; k++) { int m = SIZE - k; - float x[m]; for (int i = 0; i < m; i++) { x[i] = 0.0f; } - float e[m]; + for (int i = 0; i < m; i++) { e[i] = 0.0f; } @@ -56,15 +55,13 @@ void sample_test(float A[SIZE]) { } } -void no_opt_sample_test(float A[SIZE]) { +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { for (int k = 0; k < SIZE - 1; k++) { int m = SIZE - k; - float x[SIZE]; for (int i = 0; i < m; i++) { x[i] = 0.0f; } - float e[SIZE]; for (int i = 0; i < m; i++) { e[i] = 0.0f; } @@ -80,9 +77,13 @@ void no_opt_sample_test(float A[SIZE]) { int main(void) { float A[SIZE] = {0}; - sample_test(A); + float x[SIZE] = {0}; + float e[SIZE] = {0}; + sample_test(A, x, e); float 
expectedA[SIZE] = {0}; - no_opt_sample_test(expectedA); + float expectedX[SIZE] = {0}; + float expectedE[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedX, expectedE); for (int i = 0; i < SIZE; i++) { printf("A Output: %f\n", A[i]); printf("Expected A Output: %f\n", expectedA[i]); diff --git a/src/dios-egraphs/Diospyros/fail-tests/test3.c b/src/dios-egraphs/Diospyros/fail-tests/test3.c index ec219b7a..ebe988ae 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test3.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test3.c @@ -32,15 +32,15 @@ float no_opt_naive_norm(float *x, int m) { return sqrtf(sum); } -void sample_test(float A[SIZE], float Q[SIZE * SIZE]) { +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE], + float Q[SIZE * SIZE]) { for (int k = 0; k < SIZE - 1; k++) { int m = SIZE - k; - float x[SIZE]; for (int i = 0; i < m; i++) { x[i] = 0.0f; } - float e[SIZE]; + for (int i = 0; i < m; i++) { e[i] = 0.0f; } @@ -49,27 +49,26 @@ void sample_test(float A[SIZE], float Q[SIZE * SIZE]) { e[i] = 2.0f; } - float alpha = -sgn(x[0]) * naive_norm(x, m); + float alpha = -sgn(x[0]) * naive_norm(x, m) * naive_norm(e, m); A[k] = alpha; - float q_t[SIZE * SIZE] = {alpha}; + // float q_t[SIZE * SIZE] = {alpha}; if (k == 0) { for (int i = 0; i < SIZE * SIZE; i++) { - Q[i] = q_t[i]; + Q[i] = alpha; } } } } -void no_opt_sample_test(float A[SIZE], float Q[SIZE * SIZE]) { +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE], + float Q[SIZE * SIZE]) { for (int k = 0; k < SIZE - 1; k++) { int m = SIZE - k; - float x[SIZE]; for (int i = 0; i < m; i++) { x[i] = 0.0f; } - float e[SIZE]; for (int i = 0; i < m; i++) { e[i] = 0.0f; } @@ -78,7 +77,8 @@ void no_opt_sample_test(float A[SIZE], float Q[SIZE * SIZE]) { e[i] = 2.0f; } - float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m) * + no_opt_naive_norm(e, m); A[k] = alpha; // float q_t[SIZE * SIZE] = {alpha}; @@ -86,25 +86,35 @@ void no_opt_sample_test(float A[SIZE], float Q[SIZE * SIZE]) { // for (int i = 0; i < SIZE * SIZE; i++) { // Q[i] = q_t[i]; // } + + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = alpha; + } + } } } int main(void) { float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; float Q[SIZE * SIZE] = {0}; - sample_test(A, Q); + sample_test(A, x, e, Q); float expectedA[SIZE] = {0}; + float expectedX[SIZE] = {0}; + float expectedE[SIZE] = {0}; float expectedQ[SIZE * SIZE] = {0}; - no_opt_sample_test(expectedA, expectedQ); + no_opt_sample_test(expectedA, expectedX, expectedE, expectedQ); for (int i = 0; i < SIZE; i++) { printf("A Output: %f\n", A[i]); printf("Expected A Output: %f\n", expectedA[i]); - // assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedA[i] - A[i]) < DELTA); } for (int i = 0; i < SIZE * SIZE; i++) { printf("Q Output: %f\n", Q[i]); printf("Expected Q Output: %f\n", expectedQ[i]); - // assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); } } \ No newline at end of file From f27679cade6d766a558a35a396684e1f231e33bc Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 25 Feb 2022 03:14:56 -0500 Subject: [PATCH 040/143] random and feature tests Add naive-norm, transpose 10by10 20by20 random tests Add transpose, naive-norm, inlining feature tests --- .../Diospyros/LoadStoreMovement.cpp | 4 +- .../Diospyros/fail-tests/local-array-2.c | 33 + .../Diospyros/fail-tests/local-array-3.c | 28 + .../Diospyros/fail-tests/local-array-4.c | 32 + 
.../Diospyros/fail-tests/local-array.c | 22 + .../Diospyros/fail-tests/test1-local-array.c | 101 ++++ .../Diospyros/fail-tests/test4-1-linear.c | 197 ++++++ .../Diospyros/fail-tests/test4-1.c | 207 +++++++ .../Diospyros/fail-tests/test4-2-2.c | 213 +++++++ .../Diospyros/fail-tests/test4-2-linear.c | 197 ++++++ .../Diospyros/fail-tests/test4-2.c | 203 +++++++ .../Diospyros/fail-tests/test4-3-linear.c | 197 ++++++ .../Diospyros/fail-tests/test4-3.c | 206 +++++++ .../Diospyros/fail-tests/test4-4-linear.c | 127 ++++ .../Diospyros/fail-tests/test4-5-linear.c | 197 ++++++ .../Diospyros/llvm-tests/inline-float.c | 78 +++ .../Diospyros/llvm-tests/inline-void.c | 54 ++ .../Diospyros/llvm-tests/naive-norm.c | 43 ++ .../Diospyros/llvm-tests/nested-inline.c | 105 ++++ .../Diospyros/llvm-tests/transpose.c | 57 ++ .../Diospyros/polybench-tests/3mm.c | 169 ++++++ .../Diospyros/polybench-tests/3mm.h | 98 +++ .../Diospyros/polybench-tests/polybench.c | 569 ++++++++++++++++++ .../Diospyros/polybench-tests/polybench.h | 241 ++++++++ .../10-by-10-random-naive-norm.c | 41 ++ .../10-by-10-random-transpose.c | 51 ++ .../20-by-20-random-naive-norm.c | 41 ++ .../20-by-20-random-transpose.c | 51 ++ src/dios-egraphs/Diospyros/src/lib.rs | 42 +- 29 files changed, 3587 insertions(+), 17 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/fail-tests/local-array-2.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/local-array-3.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/local-array-4.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/local-array.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/test4-1-linear.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/test4-1.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/test4-2-2.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/test4-2-linear.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/test4-2.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/test4-3-linear.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/test4-3.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/test4-4-linear.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/test4-5-linear.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/inline-float.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/inline-void.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/naive-norm.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/nested-inline.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/transpose.c create mode 100644 src/dios-egraphs/Diospyros/polybench-tests/3mm.c create mode 100644 src/dios-egraphs/Diospyros/polybench-tests/3mm.h create mode 100644 src/dios-egraphs/Diospyros/polybench-tests/polybench.c create mode 100644 src/dios-egraphs/Diospyros/polybench-tests/polybench.h create mode 100644 src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-naive-norm.c create mode 100644 src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-transpose.c create mode 100644 src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-naive-norm.c create mode 100644 src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-transpose.c diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index 6735e435..804004a4 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ 
b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -260,8 +260,8 @@ struct LoadStoreMovementPass : public FunctionPass { (F.getName().size() > 7 && F.getName().substr(0, 7) == "no_opt_")) { return false; } - rewrite_stores(F); - rewrite_loads(F); + // rewrite_stores(F); + // rewrite_loads(F); return true; } diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c b/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c new file mode 100644 index 00000000..3e89895b --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c @@ -0,0 +1,33 @@ +#include +#include +#include +#include + +#define SIZE 10 + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float x[SIZE] = {3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] += x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] *= A[i]; + } + for (int i = 0; i < SIZE; i++) { + B[i] -= x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] += B[i]; + } +} + +int main() { + float A[SIZE] = {1.0f}; + float B[SIZE] = {2.0f}; + float C[SIZE] = {0.0f}; + test(A, B, C); + for (int i = 0; i < SIZE; i++) { + printf("C Output: %f\n", C[i]); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-3.c b/src/dios-egraphs/Diospyros/fail-tests/local-array-3.c new file mode 100644 index 00000000..0bccfcb3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/local-array-3.c @@ -0,0 +1,28 @@ +#include +#include +#include +#include + +#define SIZE 10 + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {(float)i}; + C[i] = A[i] + x[i]; + } + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {(float)i}; + C[i] = B[i] - x[i]; + } +} + +int main() { + float A[SIZE] = {1.0f}; + float B[SIZE] = {2.0f}; + float C[SIZE] = {0.0f}; + test(A, B, C); + for (int i = 0; i < SIZE; i++) { + printf("C Output: %f\n", C[i]); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c b/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c new file mode 100644 index 00000000..17f081f0 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c @@ -0,0 +1,32 @@ +#include +#include +#include +#include + +#define SIZE 2 + +void test(float A[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {0.0f}; + for (int j = 0; j < SIZE; j++) { + x[j] = 1.0f; + } + float sum = 0.0f; + for (int j = 0; j < SIZE; j++) { + sum += x[j]; + } + A[i] = sum; + } +} + +int main() { + float A[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = (float)i; + } + test(A); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array.c b/src/dios-egraphs/Diospyros/fail-tests/local-array.c new file mode 100644 index 00000000..ca32918e --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/local-array.c @@ -0,0 +1,22 @@ +#include +#include +#include +#include + +#define SIZE 10 + +void test(float A[SIZE]) { + float x[SIZE] = {3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = x[i]; + } +} + +int main() { + float A[SIZE] = {1.0f}; + test(A); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c b/src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c new file mode 100644 index 00000000..2a91056a --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c @@ -0,0 +1,101 @@ +#include +#include +#include +#include +#include 
+#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +// float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + int m = SIZE - k; + + // float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + // float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + int m = SIZE - k; + + // float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + // float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; + sample_test(A); + float expectedA[SIZE] = {0}; + float expectedx[SIZE] = {0}; + float expectede[SIZE] = {0}; + no_opt_sample_test(expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + printf("X Output: %f\n", x[i]); + printf("Expected X Output: %f\n", expectedx[i]); + printf("E Output: %f\n", e[i]); + printf("Expected E Output: %f\n", expectede[i]); + // assert(fabs(expectedA[i] - A[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-1-linear.c b/src/dios-egraphs/Diospyros/fail-tests/test4-1-linear.c new file mode 100644 index 00000000..d1e8cf33 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-1-linear.c @@ -0,0 +1,197 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 1 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +int main(void) { + float A[SIZE] = {1.0f}; + float Q[SIZE] = {0.0f}; + float R[SIZE] = {1.0f}; + float i[SIZE] 
= {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE] = {0.0f}; + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-1.c b/src/dios-egraphs/Diospyros/fail-tests/test4-1.c new file mode 100644 index 00000000..4f683f6b --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-1.c @@ -0,0 +1,207 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 1 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { 
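+        // Build the SIZE x SIZE identity in I: (i == j) promotes to 1.0f on the
+        // diagonal and 0.0f elsewhere.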
+ for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1.0f}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float i[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + float expectedi[SIZE * SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-2-2.c b/src/dios-egraphs/Diospyros/fail-tests/test4-2-2.c new file mode 100644 index 00000000..efb2dfcf --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-2-2.c @@ -0,0 +1,213 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = 
-no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1.0f, 2.0f, 3.0f, 4.0f}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {1.0f}; + float i[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {1.0f}; + float expectedi[SIZE * SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q_T Output: %f\n", q_t[i * SIZE + j]); + printf("Expected Q_T Output: %f\n", expectedq_t[i * SIZE + j]); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-2-linear.c b/src/dios-egraphs/Diospyros/fail-tests/test4-2-linear.c new file mode 100644 index 00000000..428a10be --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-2-linear.c @@ -0,0 +1,197 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +int main(void) { + float A[SIZE] = {1.0f, 2.0f}; + float Q[SIZE] = {0.0f}; + float R[SIZE] = {1.0f}; + float 
i[SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE] = {0.0f}; + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-2.c b/src/dios-egraphs/Diospyros/fail-tests/test4-2.c new file mode 100644 index 00000000..1bcca8d2 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-2.c @@ -0,0 +1,203 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], + float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE * SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + 
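+    // Note: each iteration below only computes alpha = -sgn(x[0]) * ||x||_2,
+    // broadcasts it into q_t, and (for k == 0) copies q_t into Q and forms
+    // R = q_t * A; the rest of the Householder update is not performed in
+    // this reduced failing test.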
+ // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE * SIZE], float x[SIZE], + float e[SIZE], float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1.0f, 2.0f, 3.0f, 4.0f}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float i[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + float expectedi[SIZE * SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-3-linear.c b/src/dios-egraphs/Diospyros/fail-tests/test4-3-linear.c new file mode 100644 index 00000000..d400d018 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-3-linear.c @@ -0,0 +1,197 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 3 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +int main(void) { + float A[SIZE] = {1.0f, 2.0f, 3.0f}; + float Q[SIZE] = {0.0f}; + float R[SIZE] = {1.0f}; + 
float i[SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE] = {0.0f}; + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-3.c b/src/dios-egraphs/Diospyros/fail-tests/test4-3.c new file mode 100644 index 00000000..333d176f --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-3.c @@ -0,0 +1,206 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 3 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE * SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for 
(int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE * SIZE], float x[SIZE], + float e[SIZE], float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, 9.0f}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float i[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + float expectedi[SIZE * SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-4-linear.c b/src/dios-egraphs/Diospyros/fail-tests/test4-4-linear.c new file mode 100644 index 00000000..332fd882 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-4-linear.c @@ -0,0 +1,127 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 4 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return v; } + +float no_opt_sgn(float v) { return v; } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void naive_fixed_qr_decomp(float Q[SIZE], float x[SIZE], float q_t[SIZE]) { + // for (int i = 0; i < SIZE; i++) { + // R[i] = A[i]; + // } + + // for (int i = 0; i < SIZE; i++) { + // I[i] = 1.0f; + // } + + // Householder + // for (int k = 0; k < SIZE - 1; k++) { + // int k = 0; + // int m = SIZE - k; + + float alpha = -sgn(x[0]); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + // if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + // } + // } +} + +void no_opt_naive_fixed_qr_decomp(float Q[SIZE], float x[SIZE], + float q_t[SIZE]) { + // for (int i = 0; i < SIZE; i++) { + // R[i] = A[i]; + // } + + // for (int i = 0; i < SIZE; i++) { + // I[i] = 1.0f; + // } + + // Householder + // for (int k = 0; k < SIZE - 1; k++) { + // int k = 0; + // int m = SIZE - k; + + float alpha = -no_opt_sgn(x[0]); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + // if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + // } + // } +} + +int main(void) { + float A[SIZE] = {1.1f, 2.1f, 3.1f, 4.1f}; + float Q[SIZE] = {0.0f}; + float R[SIZE] = {1.0f}; + float i[SIZE] = {0.0f}; + float x[SIZE] = {1.2f, 1.3f, 1.4f, 1.5f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(Q, x, q_t); + float expectedQ[SIZE] = {0.0f}; + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {1.2f, 1.3f, 1.4f, 1.5f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(expectedQ, expectedx, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-5-linear.c b/src/dios-egraphs/Diospyros/fail-tests/test4-5-linear.c new file mode 100644 index 00000000..04c2dffd --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-5-linear.c @@ -0,0 +1,197 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +int main(void) { + float A[SIZE] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; + float Q[SIZE] = {0.0f}; + float R[SIZE] = 
{1.0f}; + float i[SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE] = {0.0f}; + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/inline-float.c b/src/dios-egraphs/Diospyros/llvm-tests/inline-float.c new file mode 100644 index 00000000..2b0245d8 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/inline-float.c @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + return sum; +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = no_opt_test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +float test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + return sum; +} + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +int main() { + float A[SIZE] = {1.0f}; + float expectedA[SIZE] = {1.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = 1.0f; + expectedA[i] = 1.0f; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + B[i] = 0.0f; + expectedB[i] = 0.0f; + } + float C[SIZE] = {0.0f}; + float expectedC[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + C[i] = 0.0f; + expectedC[i] = 0.0f; + } + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("C Output: %f\n", C[i]); + printf("Expected C Output: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/inline-void.c b/src/dios-egraphs/Diospyros/llvm-tests/inline-void.c new file mode 100644 index 00000000..71e7aa89 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/inline-void.c @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +void test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +void no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } +} + +void no_opt_test(float A[SIZE], float B[SIZE]) { + no_opt_test_inline(A, B, SIZE); +} + +void test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { 
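+        // Double each element of A into B; identical to the body of
+        // no_opt_test_inline above.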
+ B[i] = 2 * A[i]; + } +} + +void test(float A[SIZE], float B[SIZE]) { test_inline(A, B, SIZE); } + +int main() { + float A[SIZE] = {1.0f}; + float expectedA[SIZE] = {1.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = 1.0f; + expectedA[i] = 1.0f; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + B[i] = 0.0f; + expectedB[i] = 0.0f; + } + test(A, B); + no_opt_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/naive-norm.c b/src/dios-egraphs/Diospyros/llvm-tests/naive-norm.c new file mode 100644 index 00000000..1727c165 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/naive-norm.c @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +float naive_norm(float x[SIZE], int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float x[SIZE], int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +int main() { + float x[SIZE] = {1.0f}; + for (int i = 0; i < SIZE; i++) { + if (i % 2 == 0) { + x[i] = 1.0f; + } else { + x[i] = 0.0f; + } + } + float calculated = naive_norm(x, SIZE); + float expected = no_opt_naive_norm(x, SIZE); + printf("Calculated of Naive L2 Norm: %f\n", calculated); + printf("Expected of Naive L2 Norm: %f\n", expected); + assert(fabs(expected - calculated) < DELTA); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/nested-inline.c b/src/dios-egraphs/Diospyros/llvm-tests/nested-inline.c new file mode 100644 index 00000000..02650d06 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/nested-inline.c @@ -0,0 +1,105 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float nested_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float no_opt_nested_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = no_opt_nested_inline(A, B, n); + return prod - sum; +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = no_opt_test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +float nested_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = nested_inline(A, B, n); + return prod - sum; +} + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] 
= result; + } +} + +int main() { + float A[SIZE] = {0.0f}; + float expectedA[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = 1.0f; + expectedA[i] = 1.0f; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + B[i] = -1.0f; + expectedB[i] = -1.0f; + } + float C[SIZE] = {0.0f}; + float expectedC[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + C[i] = 0.0f; + expectedC[i] = 0.0f; + } + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("Calculated C Output: %f\n", C[i]); + printf("Expected C Output: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/transpose.c b/src/dios-egraphs/Diospyros/llvm-tests/transpose.c new file mode 100644 index 00000000..934d651f --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/transpose.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +void naive_transpose(float a[SIZE * SIZE], int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} + +void no_opt_naive_transpose(float a[SIZE * SIZE], int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} + +int main() { + float calculated[SIZE * SIZE] = {0}; + for (int i = 0; i < SIZE * SIZE; i++) { + if (i % 2 == 0) { + calculated[i] = 1.0f; + } else { + calculated[i] = 0.0f; + } + } + float expected[SIZE * SIZE] = {0}; + for (int i = 0; i < SIZE * SIZE; i++) { + if (i % 2 == 0) { + expected[i] = 1.0f; + } else { + expected[i] = 0.0f; + } + } + naive_transpose(calculated, SIZE); + no_opt_naive_transpose(expected, SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + printf("A Transpose Calculated: %f\n", calculated[i]); + printf("A Transpose Expected: %f\n", expected[i]); + assert(fabs(expected[i] - calculated[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/polybench-tests/3mm.c b/src/dios-egraphs/Diospyros/polybench-tests/3mm.c new file mode 100644 index 00000000..d67178b0 --- /dev/null +++ b/src/dios-egraphs/Diospyros/polybench-tests/3mm.c @@ -0,0 +1,169 @@ +/** + * This version is stamped on May 10, 2016 + * + * Contact: + * Louis-Noel Pouchet + * Tomofumi Yuki + * + * Web address: http://polybench.sourceforge.net + */ +/* 3mm.c: this file is part of PolyBench/C */ + +#include +#include +#include +#include + +/* Include polybench common header. */ +#include + +/* Include benchmark-specific header. */ +#include "3mm.h" + + +/* Array initialization. */ +static +void init_array(int ni, int nj, int nk, int nl, int nm, + DATA_TYPE POLYBENCH_2D(A,NI,NK,ni,nk), + DATA_TYPE POLYBENCH_2D(B,NK,NJ,nk,nj), + DATA_TYPE POLYBENCH_2D(C,NJ,NM,nj,nm), + DATA_TYPE POLYBENCH_2D(D,NM,NL,nm,nl)) +{ + int i, j; + + for (i = 0; i < ni; i++) + for (j = 0; j < nk; j++) + A[i][j] = (DATA_TYPE) ((i*j+1) % ni) / (5*ni); + for (i = 0; i < nk; i++) + for (j = 0; j < nj; j++) + B[i][j] = (DATA_TYPE) ((i*(j+1)+2) % nj) / (5*nj); + for (i = 0; i < nj; i++) + for (j = 0; j < nm; j++) + C[i][j] = (DATA_TYPE) (i*(j+3) % nl) / (5*nl); + for (i = 0; i < nm; i++) + for (j = 0; j < nl; j++) + D[i][j] = (DATA_TYPE) ((i*(j+2)+2) % nk) / (5*nk); +} + + +/* DCE code. 
Must scan the entire live-out data. + Can be used also to check the correctness of the output. */ +static +void print_array(int ni, int nl, + DATA_TYPE POLYBENCH_2D(G,NI,NL,ni,nl)) +{ + int i, j; + + POLYBENCH_DUMP_START; + POLYBENCH_DUMP_BEGIN("G"); + for (i = 0; i < ni; i++) + for (j = 0; j < nl; j++) { + if ((i * ni + j) % 20 == 0) fprintf (POLYBENCH_DUMP_TARGET, "\n"); + fprintf (POLYBENCH_DUMP_TARGET, DATA_PRINTF_MODIFIER, G[i][j]); + } + POLYBENCH_DUMP_END("G"); + POLYBENCH_DUMP_FINISH; +} + + +/* Main computational kernel. The whole function will be timed, + including the call and return. */ +static +void kernel_3mm(int ni, int nj, int nk, int nl, int nm, + DATA_TYPE POLYBENCH_2D(E,NI,NJ,ni,nj), + DATA_TYPE POLYBENCH_2D(A,NI,NK,ni,nk), + DATA_TYPE POLYBENCH_2D(B,NK,NJ,nk,nj), + DATA_TYPE POLYBENCH_2D(F,NJ,NL,nj,nl), + DATA_TYPE POLYBENCH_2D(C,NJ,NM,nj,nm), + DATA_TYPE POLYBENCH_2D(D,NM,NL,nm,nl), + DATA_TYPE POLYBENCH_2D(G,NI,NL,ni,nl)) +{ + int i, j, k; + +#pragma scop + /* E := A*B */ + for (i = 0; i < _PB_NI; i++) + for (j = 0; j < _PB_NJ; j++) + { + E[i][j] = SCALAR_VAL(0.0); + for (k = 0; k < _PB_NK; ++k) + E[i][j] += A[i][k] * B[k][j]; + } + /* F := C*D */ + for (i = 0; i < _PB_NJ; i++) + for (j = 0; j < _PB_NL; j++) + { + F[i][j] = SCALAR_VAL(0.0); + for (k = 0; k < _PB_NM; ++k) + F[i][j] += C[i][k] * D[k][j]; + } + /* G := E*F */ + for (i = 0; i < _PB_NI; i++) + for (j = 0; j < _PB_NL; j++) + { + G[i][j] = SCALAR_VAL(0.0); + for (k = 0; k < _PB_NJ; ++k) + G[i][j] += E[i][k] * F[k][j]; + } +#pragma endscop + +} + + +int main(int argc, char** argv) +{ + /* Retrieve problem size. */ + int ni = NI; + int nj = NJ; + int nk = NK; + int nl = NL; + int nm = NM; + + /* Variable declaration/allocation. */ + POLYBENCH_2D_ARRAY_DECL(E, DATA_TYPE, NI, NJ, ni, nj); + POLYBENCH_2D_ARRAY_DECL(A, DATA_TYPE, NI, NK, ni, nk); + POLYBENCH_2D_ARRAY_DECL(B, DATA_TYPE, NK, NJ, nk, nj); + POLYBENCH_2D_ARRAY_DECL(F, DATA_TYPE, NJ, NL, nj, nl); + POLYBENCH_2D_ARRAY_DECL(C, DATA_TYPE, NJ, NM, nj, nm); + POLYBENCH_2D_ARRAY_DECL(D, DATA_TYPE, NM, NL, nm, nl); + POLYBENCH_2D_ARRAY_DECL(G, DATA_TYPE, NI, NL, ni, nl); + + /* Initialize array(s). */ + init_array (ni, nj, nk, nl, nm, + POLYBENCH_ARRAY(A), + POLYBENCH_ARRAY(B), + POLYBENCH_ARRAY(C), + POLYBENCH_ARRAY(D)); + + /* Start timer. */ + polybench_start_instruments; + + /* Run kernel. */ + kernel_3mm (ni, nj, nk, nl, nm, + POLYBENCH_ARRAY(E), + POLYBENCH_ARRAY(A), + POLYBENCH_ARRAY(B), + POLYBENCH_ARRAY(F), + POLYBENCH_ARRAY(C), + POLYBENCH_ARRAY(D), + POLYBENCH_ARRAY(G)); + + /* Stop and print timer. */ + polybench_stop_instruments; + polybench_print_instruments; + + /* Prevent dead-code elimination. All live-out data must be printed + by the function call in argument. */ + polybench_prevent_dce(print_array(ni, nl, POLYBENCH_ARRAY(G))); + + /* Be clean. */ + POLYBENCH_FREE_ARRAY(E); + POLYBENCH_FREE_ARRAY(A); + POLYBENCH_FREE_ARRAY(B); + POLYBENCH_FREE_ARRAY(F); + POLYBENCH_FREE_ARRAY(C); + POLYBENCH_FREE_ARRAY(D); + POLYBENCH_FREE_ARRAY(G); + + return 0; +} diff --git a/src/dios-egraphs/Diospyros/polybench-tests/3mm.h b/src/dios-egraphs/Diospyros/polybench-tests/3mm.h new file mode 100644 index 00000000..8cc464eb --- /dev/null +++ b/src/dios-egraphs/Diospyros/polybench-tests/3mm.h @@ -0,0 +1,98 @@ +/** + * This version is stamped on May 10, 2016 + * + * Contact: + * Louis-Noel Pouchet + * Tomofumi Yuki + * + * Web address: http://polybench.sourceforge.net + */ +#ifndef _3MM_H +# define _3MM_H + +/* Default to LARGE_DATASET. 
*/ +# if !defined(MINI_DATASET) && !defined(SMALL_DATASET) && !defined(MEDIUM_DATASET) && !defined(LARGE_DATASET) && !defined(EXTRALARGE_DATASET) +# define LARGE_DATASET +# endif + +# if !defined(NI) && !defined(NJ) && !defined(NK) && !defined(NL) && !defined(NM) +/* Define sample dataset sizes. */ +# ifdef MINI_DATASET +# define NI 16 +# define NJ 18 +# define NK 20 +# define NL 22 +# define NM 24 +# endif + +# ifdef SMALL_DATASET +# define NI 40 +# define NJ 50 +# define NK 60 +# define NL 70 +# define NM 80 +# endif + +# ifdef MEDIUM_DATASET +# define NI 180 +# define NJ 190 +# define NK 200 +# define NL 210 +# define NM 220 +# endif + +# ifdef LARGE_DATASET +# define NI 800 +# define NJ 900 +# define NK 1000 +# define NL 1100 +# define NM 1200 +# endif + +# ifdef EXTRALARGE_DATASET +# define NI 1600 +# define NJ 1800 +# define NK 2000 +# define NL 2200 +# define NM 2400 +# endif + + +#endif /* !(NI NJ NK NL NM) */ + +# define _PB_NI POLYBENCH_LOOP_BOUND(NI,ni) +# define _PB_NJ POLYBENCH_LOOP_BOUND(NJ,nj) +# define _PB_NK POLYBENCH_LOOP_BOUND(NK,nk) +# define _PB_NL POLYBENCH_LOOP_BOUND(NL,nl) +# define _PB_NM POLYBENCH_LOOP_BOUND(NM,nm) + + +/* Default data type */ +# if !defined(DATA_TYPE_IS_INT) && !defined(DATA_TYPE_IS_FLOAT) && !defined(DATA_TYPE_IS_DOUBLE) +# define DATA_TYPE_IS_DOUBLE +# endif + +#ifdef DATA_TYPE_IS_INT +# define DATA_TYPE int +# define DATA_PRINTF_MODIFIER "%d " +#endif + +#ifdef DATA_TYPE_IS_FLOAT +# define DATA_TYPE float +# define DATA_PRINTF_MODIFIER "%0.2f " +# define SCALAR_VAL(x) x##f +# define SQRT_FUN(x) sqrtf(x) +# define EXP_FUN(x) expf(x) +# define POW_FUN(x,y) powf(x,y) +# endif + +#ifdef DATA_TYPE_IS_DOUBLE +# define DATA_TYPE double +# define DATA_PRINTF_MODIFIER "%0.2lf " +# define SCALAR_VAL(x) x +# define SQRT_FUN(x) sqrt(x) +# define EXP_FUN(x) exp(x) +# define POW_FUN(x,y) pow(x,y) +# endif + +#endif /* !_3MM_H */ diff --git a/src/dios-egraphs/Diospyros/polybench-tests/polybench.c b/src/dios-egraphs/Diospyros/polybench-tests/polybench.c new file mode 100644 index 00000000..e3359337 --- /dev/null +++ b/src/dios-egraphs/Diospyros/polybench-tests/polybench.c @@ -0,0 +1,569 @@ +/** + * This version is stamped on May 10, 2016 + * + * Contact: + * Louis-Noel Pouchet + * Tomofumi Yuki + * + * Web address: http://polybench.sourceforge.net + */ +/* polybench.c: this file is part of PolyBench/C */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef _OPENMP +# include +#endif + +#if defined(POLYBENCH_PAPI) +# undef POLYBENCH_PAPI +# include "polybench.h" +# define POLYBENCH_PAPI +#else +# include "polybench.h" +#endif + +/* By default, collect PAPI counters on thread 0. */ +#ifndef POLYBENCH_THREAD_MONITOR +# define POLYBENCH_THREAD_MONITOR 0 +#endif + +/* Total LLC cache size. By default 32+MB.. */ +#ifndef POLYBENCH_CACHE_SIZE_KB +# define POLYBENCH_CACHE_SIZE_KB 32770 +#endif + + +int polybench_papi_counters_threadid = POLYBENCH_THREAD_MONITOR; +double polybench_program_total_flops = 0; + +#ifdef POLYBENCH_PAPI +# include +# define POLYBENCH_MAX_NB_PAPI_COUNTERS 96 + char* _polybench_papi_eventlist[] = { +#include "papi_counters.list" + NULL + }; + int polybench_papi_eventset; + int polybench_papi_eventlist[POLYBENCH_MAX_NB_PAPI_COUNTERS]; + long_long polybench_papi_values[POLYBENCH_MAX_NB_PAPI_COUNTERS]; + +#endif + +/* + * Allocation table, to enable inter-array padding. All data allocated + * with polybench_alloc_data should be freed with polybench_free_data. 
+ * + */ +#define NB_INITIAL_TABLE_ENTRIES 512 +struct polybench_data_ptrs +{ + void** user_view; + void** real_ptr; + int nb_entries; + int nb_avail_entries; +}; +static struct polybench_data_ptrs* _polybench_alloc_table = NULL; +static size_t polybench_inter_array_padding_sz = 0; + +/* Timer code (gettimeofday). */ +double polybench_t_start, polybench_t_end; +/* Timer code (RDTSC). */ +unsigned long long int polybench_c_start, polybench_c_end; + +static +double rtclock() +{ +#if defined(POLYBENCH_TIME) || defined(POLYBENCH_GFLOPS) + struct timeval Tp; + int stat; + stat = gettimeofday (&Tp, NULL); + if (stat != 0) + printf ("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +#else + return 0; +#endif +} + + +#ifdef POLYBENCH_CYCLE_ACCURATE_TIMER +static +unsigned long long int rdtsc() +{ + unsigned long long int ret = 0; + unsigned int cycles_lo; + unsigned int cycles_hi; + __asm__ volatile ("RDTSC" : "=a" (cycles_lo), "=d" (cycles_hi)); + ret = (unsigned long long int)cycles_hi << 32 | cycles_lo; + + return ret; +} +#endif + +void polybench_flush_cache() +{ + int cs = POLYBENCH_CACHE_SIZE_KB * 1024 / sizeof(double); + double* flush = (double*) calloc (cs, sizeof(double)); + int i; + double tmp = 0.0; +#ifdef _OPENMP +#pragma omp parallel for reduction(+:tmp) private(i) +#endif + for (i = 0; i < cs; i++) + tmp += flush[i]; + assert (tmp <= 10.0); + free (flush); +} + + +#ifdef POLYBENCH_LINUX_FIFO_SCHEDULER +void polybench_linux_fifo_scheduler() +{ + /* Use FIFO scheduler to limit OS interference. Program must be run + as root, and this works only for Linux kernels. */ + struct sched_param schedParam; + schedParam.sched_priority = sched_get_priority_max (SCHED_FIFO); + sched_setscheduler (0, SCHED_FIFO, &schedParam); +} + + +void polybench_linux_standard_scheduler() +{ + /* Restore to standard scheduler policy. */ + struct sched_param schedParam; + schedParam.sched_priority = sched_get_priority_max (SCHED_OTHER); + sched_setscheduler (0, SCHED_OTHER, &schedParam); +} +#endif + +#ifdef POLYBENCH_PAPI + +static +void test_fail(char *file, int line, char *call, int retval) +{ + char buf[128]; + + memset(buf, '\0', sizeof(buf)); + if (retval != 0) + fprintf (stdout,"%-40s FAILED\nLine # %d\n", file, line); + else + { + fprintf (stdout,"%-40s SKIPPED\n", file); + fprintf (stdout,"Line # %d\n", line); + } + if (retval == PAPI_ESYS) + { + sprintf (buf, "System error in %s", call); + perror (buf); + } + else if (retval > 0) + fprintf (stdout,"Error: %s\n", call); + else if (retval == 0) + fprintf (stdout,"Error: %s\n", call); + else + { + char errstring[PAPI_MAX_STR_LEN]; + // PAPI 5.4.3 has changed the API for PAPI_perror. 
+ #if defined (PAPI_VERSION) && ((PAPI_VERSION_MAJOR(PAPI_VERSION) == 5 && PAPI_VERSION_MINOR(PAPI_VERSION) >= 4) || PAPI_VERSION_MAJOR(PAPI_VERSION) > 5) + fprintf (stdout, "Error in %s: %s\n", call, PAPI_strerror(retval)); + #else + PAPI_perror (retval, errstring, PAPI_MAX_STR_LEN); + fprintf (stdout,"Error in %s: %s\n", call, errstring); + #endif + } + fprintf (stdout,"\n"); + if (PAPI_is_initialized ()) + PAPI_shutdown (); + exit (1); +} + + +void polybench_papi_init() +{ +# ifdef _OPENMP +#pragma omp parallel + { +#pragma omp master + { + if (omp_get_max_threads () < polybench_papi_counters_threadid) + polybench_papi_counters_threadid = omp_get_max_threads () - 1; + } +#pragma omp barrier + + if (omp_get_thread_num () == polybench_papi_counters_threadid) + { +# endif + int retval; + polybench_papi_eventset = PAPI_NULL; + if ((retval = PAPI_library_init (PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) + test_fail (__FILE__, __LINE__, "PAPI_library_init", retval); + if ((retval = PAPI_create_eventset (&polybench_papi_eventset)) + != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_create_eventset", retval); + int k; + for (k = 0; _polybench_papi_eventlist[k]; ++k) + { + if ((retval = + PAPI_event_name_to_code (_polybench_papi_eventlist[k], + &(polybench_papi_eventlist[k]))) + != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_event_name_to_code", retval); + } + polybench_papi_eventlist[k] = 0; + + +# ifdef _OPENMP + } + } +#pragma omp barrier +# endif +} + + +void polybench_papi_close() +{ +# ifdef _OPENMP +#pragma omp parallel + { + if (omp_get_thread_num () == polybench_papi_counters_threadid) + { +# endif + int retval; + if ((retval = PAPI_destroy_eventset (&polybench_papi_eventset)) + != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_destroy_eventset", retval); + if (PAPI_is_initialized ()) + PAPI_shutdown (); +# ifdef _OPENMP + } + } +#pragma omp barrier +# endif +} + +int polybench_papi_start_counter(int evid) +{ +# ifndef POLYBENCH_NO_FLUSH_CACHE + polybench_flush_cache(); +# endif + +# ifdef _OPENMP +# pragma omp parallel + { + if (omp_get_thread_num () == polybench_papi_counters_threadid) + { +# endif + + int retval = 1; + char descr[PAPI_MAX_STR_LEN]; + PAPI_event_info_t evinfo; + PAPI_event_code_to_name (polybench_papi_eventlist[evid], descr); + if (PAPI_add_event (polybench_papi_eventset, + polybench_papi_eventlist[evid]) != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_add_event", 1); + if (PAPI_get_event_info (polybench_papi_eventlist[evid], &evinfo) + != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_get_event_info", retval); + if ((retval = PAPI_start (polybench_papi_eventset)) != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_start", retval); +# ifdef _OPENMP + } + } +#pragma omp barrier +# endif + return 0; +} + + +void polybench_papi_stop_counter(int evid) +{ +# ifdef _OPENMP +# pragma omp parallel + { + if (omp_get_thread_num () == polybench_papi_counters_threadid) + { +# endif + int retval; + long_long values[1]; + values[0] = 0; + if ((retval = PAPI_read (polybench_papi_eventset, &values[0])) + != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_read", retval); + + if ((retval = PAPI_stop (polybench_papi_eventset, NULL)) != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_stop", retval); + + polybench_papi_values[evid] = values[0]; + + if ((retval = PAPI_remove_event + (polybench_papi_eventset, + polybench_papi_eventlist[evid])) != PAPI_OK) + test_fail (__FILE__, __LINE__, "PAPI_remove_event", retval); +# ifdef _OPENMP + } + } +#pragma omp barrier +# endif +} + + +void 
polybench_papi_print() +{ + int verbose = 0; +# ifdef _OPENMP +# pragma omp parallel + { + if (omp_get_thread_num() == polybench_papi_counters_threadid) + { +#ifdef POLYBENCH_PAPI_VERBOSE + verbose = 1; +#endif + if (verbose) + printf ("On thread %d:\n", polybench_papi_counters_threadid); +#endif + int evid; + for (evid = 0; polybench_papi_eventlist[evid] != 0; ++evid) + { + if (verbose) + printf ("%s=", _polybench_papi_eventlist[evid]); + printf ("%llu ", polybench_papi_values[evid]); + if (verbose) + printf ("\n"); + } + printf ("\n"); +# ifdef _OPENMP + } + } +#pragma omp barrier +# endif +} + +#endif +/* ! POLYBENCH_PAPI */ + +void polybench_prepare_instruments() +{ +#ifndef POLYBENCH_NO_FLUSH_CACHE + polybench_flush_cache (); +#endif +#ifdef POLYBENCH_LINUX_FIFO_SCHEDULER + polybench_linux_fifo_scheduler (); +#endif +} + + +void polybench_timer_start() +{ + polybench_prepare_instruments (); +#ifndef POLYBENCH_CYCLE_ACCURATE_TIMER + polybench_t_start = rtclock (); +#else + polybench_c_start = rdtsc (); +#endif +} + + +void polybench_timer_stop() +{ +#ifndef POLYBENCH_CYCLE_ACCURATE_TIMER + polybench_t_end = rtclock (); +#else + polybench_c_end = rdtsc (); +#endif +#ifdef POLYBENCH_LINUX_FIFO_SCHEDULER + polybench_linux_standard_scheduler (); +#endif +} + + +void polybench_timer_print() +{ +#ifdef POLYBENCH_GFLOPS + if (polybench_program_total_flops == 0) + { + printf ("[PolyBench][WARNING] Program flops not defined, use polybench_set_program_flops(value)\n"); + printf ("%0.6lf\n", polybench_t_end - polybench_t_start); + } + else + printf ("%0.2lf\n", + (polybench_program_total_flops / + (double)(polybench_t_end - polybench_t_start)) / 1000000000); +#else +# ifndef POLYBENCH_CYCLE_ACCURATE_TIMER + printf ("%0.6f\n", polybench_t_end - polybench_t_start); +# else + printf ("%Ld\n", polybench_c_end - polybench_c_start); +# endif +#endif +} + +/* + * These functions are used only if the user defines a specific + * inter-array padding. It grows a global structure, + * _polybench_alloc_table, which keeps track of the data allocated via + * polybench_alloc_data (on which inter-array padding is applied), so + * that the original, non-shifted pointer can be recovered when + * calling polybench_free_data. + * + */ +#ifdef POLYBENCH_ENABLE_INTARRAY_PAD +static +void grow_alloc_table() +{ + if (_polybench_alloc_table == NULL || + (_polybench_alloc_table->nb_entries % NB_INITIAL_TABLE_ENTRIES) != 0 || + _polybench_alloc_table->nb_avail_entries != 0) + { + /* Should never happen if the API is properly used. 
*/ + fprintf (stderr, "[ERROR] Inter-array padding requires to use polybench_alloc_data and polybench_free_data\n"); + exit (1); + } + size_t sz = _polybench_alloc_table->nb_entries; + sz += NB_INITIAL_TABLE_ENTRIES; + _polybench_alloc_table->user_view = + realloc (_polybench_alloc_table->user_view, sz * sizeof(void*)); + assert(_polybench_alloc_table->user_view != NULL); + _polybench_alloc_table->real_ptr = + realloc (_polybench_alloc_table->real_ptr, sz * sizeof(void*)); + assert(_polybench_alloc_table->real_ptr != NULL); + _polybench_alloc_table->nb_avail_entries = NB_INITIAL_TABLE_ENTRIES; +} + +static +void* register_padded_pointer(void* ptr, size_t orig_sz, size_t padded_sz) +{ + if (_polybench_alloc_table == NULL) + { + fprintf (stderr, "[ERROR] Inter-array padding requires to use polybench_alloc_data and polybench_free_data\n"); + exit (1); + } + if (_polybench_alloc_table->nb_avail_entries == 0) + grow_alloc_table (); + int id = _polybench_alloc_table->nb_entries++; + _polybench_alloc_table->real_ptr[id] = ptr; + _polybench_alloc_table->user_view[id] = ptr + (padded_sz - orig_sz); + + return _polybench_alloc_table->user_view[id]; +} + + +static +void +free_data_from_alloc_table (void* ptr) +{ + if (_polybench_alloc_table != NULL && _polybench_alloc_table->nb_entries > 0) + { + int i; + for (i = 0; i < _polybench_alloc_table->nb_entries; ++i) + if (_polybench_alloc_table->user_view[i] == ptr || + _polybench_alloc_table->real_ptr[i] == ptr) + break; + if (i != _polybench_alloc_table->nb_entries) + { + free (_polybench_alloc_table->real_ptr[i]); + for (; i < _polybench_alloc_table->nb_entries - 1; ++i) + { + _polybench_alloc_table->user_view[i] = + _polybench_alloc_table->user_view[i + 1]; + _polybench_alloc_table->real_ptr[i] = + _polybench_alloc_table->real_ptr[i + 1]; + } + _polybench_alloc_table->nb_entries--; + _polybench_alloc_table->nb_avail_entries++; + if (_polybench_alloc_table->nb_entries == 0) + { + free (_polybench_alloc_table->user_view); + free (_polybench_alloc_table->real_ptr); + free (_polybench_alloc_table); + _polybench_alloc_table = NULL; + } + } + } +} + +static +void check_alloc_table_state() +{ + if (_polybench_alloc_table == NULL) + { + _polybench_alloc_table = (struct polybench_data_ptrs*) + malloc (sizeof(struct polybench_data_ptrs)); + assert(_polybench_alloc_table != NULL); + _polybench_alloc_table->user_view = + (void**) malloc (sizeof(void*) * NB_INITIAL_TABLE_ENTRIES); + assert(_polybench_alloc_table->user_view != NULL); + _polybench_alloc_table->real_ptr = + (void**) malloc (sizeof(void*) * NB_INITIAL_TABLE_ENTRIES); + assert(_polybench_alloc_table->real_ptr != NULL); + _polybench_alloc_table->nb_entries = 0; + _polybench_alloc_table->nb_avail_entries = NB_INITIAL_TABLE_ENTRIES; + } +} + +#endif // !POLYBENCH_ENABLE_INTARRAY_PAD + + +static +void* +xmalloc(size_t alloc_sz) +{ + void* ret = NULL; + /* By default, post-pad the arrays. Safe behavior, but likely useless. */ + polybench_inter_array_padding_sz += POLYBENCH_INTER_ARRAY_PADDING_FACTOR; + size_t padded_sz = alloc_sz + polybench_inter_array_padding_sz; + int err = posix_memalign (&ret, 4096, padded_sz); + if (! ret || err) + { + fprintf (stderr, "[PolyBench] posix_memalign: cannot allocate memory"); + exit (1); + } + /* Safeguard: this is invoked only if polybench.c has been compiled + with inter-array padding support from polybench.h. If so, move + the starting address of the allocation and return it to the + user. 
The original pointer is registered in an allocation table + internal to polybench.c. Data must then be freed using + polybench_free_data, which will inspect the allocation table to + free the original pointer.*/ +#ifdef POLYBENCH_ENABLE_INTARRAY_PAD + /* This moves the 'ret' pointer by (padded_sz - alloc_sz) positions, and + registers it in the lookup table for future free using + polybench_free_data. */ + ret = register_padded_pointer(ret, alloc_sz, padded_sz); +#endif + + return ret; +} + + +void polybench_free_data(void* ptr) +{ +#ifdef POLYBENCH_ENABLE_INTARRAY_PAD + free_data_from_alloc_table (ptr); +#else + free (ptr); +#endif +} + + +void* polybench_alloc_data(unsigned long long int n, int elt_size) +{ +#ifdef POLYBENCH_ENABLE_INTARRAY_PAD + check_alloc_table_state (); +#endif + + /// FIXME: detect overflow! + size_t val = n; + val *= elt_size; + void* ret = xmalloc (val); + + return ret; +} diff --git a/src/dios-egraphs/Diospyros/polybench-tests/polybench.h b/src/dios-egraphs/Diospyros/polybench-tests/polybench.h new file mode 100644 index 00000000..e6f30f5a --- /dev/null +++ b/src/dios-egraphs/Diospyros/polybench-tests/polybench.h @@ -0,0 +1,241 @@ +/** + * This version is stamped on May 10, 2016 + * + * Contact: + * Louis-Noel Pouchet + * Tomofumi Yuki + * + * Web address: http://polybench.sourceforge.net + */ +/* + * polybench.h: this file is part of PolyBench/C + * + * Polybench header for instrumentation. + * + * Programs must be compiled with `-I utilities utilities/polybench.c' + * + * Optionally, one can define: + * + * -DPOLYBENCH_TIME, to report the execution time, + * OR (exclusive): + * -DPOLYBENCH_PAPI, to use PAPI H/W counters (defined in polybench.c) + * + * + * See README or utilities/polybench.c for additional options. + * + */ +#ifndef POLYBENCH_H +# define POLYBENCH_H + +# include + +/* Array padding. By default, none is used. */ +# ifndef POLYBENCH_PADDING_FACTOR +/* default: */ +# define POLYBENCH_PADDING_FACTOR 0 +# endif + +/* Inter-array padding, for use with . By default, none is used. */ +# ifndef POLYBENCH_INTER_ARRAY_PADDING_FACTOR +/* default: */ +# define POLYBENCH_INTER_ARRAY_PADDING_FACTOR 0 +# undef POLYBENCH_ENABLE_INTARRAY_PAD +# else +# define POLYBENCH_ENABLE_INTARRAY_PAD +# endif + + +/* C99 arrays in function prototype. By default, do not use. */ +# ifdef POLYBENCH_USE_C99_PROTO +# define POLYBENCH_C99_SELECT(x,y) y +# else +/* default: */ +# define POLYBENCH_C99_SELECT(x,y) x +# endif + + +/* Scalar loop bounds in SCoPs. By default, use parametric loop bounds. */ +# ifdef POLYBENCH_USE_SCALAR_LB +# define POLYBENCH_LOOP_BOUND(x,y) x +# else +/* default: */ +# define POLYBENCH_LOOP_BOUND(x,y) y +# endif + +/* Use the 'restrict' keyword to declare that the different arrays do not + * alias. By default, we do not use it as it is only supported in C99 and + * even here several compilers do not properly get it. + */ +# ifdef POLYBENCH_USE_RESTRICT +# define POLYBENCH_RESTRICT restrict +# else +/* default: */ +# define POLYBENCH_RESTRICT +# endif + +/* Macros to reference an array. Generic for heap and stack arrays + (C99). Each array dimensionality has his own macro, to be used at + declaration or as a function argument. 
+ Example: + int b[x] => POLYBENCH_1D_ARRAY(b, x) + int A[N][N] => POLYBENCH_2D_ARRAY(A, N, N) +*/ +# ifndef POLYBENCH_STACK_ARRAYS +# define POLYBENCH_ARRAY(x) *x +# ifdef POLYBENCH_ENABLE_INTARRAY_PAD +# define POLYBENCH_FREE_ARRAY(x) polybench_free_data((void*)x); +# else +# define POLYBENCH_FREE_ARRAY(x) free((void*)x); +# endif +# define POLYBENCH_DECL_VAR(x) (*x) +# else +# define POLYBENCH_ARRAY(x) x +# define POLYBENCH_FREE_ARRAY(x) +# define POLYBENCH_DECL_VAR(x) x +# endif +/* Macros for using arrays in the function prototypes. */ +# define POLYBENCH_1D(var, dim1,ddim1) var[POLYBENCH_RESTRICT POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_2D(var, dim1, dim2, ddim1, ddim2) var[POLYBENCH_RESTRICT POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_3D(var, dim1, dim2, dim3, ddim1, ddim2, ddim3) var[POLYBENCH_RESTRICT POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_4D(var, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) var[POLYBENCH_RESTRICT POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_5D(var, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) var[POLYBENCH_RESTRICT POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim5,ddim5) + POLYBENCH_PADDING_FACTOR] +/* Macros for using arrays within the functions. */ +# define POLYBENCH_1D_F(var, dim1,ddim1) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_2D_F(var, dim1, dim2, ddim1, ddim2) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_3D_F(var, dim1, dim2, dim3, ddim1, ddim2, ddim3) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_4D_F(var, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_5D_F(var, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim5,ddim5) + POLYBENCH_PADDING_FACTOR] + + +/* Macros to allocate heap arrays. 
+ Example: + polybench_alloc_2d_array(N, M, double) => allocates N x M x sizeof(double) + and returns a pointer to the 2d array + */ +# define POLYBENCH_ALLOC_1D_ARRAY(n1, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data (n1 + POLYBENCH_PADDING_FACTOR, sizeof(type)) +# define POLYBENCH_ALLOC_2D_ARRAY(n1, n2, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR), sizeof(type)) +# define POLYBENCH_ALLOC_3D_ARRAY(n1, n2, n3, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR), sizeof(type)) +# define POLYBENCH_ALLOC_4D_ARRAY(n1, n2, n3, n4, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR][n4 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR) * (n4 + POLYBENCH_PADDING_FACTOR), sizeof(type)) +# define POLYBENCH_ALLOC_5D_ARRAY(n1, n2, n3, n4, n5, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR][n4 + POLYBENCH_PADDING_FACTOR][n5 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR) * (n4 + POLYBENCH_PADDING_FACTOR) * (n5 + POLYBENCH_PADDING_FACTOR), sizeof(type)) + +/* Macros for array declaration. */ +# ifndef POLYBENCH_STACK_ARRAYS +# define POLYBENCH_1D_ARRAY_DECL(var, type, dim1, ddim1) \ + type POLYBENCH_1D_F(POLYBENCH_DECL_VAR(var), dim1, ddim1); \ + var = POLYBENCH_ALLOC_1D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), type); +# define POLYBENCH_2D_ARRAY_DECL(var, type, dim1, dim2, ddim1, ddim2) \ + type POLYBENCH_2D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, ddim1, ddim2); \ + var = POLYBENCH_ALLOC_2D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), type); +# define POLYBENCH_3D_ARRAY_DECL(var, type, dim1, dim2, dim3, ddim1, ddim2, ddim3) \ + type POLYBENCH_3D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, ddim1, ddim2, ddim3); \ + var = POLYBENCH_ALLOC_3D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), type); +# define POLYBENCH_4D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) \ + type POLYBENCH_4D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4); \ + var = POLYBENCH_ALLOC_4D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), POLYBENCH_C99_SELECT(dim4, ddim4), type); +# define POLYBENCH_5D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) \ + type POLYBENCH_5D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5); \ + var = POLYBENCH_ALLOC_5D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), POLYBENCH_C99_SELECT(dim4, ddim4), POLYBENCH_C99_SELECT(dim5, ddim5), type); +# else +# define POLYBENCH_1D_ARRAY_DECL(var, type, dim1, ddim1) \ + type POLYBENCH_1D_F(POLYBENCH_DECL_VAR(var), dim1, ddim1); +# define POLYBENCH_2D_ARRAY_DECL(var, type, dim1, dim2, ddim1, ddim2) \ + type POLYBENCH_2D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, ddim1, 
ddim2); +# define POLYBENCH_3D_ARRAY_DECL(var, type, dim1, dim2, dim3, ddim1, ddim2, ddim3) \ + type POLYBENCH_3D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, ddim1, ddim2, ddim3); +# define POLYBENCH_4D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) \ + type POLYBENCH_4D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4); +# define POLYBENCH_5D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) \ + type POLYBENCH_5D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5); +# endif + + +/* Dead-code elimination macros. Use argc/argv for the run-time check. */ +# ifndef POLYBENCH_DUMP_ARRAYS +# define POLYBENCH_DCE_ONLY_CODE if (argc > 42 && ! strcmp(argv[0], "")) +# else +# define POLYBENCH_DCE_ONLY_CODE +# endif + +#define POLYBENCH_DUMP_TARGET stderr +#define POLYBENCH_DUMP_START fprintf(POLYBENCH_DUMP_TARGET, "==BEGIN DUMP_ARRAYS==\n") +#define POLYBENCH_DUMP_FINISH fprintf(POLYBENCH_DUMP_TARGET, "==END DUMP_ARRAYS==\n") +#define POLYBENCH_DUMP_BEGIN(s) fprintf(POLYBENCH_DUMP_TARGET, "begin dump: %s", s) +#define POLYBENCH_DUMP_END(s) fprintf(POLYBENCH_DUMP_TARGET, "\nend dump: %s\n", s) + +# define polybench_prevent_dce(func) \ + POLYBENCH_DCE_ONLY_CODE \ + func + + +/* Performance-related instrumentation. See polybench.c */ +# define polybench_start_instruments +# define polybench_stop_instruments +# define polybench_print_instruments + + +/* PAPI support. */ +# ifdef POLYBENCH_PAPI +extern const unsigned int polybench_papi_eventlist[]; +# undef polybench_start_instruments +# undef polybench_stop_instruments +# undef polybench_print_instruments +# define polybench_set_papi_thread_report(x) \ + polybench_papi_counters_threadid = x; +# define polybench_start_instruments \ + polybench_prepare_instruments(); \ + polybench_papi_init(); \ + int evid; \ + for (evid = 0; polybench_papi_eventlist[evid] != 0; evid++) \ + { \ + if (polybench_papi_start_counter(evid)) \ + continue; \ + +# define polybench_stop_instruments \ + polybench_papi_stop_counter(evid); \ + } \ + polybench_papi_close(); \ + +# define polybench_print_instruments polybench_papi_print(); +# endif + + +/* Timing support. */ +# if defined(POLYBENCH_TIME) || defined(POLYBENCH_GFLOPS) +# undef polybench_start_instruments +# undef polybench_stop_instruments +# undef polybench_print_instruments +# define polybench_start_instruments polybench_timer_start(); +# define polybench_stop_instruments polybench_timer_stop(); +# define polybench_print_instruments polybench_timer_print(); +extern double polybench_program_total_flops; +extern void polybench_timer_start(); +extern void polybench_timer_stop(); +extern void polybench_timer_print(); +# endif + +/* PAPI support. */ +# ifdef POLYBENCH_PAPI +extern int polybench_papi_start_counter(int evid); +extern void polybench_papi_stop_counter(int evid); +extern void polybench_papi_init(); +extern void polybench_papi_close(); +extern void polybench_papi_print(); +# endif + +/* Function prototypes. */ +extern void* polybench_alloc_data(unsigned long long int n, int elt_size); +extern void polybench_free_data(void* ptr); + +/* PolyBench internal functions that should not be directly called by */ +/* the user, unless when designing customized execution profiling */ +/* approaches. 
*/ +extern void polybench_flush_cache(); +extern void polybench_prepare_instruments(); + + +#endif /* !POLYBENCH_H */ diff --git a/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-naive-norm.c b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-naive-norm.c new file mode 100644 index 00000000..83e5ac9f --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-naive-norm.c @@ -0,0 +1,41 @@ +#include +#include +#include +#include +#include + +#define SIZE 10 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float naive_norm(float x[SIZE], int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float x_in[SIZE]; + for (int i = 0; i < SIZE; i++) { + x_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // calculate up c_out + float calculated = naive_norm(x_in, SIZE); + // calculate expected + float sum = 0; + for (int i = 0; i < SIZE; i++) { + sum += x_in[i] * x_in[i]; + } + float expected = sqrtf(sum); + // check expected == output + printf("calculated: %f\n", calculated); + printf("expected: %f\n", expected); + assert(fabs(expected - calculated) < DELTA); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-transpose.c b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-transpose.c new file mode 100644 index 00000000..a162ae1c --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-transpose.c @@ -0,0 +1,51 @@ +#include +#include +#include +#include +#include + +#define SIZE 10 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void naive_transpose(float a[SIZE * SIZE], int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float x_calculated[SIZE * SIZE]; + for (int i = 0; i < SIZE * SIZE; i++) { + x_calculated[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float x_expected[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + x_expected[i] = x_calculated[i]; + } + // calculate up c_out + naive_transpose(x_calculated, SIZE); + // calculate expected + int n = SIZE; + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = x_expected[i * n + j]; + x_expected[i * n + j] = x_expected[j * n + i]; + x_expected[j * n + i] = tmp; + } + } + // check expected == output + for (int i = 0; i < SIZE * SIZE; i++) { + printf("calculated: %f\n", x_calculated[i]); + printf("expected: %f\n", x_expected[i]); + assert(fabs(x_expected[i] - x_calculated[i]) < DELTA); + } + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-naive-norm.c b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-naive-norm.c new file mode 100644 index 00000000..449460d2 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-naive-norm.c @@ -0,0 +1,41 @@ +#include +#include +#include +#include +#include + +#define SIZE 20 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float naive_norm(float x[SIZE], int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + 
float x_in[SIZE]; + for (int i = 0; i < SIZE; i++) { + x_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // calculate up c_out + float calculated = naive_norm(x_in, SIZE); + // calculate expected + float sum = 0; + for (int i = 0; i < SIZE; i++) { + sum += x_in[i] * x_in[i]; + } + float expected = sqrtf(sum); + // check expected == output + printf("calculated: %f\n", calculated); + printf("expected: %f\n", expected); + assert(fabs(expected - calculated) < DELTA); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-transpose.c b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-transpose.c new file mode 100644 index 00000000..de27bd49 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-transpose.c @@ -0,0 +1,51 @@ +#include +#include +#include +#include +#include + +#define SIZE 20 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void naive_transpose(float a[SIZE * SIZE], int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float x_calculated[SIZE * SIZE]; + for (int i = 0; i < SIZE * SIZE; i++) { + x_calculated[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float x_expected[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + x_expected[i] = x_calculated[i]; + } + // calculate up c_out + naive_transpose(x_calculated, SIZE); + // calculate expected + int n = SIZE; + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = x_expected[i * n + j]; + x_expected[i * n + j] = x_expected[j * n + i]; + x_expected[j * n + i] = tmp; + } + } + // check expected == output + for (int i = 0; i < SIZE * SIZE; i++) { + printf("calculated: %f\n", x_calculated[i]); + printf("expected: %f\n", x_expected[i]); + assert(fabs(x_expected[i] - x_calculated[i]) < DELTA); + } + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 3add325c..026b8247 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -735,17 +735,19 @@ pub fn optimize( llvm_to_egg(llvm_instrs, &mut llvm_arg_pairs, &mut node_to_arg); // optimization pass - if print_opt { - eprintln!("{}", expr.pretty(10)); - } + // if print_opt { + println!("In"); + eprintln!("{}", expr.pretty(10)); + // } let mut best = expr.clone(); if run_egg { let pair = rules::run(&expr, 180, true, !run_egg); best = pair.1; } - if print_opt { - eprintln!("{}", best.pretty(10)); - } + // if print_opt { + // println!("Out"); + // eprintln!("{}", best.pretty(10)); + // } // egg to llvm egg_to_llvm( @@ -892,9 +894,19 @@ unsafe fn _llvm_recursive_print(inst: LLVMValueRef) -> () { return; } -unsafe fn llvm_recursive_add(builder: LLVMBuilderRef, inst: LLVMValueRef) -> LLVMValueRef { +unsafe fn llvm_recursive_add( + builder: LLVMBuilderRef, + inst: LLVMValueRef, + context: LLVMContextRef, +) -> LLVMValueRef { if isa_argument(inst) { - return inst; + let mut indices = Vec::new(); + for i in 0..1 { + indices.push(LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0)); + } + let indices_vector = indices.as_mut_ptr(); + return LLVMBuildGEP(builder, inst, indices_vector, 1, b"\0".as_ptr() as *const _); + // return inst; } else if isa_constant(inst) { return inst; } else if isa_phi(inst) { 
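The hunk above changes llvm_recursive_add so that a function argument is wrapped in a getelementptr with a single zero index instead of being returned directly. For a pointer argument, a zero-index GEP is address-preserving: it still refers to element 0 of the argument. The short C sketch below is illustrative only, not part of the patch, and the function name takes_array is made up; it shows the same identity at the source level.

#include <assert.h>

/* A GEP with one zero index over a pointer corresponds to &A[0],
   which is the same address as A itself. */
static void takes_array(float A[10]) {
    assert(&A[0] == A);     /* zero offset: same address */
    assert((A + 0) == A);
}

int main(void) {
    float buf[10] = {0};
    takes_array(buf);
    return 0;
}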
@@ -913,7 +925,7 @@ unsafe fn llvm_recursive_add(builder: LLVMBuilderRef, inst: LLVMValueRef) -> LLV let num_ops = LLVMGetNumOperands(inst); for i in 0..num_ops { let operand = LLVMGetOperand(inst, i as u32); - let new_inst = llvm_recursive_add(builder, operand); + let new_inst = llvm_recursive_add(builder, operand, context); LLVMSetOperand(cloned_inst, i as u32, new_inst); } LLVMInsertIntoBuilder(builder, cloned_inst); @@ -1708,7 +1720,7 @@ unsafe fn translate_egg( let instr = match enode { VecLang::Symbol(symbol) => { match symbol_map.get(enode) { - Some(llvm_instr) => llvm_recursive_add(builder, *llvm_instr), + Some(llvm_instr) => llvm_recursive_add(builder, *llvm_instr, context), None => { let mut matched = false; let mut ret_value = LLVMBuildAdd( @@ -1757,7 +1769,7 @@ unsafe fn translate_egg( let load_value = if isa_load(*gep_value) { let addr = LLVMGetOperand(*gep_value, 0); let cloned_gep = LLVMInstructionClone(addr); - let new_gep = llvm_recursive_add(builder, cloned_gep); + let new_gep = llvm_recursive_add(builder, cloned_gep, context); let new_load = LLVMBuildLoad(builder, new_gep, b"\0".as_ptr() as *const _); let llvm_pair = LLVMPair { original_value: *gep_value, @@ -1767,16 +1779,16 @@ unsafe fn translate_egg( new_load } else if isa_gep(*gep_value) { let cloned_gep = LLVMInstructionClone(*gep_value); - let new_gep = llvm_recursive_add(builder, cloned_gep); + let new_gep = llvm_recursive_add(builder, cloned_gep, context); LLVMBuildLoad(builder, new_gep, b"\0".as_ptr() as *const _) } else if isa_bitcast(*gep_value) { // TODO: DO NOT REGERATE CALLS. THESE SHOULD BE CACHED!!. e.g. a CALLOC let cloned_bitcast = LLVMInstructionClone(*gep_value); - let new_bitcast = llvm_recursive_add(builder, cloned_bitcast); + let new_bitcast = llvm_recursive_add(builder, cloned_bitcast, context); LLVMBuildLoad(builder, new_bitcast, b"\0".as_ptr() as *const _) } else if isa_sitofp(*gep_value) { let cloned_sitofp = LLVMInstructionClone(*gep_value); - let new_sitofp = llvm_recursive_add(builder, cloned_sitofp); + let new_sitofp = llvm_recursive_add(builder, cloned_sitofp, context); new_sitofp } else { LLVMBuildLoad(builder, *gep_value, b"\0".as_ptr() as *const _) @@ -2214,7 +2226,7 @@ unsafe fn egg_to_llvm( LLVMBuildStore(builder, extracted_value, *addr); } else { let cloned_addr = LLVMInstructionClone(*addr); - let new_addr = llvm_recursive_add(builder, cloned_addr); + let new_addr = llvm_recursive_add(builder, cloned_addr, context); if LLVMTypeOf(extracted_value) != LLVMGetElementType(LLVMTypeOf(new_addr)) { extracted_value = gen_type_cast( extracted_value, From a1074634b9ba4889b9f71c774954ac91d385f891 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 25 Feb 2022 03:21:59 -0500 Subject: [PATCH 041/143] add random inlining tests for 10by10, 20by20 --- .../10-by-10-random-inlining.c | 112 ++++++++++++++++++ .../20-by-20-random-inlining.c | 112 ++++++++++++++++++ 2 files changed, 224 insertions(+) create mode 100644 src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-inlining.c create mode 100644 src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-inlining.c diff --git a/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-inlining.c b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-inlining.c new file mode 100644 index 00000000..9857755c --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-inlining.c @@ -0,0 +1,112 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 10 +#define MAX_FLOAT 
100.00f +#define DELTA 0.1f + +float test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float nested_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float no_opt_nested_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = no_opt_nested_inline(A, B, n); + return prod - sum; +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = no_opt_test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +float nested_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = nested_inline(A, B, n); + return prod - sum; +} + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +int main() { + time_t t = time(NULL); + srand((unsigned)time(&t)); + + float A[SIZE] = {0.0f}; + float expectedA[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + float a = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + A[i] = a; + expectedA[i] = a; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + float b = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + B[i] = b; + expectedB[i] = b; + } + float C[SIZE] = {0.0f}; + float expectedC[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + C[i] = 0.0f; + expectedC[i] = 0.0f; + } + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("Calculated C Output: %f\n", C[i]); + printf("Expected C Output: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-inlining.c b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-inlining.c new file mode 100644 index 00000000..b9b81702 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-inlining.c @@ -0,0 +1,112 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 20 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float nested_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float no_opt_nested_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = no_opt_nested_inline(A, B, n); + return prod - sum; +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { 
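+    /* Reference version used for checking: main() compares its output (expectedC) element-wise against the output of test() (C) to within DELTA. */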
+ float result = no_opt_test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +float nested_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = nested_inline(A, B, n); + return prod - sum; +} + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +int main() { + time_t t = time(NULL); + srand((unsigned)time(&t)); + + float A[SIZE] = {0.0f}; + float expectedA[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + float a = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + A[i] = a; + expectedA[i] = a; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + float b = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + B[i] = b; + expectedB[i] = b; + } + float C[SIZE] = {0.0f}; + float expectedC[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + C[i] = 0.0f; + expectedC[i] = 0.0f; + } + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("Calculated C Output: %f\n", C[i]); + printf("Expected C Output: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} \ No newline at end of file From 99931fb7fc00f01a5aca9091134b4f5901ca4854 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 25 Feb 2022 13:19:32 -0500 Subject: [PATCH 042/143] ignore polybench tests --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a9e3cb97..09c7ec69 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,5 @@ Cargo.lock *.out .clang-format* .cargo -.vscode \ No newline at end of file +.vscode +polybench-tests/ \ No newline at end of file From 6890293128292a83aff258ae7439d2d42b0c9bd5 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 25 Feb 2022 13:20:33 -0500 Subject: [PATCH 043/143] polybench tests commands added in makefile, change to float type --- src/dios-egraphs/Diospyros/Makefile | 9 + .../Diospyros/polybench-tests/3mm.c | 169 ------ .../Diospyros/polybench-tests/3mm.h | 98 --- .../Diospyros/polybench-tests/polybench.c | 569 ------------------ .../Diospyros/polybench-tests/polybench.h | 241 -------- 5 files changed, 9 insertions(+), 1077 deletions(-) delete mode 100644 src/dios-egraphs/Diospyros/polybench-tests/3mm.c delete mode 100644 src/dios-egraphs/Diospyros/polybench-tests/3mm.h delete mode 100644 src/dios-egraphs/Diospyros/polybench-tests/polybench.c delete mode 100644 src/dios-egraphs/Diospyros/polybench-tests/polybench.h diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index 47f32a71..82d85f04 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -77,6 +77,15 @@ no-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp $(CLANG) build/dce.ll -o build/final build/final +run-polybench: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp + $(CLANG) -I polybench-tests/utilities -I polybench-tests/linear-algebra/kernels/atax $(test) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll + opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg 
--flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll + opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll + $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=true build/aa.ll -o build/diospyros.ll + opt -S --adce --dse build/diospyros.ll -o build/dce.ll + $(CLANG) -I utilities polybench-tests/utilities/polybench.c build/dce.ll -o build/final + build/final + test: set-up-mac runt.sh runt.toml target/debug/libllvmlib.$(EXT) runt diff --git a/src/dios-egraphs/Diospyros/polybench-tests/3mm.c b/src/dios-egraphs/Diospyros/polybench-tests/3mm.c deleted file mode 100644 index d67178b0..00000000 --- a/src/dios-egraphs/Diospyros/polybench-tests/3mm.c +++ /dev/null @@ -1,169 +0,0 @@ -/** - * This version is stamped on May 10, 2016 - * - * Contact: - * Louis-Noel Pouchet - * Tomofumi Yuki - * - * Web address: http://polybench.sourceforge.net - */ -/* 3mm.c: this file is part of PolyBench/C */ - -#include -#include -#include -#include - -/* Include polybench common header. */ -#include - -/* Include benchmark-specific header. */ -#include "3mm.h" - - -/* Array initialization. */ -static -void init_array(int ni, int nj, int nk, int nl, int nm, - DATA_TYPE POLYBENCH_2D(A,NI,NK,ni,nk), - DATA_TYPE POLYBENCH_2D(B,NK,NJ,nk,nj), - DATA_TYPE POLYBENCH_2D(C,NJ,NM,nj,nm), - DATA_TYPE POLYBENCH_2D(D,NM,NL,nm,nl)) -{ - int i, j; - - for (i = 0; i < ni; i++) - for (j = 0; j < nk; j++) - A[i][j] = (DATA_TYPE) ((i*j+1) % ni) / (5*ni); - for (i = 0; i < nk; i++) - for (j = 0; j < nj; j++) - B[i][j] = (DATA_TYPE) ((i*(j+1)+2) % nj) / (5*nj); - for (i = 0; i < nj; i++) - for (j = 0; j < nm; j++) - C[i][j] = (DATA_TYPE) (i*(j+3) % nl) / (5*nl); - for (i = 0; i < nm; i++) - for (j = 0; j < nl; j++) - D[i][j] = (DATA_TYPE) ((i*(j+2)+2) % nk) / (5*nk); -} - - -/* DCE code. Must scan the entire live-out data. - Can be used also to check the correctness of the output. */ -static -void print_array(int ni, int nl, - DATA_TYPE POLYBENCH_2D(G,NI,NL,ni,nl)) -{ - int i, j; - - POLYBENCH_DUMP_START; - POLYBENCH_DUMP_BEGIN("G"); - for (i = 0; i < ni; i++) - for (j = 0; j < nl; j++) { - if ((i * ni + j) % 20 == 0) fprintf (POLYBENCH_DUMP_TARGET, "\n"); - fprintf (POLYBENCH_DUMP_TARGET, DATA_PRINTF_MODIFIER, G[i][j]); - } - POLYBENCH_DUMP_END("G"); - POLYBENCH_DUMP_FINISH; -} - - -/* Main computational kernel. The whole function will be timed, - including the call and return. 
*/ -static -void kernel_3mm(int ni, int nj, int nk, int nl, int nm, - DATA_TYPE POLYBENCH_2D(E,NI,NJ,ni,nj), - DATA_TYPE POLYBENCH_2D(A,NI,NK,ni,nk), - DATA_TYPE POLYBENCH_2D(B,NK,NJ,nk,nj), - DATA_TYPE POLYBENCH_2D(F,NJ,NL,nj,nl), - DATA_TYPE POLYBENCH_2D(C,NJ,NM,nj,nm), - DATA_TYPE POLYBENCH_2D(D,NM,NL,nm,nl), - DATA_TYPE POLYBENCH_2D(G,NI,NL,ni,nl)) -{ - int i, j, k; - -#pragma scop - /* E := A*B */ - for (i = 0; i < _PB_NI; i++) - for (j = 0; j < _PB_NJ; j++) - { - E[i][j] = SCALAR_VAL(0.0); - for (k = 0; k < _PB_NK; ++k) - E[i][j] += A[i][k] * B[k][j]; - } - /* F := C*D */ - for (i = 0; i < _PB_NJ; i++) - for (j = 0; j < _PB_NL; j++) - { - F[i][j] = SCALAR_VAL(0.0); - for (k = 0; k < _PB_NM; ++k) - F[i][j] += C[i][k] * D[k][j]; - } - /* G := E*F */ - for (i = 0; i < _PB_NI; i++) - for (j = 0; j < _PB_NL; j++) - { - G[i][j] = SCALAR_VAL(0.0); - for (k = 0; k < _PB_NJ; ++k) - G[i][j] += E[i][k] * F[k][j]; - } -#pragma endscop - -} - - -int main(int argc, char** argv) -{ - /* Retrieve problem size. */ - int ni = NI; - int nj = NJ; - int nk = NK; - int nl = NL; - int nm = NM; - - /* Variable declaration/allocation. */ - POLYBENCH_2D_ARRAY_DECL(E, DATA_TYPE, NI, NJ, ni, nj); - POLYBENCH_2D_ARRAY_DECL(A, DATA_TYPE, NI, NK, ni, nk); - POLYBENCH_2D_ARRAY_DECL(B, DATA_TYPE, NK, NJ, nk, nj); - POLYBENCH_2D_ARRAY_DECL(F, DATA_TYPE, NJ, NL, nj, nl); - POLYBENCH_2D_ARRAY_DECL(C, DATA_TYPE, NJ, NM, nj, nm); - POLYBENCH_2D_ARRAY_DECL(D, DATA_TYPE, NM, NL, nm, nl); - POLYBENCH_2D_ARRAY_DECL(G, DATA_TYPE, NI, NL, ni, nl); - - /* Initialize array(s). */ - init_array (ni, nj, nk, nl, nm, - POLYBENCH_ARRAY(A), - POLYBENCH_ARRAY(B), - POLYBENCH_ARRAY(C), - POLYBENCH_ARRAY(D)); - - /* Start timer. */ - polybench_start_instruments; - - /* Run kernel. */ - kernel_3mm (ni, nj, nk, nl, nm, - POLYBENCH_ARRAY(E), - POLYBENCH_ARRAY(A), - POLYBENCH_ARRAY(B), - POLYBENCH_ARRAY(F), - POLYBENCH_ARRAY(C), - POLYBENCH_ARRAY(D), - POLYBENCH_ARRAY(G)); - - /* Stop and print timer. */ - polybench_stop_instruments; - polybench_print_instruments; - - /* Prevent dead-code elimination. All live-out data must be printed - by the function call in argument. */ - polybench_prevent_dce(print_array(ni, nl, POLYBENCH_ARRAY(G))); - - /* Be clean. */ - POLYBENCH_FREE_ARRAY(E); - POLYBENCH_FREE_ARRAY(A); - POLYBENCH_FREE_ARRAY(B); - POLYBENCH_FREE_ARRAY(F); - POLYBENCH_FREE_ARRAY(C); - POLYBENCH_FREE_ARRAY(D); - POLYBENCH_FREE_ARRAY(G); - - return 0; -} diff --git a/src/dios-egraphs/Diospyros/polybench-tests/3mm.h b/src/dios-egraphs/Diospyros/polybench-tests/3mm.h deleted file mode 100644 index 8cc464eb..00000000 --- a/src/dios-egraphs/Diospyros/polybench-tests/3mm.h +++ /dev/null @@ -1,98 +0,0 @@ -/** - * This version is stamped on May 10, 2016 - * - * Contact: - * Louis-Noel Pouchet - * Tomofumi Yuki - * - * Web address: http://polybench.sourceforge.net - */ -#ifndef _3MM_H -# define _3MM_H - -/* Default to LARGE_DATASET. */ -# if !defined(MINI_DATASET) && !defined(SMALL_DATASET) && !defined(MEDIUM_DATASET) && !defined(LARGE_DATASET) && !defined(EXTRALARGE_DATASET) -# define LARGE_DATASET -# endif - -# if !defined(NI) && !defined(NJ) && !defined(NK) && !defined(NL) && !defined(NM) -/* Define sample dataset sizes. 
*/ -# ifdef MINI_DATASET -# define NI 16 -# define NJ 18 -# define NK 20 -# define NL 22 -# define NM 24 -# endif - -# ifdef SMALL_DATASET -# define NI 40 -# define NJ 50 -# define NK 60 -# define NL 70 -# define NM 80 -# endif - -# ifdef MEDIUM_DATASET -# define NI 180 -# define NJ 190 -# define NK 200 -# define NL 210 -# define NM 220 -# endif - -# ifdef LARGE_DATASET -# define NI 800 -# define NJ 900 -# define NK 1000 -# define NL 1100 -# define NM 1200 -# endif - -# ifdef EXTRALARGE_DATASET -# define NI 1600 -# define NJ 1800 -# define NK 2000 -# define NL 2200 -# define NM 2400 -# endif - - -#endif /* !(NI NJ NK NL NM) */ - -# define _PB_NI POLYBENCH_LOOP_BOUND(NI,ni) -# define _PB_NJ POLYBENCH_LOOP_BOUND(NJ,nj) -# define _PB_NK POLYBENCH_LOOP_BOUND(NK,nk) -# define _PB_NL POLYBENCH_LOOP_BOUND(NL,nl) -# define _PB_NM POLYBENCH_LOOP_BOUND(NM,nm) - - -/* Default data type */ -# if !defined(DATA_TYPE_IS_INT) && !defined(DATA_TYPE_IS_FLOAT) && !defined(DATA_TYPE_IS_DOUBLE) -# define DATA_TYPE_IS_DOUBLE -# endif - -#ifdef DATA_TYPE_IS_INT -# define DATA_TYPE int -# define DATA_PRINTF_MODIFIER "%d " -#endif - -#ifdef DATA_TYPE_IS_FLOAT -# define DATA_TYPE float -# define DATA_PRINTF_MODIFIER "%0.2f " -# define SCALAR_VAL(x) x##f -# define SQRT_FUN(x) sqrtf(x) -# define EXP_FUN(x) expf(x) -# define POW_FUN(x,y) powf(x,y) -# endif - -#ifdef DATA_TYPE_IS_DOUBLE -# define DATA_TYPE double -# define DATA_PRINTF_MODIFIER "%0.2lf " -# define SCALAR_VAL(x) x -# define SQRT_FUN(x) sqrt(x) -# define EXP_FUN(x) exp(x) -# define POW_FUN(x,y) pow(x,y) -# endif - -#endif /* !_3MM_H */ diff --git a/src/dios-egraphs/Diospyros/polybench-tests/polybench.c b/src/dios-egraphs/Diospyros/polybench-tests/polybench.c deleted file mode 100644 index e3359337..00000000 --- a/src/dios-egraphs/Diospyros/polybench-tests/polybench.c +++ /dev/null @@ -1,569 +0,0 @@ -/** - * This version is stamped on May 10, 2016 - * - * Contact: - * Louis-Noel Pouchet - * Tomofumi Yuki - * - * Web address: http://polybench.sourceforge.net - */ -/* polybench.c: this file is part of PolyBench/C */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef _OPENMP -# include -#endif - -#if defined(POLYBENCH_PAPI) -# undef POLYBENCH_PAPI -# include "polybench.h" -# define POLYBENCH_PAPI -#else -# include "polybench.h" -#endif - -/* By default, collect PAPI counters on thread 0. */ -#ifndef POLYBENCH_THREAD_MONITOR -# define POLYBENCH_THREAD_MONITOR 0 -#endif - -/* Total LLC cache size. By default 32+MB.. */ -#ifndef POLYBENCH_CACHE_SIZE_KB -# define POLYBENCH_CACHE_SIZE_KB 32770 -#endif - - -int polybench_papi_counters_threadid = POLYBENCH_THREAD_MONITOR; -double polybench_program_total_flops = 0; - -#ifdef POLYBENCH_PAPI -# include -# define POLYBENCH_MAX_NB_PAPI_COUNTERS 96 - char* _polybench_papi_eventlist[] = { -#include "papi_counters.list" - NULL - }; - int polybench_papi_eventset; - int polybench_papi_eventlist[POLYBENCH_MAX_NB_PAPI_COUNTERS]; - long_long polybench_papi_values[POLYBENCH_MAX_NB_PAPI_COUNTERS]; - -#endif - -/* - * Allocation table, to enable inter-array padding. All data allocated - * with polybench_alloc_data should be freed with polybench_free_data. - * - */ -#define NB_INITIAL_TABLE_ENTRIES 512 -struct polybench_data_ptrs -{ - void** user_view; - void** real_ptr; - int nb_entries; - int nb_avail_entries; -}; -static struct polybench_data_ptrs* _polybench_alloc_table = NULL; -static size_t polybench_inter_array_padding_sz = 0; - -/* Timer code (gettimeofday). 
*/ -double polybench_t_start, polybench_t_end; -/* Timer code (RDTSC). */ -unsigned long long int polybench_c_start, polybench_c_end; - -static -double rtclock() -{ -#if defined(POLYBENCH_TIME) || defined(POLYBENCH_GFLOPS) - struct timeval Tp; - int stat; - stat = gettimeofday (&Tp, NULL); - if (stat != 0) - printf ("Error return from gettimeofday: %d", stat); - return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); -#else - return 0; -#endif -} - - -#ifdef POLYBENCH_CYCLE_ACCURATE_TIMER -static -unsigned long long int rdtsc() -{ - unsigned long long int ret = 0; - unsigned int cycles_lo; - unsigned int cycles_hi; - __asm__ volatile ("RDTSC" : "=a" (cycles_lo), "=d" (cycles_hi)); - ret = (unsigned long long int)cycles_hi << 32 | cycles_lo; - - return ret; -} -#endif - -void polybench_flush_cache() -{ - int cs = POLYBENCH_CACHE_SIZE_KB * 1024 / sizeof(double); - double* flush = (double*) calloc (cs, sizeof(double)); - int i; - double tmp = 0.0; -#ifdef _OPENMP -#pragma omp parallel for reduction(+:tmp) private(i) -#endif - for (i = 0; i < cs; i++) - tmp += flush[i]; - assert (tmp <= 10.0); - free (flush); -} - - -#ifdef POLYBENCH_LINUX_FIFO_SCHEDULER -void polybench_linux_fifo_scheduler() -{ - /* Use FIFO scheduler to limit OS interference. Program must be run - as root, and this works only for Linux kernels. */ - struct sched_param schedParam; - schedParam.sched_priority = sched_get_priority_max (SCHED_FIFO); - sched_setscheduler (0, SCHED_FIFO, &schedParam); -} - - -void polybench_linux_standard_scheduler() -{ - /* Restore to standard scheduler policy. */ - struct sched_param schedParam; - schedParam.sched_priority = sched_get_priority_max (SCHED_OTHER); - sched_setscheduler (0, SCHED_OTHER, &schedParam); -} -#endif - -#ifdef POLYBENCH_PAPI - -static -void test_fail(char *file, int line, char *call, int retval) -{ - char buf[128]; - - memset(buf, '\0', sizeof(buf)); - if (retval != 0) - fprintf (stdout,"%-40s FAILED\nLine # %d\n", file, line); - else - { - fprintf (stdout,"%-40s SKIPPED\n", file); - fprintf (stdout,"Line # %d\n", line); - } - if (retval == PAPI_ESYS) - { - sprintf (buf, "System error in %s", call); - perror (buf); - } - else if (retval > 0) - fprintf (stdout,"Error: %s\n", call); - else if (retval == 0) - fprintf (stdout,"Error: %s\n", call); - else - { - char errstring[PAPI_MAX_STR_LEN]; - // PAPI 5.4.3 has changed the API for PAPI_perror. 
- #if defined (PAPI_VERSION) && ((PAPI_VERSION_MAJOR(PAPI_VERSION) == 5 && PAPI_VERSION_MINOR(PAPI_VERSION) >= 4) || PAPI_VERSION_MAJOR(PAPI_VERSION) > 5) - fprintf (stdout, "Error in %s: %s\n", call, PAPI_strerror(retval)); - #else - PAPI_perror (retval, errstring, PAPI_MAX_STR_LEN); - fprintf (stdout,"Error in %s: %s\n", call, errstring); - #endif - } - fprintf (stdout,"\n"); - if (PAPI_is_initialized ()) - PAPI_shutdown (); - exit (1); -} - - -void polybench_papi_init() -{ -# ifdef _OPENMP -#pragma omp parallel - { -#pragma omp master - { - if (omp_get_max_threads () < polybench_papi_counters_threadid) - polybench_papi_counters_threadid = omp_get_max_threads () - 1; - } -#pragma omp barrier - - if (omp_get_thread_num () == polybench_papi_counters_threadid) - { -# endif - int retval; - polybench_papi_eventset = PAPI_NULL; - if ((retval = PAPI_library_init (PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) - test_fail (__FILE__, __LINE__, "PAPI_library_init", retval); - if ((retval = PAPI_create_eventset (&polybench_papi_eventset)) - != PAPI_OK) - test_fail (__FILE__, __LINE__, "PAPI_create_eventset", retval); - int k; - for (k = 0; _polybench_papi_eventlist[k]; ++k) - { - if ((retval = - PAPI_event_name_to_code (_polybench_papi_eventlist[k], - &(polybench_papi_eventlist[k]))) - != PAPI_OK) - test_fail (__FILE__, __LINE__, "PAPI_event_name_to_code", retval); - } - polybench_papi_eventlist[k] = 0; - - -# ifdef _OPENMP - } - } -#pragma omp barrier -# endif -} - - -void polybench_papi_close() -{ -# ifdef _OPENMP -#pragma omp parallel - { - if (omp_get_thread_num () == polybench_papi_counters_threadid) - { -# endif - int retval; - if ((retval = PAPI_destroy_eventset (&polybench_papi_eventset)) - != PAPI_OK) - test_fail (__FILE__, __LINE__, "PAPI_destroy_eventset", retval); - if (PAPI_is_initialized ()) - PAPI_shutdown (); -# ifdef _OPENMP - } - } -#pragma omp barrier -# endif -} - -int polybench_papi_start_counter(int evid) -{ -# ifndef POLYBENCH_NO_FLUSH_CACHE - polybench_flush_cache(); -# endif - -# ifdef _OPENMP -# pragma omp parallel - { - if (omp_get_thread_num () == polybench_papi_counters_threadid) - { -# endif - - int retval = 1; - char descr[PAPI_MAX_STR_LEN]; - PAPI_event_info_t evinfo; - PAPI_event_code_to_name (polybench_papi_eventlist[evid], descr); - if (PAPI_add_event (polybench_papi_eventset, - polybench_papi_eventlist[evid]) != PAPI_OK) - test_fail (__FILE__, __LINE__, "PAPI_add_event", 1); - if (PAPI_get_event_info (polybench_papi_eventlist[evid], &evinfo) - != PAPI_OK) - test_fail (__FILE__, __LINE__, "PAPI_get_event_info", retval); - if ((retval = PAPI_start (polybench_papi_eventset)) != PAPI_OK) - test_fail (__FILE__, __LINE__, "PAPI_start", retval); -# ifdef _OPENMP - } - } -#pragma omp barrier -# endif - return 0; -} - - -void polybench_papi_stop_counter(int evid) -{ -# ifdef _OPENMP -# pragma omp parallel - { - if (omp_get_thread_num () == polybench_papi_counters_threadid) - { -# endif - int retval; - long_long values[1]; - values[0] = 0; - if ((retval = PAPI_read (polybench_papi_eventset, &values[0])) - != PAPI_OK) - test_fail (__FILE__, __LINE__, "PAPI_read", retval); - - if ((retval = PAPI_stop (polybench_papi_eventset, NULL)) != PAPI_OK) - test_fail (__FILE__, __LINE__, "PAPI_stop", retval); - - polybench_papi_values[evid] = values[0]; - - if ((retval = PAPI_remove_event - (polybench_papi_eventset, - polybench_papi_eventlist[evid])) != PAPI_OK) - test_fail (__FILE__, __LINE__, "PAPI_remove_event", retval); -# ifdef _OPENMP - } - } -#pragma omp barrier -# endif -} - - -void 
polybench_papi_print() -{ - int verbose = 0; -# ifdef _OPENMP -# pragma omp parallel - { - if (omp_get_thread_num() == polybench_papi_counters_threadid) - { -#ifdef POLYBENCH_PAPI_VERBOSE - verbose = 1; -#endif - if (verbose) - printf ("On thread %d:\n", polybench_papi_counters_threadid); -#endif - int evid; - for (evid = 0; polybench_papi_eventlist[evid] != 0; ++evid) - { - if (verbose) - printf ("%s=", _polybench_papi_eventlist[evid]); - printf ("%llu ", polybench_papi_values[evid]); - if (verbose) - printf ("\n"); - } - printf ("\n"); -# ifdef _OPENMP - } - } -#pragma omp barrier -# endif -} - -#endif -/* ! POLYBENCH_PAPI */ - -void polybench_prepare_instruments() -{ -#ifndef POLYBENCH_NO_FLUSH_CACHE - polybench_flush_cache (); -#endif -#ifdef POLYBENCH_LINUX_FIFO_SCHEDULER - polybench_linux_fifo_scheduler (); -#endif -} - - -void polybench_timer_start() -{ - polybench_prepare_instruments (); -#ifndef POLYBENCH_CYCLE_ACCURATE_TIMER - polybench_t_start = rtclock (); -#else - polybench_c_start = rdtsc (); -#endif -} - - -void polybench_timer_stop() -{ -#ifndef POLYBENCH_CYCLE_ACCURATE_TIMER - polybench_t_end = rtclock (); -#else - polybench_c_end = rdtsc (); -#endif -#ifdef POLYBENCH_LINUX_FIFO_SCHEDULER - polybench_linux_standard_scheduler (); -#endif -} - - -void polybench_timer_print() -{ -#ifdef POLYBENCH_GFLOPS - if (polybench_program_total_flops == 0) - { - printf ("[PolyBench][WARNING] Program flops not defined, use polybench_set_program_flops(value)\n"); - printf ("%0.6lf\n", polybench_t_end - polybench_t_start); - } - else - printf ("%0.2lf\n", - (polybench_program_total_flops / - (double)(polybench_t_end - polybench_t_start)) / 1000000000); -#else -# ifndef POLYBENCH_CYCLE_ACCURATE_TIMER - printf ("%0.6f\n", polybench_t_end - polybench_t_start); -# else - printf ("%Ld\n", polybench_c_end - polybench_c_start); -# endif -#endif -} - -/* - * These functions are used only if the user defines a specific - * inter-array padding. It grows a global structure, - * _polybench_alloc_table, which keeps track of the data allocated via - * polybench_alloc_data (on which inter-array padding is applied), so - * that the original, non-shifted pointer can be recovered when - * calling polybench_free_data. - * - */ -#ifdef POLYBENCH_ENABLE_INTARRAY_PAD -static -void grow_alloc_table() -{ - if (_polybench_alloc_table == NULL || - (_polybench_alloc_table->nb_entries % NB_INITIAL_TABLE_ENTRIES) != 0 || - _polybench_alloc_table->nb_avail_entries != 0) - { - /* Should never happen if the API is properly used. 
*/ - fprintf (stderr, "[ERROR] Inter-array padding requires to use polybench_alloc_data and polybench_free_data\n"); - exit (1); - } - size_t sz = _polybench_alloc_table->nb_entries; - sz += NB_INITIAL_TABLE_ENTRIES; - _polybench_alloc_table->user_view = - realloc (_polybench_alloc_table->user_view, sz * sizeof(void*)); - assert(_polybench_alloc_table->user_view != NULL); - _polybench_alloc_table->real_ptr = - realloc (_polybench_alloc_table->real_ptr, sz * sizeof(void*)); - assert(_polybench_alloc_table->real_ptr != NULL); - _polybench_alloc_table->nb_avail_entries = NB_INITIAL_TABLE_ENTRIES; -} - -static -void* register_padded_pointer(void* ptr, size_t orig_sz, size_t padded_sz) -{ - if (_polybench_alloc_table == NULL) - { - fprintf (stderr, "[ERROR] Inter-array padding requires to use polybench_alloc_data and polybench_free_data\n"); - exit (1); - } - if (_polybench_alloc_table->nb_avail_entries == 0) - grow_alloc_table (); - int id = _polybench_alloc_table->nb_entries++; - _polybench_alloc_table->real_ptr[id] = ptr; - _polybench_alloc_table->user_view[id] = ptr + (padded_sz - orig_sz); - - return _polybench_alloc_table->user_view[id]; -} - - -static -void -free_data_from_alloc_table (void* ptr) -{ - if (_polybench_alloc_table != NULL && _polybench_alloc_table->nb_entries > 0) - { - int i; - for (i = 0; i < _polybench_alloc_table->nb_entries; ++i) - if (_polybench_alloc_table->user_view[i] == ptr || - _polybench_alloc_table->real_ptr[i] == ptr) - break; - if (i != _polybench_alloc_table->nb_entries) - { - free (_polybench_alloc_table->real_ptr[i]); - for (; i < _polybench_alloc_table->nb_entries - 1; ++i) - { - _polybench_alloc_table->user_view[i] = - _polybench_alloc_table->user_view[i + 1]; - _polybench_alloc_table->real_ptr[i] = - _polybench_alloc_table->real_ptr[i + 1]; - } - _polybench_alloc_table->nb_entries--; - _polybench_alloc_table->nb_avail_entries++; - if (_polybench_alloc_table->nb_entries == 0) - { - free (_polybench_alloc_table->user_view); - free (_polybench_alloc_table->real_ptr); - free (_polybench_alloc_table); - _polybench_alloc_table = NULL; - } - } - } -} - -static -void check_alloc_table_state() -{ - if (_polybench_alloc_table == NULL) - { - _polybench_alloc_table = (struct polybench_data_ptrs*) - malloc (sizeof(struct polybench_data_ptrs)); - assert(_polybench_alloc_table != NULL); - _polybench_alloc_table->user_view = - (void**) malloc (sizeof(void*) * NB_INITIAL_TABLE_ENTRIES); - assert(_polybench_alloc_table->user_view != NULL); - _polybench_alloc_table->real_ptr = - (void**) malloc (sizeof(void*) * NB_INITIAL_TABLE_ENTRIES); - assert(_polybench_alloc_table->real_ptr != NULL); - _polybench_alloc_table->nb_entries = 0; - _polybench_alloc_table->nb_avail_entries = NB_INITIAL_TABLE_ENTRIES; - } -} - -#endif // !POLYBENCH_ENABLE_INTARRAY_PAD - - -static -void* -xmalloc(size_t alloc_sz) -{ - void* ret = NULL; - /* By default, post-pad the arrays. Safe behavior, but likely useless. */ - polybench_inter_array_padding_sz += POLYBENCH_INTER_ARRAY_PADDING_FACTOR; - size_t padded_sz = alloc_sz + polybench_inter_array_padding_sz; - int err = posix_memalign (&ret, 4096, padded_sz); - if (! ret || err) - { - fprintf (stderr, "[PolyBench] posix_memalign: cannot allocate memory"); - exit (1); - } - /* Safeguard: this is invoked only if polybench.c has been compiled - with inter-array padding support from polybench.h. If so, move - the starting address of the allocation and return it to the - user. 
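
The bookkeeping above is easier to follow in isolation: the allocator over-allocates, returns a pointer shifted past the padding, and records both the shifted pointer and the original one so the real allocation can be recovered and freed later. Below is a minimal, self-contained sketch of that idea with a single-entry table and char-pointer arithmetic; the names are illustrative and not taken from polybench.c.

#include <stdlib.h>
#include <string.h>

/* One recorded allocation: the pointer the caller sees vs. the one malloc returned. */
struct padded_alloc {
    void *user_view;   /* shifted pointer handed back to the caller */
    void *real_ptr;    /* original pointer, the one that must be freed */
};

static struct padded_alloc table[1];
static int table_used = 0;

/* Over-allocate, shift the returned pointer past the padding, remember both. */
static void *alloc_padded(size_t orig_sz, size_t pad_sz)
{
    char *real = malloc(orig_sz + pad_sz);
    if (real == NULL || table_used != 0)
        return NULL;
    table[0].real_ptr = real;
    table[0].user_view = real + pad_sz;   /* caller's data lives after the padding */
    table_used = 1;
    return table[0].user_view;
}

/* Accept either pointer, look it up, and free the original allocation. */
static void free_padded(void *ptr)
{
    if (table_used && (ptr == table[0].user_view || ptr == table[0].real_ptr)) {
        free(table[0].real_ptr);
        table_used = 0;
    }
}

int main(void)
{
    double *a = alloc_padded(4 * sizeof(double), 256);
    if (a == NULL)
        return 1;
    memset(a, 0, 4 * sizeof(double));
    free_padded(a);   /* the lookup recovers the pointer malloc actually returned */
    return 0;
}
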
The original pointer is registered in an allocation table - internal to polybench.c. Data must then be freed using - polybench_free_data, which will inspect the allocation table to - free the original pointer.*/ -#ifdef POLYBENCH_ENABLE_INTARRAY_PAD - /* This moves the 'ret' pointer by (padded_sz - alloc_sz) positions, and - registers it in the lookup table for future free using - polybench_free_data. */ - ret = register_padded_pointer(ret, alloc_sz, padded_sz); -#endif - - return ret; -} - - -void polybench_free_data(void* ptr) -{ -#ifdef POLYBENCH_ENABLE_INTARRAY_PAD - free_data_from_alloc_table (ptr); -#else - free (ptr); -#endif -} - - -void* polybench_alloc_data(unsigned long long int n, int elt_size) -{ -#ifdef POLYBENCH_ENABLE_INTARRAY_PAD - check_alloc_table_state (); -#endif - - /// FIXME: detect overflow! - size_t val = n; - val *= elt_size; - void* ret = xmalloc (val); - - return ret; -} diff --git a/src/dios-egraphs/Diospyros/polybench-tests/polybench.h b/src/dios-egraphs/Diospyros/polybench-tests/polybench.h deleted file mode 100644 index e6f30f5a..00000000 --- a/src/dios-egraphs/Diospyros/polybench-tests/polybench.h +++ /dev/null @@ -1,241 +0,0 @@ -/** - * This version is stamped on May 10, 2016 - * - * Contact: - * Louis-Noel Pouchet - * Tomofumi Yuki - * - * Web address: http://polybench.sourceforge.net - */ -/* - * polybench.h: this file is part of PolyBench/C - * - * Polybench header for instrumentation. - * - * Programs must be compiled with `-I utilities utilities/polybench.c' - * - * Optionally, one can define: - * - * -DPOLYBENCH_TIME, to report the execution time, - * OR (exclusive): - * -DPOLYBENCH_PAPI, to use PAPI H/W counters (defined in polybench.c) - * - * - * See README or utilities/polybench.c for additional options. - * - */ -#ifndef POLYBENCH_H -# define POLYBENCH_H - -# include - -/* Array padding. By default, none is used. */ -# ifndef POLYBENCH_PADDING_FACTOR -/* default: */ -# define POLYBENCH_PADDING_FACTOR 0 -# endif - -/* Inter-array padding, for use with . By default, none is used. */ -# ifndef POLYBENCH_INTER_ARRAY_PADDING_FACTOR -/* default: */ -# define POLYBENCH_INTER_ARRAY_PADDING_FACTOR 0 -# undef POLYBENCH_ENABLE_INTARRAY_PAD -# else -# define POLYBENCH_ENABLE_INTARRAY_PAD -# endif - - -/* C99 arrays in function prototype. By default, do not use. */ -# ifdef POLYBENCH_USE_C99_PROTO -# define POLYBENCH_C99_SELECT(x,y) y -# else -/* default: */ -# define POLYBENCH_C99_SELECT(x,y) x -# endif - - -/* Scalar loop bounds in SCoPs. By default, use parametric loop bounds. */ -# ifdef POLYBENCH_USE_SCALAR_LB -# define POLYBENCH_LOOP_BOUND(x,y) x -# else -/* default: */ -# define POLYBENCH_LOOP_BOUND(x,y) y -# endif - -/* Use the 'restrict' keyword to declare that the different arrays do not - * alias. By default, we do not use it as it is only supported in C99 and - * even here several compilers do not properly get it. - */ -# ifdef POLYBENCH_USE_RESTRICT -# define POLYBENCH_RESTRICT restrict -# else -/* default: */ -# define POLYBENCH_RESTRICT -# endif - -/* Macros to reference an array. Generic for heap and stack arrays - (C99). Each array dimensionality has his own macro, to be used at - declaration or as a function argument. 
- Example: - int b[x] => POLYBENCH_1D_ARRAY(b, x) - int A[N][N] => POLYBENCH_2D_ARRAY(A, N, N) -*/ -# ifndef POLYBENCH_STACK_ARRAYS -# define POLYBENCH_ARRAY(x) *x -# ifdef POLYBENCH_ENABLE_INTARRAY_PAD -# define POLYBENCH_FREE_ARRAY(x) polybench_free_data((void*)x); -# else -# define POLYBENCH_FREE_ARRAY(x) free((void*)x); -# endif -# define POLYBENCH_DECL_VAR(x) (*x) -# else -# define POLYBENCH_ARRAY(x) x -# define POLYBENCH_FREE_ARRAY(x) -# define POLYBENCH_DECL_VAR(x) x -# endif -/* Macros for using arrays in the function prototypes. */ -# define POLYBENCH_1D(var, dim1,ddim1) var[POLYBENCH_RESTRICT POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR] -# define POLYBENCH_2D(var, dim1, dim2, ddim1, ddim2) var[POLYBENCH_RESTRICT POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR] -# define POLYBENCH_3D(var, dim1, dim2, dim3, ddim1, ddim2, ddim3) var[POLYBENCH_RESTRICT POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR] -# define POLYBENCH_4D(var, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) var[POLYBENCH_RESTRICT POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR] -# define POLYBENCH_5D(var, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) var[POLYBENCH_RESTRICT POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim5,ddim5) + POLYBENCH_PADDING_FACTOR] -/* Macros for using arrays within the functions. */ -# define POLYBENCH_1D_F(var, dim1,ddim1) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR] -# define POLYBENCH_2D_F(var, dim1, dim2, ddim1, ddim2) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR] -# define POLYBENCH_3D_F(var, dim1, dim2, dim3, ddim1, ddim2, ddim3) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR] -# define POLYBENCH_4D_F(var, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR] -# define POLYBENCH_5D_F(var, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim5,ddim5) + POLYBENCH_PADDING_FACTOR] - - -/* Macros to allocate heap arrays. 
- Example: - polybench_alloc_2d_array(N, M, double) => allocates N x M x sizeof(double) - and returns a pointer to the 2d array - */ -# define POLYBENCH_ALLOC_1D_ARRAY(n1, type) \ - (type(*)[n1 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data (n1 + POLYBENCH_PADDING_FACTOR, sizeof(type)) -# define POLYBENCH_ALLOC_2D_ARRAY(n1, n2, type) \ - (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR), sizeof(type)) -# define POLYBENCH_ALLOC_3D_ARRAY(n1, n2, n3, type) \ - (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR), sizeof(type)) -# define POLYBENCH_ALLOC_4D_ARRAY(n1, n2, n3, n4, type) \ - (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR][n4 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR) * (n4 + POLYBENCH_PADDING_FACTOR), sizeof(type)) -# define POLYBENCH_ALLOC_5D_ARRAY(n1, n2, n3, n4, n5, type) \ - (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR][n4 + POLYBENCH_PADDING_FACTOR][n5 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR) * (n4 + POLYBENCH_PADDING_FACTOR) * (n5 + POLYBENCH_PADDING_FACTOR), sizeof(type)) - -/* Macros for array declaration. */ -# ifndef POLYBENCH_STACK_ARRAYS -# define POLYBENCH_1D_ARRAY_DECL(var, type, dim1, ddim1) \ - type POLYBENCH_1D_F(POLYBENCH_DECL_VAR(var), dim1, ddim1); \ - var = POLYBENCH_ALLOC_1D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), type); -# define POLYBENCH_2D_ARRAY_DECL(var, type, dim1, dim2, ddim1, ddim2) \ - type POLYBENCH_2D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, ddim1, ddim2); \ - var = POLYBENCH_ALLOC_2D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), type); -# define POLYBENCH_3D_ARRAY_DECL(var, type, dim1, dim2, dim3, ddim1, ddim2, ddim3) \ - type POLYBENCH_3D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, ddim1, ddim2, ddim3); \ - var = POLYBENCH_ALLOC_3D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), type); -# define POLYBENCH_4D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) \ - type POLYBENCH_4D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4); \ - var = POLYBENCH_ALLOC_4D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), POLYBENCH_C99_SELECT(dim4, ddim4), type); -# define POLYBENCH_5D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) \ - type POLYBENCH_5D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5); \ - var = POLYBENCH_ALLOC_5D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), POLYBENCH_C99_SELECT(dim4, ddim4), POLYBENCH_C99_SELECT(dim5, ddim5), type); -# else -# define POLYBENCH_1D_ARRAY_DECL(var, type, dim1, ddim1) \ - type POLYBENCH_1D_F(POLYBENCH_DECL_VAR(var), dim1, ddim1); -# define POLYBENCH_2D_ARRAY_DECL(var, type, dim1, dim2, ddim1, ddim2) \ - type POLYBENCH_2D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, ddim1, 
ddim2); -# define POLYBENCH_3D_ARRAY_DECL(var, type, dim1, dim2, dim3, ddim1, ddim2, ddim3) \ - type POLYBENCH_3D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, ddim1, ddim2, ddim3); -# define POLYBENCH_4D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) \ - type POLYBENCH_4D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4); -# define POLYBENCH_5D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) \ - type POLYBENCH_5D_F(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5); -# endif - - -/* Dead-code elimination macros. Use argc/argv for the run-time check. */ -# ifndef POLYBENCH_DUMP_ARRAYS -# define POLYBENCH_DCE_ONLY_CODE if (argc > 42 && ! strcmp(argv[0], "")) -# else -# define POLYBENCH_DCE_ONLY_CODE -# endif - -#define POLYBENCH_DUMP_TARGET stderr -#define POLYBENCH_DUMP_START fprintf(POLYBENCH_DUMP_TARGET, "==BEGIN DUMP_ARRAYS==\n") -#define POLYBENCH_DUMP_FINISH fprintf(POLYBENCH_DUMP_TARGET, "==END DUMP_ARRAYS==\n") -#define POLYBENCH_DUMP_BEGIN(s) fprintf(POLYBENCH_DUMP_TARGET, "begin dump: %s", s) -#define POLYBENCH_DUMP_END(s) fprintf(POLYBENCH_DUMP_TARGET, "\nend dump: %s\n", s) - -# define polybench_prevent_dce(func) \ - POLYBENCH_DCE_ONLY_CODE \ - func - - -/* Performance-related instrumentation. See polybench.c */ -# define polybench_start_instruments -# define polybench_stop_instruments -# define polybench_print_instruments - - -/* PAPI support. */ -# ifdef POLYBENCH_PAPI -extern const unsigned int polybench_papi_eventlist[]; -# undef polybench_start_instruments -# undef polybench_stop_instruments -# undef polybench_print_instruments -# define polybench_set_papi_thread_report(x) \ - polybench_papi_counters_threadid = x; -# define polybench_start_instruments \ - polybench_prepare_instruments(); \ - polybench_papi_init(); \ - int evid; \ - for (evid = 0; polybench_papi_eventlist[evid] != 0; evid++) \ - { \ - if (polybench_papi_start_counter(evid)) \ - continue; \ - -# define polybench_stop_instruments \ - polybench_papi_stop_counter(evid); \ - } \ - polybench_papi_close(); \ - -# define polybench_print_instruments polybench_papi_print(); -# endif - - -/* Timing support. */ -# if defined(POLYBENCH_TIME) || defined(POLYBENCH_GFLOPS) -# undef polybench_start_instruments -# undef polybench_stop_instruments -# undef polybench_print_instruments -# define polybench_start_instruments polybench_timer_start(); -# define polybench_stop_instruments polybench_timer_stop(); -# define polybench_print_instruments polybench_timer_print(); -extern double polybench_program_total_flops; -extern void polybench_timer_start(); -extern void polybench_timer_stop(); -extern void polybench_timer_print(); -# endif - -/* PAPI support. */ -# ifdef POLYBENCH_PAPI -extern int polybench_papi_start_counter(int evid); -extern void polybench_papi_stop_counter(int evid); -extern void polybench_papi_init(); -extern void polybench_papi_close(); -extern void polybench_papi_print(); -# endif - -/* Function prototypes. */ -extern void* polybench_alloc_data(unsigned long long int n, int elt_size); -extern void polybench_free_data(void* ptr); - -/* PolyBench internal functions that should not be directly called by */ -/* the user, unless when designing customized execution profiling */ -/* approaches. 
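
The POLYBENCH_DCE_ONLY_CODE guard above wraps the result dump in a condition that is never true in practice (argc > 42 with an empty argv[0]) but that the compiler cannot prove false, so the computation feeding the dump has to be kept alive. Below is a minimal, self-contained sketch of that trick; the array and the stand-in computation are illustrative only.

#include <stdio.h>
#include <string.h>

#define N 1000

int main(int argc, char **argv)
{
    static double a[N];

    /* Stand-in for a benchmark kernel; using argc blocks constant folding. */
    for (int i = 0; i < N; i++)
        a[i] = (i + argc) * 0.5;

    /* Practically unreachable, yet not provably dead, so the dump below counts
       as a use of a[]; this is the effect POLYBENCH_DCE_ONLY_CODE has on
       whatever call a benchmark wraps with polybench_prevent_dce. */
    if (argc > 42 && !strcmp(argv[0], "")) {
        for (int i = 0; i < N; i++)
            fprintf(stderr, "%f ", a[i]);
    }
    return 0;
}
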
*/ -extern void polybench_flush_cache(); -extern void polybench_prepare_instruments(); - - -#endif /* !POLYBENCH_H */ From 9e9c14964bb9c57ffd6a999d535062793c5afd9c Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sat, 26 Feb 2022 23:12:23 -0500 Subject: [PATCH 044/143] bitcasting works convert at loads bitcast back from i32* to float* convert at stores bitcast from i32* to float* test4-4-linear.c passes --- src/dios-egraphs/Diospyros/diospyros.cpp | 14 +++++- .../Diospyros/fail-tests/test4-4-linear.c | 6 +++ src/dios-egraphs/Diospyros/src/lib.rs | 49 +++++++++++++------ 3 files changed, 53 insertions(+), 16 deletions(-) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 120dc8c9..c1ac8ad3 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -311,7 +311,7 @@ extern "C" bool isa_integertype(LLVMValueRef val) { } /** - * True iff a value is an LLVM IntPTr/LLVMValueRef ItPtr + * True iff a value is an LLVM IntPTr/LLVMValueRef IntPtr */ extern "C" bool isa_intptr(LLVMValueRef val) { auto unwrapped = unwrap(val); @@ -322,6 +322,18 @@ extern "C" bool isa_intptr(LLVMValueRef val) { return t->isPointerTy() && t->getContainedType(0)->isIntegerTy(); } +/** + * True iff a value is an LLVM FloatPtr/LLVMValueRef FloatPtr + */ +extern "C" bool isa_floatptr(LLVMValueRef val) { + auto unwrapped = unwrap(val); + if (unwrapped == NULL) { + return false; + } + Type *t = unwrapped->getType(); + return t->isPointerTy() && t->getContainedType(0)->isFloatTy(); +} + /** * True iff a value is an LLVM Float/LLVMValueRef Float */ diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-4-linear.c b/src/dios-egraphs/Diospyros/fail-tests/test4-4-linear.c index 332fd882..92b1195d 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test4-4-linear.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-4-linear.c @@ -98,6 +98,9 @@ void no_opt_naive_fixed_qr_decomp(float Q[SIZE], float x[SIZE], int main(void) { float A[SIZE] = {1.1f, 2.1f, 3.1f, 4.1f}; float Q[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + Q[i] = 0.0f; + } float R[SIZE] = {1.0f}; float i[SIZE] = {0.0f}; float x[SIZE] = {1.2f, 1.3f, 1.4f, 1.5f}; @@ -105,6 +108,9 @@ int main(void) { float q_t[SIZE] = {0.0f}; naive_fixed_qr_decomp(Q, x, q_t); float expectedQ[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + expectedQ[i] = 0.0f; + } float expectedR[SIZE] = {1.0f}; float expectedi[SIZE] = {0.0f}; float expectedx[SIZE] = {1.2f, 1.3f, 1.4f, 1.5f}; diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 026b8247..9d5bcf86 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -34,6 +34,7 @@ extern "C" { fn _isa_constaggregate(val: LLVMValueRef) -> bool; fn isa_integertype(val: LLVMValueRef) -> bool; fn isa_intptr(val: LLVMValueRef) -> bool; + fn isa_floatptr(val: LLVMValueRef) -> bool; fn isa_floattype(val: LLVMValueRef) -> bool; fn isa_bitcast(val: LLVMValueRef) -> bool; fn isa_sqrt32(val: LLVMValueRef) -> bool; @@ -1784,7 +1785,18 @@ unsafe fn translate_egg( } else if isa_bitcast(*gep_value) { // TODO: DO NOT REGERATE CALLS. THESE SHOULD BE CACHED!!. e.g. 
a CALLOC let cloned_bitcast = LLVMInstructionClone(*gep_value); - let new_bitcast = llvm_recursive_add(builder, cloned_bitcast, context); + let mut new_bitcast = llvm_recursive_add(builder, cloned_bitcast, context); + // if bitcast was to i32, handle bitcast from float* to i32* + if !isa_floatptr(new_bitcast) { + let addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(new_bitcast)); + let new_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(context), addr_space); + new_bitcast = LLVMBuildBitCast( + builder, + new_bitcast, + new_ptr_type, + b"\0".as_ptr() as *const _, + ); + } LLVMBuildLoad(builder, new_bitcast, b"\0".as_ptr() as *const _) } else if isa_sitofp(*gep_value) { let cloned_sitofp = LLVMInstructionClone(*gep_value); @@ -2204,28 +2216,35 @@ unsafe fn egg_to_llvm( let mut extracted_value = LLVMBuildExtractElement(builder, vector, index, b"\0".as_ptr() as *const _); // check if the extracted type is an float and the address is a int ptr - if isa_floattype(extracted_value) && isa_intptr(*addr) { - extracted_value = LLVMBuildFPToSI( - builder, - extracted_value, - LLVMIntTypeInContext(context, 32), - b"\0".as_ptr() as *const _, - ); - } - if isa_argument(*addr) { - if LLVMTypeOf(extracted_value) != LLVMGetElementType(LLVMTypeOf(*addr)) { + let mut_addr = if !isa_floatptr(*addr) { + let addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(*addr)); + let new_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(context), addr_space); + LLVMBuildBitCast(builder, *addr, new_ptr_type, b"\0".as_ptr() as *const _) + } else { + *addr + }; + // if isa_floattype(extracted_value) && isa_intptr(*mut_addr) { + // extracted_value = LLVMBuildFPToSI( + // builder, + // extracted_value, + // LLVMIntTypeInContext(context, 32), + // b"\0".as_ptr() as *const _, + // ); + // } + if isa_argument(mut_addr) { + if LLVMTypeOf(extracted_value) != LLVMGetElementType(LLVMTypeOf(mut_addr)) { extracted_value = gen_type_cast( extracted_value, LLVMTypeOf(extracted_value), - LLVMGetElementType(LLVMTypeOf(*addr)), + LLVMGetElementType(LLVMTypeOf(mut_addr)), context, builder, ); } - assert!(LLVMTypeOf(extracted_value) == LLVMGetElementType(LLVMTypeOf(*addr))); - LLVMBuildStore(builder, extracted_value, *addr); + assert!(LLVMTypeOf(extracted_value) == LLVMGetElementType(LLVMTypeOf(mut_addr))); + LLVMBuildStore(builder, extracted_value, mut_addr); } else { - let cloned_addr = LLVMInstructionClone(*addr); + let cloned_addr = LLVMInstructionClone(mut_addr); let new_addr = llvm_recursive_add(builder, cloned_addr, context); if LLVMTypeOf(extracted_value) != LLVMGetElementType(LLVMTypeOf(new_addr)) { extracted_value = gen_type_cast( From 97c7f8075f7738c6e91baa8972a87578f298b0fa Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sat, 26 Feb 2022 23:30:23 -0500 Subject: [PATCH 045/143] test4-1-linear, 4-2-linear, test4-3-linear all asserted and pass under run-opt and no-opt --- src/dios-egraphs/Diospyros/fail-tests/test4-1-linear.c | 4 ++++ src/dios-egraphs/Diospyros/fail-tests/test4-2-linear.c | 4 ++++ src/dios-egraphs/Diospyros/fail-tests/test4-3-linear.c | 4 ++++ src/dios-egraphs/Diospyros/fail-tests/test4-4-linear.c | 4 ++++ src/dios-egraphs/Diospyros/fail-tests/test4-5-linear.c | 4 ++++ 5 files changed, 20 insertions(+) diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-1-linear.c b/src/dios-egraphs/Diospyros/fail-tests/test4-1-linear.c index d1e8cf33..97726430 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test4-1-linear.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-1-linear.c @@ -13,6 +13,7 @@ #include #define 
SIZE 1 +#define DELTA 0.1f float sgn(float v) __attribute__((always_inline)); float naive_norm(float *x, int m) __attribute__((always_inline)); @@ -185,13 +186,16 @@ int main(void) { for (int i = 0; i < SIZE; i++) { printf("Q Output: %f\n", Q[i]); printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); } for (int i = 0; i < SIZE; i++) { printf("R Output: %f\n", R[i]); printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); } for (int i = 0; i < SIZE; i++) { printf("Q_T Output: %f\n", q_t[i]); printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-2-linear.c b/src/dios-egraphs/Diospyros/fail-tests/test4-2-linear.c index 428a10be..fc1db5ba 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test4-2-linear.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-2-linear.c @@ -13,6 +13,7 @@ #include #define SIZE 2 +#define DELTA 0.1f float sgn(float v) __attribute__((always_inline)); float naive_norm(float *x, int m) __attribute__((always_inline)); @@ -185,13 +186,16 @@ int main(void) { for (int i = 0; i < SIZE; i++) { printf("Q Output: %f\n", Q[i]); printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); } for (int i = 0; i < SIZE; i++) { printf("R Output: %f\n", R[i]); printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); } for (int i = 0; i < SIZE; i++) { printf("Q_T Output: %f\n", q_t[i]); printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-3-linear.c b/src/dios-egraphs/Diospyros/fail-tests/test4-3-linear.c index d400d018..4a9b7256 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test4-3-linear.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-3-linear.c @@ -13,6 +13,7 @@ #include #define SIZE 3 +#define DELTA 0.1f float sgn(float v) __attribute__((always_inline)); float naive_norm(float *x, int m) __attribute__((always_inline)); @@ -185,13 +186,16 @@ int main(void) { for (int i = 0; i < SIZE; i++) { printf("Q Output: %f\n", Q[i]); printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); } for (int i = 0; i < SIZE; i++) { printf("R Output: %f\n", R[i]); printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); } for (int i = 0; i < SIZE; i++) { printf("Q_T Output: %f\n", q_t[i]); printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-4-linear.c b/src/dios-egraphs/Diospyros/fail-tests/test4-4-linear.c index 92b1195d..5dc845dc 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test4-4-linear.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-4-linear.c @@ -13,6 +13,7 @@ #include #define SIZE 4 +#define DELTA 0.1f float sgn(float v) __attribute__((always_inline)); float naive_norm(float *x, int m) __attribute__((always_inline)); @@ -121,13 +122,16 @@ int main(void) { for (int i = 0; i < SIZE; i++) { printf("Q Output: %f\n", Q[i]); printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); } for (int i = 0; i < SIZE; i++) { printf("R Output: %f\n", R[i]); printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - 
expectedR[i]) < DELTA); } for (int i = 0; i < SIZE; i++) { printf("Q_T Output: %f\n", q_t[i]); printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-5-linear.c b/src/dios-egraphs/Diospyros/fail-tests/test4-5-linear.c index 04c2dffd..de55e82a 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test4-5-linear.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-5-linear.c @@ -13,6 +13,7 @@ #include #define SIZE 5 +#define DELTA 0.1f float sgn(float v) __attribute__((always_inline)); float naive_norm(float *x, int m) __attribute__((always_inline)); @@ -185,13 +186,16 @@ int main(void) { for (int i = 0; i < SIZE; i++) { printf("Q Output: %f\n", Q[i]); printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); } for (int i = 0; i < SIZE; i++) { printf("R Output: %f\n", R[i]); printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); } for (int i = 0; i < SIZE; i++) { printf("Q_T Output: %f\n", q_t[i]); printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); } } \ No newline at end of file From a1bb5cc6a7e317c79e95e4172ae7447260c238f0 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sat, 26 Feb 2022 23:31:21 -0500 Subject: [PATCH 046/143] move linear tests into llvm-tests --- .../Diospyros/{fail-tests => llvm-tests}/test4-1-linear.c | 0 .../Diospyros/{fail-tests => llvm-tests}/test4-2-linear.c | 0 .../Diospyros/{fail-tests => llvm-tests}/test4-3-linear.c | 0 .../Diospyros/{fail-tests => llvm-tests}/test4-4-linear.c | 0 .../Diospyros/{fail-tests => llvm-tests}/test4-5-linear.c | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename src/dios-egraphs/Diospyros/{fail-tests => llvm-tests}/test4-1-linear.c (100%) rename src/dios-egraphs/Diospyros/{fail-tests => llvm-tests}/test4-2-linear.c (100%) rename src/dios-egraphs/Diospyros/{fail-tests => llvm-tests}/test4-3-linear.c (100%) rename src/dios-egraphs/Diospyros/{fail-tests => llvm-tests}/test4-4-linear.c (100%) rename src/dios-egraphs/Diospyros/{fail-tests => llvm-tests}/test4-5-linear.c (100%) diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-1-linear.c b/src/dios-egraphs/Diospyros/llvm-tests/test4-1-linear.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/test4-1-linear.c rename to src/dios-egraphs/Diospyros/llvm-tests/test4-1-linear.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-2-linear.c b/src/dios-egraphs/Diospyros/llvm-tests/test4-2-linear.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/test4-2-linear.c rename to src/dios-egraphs/Diospyros/llvm-tests/test4-2-linear.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-3-linear.c b/src/dios-egraphs/Diospyros/llvm-tests/test4-3-linear.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/test4-3-linear.c rename to src/dios-egraphs/Diospyros/llvm-tests/test4-3-linear.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-4-linear.c b/src/dios-egraphs/Diospyros/llvm-tests/test4-4-linear.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/test4-4-linear.c rename to src/dios-egraphs/Diospyros/llvm-tests/test4-4-linear.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-5-linear.c b/src/dios-egraphs/Diospyros/llvm-tests/test4-5-linear.c similarity index 100% rename from 
src/dios-egraphs/Diospyros/fail-tests/test4-5-linear.c rename to src/dios-egraphs/Diospyros/llvm-tests/test4-5-linear.c From 5ba80daa872e442587e6074bb8e9310932b2834f Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sun, 27 Feb 2022 00:36:29 -0500 Subject: [PATCH 047/143] remove prints in lib.rs, tests1, 2, 3, 4-1, 4-2, 4-2-2, 4-3 all pass with no-opt and run-opt --- src/dios-egraphs/Diospyros/fail-tests/test1.c | 4 +++- src/dios-egraphs/Diospyros/fail-tests/test4-1.c | 3 +++ src/dios-egraphs/Diospyros/fail-tests/test4-2-2.c | 4 ++++ src/dios-egraphs/Diospyros/fail-tests/test4-2.c | 3 +++ src/dios-egraphs/Diospyros/fail-tests/test4-3.c | 3 +++ src/dios-egraphs/Diospyros/src/lib.rs | 14 ++++++-------- 6 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/dios-egraphs/Diospyros/fail-tests/test1.c b/src/dios-egraphs/Diospyros/fail-tests/test1.c index d3070f6b..8c131bfb 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test1.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test1.c @@ -92,6 +92,8 @@ int main(void) { printf("Expected X Output: %f\n", expectedx[i]); printf("E Output: %f\n", e[i]); printf("Expected E Output: %f\n", expectede[i]); - // assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedx[i] - x[i]) < DELTA); + assert(fabs(expectede[i] - e[i]) < DELTA); } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-1.c b/src/dios-egraphs/Diospyros/fail-tests/test4-1.c index 4f683f6b..b0fc07ae 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test4-1.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-1.c @@ -13,6 +13,7 @@ #include #define SIZE 1 +#define DELTA 0.1f float sgn(float v) __attribute__((always_inline)); float naive_norm(float *x, int m) __attribute__((always_inline)); @@ -196,12 +197,14 @@ int main(void) { for (int j = 0; j < SIZE; j++) { printf("Q Output: %f\n", Q[i * SIZE + j]); printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); } } for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { printf("R Output: %f\n", R[i * SIZE + j]); printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); } } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-2-2.c b/src/dios-egraphs/Diospyros/fail-tests/test4-2-2.c index efb2dfcf..63ea53cd 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test4-2-2.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-2-2.c @@ -13,6 +13,7 @@ #include #define SIZE 2 +#define DELTA 0.1f float sgn(float v) __attribute__((always_inline)); float naive_norm(float *x, int m) __attribute__((always_inline)); @@ -196,18 +197,21 @@ int main(void) { for (int j = 0; j < SIZE; j++) { printf("Q Output: %f\n", Q[i * SIZE + j]); printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); } } for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { printf("R Output: %f\n", R[i * SIZE + j]); printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); } } for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { printf("Q_T Output: %f\n", q_t[i * SIZE + j]); printf("Expected Q_T Output: %f\n", expectedq_t[i * SIZE + j]); + assert(fabs(expectedq_t[i] - q_t[i]) < DELTA); } } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-2.c b/src/dios-egraphs/Diospyros/fail-tests/test4-2.c index 
1bcca8d2..4898553e 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test4-2.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-2.c @@ -13,6 +13,7 @@ #include #define SIZE 2 +#define DELTA 0.1f float sgn(float v) __attribute__((always_inline)); float naive_norm(float *x, int m) __attribute__((always_inline)); @@ -192,12 +193,14 @@ int main(void) { for (int j = 0; j < SIZE; j++) { printf("Q Output: %f\n", Q[i * SIZE + j]); printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); } } for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { printf("R Output: %f\n", R[i * SIZE + j]); printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); } } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-3.c b/src/dios-egraphs/Diospyros/fail-tests/test4-3.c index 333d176f..cdb08413 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test4-3.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test4-3.c @@ -13,6 +13,7 @@ #include #define SIZE 3 +#define DELTA 0.1f float sgn(float v) __attribute__((always_inline)); float naive_norm(float *x, int m) __attribute__((always_inline)); @@ -195,12 +196,14 @@ int main(void) { for (int j = 0; j < SIZE; j++) { printf("Q Output: %f\n", Q[i * SIZE + j]); printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); } } for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { printf("R Output: %f\n", R[i * SIZE + j]); printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); } } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 9d5bcf86..4b23b809 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -736,19 +736,17 @@ pub fn optimize( llvm_to_egg(llvm_instrs, &mut llvm_arg_pairs, &mut node_to_arg); // optimization pass - // if print_opt { - println!("In"); - eprintln!("{}", expr.pretty(10)); - // } + if print_opt { + eprintln!("{}", expr.pretty(10)); + } let mut best = expr.clone(); if run_egg { let pair = rules::run(&expr, 180, true, !run_egg); best = pair.1; } - // if print_opt { - // println!("Out"); - // eprintln!("{}", best.pretty(10)); - // } + if print_opt { + eprintln!("{}", best.pretty(10)); + } // egg to llvm egg_to_llvm( From a198a96a0a7b4b49840cb0486b8c4b2e60012281 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sun, 27 Feb 2022 00:40:28 -0500 Subject: [PATCH 048/143] rename tests to be more descriptive, move tests to llvm-tests --- .../{fail-tests/test1.c => llvm-tests/qr-decomp-test-1.c} | 0 .../{fail-tests/test2.c => llvm-tests/qr-decomp-test-2.c} | 0 .../{fail-tests/test3.c => llvm-tests/qr-decomp-test-3.c} | 0 .../{test4-1-linear.c => qr-decomp-test-4-1-linear-array.c} | 0 .../{fail-tests/test4-1.c => llvm-tests/qr-decomp-test-4-1.c} | 0 .../{fail-tests/test4-2-2.c => llvm-tests/qr-decomp-test-4-2-2.c} | 0 .../{test4-2-linear.c => qr-decomp-test-4-2-linear-array.c} | 0 .../{fail-tests/test4-2.c => llvm-tests/qr-decomp-test-4-2.c} | 0 .../{test4-3-linear.c => qr-decomp-test-4-3-linear-array.c} | 0 .../{fail-tests/test4-3.c => llvm-tests/qr-decomp-test-4-3.c} | 0 .../{test4-4-linear.c => qr-decomp-test-4-4-linear-array.c} | 0 .../{test4-5-linear.c => qr-decomp-test-4-5-linear-array.c} | 0 12 files changed, 0 insertions(+), 0 deletions(-) rename src/dios-egraphs/Diospyros/{fail-tests/test1.c => 
llvm-tests/qr-decomp-test-1.c} (100%) rename src/dios-egraphs/Diospyros/{fail-tests/test2.c => llvm-tests/qr-decomp-test-2.c} (100%) rename src/dios-egraphs/Diospyros/{fail-tests/test3.c => llvm-tests/qr-decomp-test-3.c} (100%) rename src/dios-egraphs/Diospyros/llvm-tests/{test4-1-linear.c => qr-decomp-test-4-1-linear-array.c} (100%) rename src/dios-egraphs/Diospyros/{fail-tests/test4-1.c => llvm-tests/qr-decomp-test-4-1.c} (100%) rename src/dios-egraphs/Diospyros/{fail-tests/test4-2-2.c => llvm-tests/qr-decomp-test-4-2-2.c} (100%) rename src/dios-egraphs/Diospyros/llvm-tests/{test4-2-linear.c => qr-decomp-test-4-2-linear-array.c} (100%) rename src/dios-egraphs/Diospyros/{fail-tests/test4-2.c => llvm-tests/qr-decomp-test-4-2.c} (100%) rename src/dios-egraphs/Diospyros/llvm-tests/{test4-3-linear.c => qr-decomp-test-4-3-linear-array.c} (100%) rename src/dios-egraphs/Diospyros/{fail-tests/test4-3.c => llvm-tests/qr-decomp-test-4-3.c} (100%) rename src/dios-egraphs/Diospyros/llvm-tests/{test4-4-linear.c => qr-decomp-test-4-4-linear-array.c} (100%) rename src/dios-egraphs/Diospyros/llvm-tests/{test4-5-linear.c => qr-decomp-test-4-5-linear-array.c} (100%) diff --git a/src/dios-egraphs/Diospyros/fail-tests/test1.c b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-1.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/test1.c rename to src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-1.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/test2.c b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-2.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/test2.c rename to src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-2.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/test3.c b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-3.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/test3.c rename to src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-3.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/test4-1-linear.c b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-1-linear-array.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/test4-1-linear.c rename to src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-1-linear-array.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-1.c b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-1.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/test4-1.c rename to src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-1.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-2-2.c b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-2-2.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/test4-2-2.c rename to src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-2-2.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/test4-2-linear.c b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-2-linear-array.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/test4-2-linear.c rename to src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-2-linear-array.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-2.c b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-2.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/test4-2.c rename to src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-2.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/test4-3-linear.c 
b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-3-linear-array.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/test4-3-linear.c rename to src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-3-linear-array.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/test4-3.c b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-3.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/test4-3.c rename to src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-3.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/test4-4-linear.c b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-4-linear-array.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/test4-4-linear.c rename to src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-4-linear-array.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/test4-5-linear.c b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-5-linear-array.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/test4-5-linear.c rename to src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-5-linear-array.c From 5fc1a45a13079d7fc53cd575d13ded454cd96d3b Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sun, 27 Feb 2022 01:34:42 -0500 Subject: [PATCH 049/143] qr decomp works --- ... => qr-decomp-fixed-size-debug-template.c} | 0 .../fail-tests/qr-decomp-no-local-array.c | 304 ++++++++++++++++++ .../fail-tests/simplified_qr_decomp.c | 290 ----------------- .../loop-inline.c} | 0 .../naive-norm-inline.c} | 0 .../qr-decomp-modified-no-local-array.c} | 3 + .../sgn-inline.c} | 0 .../sgn-naive-norm-inline-1.c} | 0 .../sgn-naive-norm-inline-2.c} | 0 9 files changed, 307 insertions(+), 290 deletions(-) rename src/dios-egraphs/Diospyros/fail-tests/{qr-decomp-fixed-size-debug.c => qr-decomp-fixed-size-debug-template.c} (100%) create mode 100644 src/dios-egraphs/Diospyros/fail-tests/qr-decomp-no-local-array.c delete mode 100644 src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c rename src/dios-egraphs/Diospyros/{fail-tests/loop-inline-PASS.c => llvm-tests/loop-inline.c} (100%) rename src/dios-egraphs/Diospyros/{fail-tests/naive-norm-inline-PASS.c => llvm-tests/naive-norm-inline.c} (100%) rename src/dios-egraphs/Diospyros/{fail-tests/modified_qr_1_regress_PASS.c => llvm-tests/qr-decomp-modified-no-local-array.c} (97%) rename src/dios-egraphs/Diospyros/{fail-tests/sgn-inline-PASS.c => llvm-tests/sgn-inline.c} (100%) rename src/dios-egraphs/Diospyros/{fail-tests/sgn-naive-norm-inline-PASS.c => llvm-tests/sgn-naive-norm-inline-1.c} (100%) rename src/dios-egraphs/Diospyros/{fail-tests/sgn-naive-norm-inline2-PASS.c => llvm-tests/sgn-naive-norm-inline-2.c} (100%) diff --git a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-debug.c b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-debug-template.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-debug.c rename to src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-debug-template.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-no-local-array.c new file mode 100644 index 00000000..44b534ee --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-no-local-array.c @@ -0,0 +1,304 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int 
m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], float u[SIZE], + float v[SIZE], float q_min[SIZE * SIZE], + float q_t[SIZE * SIZE], float res[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1, 2, 3, 4}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float I[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float u[SIZE] = {0.0f}; + float v[SIZE] = {0.0f}; + float q_min[SIZE * SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + float res[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, I, x, e, u, v, q_min, q_t, res); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c b/src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c deleted file mode 100644 index e20b96ac..00000000 --- a/src/dios-egraphs/Diospyros/fail-tests/simplified_qr_decomp.c +++ /dev/null @@ -1,290 +0,0 @@ -// Here I remove the need for using calloc and free by preallocating larger -// arrays I want to eliminate any sources of error first. 
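
The new qr-decomp-no-local-array.c above and the file deleted here apply the same refactoring: every calloc'd or memcpy'd temporary becomes a caller-owned buffer passed in as a parameter, so the routine under test avoids calls into externally linked allocation routines, which is the concern these comments describe. A minimal, self-contained sketch of that transformation on a made-up helper follows; scale_rows_local and scale_rows_prealloc are illustrative names, not functions from this repository.

#include <stdio.h>
#include <stdlib.h>

#define SIZE 4

/* Before: the helper allocates and frees its own scratch space. */
static void scale_rows_local(const float *a, float *out)
{
    float *tmp = calloc(SIZE, sizeof(float));
    if (tmp == NULL)
        return;
    for (int i = 0; i < SIZE; i++)
        tmp[i] = 2.0f * a[i];
    for (int i = 0; i < SIZE; i++)
        out[i] = tmp[i] + 1.0f;
    free(tmp);
}

/* After: the caller owns every buffer, including the scratch array, so the
   helper body contains no calloc/free/memcpy calls at all. */
static void scale_rows_prealloc(const float *a, float *out, float *tmp)
{
    for (int i = 0; i < SIZE; i++)
        tmp[i] = 2.0f * a[i];
    for (int i = 0; i < SIZE; i++)
        out[i] = tmp[i] + 1.0f;
}

int main(void)
{
    float a[SIZE] = {1.0f, 2.0f, 3.0f, 4.0f};
    float out1[SIZE], out2[SIZE], tmp[SIZE];
    scale_rows_local(a, out1);
    scale_rows_prealloc(a, out2, tmp);
    for (int i = 0; i < SIZE; i++)
        printf("%f %f\n", out1[i], out2[i]);   /* both variants produce the same result */
    return 0;
}
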
-// I also remove all references to memcpy as well -// This is to isolate the error so that it is not because of an externally -// linked program - -#include -#include -#include -#include -#include -#include -#include - -#define SIZE 2 - -float sgn(float v) __attribute__((always_inline)); -float naive_norm(float *x, int m) __attribute__((always_inline)); -void naive_fixed_transpose(float *a) __attribute__((always_inline)); -void naive_fixed_matrix_multiply(float *a, float *b, float *c) - __attribute__((always_inline)); - -float sgn(float v) { return (v > 0) - (v < 0); } - -float no_opt_sgn(float v) { return (v > 0) - (v < 0); } - -float naive_norm(float *x, int m) { - float sum = 0; - for (int i = 0; i < m; i++) { - sum += x[i] * x[i]; - } - return sqrtf(sum); -} - -float no_opt_naive_norm(float *x, int m) { - float sum = 0; - for (int i = 0; i < m; i++) { - sum += x[i] * x[i]; - } - return sqrtf(sum); -} - -// Naive with fixed size -void naive_fixed_transpose(float a[SIZE]) { - for (int i = 0; i < SIZE; i++) { - for (int j = i + 1; j < SIZE; j++) { - float tmp = a[i * SIZE + j]; - a[i * SIZE + j] = a[j * SIZE + i]; - a[j * SIZE + i] = tmp; - } - } -} - -void no_opt_naive_fixed_transpose(float a[SIZE]) { - for (int i = 0; i < SIZE; i++) { - for (int j = i + 1; j < SIZE; j++) { - float tmp = a[i * SIZE + j]; - a[i * SIZE + j] = a[j * SIZE + i]; - a[j * SIZE + i] = tmp; - } - } -} - -void naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], float c[SIZE]) { - for (int y = 0; y < SIZE; y++) { - for (int x = 0; x < SIZE; x++) { - c[SIZE * y + x] = 0; - for (int k = 0; k < SIZE; k++) { - c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; - } - } - } -} - -void no_opt_naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], - float c[SIZE]) { - for (int y = 0; y < SIZE; y++) { - for (int x = 0; x < SIZE; x++) { - c[SIZE * y + x] = 0; - for (int k = 0; k < SIZE; k++) { - c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; - } - } - } -} - -void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { - for (int i = 0; i < SIZE * SIZE; i++) { - R[i] = A[i]; - } - - // Build identity matrix of size SIZE * SIZE - // No Calloc is used here. - float I[SIZE * SIZE] = {0.0f}; - // float *I = (float *)calloc(sizeof(float), SIZE * SIZE); - for (int i = 0; i < SIZE; i++) { - for (int j = 0; j < SIZE; j++) { - I[i * SIZE + j] = (i == j); - } - } - - // Householder - for (int k = 0; k < SIZE - 1; k++) { - int32_t m = SIZE - k; - - float x[m]; - for (int i = 0; i < m; i++) { - x[i] = 0.0f; - } - float e[m]; - for (int i = 0; i < m; i++) { - e[i] = 0.0f; - } - for (int i = 0; i < m; i++) { - int row = k + i; - x[i] = 1.0f; - e[i] = 2.0f; - } - - float alpha = -sgn(x[0]) * naive_norm(x, m); - - // float u[SIZE] = {0}; - // float v[SIZE] = {0}; - // for (int i = 0; i < m; i++) { - // u[i] = x[i] + alpha * e[i]; - // } - // float norm_u = naive_norm(u, m); - // for (int i = 0; i < m; i++) { - // v[i] = u[i] / (norm_u + 0.00001f); - // } - - // float q_min[m * m]; - // for (int i = 0; i < m; i++) { - // for (int j = 0; j < m; j++) { - // q_min[i * m + j] = 0.0f; - // } - // } - // for (int i = 0; i < m; i++) { - // for (int j = 0; j < m; j++) { - // float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; - // q_min[i * m + j] = q_min_i; - // } - // } - - // float q_t[SIZE * SIZE] = {0}; - // for (int i = 0; i < SIZE; i++) { - // for (int j = 0; j < SIZE; j++) { - // float q_t_i; - // if ((i < k) || (j < k)) { - // q_t_i = (i == j) ? 
1.0f : 0.0f; - // } else { - // q_t_i = q_min[(i - k) * m + (j - k)]; - // } - // q_t[i * SIZE + j] = q_t_i; - // } - // } - float q_t[SIZE * SIZE] = {alpha}; - if (k == 0) { - for (int i = 0; i < SIZE * SIZE; i++) { - Q[i] = q_t[i]; - } - naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A - } - // else { - // float res[SIZE * SIZE] = {0.0f}; - // naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A - // for (int i = 0; i < SIZE * SIZE; i++) { - // Q[i] = res[i]; - // } - // naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A - // for (int i = 0; i < SIZE * SIZE; i++) { - // R[i] = res[i]; - // } - // } - } - naive_fixed_transpose(Q); -} - -void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { - for (int i = 0; i < SIZE * SIZE; i++) { - R[i] = A[i]; - } - - float I[SIZE * SIZE] = {0.0f}; - for (int i = 0; i < SIZE; i++) { - for (int j = 0; j < SIZE; j++) { - I[i * SIZE + j] = (i == j); - } - } - - // Householder - for (int k = 0; k < SIZE - 1; k++) { - int32_t m = SIZE - k; - - float x[m]; - for (int i = 0; i < m; i++) { - x[i] = 0.0f; - } - float e[m]; - for (int i = 0; i < m; i++) { - e[i] = 0.0f; - } - for (int i = 0; i < m; i++) { - int row = k + i; - x[i] = 1.0f; - e[i] = 2.0f; - } - - float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); - - // float u[SIZE] = {0}; - // float v[SIZE] = {0}; - // for (int i = 0; i < m; i++) { - // u[i] = x[i] + alpha * e[i]; - // } - // float norm_u = naive_norm(u, m); - // for (int i = 0; i < m; i++) { - // v[i] = u[i] / (norm_u + 0.00001f); - // } - - // float q_min[m * m]; - // for (int i = 0; i < m; i++) { - // for (int j = 0; j < m; j++) { - // q_min[i * m + j] = 0.0f; - // } - // } - // for (int i = 0; i < m; i++) { - // for (int j = 0; j < m; j++) { - // float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; - // q_min[i * m + j] = q_min_i; - // } - // } - - // float q_t[SIZE * SIZE] = {0}; - // for (int i = 0; i < SIZE; i++) { - // for (int j = 0; j < SIZE; j++) { - // float q_t_i; - // if ((i < k) || (j < k)) { - // q_t_i = (i == j) ? 
1.0f : 0.0f; - // } else { - // q_t_i = q_min[(i - k) * m + (j - k)]; - // } - // q_t[i * SIZE + j] = q_t_i; - // } - // } - - float q_t[SIZE * SIZE] = {alpha}; - if (k == 0) { - for (int i = 0; i < SIZE * SIZE; i++) { - Q[i] = q_t[i]; - } - no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A - } - // else { - // float res[SIZE * SIZE] = {0.0f}; - // no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A - // for (int i = 0; i < SIZE * SIZE; i++) { - // Q[i] = res[i]; - // } - // no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A - // for (int i = 0; i < SIZE * SIZE; i++) { - // R[i] = res[i]; - // } - // } - } - no_opt_naive_fixed_transpose(Q); -} - -int main(void) { - float A[SIZE * SIZE] = {1, 2, 3, 4}; - float Q[SIZE * SIZE] = {0, 0, 0, 0}; - float R[SIZE * SIZE] = {0, 0, 0, 0}; - naive_fixed_qr_decomp(A, Q, R); - float expectedQ[SIZE * SIZE] = {0, 0, 0, 0}; - float expectedR[SIZE * SIZE] = {0, 0, 0, 0}; - no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); - - for (int i = 0; i < SIZE; i++) { - for (int j = 0; j < SIZE; j++) { - printf("Q Output: %f\n", Q[i * SIZE + j]); - printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); - } - } - for (int i = 0; i < SIZE; i++) { - for (int j = 0; j < SIZE; j++) { - printf("R Output: %f\n", R[i * SIZE + j]); - printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); - } - } -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/loop-inline-PASS.c b/src/dios-egraphs/Diospyros/llvm-tests/loop-inline.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/loop-inline-PASS.c rename to src/dios-egraphs/Diospyros/llvm-tests/loop-inline.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/naive-norm-inline-PASS.c b/src/dios-egraphs/Diospyros/llvm-tests/naive-norm-inline.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/naive-norm-inline-PASS.c rename to src/dios-egraphs/Diospyros/llvm-tests/naive-norm-inline.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/modified_qr_1_regress_PASS.c b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-modified-no-local-array.c similarity index 97% rename from src/dios-egraphs/Diospyros/fail-tests/modified_qr_1_regress_PASS.c rename to src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-modified-no-local-array.c index 04841b87..6f030330 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/modified_qr_1_regress_PASS.c +++ b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-modified-no-local-array.c @@ -13,6 +13,7 @@ #include #define SIZE 2 +#define DELTA 0.1f float sgn(float v) __attribute__((always_inline)); float naive_norm(float *x, int m) __attribute__((always_inline)); @@ -170,12 +171,14 @@ int main(void) { for (int j = 0; j < SIZE; j++) { printf("Q Output: %f\n", Q[i * SIZE + j]); printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); } } for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { printf("R Output: %f\n", R[i * SIZE + j]); printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); } } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/sgn-inline-PASS.c b/src/dios-egraphs/Diospyros/llvm-tests/sgn-inline.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/sgn-inline-PASS.c rename to src/dios-egraphs/Diospyros/llvm-tests/sgn-inline.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/sgn-naive-norm-inline-PASS.c 
b/src/dios-egraphs/Diospyros/llvm-tests/sgn-naive-norm-inline-1.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/sgn-naive-norm-inline-PASS.c rename to src/dios-egraphs/Diospyros/llvm-tests/sgn-naive-norm-inline-1.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/sgn-naive-norm-inline2-PASS.c b/src/dios-egraphs/Diospyros/llvm-tests/sgn-naive-norm-inline-2.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/sgn-naive-norm-inline2-PASS.c rename to src/dios-egraphs/Diospyros/llvm-tests/sgn-naive-norm-inline-2.c From 023067d5d59e24500d147072316fb2a62f03214c Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sun, 27 Feb 2022 01:36:30 -0500 Subject: [PATCH 050/143] move directories --- .../{fail-tests => llvm-tests}/qr-decomp-no-local-array.c | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/dios-egraphs/Diospyros/{fail-tests => llvm-tests}/qr-decomp-no-local-array.c (100%) diff --git a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-no-local-array.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/qr-decomp-no-local-array.c rename to src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-no-local-array.c From 9c6b13d8a2f90bd499c268ce4e12828f6decab17 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 3 Mar 2022 00:09:41 -0500 Subject: [PATCH 051/143] checkpoint before phi operration --- ...10-by-10-random-qr-decomp-no-local-array.c | 313 ++++++++++++++++++ ...15-by-15-random-qr-decomp-no-local-array.c | 313 ++++++++++++++++++ .../2-by-2-random-qr-decomp-no-local-array.c | 313 ++++++++++++++++++ ...20-by-20-random-qr-decomp-no-local-array.c | 313 ++++++++++++++++++ .../5-by-5-random-qr-decomp-no-local-array.c | 313 ++++++++++++++++++ 5 files changed, 1565 insertions(+) create mode 100644 src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-qr-decomp-no-local-array.c create mode 100644 src/dios-egraphs/Diospyros/randomized-tests/15-by-15-random-qr-decomp-no-local-array.c create mode 100644 src/dios-egraphs/Diospyros/randomized-tests/2-by-2-random-qr-decomp-no-local-array.c create mode 100644 src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-qr-decomp-no-local-array.c create mode 100644 src/dios-egraphs/Diospyros/randomized-tests/5-by-5-random-qr-decomp-no-local-array.c diff --git a/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-qr-decomp-no-local-array.c new file mode 100644 index 00000000..784995ca --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-qr-decomp-no-local-array.c @@ -0,0 +1,313 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 10 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); 
+} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], float u[SIZE], + float v[SIZE], float q_min[SIZE * SIZE], + float q_t[SIZE * SIZE], float res[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float I[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float u[SIZE] = {0.0f}; + float v[SIZE] = {0.0f}; + float q_min[SIZE * SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + float res[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, I, x, e, u, v, q_min, q_t, res); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/15-by-15-random-qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/randomized-tests/15-by-15-random-qr-decomp-no-local-array.c new file mode 100644 index 00000000..52e6701c --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/15-by-15-random-qr-decomp-no-local-array.c @@ -0,0 +1,313 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 15 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = 
tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], float u[SIZE], + float v[SIZE], float q_min[SIZE * SIZE], + float q_t[SIZE * SIZE], float res[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float I[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float u[SIZE] = {0.0f}; + float v[SIZE] = {0.0f}; + float q_min[SIZE * SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + float res[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, I, x, e, u, v, q_min, q_t, res); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/2-by-2-random-qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/randomized-tests/2-by-2-random-qr-decomp-no-local-array.c new file mode 100644 index 00000000..b5558092 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/2-by-2-random-qr-decomp-no-local-array.c @@ -0,0 +1,313 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + 
} +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], float u[SIZE], + float v[SIZE], float q_min[SIZE * SIZE], + float q_t[SIZE * SIZE], float res[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float I[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float u[SIZE] = {0.0f}; + float v[SIZE] = {0.0f}; + float q_min[SIZE * SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + float res[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, I, x, e, u, v, q_min, q_t, res); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-qr-decomp-no-local-array.c new file mode 100644 index 00000000..f8751161 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-qr-decomp-no-local-array.c @@ -0,0 +1,313 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 20 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = 
tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], float u[SIZE], + float v[SIZE], float q_min[SIZE * SIZE], + float q_t[SIZE * SIZE], float res[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float I[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float u[SIZE] = {0.0f}; + float v[SIZE] = {0.0f}; + float q_min[SIZE * SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + float res[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, I, x, e, u, v, q_min, q_t, res); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/5-by-5-random-qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/randomized-tests/5-by-5-random-qr-decomp-no-local-array.c new file mode 100644 index 00000000..b874e4c8 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/5-by-5-random-qr-decomp-no-local-array.c @@ -0,0 +1,313 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + 
} +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], float u[SIZE], + float v[SIZE], float q_min[SIZE * SIZE], + float q_t[SIZE * SIZE], float res[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float I[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float u[SIZE] = {0.0f}; + float v[SIZE] = {0.0f}; + float q_min[SIZE * SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + float res[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, I, x, e, u, v, q_min, q_t, res); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file From 46dfeea0faab4326821ad0c2ebe661e44f8ca799 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 3 Mar 2022 18:47:25 -0500 Subject: [PATCH 052/143] comment changes to rollback --- src/dios-egraphs/Diospyros/src/lib.rs | 67 +++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 4 deletions(-) diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 4b23b809..a0ce30ec 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -33,9 +33,9 @@ extern "C" { fn isa_constaggregatezero(val: LLVMValueRef) -> bool; fn _isa_constaggregate(val: LLVMValueRef) -> bool; fn isa_integertype(val: LLVMValueRef) -> bool; - fn isa_intptr(val: LLVMValueRef) -> bool; + fn _isa_intptr(val: LLVMValueRef) -> bool; fn isa_floatptr(val: LLVMValueRef) -> bool; - fn isa_floattype(val: LLVMValueRef) -> bool; + fn _isa_floattype(val: LLVMValueRef) -> bool; fn isa_bitcast(val: LLVMValueRef) -> bool; fn isa_sqrt32(val: LLVMValueRef) -> bool; fn isa_sqrt64(val: LLVMValueRef) -> bool; @@ -66,6 +66,7 @@ type GEPMap = BTreeMap<(Symbol, Symbol), LLVMValueRef>; static mut ARG_IDX: i32 = 0; static mut CALL_IDX: i32 = 0; static mut NODE_IDX: u32 = 0; +// static mut PHI_IDX: u32 = 0; // unsafe fn gen_symbol_name() -> String { // SYMBOL_IDX += 1; @@ -93,6 +94,13 @@ unsafe fn gen_call_name() -> String { result } +// unsafe fn gen_phi_name() -> String { +// PHI_IDX += 1; +// let string = "PHI".to_string(); +// let result = format!("{}{}", string, PHI_IDX.to_string()); +// result +// } + /// Converts LLVMValueRef binop to equivalent VecLang Binop node unsafe fn choose_binop(bop: &LLVMValueRef, ids: [Id; 
2]) -> VecLang { match LLVMGetInstructionOpcode(*bop) { @@ -802,6 +810,7 @@ enum LLVMOpType { Sqrt32, Sqrt64, FPExt, + // Phi, } // unsafe fn is_pow2(n: u32) -> bool { @@ -960,7 +969,11 @@ unsafe fn match_llvm_op(expr: &LLVMValueRef) -> LLVMOpType { return LLVMOpType::Sqrt64; } else if isa_fpext(*expr) { return LLVMOpType::FPExt; - } else { + } + // else if isa_phi(*expr) { + // return LLVMOpType::Phi; + // } + else { LLVMDumpValue(*expr); println!(); panic!("ref_to_egg: Unmatched case for LLVMValueRef {:?}", *expr); @@ -1479,6 +1492,35 @@ unsafe fn bitcast_to_egg( return result; } +// unsafe fn phi_to_egg( +// expr: LLVMValueRef, +// mut enode_vec: Vec, +// next_idx: i32, +// gep_map: &mut GEPMap, +// _store_map: &mut StoreMap, +// _id_map: &mut IdMap, +// _symbol_map: &mut SymbolMap, +// _llvm_arg_pairs: &Vec, +// _node_to_arg: &mut Vec, +// ) -> (Vec, i32) { +// assert!(isa_phi(expr)); + +// let phi_name = gen_phi_name(); +// let symbol1 = Symbol::from(phi_name.clone()); +// enode_vec.push(VecLang::Symbol(symbol1)); +// let symbol2 = Symbol::from(phi_name); +// enode_vec.push(VecLang::Symbol(symbol2)); + +// let get_node = VecLang::Get([ +// Id::from((next_idx) as usize), +// Id::from((next_idx + 1) as usize), +// ]); +// (*gep_map).insert((symbol1, symbol2), expr); +// enode_vec.push(get_node); + +// return (enode_vec, next_idx + 3); +// } + unsafe fn ref_to_egg( expr: LLVMValueRef, mut enode_vec: Vec, @@ -1660,6 +1702,17 @@ unsafe fn ref_to_egg( llvm_arg_pairs, node_to_arg, ), + // LLVMOpType::Phi => phi_to_egg( + // expr, + // enode_vec, + // next_idx, + // gep_map, + // store_map, + // id_map, + // symbol_map, + // llvm_arg_pairs, + // node_to_arg, + // ), }; return (vec, next_idx); } @@ -1800,7 +1853,13 @@ unsafe fn translate_egg( let cloned_sitofp = LLVMInstructionClone(*gep_value); let new_sitofp = llvm_recursive_add(builder, cloned_sitofp, context); new_sitofp - } else { + } + // else if isa_phi(*gep_value) { + // let cloned_phi = LLVMInstructionClone(*gep_value); + // let new_phi = llvm_recursive_add(builder, cloned_phi, context); + // new_phi + // } + else { LLVMBuildLoad(builder, *gep_value, b"\0".as_ptr() as *const _) }; load_value From 728782165daab975210df0732cdfa09ec542c585 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 3 Mar 2022 20:52:35 -0500 Subject: [PATCH 053/143] collecting instrs, select fix works, 10 by 10 passes random --- src/dios-egraphs/Diospyros/diospyros.cpp | 70 ++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 5 deletions(-) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index c1ac8ad3..9e024cbc 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -67,12 +67,16 @@ const string SQRT32_FUNCTION_NAME = "llvm.sqrt.f32"; const int SQRT_OPERATOR = 3; const int BINARY_OPERATOR = 2; +/** Number of instructions to search back and see if translated - we keep less + * to search faster; but actually cutting it short is unsound. 
*/ +const int NUM_TRANSLATED_INSTRUCTIONS = 1000; + /** * Fresh counters for temps and array generation */ -int FRESH_INT_COUNTER = 0; -int FRESH_ARRAY_COUNTER = 0; -int FRESH_TEMP_COUNTER = 0; +static int FRESH_INT_COUNTER = 0; +static int FRESH_ARRAY_COUNTER = 0; +static int FRESH_TEMP_COUNTER = 0; /** * Generates a Fresh Index @@ -529,6 +533,7 @@ struct DiospyrosPass : public FunctionPass { return false; } bool has_changes = false; + std::vector translated_exprs = {}; for (auto &B : F) { // We skip over basic blocks without floating point types bool has_float = false; @@ -538,6 +543,13 @@ struct DiospyrosPass : public FunctionPass { } } if (!has_float) { + for (auto &I : B) { + auto *op = wrap(dyn_cast(&I)); + LLVMPair new_pair; + new_pair.original_value = op; + new_pair.new_value = op; + translated_exprs.push_back(new_pair); + } continue; } // We also skip over all basic blocks without stores @@ -548,6 +560,32 @@ struct DiospyrosPass : public FunctionPass { } } if (!has_store) { + for (auto &I : B) { + auto *op = wrap(dyn_cast(&I)); + LLVMPair new_pair; + new_pair.original_value = op; + new_pair.new_value = op; + translated_exprs.push_back(new_pair); + } + continue; + } + + // We also skip over all basic blocks with Select as that is not + // translatable into Egg + bool has_select = false; + for (auto &I : B) { + if (auto *op = dyn_cast(&I)) { + has_select = true; + } + } + if (has_select) { + for (auto &I : B) { + auto *op = wrap(dyn_cast(&I)); + LLVMPair new_pair; + new_pair.original_value = op; + new_pair.new_value = op; + translated_exprs.push_back(new_pair); + } continue; } @@ -581,7 +619,7 @@ struct DiospyrosPass : public FunctionPass { int vec_length = vectorization_accumulator.size(); int counter = 0; - std::vector translated_exprs = {}; + // std::vector translated_exprs = {}; for (auto &vec : vectorization_accumulator) { ++counter; if (not vec.empty()) { @@ -602,10 +640,21 @@ struct DiospyrosPass : public FunctionPass { int size = pair.llvm_pointer_size; LLVMPair const *expr_array = pair.llvm_pointer; - translated_exprs = {}; + // translated_exprs = {}; for (int i = 0; i < size; i++) { translated_exprs.push_back(expr_array[i]); } + // Trim down translated_exprs + std::vector new_translated_exprs = {}; + if (translated_exprs.size() >= + NUM_TRANSLATED_INSTRUCTIONS) { + for (int i = 0; i < NUM_TRANSLATED_INSTRUCTIONS; i++) { + LLVMPair final_instr = translated_exprs.back(); + translated_exprs.pop_back(); + new_translated_exprs.push_back(final_instr); + } + translated_exprs = new_translated_exprs; + } } } std::reverse(bb_instrs.begin(), bb_instrs.end()); @@ -618,6 +667,17 @@ struct DiospyrosPass : public FunctionPass { } BasicBlock::InstListType &final_instrs = B.getInstList(); final_instrs.push_back(cloned_terminator); + + // Trim down translated_exprs + std::vector new_translated_exprs = {}; + if (translated_exprs.size() >= NUM_TRANSLATED_INSTRUCTIONS) { + for (int i = 0; i < NUM_TRANSLATED_INSTRUCTIONS; i++) { + LLVMPair final_instr = translated_exprs.back(); + translated_exprs.pop_back(); + new_translated_exprs.push_back(final_instr); + } + translated_exprs = new_translated_exprs; + } } return true; }; From 70b96a1c7e559a7e5899bd133c3e966aeb351dda Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 4 Mar 2022 03:02:33 -0500 Subject: [PATCH 054/143] starting to deal with memsets --- src/dios-egraphs/Diospyros/diospyros.cpp | 119 +++++++++++++++--- .../{local-array.c => local-array-1.c} | 4 +- .../Diospyros/fail-tests/local-array-2.c | 18 ++- 
.../Diospyros/fail-tests/local-array-3.c | 16 ++- .../Diospyros/fail-tests/local-array-4.c | 4 +- src/dios-egraphs/Diospyros/src/lib.rs | 33 +++-- 6 files changed, 153 insertions(+), 41 deletions(-) rename src/dios-egraphs/Diospyros/fail-tests/{local-array.c => local-array-1.c} (77%) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 9e024cbc..3e5da187 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -13,6 +13,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" @@ -516,6 +517,50 @@ extern "C" bool dfs_llvm_value_ref(LLVMValueRef current_instr, return dfs_llvm_instrs(current_user, match_user); } +Instruction *dfs_instructions(Instruction *current_instr, + std::vector &translated_exprs, + BasicBlock &B) { + for (LLVMPair pair : translated_exprs) { + Instruction *original_val = + dyn_cast(unwrap(pair.original_value)); + Instruction *new_val = dyn_cast(unwrap(pair.new_value)); + if (current_instr == original_val) { + return new_val; + } + } + + Instruction *cloned_instr = current_instr->clone(); + + int num_operands = current_instr->getNumOperands(); + if (num_operands == 0) { + LLVMPair new_pair; + new_pair.original_value = wrap(current_instr); + new_pair.new_value = wrap(cloned_instr); + translated_exprs.push_back(new_pair); + + BasicBlock::InstListType &intermediate_instrs = B.getInstList(); + intermediate_instrs.push_back(cloned_instr); + return cloned_instr; + } + + for (int i = 0; i < num_operands; i++) { + Instruction *arg = dyn_cast(current_instr->getOperand(i)); + if (arg) { + Instruction *cloned_arg = + dfs_instructions(arg, translated_exprs, B); + cloned_instr->setOperand(i, cloned_arg); + } + } + LLVMPair new_pair; + new_pair.original_value = wrap(current_instr); + new_pair.new_value = wrap(cloned_instr); + translated_exprs.push_back(new_pair); + + BasicBlock::InstListType &intermediate_instrs = B.getInstList(); + intermediate_instrs.push_back(cloned_instr); + return cloned_instr; +} + /** * Below is the main DiospyrosPass that activates the Rust lib.rs code, * which calls the Egg vectorizer and rewrites the optimized code in place. 
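The dfs_instructions helper added above re-emits an instruction and its operand chain into the rewritten basic block, reusing any clone already recorded in translated_exprs; the same original-to-clone lookup recurs on the Rust side over llvm_arg_pairs in the later commits. A minimal sketch of that lookup, assuming the pass's LLVMPair type and using a hypothetical helper name, not code from the patch:

    #include "llvm/IR/Instruction.h"
    #include <vector>

    // Sketch only: return the clone already emitted for `original` during this
    // run, or nullptr so the caller clones it and records a fresh LLVMPair.
    // lookup_translated is a hypothetical name; LLVMPair is the pass's
    // original_value/new_value pair type.
    static llvm::Instruction *lookup_translated(
        llvm::Instruction *original,
        const std::vector<LLVMPair> &translated_exprs) {
        for (const LLVMPair &pair : translated_exprs) {
            if (llvm::unwrap(pair.original_value) == original) {
                return llvm::dyn_cast<llvm::Instruction>(
                    llvm::unwrap(pair.new_value));
            }
        }
        return nullptr;  // not translated yet
    }

Because translated_exprs only grows within a run (modulo the NUM_TRANSLATED_INSTRUCTIONS cap introduced in the previous commit), this linear scan is the whole cache policy; the cap trades scan time against the unsoundness its own comment warns about.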
@@ -553,13 +598,19 @@ struct DiospyrosPass : public FunctionPass { continue; } // We also skip over all basic blocks without stores - bool has_store = false; + bool has_store_or_mem_intrinsic = false; for (auto &I : B) { if (auto *op = dyn_cast(&I)) { - has_store = true; + has_store_or_mem_intrinsic = true; + } else if (auto *op = dyn_cast(&I)) { + has_store_or_mem_intrinsic = true; + } else if (auto *op = dyn_cast(&I)) { + has_store_or_mem_intrinsic = true; + } else if (auto *op = dyn_cast(&I)) { + has_store_or_mem_intrinsic = true; } } - if (!has_store) { + if (!has_store_or_mem_intrinsic) { for (auto &I : B) { auto *op = wrap(dyn_cast(&I)); LLVMPair new_pair; @@ -602,6 +653,30 @@ struct DiospyrosPass : public FunctionPass { Value *store_loc = op->getOperand(1); store_locations.insert(store_loc); inner_vector.push_back(wrap(op)); + } else if (auto *op = dyn_cast(&I)) { + if (!inner_vector.empty()) { + vectorization_accumulator.push_back(inner_vector); + } + inner_vector = {wrap(op)}; + vectorization_accumulator.push_back(inner_vector); + inner_vector = {}; + store_locations.clear(); + } else if (auto *op = dyn_cast(&I)) { + if (!inner_vector.empty()) { + vectorization_accumulator.push_back(inner_vector); + } + inner_vector = {wrap(op)}; + vectorization_accumulator.push_back(inner_vector); + inner_vector = {}; + store_locations.clear(); + } else if (auto *op = dyn_cast(&I)) { + if (!inner_vector.empty()) { + vectorization_accumulator.push_back(inner_vector); + } + inner_vector = {wrap(op)}; + vectorization_accumulator.push_back(inner_vector); + inner_vector = {}; + store_locations.clear(); } else if (auto *op = dyn_cast(&I)) { Value *load_loc = op->getOperand(0); if (!inner_vector.empty()) { @@ -628,22 +703,30 @@ struct DiospyrosPass : public FunctionPass { IRBuilder<> builder(dyn_cast(last_store)); Instruction *store_instr = dyn_cast(last_store); - assert(isa(store_instr)); - builder.SetInsertPoint(store_instr); - builder.SetInsertPoint(&B); - Module *mod = F.getParent(); - LLVMContext &context = F.getContext(); - VectorPointerSize pair = optimize( - wrap(mod), wrap(&context), wrap(&builder), vec.data(), - vec.size(), translated_exprs.data(), - translated_exprs.size(), RunOpt, PrintOpt); - int size = pair.llvm_pointer_size; - - LLVMPair const *expr_array = pair.llvm_pointer; - // translated_exprs = {}; - for (int i = 0; i < size; i++) { - translated_exprs.push_back(expr_array[i]); + if (auto *op = dyn_cast(store_instr)) { + assert(isa(store_instr)); + builder.SetInsertPoint(store_instr); + builder.SetInsertPoint(&B); + Module *mod = F.getParent(); + LLVMContext &context = F.getContext(); + VectorPointerSize pair = optimize( + wrap(mod), wrap(&context), wrap(&builder), + vec.data(), vec.size(), translated_exprs.data(), + translated_exprs.size(), RunOpt, PrintOpt); + int size = pair.llvm_pointer_size; + + LLVMPair const *expr_array = pair.llvm_pointer; + // translated_exprs = {}; + for (int i = 0; i < size; i++) { + translated_exprs.push_back(expr_array[i]); + } + } else { + assert(isa(last_store) || + isa(last_store) || + isa(last_store)); + dfs_instructions(store_instr, translated_exprs, B); } + // Trim down translated_exprs std::vector new_translated_exprs = {}; if (translated_exprs.size() >= diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array.c b/src/dios-egraphs/Diospyros/fail-tests/local-array-1.c similarity index 77% rename from src/dios-egraphs/Diospyros/fail-tests/local-array.c rename to src/dios-egraphs/Diospyros/fail-tests/local-array-1.c index ca32918e..3bd244aa 
100644 --- a/src/dios-egraphs/Diospyros/fail-tests/local-array.c +++ b/src/dios-egraphs/Diospyros/fail-tests/local-array-1.c @@ -6,14 +6,14 @@ #define SIZE 10 void test(float A[SIZE]) { - float x[SIZE] = {3.0f}; + float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; for (int i = 0; i < SIZE; i++) { A[i] = x[i]; } } int main() { - float A[SIZE] = {1.0f}; + float A[SIZE] = {[0 ... SIZE - 1] = 1.0f}; test(A); for (int i = 0; i < SIZE; i++) { printf("A Output: %f\n", A[i]); diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c b/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c index 3e89895b..da9176d2 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c +++ b/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c @@ -3,10 +3,10 @@ #include #include -#define SIZE 10 +#define SIZE 1 void test(float A[SIZE], float B[SIZE], float C[SIZE]) { - float x[SIZE] = {3.0f}; + float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; for (int i = 0; i < SIZE; i++) { A[i] += x[i]; } @@ -17,15 +17,21 @@ void test(float A[SIZE], float B[SIZE], float C[SIZE]) { B[i] -= x[i]; } for (int i = 0; i < SIZE; i++) { - C[i] += B[i]; + C[i] *= B[i]; } } int main() { - float A[SIZE] = {1.0f}; - float B[SIZE] = {2.0f}; - float C[SIZE] = {0.0f}; + float A[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float B[SIZE] = {[0 ... SIZE - 1] = 2.0f}; + float C[SIZE] = {[0 ... SIZE - 1] = 0.0f}; test(A, B, C); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + } + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + } for (int i = 0; i < SIZE; i++) { printf("C Output: %f\n", C[i]); } diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-3.c b/src/dios-egraphs/Diospyros/fail-tests/local-array-3.c index 0bccfcb3..ffd80b69 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/local-array-3.c +++ b/src/dios-egraphs/Diospyros/fail-tests/local-array-3.c @@ -7,19 +7,25 @@ void test(float A[SIZE], float B[SIZE], float C[SIZE]) { for (int i = 0; i < SIZE; i++) { - float x[SIZE] = {(float)i}; + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } C[i] = A[i] + x[i]; } for (int i = 0; i < SIZE; i++) { - float x[SIZE] = {(float)i}; + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } C[i] = B[i] - x[i]; } } int main() { - float A[SIZE] = {1.0f}; - float B[SIZE] = {2.0f}; - float C[SIZE] = {0.0f}; + float A[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float B[SIZE] = {[0 ... SIZE - 1] = 2.0f}; + float C[SIZE] = {[0 ... SIZE - 1] = 0.0f}; test(A, B, C); for (int i = 0; i < SIZE; i++) { printf("C Output: %f\n", C[i]); diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c b/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c index 17f081f0..116cee86 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c +++ b/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c @@ -7,7 +7,7 @@ void test(float A[SIZE]) { for (int i = 0; i < SIZE; i++) { - float x[SIZE] = {0.0f}; + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; for (int j = 0; j < SIZE; j++) { x[j] = 1.0f; } @@ -20,7 +20,7 @@ void test(float A[SIZE]) { } int main() { - float A[SIZE] = {0.0f}; + float A[SIZE] = {[0 ... 
SIZE - 1] = 0.0f}; for (int i = 0; i < SIZE; i++) { A[i] = (float)i; } diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index a0ce30ec..bca94621 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -906,6 +906,7 @@ unsafe fn llvm_recursive_add( builder: LLVMBuilderRef, inst: LLVMValueRef, context: LLVMContextRef, + llvm_arg_pairs: &mut Vec, ) -> LLVMValueRef { if isa_argument(inst) { let mut indices = Vec::new(); @@ -933,8 +934,24 @@ unsafe fn llvm_recursive_add( let num_ops = LLVMGetNumOperands(inst); for i in 0..num_ops { let operand = LLVMGetOperand(inst, i as u32); - let new_inst = llvm_recursive_add(builder, operand, context); - LLVMSetOperand(cloned_inst, i as u32, new_inst); + + // search the llvm_arg_pairs + let mut matched = false; + let mut ret_value = operand; + for llvm_pair in &*llvm_arg_pairs { + let original_llvm = llvm_pair.original_value; + let new_llvm = llvm_pair.new_value; + if !matched && original_llvm == operand { + matched = true; + ret_value = new_llvm; + } + } + if matched { + LLVMSetOperand(cloned_inst, i as u32, ret_value); + } else { + let new_inst = llvm_recursive_add(builder, operand, context, llvm_arg_pairs); + LLVMSetOperand(cloned_inst, i as u32, new_inst); + } } LLVMInsertIntoBuilder(builder, cloned_inst); return cloned_inst; @@ -1772,7 +1789,7 @@ unsafe fn translate_egg( let instr = match enode { VecLang::Symbol(symbol) => { match symbol_map.get(enode) { - Some(llvm_instr) => llvm_recursive_add(builder, *llvm_instr, context), + Some(llvm_instr) => llvm_recursive_add(builder, *llvm_instr, context, llvm_arg_pairs), None => { let mut matched = false; let mut ret_value = LLVMBuildAdd( @@ -1821,7 +1838,7 @@ unsafe fn translate_egg( let load_value = if isa_load(*gep_value) { let addr = LLVMGetOperand(*gep_value, 0); let cloned_gep = LLVMInstructionClone(addr); - let new_gep = llvm_recursive_add(builder, cloned_gep, context); + let new_gep = llvm_recursive_add(builder, cloned_gep, context, llvm_arg_pairs); let new_load = LLVMBuildLoad(builder, new_gep, b"\0".as_ptr() as *const _); let llvm_pair = LLVMPair { original_value: *gep_value, @@ -1831,12 +1848,12 @@ unsafe fn translate_egg( new_load } else if isa_gep(*gep_value) { let cloned_gep = LLVMInstructionClone(*gep_value); - let new_gep = llvm_recursive_add(builder, cloned_gep, context); + let new_gep = llvm_recursive_add(builder, cloned_gep, context, llvm_arg_pairs); LLVMBuildLoad(builder, new_gep, b"\0".as_ptr() as *const _) } else if isa_bitcast(*gep_value) { // TODO: DO NOT REGERATE CALLS. THESE SHOULD BE CACHED!!. e.g. 
a CALLOC let cloned_bitcast = LLVMInstructionClone(*gep_value); - let mut new_bitcast = llvm_recursive_add(builder, cloned_bitcast, context); + let mut new_bitcast = llvm_recursive_add(builder, cloned_bitcast, context, llvm_arg_pairs); // if bitcast was to i32, handle bitcast from float* to i32* if !isa_floatptr(new_bitcast) { let addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(new_bitcast)); @@ -1851,7 +1868,7 @@ unsafe fn translate_egg( LLVMBuildLoad(builder, new_bitcast, b"\0".as_ptr() as *const _) } else if isa_sitofp(*gep_value) { let cloned_sitofp = LLVMInstructionClone(*gep_value); - let new_sitofp = llvm_recursive_add(builder, cloned_sitofp, context); + let new_sitofp = llvm_recursive_add(builder, cloned_sitofp, context, llvm_arg_pairs); new_sitofp } // else if isa_phi(*gep_value) { @@ -2302,7 +2319,7 @@ unsafe fn egg_to_llvm( LLVMBuildStore(builder, extracted_value, mut_addr); } else { let cloned_addr = LLVMInstructionClone(mut_addr); - let new_addr = llvm_recursive_add(builder, cloned_addr, context); + let new_addr = llvm_recursive_add(builder, cloned_addr, context, llvm_arg_pairs); if LLVMTypeOf(extracted_value) != LLVMGetElementType(LLVMTypeOf(new_addr)) { extracted_value = gen_type_cast( extracted_value, From e59736879617e85baa87d12573b1828de8430aee Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 4 Mar 2022 16:43:57 -0500 Subject: [PATCH 055/143] test 2 with local arrrays works Error was where the llvm arg pairs were added Ignore the abovew Arg pairs needed to be added where loads are created, because loads affect memory If a load is built fresh, it is added to pairs Otherwise, search all prior loads to avoid rebuilding old loads Also, comparison of rust pointers is not double equals (==) I stole from stack overflow (cited) a way to compare addressses of objects for equality, which fixes the problem --- .../Diospyros/fail-tests/local-array-2.c | 6 +- src/dios-egraphs/Diospyros/src/lib.rs | 84 ++++++++++++++----- 2 files changed, 66 insertions(+), 24 deletions(-) diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c b/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c index da9176d2..ecea4f9d 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c +++ b/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c @@ -3,7 +3,7 @@ #include #include -#define SIZE 1 +#define SIZE 10 void test(float A[SIZE], float B[SIZE], float C[SIZE]) { float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; @@ -11,13 +11,13 @@ void test(float A[SIZE], float B[SIZE], float C[SIZE]) { A[i] += x[i]; } for (int i = 0; i < SIZE; i++) { - C[i] *= A[i]; + C[i] += A[i]; } for (int i = 0; i < SIZE; i++) { B[i] -= x[i]; } for (int i = 0; i < SIZE; i++) { - C[i] *= B[i]; + C[i] += B[i]; } } diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index bca94621..fd386c9e 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -101,6 +101,13 @@ unsafe fn gen_call_name() -> String { // result // } +// Reference Comparison: https://www.reddit.com/r/rust/comments/2r3wjk/is_there_way_to_compare_objects_by_address_in_rust/ +// Compares whether addresses of LLVMValueRefs are the same. 
+// Not the contents of the Value Refs +fn cmp(a1: &LLVMValueRef, a2: &LLVMValueRef) -> bool { + a1 as *const _ == a2 as *const _ +} + /// Converts LLVMValueRef binop to equivalent VecLang Binop node unsafe fn choose_binop(bop: &LLVMValueRef, ids: [Id; 2]) -> VecLang { match LLVMGetInstructionOpcode(*bop) { @@ -941,7 +948,7 @@ unsafe fn llvm_recursive_add( for llvm_pair in &*llvm_arg_pairs { let original_llvm = llvm_pair.original_value; let new_llvm = llvm_pair.new_value; - if !matched && original_llvm == operand { + if !matched && cmp(&original_llvm, &operand) { matched = true; ret_value = new_llvm; } @@ -954,6 +961,15 @@ unsafe fn llvm_recursive_add( } } LLVMInsertIntoBuilder(builder, cloned_inst); + + if isa_load(inst) { + let pair = LLVMPair { + new_value: cloned_inst, + original_value: inst, + }; + llvm_arg_pairs.push(pair); + } + return cloned_inst; } @@ -1550,7 +1566,7 @@ unsafe fn ref_to_egg( node_to_arg: &mut Vec, ) -> (Vec, i32) { for pair in llvm_arg_pairs { - if pair.original_value == expr { + if cmp(&pair.original_value, &expr) { // Here we create a new numbered variable node let var_idx = gen_node_idx(); let var_idx_str = var_idx.to_string(); @@ -1806,7 +1822,7 @@ unsafe fn translate_egg( for llvm_pair in &mut *llvm_arg_pairs { let original_llvm = llvm_pair.original_value; let new_llvm = llvm_pair.new_value; - if original_llvm == llvm_node { + if cmp(&original_llvm, &llvm_node) { matched = true; ret_value = new_llvm; break; @@ -1836,16 +1852,29 @@ unsafe fn translate_egg( .get(&(array_name, array_offsets)) .expect("Symbol map lookup error: Cannot Find GEP"); let load_value = if isa_load(*gep_value) { - let addr = LLVMGetOperand(*gep_value, 0); - let cloned_gep = LLVMInstructionClone(addr); - let new_gep = llvm_recursive_add(builder, cloned_gep, context, llvm_arg_pairs); - let new_load = LLVMBuildLoad(builder, new_gep, b"\0".as_ptr() as *const _); - let llvm_pair = LLVMPair { - original_value: *gep_value, - new_value: new_load, - }; - llvm_arg_pairs.push(llvm_pair); - new_load + let mut matched = false; + let mut matched_expr = *gep_value; + for pair in &*llvm_arg_pairs { + if cmp(&pair.original_value, &*gep_value) { + matched = true; + matched_expr = pair.new_value; + break; + } + } + if matched { + matched_expr + } else { + let addr = LLVMGetOperand(*gep_value, 0); + let cloned_gep = LLVMInstructionClone(addr); + let new_gep = llvm_recursive_add(builder, cloned_gep, context, llvm_arg_pairs); + let new_load = LLVMBuildLoad(builder, new_gep, b"\0".as_ptr() as *const _); + let llvm_pair = LLVMPair { + original_value: *gep_value, + new_value: new_load, + }; + llvm_arg_pairs.push(llvm_pair); + new_load + } } else if isa_gep(*gep_value) { let cloned_gep = LLVMInstructionClone(*gep_value); let new_gep = llvm_recursive_add(builder, cloned_gep, context, llvm_arg_pairs); @@ -1870,14 +1899,27 @@ unsafe fn translate_egg( let cloned_sitofp = LLVMInstructionClone(*gep_value); let new_sitofp = llvm_recursive_add(builder, cloned_sitofp, context, llvm_arg_pairs); new_sitofp - } - // else if isa_phi(*gep_value) { - // let cloned_phi = LLVMInstructionClone(*gep_value); - // let new_phi = llvm_recursive_add(builder, cloned_phi, context); - // new_phi - // } - else { - LLVMBuildLoad(builder, *gep_value, b"\0".as_ptr() as *const _) + } else { + let mut matched = false; + let mut matched_expr = *gep_value; + for pair in &*llvm_arg_pairs { + if cmp(&pair.original_value, &*gep_value) { + matched = true; + matched_expr = pair.new_value; + break; + } + } + if matched { + matched_expr + } else { + let 
new_load_value = LLVMBuildLoad(builder, *gep_value, b"\0".as_ptr() as *const _); + let llvm_pair = LLVMPair { + original_value: *gep_value, + new_value: new_load_value, + }; + llvm_arg_pairs.push(llvm_pair); + new_load_value + } }; load_value } From 5cf4a4a3631bbe4991e603134c54cd0986010085 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 8 Mar 2022 22:17:12 -0500 Subject: [PATCH 056/143] checkpoint 3/8 with alloc copy problems --- src/dios-egraphs/Diospyros/diospyros.cpp | 133 ++++++++++++++++-- .../Diospyros/fail-tests/local-array-4.c | 2 +- src/dios-egraphs/Diospyros/src/lib.rs | 100 +++++++++---- 3 files changed, 194 insertions(+), 41 deletions(-) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 3e5da187..d9104762 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -65,6 +65,12 @@ const string ARRAY_NAME = "no-array-name"; const string TEMP_NAME = "no-temp-name"; const string SQRT64_FUNCTION_NAME = "llvm.sqrt.f64"; const string SQRT32_FUNCTION_NAME = "llvm.sqrt.f32"; +const string MEMSET_PREFIX = "memset"; +const string LLVM_MEMSET_PREFIX = "llvm.memset"; +const string MEMMOVE_PREFIX = "memmove"; +const string MEMCOPY_PREFIX = "memcopy"; +const string MAIN_FUNCTION_NAME = "main"; +const string NO_OPT_PREFIX = "no_opt_"; const int SQRT_OPERATOR = 3; const int BINARY_OPERATOR = 2; @@ -437,8 +443,11 @@ extern "C" float get_constant_float(LLVMValueRef val) { Value *v = unwrap(val); if (auto *num = dyn_cast(v)) { return num->getValue().convertToFloat(); + } else if (auto *num = dyn_cast(v)) { + return num->getValue().bitsToFloat(); } - return -1; + errs() << "Not a Constant Float or Constant Int " << *unwrap(val) << "\n"; + throw "LLVM Value Must be a Constant Float or Constant Int"; } extern "C" LLVMValueRef build_constant_float(double n, LLVMContextRef context) { @@ -519,7 +528,7 @@ extern "C" bool dfs_llvm_value_ref(LLVMValueRef current_instr, Instruction *dfs_instructions(Instruction *current_instr, std::vector &translated_exprs, - BasicBlock &B) { + BasicBlock *B) { for (LLVMPair pair : translated_exprs) { Instruction *original_val = dyn_cast(unwrap(pair.original_value)); @@ -533,22 +542,28 @@ Instruction *dfs_instructions(Instruction *current_instr, int num_operands = current_instr->getNumOperands(); if (num_operands == 0) { + BasicBlock::InstListType &intermediate_instrs = B->getInstList(); + intermediate_instrs.push_back(cloned_instr); + LLVMPair new_pair; new_pair.original_value = wrap(current_instr); new_pair.new_value = wrap(cloned_instr); translated_exprs.push_back(new_pair); - BasicBlock::InstListType &intermediate_instrs = B.getInstList(); - intermediate_instrs.push_back(cloned_instr); return cloned_instr; } for (int i = 0; i < num_operands; i++) { Instruction *arg = dyn_cast(current_instr->getOperand(i)); - if (arg) { + if (arg != NULL) { Instruction *cloned_arg = dfs_instructions(arg, translated_exprs, B); cloned_instr->setOperand(i, cloned_arg); + + LLVMPair new_pair; + new_pair.original_value = wrap(arg); + new_pair.new_value = wrap(cloned_arg); + translated_exprs.push_back(new_pair); } } LLVMPair new_pair; @@ -556,11 +571,44 @@ Instruction *dfs_instructions(Instruction *current_instr, new_pair.new_value = wrap(cloned_instr); translated_exprs.push_back(new_pair); - BasicBlock::InstListType &intermediate_instrs = B.getInstList(); + BasicBlock::InstListType &intermediate_instrs = B->getInstList(); intermediate_instrs.push_back(cloned_instr); return cloned_instr; } 
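Earlier in this commit, get_constant_float is extended to accept a ConstantInt and read its payload with APInt::bitsToFloat, presumably because a float constant can reach the pass as an integer carrying the same bit pattern; the lib.rs change in this same commit from LLVMBuildSIToFP to LLVMBuildBitCast points the same way. A standalone illustration of that reinterpretation, not code from the patch:

    #include "llvm/ADT/APInt.h"
    #include <cstdint>

    // Standalone sketch: reinterpret a 32-bit pattern as the float it encodes,
    // the same conversion APInt::bitsToFloat performs in get_constant_float.
    static float bits_as_float(uint32_t bits) {
        llvm::APInt raw(/*numBits=*/32, /*val=*/bits);
        return raw.bitsToFloat();  // e.g. 0x3f800000 -> 1.0f, 0x40400000 -> 3.0f
    }

get_constant_float also now reports an error and throws instead of returning -1 when the value is neither a constant float nor a constant int, which turns a silently wrong constant into a visible failure.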
+bool is_memset_variety(CallInst *inst) { + Function *function = inst->getCalledFunction(); + if (function != NULL) { + StringRef name = function->getName(); + return (name.size() > MEMSET_PREFIX.size() && + name.substr(0, MEMSET_PREFIX.size()) == MEMSET_PREFIX) || + (name.size() > LLVM_MEMSET_PREFIX.size() && + name.substr(0, LLVM_MEMSET_PREFIX.size()) == + LLVM_MEMSET_PREFIX); + } + return false; +} + +bool is_memcopy_variety(CallInst *inst) { + Function *function = inst->getCalledFunction(); + if (function != NULL) { + StringRef name = function->getName(); + return name.size() > MEMCOPY_PREFIX.size() && + name.substr(0, MEMCOPY_PREFIX.size()) == MEMCOPY_PREFIX; + } + return false; +} + +bool is_memmove_variety(CallInst *inst) { + Function *function = inst->getCalledFunction(); + if (function != NULL) { + StringRef name = function->getName(); + return name.size() > MEMMOVE_PREFIX.size() && + name.substr(0, MEMMOVE_PREFIX.size()) == MEMMOVE_PREFIX; + } + return false; +} + /** * Below is the main DiospyrosPass that activates the Rust lib.rs code, * which calls the Egg vectorizer and rewrites the optimized code in place. @@ -573,8 +621,9 @@ struct DiospyrosPass : public FunctionPass { virtual bool runOnFunction(Function &F) override { // do not optimize on main function or no_opt functions. - if (F.getName() == "main" || - (F.getName().size() > 7 && F.getName().substr(0, 7) == "no_opt_")) { + if (F.getName() == MAIN_FUNCTION_NAME || + (F.getName().size() > NO_OPT_PREFIX.size() && + F.getName().substr(0, NO_OPT_PREFIX.size()) == NO_OPT_PREFIX)) { return false; } bool has_changes = false; @@ -608,6 +657,14 @@ struct DiospyrosPass : public FunctionPass { has_store_or_mem_intrinsic = true; } else if (auto *op = dyn_cast(&I)) { has_store_or_mem_intrinsic = true; + } else if (CallInst *op = dyn_cast(&I)) { + if (is_memset_variety(op)) { + has_store_or_mem_intrinsic = true; + } else if (is_memcopy_variety(op)) { + has_store_or_mem_intrinsic = true; + } else if (is_memmove_variety(op)) { + has_store_or_mem_intrinsic = true; + } } } if (!has_store_or_mem_intrinsic) { @@ -677,6 +734,35 @@ struct DiospyrosPass : public FunctionPass { vectorization_accumulator.push_back(inner_vector); inner_vector = {}; store_locations.clear(); + } else if (CallInst *call_inst = dyn_cast(&I)) { + if (is_memset_variety(call_inst)) { + if (!inner_vector.empty()) { + vectorization_accumulator.push_back(inner_vector); + } + Instruction *memset = dyn_cast(call_inst); + inner_vector = {wrap(memset)}; + vectorization_accumulator.push_back(inner_vector); + inner_vector = {}; + store_locations.clear(); + } else if (is_memcopy_variety(call_inst)) { + if (!inner_vector.empty()) { + vectorization_accumulator.push_back(inner_vector); + } + Instruction *memcopy = dyn_cast(call_inst); + inner_vector = {wrap(memcopy)}; + vectorization_accumulator.push_back(inner_vector); + inner_vector = {}; + store_locations.clear(); + } else if (is_memmove_variety(call_inst)) { + if (!inner_vector.empty()) { + vectorization_accumulator.push_back(inner_vector); + } + Instruction *memmove = dyn_cast(call_inst); + inner_vector = {wrap(memmove)}; + vectorization_accumulator.push_back(inner_vector); + inner_vector = {}; + store_locations.clear(); + } } else if (auto *op = dyn_cast(&I)) { Value *load_loc = op->getOperand(0); if (!inner_vector.empty()) { @@ -687,7 +773,9 @@ struct DiospyrosPass : public FunctionPass { } bb_instrs.push_back(dyn_cast(&I)); } - vectorization_accumulator.push_back(inner_vector); + if (!inner_vector.empty()) { + 
vectorization_accumulator.push_back(inner_vector); + } // Acquire each of the instructions in the "run" that terminates at // a store We will send these instructions to optimize. @@ -723,8 +811,18 @@ struct DiospyrosPass : public FunctionPass { } else { assert(isa(last_store) || isa(last_store) || - isa(last_store)); - dfs_instructions(store_instr, translated_exprs, B); + isa(last_store) || + (isa(last_store) && + is_memset_variety( + dyn_cast(last_store))) || + (isa(last_store) && + is_memcopy_variety( + dyn_cast(last_store))) || + (isa(last_store) && + is_memmove_variety( + dyn_cast(last_store)))); + + dfs_instructions(store_instr, translated_exprs, &B); } // Trim down translated_exprs @@ -746,6 +844,19 @@ struct DiospyrosPass : public FunctionPass { I->eraseFromParent(); } else if (isa(I)) { I->eraseFromParent(); + } else if ((isa(I) && + is_memset_variety(dyn_cast(I))) || + (isa(I) && + is_memcopy_variety(dyn_cast(I))) || + (isa(I) && + is_memmove_variety(dyn_cast(I)))) { + I->eraseFromParent(); + } else if (isa(I)) { + I->eraseFromParent(); + } else if (isa(I)) { + I->eraseFromParent(); + } else if (isa(I)) { + I->eraseFromParent(); } } BasicBlock::InstListType &final_instrs = B.getInstList(); diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c b/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c index 116cee86..ac3d32c1 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c +++ b/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c @@ -3,7 +3,7 @@ #include #include -#define SIZE 2 +#define SIZE 4 void test(float A[SIZE]) { for (int i = 0; i < SIZE; i++) { diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index fd386c9e..1e1744eb 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -104,7 +104,11 @@ unsafe fn gen_call_name() -> String { // Reference Comparison: https://www.reddit.com/r/rust/comments/2r3wjk/is_there_way_to_compare_objects_by_address_in_rust/ // Compares whether addresses of LLVMValueRefs are the same. // Not the contents of the Value Refs -fn cmp(a1: &LLVMValueRef, a2: &LLVMValueRef) -> bool { +fn cmp_val_ref_address(a1: &LLVMValueRef, a2: &LLVMValueRef) -> bool { + a1 as *const _ == a2 as *const _ +} + +fn _cmp_typ(a1: &LLVMTypeRef, a2: &LLVMTypeRef) -> bool { a1 as *const _ == a2 as *const _ } @@ -928,10 +932,29 @@ unsafe fn llvm_recursive_add( } else if isa_phi(inst) { return inst; } else if isa_alloca(inst) { - let cloned_inst = LLVMInstructionClone(inst); - LLVMInsertIntoBuilder(builder, cloned_inst); - return cloned_inst; + // We have this in the base case to stop reconstruction of allocas, + // because allocas are like loads, and should not get reconstructioned + // search the llvm_arg_pairs for allocas that were already created + let mut matched = false; + let mut ret_value = inst; + for llvm_pair in &*llvm_arg_pairs { + let original_llvm = llvm_pair.original_value; + let new_llvm = llvm_pair.new_value; + if cmp_val_ref_address(&original_llvm, &inst) { + matched = true; + ret_value = new_llvm; + break; + } + } + if matched { + return ret_value; + } else { + // Don't clone Inst; we should only clone if recursive call, + // which is handled previously + return inst; + } } + // TODO: CALLs should not be rebuilt? 
// else if isa_call(inst) { // let cloned_inst = LLVMInstructionClone(inst); // LLVMInsertIntoBuilder(builder, cloned_inst); @@ -941,14 +964,13 @@ unsafe fn llvm_recursive_add( let num_ops = LLVMGetNumOperands(inst); for i in 0..num_ops { let operand = LLVMGetOperand(inst, i as u32); - // search the llvm_arg_pairs let mut matched = false; let mut ret_value = operand; - for llvm_pair in &*llvm_arg_pairs { + for llvm_pair in &mut *llvm_arg_pairs { let original_llvm = llvm_pair.original_value; let new_llvm = llvm_pair.new_value; - if !matched && cmp(&original_llvm, &operand) { + if !matched && cmp_val_ref_address(&original_llvm, &operand) { matched = true; ret_value = new_llvm; } @@ -958,17 +980,21 @@ unsafe fn llvm_recursive_add( } else { let new_inst = llvm_recursive_add(builder, operand, context, llvm_arg_pairs); LLVMSetOperand(cloned_inst, i as u32, new_inst); + + let pair = LLVMPair { + new_value: new_inst, + original_value: operand, + }; + llvm_arg_pairs.push(pair); } } LLVMInsertIntoBuilder(builder, cloned_inst); - if isa_load(inst) { - let pair = LLVMPair { - new_value: cloned_inst, - original_value: inst, - }; - llvm_arg_pairs.push(pair); - } + let pair = LLVMPair { + new_value: cloned_inst, + original_value: inst, + }; + llvm_arg_pairs.push(pair); return cloned_inst; } @@ -1566,7 +1592,7 @@ unsafe fn ref_to_egg( node_to_arg: &mut Vec, ) -> (Vec, i32) { for pair in llvm_arg_pairs { - if cmp(&pair.original_value, &expr) { + if cmp_val_ref_address(&pair.original_value, &expr) { // Here we create a new numbered variable node let var_idx = gen_node_idx(); let var_idx_str = var_idx.to_string(); @@ -1822,7 +1848,7 @@ unsafe fn translate_egg( for llvm_pair in &mut *llvm_arg_pairs { let original_llvm = llvm_pair.original_value; let new_llvm = llvm_pair.new_value; - if cmp(&original_llvm, &llvm_node) { + if cmp_val_ref_address(&original_llvm, &llvm_node) { matched = true; ret_value = new_llvm; break; @@ -1855,7 +1881,7 @@ unsafe fn translate_egg( let mut matched = false; let mut matched_expr = *gep_value; for pair in &*llvm_arg_pairs { - if cmp(&pair.original_value, &*gep_value) { + if cmp_val_ref_address(&pair.original_value, &*gep_value) { matched = true; matched_expr = pair.new_value; break; @@ -1876,13 +1902,13 @@ unsafe fn translate_egg( new_load } } else if isa_gep(*gep_value) { - let cloned_gep = LLVMInstructionClone(*gep_value); - let new_gep = llvm_recursive_add(builder, cloned_gep, context, llvm_arg_pairs); + // let cloned_gep = LLVMInstructionClone(*gep_value); + let new_gep = llvm_recursive_add(builder, *gep_value, context, llvm_arg_pairs); LLVMBuildLoad(builder, new_gep, b"\0".as_ptr() as *const _) } else if isa_bitcast(*gep_value) { // TODO: DO NOT REGERATE CALLS. THESE SHOULD BE CACHED!!. e.g. 
a CALLOC - let cloned_bitcast = LLVMInstructionClone(*gep_value); - let mut new_bitcast = llvm_recursive_add(builder, cloned_bitcast, context, llvm_arg_pairs); + // let cloned_bitcast = LLVMInstructionClone(*gep_value); + let mut new_bitcast = llvm_recursive_add(builder, *gep_value, context, llvm_arg_pairs); // if bitcast was to i32, handle bitcast from float* to i32* if !isa_floatptr(new_bitcast) { let addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(new_bitcast)); @@ -1903,7 +1929,7 @@ unsafe fn translate_egg( let mut matched = false; let mut matched_expr = *gep_value; for pair in &*llvm_arg_pairs { - if cmp(&pair.original_value, &*gep_value) { + if cmp_val_ref_address(&pair.original_value, &*gep_value) { matched = true; matched_expr = pair.new_value; break; @@ -1948,12 +1974,18 @@ unsafe fn translate_egg( ); // check if the elt is an int if isa_integertype(elt_val) { - elt_val = LLVMBuildSIToFP( + elt_val = LLVMBuildBitCast( builder, elt_val, LLVMFloatTypeInContext(context), b"\0".as_ptr() as *const _, ); + // elt_val = LLVMBuildSIToFP( + // builder, + // elt_val, + // LLVMFloatTypeInContext(context), + // b"\0".as_ptr() as *const _, + // ); } vector = LLVMBuildInsertElement( builder, @@ -2250,12 +2282,18 @@ unsafe fn translate_egg( module, ); if isa_integertype(number) { - number = LLVMBuildSIToFP( + number = LLVMBuildBitCast( builder, number, LLVMFloatTypeInContext(context), b"\0".as_ptr() as *const _, - ) + ); + // number = LLVMBuildSIToFP( + // builder, + // number, + // LLVMFloatTypeInContext(context), + // b"\0".as_ptr() as *const _, + // ) } translate_unop( enode, @@ -2285,6 +2323,10 @@ unsafe fn gen_type_cast( } else if typ1 == LLVMInt16TypeInContext(context) && typ2 == LLVMInt32TypeInContext(context) { return LLVMBuildZExt(builder, val, typ2, b"\0".as_ptr() as *const _); } + LLVMDumpType(typ1); + println!(); + LLVMDumpType(typ2); + println!(); panic!("Cannot convert between {:?} {:?}\n.", typ1, typ2); } @@ -2360,13 +2402,13 @@ unsafe fn egg_to_llvm( assert!(LLVMTypeOf(extracted_value) == LLVMGetElementType(LLVMTypeOf(mut_addr))); LLVMBuildStore(builder, extracted_value, mut_addr); } else { - let cloned_addr = LLVMInstructionClone(mut_addr); - let new_addr = llvm_recursive_add(builder, cloned_addr, context, llvm_arg_pairs); - if LLVMTypeOf(extracted_value) != LLVMGetElementType(LLVMTypeOf(new_addr)) { + // let cloned_addr = LLVMInstructionClone(mut_addr); + let new_addr = llvm_recursive_add(builder, mut_addr, context, llvm_arg_pairs); + if LLVMTypeOf(extracted_value) != LLVMGetElementType(LLVMTypeOf(mut_addr)) { extracted_value = gen_type_cast( extracted_value, LLVMTypeOf(extracted_value), - LLVMGetElementType(LLVMTypeOf(new_addr)), + LLVMGetElementType(LLVMTypeOf(mut_addr)), context, builder, ); From 3365d230c2c54addc084fb429e5a4ae0cc7eb4b3 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 9 Mar 2022 14:36:57 -0500 Subject: [PATCH 057/143] alloca issue resolved Wrap/Unwrap creates new LLVMValueRef objects each time! To get oriignal object have to derefference twice, as in ** Then get address, e.g. 
&**inst Compare the underlying llvm::LLVMValue objects in rust, using object comparisons --- src/dios-egraphs/Diospyros/Makefile | 3 + src/dios-egraphs/Diospyros/diospyros.cpp | 95 ++++++++--- .../Diospyros/fail-tests/local-array-4.c | 2 +- src/dios-egraphs/Diospyros/src/lib.rs | 158 ++++++++++++------ 4 files changed, 180 insertions(+), 78 deletions(-) diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index 82d85f04..e780eca4 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -77,6 +77,9 @@ no-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp $(CLANG) build/dce.ll -o build/final build/final +run-diospyros: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp + $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt=false -mllvm -print=false $(test) -o build/diospyros.ll + run-polybench: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp $(CLANG) -I polybench-tests/utilities -I polybench-tests/linear-algebra/kernels/atax $(test) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index d9104762..4745574f 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -76,7 +76,7 @@ const int BINARY_OPERATOR = 2; /** Number of instructions to search back and see if translated - we keep less * to search faster; but actually cutting it short is unsound. 
*/ -const int NUM_TRANSLATED_INSTRUCTIONS = 1000; +const int NUM_TRANSLATED_INSTRUCTIONS = 2000; /** * Fresh counters for temps and array generation @@ -526,51 +526,94 @@ extern "C" bool dfs_llvm_value_ref(LLVMValueRef current_instr, return dfs_llvm_instrs(current_user, match_user); } +// Instruction *dfs_instructions(Instruction *current_instr, +// std::vector &translated_exprs, +// BasicBlock *B) { +// for (LLVMPair pair : translated_exprs) { +// Instruction *original_val = +// dyn_cast(unwrap(pair.original_value)); +// Instruction *new_val = dyn_cast(unwrap(pair.new_value)); +// if (current_instr == original_val) { +// return new_val; +// } +// } + +// Instruction *cloned_instr = current_instr->clone(); + +// int num_operands = current_instr->getNumOperands(); +// if (num_operands == 0) { +// BasicBlock::InstListType &intermediate_instrs = B->getInstList(); +// intermediate_instrs.push_back(cloned_instr); + +// LLVMPair new_pair; +// new_pair.original_value = wrap(current_instr); +// new_pair.new_value = wrap(cloned_instr); +// translated_exprs.push_back(new_pair); + +// return cloned_instr; +// } + +// for (int i = 0; i < num_operands; i++) { +// Instruction *arg = +// dyn_cast(current_instr->getOperand(i)); if (arg != NULL) +// { +// Instruction *cloned_arg = +// dfs_instructions(arg, translated_exprs, B); +// cloned_instr->setOperand(i, cloned_arg); + +// LLVMPair new_pair; +// new_pair.original_value = wrap(arg); +// new_pair.new_value = wrap(cloned_arg); +// translated_exprs.push_back(new_pair); +// } +// } +// LLVMPair new_pair; +// new_pair.original_value = wrap(current_instr); +// new_pair.new_value = wrap(cloned_instr); +// translated_exprs.push_back(new_pair); + +// BasicBlock::InstListType &intermediate_instrs = B->getInstList(); +// intermediate_instrs.push_back(cloned_instr); +// return cloned_instr; +// } + Instruction *dfs_instructions(Instruction *current_instr, std::vector &translated_exprs, BasicBlock *B) { - for (LLVMPair pair : translated_exprs) { - Instruction *original_val = - dyn_cast(unwrap(pair.original_value)); - Instruction *new_val = dyn_cast(unwrap(pair.new_value)); - if (current_instr == original_val) { - return new_val; - } - } - Instruction *cloned_instr = current_instr->clone(); - - int num_operands = current_instr->getNumOperands(); - if (num_operands == 0) { - BasicBlock::InstListType &intermediate_instrs = B->getInstList(); - intermediate_instrs.push_back(cloned_instr); - + if (isa(current_instr)) { + return current_instr; + } else if (isa(current_instr)) { + return current_instr; + } else if (isa(current_instr)) { + for (LLVMPair pair : translated_exprs) { + Instruction *original_val = + dyn_cast(unwrap(pair.original_value)); + Instruction *new_val = + dyn_cast(unwrap(pair.new_value)); + if (current_instr == original_val) { + return new_val; + } + } LLVMPair new_pair; new_pair.original_value = wrap(current_instr); new_pair.new_value = wrap(cloned_instr); translated_exprs.push_back(new_pair); + BasicBlock::InstListType &intermediate_instrs = B->getInstList(); + intermediate_instrs.push_back(cloned_instr); return cloned_instr; } + int num_operands = current_instr->getNumOperands(); for (int i = 0; i < num_operands; i++) { Instruction *arg = dyn_cast(current_instr->getOperand(i)); if (arg != NULL) { Instruction *cloned_arg = dfs_instructions(arg, translated_exprs, B); cloned_instr->setOperand(i, cloned_arg); - - LLVMPair new_pair; - new_pair.original_value = wrap(arg); - new_pair.new_value = wrap(cloned_arg); - translated_exprs.push_back(new_pair); } } 
- LLVMPair new_pair; - new_pair.original_value = wrap(current_instr); - new_pair.new_value = wrap(cloned_instr); - translated_exprs.push_back(new_pair); - BasicBlock::InstListType &intermediate_instrs = B->getInstList(); intermediate_instrs.push_back(cloned_instr); return cloned_instr; diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c b/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c index ac3d32c1..86abcb01 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c +++ b/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c @@ -3,7 +3,7 @@ #include #include -#define SIZE 4 +#define SIZE 10 void test(float A[SIZE]) { for (int i = 0; i < SIZE; i++) { diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 1e1744eb..83d92675 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -104,7 +104,7 @@ unsafe fn gen_call_name() -> String { // Reference Comparison: https://www.reddit.com/r/rust/comments/2r3wjk/is_there_way_to_compare_objects_by_address_in_rust/ // Compares whether addresses of LLVMValueRefs are the same. // Not the contents of the Value Refs -fn cmp_val_ref_address(a1: &LLVMValueRef, a2: &LLVMValueRef) -> bool { +fn cmp_val_ref_address(a1: &llvm::LLVMValue, a2: &llvm::LLVMValue) -> bool { a1 as *const _ == a2 as *const _ } @@ -913,12 +913,99 @@ unsafe fn _llvm_recursive_print(inst: LLVMValueRef) -> () { return; } +// unsafe fn llvm_recursive_add( +// builder: LLVMBuilderRef, +// inst: LLVMValueRef, +// context: LLVMContextRef, +// llvm_arg_pairs: &mut Vec, +// ) -> LLVMValueRef { +// if isa_argument(inst) { +// let mut indices = Vec::new(); +// for i in 0..1 { +// indices.push(LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0)); +// } +// let indices_vector = indices.as_mut_ptr(); +// return LLVMBuildGEP(builder, inst, indices_vector, 1, b"\0".as_ptr() as *const _); +// // return inst; +// } else if isa_constant(inst) { +// return inst; +// } else if isa_phi(inst) { +// return inst; +// } else if isa_alloca(inst) { +// // We have this in the base case to stop reconstruction of allocas, +// // because allocas are like loads, and should not get reconstructioned +// // search the llvm_arg_pairs for allocas that were already created +// let mut matched = false; +// let mut ret_value = inst; +// for llvm_pair in &*llvm_arg_pairs { +// let original_llvm = llvm_pair.original_value; +// let new_llvm = llvm_pair.new_value; +// if cmp_val_ref_address(&original_llvm, &inst) { +// matched = true; +// ret_value = new_llvm; +// break; +// } +// } +// if matched { +// return ret_value; +// } else { +// // Don't clone Inst; we should only clone if recursive call, +// // which is handled previously +// return inst; +// } +// } +// // TODO: CALLs should not be rebuilt? 
+// // else if isa_call(inst) { +// // let cloned_inst = LLVMInstructionClone(inst); +// // LLVMInsertIntoBuilder(builder, cloned_inst); +// // return cloned_inst; +// // } +// let cloned_inst = LLVMInstructionClone(inst); +// let num_ops = LLVMGetNumOperands(inst); +// for i in 0..num_ops { +// let operand = LLVMGetOperand(inst, i as u32); +// // search the llvm_arg_pairs +// let mut matched = false; +// let mut ret_value = operand; +// for llvm_pair in &mut *llvm_arg_pairs { +// let original_llvm = llvm_pair.original_value; +// let new_llvm = llvm_pair.new_value; +// if !matched && cmp_val_ref_address(&original_llvm, &operand) { +// matched = true; +// ret_value = new_llvm; +// } +// } +// if matched { +// LLVMSetOperand(cloned_inst, i as u32, ret_value); +// } else { +// let new_inst = llvm_recursive_add(builder, operand, context, llvm_arg_pairs); +// LLVMSetOperand(cloned_inst, i as u32, new_inst); + +// let pair = LLVMPair { +// new_value: new_inst, +// original_value: operand, +// }; +// llvm_arg_pairs.push(pair); +// } +// } +// LLVMInsertIntoBuilder(builder, cloned_inst); + +// let pair = LLVMPair { +// new_value: cloned_inst, +// original_value: inst, +// }; +// llvm_arg_pairs.push(pair); + +// return cloned_inst; +// } + unsafe fn llvm_recursive_add( builder: LLVMBuilderRef, inst: LLVMValueRef, context: LLVMContextRef, llvm_arg_pairs: &mut Vec, ) -> LLVMValueRef { + let cloned_inst = LLVMInstructionClone(inst); if isa_argument(inst) { let mut indices = Vec::new(); for i in 0..1 { @@ -926,7 +1013,6 @@ unsafe fn llvm_recursive_add( } let indices_vector = indices.as_mut_ptr(); return LLVMBuildGEP(builder, inst, indices_vector, 1, b"\0".as_ptr() as *const _); - // return inst; } else if isa_constant(inst) { return inst; } else if isa_phi(inst) { @@ -940,7 +1026,7 @@ unsafe fn llvm_recursive_add( for llvm_pair in &*llvm_arg_pairs { let original_llvm = llvm_pair.original_value; let new_llvm = llvm_pair.new_value; - if cmp_val_ref_address(&original_llvm, &inst) { + if cmp_val_ref_address(&*original_llvm, &*inst) { matched = true; ret_value = new_llvm; break; @@ -949,53 +1035,22 @@ unsafe fn llvm_recursive_add( if matched { return ret_value; } else { - // Don't clone Inst; we should only clone if recursive call, - // which is handled previously - return inst; + let pair = LLVMPair { + new_value: cloned_inst, + original_value: inst, + }; + llvm_arg_pairs.push(pair); + LLVMInsertIntoBuilder(builder, cloned_inst); + return cloned_inst; } } - // TODO: CALLs should not be rebuilt? 
- // else if isa_call(inst) { - // let cloned_inst = LLVMInstructionClone(inst); - // LLVMInsertIntoBuilder(builder, cloned_inst); - // return cloned_inst; - // } - let cloned_inst = LLVMInstructionClone(inst); let num_ops = LLVMGetNumOperands(inst); for i in 0..num_ops { let operand = LLVMGetOperand(inst, i as u32); - // search the llvm_arg_pairs - let mut matched = false; - let mut ret_value = operand; - for llvm_pair in &mut *llvm_arg_pairs { - let original_llvm = llvm_pair.original_value; - let new_llvm = llvm_pair.new_value; - if !matched && cmp_val_ref_address(&original_llvm, &operand) { - matched = true; - ret_value = new_llvm; - } - } - if matched { - LLVMSetOperand(cloned_inst, i as u32, ret_value); - } else { - let new_inst = llvm_recursive_add(builder, operand, context, llvm_arg_pairs); - LLVMSetOperand(cloned_inst, i as u32, new_inst); - - let pair = LLVMPair { - new_value: new_inst, - original_value: operand, - }; - llvm_arg_pairs.push(pair); - } + let new_operand = llvm_recursive_add(builder, operand, context, llvm_arg_pairs); + LLVMSetOperand(cloned_inst, i as u32, new_operand); } LLVMInsertIntoBuilder(builder, cloned_inst); - - let pair = LLVMPair { - new_value: cloned_inst, - original_value: inst, - }; - llvm_arg_pairs.push(pair); - return cloned_inst; } @@ -1592,7 +1647,7 @@ unsafe fn ref_to_egg( node_to_arg: &mut Vec, ) -> (Vec, i32) { for pair in llvm_arg_pairs { - if cmp_val_ref_address(&pair.original_value, &expr) { + if cmp_val_ref_address(&*pair.original_value, &*expr) { // Here we create a new numbered variable node let var_idx = gen_node_idx(); let var_idx_str = var_idx.to_string(); @@ -1848,7 +1903,7 @@ unsafe fn translate_egg( for llvm_pair in &mut *llvm_arg_pairs { let original_llvm = llvm_pair.original_value; let new_llvm = llvm_pair.new_value; - if cmp_val_ref_address(&original_llvm, &llvm_node) { + if cmp_val_ref_address(&*original_llvm, &*llvm_node) { matched = true; ret_value = new_llvm; break; @@ -1881,7 +1936,7 @@ unsafe fn translate_egg( let mut matched = false; let mut matched_expr = *gep_value; for pair in &*llvm_arg_pairs { - if cmp_val_ref_address(&pair.original_value, &*gep_value) { + if cmp_val_ref_address(&*pair.original_value, &**gep_value) { matched = true; matched_expr = pair.new_value; break; @@ -1891,8 +1946,8 @@ unsafe fn translate_egg( matched_expr } else { let addr = LLVMGetOperand(*gep_value, 0); - let cloned_gep = LLVMInstructionClone(addr); - let new_gep = llvm_recursive_add(builder, cloned_gep, context, llvm_arg_pairs); + // let cloned_gep = LLVMInstructionClone(addr); + let new_gep = llvm_recursive_add(builder, addr, context, llvm_arg_pairs); let new_load = LLVMBuildLoad(builder, new_gep, b"\0".as_ptr() as *const _); let llvm_pair = LLVMPair { original_value: *gep_value, @@ -1922,14 +1977,15 @@ unsafe fn translate_egg( } LLVMBuildLoad(builder, new_bitcast, b"\0".as_ptr() as *const _) } else if isa_sitofp(*gep_value) { - let cloned_sitofp = LLVMInstructionClone(*gep_value); - let new_sitofp = llvm_recursive_add(builder, cloned_sitofp, context, llvm_arg_pairs); + // let cloned_sitofp = LLVMInstructionClone(*gep_value); + let new_sitofp = llvm_recursive_add(builder, *gep_value, context, llvm_arg_pairs); new_sitofp } else { + // includes isa_alloca case let mut matched = false; let mut matched_expr = *gep_value; for pair in &*llvm_arg_pairs { - if cmp_val_ref_address(&pair.original_value, &*gep_value) { + if cmp_val_ref_address(&*pair.original_value, &**gep_value) { matched = true; matched_expr = pair.new_value; break; From 
134bfc81d5f06783f934dc327a4752cedf32411d Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 9 Mar 2022 14:38:46 -0500 Subject: [PATCH 058/143] start creating llvm-tests Tests at the LLVM IR Level There's little to no changes at the IR Level, compared to when extra passes are added to compiling with C Tests here are more stable, and should add more regression power Change llvm-tests of old to new c-tests: --- .../{llvm-tests => c-tests}/2d-2d-conv.c | 0 .../{llvm-tests => c-tests}/2d-conv.c | 0 .../2d-matrix-multiply-new.c | 0 .../2d-matrix-multiply.c | 0 .../Diospyros/{llvm-tests => c-tests}/2d.c | 0 .../3-by-3-matrix-multiply.c | 0 .../5-by-5-matrix-multiply.c | 0 .../Diospyros/{llvm-tests => c-tests}/add.c | 0 .../{llvm-tests => c-tests}/add_mult.c | 0 .../{llvm-tests => c-tests}/break-w.c | 0 .../Diospyros/{llvm-tests => c-tests}/break.c | 0 .../{llvm-tests => c-tests}/calloc.c | 0 .../{llvm-tests => c-tests}/continue-w.c | 0 .../{llvm-tests => c-tests}/continue.c | 0 .../Diospyros/{llvm-tests => c-tests}/cube.c | 0 .../{llvm-tests => c-tests}/five_binops.c | 0 .../{llvm-tests => c-tests}/identity_matrix.c | 0 .../{llvm-tests => c-tests}/if-else.c | 0 .../{llvm-tests => c-tests}/inline-float.c | 0 .../{llvm-tests => c-tests}/inline-void.c | 0 .../{llvm-tests => c-tests}/load_reuse.c | 0 .../{llvm-tests => c-tests}/loop-inline.c | 0 .../Diospyros/{llvm-tests => c-tests}/mac.c | 0 .../{llvm-tests => c-tests}/malloc.c | 0 .../{llvm-tests => c-tests}/mat_mul.c | 0 .../{llvm-tests => c-tests}/matrix-multiply.c | 0 .../Diospyros/{llvm-tests => c-tests}/mixed.c | 0 .../Diospyros/{llvm-tests => c-tests}/mult.c | 0 .../{llvm-tests => c-tests}/multi-mat-mul.c | 0 .../{llvm-tests => c-tests}/multiple_adds.c | 0 .../naive-norm-inline.c | 0 .../{llvm-tests => c-tests}/naive-norm.c | 0 .../{llvm-tests => c-tests}/nested-inline.c | 0 .../{llvm-tests => c-tests}/out_of_order.c | 0 .../{llvm-tests => c-tests}/point-product.c | 0 .../{llvm-tests => c-tests}/q-prod.c | 0 .../qr-decomp-modified-no-local-array.c | 0 .../qr-decomp-no-local-array.c | 0 .../qr-decomp-test-1.c | 0 .../qr-decomp-test-2.c | 0 .../qr-decomp-test-3.c | 0 .../qr-decomp-test-4-1-linear-array.c | 0 .../qr-decomp-test-4-1.c | 0 .../qr-decomp-test-4-2-2.c | 0 .../qr-decomp-test-4-2-linear-array.c | 0 .../qr-decomp-test-4-2.c | 0 .../qr-decomp-test-4-3-linear-array.c | 0 .../qr-decomp-test-4-3.c | 0 .../qr-decomp-test-4-4-linear-array.c | 0 .../qr-decomp-test-4-5-linear-array.c | 0 .../{llvm-tests => c-tests}/return.c | 0 .../{llvm-tests => c-tests}/scalar.c | 0 .../{llvm-tests => c-tests}/sgn-inline.c | 0 .../sgn-naive-norm-inline-1.c | 0 .../sgn-naive-norm-inline-2.c | 0 .../Diospyros/{llvm-tests => c-tests}/sqrt.c | 0 .../{llvm-tests => c-tests}/stencil-2d.c | 0 .../{llvm-tests => c-tests}/ternary.c | 0 .../{llvm-tests => c-tests}/transpose.c | 0 .../Diospyros/{llvm-tests => c-tests}/var.c | 0 .../{llvm-tests => c-tests}/width5.c | 0 .../{llvm-tests => c-tests}/width9.c | 0 .../Diospyros/llvm-tests/repeat-alloc.ll | 64 +++++++++++++++++++ .../Diospyros/llvm-tests/repeat-load.ll | 1 + 64 files changed, 65 insertions(+) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/2d-2d-conv.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/2d-conv.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/2d-matrix-multiply-new.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/2d-matrix-multiply.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/2d.c (100%) rename 
src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/3-by-3-matrix-multiply.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/5-by-5-matrix-multiply.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/add.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/add_mult.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/break-w.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/break.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/calloc.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/continue-w.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/continue.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/cube.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/five_binops.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/identity_matrix.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/if-else.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/inline-float.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/inline-void.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/load_reuse.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/loop-inline.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/mac.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/malloc.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/mat_mul.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/matrix-multiply.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/mixed.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/mult.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/multi-mat-mul.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/multiple_adds.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/naive-norm-inline.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/naive-norm.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/nested-inline.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/out_of_order.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/point-product.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/q-prod.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/qr-decomp-modified-no-local-array.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/qr-decomp-no-local-array.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/qr-decomp-test-1.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/qr-decomp-test-2.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/qr-decomp-test-3.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/qr-decomp-test-4-1-linear-array.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/qr-decomp-test-4-1.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/qr-decomp-test-4-2-2.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/qr-decomp-test-4-2-linear-array.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/qr-decomp-test-4-2.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/qr-decomp-test-4-3-linear-array.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/qr-decomp-test-4-3.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => 
c-tests}/qr-decomp-test-4-4-linear-array.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/qr-decomp-test-4-5-linear-array.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/return.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/scalar.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/sgn-inline.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/sgn-naive-norm-inline-1.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/sgn-naive-norm-inline-2.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/sqrt.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/stencil-2d.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/ternary.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/transpose.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/var.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/width5.c (100%) rename src/dios-egraphs/Diospyros/{llvm-tests => c-tests}/width9.c (100%) create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/repeat-alloc.ll create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/repeat-load.ll diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.c b/src/dios-egraphs/Diospyros/c-tests/2d-2d-conv.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.c rename to src/dios-egraphs/Diospyros/c-tests/2d-2d-conv.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-conv.c b/src/dios-egraphs/Diospyros/c-tests/2d-conv.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/2d-conv.c rename to src/dios-egraphs/Diospyros/c-tests/2d-conv.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply-new.c b/src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply-new.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply-new.c rename to src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply-new.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply.c rename to src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d.c b/src/dios-egraphs/Diospyros/c-tests/2d.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/2d.c rename to src/dios-egraphs/Diospyros/c-tests/2d.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/3-by-3-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/3-by-3-matrix-multiply.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/3-by-3-matrix-multiply.c rename to src/dios-egraphs/Diospyros/c-tests/3-by-3-matrix-multiply.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/5-by-5-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/5-by-5-matrix-multiply.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/5-by-5-matrix-multiply.c rename to src/dios-egraphs/Diospyros/c-tests/5-by-5-matrix-multiply.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/add.c b/src/dios-egraphs/Diospyros/c-tests/add.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/add.c rename to src/dios-egraphs/Diospyros/c-tests/add.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/add_mult.c b/src/dios-egraphs/Diospyros/c-tests/add_mult.c similarity index 100% rename from 
src/dios-egraphs/Diospyros/llvm-tests/add_mult.c rename to src/dios-egraphs/Diospyros/c-tests/add_mult.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/break-w.c b/src/dios-egraphs/Diospyros/c-tests/break-w.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/break-w.c rename to src/dios-egraphs/Diospyros/c-tests/break-w.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/break.c b/src/dios-egraphs/Diospyros/c-tests/break.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/break.c rename to src/dios-egraphs/Diospyros/c-tests/break.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/calloc.c b/src/dios-egraphs/Diospyros/c-tests/calloc.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/calloc.c rename to src/dios-egraphs/Diospyros/c-tests/calloc.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/continue-w.c b/src/dios-egraphs/Diospyros/c-tests/continue-w.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/continue-w.c rename to src/dios-egraphs/Diospyros/c-tests/continue-w.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/continue.c b/src/dios-egraphs/Diospyros/c-tests/continue.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/continue.c rename to src/dios-egraphs/Diospyros/c-tests/continue.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/cube.c b/src/dios-egraphs/Diospyros/c-tests/cube.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/cube.c rename to src/dios-egraphs/Diospyros/c-tests/cube.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/five_binops.c b/src/dios-egraphs/Diospyros/c-tests/five_binops.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/five_binops.c rename to src/dios-egraphs/Diospyros/c-tests/five_binops.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/identity_matrix.c b/src/dios-egraphs/Diospyros/c-tests/identity_matrix.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/identity_matrix.c rename to src/dios-egraphs/Diospyros/c-tests/identity_matrix.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/if-else.c b/src/dios-egraphs/Diospyros/c-tests/if-else.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/if-else.c rename to src/dios-egraphs/Diospyros/c-tests/if-else.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/inline-float.c b/src/dios-egraphs/Diospyros/c-tests/inline-float.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/inline-float.c rename to src/dios-egraphs/Diospyros/c-tests/inline-float.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/inline-void.c b/src/dios-egraphs/Diospyros/c-tests/inline-void.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/inline-void.c rename to src/dios-egraphs/Diospyros/c-tests/inline-void.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/load_reuse.c b/src/dios-egraphs/Diospyros/c-tests/load_reuse.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/load_reuse.c rename to src/dios-egraphs/Diospyros/c-tests/load_reuse.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/loop-inline.c b/src/dios-egraphs/Diospyros/c-tests/loop-inline.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/loop-inline.c rename to src/dios-egraphs/Diospyros/c-tests/loop-inline.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mac.c b/src/dios-egraphs/Diospyros/c-tests/mac.c similarity index 100% rename from 
src/dios-egraphs/Diospyros/llvm-tests/mac.c rename to src/dios-egraphs/Diospyros/c-tests/mac.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/malloc.c b/src/dios-egraphs/Diospyros/c-tests/malloc.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/malloc.c rename to src/dios-egraphs/Diospyros/c-tests/malloc.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mat_mul.c b/src/dios-egraphs/Diospyros/c-tests/mat_mul.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/mat_mul.c rename to src/dios-egraphs/Diospyros/c-tests/mat_mul.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/matrix-multiply.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.c rename to src/dios-egraphs/Diospyros/c-tests/matrix-multiply.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mixed.c b/src/dios-egraphs/Diospyros/c-tests/mixed.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/mixed.c rename to src/dios-egraphs/Diospyros/c-tests/mixed.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mult.c b/src/dios-egraphs/Diospyros/c-tests/mult.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/mult.c rename to src/dios-egraphs/Diospyros/c-tests/mult.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/multi-mat-mul.c b/src/dios-egraphs/Diospyros/c-tests/multi-mat-mul.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/multi-mat-mul.c rename to src/dios-egraphs/Diospyros/c-tests/multi-mat-mul.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.c b/src/dios-egraphs/Diospyros/c-tests/multiple_adds.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.c rename to src/dios-egraphs/Diospyros/c-tests/multiple_adds.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/naive-norm-inline.c b/src/dios-egraphs/Diospyros/c-tests/naive-norm-inline.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/naive-norm-inline.c rename to src/dios-egraphs/Diospyros/c-tests/naive-norm-inline.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/naive-norm.c b/src/dios-egraphs/Diospyros/c-tests/naive-norm.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/naive-norm.c rename to src/dios-egraphs/Diospyros/c-tests/naive-norm.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/nested-inline.c b/src/dios-egraphs/Diospyros/c-tests/nested-inline.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/nested-inline.c rename to src/dios-egraphs/Diospyros/c-tests/nested-inline.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/out_of_order.c b/src/dios-egraphs/Diospyros/c-tests/out_of_order.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/out_of_order.c rename to src/dios-egraphs/Diospyros/c-tests/out_of_order.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/point-product.c b/src/dios-egraphs/Diospyros/c-tests/point-product.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/point-product.c rename to src/dios-egraphs/Diospyros/c-tests/point-product.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/q-prod.c b/src/dios-egraphs/Diospyros/c-tests/q-prod.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/q-prod.c rename to src/dios-egraphs/Diospyros/c-tests/q-prod.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-modified-no-local-array.c 
b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-modified-no-local-array.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-modified-no-local-array.c rename to src/dios-egraphs/Diospyros/c-tests/qr-decomp-modified-no-local-array.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-no-local-array.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-no-local-array.c rename to src/dios-egraphs/Diospyros/c-tests/qr-decomp-no-local-array.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-1.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-1.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-1.c rename to src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-1.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-2.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-2.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-2.c rename to src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-2.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-3.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-3.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-3.c rename to src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-3.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-1-linear-array.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-1-linear-array.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-1-linear-array.c rename to src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-1-linear-array.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-1.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-1.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-1.c rename to src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-1.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-2-2.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-2-2.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-2-2.c rename to src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-2-2.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-2-linear-array.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-2-linear-array.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-2-linear-array.c rename to src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-2-linear-array.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-2.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-2.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-2.c rename to src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-2.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-3-linear-array.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-3-linear-array.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-3-linear-array.c rename to src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-3-linear-array.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-3.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-3.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-3.c rename to 
src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-3.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-4-linear-array.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-4-linear-array.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-4-linear-array.c rename to src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-4-linear-array.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-5-linear-array.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-5-linear-array.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-test-4-5-linear-array.c rename to src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-5-linear-array.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/return.c b/src/dios-egraphs/Diospyros/c-tests/return.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/return.c rename to src/dios-egraphs/Diospyros/c-tests/return.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/scalar.c b/src/dios-egraphs/Diospyros/c-tests/scalar.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/scalar.c rename to src/dios-egraphs/Diospyros/c-tests/scalar.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/sgn-inline.c b/src/dios-egraphs/Diospyros/c-tests/sgn-inline.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/sgn-inline.c rename to src/dios-egraphs/Diospyros/c-tests/sgn-inline.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/sgn-naive-norm-inline-1.c b/src/dios-egraphs/Diospyros/c-tests/sgn-naive-norm-inline-1.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/sgn-naive-norm-inline-1.c rename to src/dios-egraphs/Diospyros/c-tests/sgn-naive-norm-inline-1.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/sgn-naive-norm-inline-2.c b/src/dios-egraphs/Diospyros/c-tests/sgn-naive-norm-inline-2.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/sgn-naive-norm-inline-2.c rename to src/dios-egraphs/Diospyros/c-tests/sgn-naive-norm-inline-2.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/sqrt.c b/src/dios-egraphs/Diospyros/c-tests/sqrt.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/sqrt.c rename to src/dios-egraphs/Diospyros/c-tests/sqrt.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.c b/src/dios-egraphs/Diospyros/c-tests/stencil-2d.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.c rename to src/dios-egraphs/Diospyros/c-tests/stencil-2d.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/ternary.c b/src/dios-egraphs/Diospyros/c-tests/ternary.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/ternary.c rename to src/dios-egraphs/Diospyros/c-tests/ternary.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/transpose.c b/src/dios-egraphs/Diospyros/c-tests/transpose.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/transpose.c rename to src/dios-egraphs/Diospyros/c-tests/transpose.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/var.c b/src/dios-egraphs/Diospyros/c-tests/var.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/var.c rename to src/dios-egraphs/Diospyros/c-tests/var.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/width5.c b/src/dios-egraphs/Diospyros/c-tests/width5.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/width5.c rename to src/dios-egraphs/Diospyros/c-tests/width5.c 
diff --git a/src/dios-egraphs/Diospyros/llvm-tests/width9.c b/src/dios-egraphs/Diospyros/c-tests/width9.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/width9.c rename to src/dios-egraphs/Diospyros/c-tests/width9.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/repeat-alloc.ll b/src/dios-egraphs/Diospyros/llvm-tests/repeat-alloc.ll new file mode 100644 index 00000000..390f7a24 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/repeat-alloc.ll @@ -0,0 +1,64 @@ +; ModuleID = 'build/opt.ll' +source_filename = "fail-tests/local-array-4.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@.str = private unnamed_addr constant [14 x i8] c"A Output: %f\0A\00", align 1 +@.memset_pattern = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 + +; Function Attrs: noinline nounwind ssp uwtable +define void @test(float* %0) #0 { +.preheader: + %1 = alloca i64, align 8 + %tmpcast = bitcast i64* %1 to [2 x float]* + %2 = bitcast i64* %1 to i8* + %3 = bitcast i64* %1 to float* + store i64 0, i64* %1, align 8 + call void @memset_pattern16(i8* nonnull %2, i8* bitcast ([4 x float]* @.memset_pattern to i8*), i64 8) #4 + %4 = load float, float* %3, align 8 + %5 = fadd float %4, 0.000000e+00 + %6 = getelementptr inbounds [2 x float], [2 x float]* %tmpcast, i64 0, i64 1 + %7 = load float, float* %6, align 4 + %8 = fadd float %5, %7 + store float %8, float* %0, align 4 + ret void +} + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #1 + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #0 { + %1 = alloca i64, align 8 + %tmpcast = bitcast i64* %1 to [2 x float]* + %2 = bitcast i64* %1 to float* + store float 0.000000e+00, float* %2, align 8 + %3 = getelementptr inbounds [2 x float], [2 x float]* %tmpcast, i64 0, i64 1 + store float 1.000000e+00, float* %3, align 4 + call void @test(float* nonnull %2) + %4 = load float, float* %2, align 8 + %5 = fpext float %4 to double + %6 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %5) #4 + %7 = load float, float* %3, align 4 + %8 = fpext float %7 to double + %9 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %8) #4 + ret i32 0 +} + +declare i32 @printf(i8*, ...) 
#2 + +; Function Attrs: argmemonly nofree +declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) #3 + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn writeonly } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { argmemonly nofree } +attributes #4 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/llvm-tests/repeat-load.ll b/src/dios-egraphs/Diospyros/llvm-tests/repeat-load.ll new file mode 100644 index 00000000..2c63e1eb --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/repeat-load.ll @@ -0,0 +1 @@ +; repeats loads that cannot be reused \ No newline at end of file From 1025e5a303c784fe0691a5ae4dbd6d4706bce14c Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 9 Mar 2022 15:38:15 -0500 Subject: [PATCH 059/143] change llvm_to_egg load case now just store entire load and reconstruct backwards from load This eliminates load copy problems from previously, where a load was never actually stored, so we could not detect the load LLVm instruction address, and it was copied. Now, if a load is referenced twice, only 1 is built --- .../Diospyros/fail-tests/local-array-1.c | 13 ++ .../Diospyros/fail-tests/local-array-2.c | 30 ++++- .../Diospyros/fail-tests/local-array-3.c | 25 ++++ .../Diospyros/fail-tests/local-array-4.c | 23 ++++ .../Diospyros/fail-tests/test1-local-array.c | 4 +- src/dios-egraphs/Diospyros/src/lib.rs | 123 ++++++++++++------ 6 files changed, 176 insertions(+), 42 deletions(-) diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-1.c b/src/dios-egraphs/Diospyros/fail-tests/local-array-1.c index 3bd244aa..1622fb14 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/local-array-1.c +++ b/src/dios-egraphs/Diospyros/fail-tests/local-array-1.c @@ -1,9 +1,11 @@ #include +#include #include #include #include #define SIZE 10 +#define DELTA 0.1f void test(float A[SIZE]) { float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; @@ -12,11 +14,22 @@ void test(float A[SIZE]) { } } +void no_opt_test(float A[SIZE]) { + float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = x[i]; + } +} + int main() { float A[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float expectedA[SIZE] = {[0 ... 
SIZE - 1] = 1.0f}; test(A); + no_opt_test(expectedA); for (int i = 0; i < SIZE; i++) { printf("A Output: %f\n", A[i]); + printf("expected: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); } return 0; } diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c b/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c index ecea4f9d..b7be1f82 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c +++ b/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c @@ -1,9 +1,11 @@ #include +#include #include #include #include -#define SIZE 10 +#define SIZE 2 +#define DELTA 0.1f void test(float A[SIZE], float B[SIZE], float C[SIZE]) { float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; @@ -21,19 +23,45 @@ void test(float A[SIZE], float B[SIZE], float C[SIZE]) { } } +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] += x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] += A[i]; + } + for (int i = 0; i < SIZE; i++) { + B[i] -= x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] += B[i]; + } +} + int main() { float A[SIZE] = {[0 ... SIZE - 1] = 1.0f}; float B[SIZE] = {[0 ... SIZE - 1] = 2.0f}; float C[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float expectedA[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float expectedB[SIZE] = {[0 ... SIZE - 1] = 2.0f}; + float expectedC[SIZE] = {[0 ... SIZE - 1] = 0.0f}; test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); for (int i = 0; i < SIZE; i++) { printf("A Output: %f\n", A[i]); + printf("expected: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); } for (int i = 0; i < SIZE; i++) { printf("B Output: %f\n", B[i]); + printf("expected: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); } for (int i = 0; i < SIZE; i++) { printf("C Output: %f\n", C[i]); + printf("expected: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); } return 0; } diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-3.c b/src/dios-egraphs/Diospyros/fail-tests/local-array-3.c index ffd80b69..3c334c5e 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/local-array-3.c +++ b/src/dios-egraphs/Diospyros/fail-tests/local-array-3.c @@ -1,8 +1,12 @@ #include +#include #include #include #include +#define SIZE 2 +#define DELTA 0.1f + #define SIZE 10 void test(float A[SIZE], float B[SIZE], float C[SIZE]) { @@ -22,13 +26,34 @@ void test(float A[SIZE], float B[SIZE], float C[SIZE]) { } } +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } + C[i] = A[i] + x[i]; + } + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } + C[i] = B[i] - x[i]; + } +} + int main() { float A[SIZE] = {[0 ... SIZE - 1] = 1.0f}; float B[SIZE] = {[0 ... SIZE - 1] = 2.0f}; float C[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float expectedC[SIZE] = {[0 ... 
SIZE - 1] = 0.0f}; test(A, B, C); + no_opt_test(A, B, expectedC); for (int i = 0; i < SIZE; i++) { printf("C Output: %f\n", C[i]); + printf("expected: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); } return 0; } diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c b/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c index 86abcb01..27a65675 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c +++ b/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c @@ -1,9 +1,11 @@ #include +#include #include #include #include #define SIZE 10 +#define DELTA 0.1f void test(float A[SIZE]) { for (int i = 0; i < SIZE; i++) { @@ -19,14 +21,35 @@ void test(float A[SIZE]) { } } +void no_opt_test(float A[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int j = 0; j < SIZE; j++) { + x[j] = 1.0f; + } + float sum = 0.0f; + for (int j = 0; j < SIZE; j++) { + sum += x[j]; + } + A[i] = sum; + } +} + int main() { float A[SIZE] = {[0 ... SIZE - 1] = 0.0f}; for (int i = 0; i < SIZE; i++) { A[i] = (float)i; } + float expectedA[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + expectedA[i] = (float)i; + } test(A); + no_opt_test(expectedA); for (int i = 0; i < SIZE; i++) { printf("A Output: %f\n", A[i]); + printf("expected: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); } return 0; } diff --git a/src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c b/src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c index 2a91056a..cb9ba117 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c @@ -96,6 +96,8 @@ int main(void) { printf("Expected X Output: %f\n", expectedx[i]); printf("E Output: %f\n", e[i]); printf("Expected E Output: %f\n", expectede[i]); - // assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedx[i] - x[i]) < DELTA); + assert(fabs(expectede[i] - e[i]) < DELTA); } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 83d92675..819dd942 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -1013,7 +1013,22 @@ unsafe fn llvm_recursive_add( } let indices_vector = indices.as_mut_ptr(); return LLVMBuildGEP(builder, inst, indices_vector, 1, b"\0".as_ptr() as *const _); - } else if isa_constant(inst) { + } + let mut matched = false; + let mut ret_value = inst; + for llvm_pair in &*llvm_arg_pairs { + let original_llvm = llvm_pair.original_value; + let new_llvm = llvm_pair.new_value; + if cmp_val_ref_address(&*original_llvm, &*inst) { + matched = true; + ret_value = new_llvm; + break; + } + } + if matched { + return ret_value; + } + if isa_constant(inst) { return inst; } else if isa_phi(inst) { return inst; @@ -1051,6 +1066,12 @@ unsafe fn llvm_recursive_add( LLVMSetOperand(cloned_inst, i as u32, new_operand); } LLVMInsertIntoBuilder(builder, cloned_inst); + + let pair = LLVMPair { + new_value: cloned_inst, + original_value: inst, + }; + llvm_arg_pairs.push(pair); return cloned_inst; } @@ -1202,7 +1223,7 @@ unsafe fn gep_to_egg( _llvm_arg_pairs: &Vec, _node_to_arg: &mut Vec, ) -> (Vec, i32) { - assert!(isa_argument(expr) || isa_gep(expr) || isa_load(expr)); + // assert!(isa_argument(expr) || isa_gep(expr) || isa_load(expr)); // let mut enode_vec = Vec::new(); let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); 
enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); @@ -1315,44 +1336,55 @@ unsafe fn load_to_egg( llvm_arg_pairs: &Vec, node_to_arg: &mut Vec, ) -> (Vec, i32) { - let addr = LLVMGetOperand(expr, 0); - if isa_argument(addr) { - return load_arg_to_egg( - addr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - } else if isa_gep(addr) { - return gep_to_egg( - expr, // we pass the entire instruction and not just the address - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - } else { - return address_to_egg( - addr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - } + return gep_to_egg( + expr, // we pass the entire instruction and not just the address + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); + // let addr = LLVMGetOperand(expr, 0); + // if isa_argument(addr) { + // return load_arg_to_egg( + // addr, + // enode_vec, + // next_idx, + // gep_map, + // store_map, + // id_map, + // symbol_map, + // llvm_arg_pairs, + // node_to_arg, + // ); + // } else if isa_gep(addr) { + // return gep_to_egg( + // expr, // we pass the entire instruction and not just the address + // enode_vec, + // next_idx, + // gep_map, + // store_map, + // id_map, + // symbol_map, + // llvm_arg_pairs, + // node_to_arg, + // ); + // } else { + // return address_to_egg( + // addr, + // enode_vec, + // next_idx, + // gep_map, + // store_map, + // id_map, + // symbol_map, + // llvm_arg_pairs, + // node_to_arg, + // ); + // } } unsafe fn store_to_egg( @@ -1933,6 +1965,8 @@ unsafe fn translate_egg( .get(&(array_name, array_offsets)) .expect("Symbol map lookup error: Cannot Find GEP"); let load_value = if isa_load(*gep_value) { + println!("Load"); + _llvm_print(*gep_value); let mut matched = false; let mut matched_expr = *gep_value; for pair in &*llvm_arg_pairs { @@ -1980,6 +2014,10 @@ unsafe fn translate_egg( // let cloned_sitofp = LLVMInstructionClone(*gep_value); let new_sitofp = llvm_recursive_add(builder, *gep_value, context, llvm_arg_pairs); new_sitofp + } else if isa_argument(*gep_value) { + _llvm_print(*gep_value); + let new_load_value = LLVMBuildLoad(builder, *gep_value, b"\0".as_ptr() as *const _); + new_load_value } else { // includes isa_alloca case let mut matched = false; @@ -2003,6 +2041,11 @@ unsafe fn translate_egg( new_load_value } }; + let llvm_pair = LLVMPair { + original_value: *gep_value, + new_value: load_value, + }; + llvm_arg_pairs.push(llvm_pair); load_value } VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { From ec9e7c5d552cece2b26b91839190fc3531b8735d Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 9 Mar 2022 16:29:45 -0500 Subject: [PATCH 060/143] works for n = 2, fails for n = 3 --- .../fail-tests/qr-decomp-local-arrays.c | 310 ++++++++++++++++++ src/dios-egraphs/Diospyros/src/lib.rs | 3 - 2 files changed, 310 insertions(+), 3 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c new file mode 100644 index 00000000..0ad00997 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c @@ -0,0 +1,310 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 
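The load-handling change in the patch above amounts to a translate-once table: before rebuilding an instruction, the pass looks up the original LLVM value in llvm_arg_pairs and reuses the previously built value if one exists; only otherwise does it build a new value and record the pair, so a load referenced twice is materialized once. A minimal standalone sketch of that pattern, with usize handles standing in for LLVMValueRef and a String standing in for the rebuilt instruction (illustrative names only, not the pass's real API):

use std::collections::HashMap;

// Stand-in for an LLVM value handle (the real pass keys on LLVMValueRef).
type ValueHandle = usize;

// Return the already-translated value if one was recorded; otherwise "build"
// it exactly once and remember the pair, so a repeated reference is reused.
fn translate_once(original: ValueHandle, pairs: &mut HashMap<ValueHandle, String>) -> String {
    if let Some(existing) = pairs.get(&original) {
        return existing.clone(); // reuse: do not rebuild the load
    }
    let rebuilt = format!("load_{}", original); // placeholder for an LLVMBuildLoad call
    pairs.insert(original, rebuilt.clone());
    rebuilt
}

fn main() {
    let mut pairs: HashMap<ValueHandle, String> = HashMap::new();
    let a = translate_once(7, &mut pairs);
    let b = translate_once(7, &mut pairs); // second reference: reused, not rebuilt
    assert_eq!(a, b);
    assert_eq!(pairs.len(), 1);
    println!("{} {}", a, b);
}

The later patches in this series follow the same idea: they replace the Vec of LLVMPair with a HashMap keyed on the original value so this lookup is constant-time, and they guard insertion with a presence check so that repeated references to the same load do not add duplicate entries.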
+#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + float I[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float e[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + float u[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float v[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + float q_min[SIZE * SIZE] = {[0 ... 
SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float q_t[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float res[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 819dd942..e5aa95cf 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -1965,8 +1965,6 @@ unsafe fn translate_egg( .get(&(array_name, array_offsets)) .expect("Symbol map lookup error: Cannot Find GEP"); let load_value = if isa_load(*gep_value) { - println!("Load"); - _llvm_print(*gep_value); let mut matched = false; let mut matched_expr = *gep_value; for pair in &*llvm_arg_pairs { @@ -2015,7 +2013,6 @@ unsafe fn translate_egg( let new_sitofp = llvm_recursive_add(builder, *gep_value, context, llvm_arg_pairs); new_sitofp } else if isa_argument(*gep_value) { - _llvm_print(*gep_value); let new_load_value = LLVMBuildLoad(builder, *gep_value, b"\0".as_ptr() as *const _); new_load_value } else { From b15a0bb7782a63dc96b089be33232a572af4e71b Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 10 Mar 2022 14:14:58 -0500 Subject: [PATCH 061/143] checkpoint before replacing with hashmap for llvm arg pairs for faster accesses --- src/dios-egraphs/Diospyros/diospyros.cpp | 2 +- .../Diospyros/fail-tests/qr-decomp-local-arrays.c | 6 +++--- src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c | 6 +++--- src/dios-egraphs/Diospyros/src/lib.rs | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 4745574f..2fb9c096 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -76,7 +76,7 @@ const int BINARY_OPERATOR = 2; /** Number of instructions to search back and see if translated - we keep less * to search faster; but actually cutting it short is unsound. 
*/ -const int NUM_TRANSLATED_INSTRUCTIONS = 2000; +const int NUM_TRANSLATED_INSTRUCTIONS = 10000000; /** * Fresh counters for temps and array generation diff --git a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c index 0ad00997..6ba1473d 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c +++ b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c @@ -7,7 +7,7 @@ #include #include -#define SIZE 2 +#define SIZE 3 #define MAX_FLOAT 100.00f #define DELTA 0.1f @@ -297,14 +297,14 @@ int main(void) { for (int j = 0; j < SIZE; j++) { printf("Q Output: %f\n", Q[i * SIZE + j]); printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); - assert(fabs(expectedQ[i] - Q[i]) < DELTA); + // assert(fabs(expectedQ[i] - Q[i]) < DELTA); } } for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { printf("R Output: %f\n", R[i * SIZE + j]); printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); - assert(fabs(expectedR[i] - R[i]) < DELTA); + // assert(fabs(expectedR[i] - R[i]) < DELTA); } } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c b/src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c index cb9ba117..6423eebc 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c +++ b/src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c @@ -96,8 +96,8 @@ int main(void) { printf("Expected X Output: %f\n", expectedx[i]); printf("E Output: %f\n", e[i]); printf("Expected E Output: %f\n", expectede[i]); - assert(fabs(expectedA[i] - A[i]) < DELTA); - assert(fabs(expectedx[i] - x[i]) < DELTA); - assert(fabs(expectede[i] - e[i]) < DELTA); + // assert(fabs(expectedA[i] - A[i]) < DELTA); + // assert(fabs(expectedx[i] - x[i]) < DELTA); + // assert(fabs(expectede[i] - e[i]) < DELTA); } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index e5aa95cf..3851f351 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -760,7 +760,7 @@ pub fn optimize( } let mut best = expr.clone(); if run_egg { - let pair = rules::run(&expr, 180, true, !run_egg); + let pair = rules::run(&expr, 3, true, !run_egg); best = pair.1; } if print_opt { From b9016f7a398ac4af5cb317bcb9f273c9e8182307 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 10 Mar 2022 15:21:42 -0500 Subject: [PATCH 062/143] 3 by 3 works, with a hash map The hash map makes searches for previous loads faster We can probably remove the limit on the number of elements stored to be unlimited --- src/dios-egraphs/Diospyros/src/lib.rs | 135 ++++++++++++++------------ 1 file changed, 75 insertions(+), 60 deletions(-) diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 3851f351..bce981dc 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -5,7 +5,7 @@ use libc::size_t; use llvm::{core::*, prelude::*, LLVMOpcode::*, LLVMRealPredicate}; use std::{ cmp, - collections::{BTreeMap, BTreeSet}, + collections::{BTreeMap, BTreeSet, HashMap}, ffi::CStr, mem, os::raw::c_char, @@ -50,6 +50,7 @@ extern "C" { // GEPMap : Maps the array name and array offset as symbols to the GEP // LLVM Value Ref that LLVM Generated type GEPMap = BTreeMap<(Symbol, Symbol), LLVMValueRef>; +type LLVMPairMap = HashMap; // VarMap : Maps a symbol to a llvm value ref representing a variable // type VarMap = BTreeMap; // // 
BopMap : Maps a binary oeprator llvm value ref to an ID, indicating a @@ -742,14 +743,20 @@ pub fn optimize( // llvm to egg let llvm_instrs = from_raw_parts(bb, size); let past_llvm_instrs = from_raw_parts(past_instrs, past_size); - let mut llvm_arg_pairs = Vec::new(); + // let mut llvm_arg_pairs = Vec::new(); + let mut llvm_arg_pairs = HashMap::new(); for instr_pair in past_llvm_instrs { - let new_instr_pair = LLVMPair { - original_value: instr_pair.original_value, - new_value: instr_pair.new_value, - }; - llvm_arg_pairs.push(new_instr_pair); + let original_value = instr_pair.original_value; + let new_value = instr_pair.new_value; + llvm_arg_pairs.insert(original_value, new_value); } + // for instr_pair in past_llvm_instrs { + // let new_instr_pair = LLVMPair { + // original_value: instr_pair.original_value, + // new_value: instr_pair.new_value, + // }; + // llvm_arg_pairs.push(new_instr_pair); + // } let mut node_to_arg = Vec::new(); let (expr, gep_map, store_map, symbol_map) = llvm_to_egg(llvm_instrs, &mut llvm_arg_pairs, &mut node_to_arg); @@ -760,7 +767,7 @@ pub fn optimize( } let mut best = expr.clone(); if run_egg { - let pair = rules::run(&expr, 3, true, !run_egg); + let pair = rules::run(&expr, 180, true, !run_egg); best = pair.1; } if print_opt { @@ -780,8 +787,16 @@ pub fn optimize( builder, ); + // let mut final_llvm_arg_pairs = Vec::new(); + // for pair in llvm_arg_pairs { + // final_llvm_arg_pairs.push(pair); + // } let mut final_llvm_arg_pairs = Vec::new(); - for pair in llvm_arg_pairs { + for (unchanged_val, new_val) in llvm_arg_pairs.iter() { + let pair = LLVMPair { + original_value: *unchanged_val, + new_value: *new_val, + }; final_llvm_arg_pairs.push(pair); } @@ -917,7 +932,7 @@ unsafe fn _llvm_recursive_print(inst: LLVMValueRef) -> () { // builder: LLVMBuilderRef, // inst: LLVMValueRef, // context: LLVMContextRef, -// llvm_arg_pairs: &mut Vec, +// llvm_arg_pairs: &mut LLVMPairMap, // ) -> LLVMValueRef { // if isa_argument(inst) { // let mut indices = Vec::new(); @@ -1003,7 +1018,7 @@ unsafe fn llvm_recursive_add( builder: LLVMBuilderRef, inst: LLVMValueRef, context: LLVMContextRef, - llvm_arg_pairs: &mut Vec, + llvm_arg_pairs: &mut LLVMPairMap, ) -> LLVMValueRef { let cloned_inst = LLVMInstructionClone(inst); if isa_argument(inst) { @@ -1016,12 +1031,12 @@ unsafe fn llvm_recursive_add( } let mut matched = false; let mut ret_value = inst; - for llvm_pair in &*llvm_arg_pairs { - let original_llvm = llvm_pair.original_value; - let new_llvm = llvm_pair.new_value; - if cmp_val_ref_address(&*original_llvm, &*inst) { + for (original_val, new_val) in &*llvm_arg_pairs { + // let original_llvm = llvm_pair.original_value; + // let new_llvm = llvm_pair.new_value; + if cmp_val_ref_address(&**original_val, &*inst) { matched = true; - ret_value = new_llvm; + ret_value = *new_val; break; } } @@ -1038,12 +1053,12 @@ unsafe fn llvm_recursive_add( // search the llvm_arg_pairs for allocas that were already created let mut matched = false; let mut ret_value = inst; - for llvm_pair in &*llvm_arg_pairs { - let original_llvm = llvm_pair.original_value; - let new_llvm = llvm_pair.new_value; - if cmp_val_ref_address(&*original_llvm, &*inst) { + for (original_val, new_val) in &*llvm_arg_pairs { + // let original_llvm = llvm_pair.original_value; + // let new_llvm = llvm_pair.new_value; + if cmp_val_ref_address(&**original_val, &*inst) { matched = true; - ret_value = new_llvm; + ret_value = *new_val; break; } } @@ -1054,7 +1069,7 @@ unsafe fn llvm_recursive_add( new_value: cloned_inst, 
original_value: inst, }; - llvm_arg_pairs.push(pair); + llvm_arg_pairs.insert(inst, cloned_inst); LLVMInsertIntoBuilder(builder, cloned_inst); return cloned_inst; } @@ -1071,7 +1086,7 @@ unsafe fn llvm_recursive_add( new_value: cloned_inst, original_value: inst, }; - llvm_arg_pairs.push(pair); + llvm_arg_pairs.insert(inst, cloned_inst); return cloned_inst; } @@ -1130,7 +1145,7 @@ unsafe fn arg_to_egg( _store_map: &mut StoreMap, _id_map: &mut IdMap, symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &Vec, + _llvm_arg_pairs: &LLVMPairMap, _node_to_arg: &mut Vec, ) -> (Vec, i32) { let sym_name = gen_arg_name(); @@ -1148,7 +1163,7 @@ unsafe fn bop_to_egg( store_map: &mut StoreMap, id_map: &mut IdMap, symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, + llvm_arg_pairs: &LLVMPairMap, node_to_arg: &mut Vec, ) -> (Vec, i32) { let left = LLVMGetOperand(expr, 0); @@ -1192,7 +1207,7 @@ unsafe fn unop_to_egg( store_map: &mut StoreMap, id_map: &mut IdMap, symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, + llvm_arg_pairs: &LLVMPairMap, node_to_arg: &mut Vec, ) -> (Vec, i32) { let sub_expr = LLVMGetOperand(expr, 0); @@ -1220,7 +1235,7 @@ unsafe fn gep_to_egg( _store_map: &mut StoreMap, _id_map: &mut IdMap, _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &Vec, + _llvm_arg_pairs: &LLVMPairMap, _node_to_arg: &mut Vec, ) -> (Vec, i32) { // assert!(isa_argument(expr) || isa_gep(expr) || isa_load(expr)); @@ -1259,7 +1274,7 @@ unsafe fn address_to_egg( _store_map: &mut StoreMap, _id_map: &mut IdMap, _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &Vec, + _llvm_arg_pairs: &LLVMPairMap, _node_to_arg: &mut Vec, ) -> (Vec, i32) { let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); @@ -1296,7 +1311,7 @@ unsafe fn sitofp_to_egg( _store_map: &mut StoreMap, _id_map: &mut IdMap, _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &Vec, + _llvm_arg_pairs: &LLVMPairMap, _node_to_arg: &mut Vec, ) -> (Vec, i32) { let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); @@ -1333,7 +1348,7 @@ unsafe fn load_to_egg( store_map: &mut StoreMap, id_map: &mut IdMap, symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, + llvm_arg_pairs: &LLVMPairMap, node_to_arg: &mut Vec, ) -> (Vec, i32) { return gep_to_egg( @@ -1395,7 +1410,7 @@ unsafe fn store_to_egg( store_map: &mut StoreMap, id_map: &mut IdMap, symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, + llvm_arg_pairs: &LLVMPairMap, node_to_arg: &mut Vec, ) -> (Vec, i32) { let data = LLVMGetOperand(expr, 0); @@ -1424,7 +1439,7 @@ unsafe fn const_to_egg( _store_map: &mut StoreMap, _id_map: &mut IdMap, _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &Vec, + _llvm_arg_pairs: &LLVMPairMap, _node_to_arg: &mut Vec, ) -> (Vec, i32) { let value = get_constant_float(expr); @@ -1440,7 +1455,7 @@ unsafe fn load_arg_to_egg( _store_map: &mut StoreMap, _id_map: &mut IdMap, _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &Vec, + _llvm_arg_pairs: &LLVMPairMap, _node_to_arg: &mut Vec, ) -> (Vec, i32) { assert!(isa_argument(expr) || isa_gep(expr)); @@ -1478,7 +1493,7 @@ unsafe fn load_call_to_egg( store_map: &mut StoreMap, id_map: &mut IdMap, symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, + llvm_arg_pairs: &LLVMPairMap, node_to_arg: &mut Vec, ) -> (Vec, i32) { if isa_sqrt32(expr) { @@ -1509,7 +1524,7 @@ unsafe fn fpext_to_egg( store_map: &mut StoreMap, id_map: &mut IdMap, symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, + llvm_arg_pairs: &LLVMPairMap, node_to_arg: &mut Vec, ) -> (Vec, i32) { assert!(isa_fpext(expr)); @@ -1535,7 +1550,7 @@ unsafe fn sqrt32_to_egg( 
store_map: &mut StoreMap, id_map: &mut IdMap, symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, + llvm_arg_pairs: &LLVMPairMap, node_to_arg: &mut Vec, ) -> (Vec, i32) { assert!(isa_sqrt32(expr)); @@ -1564,7 +1579,7 @@ unsafe fn sqrt64_to_egg( _store_map: &mut StoreMap, _id_map: &mut IdMap, _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &Vec, + _llvm_arg_pairs: &LLVMPairMap, _node_to_arg: &mut Vec, ) -> (Vec, i32) { assert!(isa_sqrt64(expr)); @@ -1579,7 +1594,7 @@ unsafe fn fptrunc_to_egg( store_map: &mut StoreMap, id_map: &mut IdMap, symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, + llvm_arg_pairs: &LLVMPairMap, node_to_arg: &mut Vec, ) -> (Vec, i32) { assert!(isa_fptrunc(expr)); @@ -1619,7 +1634,7 @@ unsafe fn bitcast_to_egg( store_map: &mut StoreMap, id_map: &mut IdMap, symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, + llvm_arg_pairs: &LLVMPairMap, node_to_arg: &mut Vec, ) -> (Vec, i32) { assert!(isa_bitcast(expr)); @@ -1646,7 +1661,7 @@ unsafe fn bitcast_to_egg( // _store_map: &mut StoreMap, // _id_map: &mut IdMap, // _symbol_map: &mut SymbolMap, -// _llvm_arg_pairs: &Vec, +// _llvm_arg_pairs: LLVMPairMap, // _node_to_arg: &mut Vec, // ) -> (Vec, i32) { // assert!(isa_phi(expr)); @@ -1675,11 +1690,11 @@ unsafe fn ref_to_egg( store_map: &mut StoreMap, id_map: &mut IdMap, symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, + llvm_arg_pairs: &LLVMPairMap, node_to_arg: &mut Vec, ) -> (Vec, i32) { - for pair in llvm_arg_pairs { - if cmp_val_ref_address(&*pair.original_value, &*expr) { + for (original_val, new_val) in llvm_arg_pairs.iter() { + if cmp_val_ref_address(&**original_val, &*expr) { // Here we create a new numbered variable node let var_idx = gen_node_idx(); let var_idx_str = var_idx.to_string(); @@ -1865,7 +1880,7 @@ unsafe fn ref_to_egg( unsafe fn llvm_to_egg<'a>( bb_vec: &[LLVMValueRef], - llvm_arg_pairs: &Vec, + llvm_arg_pairs: &mut LLVMPairMap, node_to_arg: &mut Vec, ) -> (RecExpr, GEPMap, StoreMap, SymbolMap) { let mut enode_vec = Vec::new(); @@ -1909,7 +1924,7 @@ unsafe fn translate_egg( gep_map: &GEPMap, store_map: &StoreMap, symbol_map: &SymbolMap, - llvm_arg_pairs: &mut Vec, + llvm_arg_pairs: &mut LLVMPairMap, node_to_arg_pair: &Vec, builder: LLVMBuilderRef, context: LLVMContextRef, @@ -1932,12 +1947,12 @@ unsafe fn translate_egg( let node_index = node_arg_pair.node_int; let string_node_index = node_index.to_string(); if string_node_index.parse::().unwrap() == *symbol { - for llvm_pair in &mut *llvm_arg_pairs { - let original_llvm = llvm_pair.original_value; - let new_llvm = llvm_pair.new_value; - if cmp_val_ref_address(&*original_llvm, &*llvm_node) { + for (original_val, new_val) in &mut *llvm_arg_pairs { + // let original_llvm = llvm_pair.original_value; + // let new_llvm = llvm_pair.new_value; + if cmp_val_ref_address(&**original_val, &*llvm_node) { matched = true; - ret_value = new_llvm; + ret_value = *new_val; break; } } @@ -1967,10 +1982,10 @@ unsafe fn translate_egg( let load_value = if isa_load(*gep_value) { let mut matched = false; let mut matched_expr = *gep_value; - for pair in &*llvm_arg_pairs { - if cmp_val_ref_address(&*pair.original_value, &**gep_value) { + for (original_val, new_val) in &*llvm_arg_pairs { + if cmp_val_ref_address(&**original_val, &**gep_value) { matched = true; - matched_expr = pair.new_value; + matched_expr = *new_val; break; } } @@ -1985,7 +2000,7 @@ unsafe fn translate_egg( original_value: *gep_value, new_value: new_load, }; - llvm_arg_pairs.push(llvm_pair); + llvm_arg_pairs.insert(*gep_value, new_load); new_load } } else if 
isa_gep(*gep_value) { @@ -2019,10 +2034,10 @@ unsafe fn translate_egg( // includes isa_alloca case let mut matched = false; let mut matched_expr = *gep_value; - for pair in &*llvm_arg_pairs { - if cmp_val_ref_address(&*pair.original_value, &**gep_value) { + for (original_val, new_val) in &*llvm_arg_pairs { + if cmp_val_ref_address(&**original_val, &**gep_value) { matched = true; - matched_expr = pair.new_value; + matched_expr = *new_val; break; } } @@ -2034,7 +2049,7 @@ unsafe fn translate_egg( original_value: *gep_value, new_value: new_load_value, }; - llvm_arg_pairs.push(llvm_pair); + llvm_arg_pairs.insert(*gep_value, new_load_value); new_load_value } }; @@ -2042,7 +2057,7 @@ unsafe fn translate_egg( original_value: *gep_value, new_value: load_value, }; - llvm_arg_pairs.push(llvm_pair); + llvm_arg_pairs.insert(*gep_value, load_value); load_value } VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { @@ -2431,7 +2446,7 @@ unsafe fn egg_to_llvm( gep_map: &GEPMap, store_map: &StoreMap, symbol_map: &SymbolMap, - llvm_arg_pairs: &mut Vec, + llvm_arg_pairs: &mut LLVMPairMap, node_to_arg_pair: &Vec, module: LLVMModuleRef, context: LLVMContextRef, From 76180f2d657025ba75301ce3b065c5023fb2ef7a Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 10 Mar 2022 16:48:33 -0500 Subject: [PATCH 063/143] with assertions on, passses random 3's Remove length restriction on vector since we use hash map on rust side --- src/dios-egraphs/Diospyros/diospyros.cpp | 29 +++++++------------ .../fail-tests/qr-decomp-local-arrays.c | 4 +-- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 2fb9c096..b7f7bc53 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -74,10 +74,6 @@ const string NO_OPT_PREFIX = "no_opt_"; const int SQRT_OPERATOR = 3; const int BINARY_OPERATOR = 2; -/** Number of instructions to search back and see if translated - we keep less - * to search faster; but actually cutting it short is unsound. 
*/ -const int NUM_TRANSLATED_INSTRUCTIONS = 10000000; - /** * Fresh counters for temps and array generation */ @@ -870,15 +866,12 @@ struct DiospyrosPass : public FunctionPass { // Trim down translated_exprs std::vector new_translated_exprs = {}; - if (translated_exprs.size() >= - NUM_TRANSLATED_INSTRUCTIONS) { - for (int i = 0; i < NUM_TRANSLATED_INSTRUCTIONS; i++) { - LLVMPair final_instr = translated_exprs.back(); - translated_exprs.pop_back(); - new_translated_exprs.push_back(final_instr); - } - translated_exprs = new_translated_exprs; + for (int i = 0; i < translated_exprs.size(); i++) { + LLVMPair final_instr = translated_exprs.back(); + translated_exprs.pop_back(); + new_translated_exprs.push_back(final_instr); } + translated_exprs = new_translated_exprs; } } std::reverse(bb_instrs.begin(), bb_instrs.end()); @@ -907,14 +900,12 @@ struct DiospyrosPass : public FunctionPass { // Trim down translated_exprs std::vector new_translated_exprs = {}; - if (translated_exprs.size() >= NUM_TRANSLATED_INSTRUCTIONS) { - for (int i = 0; i < NUM_TRANSLATED_INSTRUCTIONS; i++) { - LLVMPair final_instr = translated_exprs.back(); - translated_exprs.pop_back(); - new_translated_exprs.push_back(final_instr); - } - translated_exprs = new_translated_exprs; + for (int i = 0; i < translated_exprs.size(); i++) { + LLVMPair final_instr = translated_exprs.back(); + translated_exprs.pop_back(); + new_translated_exprs.push_back(final_instr); } + translated_exprs = new_translated_exprs; } return true; }; diff --git a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c index 6ba1473d..e152c277 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c +++ b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c @@ -297,14 +297,14 @@ int main(void) { for (int j = 0; j < SIZE; j++) { printf("Q Output: %f\n", Q[i * SIZE + j]); printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); - // assert(fabs(expectedQ[i] - Q[i]) < DELTA); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); } } for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { printf("R Output: %f\n", R[i * SIZE + j]); printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); - // assert(fabs(expectedR[i] - R[i]) < DELTA); + assert(fabs(expectedR[i] - R[i]) < DELTA); } } } \ No newline at end of file From 0b2799539ef471711a074ac88bb82b463e8adecb Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Mon, 14 Mar 2022 16:14:19 -0400 Subject: [PATCH 064/143] fix flaky test Issue with that Rust Hashmaps do not overwrite old keys For one case, load was adding multiple times Need to check the load was not in the map before adding it Add these checks in all places --- src/dios-egraphs/Diospyros/dce.ll | 3403 ++++++++++ src/dios-egraphs/Diospyros/diospyros.cpp | 50 +- .../fail-tests/qr-decomp-local-arrays.c | 7 +- src/dios-egraphs/Diospyros/failed-test/aa.ll | 750 +++ .../Diospyros/failed-test/clang.ll | 2293 +++++++ src/dios-egraphs/Diospyros/failed-test/dce.ll | 4711 ++++++++++++++ .../Diospyros/failed-test/diospyros.ll | 5489 +++++++++++++++++ src/dios-egraphs/Diospyros/failed-test/final | Bin 0 -> 13628 bytes src/dios-egraphs/Diospyros/failed-test/opt.ll | 750 +++ .../Diospyros/flaky-outputs/diff-aa.txt | 0 .../Diospyros/flaky-outputs/diff-clang.txt | 0 .../Diospyros/flaky-outputs/diff-dce.txt | 5416 ++++++++++++++++ .../flaky-outputs/diff-diospyros.txt | 5450 ++++++++++++++++ .../Diospyros/flaky-outputs/diff-final.txt | 1 + 
.../Diospyros/flaky-outputs/diff-opt.txt | 0 .../Diospyros/flaky-outputs/flaky-aa.ll | 828 +++ .../Diospyros/flaky-outputs/flaky-clang.ll | 2356 +++++++ .../Diospyros/flaky-outputs/flaky-dce.ll | 3482 +++++++++++ .../flaky-outputs/flaky-diospyros.ll | 4260 +++++++++++++ .../Diospyros/flaky-outputs/flaky-final | Bin 0 -> 13676 bytes .../Diospyros/flaky-outputs/flaky-opt.ll | 828 +++ src/dios-egraphs/Diospyros/src/lib.rs | 86 +- 22 files changed, 40126 insertions(+), 34 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/dce.ll create mode 100644 src/dios-egraphs/Diospyros/failed-test/aa.ll create mode 100644 src/dios-egraphs/Diospyros/failed-test/clang.ll create mode 100644 src/dios-egraphs/Diospyros/failed-test/dce.ll create mode 100644 src/dios-egraphs/Diospyros/failed-test/diospyros.ll create mode 100755 src/dios-egraphs/Diospyros/failed-test/final create mode 100644 src/dios-egraphs/Diospyros/failed-test/opt.ll create mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/diff-aa.txt create mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/diff-clang.txt create mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/diff-dce.txt create mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/diff-diospyros.txt create mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/diff-final.txt create mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/diff-opt.txt create mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/flaky-aa.ll create mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/flaky-clang.ll create mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/flaky-dce.ll create mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/flaky-diospyros.ll create mode 100755 src/dios-egraphs/Diospyros/flaky-outputs/flaky-final create mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/flaky-opt.ll diff --git a/src/dios-egraphs/Diospyros/dce.ll b/src/dios-egraphs/Diospyros/dce.ll new file mode 100644 index 00000000..bc275dc2 --- /dev/null +++ b/src/dios-egraphs/Diospyros/dce.ll @@ -0,0 +1,3403 @@ +; ModuleID = 'build/diospyros.ll' +source_filename = "fail-tests/qr-decomp-local-arrays.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 +@.str.1 = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 +@.str.2 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 +@.str.3 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 +@.str.4 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @sgn(float %0) #0 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_sgn(float %0) #1 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @naive_norm(float* %0, i32 %1) #0 { + %3 = icmp sgt i32 %1, 0 + %smax = select i1 %3, i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = 
%2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 3 + br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 + %indvars.iv.next = or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32(float) #2 + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_naive_norm(float* %0, i32 %1) #1 { + %3 = icmp sgt i32 %1, 0 + %smax = select i1 %3, i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 3 + br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 
2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 + %indvars.iv.next = or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_transpose(float* %0) #0 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast float* %4 to i32* + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_transpose(float* %0) #1 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast float* %4 to i32* + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; 
Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { +.preheader: + store float 0.000000e+00, float* %2, align 4 + %3 = getelementptr float, float* %0, i32 0 + %4 = load float, float* %3, align 4 + %5 = insertelement <4 x float> zeroinitializer, float %4, i32 0 + %6 = insertelement <4 x float> %5, float 0.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 0.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 0.000000e+00, i32 3 + %9 = getelementptr float, float* %1, i32 0 + %10 = load float, float* %9, align 4 + %11 = insertelement <4 x float> zeroinitializer, float %10, i32 0 + %12 = insertelement <4 x float> %11, float 0.000000e+00, i32 1 + %13 = insertelement <4 x float> %12, float 0.000000e+00, i32 2 + %14 = insertelement <4 x float> %13, float 0.000000e+00, i32 3 + %15 = call <4 x float> @llvm.fma.v4f32(<4 x float> %8, <4 x float> %14, <4 x float> zeroinitializer) + %16 = extractelement <4 x float> %15, i32 0 + store float %16, float* %2, align 4 + %17 = getelementptr float, float* %0, i32 0 + %18 = load float, float* %17, align 4 + %19 = insertelement <4 x float> zeroinitializer, float %18, i32 0 + %20 = insertelement <4 x float> %19, float 1.000000e+00, i32 1 + %21 = insertelement <4 x float> %20, float 1.000000e+00, i32 2 + %22 = insertelement <4 x float> %21, float 1.000000e+00, i32 3 + %23 = insertelement <4 x float> zeroinitializer, float %10, i32 0 + %24 = insertelement <4 x float> %23, float 0.000000e+00, i32 1 + %25 = insertelement <4 x float> %24, float 0.000000e+00, i32 2 + %26 = insertelement <4 x float> %25, float 0.000000e+00, i32 3 + %27 = fmul <4 x float> %22, %26 + %28 = fadd <4 x float> %27, zeroinitializer + %29 = getelementptr float, float* %0, i32 0 + %30 = getelementptr inbounds float, float* %29, i64 1 + %31 = load float, float* %30, align 4 + %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 + %33 = insertelement <4 x float> %32, float 0.000000e+00, i32 1 + %34 = insertelement <4 x float> %33, float 0.000000e+00, i32 2 + %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 3 + %36 = getelementptr float, float* %1, i32 0 + %37 = getelementptr inbounds float, float* %36, i64 2 + %38 = load float, float* %37, align 4 + %39 = insertelement <4 x float> zeroinitializer, float %38, i32 0 + %40 = insertelement <4 x float> %39, float 0.000000e+00, i32 1 + %41 = insertelement <4 x float> %40, float 0.000000e+00, i32 2 + %42 = insertelement <4 x float> %41, float 0.000000e+00, i32 3 + %43 = call <4 x float> @llvm.fma.v4f32(<4 x float> %35, <4 x float> %42, <4 x float> %28) + %44 = extractelement <4 x float> %43, i32 0 + store float %44, float* %2, align 4 + %45 = extractelement <4 x float> %43, i32 1 + %46 = getelementptr float, float* %2, i32 0 + %47 = getelementptr inbounds float, float* %46, i64 1 + store float %45, float* %47, align 4 + %48 = getelementptr float, float* %0, i32 0 + %49 = load float, float* %48, align 4 + %50 = insertelement <4 x float> zeroinitializer, float %49, i32 0 + %51 = insertelement <4 x float> %50, float 0.000000e+00, i32 1 + %52 = insertelement <4 x float> %51, float 0.000000e+00, i32 2 + %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 3 + %54 = getelementptr float, float* %1, i32 0 + %55 = getelementptr inbounds float, float* %54, i64 1 + %56 = load float, float* %55, align 4 + %57 = insertelement <4 x float> zeroinitializer, float %56, i32 0 + %58 = insertelement <4 x float> %57, float 0.000000e+00, i32 1 + 
%59 = insertelement <4 x float> %58, float 0.000000e+00, i32 2 + %60 = insertelement <4 x float> %59, float 0.000000e+00, i32 3 + %61 = call <4 x float> @llvm.fma.v4f32(<4 x float> %53, <4 x float> %60, <4 x float> zeroinitializer) + %62 = extractelement <4 x float> %61, i32 0 + store float %62, float* %47, align 4 + %63 = insertelement <4 x float> zeroinitializer, float %49, i32 0 + %64 = insertelement <4 x float> %63, float 1.000000e+00, i32 1 + %65 = insertelement <4 x float> %64, float 1.000000e+00, i32 2 + %66 = insertelement <4 x float> %65, float 1.000000e+00, i32 3 + %67 = insertelement <4 x float> zeroinitializer, float %56, i32 0 + %68 = insertelement <4 x float> %67, float 0.000000e+00, i32 1 + %69 = insertelement <4 x float> %68, float 0.000000e+00, i32 2 + %70 = insertelement <4 x float> %69, float 0.000000e+00, i32 3 + %71 = fmul <4 x float> %66, %70 + %72 = fadd <4 x float> %71, zeroinitializer + %73 = getelementptr float, float* %0, i32 0 + %74 = getelementptr inbounds float, float* %73, i64 1 + %75 = load float, float* %74, align 4 + %76 = insertelement <4 x float> zeroinitializer, float %75, i32 0 + %77 = insertelement <4 x float> %76, float 0.000000e+00, i32 1 + %78 = insertelement <4 x float> %77, float 0.000000e+00, i32 2 + %79 = insertelement <4 x float> %78, float 0.000000e+00, i32 3 + %80 = getelementptr float, float* %1, i32 0 + %81 = getelementptr inbounds float, float* %80, i64 3 + %82 = load float, float* %81, align 4 + %83 = insertelement <4 x float> zeroinitializer, float %82, i32 0 + %84 = insertelement <4 x float> %83, float 0.000000e+00, i32 1 + %85 = insertelement <4 x float> %84, float 0.000000e+00, i32 2 + %86 = insertelement <4 x float> %85, float 0.000000e+00, i32 3 + %87 = call <4 x float> @llvm.fma.v4f32(<4 x float> %79, <4 x float> %86, <4 x float> %72) + %88 = extractelement <4 x float> %87, i32 0 + store float %88, float* %47, align 4 + %89 = extractelement <4 x float> %87, i32 1 + %90 = getelementptr float, float* %2, i32 0 + %91 = getelementptr inbounds float, float* %90, i64 2 + store float %89, float* %91, align 4 + %92 = getelementptr float, float* %0, i32 0 + %93 = getelementptr inbounds float, float* %92, i64 2 + %94 = load float, float* %93, align 4 + %95 = insertelement <4 x float> zeroinitializer, float %94, i32 0 + %96 = insertelement <4 x float> %95, float 0.000000e+00, i32 1 + %97 = insertelement <4 x float> %96, float 0.000000e+00, i32 2 + %98 = insertelement <4 x float> %97, float 0.000000e+00, i32 3 + %99 = getelementptr float, float* %1, i32 0 + %100 = load float, float* %99, align 4 + %101 = insertelement <4 x float> zeroinitializer, float %100, i32 0 + %102 = insertelement <4 x float> %101, float 0.000000e+00, i32 1 + %103 = insertelement <4 x float> %102, float 0.000000e+00, i32 2 + %104 = insertelement <4 x float> %103, float 0.000000e+00, i32 3 + %105 = call <4 x float> @llvm.fma.v4f32(<4 x float> %98, <4 x float> %104, <4 x float> zeroinitializer) + %106 = extractelement <4 x float> %105, i32 0 + store float %106, float* %91, align 4 + %107 = insertelement <4 x float> zeroinitializer, float %94, i32 0 + %108 = insertelement <4 x float> %107, float 1.000000e+00, i32 1 + %109 = insertelement <4 x float> %108, float 1.000000e+00, i32 2 + %110 = insertelement <4 x float> %109, float 1.000000e+00, i32 3 + %111 = insertelement <4 x float> zeroinitializer, float %100, i32 0 + %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 1 + %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 2 + %114 = insertelement <4 
x float> %113, float 0.000000e+00, i32 3 + %115 = fmul <4 x float> %110, %114 + %116 = fadd <4 x float> %115, zeroinitializer + %117 = getelementptr float, float* %0, i32 0 + %118 = getelementptr inbounds float, float* %117, i64 3 + %119 = load float, float* %118, align 4 + %120 = insertelement <4 x float> zeroinitializer, float %119, i32 0 + %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 1 + %122 = insertelement <4 x float> %121, float 0.000000e+00, i32 2 + %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3 + %124 = load float, float* %37, align 4 + %125 = insertelement <4 x float> zeroinitializer, float %124, i32 0 + %126 = insertelement <4 x float> %125, float 0.000000e+00, i32 1 + %127 = insertelement <4 x float> %126, float 0.000000e+00, i32 2 + %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 3 + %129 = call <4 x float> @llvm.fma.v4f32(<4 x float> %123, <4 x float> %128, <4 x float> %116) + %130 = extractelement <4 x float> %129, i32 0 + store float %130, float* %91, align 4 + %131 = extractelement <4 x float> %129, i32 1 + %132 = getelementptr float, float* %2, i32 0 + %133 = getelementptr inbounds float, float* %132, i64 3 + store float %131, float* %133, align 4 + %134 = load float, float* %93, align 4 + %135 = insertelement <4 x float> zeroinitializer, float %134, i32 0 + %136 = insertelement <4 x float> %135, float 0.000000e+00, i32 1 + %137 = insertelement <4 x float> %136, float 0.000000e+00, i32 2 + %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 3 + %139 = load float, float* %55, align 4 + %140 = insertelement <4 x float> zeroinitializer, float %139, i32 0 + %141 = insertelement <4 x float> %140, float 0.000000e+00, i32 1 + %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 2 + %143 = insertelement <4 x float> %142, float 0.000000e+00, i32 3 + %144 = call <4 x float> @llvm.fma.v4f32(<4 x float> %138, <4 x float> %143, <4 x float> zeroinitializer) + %145 = extractelement <4 x float> %144, i32 0 + store float %145, float* %133, align 4 + %146 = load float, float* %93, align 4 + %147 = insertelement <4 x float> zeroinitializer, float %146, i32 0 + %148 = insertelement <4 x float> %147, float 1.000000e+00, i32 1 + %149 = insertelement <4 x float> %148, float 1.000000e+00, i32 2 + %150 = insertelement <4 x float> %149, float 1.000000e+00, i32 3 + %151 = insertelement <4 x float> zeroinitializer, float %139, i32 0 + %152 = insertelement <4 x float> %151, float 0.000000e+00, i32 1 + %153 = insertelement <4 x float> %152, float 0.000000e+00, i32 2 + %154 = insertelement <4 x float> %153, float 0.000000e+00, i32 3 + %155 = fmul <4 x float> %150, %154 + %156 = fadd <4 x float> %155, zeroinitializer + %157 = load float, float* %118, align 4 + %158 = insertelement <4 x float> zeroinitializer, float %157, i32 0 + %159 = insertelement <4 x float> %158, float 0.000000e+00, i32 1 + %160 = insertelement <4 x float> %159, float 0.000000e+00, i32 2 + %161 = insertelement <4 x float> %160, float 0.000000e+00, i32 3 + %162 = load float, float* %81, align 4 + %163 = insertelement <4 x float> zeroinitializer, float %162, i32 0 + %164 = insertelement <4 x float> %163, float 0.000000e+00, i32 1 + %165 = insertelement <4 x float> %164, float 0.000000e+00, i32 2 + %166 = insertelement <4 x float> %165, float 0.000000e+00, i32 3 + %167 = call <4 x float> @llvm.fma.v4f32(<4 x float> %161, <4 x float> %166, <4 x float> %156) + %168 = extractelement <4 x float> %167, i32 0 + store float %168, float* %133, align 4 + ret void +} + +; 
Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { +.preheader: + store float 0.000000e+00, float* %2, align 4 + %3 = load float, float* %0, align 4 + %4 = load float, float* %1, align 4 + %5 = fmul float %3, %4 + %6 = fadd float %5, 0.000000e+00 + store float %6, float* %2, align 4 + %7 = getelementptr inbounds float, float* %0, i64 1 + %8 = load float, float* %7, align 4 + %9 = getelementptr inbounds float, float* %1, i64 2 + %10 = load float, float* %9, align 4 + %11 = fmul float %8, %10 + %12 = fadd float %6, %11 + store float %12, float* %2, align 4 + %13 = getelementptr inbounds float, float* %2, i64 1 + store float 0.000000e+00, float* %13, align 4 + %14 = load float, float* %0, align 4 + %15 = getelementptr inbounds float, float* %1, i64 1 + %16 = load float, float* %15, align 4 + %17 = fmul float %14, %16 + %18 = fadd float %17, 0.000000e+00 + store float %18, float* %13, align 4 + %19 = load float, float* %7, align 4 + %20 = getelementptr inbounds float, float* %1, i64 3 + %21 = load float, float* %20, align 4 + %22 = fmul float %19, %21 + %23 = fadd float %18, %22 + store float %23, float* %13, align 4 + %24 = getelementptr inbounds float, float* %0, i64 2 + %25 = getelementptr inbounds float, float* %2, i64 2 + store float 0.000000e+00, float* %25, align 4 + %26 = load float, float* %24, align 4 + %27 = load float, float* %1, align 4 + %28 = fmul float %26, %27 + %29 = fadd float %28, 0.000000e+00 + store float %29, float* %25, align 4 + %30 = getelementptr inbounds float, float* %0, i64 3 + %31 = load float, float* %30, align 4 + %32 = load float, float* %9, align 4 + %33 = fmul float %31, %32 + %34 = fadd float %29, %33 + store float %34, float* %25, align 4 + %35 = getelementptr inbounds float, float* %2, i64 3 + store float 0.000000e+00, float* %35, align 4 + %36 = load float, float* %24, align 4 + %37 = load float, float* %15, align 4 + %38 = fmul float %36, %37 + %39 = fadd float %38, 0.000000e+00 + store float %39, float* %35, align 4 + %40 = load float, float* %30, align 4 + %41 = load float, float* %20, align 4 + %42 = fmul float %40, %41 + %43 = fadd float %39, %42 + store float %43, float* %35, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader49: + %3 = getelementptr float, float* %0, i32 0 + %4 = bitcast float* %3 to i32* + %5 = load i32, i32* %4, align 4 + %6 = bitcast i32 %5 to float + %7 = insertelement <4 x float> zeroinitializer, float %6, i32 0 + %8 = insertelement <4 x float> %7, float 0.000000e+00, i32 1 + %9 = insertelement <4 x float> %8, float 0.000000e+00, i32 2 + %10 = insertelement <4 x float> %9, float 0.000000e+00, i32 3 + %11 = extractelement <4 x float> %10, i32 0 + %12 = getelementptr float, float* %2, i32 0 + %13 = bitcast float* %12 to i32* + %14 = bitcast i32* %13 to float* + store float %11, float* %14, align 4 + %15 = getelementptr float, float* %0, i32 0 + %16 = getelementptr inbounds float, float* %15, i64 1 + %17 = bitcast float* %16 to i32* + %18 = load i32, i32* %17, align 4 + %19 = bitcast i32 %18 to float + %20 = insertelement <4 x float> zeroinitializer, float %19, i32 0 + %21 = insertelement <4 x float> %20, float 0.000000e+00, i32 1 + %22 = insertelement <4 x float> %21, float 0.000000e+00, i32 2 + %23 = insertelement <4 x float> %22, float 0.000000e+00, i32 3 + %24 = extractelement <4 x float> %23, i32 0 + %25 = getelementptr float, float* %2, 
i32 0 + %26 = getelementptr inbounds float, float* %25, i64 1 + %27 = bitcast float* %26 to i32* + %28 = bitcast i32* %27 to float* + store float %24, float* %28, align 4 + %29 = getelementptr float, float* %0, i32 0 + %30 = getelementptr inbounds float, float* %29, i64 2 + %31 = bitcast float* %30 to i32* + %32 = load i32, i32* %31, align 4 + %33 = bitcast i32 %32 to float + %34 = insertelement <4 x float> zeroinitializer, float %33, i32 0 + %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 1 + %36 = insertelement <4 x float> %35, float 0.000000e+00, i32 2 + %37 = insertelement <4 x float> %36, float 0.000000e+00, i32 3 + %38 = extractelement <4 x float> %37, i32 0 + %39 = getelementptr float, float* %2, i32 0 + %40 = getelementptr inbounds float, float* %39, i64 2 + %41 = bitcast float* %40 to i32* + %42 = bitcast i32* %41 to float* + store float %38, float* %42, align 4 + %43 = getelementptr float, float* %0, i32 0 + %44 = getelementptr inbounds float, float* %43, i64 3 + %45 = bitcast float* %44 to i32* + %46 = load i32, i32* %45, align 4 + %47 = bitcast i32 %46 to float + %48 = fneg float %47 + %49 = insertelement <4 x float> zeroinitializer, float %48, i32 0 + %50 = bitcast i32 %5 to float ; !! This is referencing an old load: match that should not occur? + %51 = bitcast i32 %5 to float + %52 = fmul float %50, %51 + %53 = fadd float %52, 0.000000e+00 + %54 = load i32, i32* %31, align 4 + %55 = bitcast i32 %54 to float + %56 = bitcast i32 %54 to float + %57 = fmul float %55, %56 + %58 = fadd float %53, %57 + %59 = call float @llvm.sqrt.f32(float %58) + %60 = bitcast i32 %5 to float + %61 = fcmp olt float %60, 0.000000e+00 + %62 = sext i1 %61 to i32 + %63 = fcmp ogt float %60, 0.000000e+00 + %64 = zext i1 %63 to i32 + %65 = add nsw i32 %62, %64 + %66 = sitofp i32 %65 to float + %67 = fneg float %66 + %68 = fmul float %59, %67 + %69 = bitcast i32 %5 to float + %70 = fadd float %69, %68 + %71 = bitcast i32 %5 to float + %72 = bitcast i32 %5 to float + %73 = fmul float %71, %72 + %74 = fadd float %73, 0.000000e+00 + %75 = bitcast i32 %54 to float + %76 = bitcast i32 %54 to float + %77 = fmul float %75, %76 + %78 = fadd float %74, %77 + %79 = call float @llvm.sqrt.f32(float %78) + %80 = fneg float %66 + %81 = fmul float %79, %80 + %82 = bitcast i32 %5 to float + %83 = fadd float %82, %81 + %84 = bitcast i32 %5 to float + %85 = bitcast i32 %5 to float + %86 = fmul float %84, %85 + %87 = fadd float %86, 0.000000e+00 + %88 = bitcast i32 %54 to float + %89 = bitcast i32 %54 to float + %90 = fmul float %88, %89 + %91 = fadd float %87, %90 + %92 = call float @llvm.sqrt.f32(float %91) + %93 = fneg float %66 + %94 = fmul float %92, %93 + %95 = bitcast i32 %5 to float + %96 = fadd float %95, %94 + %97 = fmul float %83, %96 + %98 = fadd float %97, 0.000000e+00 + %99 = bitcast i32 %5 to float + %100 = bitcast i32 %5 to float + %101 = fmul float %99, %100 + %102 = fadd float %101, 0.000000e+00 + %103 = bitcast i32 %54 to float + %104 = bitcast i32 %54 to float + %105 = fmul float %103, %104 + %106 = fadd float %102, %105 + %107 = call float @llvm.sqrt.f32(float %106) + %108 = fneg float %66 + %109 = fmul float %107, %108 + %110 = fmul float %109, 0.000000e+00 + %111 = bitcast i32 %54 to float + %112 = fadd float %111, %110 + %113 = bitcast i32 %5 to float + %114 = bitcast i32 %5 to float + %115 = fmul float %113, %114 + %116 = fadd float %115, 0.000000e+00 + %117 = bitcast i32 %54 to float + %118 = bitcast i32 %54 to float + %119 = fmul float %117, %118 + %120 = fadd float %116, %119 + %121 
= call float @llvm.sqrt.f32(float %120) + %122 = fneg float %66 + %123 = fmul float %121, %122 + %124 = fmul float %123, 0.000000e+00 + %125 = bitcast i32 %54 to float + %126 = fadd float %125, %124 + %127 = fmul float %112, %126 + %128 = fadd float %98, %127 + %129 = call float @llvm.sqrt.f32(float %128) + %130 = fadd float %129, 0.000000e+00 + %131 = fdiv float %70, %130 + %132 = fmul float %131, 2.000000e+00 + %133 = bitcast i32 %5 to float + %134 = bitcast i32 %5 to float + %135 = fmul float %133, %134 + %136 = fadd float %135, 0.000000e+00 + %137 = bitcast i32 %54 to float + %138 = bitcast i32 %54 to float + %139 = fmul float %137, %138 + %140 = fadd float %136, %139 + %141 = call float @llvm.sqrt.f32(float %140) + %142 = fneg float %66 + %143 = fmul float %141, %142 + %144 = bitcast i32 %5 to float + %145 = fadd float %144, %143 + %146 = bitcast i32 %5 to float + %147 = bitcast i32 %5 to float + %148 = fmul float %146, %147 + %149 = fadd float %148, 0.000000e+00 + %150 = bitcast i32 %54 to float + %151 = bitcast i32 %54 to float + %152 = fmul float %150, %151 + %153 = fadd float %149, %152 + %154 = call float @llvm.sqrt.f32(float %153) + %155 = fneg float %66 + %156 = fmul float %154, %155 + %157 = bitcast i32 %5 to float + %158 = fadd float %157, %156 + %159 = bitcast i32 %5 to float + %160 = bitcast i32 %5 to float + %161 = fmul float %159, %160 + %162 = fadd float %161, 0.000000e+00 + %163 = bitcast i32 %54 to float + %164 = bitcast i32 %54 to float + %165 = fmul float %163, %164 + %166 = fadd float %162, %165 + %167 = call float @llvm.sqrt.f32(float %166) + %168 = fneg float %66 + %169 = fmul float %167, %168 + %170 = bitcast i32 %5 to float + %171 = fadd float %170, %169 + %172 = fmul float %158, %171 + %173 = fadd float %172, 0.000000e+00 + %174 = bitcast i32 %5 to float + %175 = bitcast i32 %5 to float + %176 = fmul float %174, %175 + %177 = fadd float %176, 0.000000e+00 + %178 = bitcast i32 %54 to float + %179 = bitcast i32 %54 to float + %180 = fmul float %178, %179 + %181 = fadd float %177, %180 + %182 = call float @llvm.sqrt.f32(float %181) + %183 = fneg float %66 + %184 = fmul float %182, %183 + %185 = fmul float %184, 0.000000e+00 + %186 = bitcast i32 %54 to float + %187 = fadd float %186, %185 + %188 = bitcast i32 %5 to float + %189 = bitcast i32 %5 to float + %190 = fmul float %188, %189 + %191 = fadd float %190, 0.000000e+00 + %192 = bitcast i32 %54 to float + %193 = bitcast i32 %54 to float + %194 = fmul float %192, %193 + %195 = fadd float %191, %194 + %196 = call float @llvm.sqrt.f32(float %195) + %197 = fneg float %66 + %198 = fmul float %196, %197 + %199 = fmul float %198, 0.000000e+00 + %200 = bitcast i32 %54 to float + %201 = fadd float %200, %199 + %202 = fmul float %187, %201 + %203 = fadd float %173, %202 + %204 = call float @llvm.sqrt.f32(float %203) + %205 = fadd float %204, 0.000000e+00 + %206 = fdiv float %145, %205 + %207 = fmul float %132, %206 + %208 = insertelement <4 x float> %49, float %207, i32 1 + %209 = bitcast i32 %5 to float + %210 = bitcast i32 %5 to float + %211 = fmul float %209, %210 + %212 = fadd float %211, 0.000000e+00 + %213 = bitcast i32 %54 to float + %214 = bitcast i32 %54 to float + %215 = fmul float %213, %214 + %216 = fadd float %212, %215 + %217 = call float @llvm.sqrt.f32(float %216) + %218 = fneg float %66 + %219 = fmul float %217, %218 + %220 = bitcast i32 %5 to float + %221 = fadd float %220, %219 + %222 = bitcast i32 %5 to float + %223 = bitcast i32 %5 to float + %224 = fmul float %222, %223 + %225 = fadd float %224, 
0.000000e+00 + %226 = bitcast i32 %54 to float + %227 = bitcast i32 %54 to float + %228 = fmul float %226, %227 + %229 = fadd float %225, %228 + %230 = call float @llvm.sqrt.f32(float %229) + %231 = fneg float %66 + %232 = fmul float %230, %231 + %233 = bitcast i32 %5 to float + %234 = fadd float %233, %232 + %235 = bitcast i32 %5 to float + %236 = bitcast i32 %5 to float + %237 = fmul float %235, %236 + %238 = fadd float %237, 0.000000e+00 + %239 = bitcast i32 %54 to float + %240 = bitcast i32 %54 to float + %241 = fmul float %239, %240 + %242 = fadd float %238, %241 + %243 = call float @llvm.sqrt.f32(float %242) + %244 = fneg float %66 + %245 = fmul float %243, %244 + %246 = bitcast i32 %5 to float + %247 = fadd float %246, %245 + %248 = fmul float %234, %247 + %249 = fadd float %248, 0.000000e+00 + %250 = bitcast i32 %5 to float + %251 = bitcast i32 %5 to float + %252 = fmul float %250, %251 + %253 = fadd float %252, 0.000000e+00 + %254 = bitcast i32 %54 to float + %255 = bitcast i32 %54 to float + %256 = fmul float %254, %255 + %257 = fadd float %253, %256 + %258 = call float @llvm.sqrt.f32(float %257) + %259 = fneg float %66 + %260 = fmul float %258, %259 + %261 = fmul float %260, 0.000000e+00 + %262 = bitcast i32 %54 to float + %263 = fadd float %262, %261 + %264 = bitcast i32 %5 to float + %265 = bitcast i32 %5 to float + %266 = fmul float %264, %265 + %267 = fadd float %266, 0.000000e+00 + %268 = bitcast i32 %54 to float + %269 = bitcast i32 %54 to float + %270 = fmul float %268, %269 + %271 = fadd float %267, %270 + %272 = call float @llvm.sqrt.f32(float %271) + %273 = fneg float %66 + %274 = fmul float %272, %273 + %275 = fmul float %274, 0.000000e+00 + %276 = bitcast i32 %54 to float + %277 = fadd float %276, %275 + %278 = fmul float %263, %277 + %279 = fadd float %249, %278 + %280 = call float @llvm.sqrt.f32(float %279) + %281 = fadd float %280, 0.000000e+00 + %282 = fdiv float %221, %281 + %283 = fmul float %282, 2.000000e+00 + %284 = bitcast i32 %5 to float + %285 = bitcast i32 %5 to float + %286 = fmul float %284, %285 + %287 = fadd float %286, 0.000000e+00 + %288 = bitcast i32 %54 to float + %289 = bitcast i32 %54 to float + %290 = fmul float %288, %289 + %291 = fadd float %287, %290 + %292 = call float @llvm.sqrt.f32(float %291) + %293 = fneg float %66 + %294 = fmul float %292, %293 + %295 = fmul float %294, 0.000000e+00 + %296 = bitcast i32 %54 to float + %297 = fadd float %296, %295 + %298 = bitcast i32 %5 to float + %299 = bitcast i32 %5 to float + %300 = fmul float %298, %299 + %301 = fadd float %300, 0.000000e+00 + %302 = bitcast i32 %54 to float + %303 = bitcast i32 %54 to float + %304 = fmul float %302, %303 + %305 = fadd float %301, %304 + %306 = call float @llvm.sqrt.f32(float %305) + %307 = fneg float %66 + %308 = fmul float %306, %307 + %309 = bitcast i32 %5 to float + %310 = fadd float %309, %308 + %311 = bitcast i32 %5 to float + %312 = bitcast i32 %5 to float + %313 = fmul float %311, %312 + %314 = fadd float %313, 0.000000e+00 + %315 = bitcast i32 %54 to float + %316 = bitcast i32 %54 to float + %317 = fmul float %315, %316 + %318 = fadd float %314, %317 + %319 = call float @llvm.sqrt.f32(float %318) + %320 = fneg float %66 + %321 = fmul float %319, %320 + %322 = bitcast i32 %5 to float + %323 = fadd float %322, %321 + %324 = fmul float %310, %323 + %325 = fadd float %324, 0.000000e+00 + %326 = bitcast i32 %5 to float + %327 = bitcast i32 %5 to float + %328 = fmul float %326, %327 + %329 = fadd float %328, 0.000000e+00 + %330 = bitcast i32 %54 to float + 
%331 = bitcast i32 %54 to float + %332 = fmul float %330, %331 + %333 = fadd float %329, %332 + %334 = call float @llvm.sqrt.f32(float %333) + %335 = fneg float %66 + %336 = fmul float %334, %335 + %337 = fmul float %336, 0.000000e+00 + %338 = bitcast i32 %54 to float + %339 = fadd float %338, %337 + %340 = bitcast i32 %5 to float + %341 = bitcast i32 %5 to float + %342 = fmul float %340, %341 + %343 = fadd float %342, 0.000000e+00 + %344 = bitcast i32 %54 to float + %345 = bitcast i32 %54 to float + %346 = fmul float %344, %345 + %347 = fadd float %343, %346 + %348 = call float @llvm.sqrt.f32(float %347) + %349 = fneg float %66 + %350 = fmul float %348, %349 + %351 = fmul float %350, 0.000000e+00 + %352 = bitcast i32 %54 to float + %353 = fadd float %352, %351 + %354 = fmul float %339, %353 + %355 = fadd float %325, %354 + %356 = call float @llvm.sqrt.f32(float %355) + %357 = fadd float %356, 0.000000e+00 + %358 = fdiv float %297, %357 + %359 = fmul float %283, %358 + %360 = insertelement <4 x float> %208, float %359, i32 2 + %361 = bitcast i32 %5 to float + %362 = bitcast i32 %5 to float + %363 = fmul float %361, %362 + %364 = fadd float %363, 0.000000e+00 + %365 = bitcast i32 %54 to float + %366 = bitcast i32 %54 to float + %367 = fmul float %365, %366 + %368 = fadd float %364, %367 + %369 = call float @llvm.sqrt.f32(float %368) + %370 = fneg float %66 + %371 = fmul float %369, %370 + %372 = fmul float %371, 0.000000e+00 + %373 = bitcast i32 %54 to float + %374 = fadd float %373, %372 + %375 = bitcast i32 %5 to float + %376 = bitcast i32 %5 to float + %377 = fmul float %375, %376 + %378 = fadd float %377, 0.000000e+00 + %379 = bitcast i32 %54 to float + %380 = bitcast i32 %54 to float + %381 = fmul float %379, %380 + %382 = fadd float %378, %381 + %383 = call float @llvm.sqrt.f32(float %382) + %384 = fneg float %66 + %385 = fmul float %383, %384 + %386 = bitcast i32 %5 to float + %387 = fadd float %386, %385 + %388 = bitcast i32 %5 to float + %389 = bitcast i32 %5 to float + %390 = fmul float %388, %389 + %391 = fadd float %390, 0.000000e+00 + %392 = bitcast i32 %54 to float + %393 = bitcast i32 %54 to float + %394 = fmul float %392, %393 + %395 = fadd float %391, %394 + %396 = call float @llvm.sqrt.f32(float %395) + %397 = fneg float %66 + %398 = fmul float %396, %397 + %399 = bitcast i32 %5 to float + %400 = fadd float %399, %398 + %401 = fmul float %387, %400 + %402 = fadd float %401, 0.000000e+00 + %403 = bitcast i32 %5 to float + %404 = bitcast i32 %5 to float + %405 = fmul float %403, %404 + %406 = fadd float %405, 0.000000e+00 + %407 = bitcast i32 %54 to float + %408 = bitcast i32 %54 to float + %409 = fmul float %407, %408 + %410 = fadd float %406, %409 + %411 = call float @llvm.sqrt.f32(float %410) + %412 = fneg float %66 + %413 = fmul float %411, %412 + %414 = fmul float %413, 0.000000e+00 + %415 = bitcast i32 %54 to float + %416 = fadd float %415, %414 + %417 = bitcast i32 %5 to float + %418 = bitcast i32 %5 to float + %419 = fmul float %417, %418 + %420 = fadd float %419, 0.000000e+00 + %421 = bitcast i32 %54 to float + %422 = bitcast i32 %54 to float + %423 = fmul float %421, %422 + %424 = fadd float %420, %423 + %425 = call float @llvm.sqrt.f32(float %424) + %426 = fneg float %66 + %427 = fmul float %425, %426 + %428 = fmul float %427, 0.000000e+00 + %429 = bitcast i32 %54 to float + %430 = fadd float %429, %428 + %431 = fmul float %416, %430 + %432 = fadd float %402, %431 + %433 = call float @llvm.sqrt.f32(float %432) + %434 = fadd float %433, 0.000000e+00 + %435 = fdiv 
float %374, %434 + %436 = fmul float %435, 2.000000e+00 + %437 = bitcast i32 %5 to float + %438 = bitcast i32 %5 to float + %439 = fmul float %437, %438 + %440 = fadd float %439, 0.000000e+00 + %441 = bitcast i32 %54 to float + %442 = bitcast i32 %54 to float + %443 = fmul float %441, %442 + %444 = fadd float %440, %443 + %445 = call float @llvm.sqrt.f32(float %444) + %446 = fneg float %66 + %447 = fmul float %445, %446 + %448 = bitcast i32 %5 to float + %449 = fadd float %448, %447 + %450 = bitcast i32 %5 to float + %451 = bitcast i32 %5 to float + %452 = fmul float %450, %451 + %453 = fadd float %452, 0.000000e+00 + %454 = bitcast i32 %54 to float + %455 = bitcast i32 %54 to float + %456 = fmul float %454, %455 + %457 = fadd float %453, %456 + %458 = call float @llvm.sqrt.f32(float %457) + %459 = fneg float %66 + %460 = fmul float %458, %459 + %461 = bitcast i32 %5 to float + %462 = fadd float %461, %460 + %463 = bitcast i32 %5 to float + %464 = bitcast i32 %5 to float + %465 = fmul float %463, %464 + %466 = fadd float %465, 0.000000e+00 + %467 = bitcast i32 %54 to float + %468 = bitcast i32 %54 to float + %469 = fmul float %467, %468 + %470 = fadd float %466, %469 + %471 = call float @llvm.sqrt.f32(float %470) + %472 = fneg float %66 + %473 = fmul float %471, %472 + %474 = bitcast i32 %5 to float + %475 = fadd float %474, %473 + %476 = fmul float %462, %475 + %477 = fadd float %476, 0.000000e+00 + %478 = bitcast i32 %5 to float + %479 = bitcast i32 %5 to float + %480 = fmul float %478, %479 + %481 = fadd float %480, 0.000000e+00 + %482 = bitcast i32 %54 to float + %483 = bitcast i32 %54 to float + %484 = fmul float %482, %483 + %485 = fadd float %481, %484 + %486 = call float @llvm.sqrt.f32(float %485) + %487 = fneg float %66 + %488 = fmul float %486, %487 + %489 = fmul float %488, 0.000000e+00 + %490 = bitcast i32 %54 to float + %491 = fadd float %490, %489 + %492 = bitcast i32 %5 to float + %493 = bitcast i32 %5 to float + %494 = fmul float %492, %493 + %495 = fadd float %494, 0.000000e+00 + %496 = bitcast i32 %54 to float + %497 = bitcast i32 %54 to float + %498 = fmul float %496, %497 + %499 = fadd float %495, %498 + %500 = call float @llvm.sqrt.f32(float %499) + %501 = fneg float %66 + %502 = fmul float %500, %501 + %503 = fmul float %502, 0.000000e+00 + %504 = bitcast i32 %54 to float + %505 = fadd float %504, %503 + %506 = fmul float %491, %505 + %507 = fadd float %477, %506 + %508 = call float @llvm.sqrt.f32(float %507) + %509 = fadd float %508, 0.000000e+00 + %510 = fdiv float %449, %509 + %511 = fmul float %436, %510 + %512 = insertelement <4 x float> %360, float %511, i32 3 + %513 = fsub <4 x float> , %512 + %514 = bitcast i32 %5 to float + %515 = bitcast i32 %5 to float + %516 = fmul float %514, %515 + %517 = fadd float %516, 0.000000e+00 + %518 = bitcast i32 %54 to float + %519 = bitcast i32 %54 to float + %520 = fmul float %518, %519 + %521 = fadd float %517, %520 + %522 = call float @llvm.sqrt.f32(float %521) + %523 = fneg float %66 + %524 = fmul float %522, %523 + %525 = fmul float %524, 0.000000e+00 + %526 = bitcast i32 %54 to float + %527 = fadd float %526, %525 + %528 = bitcast i32 %5 to float + %529 = bitcast i32 %5 to float + %530 = fmul float %528, %529 + %531 = fadd float %530, 0.000000e+00 + %532 = bitcast i32 %54 to float + %533 = bitcast i32 %54 to float + %534 = fmul float %532, %533 + %535 = fadd float %531, %534 + %536 = call float @llvm.sqrt.f32(float %535) + %537 = fneg float %66 + %538 = fmul float %536, %537 + %539 = bitcast i32 %5 to float + %540 = fadd 
float %539, %538 + %541 = bitcast i32 %5 to float + %542 = bitcast i32 %5 to float + %543 = fmul float %541, %542 + %544 = fadd float %543, 0.000000e+00 + %545 = bitcast i32 %54 to float + %546 = bitcast i32 %54 to float + %547 = fmul float %545, %546 + %548 = fadd float %544, %547 + %549 = call float @llvm.sqrt.f32(float %548) + %550 = fneg float %66 + %551 = fmul float %549, %550 + %552 = bitcast i32 %5 to float + %553 = fadd float %552, %551 + %554 = fmul float %540, %553 + %555 = fadd float %554, 0.000000e+00 + %556 = bitcast i32 %5 to float + %557 = bitcast i32 %5 to float + %558 = fmul float %556, %557 + %559 = fadd float %558, 0.000000e+00 + %560 = bitcast i32 %54 to float + %561 = bitcast i32 %54 to float + %562 = fmul float %560, %561 + %563 = fadd float %559, %562 + %564 = call float @llvm.sqrt.f32(float %563) + %565 = fneg float %66 + %566 = fmul float %564, %565 + %567 = fmul float %566, 0.000000e+00 + %568 = bitcast i32 %54 to float + %569 = fadd float %568, %567 + %570 = bitcast i32 %5 to float + %571 = bitcast i32 %5 to float + %572 = fmul float %570, %571 + %573 = fadd float %572, 0.000000e+00 + %574 = bitcast i32 %54 to float + %575 = bitcast i32 %54 to float + %576 = fmul float %574, %575 + %577 = fadd float %573, %576 + %578 = call float @llvm.sqrt.f32(float %577) + %579 = fneg float %66 + %580 = fmul float %578, %579 + %581 = fmul float %580, 0.000000e+00 + %582 = bitcast i32 %54 to float + %583 = fadd float %582, %581 + %584 = fmul float %569, %583 + %585 = fadd float %555, %584 + %586 = call float @llvm.sqrt.f32(float %585) + %587 = fadd float %586, 0.000000e+00 + %588 = fdiv float %527, %587 + %589 = fmul float %588, 2.000000e+00 + %590 = bitcast i32 %5 to float + %591 = bitcast i32 %5 to float + %592 = fmul float %590, %591 + %593 = fadd float %592, 0.000000e+00 + %594 = bitcast i32 %54 to float + %595 = bitcast i32 %54 to float + %596 = fmul float %594, %595 + %597 = fadd float %593, %596 + %598 = call float @llvm.sqrt.f32(float %597) + %599 = fneg float %66 + %600 = fmul float %598, %599 + %601 = fmul float %600, 0.000000e+00 + %602 = bitcast i32 %54 to float + %603 = fadd float %602, %601 + %604 = bitcast i32 %5 to float + %605 = bitcast i32 %5 to float + %606 = fmul float %604, %605 + %607 = fadd float %606, 0.000000e+00 + %608 = bitcast i32 %54 to float + %609 = bitcast i32 %54 to float + %610 = fmul float %608, %609 + %611 = fadd float %607, %610 + %612 = call float @llvm.sqrt.f32(float %611) + %613 = fneg float %66 + %614 = fmul float %612, %613 + %615 = bitcast i32 %5 to float + %616 = fadd float %615, %614 + %617 = bitcast i32 %5 to float + %618 = bitcast i32 %5 to float + %619 = fmul float %617, %618 + %620 = fadd float %619, 0.000000e+00 + %621 = bitcast i32 %54 to float + %622 = bitcast i32 %54 to float + %623 = fmul float %621, %622 + %624 = fadd float %620, %623 + %625 = call float @llvm.sqrt.f32(float %624) + %626 = fneg float %66 + %627 = fmul float %625, %626 + %628 = bitcast i32 %5 to float + %629 = fadd float %628, %627 + %630 = fmul float %616, %629 + %631 = fadd float %630, 0.000000e+00 + %632 = bitcast i32 %5 to float + %633 = bitcast i32 %5 to float + %634 = fmul float %632, %633 + %635 = fadd float %634, 0.000000e+00 + %636 = bitcast i32 %54 to float + %637 = bitcast i32 %54 to float + %638 = fmul float %636, %637 + %639 = fadd float %635, %638 + %640 = call float @llvm.sqrt.f32(float %639) + %641 = fneg float %66 + %642 = fmul float %640, %641 + %643 = fmul float %642, 0.000000e+00 + %644 = bitcast i32 %54 to float + %645 = fadd float %644, 
%643 + %646 = bitcast i32 %5 to float + %647 = bitcast i32 %5 to float + %648 = fmul float %646, %647 + %649 = fadd float %648, 0.000000e+00 + %650 = bitcast i32 %54 to float + %651 = bitcast i32 %54 to float + %652 = fmul float %650, %651 + %653 = fadd float %649, %652 + %654 = call float @llvm.sqrt.f32(float %653) + %655 = fneg float %66 + %656 = fmul float %654, %655 + %657 = fmul float %656, 0.000000e+00 + %658 = bitcast i32 %54 to float + %659 = fadd float %658, %657 + %660 = fmul float %645, %659 + %661 = fadd float %631, %660 + %662 = call float @llvm.sqrt.f32(float %661) + %663 = fadd float %662, 0.000000e+00 + %664 = fdiv float %603, %663 + %665 = fmul float %589, %664 + %666 = fsub float 1.000000e+00, %665 + %667 = insertelement <4 x float> zeroinitializer, float %666, i32 0 + %668 = insertelement <4 x float> %667, float 0.000000e+00, i32 1 + %669 = insertelement <4 x float> %668, float 0.000000e+00, i32 2 + %670 = insertelement <4 x float> %669, float 0.000000e+00, i32 3 + %671 = shufflevector <4 x float> %513, <4 x float> %670, <8 x i32> + %672 = extractelement <8 x float> %671, i32 0 + %673 = getelementptr float, float* %2, i32 0 + %674 = getelementptr inbounds float, float* %673, i64 3 + %675 = bitcast float* %674 to i32* + %676 = bitcast i32* %675 to float* + store float %672, float* %676, align 4 + %677 = bitcast float* %1 to i8* + %678 = alloca [4 x float], align 16 + %679 = bitcast [4 x float]* %678 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %677, i8* nonnull align 16 dereferenceable(16) %679, i64 16, i1 false) + store float 0.000000e+00, float* %2, align 4 + %680 = bitcast i32 %5 to float + %681 = bitcast i32 %5 to float + %682 = fmul float %680, %681 + %683 = fadd float %682, 0.000000e+00 + %684 = load i32, i32* %31, align 4 + %685 = bitcast i32 %684 to float + %686 = bitcast i32 %684 to float + %687 = fmul float %685, %686 + %688 = fadd float %683, %687 + %689 = call float @llvm.sqrt.f32(float %688) + %690 = sext i1 %61 to i32 + %691 = zext i1 %63 to i32 + %692 = add nsw i32 %690, %691 + %693 = sitofp i32 %692 to float + %694 = fneg float %693 + %695 = fmul float %689, %694 + %696 = bitcast i32 %5 to float + %697 = fadd float %696, %695 + %698 = bitcast i32 %5 to float + %699 = bitcast i32 %5 to float + %700 = fmul float %698, %699 + %701 = fadd float %700, 0.000000e+00 + %702 = bitcast i32 %684 to float + %703 = bitcast i32 %684 to float + %704 = fmul float %702, %703 + %705 = fadd float %701, %704 + %706 = call float @llvm.sqrt.f32(float %705) + %707 = fneg float %693 + %708 = fmul float %706, %707 + %709 = bitcast i32 %5 to float + %710 = fadd float %709, %708 + %711 = bitcast i32 %5 to float + %712 = bitcast i32 %5 to float + %713 = fmul float %711, %712 + %714 = fadd float %713, 0.000000e+00 + %715 = bitcast i32 %684 to float + %716 = bitcast i32 %684 to float + %717 = fmul float %715, %716 + %718 = fadd float %714, %717 + %719 = call float @llvm.sqrt.f32(float %718) + %720 = fneg float %693 + %721 = fmul float %719, %720 + %722 = bitcast i32 %5 to float + %723 = fadd float %722, %721 + %724 = fmul float %710, %723 + %725 = fadd float %724, 0.000000e+00 + %726 = bitcast i32 %5 to float + %727 = bitcast i32 %5 to float + %728 = fmul float %726, %727 + %729 = fadd float %728, 0.000000e+00 + %730 = bitcast i32 %684 to float + %731 = bitcast i32 %684 to float + %732 = fmul float %730, %731 + %733 = fadd float %729, %732 + %734 = call float @llvm.sqrt.f32(float %733) + %735 = fneg float %693 + %736 = fmul float %734, %735 + 
%737 = fmul float %736, 0.000000e+00 + %738 = bitcast i32 %684 to float + %739 = fadd float %738, %737 + %740 = bitcast i32 %5 to float + %741 = bitcast i32 %5 to float + %742 = fmul float %740, %741 + %743 = fadd float %742, 0.000000e+00 + %744 = bitcast i32 %684 to float + %745 = bitcast i32 %684 to float + %746 = fmul float %744, %745 + %747 = fadd float %743, %746 + %748 = call float @llvm.sqrt.f32(float %747) + %749 = fneg float %693 + %750 = fmul float %748, %749 + %751 = fmul float %750, 0.000000e+00 + %752 = bitcast i32 %684 to float + %753 = fadd float %752, %751 + %754 = fmul float %739, %753 + %755 = fadd float %725, %754 + %756 = call float @llvm.sqrt.f32(float %755) + %757 = fadd float %756, 0.000000e+00 + %758 = fdiv float %697, %757 + %759 = fmul float %758, 2.000000e+00 + %760 = bitcast i32 %5 to float + %761 = bitcast i32 %5 to float + %762 = fmul float %760, %761 + %763 = fadd float %762, 0.000000e+00 + %764 = bitcast i32 %684 to float + %765 = bitcast i32 %684 to float + %766 = fmul float %764, %765 + %767 = fadd float %763, %766 + %768 = call float @llvm.sqrt.f32(float %767) + %769 = fneg float %693 + %770 = fmul float %768, %769 + %771 = bitcast i32 %5 to float + %772 = fadd float %771, %770 + %773 = bitcast i32 %5 to float + %774 = bitcast i32 %5 to float + %775 = fmul float %773, %774 + %776 = fadd float %775, 0.000000e+00 + %777 = bitcast i32 %684 to float + %778 = bitcast i32 %684 to float + %779 = fmul float %777, %778 + %780 = fadd float %776, %779 + %781 = call float @llvm.sqrt.f32(float %780) + %782 = fneg float %693 + %783 = fmul float %781, %782 + %784 = bitcast i32 %5 to float + %785 = fadd float %784, %783 + %786 = bitcast i32 %5 to float + %787 = bitcast i32 %5 to float + %788 = fmul float %786, %787 + %789 = fadd float %788, 0.000000e+00 + %790 = bitcast i32 %684 to float + %791 = bitcast i32 %684 to float + %792 = fmul float %790, %791 + %793 = fadd float %789, %792 + %794 = call float @llvm.sqrt.f32(float %793) + %795 = fneg float %693 + %796 = fmul float %794, %795 + %797 = bitcast i32 %5 to float + %798 = fadd float %797, %796 + %799 = fmul float %785, %798 + %800 = fadd float %799, 0.000000e+00 + %801 = bitcast i32 %5 to float + %802 = bitcast i32 %5 to float + %803 = fmul float %801, %802 + %804 = fadd float %803, 0.000000e+00 + %805 = bitcast i32 %684 to float + %806 = bitcast i32 %684 to float + %807 = fmul float %805, %806 + %808 = fadd float %804, %807 + %809 = call float @llvm.sqrt.f32(float %808) + %810 = fneg float %693 + %811 = fmul float %809, %810 + %812 = fmul float %811, 0.000000e+00 + %813 = bitcast i32 %684 to float + %814 = fadd float %813, %812 + %815 = bitcast i32 %5 to float + %816 = bitcast i32 %5 to float + %817 = fmul float %815, %816 + %818 = fadd float %817, 0.000000e+00 + %819 = bitcast i32 %684 to float + %820 = bitcast i32 %684 to float + %821 = fmul float %819, %820 + %822 = fadd float %818, %821 + %823 = call float @llvm.sqrt.f32(float %822) + %824 = fneg float %693 + %825 = fmul float %823, %824 + %826 = fmul float %825, 0.000000e+00 + %827 = bitcast i32 %684 to float + %828 = fadd float %827, %826 + %829 = fmul float %814, %828 + %830 = fadd float %800, %829 + %831 = call float @llvm.sqrt.f32(float %830) + %832 = fadd float %831, 0.000000e+00 + %833 = fdiv float %772, %832 + %834 = fmul float %759, %833 + %835 = fsub float 1.000000e+00, %834 + %836 = insertelement <4 x float> zeroinitializer, float %835, i32 0 + %837 = insertelement <4 x float> %836, float 0.000000e+00, i32 1 + %838 = insertelement <4 x float> %837, 
float 0.000000e+00, i32 2 + %839 = insertelement <4 x float> %838, float 0.000000e+00, i32 3 + %840 = getelementptr float, float* %0, i32 0 + %841 = load float, float* %840, align 4 + %842 = insertelement <4 x float> zeroinitializer, float %841, i32 0 + %843 = insertelement <4 x float> %842, float 0.000000e+00, i32 1 + %844 = insertelement <4 x float> %843, float 0.000000e+00, i32 2 + %845 = insertelement <4 x float> %844, float 0.000000e+00, i32 3 + %846 = call <4 x float> @llvm.fma.v4f32(<4 x float> %839, <4 x float> %845, <4 x float> zeroinitializer) + %847 = extractelement <4 x float> %846, i32 0 + store float %847, float* %2, align 4 + %848 = bitcast i32 %5 to float + %849 = bitcast i32 %5 to float + %850 = fmul float %848, %849 + %851 = fadd float %850, 0.000000e+00 + %852 = bitcast i32 %684 to float + %853 = bitcast i32 %684 to float + %854 = fmul float %852, %853 + %855 = fadd float %851, %854 + %856 = call float @llvm.sqrt.f32(float %855) + %857 = fneg float %693 + %858 = fmul float %856, %857 + %859 = bitcast i32 %5 to float + %860 = fadd float %859, %858 + %861 = bitcast i32 %5 to float + %862 = bitcast i32 %5 to float + %863 = fmul float %861, %862 + %864 = fadd float %863, 0.000000e+00 + %865 = bitcast i32 %684 to float + %866 = bitcast i32 %684 to float + %867 = fmul float %865, %866 + %868 = fadd float %864, %867 + %869 = call float @llvm.sqrt.f32(float %868) + %870 = fneg float %693 + %871 = fmul float %869, %870 + %872 = bitcast i32 %5 to float + %873 = fadd float %872, %871 + %874 = bitcast i32 %5 to float + %875 = bitcast i32 %5 to float + %876 = fmul float %874, %875 + %877 = fadd float %876, 0.000000e+00 + %878 = bitcast i32 %684 to float + %879 = bitcast i32 %684 to float + %880 = fmul float %878, %879 + %881 = fadd float %877, %880 + %882 = call float @llvm.sqrt.f32(float %881) + %883 = fneg float %693 + %884 = fmul float %882, %883 + %885 = bitcast i32 %5 to float + %886 = fadd float %885, %884 + %887 = fmul float %873, %886 + %888 = fadd float %887, 0.000000e+00 + %889 = bitcast i32 %5 to float + %890 = bitcast i32 %5 to float + %891 = fmul float %889, %890 + %892 = fadd float %891, 0.000000e+00 + %893 = bitcast i32 %684 to float + %894 = bitcast i32 %684 to float + %895 = fmul float %893, %894 + %896 = fadd float %892, %895 + %897 = call float @llvm.sqrt.f32(float %896) + %898 = fneg float %693 + %899 = fmul float %897, %898 + %900 = fmul float %899, 0.000000e+00 + %901 = bitcast i32 %684 to float + %902 = fadd float %901, %900 + %903 = bitcast i32 %5 to float + %904 = bitcast i32 %5 to float + %905 = fmul float %903, %904 + %906 = fadd float %905, 0.000000e+00 + %907 = bitcast i32 %684 to float + %908 = bitcast i32 %684 to float + %909 = fmul float %907, %908 + %910 = fadd float %906, %909 + %911 = call float @llvm.sqrt.f32(float %910) + %912 = fneg float %693 + %913 = fmul float %911, %912 + %914 = fmul float %913, 0.000000e+00 + %915 = bitcast i32 %684 to float + %916 = fadd float %915, %914 + %917 = fmul float %902, %916 + %918 = fadd float %888, %917 + %919 = call float @llvm.sqrt.f32(float %918) + %920 = fadd float %919, 0.000000e+00 + %921 = fdiv float %860, %920 + %922 = fmul float %921, 2.000000e+00 + %923 = bitcast i32 %5 to float + %924 = bitcast i32 %5 to float + %925 = fmul float %923, %924 + %926 = fadd float %925, 0.000000e+00 + %927 = bitcast i32 %684 to float + %928 = bitcast i32 %684 to float + %929 = fmul float %927, %928 + %930 = fadd float %926, %929 + %931 = call float @llvm.sqrt.f32(float %930) + %932 = fneg float %693 + %933 = fmul float 
%931, %932 + %934 = bitcast i32 %5 to float + %935 = fadd float %934, %933 + %936 = bitcast i32 %5 to float + %937 = bitcast i32 %5 to float + %938 = fmul float %936, %937 + %939 = fadd float %938, 0.000000e+00 + %940 = bitcast i32 %684 to float + %941 = bitcast i32 %684 to float + %942 = fmul float %940, %941 + %943 = fadd float %939, %942 + %944 = call float @llvm.sqrt.f32(float %943) + %945 = fneg float %693 + %946 = fmul float %944, %945 + %947 = bitcast i32 %5 to float + %948 = fadd float %947, %946 + %949 = bitcast i32 %5 to float + %950 = bitcast i32 %5 to float + %951 = fmul float %949, %950 + %952 = fadd float %951, 0.000000e+00 + %953 = bitcast i32 %684 to float + %954 = bitcast i32 %684 to float + %955 = fmul float %953, %954 + %956 = fadd float %952, %955 + %957 = call float @llvm.sqrt.f32(float %956) + %958 = fneg float %693 + %959 = fmul float %957, %958 + %960 = bitcast i32 %5 to float + %961 = fadd float %960, %959 + %962 = fmul float %948, %961 + %963 = fadd float %962, 0.000000e+00 + %964 = bitcast i32 %5 to float + %965 = bitcast i32 %5 to float + %966 = fmul float %964, %965 + %967 = fadd float %966, 0.000000e+00 + %968 = bitcast i32 %684 to float + %969 = bitcast i32 %684 to float + %970 = fmul float %968, %969 + %971 = fadd float %967, %970 + %972 = call float @llvm.sqrt.f32(float %971) + %973 = fneg float %693 + %974 = fmul float %972, %973 + %975 = fmul float %974, 0.000000e+00 + %976 = bitcast i32 %684 to float + %977 = fadd float %976, %975 + %978 = bitcast i32 %5 to float + %979 = bitcast i32 %5 to float + %980 = fmul float %978, %979 + %981 = fadd float %980, 0.000000e+00 + %982 = bitcast i32 %684 to float + %983 = bitcast i32 %684 to float + %984 = fmul float %982, %983 + %985 = fadd float %981, %984 + %986 = call float @llvm.sqrt.f32(float %985) + %987 = fneg float %693 + %988 = fmul float %986, %987 + %989 = fmul float %988, 0.000000e+00 + %990 = bitcast i32 %684 to float + %991 = fadd float %990, %989 + %992 = fmul float %977, %991 + %993 = fadd float %963, %992 + %994 = call float @llvm.sqrt.f32(float %993) + %995 = fadd float %994, 0.000000e+00 + %996 = fdiv float %935, %995 + %997 = fmul float %922, %996 + %998 = fsub float 1.000000e+00, %997 + %999 = fmul float %998, %841 + %1000 = fadd float %999, 0.000000e+00 + %1001 = bitcast i32 %5 to float + %1002 = bitcast i32 %5 to float + %1003 = fmul float %1001, %1002 + %1004 = fadd float %1003, 0.000000e+00 + %1005 = bitcast i32 %684 to float + %1006 = bitcast i32 %684 to float + %1007 = fmul float %1005, %1006 + %1008 = fadd float %1004, %1007 + %1009 = call float @llvm.sqrt.f32(float %1008) + %1010 = fneg float %693 + %1011 = fmul float %1009, %1010 + %1012 = bitcast i32 %5 to float + %1013 = fadd float %1012, %1011 + %1014 = bitcast i32 %5 to float + %1015 = bitcast i32 %5 to float + %1016 = fmul float %1014, %1015 + %1017 = fadd float %1016, 0.000000e+00 + %1018 = bitcast i32 %684 to float + %1019 = bitcast i32 %684 to float + %1020 = fmul float %1018, %1019 + %1021 = fadd float %1017, %1020 + %1022 = call float @llvm.sqrt.f32(float %1021) + %1023 = fneg float %693 + %1024 = fmul float %1022, %1023 + %1025 = bitcast i32 %5 to float + %1026 = fadd float %1025, %1024 + %1027 = bitcast i32 %5 to float + %1028 = bitcast i32 %5 to float + %1029 = fmul float %1027, %1028 + %1030 = fadd float %1029, 0.000000e+00 + %1031 = bitcast i32 %684 to float + %1032 = bitcast i32 %684 to float + %1033 = fmul float %1031, %1032 + %1034 = fadd float %1030, %1033 + %1035 = call float @llvm.sqrt.f32(float %1034) + %1036 = fneg 
float %693 + %1037 = fmul float %1035, %1036 + %1038 = bitcast i32 %5 to float + %1039 = fadd float %1038, %1037 + %1040 = fmul float %1026, %1039 + %1041 = fadd float %1040, 0.000000e+00 + %1042 = bitcast i32 %5 to float + %1043 = bitcast i32 %5 to float + %1044 = fmul float %1042, %1043 + %1045 = fadd float %1044, 0.000000e+00 + %1046 = bitcast i32 %684 to float + %1047 = bitcast i32 %684 to float + %1048 = fmul float %1046, %1047 + %1049 = fadd float %1045, %1048 + %1050 = call float @llvm.sqrt.f32(float %1049) + %1051 = fneg float %693 + %1052 = fmul float %1050, %1051 + %1053 = fmul float %1052, 0.000000e+00 + %1054 = bitcast i32 %684 to float + %1055 = fadd float %1054, %1053 + %1056 = bitcast i32 %5 to float + %1057 = bitcast i32 %5 to float + %1058 = fmul float %1056, %1057 + %1059 = fadd float %1058, 0.000000e+00 + %1060 = bitcast i32 %684 to float + %1061 = bitcast i32 %684 to float + %1062 = fmul float %1060, %1061 + %1063 = fadd float %1059, %1062 + %1064 = call float @llvm.sqrt.f32(float %1063) + %1065 = fneg float %693 + %1066 = fmul float %1064, %1065 + %1067 = fmul float %1066, 0.000000e+00 + %1068 = bitcast i32 %684 to float + %1069 = fadd float %1068, %1067 + %1070 = fmul float %1055, %1069 + %1071 = fadd float %1041, %1070 + %1072 = call float @llvm.sqrt.f32(float %1071) + %1073 = fadd float %1072, 0.000000e+00 + %1074 = fdiv float %1013, %1073 + %1075 = fmul float %1074, 2.000000e+00 + %1076 = bitcast i32 %5 to float + %1077 = bitcast i32 %5 to float + %1078 = fmul float %1076, %1077 + %1079 = fadd float %1078, 0.000000e+00 + %1080 = bitcast i32 %684 to float + %1081 = bitcast i32 %684 to float + %1082 = fmul float %1080, %1081 + %1083 = fadd float %1079, %1082 + %1084 = call float @llvm.sqrt.f32(float %1083) + %1085 = fneg float %693 + %1086 = fmul float %1084, %1085 + %1087 = fmul float %1086, 0.000000e+00 + %1088 = bitcast i32 %684 to float + %1089 = fadd float %1088, %1087 + %1090 = bitcast i32 %5 to float + %1091 = bitcast i32 %5 to float + %1092 = fmul float %1090, %1091 + %1093 = fadd float %1092, 0.000000e+00 + %1094 = bitcast i32 %684 to float + %1095 = bitcast i32 %684 to float + %1096 = fmul float %1094, %1095 + %1097 = fadd float %1093, %1096 + %1098 = call float @llvm.sqrt.f32(float %1097) + %1099 = fneg float %693 + %1100 = fmul float %1098, %1099 + %1101 = bitcast i32 %5 to float + %1102 = fadd float %1101, %1100 + %1103 = bitcast i32 %5 to float + %1104 = bitcast i32 %5 to float + %1105 = fmul float %1103, %1104 + %1106 = fadd float %1105, 0.000000e+00 + %1107 = bitcast i32 %684 to float + %1108 = bitcast i32 %684 to float + %1109 = fmul float %1107, %1108 + %1110 = fadd float %1106, %1109 + %1111 = call float @llvm.sqrt.f32(float %1110) + %1112 = fneg float %693 + %1113 = fmul float %1111, %1112 + %1114 = bitcast i32 %5 to float + %1115 = fadd float %1114, %1113 + %1116 = fmul float %1102, %1115 + %1117 = fadd float %1116, 0.000000e+00 + %1118 = bitcast i32 %5 to float + %1119 = bitcast i32 %5 to float + %1120 = fmul float %1118, %1119 + %1121 = fadd float %1120, 0.000000e+00 + %1122 = bitcast i32 %684 to float + %1123 = bitcast i32 %684 to float + %1124 = fmul float %1122, %1123 + %1125 = fadd float %1121, %1124 + %1126 = call float @llvm.sqrt.f32(float %1125) + %1127 = fneg float %693 + %1128 = fmul float %1126, %1127 + %1129 = fmul float %1128, 0.000000e+00 + %1130 = bitcast i32 %684 to float + %1131 = fadd float %1130, %1129 + %1132 = bitcast i32 %5 to float + %1133 = bitcast i32 %5 to float + %1134 = fmul float %1132, %1133 + %1135 = fadd float 
%1134, 0.000000e+00 + %1136 = bitcast i32 %684 to float + %1137 = bitcast i32 %684 to float + %1138 = fmul float %1136, %1137 + %1139 = fadd float %1135, %1138 + %1140 = call float @llvm.sqrt.f32(float %1139) + %1141 = fneg float %693 + %1142 = fmul float %1140, %1141 + %1143 = fmul float %1142, 0.000000e+00 + %1144 = bitcast i32 %684 to float + %1145 = fadd float %1144, %1143 + %1146 = fmul float %1131, %1145 + %1147 = fadd float %1117, %1146 + %1148 = call float @llvm.sqrt.f32(float %1147) + %1149 = fadd float %1148, 0.000000e+00 + %1150 = fdiv float %1089, %1149 + %1151 = fmul float %1075, %1150 + %1152 = fneg float %1151 + %1153 = getelementptr float, float* %0, i32 0 + %1154 = getelementptr inbounds float, float* %1153, i64 2 + %1155 = load float, float* %1154, align 4 + %1156 = fmul float %1152, %1155 + %1157 = fadd float %1000, %1156 + %1158 = insertelement <4 x float> zeroinitializer, float %1157, i32 0 + %1159 = insertelement <4 x float> %1158, float 0.000000e+00, i32 1 + %1160 = insertelement <4 x float> %1159, float 0.000000e+00, i32 2 + %1161 = insertelement <4 x float> %1160, float 0.000000e+00, i32 3 + %1162 = extractelement <4 x float> %1161, i32 0 + store float %1162, float* %2, align 4 + %1163 = extractelement <4 x float> %1161, i32 1 + %1164 = getelementptr float, float* %2, i32 0 + %1165 = getelementptr inbounds float, float* %1164, i64 1 + store float %1163, float* %1165, align 4 + %1166 = bitcast i32 %5 to float + %1167 = bitcast i32 %5 to float + %1168 = fmul float %1166, %1167 + %1169 = fadd float %1168, 0.000000e+00 + %1170 = bitcast i32 %684 to float + %1171 = bitcast i32 %684 to float + %1172 = fmul float %1170, %1171 + %1173 = fadd float %1169, %1172 + %1174 = call float @llvm.sqrt.f32(float %1173) + %1175 = fneg float %693 + %1176 = fmul float %1174, %1175 + %1177 = bitcast i32 %5 to float + %1178 = fadd float %1177, %1176 + %1179 = bitcast i32 %5 to float + %1180 = bitcast i32 %5 to float + %1181 = fmul float %1179, %1180 + %1182 = fadd float %1181, 0.000000e+00 + %1183 = bitcast i32 %684 to float + %1184 = bitcast i32 %684 to float + %1185 = fmul float %1183, %1184 + %1186 = fadd float %1182, %1185 + %1187 = call float @llvm.sqrt.f32(float %1186) + %1188 = fneg float %693 + %1189 = fmul float %1187, %1188 + %1190 = bitcast i32 %5 to float + %1191 = fadd float %1190, %1189 + %1192 = bitcast i32 %5 to float + %1193 = bitcast i32 %5 to float + %1194 = fmul float %1192, %1193 + %1195 = fadd float %1194, 0.000000e+00 + %1196 = bitcast i32 %684 to float + %1197 = bitcast i32 %684 to float + %1198 = fmul float %1196, %1197 + %1199 = fadd float %1195, %1198 + %1200 = call float @llvm.sqrt.f32(float %1199) + %1201 = fneg float %693 + %1202 = fmul float %1200, %1201 + %1203 = bitcast i32 %5 to float + %1204 = fadd float %1203, %1202 + %1205 = fmul float %1191, %1204 + %1206 = fadd float %1205, 0.000000e+00 + %1207 = bitcast i32 %5 to float + %1208 = bitcast i32 %5 to float + %1209 = fmul float %1207, %1208 + %1210 = fadd float %1209, 0.000000e+00 + %1211 = bitcast i32 %684 to float + %1212 = bitcast i32 %684 to float + %1213 = fmul float %1211, %1212 + %1214 = fadd float %1210, %1213 + %1215 = call float @llvm.sqrt.f32(float %1214) + %1216 = fneg float %693 + %1217 = fmul float %1215, %1216 + %1218 = fmul float %1217, 0.000000e+00 + %1219 = bitcast i32 %684 to float + %1220 = fadd float %1219, %1218 + %1221 = bitcast i32 %5 to float + %1222 = bitcast i32 %5 to float + %1223 = fmul float %1221, %1222 + %1224 = fadd float %1223, 0.000000e+00 + %1225 = bitcast i32 %684 to 
float + %1226 = bitcast i32 %684 to float + %1227 = fmul float %1225, %1226 + %1228 = fadd float %1224, %1227 + %1229 = call float @llvm.sqrt.f32(float %1228) + %1230 = fneg float %693 + %1231 = fmul float %1229, %1230 + %1232 = fmul float %1231, 0.000000e+00 + %1233 = bitcast i32 %684 to float + %1234 = fadd float %1233, %1232 + %1235 = fmul float %1220, %1234 + %1236 = fadd float %1206, %1235 + %1237 = call float @llvm.sqrt.f32(float %1236) + %1238 = fadd float %1237, 0.000000e+00 + %1239 = fdiv float %1178, %1238 + %1240 = fmul float %1239, 2.000000e+00 + %1241 = bitcast i32 %5 to float + %1242 = bitcast i32 %5 to float + %1243 = fmul float %1241, %1242 + %1244 = fadd float %1243, 0.000000e+00 + %1245 = bitcast i32 %684 to float + %1246 = bitcast i32 %684 to float + %1247 = fmul float %1245, %1246 + %1248 = fadd float %1244, %1247 + %1249 = call float @llvm.sqrt.f32(float %1248) + %1250 = fneg float %693 + %1251 = fmul float %1249, %1250 + %1252 = bitcast i32 %5 to float + %1253 = fadd float %1252, %1251 + %1254 = bitcast i32 %5 to float + %1255 = bitcast i32 %5 to float + %1256 = fmul float %1254, %1255 + %1257 = fadd float %1256, 0.000000e+00 + %1258 = bitcast i32 %684 to float + %1259 = bitcast i32 %684 to float + %1260 = fmul float %1258, %1259 + %1261 = fadd float %1257, %1260 + %1262 = call float @llvm.sqrt.f32(float %1261) + %1263 = fneg float %693 + %1264 = fmul float %1262, %1263 + %1265 = bitcast i32 %5 to float + %1266 = fadd float %1265, %1264 + %1267 = bitcast i32 %5 to float + %1268 = bitcast i32 %5 to float + %1269 = fmul float %1267, %1268 + %1270 = fadd float %1269, 0.000000e+00 + %1271 = bitcast i32 %684 to float + %1272 = bitcast i32 %684 to float + %1273 = fmul float %1271, %1272 + %1274 = fadd float %1270, %1273 + %1275 = call float @llvm.sqrt.f32(float %1274) + %1276 = fneg float %693 + %1277 = fmul float %1275, %1276 + %1278 = bitcast i32 %5 to float + %1279 = fadd float %1278, %1277 + %1280 = fmul float %1266, %1279 + %1281 = fadd float %1280, 0.000000e+00 + %1282 = bitcast i32 %5 to float + %1283 = bitcast i32 %5 to float + %1284 = fmul float %1282, %1283 + %1285 = fadd float %1284, 0.000000e+00 + %1286 = bitcast i32 %684 to float + %1287 = bitcast i32 %684 to float + %1288 = fmul float %1286, %1287 + %1289 = fadd float %1285, %1288 + %1290 = call float @llvm.sqrt.f32(float %1289) + %1291 = fneg float %693 + %1292 = fmul float %1290, %1291 + %1293 = fmul float %1292, 0.000000e+00 + %1294 = bitcast i32 %684 to float + %1295 = fadd float %1294, %1293 + %1296 = bitcast i32 %5 to float + %1297 = bitcast i32 %5 to float + %1298 = fmul float %1296, %1297 + %1299 = fadd float %1298, 0.000000e+00 + %1300 = bitcast i32 %684 to float + %1301 = bitcast i32 %684 to float + %1302 = fmul float %1300, %1301 + %1303 = fadd float %1299, %1302 + %1304 = call float @llvm.sqrt.f32(float %1303) + %1305 = fneg float %693 + %1306 = fmul float %1304, %1305 + %1307 = fmul float %1306, 0.000000e+00 + %1308 = bitcast i32 %684 to float + %1309 = fadd float %1308, %1307 + %1310 = fmul float %1295, %1309 + %1311 = fadd float %1281, %1310 + %1312 = call float @llvm.sqrt.f32(float %1311) + %1313 = fadd float %1312, 0.000000e+00 + %1314 = fdiv float %1253, %1313 + %1315 = fmul float %1240, %1314 + %1316 = fsub float 1.000000e+00, %1315 + %1317 = insertelement <4 x float> zeroinitializer, float %1316, i32 0 + %1318 = insertelement <4 x float> %1317, float 0.000000e+00, i32 1 + %1319 = insertelement <4 x float> %1318, float 0.000000e+00, i32 2 + %1320 = insertelement <4 x float> %1319, float 
0.000000e+00, i32 3 + %1321 = getelementptr float, float* %0, i32 0 + %1322 = getelementptr inbounds float, float* %1321, i64 1 + %1323 = load float, float* %1322, align 4 + %1324 = insertelement <4 x float> zeroinitializer, float %1323, i32 0 + %1325 = insertelement <4 x float> %1324, float 0.000000e+00, i32 1 + %1326 = insertelement <4 x float> %1325, float 0.000000e+00, i32 2 + %1327 = insertelement <4 x float> %1326, float 0.000000e+00, i32 3 + %1328 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1320, <4 x float> %1327, <4 x float> zeroinitializer) + %1329 = extractelement <4 x float> %1328, i32 0 + store float %1329, float* %1165, align 4 + %1330 = bitcast i32 %5 to float + %1331 = bitcast i32 %5 to float + %1332 = fmul float %1330, %1331 + %1333 = fadd float %1332, 0.000000e+00 + %1334 = bitcast i32 %684 to float + %1335 = bitcast i32 %684 to float + %1336 = fmul float %1334, %1335 + %1337 = fadd float %1333, %1336 + %1338 = call float @llvm.sqrt.f32(float %1337) + %1339 = fneg float %693 + %1340 = fmul float %1338, %1339 + %1341 = bitcast i32 %5 to float + %1342 = fadd float %1341, %1340 + %1343 = bitcast i32 %5 to float + %1344 = bitcast i32 %5 to float + %1345 = fmul float %1343, %1344 + %1346 = fadd float %1345, 0.000000e+00 + %1347 = bitcast i32 %684 to float + %1348 = bitcast i32 %684 to float + %1349 = fmul float %1347, %1348 + %1350 = fadd float %1346, %1349 + %1351 = call float @llvm.sqrt.f32(float %1350) + %1352 = fneg float %693 + %1353 = fmul float %1351, %1352 + %1354 = bitcast i32 %5 to float + %1355 = fadd float %1354, %1353 + %1356 = bitcast i32 %5 to float + %1357 = bitcast i32 %5 to float + %1358 = fmul float %1356, %1357 + %1359 = fadd float %1358, 0.000000e+00 + %1360 = bitcast i32 %684 to float + %1361 = bitcast i32 %684 to float + %1362 = fmul float %1360, %1361 + %1363 = fadd float %1359, %1362 + %1364 = call float @llvm.sqrt.f32(float %1363) + %1365 = fneg float %693 + %1366 = fmul float %1364, %1365 + %1367 = bitcast i32 %5 to float + %1368 = fadd float %1367, %1366 + %1369 = fmul float %1355, %1368 + %1370 = fadd float %1369, 0.000000e+00 + %1371 = bitcast i32 %5 to float + %1372 = bitcast i32 %5 to float + %1373 = fmul float %1371, %1372 + %1374 = fadd float %1373, 0.000000e+00 + %1375 = bitcast i32 %684 to float + %1376 = bitcast i32 %684 to float + %1377 = fmul float %1375, %1376 + %1378 = fadd float %1374, %1377 + %1379 = call float @llvm.sqrt.f32(float %1378) + %1380 = fneg float %693 + %1381 = fmul float %1379, %1380 + %1382 = fmul float %1381, 0.000000e+00 + %1383 = bitcast i32 %684 to float + %1384 = fadd float %1383, %1382 + %1385 = bitcast i32 %5 to float + %1386 = bitcast i32 %5 to float + %1387 = fmul float %1385, %1386 + %1388 = fadd float %1387, 0.000000e+00 + %1389 = bitcast i32 %684 to float + %1390 = bitcast i32 %684 to float + %1391 = fmul float %1389, %1390 + %1392 = fadd float %1388, %1391 + %1393 = call float @llvm.sqrt.f32(float %1392) + %1394 = fneg float %693 + %1395 = fmul float %1393, %1394 + %1396 = fmul float %1395, 0.000000e+00 + %1397 = bitcast i32 %684 to float + %1398 = fadd float %1397, %1396 + %1399 = fmul float %1384, %1398 + %1400 = fadd float %1370, %1399 + %1401 = call float @llvm.sqrt.f32(float %1400) + %1402 = fadd float %1401, 0.000000e+00 + %1403 = fdiv float %1342, %1402 + %1404 = fmul float %1403, 2.000000e+00 + %1405 = bitcast i32 %5 to float + %1406 = bitcast i32 %5 to float + %1407 = fmul float %1405, %1406 + %1408 = fadd float %1407, 0.000000e+00 + %1409 = bitcast i32 %684 to float + %1410 = bitcast i32 %684 
to float + %1411 = fmul float %1409, %1410 + %1412 = fadd float %1408, %1411 + %1413 = call float @llvm.sqrt.f32(float %1412) + %1414 = fneg float %693 + %1415 = fmul float %1413, %1414 + %1416 = bitcast i32 %5 to float + %1417 = fadd float %1416, %1415 + %1418 = bitcast i32 %5 to float + %1419 = bitcast i32 %5 to float + %1420 = fmul float %1418, %1419 + %1421 = fadd float %1420, 0.000000e+00 + %1422 = bitcast i32 %684 to float + %1423 = bitcast i32 %684 to float + %1424 = fmul float %1422, %1423 + %1425 = fadd float %1421, %1424 + %1426 = call float @llvm.sqrt.f32(float %1425) + %1427 = fneg float %693 + %1428 = fmul float %1426, %1427 + %1429 = bitcast i32 %5 to float + %1430 = fadd float %1429, %1428 + %1431 = bitcast i32 %5 to float + %1432 = bitcast i32 %5 to float + %1433 = fmul float %1431, %1432 + %1434 = fadd float %1433, 0.000000e+00 + %1435 = bitcast i32 %684 to float + %1436 = bitcast i32 %684 to float + %1437 = fmul float %1435, %1436 + %1438 = fadd float %1434, %1437 + %1439 = call float @llvm.sqrt.f32(float %1438) + %1440 = fneg float %693 + %1441 = fmul float %1439, %1440 + %1442 = bitcast i32 %5 to float + %1443 = fadd float %1442, %1441 + %1444 = fmul float %1430, %1443 + %1445 = fadd float %1444, 0.000000e+00 + %1446 = bitcast i32 %5 to float + %1447 = bitcast i32 %5 to float + %1448 = fmul float %1446, %1447 + %1449 = fadd float %1448, 0.000000e+00 + %1450 = bitcast i32 %684 to float + %1451 = bitcast i32 %684 to float + %1452 = fmul float %1450, %1451 + %1453 = fadd float %1449, %1452 + %1454 = call float @llvm.sqrt.f32(float %1453) + %1455 = fneg float %693 + %1456 = fmul float %1454, %1455 + %1457 = fmul float %1456, 0.000000e+00 + %1458 = bitcast i32 %684 to float + %1459 = fadd float %1458, %1457 + %1460 = bitcast i32 %5 to float + %1461 = bitcast i32 %5 to float + %1462 = fmul float %1460, %1461 + %1463 = fadd float %1462, 0.000000e+00 + %1464 = bitcast i32 %684 to float + %1465 = bitcast i32 %684 to float + %1466 = fmul float %1464, %1465 + %1467 = fadd float %1463, %1466 + %1468 = call float @llvm.sqrt.f32(float %1467) + %1469 = fneg float %693 + %1470 = fmul float %1468, %1469 + %1471 = fmul float %1470, 0.000000e+00 + %1472 = bitcast i32 %684 to float + %1473 = fadd float %1472, %1471 + %1474 = fmul float %1459, %1473 + %1475 = fadd float %1445, %1474 + %1476 = call float @llvm.sqrt.f32(float %1475) + %1477 = fadd float %1476, 0.000000e+00 + %1478 = fdiv float %1417, %1477 + %1479 = fmul float %1404, %1478 + %1480 = fsub float 1.000000e+00, %1479 + %1481 = load float, float* %1322, align 4 + %1482 = fmul float %1480, %1481 + %1483 = fadd float %1482, 0.000000e+00 + %1484 = bitcast i32 %5 to float + %1485 = bitcast i32 %5 to float + %1486 = fmul float %1484, %1485 + %1487 = fadd float %1486, 0.000000e+00 + %1488 = bitcast i32 %684 to float + %1489 = bitcast i32 %684 to float + %1490 = fmul float %1488, %1489 + %1491 = fadd float %1487, %1490 + %1492 = call float @llvm.sqrt.f32(float %1491) + %1493 = fneg float %693 + %1494 = fmul float %1492, %1493 + %1495 = bitcast i32 %5 to float + %1496 = fadd float %1495, %1494 + %1497 = bitcast i32 %5 to float + %1498 = bitcast i32 %5 to float + %1499 = fmul float %1497, %1498 + %1500 = fadd float %1499, 0.000000e+00 + %1501 = bitcast i32 %684 to float + %1502 = bitcast i32 %684 to float + %1503 = fmul float %1501, %1502 + %1504 = fadd float %1500, %1503 + %1505 = call float @llvm.sqrt.f32(float %1504) + %1506 = fneg float %693 + %1507 = fmul float %1505, %1506 + %1508 = bitcast i32 %5 to float + %1509 = fadd float %1508, 
%1507 + %1510 = bitcast i32 %5 to float + %1511 = bitcast i32 %5 to float + %1512 = fmul float %1510, %1511 + %1513 = fadd float %1512, 0.000000e+00 + %1514 = bitcast i32 %684 to float + %1515 = bitcast i32 %684 to float + %1516 = fmul float %1514, %1515 + %1517 = fadd float %1513, %1516 + %1518 = call float @llvm.sqrt.f32(float %1517) + %1519 = fneg float %693 + %1520 = fmul float %1518, %1519 + %1521 = bitcast i32 %5 to float + %1522 = fadd float %1521, %1520 + %1523 = fmul float %1509, %1522 + %1524 = fadd float %1523, 0.000000e+00 + %1525 = bitcast i32 %5 to float + %1526 = bitcast i32 %5 to float + %1527 = fmul float %1525, %1526 + %1528 = fadd float %1527, 0.000000e+00 + %1529 = bitcast i32 %684 to float + %1530 = bitcast i32 %684 to float + %1531 = fmul float %1529, %1530 + %1532 = fadd float %1528, %1531 + %1533 = call float @llvm.sqrt.f32(float %1532) + %1534 = fneg float %693 + %1535 = fmul float %1533, %1534 + %1536 = fmul float %1535, 0.000000e+00 + %1537 = bitcast i32 %684 to float + %1538 = fadd float %1537, %1536 + %1539 = bitcast i32 %5 to float + %1540 = bitcast i32 %5 to float + %1541 = fmul float %1539, %1540 + %1542 = fadd float %1541, 0.000000e+00 + %1543 = bitcast i32 %684 to float + %1544 = bitcast i32 %684 to float + %1545 = fmul float %1543, %1544 + %1546 = fadd float %1542, %1545 + %1547 = call float @llvm.sqrt.f32(float %1546) + %1548 = fneg float %693 + %1549 = fmul float %1547, %1548 + %1550 = fmul float %1549, 0.000000e+00 + %1551 = bitcast i32 %684 to float + %1552 = fadd float %1551, %1550 + %1553 = fmul float %1538, %1552 + %1554 = fadd float %1524, %1553 + %1555 = call float @llvm.sqrt.f32(float %1554) + %1556 = fadd float %1555, 0.000000e+00 + %1557 = fdiv float %1496, %1556 + %1558 = fmul float %1557, 2.000000e+00 + %1559 = bitcast i32 %5 to float + %1560 = bitcast i32 %5 to float + %1561 = fmul float %1559, %1560 + %1562 = fadd float %1561, 0.000000e+00 + %1563 = bitcast i32 %684 to float + %1564 = bitcast i32 %684 to float + %1565 = fmul float %1563, %1564 + %1566 = fadd float %1562, %1565 + %1567 = call float @llvm.sqrt.f32(float %1566) + %1568 = fneg float %693 + %1569 = fmul float %1567, %1568 + %1570 = fmul float %1569, 0.000000e+00 + %1571 = bitcast i32 %684 to float + %1572 = fadd float %1571, %1570 + %1573 = bitcast i32 %5 to float + %1574 = bitcast i32 %5 to float + %1575 = fmul float %1573, %1574 + %1576 = fadd float %1575, 0.000000e+00 + %1577 = bitcast i32 %684 to float + %1578 = bitcast i32 %684 to float + %1579 = fmul float %1577, %1578 + %1580 = fadd float %1576, %1579 + %1581 = call float @llvm.sqrt.f32(float %1580) + %1582 = fneg float %693 + %1583 = fmul float %1581, %1582 + %1584 = bitcast i32 %5 to float + %1585 = fadd float %1584, %1583 + %1586 = bitcast i32 %5 to float + %1587 = bitcast i32 %5 to float + %1588 = fmul float %1586, %1587 + %1589 = fadd float %1588, 0.000000e+00 + %1590 = bitcast i32 %684 to float + %1591 = bitcast i32 %684 to float + %1592 = fmul float %1590, %1591 + %1593 = fadd float %1589, %1592 + %1594 = call float @llvm.sqrt.f32(float %1593) + %1595 = fneg float %693 + %1596 = fmul float %1594, %1595 + %1597 = bitcast i32 %5 to float + %1598 = fadd float %1597, %1596 + %1599 = fmul float %1585, %1598 + %1600 = fadd float %1599, 0.000000e+00 + %1601 = bitcast i32 %5 to float + %1602 = bitcast i32 %5 to float + %1603 = fmul float %1601, %1602 + %1604 = fadd float %1603, 0.000000e+00 + %1605 = bitcast i32 %684 to float + %1606 = bitcast i32 %684 to float + %1607 = fmul float %1605, %1606 + %1608 = fadd float %1604, 
%1607 + %1609 = call float @llvm.sqrt.f32(float %1608) + %1610 = fneg float %693 + %1611 = fmul float %1609, %1610 + %1612 = fmul float %1611, 0.000000e+00 + %1613 = bitcast i32 %684 to float + %1614 = fadd float %1613, %1612 + %1615 = bitcast i32 %5 to float + %1616 = bitcast i32 %5 to float + %1617 = fmul float %1615, %1616 + %1618 = fadd float %1617, 0.000000e+00 + %1619 = bitcast i32 %684 to float + %1620 = bitcast i32 %684 to float + %1621 = fmul float %1619, %1620 + %1622 = fadd float %1618, %1621 + %1623 = call float @llvm.sqrt.f32(float %1622) + %1624 = fneg float %693 + %1625 = fmul float %1623, %1624 + %1626 = fmul float %1625, 0.000000e+00 + %1627 = bitcast i32 %684 to float + %1628 = fadd float %1627, %1626 + %1629 = fmul float %1614, %1628 + %1630 = fadd float %1600, %1629 + %1631 = call float @llvm.sqrt.f32(float %1630) + %1632 = fadd float %1631, 0.000000e+00 + %1633 = fdiv float %1572, %1632 + %1634 = fmul float %1558, %1633 + %1635 = fneg float %1634 + %1636 = getelementptr float, float* %0, i32 0 + %1637 = getelementptr inbounds float, float* %1636, i64 3 + %1638 = load float, float* %1637, align 4 + %1639 = fmul float %1635, %1638 + %1640 = fadd float %1483, %1639 + %1641 = insertelement <4 x float> zeroinitializer, float %1640, i32 0 + %1642 = insertelement <4 x float> %1641, float 0.000000e+00, i32 1 + %1643 = insertelement <4 x float> %1642, float 0.000000e+00, i32 2 + %1644 = insertelement <4 x float> %1643, float 0.000000e+00, i32 3 + %1645 = extractelement <4 x float> %1644, i32 0 + store float %1645, float* %1165, align 4 + %1646 = extractelement <4 x float> %1644, i32 1 + %1647 = getelementptr float, float* %2, i32 0 + %1648 = getelementptr inbounds float, float* %1647, i64 2 + store float %1646, float* %1648, align 4 + %1649 = getelementptr float, float* %0, i32 0 + %1650 = bitcast float* %1649 to i32* + %1651 = load i32, i32* %1650, align 4 + %1652 = bitcast i32 %1651 to float + %1653 = bitcast i32 %1651 to float + %1654 = fmul float %1652, %1653 + %1655 = fadd float %1654, 0.000000e+00 + %1656 = bitcast i32 %684 to float + %1657 = bitcast i32 %684 to float + %1658 = fmul float %1656, %1657 + %1659 = fadd float %1655, %1658 + %1660 = call float @llvm.sqrt.f32(float %1659) + %1661 = fneg float %693 + %1662 = fmul float %1660, %1661 + %1663 = fmul float %1662, 0.000000e+00 + %1664 = bitcast i32 %684 to float + %1665 = fadd float %1664, %1663 + %1666 = bitcast i32 %1651 to float + %1667 = bitcast i32 %1651 to float + %1668 = fmul float %1666, %1667 + %1669 = fadd float %1668, 0.000000e+00 + %1670 = bitcast i32 %684 to float + %1671 = bitcast i32 %684 to float + %1672 = fmul float %1670, %1671 + %1673 = fadd float %1669, %1672 + %1674 = call float @llvm.sqrt.f32(float %1673) + %1675 = fneg float %693 + %1676 = fmul float %1674, %1675 + %1677 = bitcast i32 %1651 to float + %1678 = fadd float %1677, %1676 + %1679 = bitcast i32 %1651 to float + %1680 = bitcast i32 %1651 to float + %1681 = fmul float %1679, %1680 + %1682 = fadd float %1681, 0.000000e+00 + %1683 = bitcast i32 %684 to float + %1684 = bitcast i32 %684 to float + %1685 = fmul float %1683, %1684 + %1686 = fadd float %1682, %1685 + %1687 = call float @llvm.sqrt.f32(float %1686) + %1688 = fneg float %693 + %1689 = fmul float %1687, %1688 + %1690 = bitcast i32 %1651 to float + %1691 = fadd float %1690, %1689 + %1692 = fmul float %1678, %1691 + %1693 = fadd float %1692, 0.000000e+00 + %1694 = bitcast i32 %1651 to float + %1695 = bitcast i32 %1651 to float + %1696 = fmul float %1694, %1695 + %1697 = fadd float 
%1696, 0.000000e+00 + %1698 = bitcast i32 %684 to float + %1699 = bitcast i32 %684 to float + %1700 = fmul float %1698, %1699 + %1701 = fadd float %1697, %1700 + %1702 = call float @llvm.sqrt.f32(float %1701) + %1703 = fneg float %693 + %1704 = fmul float %1702, %1703 + %1705 = fmul float %1704, 0.000000e+00 + %1706 = bitcast i32 %684 to float + %1707 = fadd float %1706, %1705 + %1708 = bitcast i32 %1651 to float + %1709 = bitcast i32 %1651 to float + %1710 = fmul float %1708, %1709 + %1711 = fadd float %1710, 0.000000e+00 + %1712 = bitcast i32 %684 to float + %1713 = bitcast i32 %684 to float + %1714 = fmul float %1712, %1713 + %1715 = fadd float %1711, %1714 + %1716 = call float @llvm.sqrt.f32(float %1715) + %1717 = fneg float %693 + %1718 = fmul float %1716, %1717 + %1719 = fmul float %1718, 0.000000e+00 + %1720 = bitcast i32 %684 to float + %1721 = fadd float %1720, %1719 + %1722 = fmul float %1707, %1721 + %1723 = fadd float %1693, %1722 + %1724 = call float @llvm.sqrt.f32(float %1723) + %1725 = fadd float %1724, 0.000000e+00 + %1726 = fdiv float %1665, %1725 + %1727 = fmul float %1726, 2.000000e+00 + %1728 = bitcast i32 %1651 to float + %1729 = bitcast i32 %1651 to float + %1730 = fmul float %1728, %1729 + %1731 = fadd float %1730, 0.000000e+00 + %1732 = bitcast i32 %684 to float + %1733 = bitcast i32 %684 to float + %1734 = fmul float %1732, %1733 + %1735 = fadd float %1731, %1734 + %1736 = call float @llvm.sqrt.f32(float %1735) + %1737 = fneg float %693 + %1738 = fmul float %1736, %1737 + %1739 = bitcast i32 %1651 to float + %1740 = fadd float %1739, %1738 + %1741 = bitcast i32 %1651 to float + %1742 = bitcast i32 %1651 to float + %1743 = fmul float %1741, %1742 + %1744 = fadd float %1743, 0.000000e+00 + %1745 = bitcast i32 %684 to float + %1746 = bitcast i32 %684 to float + %1747 = fmul float %1745, %1746 + %1748 = fadd float %1744, %1747 + %1749 = call float @llvm.sqrt.f32(float %1748) + %1750 = fneg float %693 + %1751 = fmul float %1749, %1750 + %1752 = bitcast i32 %1651 to float + %1753 = fadd float %1752, %1751 + %1754 = bitcast i32 %1651 to float + %1755 = bitcast i32 %1651 to float + %1756 = fmul float %1754, %1755 + %1757 = fadd float %1756, 0.000000e+00 + %1758 = bitcast i32 %684 to float + %1759 = bitcast i32 %684 to float + %1760 = fmul float %1758, %1759 + %1761 = fadd float %1757, %1760 + %1762 = call float @llvm.sqrt.f32(float %1761) + %1763 = fneg float %693 + %1764 = fmul float %1762, %1763 + %1765 = bitcast i32 %1651 to float + %1766 = fadd float %1765, %1764 + %1767 = fmul float %1753, %1766 + %1768 = fadd float %1767, 0.000000e+00 + %1769 = bitcast i32 %1651 to float + %1770 = bitcast i32 %1651 to float + %1771 = fmul float %1769, %1770 + %1772 = fadd float %1771, 0.000000e+00 + %1773 = bitcast i32 %684 to float + %1774 = bitcast i32 %684 to float + %1775 = fmul float %1773, %1774 + %1776 = fadd float %1772, %1775 + %1777 = call float @llvm.sqrt.f32(float %1776) + %1778 = fneg float %693 + %1779 = fmul float %1777, %1778 + %1780 = fmul float %1779, 0.000000e+00 + %1781 = bitcast i32 %684 to float + %1782 = fadd float %1781, %1780 + %1783 = bitcast i32 %1651 to float + %1784 = bitcast i32 %1651 to float + %1785 = fmul float %1783, %1784 + %1786 = fadd float %1785, 0.000000e+00 + %1787 = bitcast i32 %684 to float + %1788 = bitcast i32 %684 to float + %1789 = fmul float %1787, %1788 + %1790 = fadd float %1786, %1789 + %1791 = call float @llvm.sqrt.f32(float %1790) + %1792 = fneg float %693 + %1793 = fmul float %1791, %1792 + %1794 = fmul float %1793, 0.000000e+00 + 
%1795 = bitcast i32 %684 to float + %1796 = fadd float %1795, %1794 + %1797 = fmul float %1782, %1796 + %1798 = fadd float %1768, %1797 + %1799 = call float @llvm.sqrt.f32(float %1798) + %1800 = fadd float %1799, 0.000000e+00 + %1801 = fdiv float %1740, %1800 + %1802 = fmul float %1727, %1801 + %1803 = fneg float %1802 + %1804 = insertelement <4 x float> zeroinitializer, float %1803, i32 0 + %1805 = insertelement <4 x float> %1804, float 0.000000e+00, i32 1 + %1806 = insertelement <4 x float> %1805, float 0.000000e+00, i32 2 + %1807 = insertelement <4 x float> %1806, float 0.000000e+00, i32 3 + %1808 = getelementptr float, float* %0, i32 0 + %1809 = load float, float* %1808, align 4 + %1810 = insertelement <4 x float> zeroinitializer, float %1809, i32 0 + %1811 = insertelement <4 x float> %1810, float 0.000000e+00, i32 1 + %1812 = insertelement <4 x float> %1811, float 0.000000e+00, i32 2 + %1813 = insertelement <4 x float> %1812, float 0.000000e+00, i32 3 + %1814 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1807, <4 x float> %1813, <4 x float> zeroinitializer) + %1815 = extractelement <4 x float> %1814, i32 0 + store float %1815, float* %1648, align 4 + %1816 = bitcast i32 %1651 to float + %1817 = bitcast i32 %1651 to float + %1818 = fmul float %1816, %1817 + %1819 = fadd float %1818, 0.000000e+00 + %1820 = bitcast i32 %684 to float + %1821 = bitcast i32 %684 to float + %1822 = fmul float %1820, %1821 + %1823 = fadd float %1819, %1822 + %1824 = call float @llvm.sqrt.f32(float %1823) + %1825 = fneg float %693 + %1826 = fmul float %1824, %1825 + %1827 = fmul float %1826, 0.000000e+00 + %1828 = bitcast i32 %684 to float + %1829 = fadd float %1828, %1827 + %1830 = bitcast i32 %1651 to float + %1831 = bitcast i32 %1651 to float + %1832 = fmul float %1830, %1831 + %1833 = fadd float %1832, 0.000000e+00 + %1834 = bitcast i32 %684 to float + %1835 = bitcast i32 %684 to float + %1836 = fmul float %1834, %1835 + %1837 = fadd float %1833, %1836 + %1838 = call float @llvm.sqrt.f32(float %1837) + %1839 = fneg float %693 + %1840 = fmul float %1838, %1839 + %1841 = bitcast i32 %1651 to float + %1842 = fadd float %1841, %1840 + %1843 = bitcast i32 %1651 to float + %1844 = bitcast i32 %1651 to float + %1845 = fmul float %1843, %1844 + %1846 = fadd float %1845, 0.000000e+00 + %1847 = bitcast i32 %684 to float + %1848 = bitcast i32 %684 to float + %1849 = fmul float %1847, %1848 + %1850 = fadd float %1846, %1849 + %1851 = call float @llvm.sqrt.f32(float %1850) + %1852 = fneg float %693 + %1853 = fmul float %1851, %1852 + %1854 = bitcast i32 %1651 to float + %1855 = fadd float %1854, %1853 + %1856 = fmul float %1842, %1855 + %1857 = fadd float %1856, 0.000000e+00 + %1858 = bitcast i32 %1651 to float + %1859 = bitcast i32 %1651 to float + %1860 = fmul float %1858, %1859 + %1861 = fadd float %1860, 0.000000e+00 + %1862 = bitcast i32 %684 to float + %1863 = bitcast i32 %684 to float + %1864 = fmul float %1862, %1863 + %1865 = fadd float %1861, %1864 + %1866 = call float @llvm.sqrt.f32(float %1865) + %1867 = fneg float %693 + %1868 = fmul float %1866, %1867 + %1869 = fmul float %1868, 0.000000e+00 + %1870 = bitcast i32 %684 to float + %1871 = fadd float %1870, %1869 + %1872 = bitcast i32 %1651 to float + %1873 = bitcast i32 %1651 to float + %1874 = fmul float %1872, %1873 + %1875 = fadd float %1874, 0.000000e+00 + %1876 = bitcast i32 %684 to float + %1877 = bitcast i32 %684 to float + %1878 = fmul float %1876, %1877 + %1879 = fadd float %1875, %1878 + %1880 = call float @llvm.sqrt.f32(float %1879) + %1881 = 
fneg float %693 + %1882 = fmul float %1880, %1881 + %1883 = fmul float %1882, 0.000000e+00 + %1884 = bitcast i32 %684 to float + %1885 = fadd float %1884, %1883 + %1886 = fmul float %1871, %1885 + %1887 = fadd float %1857, %1886 + %1888 = call float @llvm.sqrt.f32(float %1887) + %1889 = fadd float %1888, 0.000000e+00 + %1890 = fdiv float %1829, %1889 + %1891 = fmul float %1890, 2.000000e+00 + %1892 = bitcast i32 %1651 to float + %1893 = bitcast i32 %1651 to float + %1894 = fmul float %1892, %1893 + %1895 = fadd float %1894, 0.000000e+00 + %1896 = bitcast i32 %684 to float + %1897 = bitcast i32 %684 to float + %1898 = fmul float %1896, %1897 + %1899 = fadd float %1895, %1898 + %1900 = call float @llvm.sqrt.f32(float %1899) + %1901 = fneg float %693 + %1902 = fmul float %1900, %1901 + %1903 = bitcast i32 %1651 to float + %1904 = fadd float %1903, %1902 + %1905 = bitcast i32 %1651 to float + %1906 = bitcast i32 %1651 to float + %1907 = fmul float %1905, %1906 + %1908 = fadd float %1907, 0.000000e+00 + %1909 = bitcast i32 %684 to float + %1910 = bitcast i32 %684 to float + %1911 = fmul float %1909, %1910 + %1912 = fadd float %1908, %1911 + %1913 = call float @llvm.sqrt.f32(float %1912) + %1914 = fneg float %693 + %1915 = fmul float %1913, %1914 + %1916 = bitcast i32 %1651 to float + %1917 = fadd float %1916, %1915 + %1918 = bitcast i32 %1651 to float + %1919 = bitcast i32 %1651 to float + %1920 = fmul float %1918, %1919 + %1921 = fadd float %1920, 0.000000e+00 + %1922 = bitcast i32 %684 to float + %1923 = bitcast i32 %684 to float + %1924 = fmul float %1922, %1923 + %1925 = fadd float %1921, %1924 + %1926 = call float @llvm.sqrt.f32(float %1925) + %1927 = fneg float %693 + %1928 = fmul float %1926, %1927 + %1929 = bitcast i32 %1651 to float + %1930 = fadd float %1929, %1928 + %1931 = fmul float %1917, %1930 + %1932 = fadd float %1931, 0.000000e+00 + %1933 = bitcast i32 %1651 to float + %1934 = bitcast i32 %1651 to float + %1935 = fmul float %1933, %1934 + %1936 = fadd float %1935, 0.000000e+00 + %1937 = bitcast i32 %684 to float + %1938 = bitcast i32 %684 to float + %1939 = fmul float %1937, %1938 + %1940 = fadd float %1936, %1939 + %1941 = call float @llvm.sqrt.f32(float %1940) + %1942 = fneg float %693 + %1943 = fmul float %1941, %1942 + %1944 = fmul float %1943, 0.000000e+00 + %1945 = bitcast i32 %684 to float + %1946 = fadd float %1945, %1944 + %1947 = bitcast i32 %1651 to float + %1948 = bitcast i32 %1651 to float + %1949 = fmul float %1947, %1948 + %1950 = fadd float %1949, 0.000000e+00 + %1951 = bitcast i32 %684 to float + %1952 = bitcast i32 %684 to float + %1953 = fmul float %1951, %1952 + %1954 = fadd float %1950, %1953 + %1955 = call float @llvm.sqrt.f32(float %1954) + %1956 = fneg float %693 + %1957 = fmul float %1955, %1956 + %1958 = fmul float %1957, 0.000000e+00 + %1959 = bitcast i32 %684 to float + %1960 = fadd float %1959, %1958 + %1961 = fmul float %1946, %1960 + %1962 = fadd float %1932, %1961 + %1963 = call float @llvm.sqrt.f32(float %1962) + %1964 = fadd float %1963, 0.000000e+00 + %1965 = fdiv float %1904, %1964 + %1966 = fmul float %1891, %1965 + %1967 = fneg float %1966 + %1968 = fmul float %1967, %1809 + %1969 = fadd float %1968, 0.000000e+00 + %1970 = bitcast i32 %1651 to float + %1971 = bitcast i32 %1651 to float + %1972 = fmul float %1970, %1971 + %1973 = fadd float %1972, 0.000000e+00 + %1974 = bitcast i32 %684 to float + %1975 = bitcast i32 %684 to float + %1976 = fmul float %1974, %1975 + %1977 = fadd float %1973, %1976 + %1978 = call float @llvm.sqrt.f32(float 
%1977) + %1979 = fneg float %693 + %1980 = fmul float %1978, %1979 + %1981 = fmul float %1980, 0.000000e+00 + %1982 = bitcast i32 %684 to float + %1983 = fadd float %1982, %1981 + %1984 = bitcast i32 %1651 to float + %1985 = bitcast i32 %1651 to float + %1986 = fmul float %1984, %1985 + %1987 = fadd float %1986, 0.000000e+00 + %1988 = bitcast i32 %684 to float + %1989 = bitcast i32 %684 to float + %1990 = fmul float %1988, %1989 + %1991 = fadd float %1987, %1990 + %1992 = call float @llvm.sqrt.f32(float %1991) + %1993 = fneg float %693 + %1994 = fmul float %1992, %1993 + %1995 = bitcast i32 %1651 to float + %1996 = fadd float %1995, %1994 + %1997 = bitcast i32 %1651 to float + %1998 = bitcast i32 %1651 to float + %1999 = fmul float %1997, %1998 + %2000 = fadd float %1999, 0.000000e+00 + %2001 = bitcast i32 %684 to float + %2002 = bitcast i32 %684 to float + %2003 = fmul float %2001, %2002 + %2004 = fadd float %2000, %2003 + %2005 = call float @llvm.sqrt.f32(float %2004) + %2006 = fneg float %693 + %2007 = fmul float %2005, %2006 + %2008 = bitcast i32 %1651 to float + %2009 = fadd float %2008, %2007 + %2010 = fmul float %1996, %2009 + %2011 = fadd float %2010, 0.000000e+00 + %2012 = bitcast i32 %1651 to float + %2013 = bitcast i32 %1651 to float + %2014 = fmul float %2012, %2013 + %2015 = fadd float %2014, 0.000000e+00 + %2016 = bitcast i32 %684 to float + %2017 = bitcast i32 %684 to float + %2018 = fmul float %2016, %2017 + %2019 = fadd float %2015, %2018 + %2020 = call float @llvm.sqrt.f32(float %2019) + %2021 = fneg float %693 + %2022 = fmul float %2020, %2021 + %2023 = fmul float %2022, 0.000000e+00 + %2024 = bitcast i32 %684 to float + %2025 = fadd float %2024, %2023 + %2026 = bitcast i32 %1651 to float + %2027 = bitcast i32 %1651 to float + %2028 = fmul float %2026, %2027 + %2029 = fadd float %2028, 0.000000e+00 + %2030 = bitcast i32 %684 to float + %2031 = bitcast i32 %684 to float + %2032 = fmul float %2030, %2031 + %2033 = fadd float %2029, %2032 + %2034 = call float @llvm.sqrt.f32(float %2033) + %2035 = fneg float %693 + %2036 = fmul float %2034, %2035 + %2037 = fmul float %2036, 0.000000e+00 + %2038 = bitcast i32 %684 to float + %2039 = fadd float %2038, %2037 + %2040 = fmul float %2025, %2039 + %2041 = fadd float %2011, %2040 + %2042 = call float @llvm.sqrt.f32(float %2041) + %2043 = fadd float %2042, 0.000000e+00 + %2044 = fdiv float %1983, %2043 + %2045 = fmul float %2044, 2.000000e+00 + %2046 = bitcast i32 %1651 to float + %2047 = bitcast i32 %1651 to float + %2048 = fmul float %2046, %2047 + %2049 = fadd float %2048, 0.000000e+00 + %2050 = bitcast i32 %684 to float + %2051 = bitcast i32 %684 to float + %2052 = fmul float %2050, %2051 + %2053 = fadd float %2049, %2052 + %2054 = call float @llvm.sqrt.f32(float %2053) + %2055 = fneg float %693 + %2056 = fmul float %2054, %2055 + %2057 = fmul float %2056, 0.000000e+00 + %2058 = bitcast i32 %684 to float + %2059 = fadd float %2058, %2057 + %2060 = bitcast i32 %1651 to float + %2061 = bitcast i32 %1651 to float + %2062 = fmul float %2060, %2061 + %2063 = fadd float %2062, 0.000000e+00 + %2064 = bitcast i32 %684 to float + %2065 = bitcast i32 %684 to float + %2066 = fmul float %2064, %2065 + %2067 = fadd float %2063, %2066 + %2068 = call float @llvm.sqrt.f32(float %2067) + %2069 = fneg float %693 + %2070 = fmul float %2068, %2069 + %2071 = bitcast i32 %1651 to float + %2072 = fadd float %2071, %2070 + %2073 = bitcast i32 %1651 to float + %2074 = bitcast i32 %1651 to float + %2075 = fmul float %2073, %2074 + %2076 = fadd float 
%2075, 0.000000e+00 + %2077 = bitcast i32 %684 to float + %2078 = bitcast i32 %684 to float + %2079 = fmul float %2077, %2078 + %2080 = fadd float %2076, %2079 + %2081 = call float @llvm.sqrt.f32(float %2080) + %2082 = fneg float %693 + %2083 = fmul float %2081, %2082 + %2084 = bitcast i32 %1651 to float + %2085 = fadd float %2084, %2083 + %2086 = fmul float %2072, %2085 + %2087 = fadd float %2086, 0.000000e+00 + %2088 = bitcast i32 %1651 to float + %2089 = bitcast i32 %1651 to float + %2090 = fmul float %2088, %2089 + %2091 = fadd float %2090, 0.000000e+00 + %2092 = bitcast i32 %684 to float + %2093 = bitcast i32 %684 to float + %2094 = fmul float %2092, %2093 + %2095 = fadd float %2091, %2094 + %2096 = call float @llvm.sqrt.f32(float %2095) + %2097 = fneg float %693 + %2098 = fmul float %2096, %2097 + %2099 = fmul float %2098, 0.000000e+00 + %2100 = bitcast i32 %684 to float + %2101 = fadd float %2100, %2099 + %2102 = bitcast i32 %1651 to float + %2103 = bitcast i32 %1651 to float + %2104 = fmul float %2102, %2103 + %2105 = fadd float %2104, 0.000000e+00 + %2106 = bitcast i32 %684 to float + %2107 = bitcast i32 %684 to float + %2108 = fmul float %2106, %2107 + %2109 = fadd float %2105, %2108 + %2110 = call float @llvm.sqrt.f32(float %2109) + %2111 = fneg float %693 + %2112 = fmul float %2110, %2111 + %2113 = fmul float %2112, 0.000000e+00 + %2114 = bitcast i32 %684 to float + %2115 = fadd float %2114, %2113 + %2116 = fmul float %2101, %2115 + %2117 = fadd float %2087, %2116 + %2118 = call float @llvm.sqrt.f32(float %2117) + %2119 = fadd float %2118, 0.000000e+00 + %2120 = fdiv float %2059, %2119 + %2121 = fmul float %2045, %2120 + %2122 = fsub float 1.000000e+00, %2121 + %2123 = load float, float* %1154, align 4 + %2124 = fmul float %2122, %2123 + %2125 = fadd float %1969, %2124 + %2126 = insertelement <4 x float> zeroinitializer, float %2125, i32 0 + %2127 = insertelement <4 x float> %2126, float 0.000000e+00, i32 1 + %2128 = insertelement <4 x float> %2127, float 0.000000e+00, i32 2 + %2129 = insertelement <4 x float> %2128, float 0.000000e+00, i32 3 + %2130 = extractelement <4 x float> %2129, i32 0 + store float %2130, float* %1648, align 4 + %2131 = extractelement <4 x float> %2129, i32 1 + %2132 = getelementptr float, float* %2, i32 0 + %2133 = getelementptr inbounds float, float* %2132, i64 3 + store float %2131, float* %2133, align 4 + %2134 = bitcast i32 %1651 to float + %2135 = bitcast i32 %1651 to float + %2136 = fmul float %2134, %2135 + %2137 = fadd float %2136, 0.000000e+00 + %2138 = bitcast i32 %684 to float + %2139 = bitcast i32 %684 to float + %2140 = fmul float %2138, %2139 + %2141 = fadd float %2137, %2140 + %2142 = call float @llvm.sqrt.f32(float %2141) + %2143 = fneg float %693 + %2144 = fmul float %2142, %2143 + %2145 = fmul float %2144, 0.000000e+00 + %2146 = bitcast i32 %684 to float + %2147 = fadd float %2146, %2145 + %2148 = bitcast i32 %1651 to float + %2149 = bitcast i32 %1651 to float + %2150 = fmul float %2148, %2149 + %2151 = fadd float %2150, 0.000000e+00 + %2152 = bitcast i32 %684 to float + %2153 = bitcast i32 %684 to float + %2154 = fmul float %2152, %2153 + %2155 = fadd float %2151, %2154 + %2156 = call float @llvm.sqrt.f32(float %2155) + %2157 = fneg float %693 + %2158 = fmul float %2156, %2157 + %2159 = bitcast i32 %1651 to float + %2160 = fadd float %2159, %2158 + %2161 = bitcast i32 %1651 to float + %2162 = bitcast i32 %1651 to float + %2163 = fmul float %2161, %2162 + %2164 = fadd float %2163, 0.000000e+00 + %2165 = bitcast i32 %684 to float + 
%2166 = bitcast i32 %684 to float + %2167 = fmul float %2165, %2166 + %2168 = fadd float %2164, %2167 + %2169 = call float @llvm.sqrt.f32(float %2168) + %2170 = fneg float %693 + %2171 = fmul float %2169, %2170 + %2172 = bitcast i32 %1651 to float + %2173 = fadd float %2172, %2171 + %2174 = fmul float %2160, %2173 + %2175 = fadd float %2174, 0.000000e+00 + %2176 = bitcast i32 %1651 to float + %2177 = bitcast i32 %1651 to float + %2178 = fmul float %2176, %2177 + %2179 = fadd float %2178, 0.000000e+00 + %2180 = bitcast i32 %684 to float + %2181 = bitcast i32 %684 to float + %2182 = fmul float %2180, %2181 + %2183 = fadd float %2179, %2182 + %2184 = call float @llvm.sqrt.f32(float %2183) + %2185 = fneg float %693 + %2186 = fmul float %2184, %2185 + %2187 = fmul float %2186, 0.000000e+00 + %2188 = bitcast i32 %684 to float + %2189 = fadd float %2188, %2187 + %2190 = bitcast i32 %1651 to float + %2191 = bitcast i32 %1651 to float + %2192 = fmul float %2190, %2191 + %2193 = fadd float %2192, 0.000000e+00 + %2194 = bitcast i32 %684 to float + %2195 = bitcast i32 %684 to float + %2196 = fmul float %2194, %2195 + %2197 = fadd float %2193, %2196 + %2198 = call float @llvm.sqrt.f32(float %2197) + %2199 = fneg float %693 + %2200 = fmul float %2198, %2199 + %2201 = fmul float %2200, 0.000000e+00 + %2202 = bitcast i32 %684 to float + %2203 = fadd float %2202, %2201 + %2204 = fmul float %2189, %2203 + %2205 = fadd float %2175, %2204 + %2206 = call float @llvm.sqrt.f32(float %2205) + %2207 = fadd float %2206, 0.000000e+00 + %2208 = fdiv float %2147, %2207 + %2209 = fmul float %2208, 2.000000e+00 + %2210 = bitcast i32 %1651 to float + %2211 = bitcast i32 %1651 to float + %2212 = fmul float %2210, %2211 + %2213 = fadd float %2212, 0.000000e+00 + %2214 = bitcast i32 %684 to float + %2215 = bitcast i32 %684 to float + %2216 = fmul float %2214, %2215 + %2217 = fadd float %2213, %2216 + %2218 = call float @llvm.sqrt.f32(float %2217) + %2219 = fneg float %693 + %2220 = fmul float %2218, %2219 + %2221 = bitcast i32 %1651 to float + %2222 = fadd float %2221, %2220 + %2223 = bitcast i32 %1651 to float + %2224 = bitcast i32 %1651 to float + %2225 = fmul float %2223, %2224 + %2226 = fadd float %2225, 0.000000e+00 + %2227 = bitcast i32 %684 to float + %2228 = bitcast i32 %684 to float + %2229 = fmul float %2227, %2228 + %2230 = fadd float %2226, %2229 + %2231 = call float @llvm.sqrt.f32(float %2230) + %2232 = fneg float %693 + %2233 = fmul float %2231, %2232 + %2234 = bitcast i32 %1651 to float + %2235 = fadd float %2234, %2233 + %2236 = bitcast i32 %1651 to float + %2237 = bitcast i32 %1651 to float + %2238 = fmul float %2236, %2237 + %2239 = fadd float %2238, 0.000000e+00 + %2240 = bitcast i32 %684 to float + %2241 = bitcast i32 %684 to float + %2242 = fmul float %2240, %2241 + %2243 = fadd float %2239, %2242 + %2244 = call float @llvm.sqrt.f32(float %2243) + %2245 = fneg float %693 + %2246 = fmul float %2244, %2245 + %2247 = bitcast i32 %1651 to float + %2248 = fadd float %2247, %2246 + %2249 = fmul float %2235, %2248 + %2250 = fadd float %2249, 0.000000e+00 + %2251 = bitcast i32 %1651 to float + %2252 = bitcast i32 %1651 to float + %2253 = fmul float %2251, %2252 + %2254 = fadd float %2253, 0.000000e+00 + %2255 = bitcast i32 %684 to float + %2256 = bitcast i32 %684 to float + %2257 = fmul float %2255, %2256 + %2258 = fadd float %2254, %2257 + %2259 = call float @llvm.sqrt.f32(float %2258) + %2260 = fneg float %693 + %2261 = fmul float %2259, %2260 + %2262 = fmul float %2261, 0.000000e+00 + %2263 = bitcast i32 
%684 to float + %2264 = fadd float %2263, %2262 + %2265 = bitcast i32 %1651 to float + %2266 = bitcast i32 %1651 to float + %2267 = fmul float %2265, %2266 + %2268 = fadd float %2267, 0.000000e+00 + %2269 = bitcast i32 %684 to float + %2270 = bitcast i32 %684 to float + %2271 = fmul float %2269, %2270 + %2272 = fadd float %2268, %2271 + %2273 = call float @llvm.sqrt.f32(float %2272) + %2274 = fneg float %693 + %2275 = fmul float %2273, %2274 + %2276 = fmul float %2275, 0.000000e+00 + %2277 = bitcast i32 %684 to float + %2278 = fadd float %2277, %2276 + %2279 = fmul float %2264, %2278 + %2280 = fadd float %2250, %2279 + %2281 = call float @llvm.sqrt.f32(float %2280) + %2282 = fadd float %2281, 0.000000e+00 + %2283 = fdiv float %2222, %2282 + %2284 = fmul float %2209, %2283 + %2285 = fneg float %2284 + %2286 = insertelement <4 x float> zeroinitializer, float %2285, i32 0 + %2287 = insertelement <4 x float> %2286, float 0.000000e+00, i32 1 + %2288 = insertelement <4 x float> %2287, float 0.000000e+00, i32 2 + %2289 = insertelement <4 x float> %2288, float 0.000000e+00, i32 3 + %2290 = load float, float* %1322, align 4 + %2291 = insertelement <4 x float> zeroinitializer, float %2290, i32 0 + %2292 = insertelement <4 x float> %2291, float 0.000000e+00, i32 1 + %2293 = insertelement <4 x float> %2292, float 0.000000e+00, i32 2 + %2294 = insertelement <4 x float> %2293, float 0.000000e+00, i32 3 + %2295 = call <4 x float> @llvm.fma.v4f32(<4 x float> %2289, <4 x float> %2294, <4 x float> zeroinitializer) + %2296 = extractelement <4 x float> %2295, i32 0 + store float %2296, float* %2133, align 4 + %2297 = bitcast i32 %1651 to float + %2298 = bitcast i32 %1651 to float + %2299 = fmul float %2297, %2298 + %2300 = fadd float %2299, 0.000000e+00 + %2301 = bitcast i32 %684 to float + %2302 = bitcast i32 %684 to float + %2303 = fmul float %2301, %2302 + %2304 = fadd float %2300, %2303 + %2305 = call float @llvm.sqrt.f32(float %2304) + %2306 = fneg float %693 + %2307 = fmul float %2305, %2306 + %2308 = fmul float %2307, 0.000000e+00 + %2309 = bitcast i32 %684 to float + %2310 = fadd float %2309, %2308 + %2311 = bitcast i32 %1651 to float + %2312 = bitcast i32 %1651 to float + %2313 = fmul float %2311, %2312 + %2314 = fadd float %2313, 0.000000e+00 + %2315 = bitcast i32 %684 to float + %2316 = bitcast i32 %684 to float + %2317 = fmul float %2315, %2316 + %2318 = fadd float %2314, %2317 + %2319 = call float @llvm.sqrt.f32(float %2318) + %2320 = fneg float %693 + %2321 = fmul float %2319, %2320 + %2322 = bitcast i32 %1651 to float + %2323 = fadd float %2322, %2321 + %2324 = bitcast i32 %1651 to float + %2325 = bitcast i32 %1651 to float + %2326 = fmul float %2324, %2325 + %2327 = fadd float %2326, 0.000000e+00 + %2328 = bitcast i32 %684 to float + %2329 = bitcast i32 %684 to float + %2330 = fmul float %2328, %2329 + %2331 = fadd float %2327, %2330 + %2332 = call float @llvm.sqrt.f32(float %2331) + %2333 = fneg float %693 + %2334 = fmul float %2332, %2333 + %2335 = bitcast i32 %1651 to float + %2336 = fadd float %2335, %2334 + %2337 = fmul float %2323, %2336 + %2338 = fadd float %2337, 0.000000e+00 + %2339 = bitcast i32 %1651 to float + %2340 = bitcast i32 %1651 to float + %2341 = fmul float %2339, %2340 + %2342 = fadd float %2341, 0.000000e+00 + %2343 = bitcast i32 %684 to float + %2344 = bitcast i32 %684 to float + %2345 = fmul float %2343, %2344 + %2346 = fadd float %2342, %2345 + %2347 = call float @llvm.sqrt.f32(float %2346) + %2348 = fneg float %693 + %2349 = fmul float %2347, %2348 + %2350 = fmul 
float %2349, 0.000000e+00 + %2351 = bitcast i32 %684 to float + %2352 = fadd float %2351, %2350 + %2353 = bitcast i32 %1651 to float + %2354 = bitcast i32 %1651 to float + %2355 = fmul float %2353, %2354 + %2356 = fadd float %2355, 0.000000e+00 + %2357 = bitcast i32 %684 to float + %2358 = bitcast i32 %684 to float + %2359 = fmul float %2357, %2358 + %2360 = fadd float %2356, %2359 + %2361 = call float @llvm.sqrt.f32(float %2360) + %2362 = fneg float %693 + %2363 = fmul float %2361, %2362 + %2364 = fmul float %2363, 0.000000e+00 + %2365 = bitcast i32 %684 to float + %2366 = fadd float %2365, %2364 + %2367 = fmul float %2352, %2366 + %2368 = fadd float %2338, %2367 + %2369 = call float @llvm.sqrt.f32(float %2368) + %2370 = fadd float %2369, 0.000000e+00 + %2371 = fdiv float %2310, %2370 + %2372 = fmul float %2371, 2.000000e+00 + %2373 = bitcast i32 %1651 to float + %2374 = bitcast i32 %1651 to float + %2375 = fmul float %2373, %2374 + %2376 = fadd float %2375, 0.000000e+00 + %2377 = bitcast i32 %684 to float + %2378 = bitcast i32 %684 to float + %2379 = fmul float %2377, %2378 + %2380 = fadd float %2376, %2379 + %2381 = call float @llvm.sqrt.f32(float %2380) + %2382 = fneg float %693 + %2383 = fmul float %2381, %2382 + %2384 = bitcast i32 %1651 to float + %2385 = fadd float %2384, %2383 + %2386 = bitcast i32 %1651 to float + %2387 = bitcast i32 %1651 to float + %2388 = fmul float %2386, %2387 + %2389 = fadd float %2388, 0.000000e+00 + %2390 = bitcast i32 %684 to float + %2391 = bitcast i32 %684 to float + %2392 = fmul float %2390, %2391 + %2393 = fadd float %2389, %2392 + %2394 = call float @llvm.sqrt.f32(float %2393) + %2395 = fneg float %693 + %2396 = fmul float %2394, %2395 + %2397 = bitcast i32 %1651 to float + %2398 = fadd float %2397, %2396 + %2399 = bitcast i32 %1651 to float + %2400 = bitcast i32 %1651 to float + %2401 = fmul float %2399, %2400 + %2402 = fadd float %2401, 0.000000e+00 + %2403 = bitcast i32 %684 to float + %2404 = bitcast i32 %684 to float + %2405 = fmul float %2403, %2404 + %2406 = fadd float %2402, %2405 + %2407 = call float @llvm.sqrt.f32(float %2406) + %2408 = fneg float %693 + %2409 = fmul float %2407, %2408 + %2410 = bitcast i32 %1651 to float + %2411 = fadd float %2410, %2409 + %2412 = fmul float %2398, %2411 + %2413 = fadd float %2412, 0.000000e+00 + %2414 = bitcast i32 %1651 to float + %2415 = bitcast i32 %1651 to float + %2416 = fmul float %2414, %2415 + %2417 = fadd float %2416, 0.000000e+00 + %2418 = bitcast i32 %684 to float + %2419 = bitcast i32 %684 to float + %2420 = fmul float %2418, %2419 + %2421 = fadd float %2417, %2420 + %2422 = call float @llvm.sqrt.f32(float %2421) + %2423 = fneg float %693 + %2424 = fmul float %2422, %2423 + %2425 = fmul float %2424, 0.000000e+00 + %2426 = bitcast i32 %684 to float + %2427 = fadd float %2426, %2425 + %2428 = bitcast i32 %1651 to float + %2429 = bitcast i32 %1651 to float + %2430 = fmul float %2428, %2429 + %2431 = fadd float %2430, 0.000000e+00 + %2432 = bitcast i32 %684 to float + %2433 = bitcast i32 %684 to float + %2434 = fmul float %2432, %2433 + %2435 = fadd float %2431, %2434 + %2436 = call float @llvm.sqrt.f32(float %2435) + %2437 = fneg float %693 + %2438 = fmul float %2436, %2437 + %2439 = fmul float %2438, 0.000000e+00 + %2440 = bitcast i32 %684 to float + %2441 = fadd float %2440, %2439 + %2442 = fmul float %2427, %2441 + %2443 = fadd float %2413, %2442 + %2444 = call float @llvm.sqrt.f32(float %2443) + %2445 = fadd float %2444, 0.000000e+00 + %2446 = fdiv float %2385, %2445 + %2447 = fmul float 
%2372, %2446 + %2448 = fneg float %2447 + %2449 = fmul float %2448, %2290 + %2450 = fadd float %2449, 0.000000e+00 + %2451 = bitcast i32 %1651 to float + %2452 = bitcast i32 %1651 to float + %2453 = fmul float %2451, %2452 + %2454 = fadd float %2453, 0.000000e+00 + %2455 = bitcast i32 %684 to float + %2456 = bitcast i32 %684 to float + %2457 = fmul float %2455, %2456 + %2458 = fadd float %2454, %2457 + %2459 = call float @llvm.sqrt.f32(float %2458) + %2460 = fneg float %693 + %2461 = fmul float %2459, %2460 + %2462 = fmul float %2461, 0.000000e+00 + %2463 = bitcast i32 %684 to float + %2464 = fadd float %2463, %2462 + %2465 = bitcast i32 %1651 to float + %2466 = bitcast i32 %1651 to float + %2467 = fmul float %2465, %2466 + %2468 = fadd float %2467, 0.000000e+00 + %2469 = bitcast i32 %684 to float + %2470 = bitcast i32 %684 to float + %2471 = fmul float %2469, %2470 + %2472 = fadd float %2468, %2471 + %2473 = call float @llvm.sqrt.f32(float %2472) + %2474 = fneg float %693 + %2475 = fmul float %2473, %2474 + %2476 = bitcast i32 %1651 to float + %2477 = fadd float %2476, %2475 + %2478 = bitcast i32 %1651 to float + %2479 = bitcast i32 %1651 to float + %2480 = fmul float %2478, %2479 + %2481 = fadd float %2480, 0.000000e+00 + %2482 = bitcast i32 %684 to float + %2483 = bitcast i32 %684 to float + %2484 = fmul float %2482, %2483 + %2485 = fadd float %2481, %2484 + %2486 = call float @llvm.sqrt.f32(float %2485) + %2487 = fneg float %693 + %2488 = fmul float %2486, %2487 + %2489 = bitcast i32 %1651 to float + %2490 = fadd float %2489, %2488 + %2491 = fmul float %2477, %2490 + %2492 = fadd float %2491, 0.000000e+00 + %2493 = bitcast i32 %1651 to float + %2494 = bitcast i32 %1651 to float + %2495 = fmul float %2493, %2494 + %2496 = fadd float %2495, 0.000000e+00 + %2497 = bitcast i32 %684 to float + %2498 = bitcast i32 %684 to float + %2499 = fmul float %2497, %2498 + %2500 = fadd float %2496, %2499 + %2501 = call float @llvm.sqrt.f32(float %2500) + %2502 = fneg float %693 + %2503 = fmul float %2501, %2502 + %2504 = fmul float %2503, 0.000000e+00 + %2505 = bitcast i32 %684 to float + %2506 = fadd float %2505, %2504 + %2507 = bitcast i32 %1651 to float + %2508 = bitcast i32 %1651 to float + %2509 = fmul float %2507, %2508 + %2510 = fadd float %2509, 0.000000e+00 + %2511 = bitcast i32 %684 to float + %2512 = bitcast i32 %684 to float + %2513 = fmul float %2511, %2512 + %2514 = fadd float %2510, %2513 + %2515 = call float @llvm.sqrt.f32(float %2514) + %2516 = fneg float %693 + %2517 = fmul float %2515, %2516 + %2518 = fmul float %2517, 0.000000e+00 + %2519 = bitcast i32 %684 to float + %2520 = fadd float %2519, %2518 + %2521 = fmul float %2506, %2520 + %2522 = fadd float %2492, %2521 + %2523 = call float @llvm.sqrt.f32(float %2522) + %2524 = fadd float %2523, 0.000000e+00 + %2525 = fdiv float %2464, %2524 + %2526 = fmul float %2525, 2.000000e+00 + %2527 = bitcast i32 %1651 to float + %2528 = bitcast i32 %1651 to float + %2529 = fmul float %2527, %2528 + %2530 = fadd float %2529, 0.000000e+00 + %2531 = bitcast i32 %684 to float + %2532 = bitcast i32 %684 to float + %2533 = fmul float %2531, %2532 + %2534 = fadd float %2530, %2533 + %2535 = call float @llvm.sqrt.f32(float %2534) + %2536 = fneg float %693 + %2537 = fmul float %2535, %2536 + %2538 = fmul float %2537, 0.000000e+00 + %2539 = bitcast i32 %684 to float + %2540 = fadd float %2539, %2538 + %2541 = bitcast i32 %1651 to float + %2542 = bitcast i32 %1651 to float + %2543 = fmul float %2541, %2542 + %2544 = fadd float %2543, 0.000000e+00 + %2545 
= bitcast i32 %684 to float + %2546 = bitcast i32 %684 to float + %2547 = fmul float %2545, %2546 + %2548 = fadd float %2544, %2547 + %2549 = call float @llvm.sqrt.f32(float %2548) + %2550 = fneg float %693 + %2551 = fmul float %2549, %2550 + %2552 = bitcast i32 %1651 to float + %2553 = fadd float %2552, %2551 + %2554 = bitcast i32 %1651 to float + %2555 = bitcast i32 %1651 to float + %2556 = fmul float %2554, %2555 + %2557 = fadd float %2556, 0.000000e+00 + %2558 = bitcast i32 %684 to float + %2559 = bitcast i32 %684 to float + %2560 = fmul float %2558, %2559 + %2561 = fadd float %2557, %2560 + %2562 = call float @llvm.sqrt.f32(float %2561) + %2563 = fneg float %693 + %2564 = fmul float %2562, %2563 + %2565 = bitcast i32 %1651 to float + %2566 = fadd float %2565, %2564 + %2567 = fmul float %2553, %2566 + %2568 = fadd float %2567, 0.000000e+00 + %2569 = bitcast i32 %1651 to float + %2570 = bitcast i32 %1651 to float + %2571 = fmul float %2569, %2570 + %2572 = fadd float %2571, 0.000000e+00 + %2573 = bitcast i32 %684 to float + %2574 = bitcast i32 %684 to float + %2575 = fmul float %2573, %2574 + %2576 = fadd float %2572, %2575 + %2577 = call float @llvm.sqrt.f32(float %2576) + %2578 = fneg float %693 + %2579 = fmul float %2577, %2578 + %2580 = fmul float %2579, 0.000000e+00 + %2581 = bitcast i32 %684 to float + %2582 = fadd float %2581, %2580 + %2583 = bitcast i32 %1651 to float + %2584 = bitcast i32 %1651 to float + %2585 = fmul float %2583, %2584 + %2586 = fadd float %2585, 0.000000e+00 + %2587 = bitcast i32 %684 to float + %2588 = bitcast i32 %684 to float + %2589 = fmul float %2587, %2588 + %2590 = fadd float %2586, %2589 + %2591 = call float @llvm.sqrt.f32(float %2590) + %2592 = fneg float %693 + %2593 = fmul float %2591, %2592 + %2594 = fmul float %2593, 0.000000e+00 + %2595 = bitcast i32 %684 to float + %2596 = fadd float %2595, %2594 + %2597 = fmul float %2582, %2596 + %2598 = fadd float %2568, %2597 + %2599 = call float @llvm.sqrt.f32(float %2598) + %2600 = fadd float %2599, 0.000000e+00 + %2601 = fdiv float %2540, %2600 + %2602 = fmul float %2526, %2601 + %2603 = fsub float 1.000000e+00, %2602 + %2604 = load float, float* %1637, align 4 + %2605 = fmul float %2603, %2604 + %2606 = fadd float %2450, %2605 + %2607 = insertelement <4 x float> zeroinitializer, float %2606, i32 0 + %2608 = insertelement <4 x float> %2607, float 0.000000e+00, i32 1 + %2609 = insertelement <4 x float> %2608, float 0.000000e+00, i32 2 + %2610 = insertelement <4 x float> %2609, float 0.000000e+00, i32 3 + %2611 = extractelement <4 x float> %2610, i32 0 + store float %2611, float* %2133, align 4 + %2612 = getelementptr float, float* %1, i32 0 + %2613 = getelementptr inbounds float, float* %2612, i64 2 + %2614 = bitcast float* %2613 to i32* + %2615 = load i32, i32* %2614, align 4 + %2616 = bitcast i32 %2615 to float + %2617 = insertelement <4 x float> zeroinitializer, float %2616, i32 0 + %2618 = getelementptr float, float* %1, i32 0 + %2619 = getelementptr inbounds float, float* %2618, i64 1 + %2620 = bitcast float* %2619 to i32* + %2621 = load i32, i32* %2620, align 4 + %2622 = bitcast i32 %2621 to float + %2623 = insertelement <4 x float> %2617, float %2622, i32 1 + %2624 = insertelement <4 x float> %2623, float 0.000000e+00, i32 2 + %2625 = insertelement <4 x float> %2624, float 0.000000e+00, i32 3 + %2626 = extractelement <4 x float> %2625, i32 0 + %2627 = bitcast i32* %2620 to float* + store float %2626, float* %2627, align 4 + %2628 = extractelement <4 x float> %2625, i32 1 + %2629 = bitcast i32* 
%2614 to float* + store float %2628, float* %2629, align 4 + ret void +} + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader13: + %3 = bitcast float* %2 to i8* + %4 = bitcast float* %0 to i8* + %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) + %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #8 + %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 + %8 = bitcast i8* %7 to float* + store float 1.000000e+00, float* %8, align 4 + %9 = getelementptr inbounds i8, i8* %7, i64 8 + %10 = getelementptr inbounds i8, i8* %7, i64 12 + %11 = bitcast i8* %10 to float* + store float 1.000000e+00, float* %11, align 4 + %12 = bitcast float* %1 to i8* + %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) + %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %15 = bitcast i8* %14 to float* + %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %17 = bitcast i8* %16 to float* + %18 = bitcast float* %2 to i32* + %19 = load i32, i32* %18, align 4 + %20 = bitcast i8* %14 to i32* + store i32 %19, i32* %20, align 4 + %21 = bitcast i8* %7 to i32* + %22 = load i32, i32* %21, align 4 + %23 = bitcast i8* %16 to i32* + store i32 %22, i32* %23, align 4 + %24 = getelementptr inbounds float, float* %2, i64 2 + %25 = bitcast float* %24 to i32* + %26 = load i32, i32* %25, align 4 + %27 = getelementptr inbounds i8, i8* %14, i64 4 + %28 = bitcast i8* %27 to i32* + store i32 %26, i32* %28, align 4 + %29 = bitcast i8* %9 to i32* + %30 = load i32, i32* %29, align 4 + %31 = getelementptr inbounds i8, i8* %16, i64 4 + %32 = bitcast i8* %31 to i32* + store i32 %30, i32* %32, align 4 + %33 = load float, float* %15, align 4 + %34 = call float @no_opt_sgn(float %33) + %35 = fneg float %34 + %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) + %37 = fmul float %36, %35 + %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %39 = bitcast i8* %38 to float* + %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %41 = load float, float* %15, align 4 + %42 = load float, float* %17, align 4 + %43 = fmul float %37, %42 + %44 = fadd float %41, %43 + store float %44, float* %39, align 4 + %45 = bitcast i8* %27 to float* + %46 = load float, float* %45, align 4 + %47 = bitcast i8* %31 to float* + %48 = load float, float* %47, align 4 + %49 = fmul float %37, %48 + %50 = fadd float %46, %49 + %51 = getelementptr inbounds i8, i8* %38, i64 4 + %52 = bitcast i8* %51 to float* + store float %50, float* %52, align 4 + %53 = bitcast i8* %40 to float* + %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) + %55 = fadd float %54, 0x3EE4F8B580000000 + %56 = load float, float* %39, align 4 + %57 = fdiv float %56, %55 + store float %57, float* %53, align 4 + %58 = load float, float* %52, align 4 + %59 = fdiv float %58, %55 + %60 = getelementptr inbounds i8, i8* %40, i64 4 + %61 = bitcast i8* %60 to float* + store float %59, float* %61, align 4 + %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 + %63 = bitcast i8* %62 to float* + %64 = load float, float* %53, align 4 + %65 = fmul float %64, 2.000000e+00 + %66 = fmul float %65, %64 + %67 = fsub float 1.000000e+00, %66 + store float %67, float* %63, align 4 + %68 = load float, float* %53, align 
4 + %69 = fmul float %68, 2.000000e+00 + %70 = load float, float* %61, align 4 + %71 = fmul float %69, %70 + %72 = fsub float 0.000000e+00, %71 + %73 = getelementptr inbounds i8, i8* %62, i64 4 + %74 = bitcast i8* %73 to float* + store float %72, float* %74, align 4 + %75 = load float, float* %61, align 4 + %76 = fmul float %75, 2.000000e+00 + %77 = load float, float* %53, align 4 + %78 = fmul float %76, %77 + %79 = fsub float 0.000000e+00, %78 + %80 = getelementptr inbounds i8, i8* %62, i64 8 + %81 = bitcast i8* %80 to float* + store float %79, float* %81, align 4 + %82 = load float, float* %61, align 4 + %83 = fmul float %82, 2.000000e+00 + %84 = fmul float %83, %82 + %85 = fsub float 1.000000e+00, %84 + %86 = getelementptr inbounds i8, i8* %62, i64 12 + %87 = bitcast i8* %86 to float* + store float %85, float* %87, align 4 + %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 + %89 = bitcast i8* %88 to float* + %90 = bitcast i8* %62 to i32* + %91 = load i32, i32* %90, align 4 + %92 = bitcast i8* %88 to i32* + store i32 %91, i32* %92, align 4 + %93 = bitcast i8* %73 to i32* + %94 = load i32, i32* %93, align 4 + %95 = getelementptr inbounds i8, i8* %88, i64 4 + %96 = bitcast i8* %95 to i32* + store i32 %94, i32* %96, align 4 + %97 = bitcast i8* %80 to i32* + %98 = load i32, i32* %97, align 4 + %99 = getelementptr inbounds i8, i8* %88, i64 8 + %100 = bitcast i8* %99 to i32* + store i32 %98, i32* %100, align 4 + %101 = bitcast i8* %86 to i32* + %102 = load i32, i32* %101, align 4 + %103 = getelementptr inbounds i8, i8* %88, i64 12 + %104 = bitcast i8* %103 to i32* + store i32 %102, i32* %104, align 4 + %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #8 + call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) + call void @free(i8* %14) + call void @free(i8* %16) + call void @free(i8* %38) + call void @free(i8* %40) + call void @free(i8* %62) + call void @free(i8* %88) + call void @no_opt_naive_fixed_transpose(float* %1) + ret void +} + +; Function Attrs: nounwind +declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 + +; Function Attrs: nounwind readnone speculatable willreturn +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 + +; Function Attrs: allocsize(0,1) +declare i8* @calloc(i64, i64) #5 + +declare void @free(i8*) #6 + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #1 { +.preheader6: + %0 = alloca i64, align 8 + %1 = alloca [4 x float], align 16 + %2 = alloca [4 x float], align 16 + %3 = alloca [4 x float], align 16 + %4 = alloca [4 x float], align 16 + %5 = alloca [4 x float], align 16 + %6 = call i64 @time(i64* null) #8 + store i64 %6, i64* %0, align 8 + %7 = call i64 @time(i64* nonnull %0) #8 + %8 = trunc i64 %7 to i32 + call void @srand(i32 %8) #8 + %9 = call i32 @rand() #8 + %10 = sitofp i32 %9 to float + %11 = fdiv float %10, 0x41747AE140000000 + %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 + store float %11, float* %12, align 16 + %13 = fpext float %11 to double + %14 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %13) #8 + %15 = call i32 @rand() #8 + %16 = sitofp i32 %15 to float + %17 = fdiv float %16, 0x41747AE140000000 + %18 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 + store float %17, float* %18, align 4 + %19 = fpext float %17 to double + %20 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %19) #8 + %21 = call i32 @rand() #8 + %22 = sitofp i32 %21 to float + %23 = fdiv float %22, 0x41747AE140000000 + %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 + store float %23, float* %24, align 8 + %25 = fpext float %23 to double + %26 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %25) #8 + %27 = call i32 @rand() #8 + %28 = sitofp i32 %27 to float + %29 = fdiv float %28, 0x41747AE140000000 + %30 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 + store float %29, float* %30, align 4 + %31 = fpext float %29 to double + %32 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %31) #8 + %33 = bitcast [4 x float]* %2 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %33, i8 0, i64 16, i1 false) + %34 = bitcast [4 x float]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %34, i8 0, i64 16, i1 false) + %35 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 + %36 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 + call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %35, float* nonnull %36) + %37 = bitcast [4 x float]* %4 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %37, i8 0, i64 16, i1 false) + %38 = bitcast [4 x float]* %5 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %38, i8 0, i64 16, i1 false) + %39 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 + %40 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 + call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %39, float* nonnull %40) + %41 = load float, float* %35, align 16 + %42 = fpext float %41 to double + %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %42) #8 + %44 = load float, float* %39, align 16 + %45 = fpext float %44 to double + %46 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %45) #8 + %47 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 + %48 = load float, float* %47, align 4 + %49 = fpext float %48 to double + %50 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %49) #8 + %51 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 + %52 = load float, float* %51, align 4 + %53 = fpext float %52 to double + %54 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %53) #8 + %55 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 + %56 = load float, float* %55, align 8 + %57 = fpext float %56 to double + %58 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %57) #8 + %59 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 + %60 = load float, float* %59, align 8 + %61 = fpext float %60 to double + %62 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %61) #8 + %63 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 + %64 = load float, float* %63, align 4 + %65 = fpext float %64 to double + %66 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %65) #8 + %67 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 + %68 = load float, float* %67, align 4 + %69 = fpext float %68 to double + %70 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %69) #8 + %71 = load float, float* %36, align 16 + %72 = fpext float %71 to double + %73 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %72) #8 + %74 = load float, float* %40, align 16 + %75 = fpext float %74 to double + %76 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %75) #8 + %77 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 + %78 = load float, float* %77, align 4 + %79 = fpext float %78 to double + %80 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %79) #8 + %81 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 + %82 = load float, float* %81, align 4 + %83 = fpext float %82 to double + %84 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %83) #8 + %85 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 + %86 = load float, float* %85, align 8 + %87 = fpext float %86 to double + %88 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %87) #8 + %89 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 + %90 = load float, float* %89, align 8 + %91 = fpext float %90 to double + %92 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %91) #8 + %93 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 + %94 = load float, float* %93, align 4 + %95 = fpext float %94 to double + %96 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %95) #8 + %97 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 + %98 = load float, float* %97, align 4 + %99 = fpext float %98 to double + %100 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %99) #8 + ret i32 0 +} + +declare i64 @time(i64*) #6 + +declare void @srand(i32) #6 + +declare i32 @rand() #6 + +declare i32 @printf(i8*, ...) 
#6 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #7 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #2 + +attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone speculatable willreturn } +attributes #3 = { argmemonly nounwind willreturn writeonly } +attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #7 = { argmemonly nounwind willreturn } +attributes #8 = { nounwind } +attributes #9 = { nounwind allocsize(0,1) } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} +!3 = distinct !{!3, !4} +!4 = !{!"llvm.loop.unroll.disable"} +!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index b7f7bc53..884fc280 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -592,6 +592,8 @@ Instruction 
*dfs_instructions(Instruction *current_instr,
         }
     }
     LLVMPair new_pair;
+    assert(isa(current_instr) || isa(current_instr));
+    assert(isa(cloned_instr) || isa(cloned_instr));
     new_pair.original_value = wrap(current_instr);
     new_pair.new_value = wrap(cloned_instr);
     translated_exprs.push_back(new_pair);
@@ -610,6 +612,27 @@ Instruction *dfs_instructions(Instruction *current_instr,
             cloned_instr->setOperand(i, cloned_arg);
         }
     }
+
+    if (isa<LoadInst>(current_instr)) {
+        bool load_in_map = false;
+        for (LLVMPair pair : translated_exprs) {
+            Instruction *original_val =
+                dyn_cast<Instruction>(unwrap(pair.original_value));
+            if (current_instr == original_val) {
+                load_in_map = true;
+            }
+        }
+        if (!load_in_map) {
+            LLVMPair new_pair;
+            assert(isa(current_instr) ||
+                   isa(current_instr));
+            assert(isa(cloned_instr) ||
+                   isa(cloned_instr));
+            new_pair.original_value = wrap(current_instr);
+            new_pair.new_value = wrap(cloned_instr);
+            translated_exprs.push_back(new_pair);
+        }
+    }
     BasicBlock::InstListType &intermediate_instrs = B->getInstList();
     intermediate_instrs.push_back(cloned_instr);
     return cloned_instr;
@@ -648,6 +671,16 @@ bool is_memmove_variety(CallInst *inst) {
     return false;
 }
 
+bool call_is_not_sqrt(CallInst *inst) {
+    Function *function = inst->getCalledFunction();
+    if (function != NULL) {
+        return !(function->getName() == SQRT32_FUNCTION_NAME ||
+                 function->getName() == SQRT64_FUNCTION_NAME);
+    }
+    return true;  // just assume it is not a sqrt. This means no optimization
+                  // will be done
+}
+
 /**
  * Below is the main DiospyrosPass that activates the Rust lib.rs code,
  * which calls the Egg vectorizer and rewrites the optimized code in place.
@@ -802,6 +835,20 @@ struct DiospyrosPass : public FunctionPass {
                     inner_vector = {};
                     store_locations.clear();
                 }
+                // else if (call_is_not_sqrt(call_inst)) {
+                //     // All Calls that are not to sqrt functions
+                //     // are not optimized.
+ // errs() << "There was a call!\n"; + // errs() << *call_inst << "\n"; + // if (!inner_vector.empty()) { + // vectorization_accumulator.push_back(inner_vector); + // } + // Instruction *call = dyn_cast(call_inst); + // inner_vector = {wrap(call)}; + // vectorization_accumulator.push_back(inner_vector); + // inner_vector = {}; + // store_locations.clear(); + // } } else if (auto *op = dyn_cast(&I)) { Value *load_loc = op->getOperand(0); if (!inner_vector.empty()) { @@ -859,7 +906,8 @@ struct DiospyrosPass : public FunctionPass { dyn_cast(last_store))) || (isa(last_store) && is_memmove_variety( - dyn_cast(last_store)))); + dyn_cast(last_store))) || + (isa(last_store))); dfs_instructions(store_instr, translated_exprs, &B); } diff --git a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c index e152c277..2678fc1c 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c +++ b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c @@ -7,7 +7,7 @@ #include #include -#define SIZE 3 +#define SIZE 2 #define MAX_FLOAT 100.00f #define DELTA 0.1f @@ -284,6 +284,7 @@ int main(void) { float A[SIZE * SIZE] = {0.0f}; for (int i = 0; i < SIZE * SIZE; i++) { A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + printf("%f\n", A[i]); } float Q[SIZE * SIZE] = {0.0f}; @@ -297,14 +298,14 @@ int main(void) { for (int j = 0; j < SIZE; j++) { printf("Q Output: %f\n", Q[i * SIZE + j]); printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); - assert(fabs(expectedQ[i] - Q[i]) < DELTA); + // assert(fabs(expectedQ[i] - Q[i]) < DELTA); } } for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { printf("R Output: %f\n", R[i * SIZE + j]); printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); - assert(fabs(expectedR[i] - R[i]) < DELTA); + // assert(fabs(expectedR[i] - R[i]) < DELTA); } } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/failed-test/aa.ll b/src/dios-egraphs/Diospyros/failed-test/aa.ll new file mode 100644 index 00000000..5be2aa79 --- /dev/null +++ b/src/dios-egraphs/Diospyros/failed-test/aa.ll @@ -0,0 +1,750 @@ +; ModuleID = 'build/opt.ll' +source_filename = "fail-tests/qr-decomp-local-arrays.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 +@.str.1 = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 +@.str.2 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 +@.str.3 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 +@.str.4 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @sgn(float %0) #0 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_sgn(float %0) #1 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @naive_norm(float* %0, i32 %1) #0 { + %3 = icmp sgt i32 %1, 0 + 
%smax = select i1 %3, i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 3 + br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 + %indvars.iv.next = or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32(float) #2 + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_naive_norm(float* %0, i32 %1) #1 { + %3 = icmp sgt i32 %1, 0 + %smax = select i1 %3, i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 
3 + br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 + %indvars.iv.next = or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_transpose(float* %0) #0 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast float* %4 to i32* + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_transpose(float* %0) #1 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast 
float* %4 to i32* + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { +.preheader: + store float 0.000000e+00, float* %2, align 4 + %3 = load float, float* %0, align 4 + %4 = load float, float* %1, align 4 + %5 = fmul float %3, %4 + %6 = fadd float %5, 0.000000e+00 + store float %6, float* %2, align 4 + %7 = getelementptr inbounds float, float* %0, i64 1 + %8 = load float, float* %7, align 4 + %9 = getelementptr inbounds float, float* %1, i64 2 + %10 = load float, float* %9, align 4 + %11 = fmul float %8, %10 + %12 = fadd float %6, %11 + store float %12, float* %2, align 4 + %13 = getelementptr inbounds float, float* %2, i64 1 + store float 0.000000e+00, float* %13, align 4 + %14 = load float, float* %0, align 4 + %15 = getelementptr inbounds float, float* %1, i64 1 + %16 = load float, float* %15, align 4 + %17 = fmul float %14, %16 + %18 = fadd float %17, 0.000000e+00 + store float %18, float* %13, align 4 + %19 = load float, float* %7, align 4 + %20 = getelementptr inbounds float, float* %1, i64 3 + %21 = load float, float* %20, align 4 + %22 = fmul float %19, %21 + %23 = fadd float %18, %22 + store float %23, float* %13, align 4 + %24 = getelementptr inbounds float, float* %0, i64 2 + %25 = getelementptr inbounds float, float* %2, i64 2 + store float 0.000000e+00, float* %25, align 4 + %26 = load float, float* %24, align 4 + %27 = load float, float* %1, align 4 + %28 = fmul float %26, %27 + %29 = fadd float %28, 0.000000e+00 + store float %29, float* %25, align 4 + %30 = getelementptr inbounds float, float* %0, i64 3 + %31 = load float, float* %30, align 4 + %32 = load float, float* %9, align 4 + %33 = fmul float %31, %32 + %34 = fadd float %29, %33 + store float %34, float* %25, align 4 + %35 = getelementptr inbounds float, float* %2, i64 3 + store float 0.000000e+00, float* %35, align 4 + %36 = load float, float* %24, align 4 + %37 = load float, float* %15, align 4 + %38 = fmul float %36, %37 + %39 = fadd float %38, 0.000000e+00 + store float %39, float* %35, align 4 + %40 = load float, float* %30, align 4 + %41 = load float, float* %20, align 4 + %42 = fmul float %40, %41 + %43 = fadd float %39, %42 + store float %43, float* %35, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { +.preheader: + store float 0.000000e+00, float* %2, align 4 + %3 = load float, float* %0, align 4 + %4 = load float, float* %1, align 4 + %5 = fmul float %3, %4 + %6 = fadd float %5, 0.000000e+00 + store float %6, float* %2, align 4 + %7 = getelementptr inbounds float, float* %0, i64 1 + %8 = load float, float* %7, align 4 + %9 = getelementptr inbounds float, float* %1, i64 2 + %10 = load float, float* %9, align 4 + %11 = fmul float %8, %10 + %12 = fadd float %6, %11 + store float %12, float* %2, align 4 + %13 = getelementptr inbounds float, float* %2, i64 1 + store float 0.000000e+00, float* %13, align 4 + %14 = load float, float* %0, align 4 + %15 = getelementptr inbounds float, float* %1, i64 1 + %16 = load float, float* %15, align 4 + %17 = fmul float %14, %16 + %18 = fadd float %17, 0.000000e+00 + store float %18, float* %13, align 4 + %19 = load float, float* %7, align 4 + %20 = getelementptr inbounds float, float* %1, i64 3 + %21 = load float, float* %20, align 4 + %22 = fmul float %19, %21 + %23 = 
fadd float %18, %22 + store float %23, float* %13, align 4 + %24 = getelementptr inbounds float, float* %0, i64 2 + %25 = getelementptr inbounds float, float* %2, i64 2 + store float 0.000000e+00, float* %25, align 4 + %26 = load float, float* %24, align 4 + %27 = load float, float* %1, align 4 + %28 = fmul float %26, %27 + %29 = fadd float %28, 0.000000e+00 + store float %29, float* %25, align 4 + %30 = getelementptr inbounds float, float* %0, i64 3 + %31 = load float, float* %30, align 4 + %32 = load float, float* %9, align 4 + %33 = fmul float %31, %32 + %34 = fadd float %29, %33 + store float %34, float* %25, align 4 + %35 = getelementptr inbounds float, float* %2, i64 3 + store float 0.000000e+00, float* %35, align 4 + %36 = load float, float* %24, align 4 + %37 = load float, float* %15, align 4 + %38 = fmul float %36, %37 + %39 = fadd float %38, 0.000000e+00 + store float %39, float* %35, align 4 + %40 = load float, float* %30, align 4 + %41 = load float, float* %20, align 4 + %42 = fmul float %40, %41 + %43 = fadd float %39, %42 + store float %43, float* %35, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader49: + %3 = bitcast float* %1 to i8* + %4 = alloca [4 x float], align 16 + %5 = bitcast [4 x float]* %4 to i8* + %6 = bitcast float* %0 to i32* + %7 = load i32, i32* %6, align 4 + %8 = bitcast float* %2 to i32* + store i32 %7, i32* %8, align 4 + %9 = getelementptr inbounds float, float* %0, i64 1 + %10 = bitcast float* %9 to i32* + %11 = load i32, i32* %10, align 4 + %12 = getelementptr inbounds float, float* %2, i64 1 + %13 = bitcast float* %12 to i32* + store i32 %11, i32* %13, align 4 + %14 = getelementptr inbounds float, float* %0, i64 2 + %15 = bitcast float* %14 to i32* + %16 = load i32, i32* %15, align 4 + %17 = getelementptr inbounds float, float* %2, i64 2 + %18 = bitcast float* %17 to i32* + store i32 %16, i32* %18, align 4 + %19 = getelementptr inbounds float, float* %0, i64 3 + %20 = bitcast float* %19 to i32* + %21 = load i32, i32* %20, align 4 + %22 = getelementptr inbounds float, float* %2, i64 3 + %23 = bitcast float* %22 to i32* + store i32 %21, i32* %23, align 4 + %24 = bitcast i32 %7 to float + %25 = fcmp ogt float %24, 0.000000e+00 + %26 = zext i1 %25 to i32 + %27 = fcmp olt float %24, 0.000000e+00 + %.neg = sext i1 %27 to i32 + %28 = add nsw i32 %.neg, %26 + %29 = sitofp i32 %28 to float + %30 = fmul float %24, %24 + %31 = fadd float %30, 0.000000e+00 + %32 = bitcast i32 %16 to float + %33 = fmul float %32, %32 + %34 = fadd float %31, %33 + %35 = call float @llvm.sqrt.f32(float %34) #8 + %36 = fneg float %29 + %37 = fmul float %35, %36 + %38 = fadd float %24, %37 + %39 = fmul float %37, 0.000000e+00 + %40 = fadd float %32, %39 + %41 = fmul float %38, %38 + %42 = fadd float %41, 0.000000e+00 + %43 = fmul float %40, %40 + %44 = fadd float %42, %43 + %45 = call float @llvm.sqrt.f32(float %44) #8 + %46 = fadd float %45, 0x3EE4F8B580000000 + %47 = fdiv float %38, %46 + %48 = fdiv float %40, %46 + %49 = fmul float %47, 2.000000e+00 + %50 = fmul float %49, %47 + %51 = fsub float 1.000000e+00, %50 + %52 = fmul float %49, %48 + %53 = fsub float 0.000000e+00, %52 + %54 = fmul float %48, 2.000000e+00 + %55 = fmul float %54, %47 + %56 = fsub float 0.000000e+00, %55 + %57 = fmul float %54, %48 + %58 = fsub float 1.000000e+00, %57 + %59 = bitcast float %51 to i32 + %60 = bitcast [4 x float]* %4 to i32* + store i32 %59, i32* %60, align 16 + %61 = bitcast float %53 to 
i32 + %62 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 + %63 = bitcast float* %62 to i32* + store i32 %61, i32* %63, align 4 + %64 = bitcast float %56 to i32 + %65 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 + %66 = bitcast float* %65 to i32* + store i32 %64, i32* %66, align 8 + %67 = bitcast float %58 to i32 + %68 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 + %69 = bitcast float* %68 to i32* + store i32 %67, i32* %69, align 4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %3, i8* nonnull align 16 dereferenceable(16) %5, i64 16, i1 false) + store float 0.000000e+00, float* %2, align 4 + %70 = load float, float* %0, align 4 + %71 = fmul float %51, %70 + %72 = fadd float %71, 0.000000e+00 + store float %72, float* %2, align 4 + %73 = load float, float* %14, align 4 + %74 = fmul float %53, %73 + %75 = fadd float %72, %74 + store float %75, float* %2, align 4 + store float 0.000000e+00, float* %12, align 4 + %76 = load float, float* %9, align 4 + %77 = fmul float %51, %76 + %78 = fadd float %77, 0.000000e+00 + store float %78, float* %12, align 4 + %79 = load float, float* %19, align 4 + %80 = fmul float %53, %79 + %81 = fadd float %78, %80 + store float %81, float* %12, align 4 + store float 0.000000e+00, float* %17, align 4 + %82 = load float, float* %0, align 4 + %83 = fmul float %56, %82 + %84 = fadd float %83, 0.000000e+00 + store float %84, float* %17, align 4 + %85 = load float, float* %14, align 4 + %86 = fmul float %58, %85 + %87 = fadd float %84, %86 + store float %87, float* %17, align 4 + store float 0.000000e+00, float* %22, align 4 + %88 = load float, float* %9, align 4 + %89 = fmul float %56, %88 + %90 = fadd float %89, 0.000000e+00 + store float %90, float* %22, align 4 + %91 = load float, float* %19, align 4 + %92 = fmul float %58, %91 + %93 = fadd float %90, %92 + store float %93, float* %22, align 4 + %94 = getelementptr inbounds float, float* %1, i64 1 + %95 = bitcast float* %94 to i32* + %96 = load i32, i32* %95, align 4 + %97 = getelementptr inbounds float, float* %1, i64 2 + %98 = bitcast float* %97 to i32* + %99 = load i32, i32* %98, align 4 + store i32 %99, i32* %95, align 4 + store i32 %96, i32* %98, align 4 + ret void +} + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader13: + %3 = bitcast float* %2 to i8* + %4 = bitcast float* %0 to i8* + %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) + %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #8 + %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 + %8 = bitcast i8* %7 to float* + store float 1.000000e+00, float* %8, align 4 + %9 = getelementptr inbounds i8, i8* %7, i64 8 + %10 = getelementptr inbounds i8, i8* %7, i64 12 + %11 = bitcast i8* %10 to float* + store float 1.000000e+00, float* %11, align 4 + %12 = bitcast float* %1 to i8* + %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) + %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %15 = bitcast i8* %14 to float* + %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %17 = bitcast i8* %16 to float* + %18 = bitcast float* %2 to i32* + %19 = load i32, i32* %18, align 4 + %20 = bitcast i8* %14 to i32* + store i32 %19, 
i32* %20, align 4 + %21 = bitcast i8* %7 to i32* + %22 = load i32, i32* %21, align 4 + %23 = bitcast i8* %16 to i32* + store i32 %22, i32* %23, align 4 + %24 = getelementptr inbounds float, float* %2, i64 2 + %25 = bitcast float* %24 to i32* + %26 = load i32, i32* %25, align 4 + %27 = getelementptr inbounds i8, i8* %14, i64 4 + %28 = bitcast i8* %27 to i32* + store i32 %26, i32* %28, align 4 + %29 = bitcast i8* %9 to i32* + %30 = load i32, i32* %29, align 4 + %31 = getelementptr inbounds i8, i8* %16, i64 4 + %32 = bitcast i8* %31 to i32* + store i32 %30, i32* %32, align 4 + %33 = load float, float* %15, align 4 + %34 = call float @no_opt_sgn(float %33) + %35 = fneg float %34 + %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) + %37 = fmul float %36, %35 + %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %39 = bitcast i8* %38 to float* + %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %41 = load float, float* %15, align 4 + %42 = load float, float* %17, align 4 + %43 = fmul float %37, %42 + %44 = fadd float %41, %43 + store float %44, float* %39, align 4 + %45 = bitcast i8* %27 to float* + %46 = load float, float* %45, align 4 + %47 = bitcast i8* %31 to float* + %48 = load float, float* %47, align 4 + %49 = fmul float %37, %48 + %50 = fadd float %46, %49 + %51 = getelementptr inbounds i8, i8* %38, i64 4 + %52 = bitcast i8* %51 to float* + store float %50, float* %52, align 4 + %53 = bitcast i8* %40 to float* + %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) + %55 = fadd float %54, 0x3EE4F8B580000000 + %56 = load float, float* %39, align 4 + %57 = fdiv float %56, %55 + store float %57, float* %53, align 4 + %58 = load float, float* %52, align 4 + %59 = fdiv float %58, %55 + %60 = getelementptr inbounds i8, i8* %40, i64 4 + %61 = bitcast i8* %60 to float* + store float %59, float* %61, align 4 + %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 + %63 = bitcast i8* %62 to float* + %64 = load float, float* %53, align 4 + %65 = fmul float %64, 2.000000e+00 + %66 = fmul float %65, %64 + %67 = fsub float 1.000000e+00, %66 + store float %67, float* %63, align 4 + %68 = load float, float* %53, align 4 + %69 = fmul float %68, 2.000000e+00 + %70 = load float, float* %61, align 4 + %71 = fmul float %69, %70 + %72 = fsub float 0.000000e+00, %71 + %73 = getelementptr inbounds i8, i8* %62, i64 4 + %74 = bitcast i8* %73 to float* + store float %72, float* %74, align 4 + %75 = load float, float* %61, align 4 + %76 = fmul float %75, 2.000000e+00 + %77 = load float, float* %53, align 4 + %78 = fmul float %76, %77 + %79 = fsub float 0.000000e+00, %78 + %80 = getelementptr inbounds i8, i8* %62, i64 8 + %81 = bitcast i8* %80 to float* + store float %79, float* %81, align 4 + %82 = load float, float* %61, align 4 + %83 = fmul float %82, 2.000000e+00 + %84 = fmul float %83, %82 + %85 = fsub float 1.000000e+00, %84 + %86 = getelementptr inbounds i8, i8* %62, i64 12 + %87 = bitcast i8* %86 to float* + store float %85, float* %87, align 4 + %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 + %89 = bitcast i8* %88 to float* + %90 = bitcast i8* %62 to i32* + %91 = load i32, i32* %90, align 4 + %92 = bitcast i8* %88 to i32* + store i32 %91, i32* %92, align 4 + %93 = bitcast i8* %73 to i32* + %94 = load i32, i32* %93, align 4 + %95 = getelementptr inbounds i8, i8* %88, i64 4 + %96 = bitcast i8* %95 to i32* + store i32 %94, i32* %96, align 4 + %97 = bitcast i8* %80 to i32* + %98 = load i32, i32* %97, align 4 + %99 = 
getelementptr inbounds i8, i8* %88, i64 8 + %100 = bitcast i8* %99 to i32* + store i32 %98, i32* %100, align 4 + %101 = bitcast i8* %86 to i32* + %102 = load i32, i32* %101, align 4 + %103 = getelementptr inbounds i8, i8* %88, i64 12 + %104 = bitcast i8* %103 to i32* + store i32 %102, i32* %104, align 4 + %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #8 + call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) + call void @free(i8* %14) + call void @free(i8* %16) + call void @free(i8* %38) + call void @free(i8* %40) + call void @free(i8* %62) + call void @free(i8* %88) + call void @no_opt_naive_fixed_transpose(float* %1) + ret void +} + +; Function Attrs: nounwind +declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 + +; Function Attrs: nounwind readnone speculatable willreturn +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 + +; Function Attrs: allocsize(0,1) +declare i8* @calloc(i64, i64) #5 + +declare void @free(i8*) #6 + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #1 { +.preheader6: + %0 = alloca i64, align 8 + %1 = alloca [4 x float], align 16 + %2 = alloca [4 x float], align 16 + %3 = alloca [4 x float], align 16 + %4 = alloca [4 x float], align 16 + %5 = alloca [4 x float], align 16 + %6 = call i64 @time(i64* null) #8 + store i64 %6, i64* %0, align 8 + %7 = call i64 @time(i64* nonnull %0) #8 + %8 = trunc i64 %7 to i32 + call void @srand(i32 %8) #8 + %9 = call i32 @rand() #8 + %10 = sitofp i32 %9 to float + %11 = fdiv float %10, 0x41747AE140000000 + %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 + store float %11, float* %12, align 16 + %13 = fpext float %11 to double + %14 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %13) #8 + %15 = call i32 @rand() #8 + %16 = sitofp i32 %15 to float + %17 = fdiv float %16, 0x41747AE140000000 + %18 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 + store float %17, float* %18, align 4 + %19 = fpext float %17 to double + %20 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %19) #8 + %21 = call i32 @rand() #8 + %22 = sitofp i32 %21 to float + %23 = fdiv float %22, 0x41747AE140000000 + %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 + store float %23, float* %24, align 8 + %25 = fpext float %23 to double + %26 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %25) #8 + %27 = call i32 @rand() #8 + %28 = sitofp i32 %27 to float + %29 = fdiv float %28, 0x41747AE140000000 + %30 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 + store float %29, float* %30, align 4 + %31 = fpext float %29 to double + %32 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %31) #8 + %33 = bitcast [4 x float]* %2 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %33, i8 0, i64 16, i1 false) + %34 = bitcast [4 x float]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %34, i8 0, i64 16, i1 false) + %35 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 + %36 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 + call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %35, float* nonnull %36) + %37 = bitcast [4 x float]* %4 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %37, i8 0, i64 16, i1 false) + %38 = bitcast [4 x float]* %5 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %38, i8 0, i64 16, i1 false) + %39 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 + %40 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 + call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %39, float* nonnull %40) + %41 = load float, float* %35, align 16 + %42 = fpext float %41 to double + %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %42) #8 + %44 = load float, float* %39, align 16 + %45 = fpext float %44 to double + %46 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %45) #8 + %47 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 + %48 = load float, float* %47, align 4 + %49 = fpext float %48 to double + %50 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %49) #8 + %51 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 + %52 = load float, float* %51, align 4 + %53 = fpext float %52 to double + %54 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %53) #8 + %55 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 + %56 = load float, float* %55, align 8 + %57 = fpext float %56 to double + %58 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %57) #8 + %59 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 + %60 = load float, float* %59, align 8 + %61 = fpext float %60 to double + %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %61) #8 + %63 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 + %64 = load float, float* %63, align 4 + %65 = fpext float %64 to double + %66 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %65) #8 + %67 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 + %68 = load float, float* %67, align 4 + %69 = fpext float %68 to double + %70 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %69) #8 + %71 = load float, float* %36, align 16 + %72 = fpext float %71 to double + %73 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %72) #8 + %74 = load float, float* %40, align 16 + %75 = fpext float %74 to double + %76 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %75) #8 + %77 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 + %78 = load float, float* %77, align 4 + %79 = fpext float %78 to double + %80 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %79) #8 + %81 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 + %82 = load float, float* %81, align 4 + %83 = fpext float %82 to double + %84 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %83) #8 + %85 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 + %86 = load float, float* %85, align 8 + %87 = fpext float %86 to double + %88 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %87) #8 + %89 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 + %90 = load float, float* %89, align 8 + %91 = fpext float %90 to double + %92 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %91) #8 + %93 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 + %94 = load float, float* %93, align 4 + %95 = fpext float %94 to double + %96 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %95) #8 + %97 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 + %98 = load float, float* %97, align 4 + %99 = fpext float %98 to double + %100 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %99) #8 + ret i32 0 +} + +declare i64 @time(i64*) #6 + +declare void @srand(i32) #6 + +declare i32 @rand() #6 + +declare i32 @printf(i8*, ...) 
#6 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #7 + +attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone speculatable willreturn } +attributes #3 = { argmemonly nounwind willreturn writeonly } +attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #7 = { argmemonly nounwind willreturn } +attributes #8 = { nounwind } +attributes #9 = { nounwind allocsize(0,1) } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} +!3 = distinct !{!3, !4} +!4 = !{!"llvm.loop.unroll.disable"} +!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/failed-test/clang.ll b/src/dios-egraphs/Diospyros/failed-test/clang.ll new file mode 100644 index 00000000..35018816 --- /dev/null +++ b/src/dios-egraphs/Diospyros/failed-test/clang.ll @@ -0,0 +1,2293 @@ +; ModuleID = 'fail-tests/qr-decomp-local-arrays.c' +source_filename = "fail-tests/qr-decomp-local-arrays.c" +target datalayout = 
"e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 +@.str.1 = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 +@.str.2 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 +@.str.3 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 +@.str.4 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @sgn(float %0) #0 { + %2 = alloca float, align 4 + store float %0, float* %2, align 4 + %3 = load float, float* %2, align 4 + %4 = fcmp ogt float %3, 0.000000e+00 + %5 = zext i1 %4 to i32 + %6 = load float, float* %2, align 4 + %7 = fcmp olt float %6, 0.000000e+00 + %8 = zext i1 %7 to i32 + %9 = sub nsw i32 %5, %8 + %10 = sitofp i32 %9 to float + ret float %10 +} + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_sgn(float %0) #1 { + %2 = alloca float, align 4 + store float %0, float* %2, align 4 + %3 = load float, float* %2, align 4 + %4 = fcmp ogt float %3, 0.000000e+00 + %5 = zext i1 %4 to i32 + %6 = load float, float* %2, align 4 + %7 = fcmp olt float %6, 0.000000e+00 + %8 = zext i1 %7 to i32 + %9 = sub nsw i32 %5, %8 + %10 = sitofp i32 %9 to float + ret float %10 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @naive_norm(float* %0, i32 %1) #0 { + %3 = alloca float*, align 8 + %4 = alloca i32, align 4 + %5 = alloca float, align 4 + %6 = alloca i32, align 4 + store float* %0, float** %3, align 8 + store i32 %1, i32* %4, align 4 + store float 0.000000e+00, float* %5, align 4 + store i32 0, i32* %6, align 4 + br label %7 + +7: ; preds = %25, %2 + %8 = load i32, i32* %6, align 4 + %9 = load i32, i32* %4, align 4 + %10 = icmp slt i32 %8, %9 + br i1 %10, label %11, label %28 + +11: ; preds = %7 + %12 = load float*, float** %3, align 8 + %13 = load i32, i32* %6, align 4 + %14 = sext i32 %13 to i64 + %15 = getelementptr inbounds float, float* %12, i64 %14 + %16 = load float, float* %15, align 4 + %17 = load float*, float** %3, align 8 + %18 = load i32, i32* %6, align 4 + %19 = sext i32 %18 to i64 + %20 = getelementptr inbounds float, float* %17, i64 %19 + %21 = load float, float* %20, align 4 + %22 = fmul float %16, %21 + %23 = load float, float* %5, align 4 + %24 = fadd float %23, %22 + store float %24, float* %5, align 4 + br label %25 + +25: ; preds = %11 + %26 = load i32, i32* %6, align 4 + %27 = add nsw i32 %26, 1 + store i32 %27, i32* %6, align 4 + br label %7 + +28: ; preds = %7 + %29 = load float, float* %5, align 4 + %30 = call float @llvm.sqrt.f32(float %29) + ret float %30 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32(float) #2 + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_naive_norm(float* %0, i32 %1) #1 { + %3 = alloca float*, align 8 + %4 = alloca i32, align 4 + %5 = alloca float, align 4 + %6 = alloca i32, align 4 + store float* %0, float** %3, align 8 + store i32 %1, i32* %4, align 4 + store float 0.000000e+00, float* %5, align 4 + store i32 0, i32* %6, align 4 + br label %7 + +7: ; preds = %25, %2 + %8 = load i32, i32* %6, align 4 + %9 = load i32, i32* %4, align 4 + %10 = icmp slt i32 %8, %9 + br i1 %10, label %11, label %28 + +11: ; preds = %7 + %12 = load float*, float** %3, align 8 + %13 = load i32, i32* %6, align 4 + %14 = sext i32 %13 to i64 + %15 = 
getelementptr inbounds float, float* %12, i64 %14 + %16 = load float, float* %15, align 4 + %17 = load float*, float** %3, align 8 + %18 = load i32, i32* %6, align 4 + %19 = sext i32 %18 to i64 + %20 = getelementptr inbounds float, float* %17, i64 %19 + %21 = load float, float* %20, align 4 + %22 = fmul float %16, %21 + %23 = load float, float* %5, align 4 + %24 = fadd float %23, %22 + store float %24, float* %5, align 4 + br label %25 + +25: ; preds = %11 + %26 = load i32, i32* %6, align 4 + %27 = add nsw i32 %26, 1 + store i32 %27, i32* %6, align 4 + br label %7 + +28: ; preds = %7 + %29 = load float, float* %5, align 4 + %30 = call float @llvm.sqrt.f32(float %29) + ret float %30 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_transpose(float* %0) #0 { + %2 = alloca float*, align 8 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca float, align 4 + store float* %0, float** %2, align 8 + store i32 0, i32* %3, align 4 + br label %6 + +6: ; preds = %51, %1 + %7 = load i32, i32* %3, align 4 + %8 = icmp slt i32 %7, 2 + br i1 %8, label %9, label %54 + +9: ; preds = %6 + %10 = load i32, i32* %3, align 4 + %11 = add nsw i32 %10, 1 + store i32 %11, i32* %4, align 4 + br label %12 + +12: ; preds = %47, %9 + %13 = load i32, i32* %4, align 4 + %14 = icmp slt i32 %13, 2 + br i1 %14, label %15, label %50 + +15: ; preds = %12 + %16 = load float*, float** %2, align 8 + %17 = load i32, i32* %3, align 4 + %18 = mul nsw i32 %17, 2 + %19 = load i32, i32* %4, align 4 + %20 = add nsw i32 %18, %19 + %21 = sext i32 %20 to i64 + %22 = getelementptr inbounds float, float* %16, i64 %21 + %23 = load float, float* %22, align 4 + store float %23, float* %5, align 4 + %24 = load float*, float** %2, align 8 + %25 = load i32, i32* %4, align 4 + %26 = mul nsw i32 %25, 2 + %27 = load i32, i32* %3, align 4 + %28 = add nsw i32 %26, %27 + %29 = sext i32 %28 to i64 + %30 = getelementptr inbounds float, float* %24, i64 %29 + %31 = load float, float* %30, align 4 + %32 = load float*, float** %2, align 8 + %33 = load i32, i32* %3, align 4 + %34 = mul nsw i32 %33, 2 + %35 = load i32, i32* %4, align 4 + %36 = add nsw i32 %34, %35 + %37 = sext i32 %36 to i64 + %38 = getelementptr inbounds float, float* %32, i64 %37 + store float %31, float* %38, align 4 + %39 = load float, float* %5, align 4 + %40 = load float*, float** %2, align 8 + %41 = load i32, i32* %4, align 4 + %42 = mul nsw i32 %41, 2 + %43 = load i32, i32* %3, align 4 + %44 = add nsw i32 %42, %43 + %45 = sext i32 %44 to i64 + %46 = getelementptr inbounds float, float* %40, i64 %45 + store float %39, float* %46, align 4 + br label %47 + +47: ; preds = %15 + %48 = load i32, i32* %4, align 4 + %49 = add nsw i32 %48, 1 + store i32 %49, i32* %4, align 4 + br label %12 + +50: ; preds = %12 + br label %51 + +51: ; preds = %50 + %52 = load i32, i32* %3, align 4 + %53 = add nsw i32 %52, 1 + store i32 %53, i32* %3, align 4 + br label %6 + +54: ; preds = %6 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_transpose(float* %0) #1 { + %2 = alloca float*, align 8 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca float, align 4 + store float* %0, float** %2, align 8 + store i32 0, i32* %3, align 4 + br label %6 + +6: ; preds = %51, %1 + %7 = load i32, i32* %3, align 4 + %8 = icmp slt i32 %7, 2 + br i1 %8, label %9, label %54 + +9: ; preds = %6 + %10 = load i32, i32* %3, align 4 + %11 = add nsw i32 %10, 1 + store i32 %11, i32* %4, align 4 + br label %12 + +12: ; preds = 
%47, %9 + %13 = load i32, i32* %4, align 4 + %14 = icmp slt i32 %13, 2 + br i1 %14, label %15, label %50 + +15: ; preds = %12 + %16 = load float*, float** %2, align 8 + %17 = load i32, i32* %3, align 4 + %18 = mul nsw i32 %17, 2 + %19 = load i32, i32* %4, align 4 + %20 = add nsw i32 %18, %19 + %21 = sext i32 %20 to i64 + %22 = getelementptr inbounds float, float* %16, i64 %21 + %23 = load float, float* %22, align 4 + store float %23, float* %5, align 4 + %24 = load float*, float** %2, align 8 + %25 = load i32, i32* %4, align 4 + %26 = mul nsw i32 %25, 2 + %27 = load i32, i32* %3, align 4 + %28 = add nsw i32 %26, %27 + %29 = sext i32 %28 to i64 + %30 = getelementptr inbounds float, float* %24, i64 %29 + %31 = load float, float* %30, align 4 + %32 = load float*, float** %2, align 8 + %33 = load i32, i32* %3, align 4 + %34 = mul nsw i32 %33, 2 + %35 = load i32, i32* %4, align 4 + %36 = add nsw i32 %34, %35 + %37 = sext i32 %36 to i64 + %38 = getelementptr inbounds float, float* %32, i64 %37 + store float %31, float* %38, align 4 + %39 = load float, float* %5, align 4 + %40 = load float*, float** %2, align 8 + %41 = load i32, i32* %4, align 4 + %42 = mul nsw i32 %41, 2 + %43 = load i32, i32* %3, align 4 + %44 = add nsw i32 %42, %43 + %45 = sext i32 %44 to i64 + %46 = getelementptr inbounds float, float* %40, i64 %45 + store float %39, float* %46, align 4 + br label %47 + +47: ; preds = %15 + %48 = load i32, i32* %4, align 4 + %49 = add nsw i32 %48, 1 + store i32 %49, i32* %4, align 4 + br label %12 + +50: ; preds = %12 + br label %51 + +51: ; preds = %50 + %52 = load i32, i32* %3, align 4 + %53 = add nsw i32 %52, 1 + store i32 %53, i32* %3, align 4 + br label %6 + +54: ; preds = %6 + ret void +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { + %4 = alloca float*, align 8 + %5 = alloca float*, align 8 + %6 = alloca float*, align 8 + %7 = alloca i32, align 4 + %8 = alloca i32, align 4 + %9 = alloca i32, align 4 + store float* %0, float** %4, align 8 + store float* %1, float** %5, align 8 + store float* %2, float** %6, align 8 + store i32 0, i32* %7, align 4 + br label %10 + +10: ; preds = %63, %3 + %11 = load i32, i32* %7, align 4 + %12 = icmp slt i32 %11, 2 + br i1 %12, label %13, label %66 + +13: ; preds = %10 + store i32 0, i32* %8, align 4 + br label %14 + +14: ; preds = %59, %13 + %15 = load i32, i32* %8, align 4 + %16 = icmp slt i32 %15, 2 + br i1 %16, label %17, label %62 + +17: ; preds = %14 + %18 = load float*, float** %6, align 8 + %19 = load i32, i32* %7, align 4 + %20 = mul nsw i32 2, %19 + %21 = load i32, i32* %8, align 4 + %22 = add nsw i32 %20, %21 + %23 = sext i32 %22 to i64 + %24 = getelementptr inbounds float, float* %18, i64 %23 + store float 0.000000e+00, float* %24, align 4 + store i32 0, i32* %9, align 4 + br label %25 + +25: ; preds = %55, %17 + %26 = load i32, i32* %9, align 4 + %27 = icmp slt i32 %26, 2 + br i1 %27, label %28, label %58 + +28: ; preds = %25 + %29 = load float*, float** %4, align 8 + %30 = load i32, i32* %7, align 4 + %31 = mul nsw i32 2, %30 + %32 = load i32, i32* %9, align 4 + %33 = add nsw i32 %31, %32 + %34 = sext i32 %33 to i64 + %35 = getelementptr inbounds float, float* %29, i64 %34 + %36 = load float, float* %35, align 4 + %37 = load float*, float** %5, align 8 + %38 = load i32, i32* %9, align 4 + %39 = mul nsw i32 2, %38 + %40 = load i32, i32* %8, align 4 + %41 = add nsw i32 %39, %40 + %42 = sext i32 %41 to i64 + %43 = getelementptr inbounds float, float* %37, 
i64 %42 + %44 = load float, float* %43, align 4 + %45 = fmul float %36, %44 + %46 = load float*, float** %6, align 8 + %47 = load i32, i32* %7, align 4 + %48 = mul nsw i32 2, %47 + %49 = load i32, i32* %8, align 4 + %50 = add nsw i32 %48, %49 + %51 = sext i32 %50 to i64 + %52 = getelementptr inbounds float, float* %46, i64 %51 + %53 = load float, float* %52, align 4 + %54 = fadd float %53, %45 + store float %54, float* %52, align 4 + br label %55 + +55: ; preds = %28 + %56 = load i32, i32* %9, align 4 + %57 = add nsw i32 %56, 1 + store i32 %57, i32* %9, align 4 + br label %25 + +58: ; preds = %25 + br label %59 + +59: ; preds = %58 + %60 = load i32, i32* %8, align 4 + %61 = add nsw i32 %60, 1 + store i32 %61, i32* %8, align 4 + br label %14 + +62: ; preds = %14 + br label %63 + +63: ; preds = %62 + %64 = load i32, i32* %7, align 4 + %65 = add nsw i32 %64, 1 + store i32 %65, i32* %7, align 4 + br label %10 + +66: ; preds = %10 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { + %4 = alloca float*, align 8 + %5 = alloca float*, align 8 + %6 = alloca float*, align 8 + %7 = alloca i32, align 4 + %8 = alloca i32, align 4 + %9 = alloca i32, align 4 + store float* %0, float** %4, align 8 + store float* %1, float** %5, align 8 + store float* %2, float** %6, align 8 + store i32 0, i32* %7, align 4 + br label %10 + +10: ; preds = %63, %3 + %11 = load i32, i32* %7, align 4 + %12 = icmp slt i32 %11, 2 + br i1 %12, label %13, label %66 + +13: ; preds = %10 + store i32 0, i32* %8, align 4 + br label %14 + +14: ; preds = %59, %13 + %15 = load i32, i32* %8, align 4 + %16 = icmp slt i32 %15, 2 + br i1 %16, label %17, label %62 + +17: ; preds = %14 + %18 = load float*, float** %6, align 8 + %19 = load i32, i32* %7, align 4 + %20 = mul nsw i32 2, %19 + %21 = load i32, i32* %8, align 4 + %22 = add nsw i32 %20, %21 + %23 = sext i32 %22 to i64 + %24 = getelementptr inbounds float, float* %18, i64 %23 + store float 0.000000e+00, float* %24, align 4 + store i32 0, i32* %9, align 4 + br label %25 + +25: ; preds = %55, %17 + %26 = load i32, i32* %9, align 4 + %27 = icmp slt i32 %26, 2 + br i1 %27, label %28, label %58 + +28: ; preds = %25 + %29 = load float*, float** %4, align 8 + %30 = load i32, i32* %7, align 4 + %31 = mul nsw i32 2, %30 + %32 = load i32, i32* %9, align 4 + %33 = add nsw i32 %31, %32 + %34 = sext i32 %33 to i64 + %35 = getelementptr inbounds float, float* %29, i64 %34 + %36 = load float, float* %35, align 4 + %37 = load float*, float** %5, align 8 + %38 = load i32, i32* %9, align 4 + %39 = mul nsw i32 2, %38 + %40 = load i32, i32* %8, align 4 + %41 = add nsw i32 %39, %40 + %42 = sext i32 %41 to i64 + %43 = getelementptr inbounds float, float* %37, i64 %42 + %44 = load float, float* %43, align 4 + %45 = fmul float %36, %44 + %46 = load float*, float** %6, align 8 + %47 = load i32, i32* %7, align 4 + %48 = mul nsw i32 2, %47 + %49 = load i32, i32* %8, align 4 + %50 = add nsw i32 %48, %49 + %51 = sext i32 %50 to i64 + %52 = getelementptr inbounds float, float* %46, i64 %51 + %53 = load float, float* %52, align 4 + %54 = fadd float %53, %45 + store float %54, float* %52, align 4 + br label %55 + +55: ; preds = %28 + %56 = load i32, i32* %9, align 4 + %57 = add nsw i32 %56, 1 + store i32 %57, i32* %9, align 4 + br label %25 + +58: ; preds = %25 + br label %59 + +59: ; preds = %58 + %60 = load i32, i32* %8, align 4 + %61 = add nsw i32 %60, 1 + store i32 %61, i32* %8, align 4 + br label %14 + +62: ; 
preds = %14 + br label %63 + +63: ; preds = %62 + %64 = load i32, i32* %7, align 4 + %65 = add nsw i32 %64, 1 + store i32 %65, i32* %7, align 4 + br label %10 + +66: ; preds = %10 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { + %4 = alloca float*, align 8 + %5 = alloca i32, align 4 + %6 = alloca float, align 4 + %7 = alloca i32, align 4 + %8 = alloca float*, align 8 + %9 = alloca i32, align 4 + %10 = alloca float, align 4 + %11 = alloca i32, align 4 + %12 = alloca float*, align 8 + %13 = alloca float*, align 8 + %14 = alloca float*, align 8 + %15 = alloca i32, align 4 + %16 = alloca i32, align 4 + %17 = alloca i32, align 4 + %18 = alloca float*, align 8 + %19 = alloca float*, align 8 + %20 = alloca float*, align 8 + %21 = alloca i32, align 4 + %22 = alloca i32, align 4 + %23 = alloca i32, align 4 + %24 = alloca float*, align 8 + %25 = alloca float*, align 8 + %26 = alloca float*, align 8 + %27 = alloca i32, align 4 + %28 = alloca i32, align 4 + %29 = alloca i32, align 4 + %30 = alloca float*, align 8 + %31 = alloca i32, align 4 + %32 = alloca i32, align 4 + %33 = alloca float, align 4 + %34 = alloca float, align 4 + %35 = alloca float*, align 8 + %36 = alloca float*, align 8 + %37 = alloca float*, align 8 + %38 = alloca i32, align 4 + %39 = alloca [4 x float], align 16 + %40 = alloca i32, align 4 + %41 = alloca i32, align 4 + %42 = alloca i32, align 4 + %43 = alloca i32, align 4 + %44 = alloca [2 x float], align 4 + %45 = alloca [2 x float], align 4 + %46 = alloca i32, align 4 + %47 = alloca i32, align 4 + %48 = alloca i32, align 4 + %49 = alloca float, align 4 + %50 = alloca [2 x float], align 4 + %51 = alloca [2 x float], align 4 + %52 = alloca i32, align 4 + %53 = alloca i32, align 4 + %54 = alloca float, align 4 + %55 = alloca i32, align 4 + %56 = alloca [4 x float], align 16 + %57 = alloca i32, align 4 + %58 = alloca i32, align 4 + %59 = alloca i32, align 4 + %60 = alloca float, align 4 + %61 = alloca [4 x float], align 16 + %62 = alloca i32, align 4 + %63 = alloca i32, align 4 + %64 = alloca i32, align 4 + %65 = alloca float, align 4 + %66 = alloca i32, align 4 + %67 = alloca [4 x float], align 16 + %68 = alloca i32, align 4 + %69 = alloca i32, align 4 + %70 = alloca i32, align 4 + store float* %0, float** %35, align 8 + store float* %1, float** %36, align 8 + store float* %2, float** %37, align 8 + store i32 0, i32* %38, align 4 + br label %71 + +71: ; preds = %84, %3 + %72 = load i32, i32* %38, align 4 + %73 = icmp slt i32 %72, 4 + br i1 %73, label %74, label %87 + +74: ; preds = %71 + %75 = load float*, float** %35, align 8 + %76 = load i32, i32* %38, align 4 + %77 = sext i32 %76 to i64 + %78 = getelementptr inbounds float, float* %75, i64 %77 + %79 = load float, float* %78, align 4 + %80 = load float*, float** %37, align 8 + %81 = load i32, i32* %38, align 4 + %82 = sext i32 %81 to i64 + %83 = getelementptr inbounds float, float* %80, i64 %82 + store float %79, float* %83, align 4 + br label %84 + +84: ; preds = %74 + %85 = load i32, i32* %38, align 4 + %86 = add nsw i32 %85, 1 + store i32 %86, i32* %38, align 4 + br label %71 + +87: ; preds = %71 + %88 = bitcast [4 x float]* %39 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %88, i8 0, i64 16, i1 false) + store i32 0, i32* %40, align 4 + br label %89 + +89: ; preds = %112, %87 + %90 = load i32, i32* %40, align 4 + %91 = icmp slt i32 %90, 2 + br i1 %91, label %92, label %115 + +92: ; preds = %89 + store i32 0, i32* %41, align 4 + 
br label %93 + +93: ; preds = %108, %92 + %94 = load i32, i32* %41, align 4 + %95 = icmp slt i32 %94, 2 + br i1 %95, label %96, label %111 + +96: ; preds = %93 + %97 = load i32, i32* %40, align 4 + %98 = load i32, i32* %41, align 4 + %99 = icmp eq i32 %97, %98 + %100 = zext i1 %99 to i32 + %101 = sitofp i32 %100 to float + %102 = load i32, i32* %40, align 4 + %103 = mul nsw i32 %102, 2 + %104 = load i32, i32* %41, align 4 + %105 = add nsw i32 %103, %104 + %106 = sext i32 %105 to i64 + %107 = getelementptr inbounds [4 x float], [4 x float]* %39, i64 0, i64 %106 + store float %101, float* %107, align 4 + br label %108 + +108: ; preds = %96 + %109 = load i32, i32* %41, align 4 + %110 = add nsw i32 %109, 1 + store i32 %110, i32* %41, align 4 + br label %93 + +111: ; preds = %93 + br label %112 + +112: ; preds = %111 + %113 = load i32, i32* %40, align 4 + %114 = add nsw i32 %113, 1 + store i32 %114, i32* %40, align 4 + br label %89 + +115: ; preds = %89 + store i32 0, i32* %42, align 4 + br label %116 + +116: ; preds = %643, %115 + %117 = load i32, i32* %42, align 4 + %118 = icmp slt i32 %117, 1 + br i1 %118, label %119, label %646 + +119: ; preds = %116 + %120 = load i32, i32* %42, align 4 + %121 = sub nsw i32 2, %120 + store i32 %121, i32* %43, align 4 + %122 = bitcast [2 x float]* %44 to i8* + call void @llvm.memset.p0i8.i64(i8* align 4 %122, i8 0, i64 8, i1 false) + %123 = bitcast [2 x float]* %45 to i8* + call void @llvm.memset.p0i8.i64(i8* align 4 %123, i8 0, i64 8, i1 false) + store i32 0, i32* %46, align 4 + br label %124 + +124: ; preds = %134, %119 + %125 = load i32, i32* %46, align 4 + %126 = icmp slt i32 %125, 2 + br i1 %126, label %127, label %137 + +127: ; preds = %124 + %128 = load i32, i32* %46, align 4 + %129 = sext i32 %128 to i64 + %130 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 %129 + store float 0.000000e+00, float* %130, align 4 + %131 = load i32, i32* %46, align 4 + %132 = sext i32 %131 to i64 + %133 = getelementptr inbounds [2 x float], [2 x float]* %45, i64 0, i64 %132 + store float 0.000000e+00, float* %133, align 4 + br label %134 + +134: ; preds = %127 + %135 = load i32, i32* %46, align 4 + %136 = add nsw i32 %135, 1 + store i32 %136, i32* %46, align 4 + br label %124 + +137: ; preds = %124 + store i32 0, i32* %47, align 4 + br label %138 + +138: ; preds = %167, %137 + %139 = load i32, i32* %47, align 4 + %140 = load i32, i32* %43, align 4 + %141 = icmp slt i32 %139, %140 + br i1 %141, label %142, label %170 + +142: ; preds = %138 + %143 = load i32, i32* %42, align 4 + %144 = load i32, i32* %47, align 4 + %145 = add nsw i32 %143, %144 + store i32 %145, i32* %48, align 4 + %146 = load float*, float** %37, align 8 + %147 = load i32, i32* %48, align 4 + %148 = mul nsw i32 %147, 2 + %149 = load i32, i32* %42, align 4 + %150 = add nsw i32 %148, %149 + %151 = sext i32 %150 to i64 + %152 = getelementptr inbounds float, float* %146, i64 %151 + %153 = load float, float* %152, align 4 + %154 = load i32, i32* %47, align 4 + %155 = sext i32 %154 to i64 + %156 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 %155 + store float %153, float* %156, align 4 + %157 = load i32, i32* %48, align 4 + %158 = mul nsw i32 %157, 2 + %159 = load i32, i32* %42, align 4 + %160 = add nsw i32 %158, %159 + %161 = sext i32 %160 to i64 + %162 = getelementptr inbounds [4 x float], [4 x float]* %39, i64 0, i64 %161 + %163 = load float, float* %162, align 4 + %164 = load i32, i32* %47, align 4 + %165 = sext i32 %164 to i64 + %166 = getelementptr inbounds [2 x 
float], [2 x float]* %45, i64 0, i64 %165 + store float %163, float* %166, align 4 + br label %167 + +167: ; preds = %142 + %168 = load i32, i32* %47, align 4 + %169 = add nsw i32 %168, 1 + store i32 %169, i32* %47, align 4 + br label %138 + +170: ; preds = %138 + %171 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 0 + %172 = load float, float* %171, align 4 + store float %172, float* %34, align 4 + %173 = load float, float* %34, align 4 + %174 = fcmp ogt float %173, 0.000000e+00 + %175 = zext i1 %174 to i32 + %176 = load float, float* %34, align 4 + %177 = fcmp olt float %176, 0.000000e+00 + %178 = zext i1 %177 to i32 + %179 = sub nsw i32 %175, %178 + %180 = sitofp i32 %179 to float + %181 = fneg float %180 + %182 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 0 + %183 = load i32, i32* %43, align 4 + store float* %182, float** %4, align 8 + store i32 %183, i32* %5, align 4 + store float 0.000000e+00, float* %6, align 4 + store i32 0, i32* %7, align 4 + br label %184 + +184: ; preds = %188, %170 + %185 = load i32, i32* %7, align 4 + %186 = load i32, i32* %5, align 4 + %187 = icmp slt i32 %185, %186 + br i1 %187, label %188, label %204 + +188: ; preds = %184 + %189 = load float*, float** %4, align 8 + %190 = load i32, i32* %7, align 4 + %191 = sext i32 %190 to i64 + %192 = getelementptr inbounds float, float* %189, i64 %191 + %193 = load float, float* %192, align 4 + %194 = load float*, float** %4, align 8 + %195 = load i32, i32* %7, align 4 + %196 = sext i32 %195 to i64 + %197 = getelementptr inbounds float, float* %194, i64 %196 + %198 = load float, float* %197, align 4 + %199 = fmul float %193, %198 + %200 = load float, float* %6, align 4 + %201 = fadd float %200, %199 + store float %201, float* %6, align 4 + %202 = load i32, i32* %7, align 4 + %203 = add nsw i32 %202, 1 + store i32 %203, i32* %7, align 4 + br label %184 + +204: ; preds = %184 + %205 = load float, float* %6, align 4 + %206 = call float @llvm.sqrt.f32(float %205) #7 + %207 = fmul float %181, %206 + store float %207, float* %49, align 4 + %208 = bitcast [2 x float]* %50 to i8* + call void @llvm.memset.p0i8.i64(i8* align 4 %208, i8 0, i64 8, i1 false) + %209 = bitcast [2 x float]* %51 to i8* + call void @llvm.memset.p0i8.i64(i8* align 4 %209, i8 0, i64 8, i1 false) + store i32 0, i32* %52, align 4 + br label %210 + +210: ; preds = %220, %204 + %211 = load i32, i32* %52, align 4 + %212 = icmp slt i32 %211, 2 + br i1 %212, label %213, label %223 + +213: ; preds = %210 + %214 = load i32, i32* %52, align 4 + %215 = sext i32 %214 to i64 + %216 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 %215 + store float 0.000000e+00, float* %216, align 4 + %217 = load i32, i32* %52, align 4 + %218 = sext i32 %217 to i64 + %219 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %218 + store float 0.000000e+00, float* %219, align 4 + br label %220 + +220: ; preds = %213 + %221 = load i32, i32* %52, align 4 + %222 = add nsw i32 %221, 1 + store i32 %222, i32* %52, align 4 + br label %210 + +223: ; preds = %210 + store i32 0, i32* %53, align 4 + br label %224 + +224: ; preds = %243, %223 + %225 = load i32, i32* %53, align 4 + %226 = load i32, i32* %43, align 4 + %227 = icmp slt i32 %225, %226 + br i1 %227, label %228, label %246 + +228: ; preds = %224 + %229 = load i32, i32* %53, align 4 + %230 = sext i32 %229 to i64 + %231 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 %230 + %232 = load float, float* %231, align 4 + %233 = load float, float* %49, 
align 4 + %234 = load i32, i32* %53, align 4 + %235 = sext i32 %234 to i64 + %236 = getelementptr inbounds [2 x float], [2 x float]* %45, i64 0, i64 %235 + %237 = load float, float* %236, align 4 + %238 = fmul float %233, %237 + %239 = fadd float %232, %238 + %240 = load i32, i32* %53, align 4 + %241 = sext i32 %240 to i64 + %242 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 %241 + store float %239, float* %242, align 4 + br label %243 + +243: ; preds = %228 + %244 = load i32, i32* %53, align 4 + %245 = add nsw i32 %244, 1 + store i32 %245, i32* %53, align 4 + br label %224 + +246: ; preds = %224 + %247 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 0 + %248 = load i32, i32* %43, align 4 + store float* %247, float** %8, align 8 + store i32 %248, i32* %9, align 4 + store float 0.000000e+00, float* %10, align 4 + store i32 0, i32* %11, align 4 + br label %249 + +249: ; preds = %253, %246 + %250 = load i32, i32* %11, align 4 + %251 = load i32, i32* %9, align 4 + %252 = icmp slt i32 %250, %251 + br i1 %252, label %253, label %269 + +253: ; preds = %249 + %254 = load float*, float** %8, align 8 + %255 = load i32, i32* %11, align 4 + %256 = sext i32 %255 to i64 + %257 = getelementptr inbounds float, float* %254, i64 %256 + %258 = load float, float* %257, align 4 + %259 = load float*, float** %8, align 8 + %260 = load i32, i32* %11, align 4 + %261 = sext i32 %260 to i64 + %262 = getelementptr inbounds float, float* %259, i64 %261 + %263 = load float, float* %262, align 4 + %264 = fmul float %258, %263 + %265 = load float, float* %10, align 4 + %266 = fadd float %265, %264 + store float %266, float* %10, align 4 + %267 = load i32, i32* %11, align 4 + %268 = add nsw i32 %267, 1 + store i32 %268, i32* %11, align 4 + br label %249 + +269: ; preds = %249 + %270 = load float, float* %10, align 4 + %271 = call float @llvm.sqrt.f32(float %270) #7 + store float %271, float* %54, align 4 + store i32 0, i32* %55, align 4 + br label %272 + +272: ; preds = %287, %269 + %273 = load i32, i32* %55, align 4 + %274 = load i32, i32* %43, align 4 + %275 = icmp slt i32 %273, %274 + br i1 %275, label %276, label %290 + +276: ; preds = %272 + %277 = load i32, i32* %55, align 4 + %278 = sext i32 %277 to i64 + %279 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 %278 + %280 = load float, float* %279, align 4 + %281 = load float, float* %54, align 4 + %282 = fadd float %281, 0x3EE4F8B580000000 + %283 = fdiv float %280, %282 + %284 = load i32, i32* %55, align 4 + %285 = sext i32 %284 to i64 + %286 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %285 + store float %283, float* %286, align 4 + br label %287 + +287: ; preds = %276 + %288 = load i32, i32* %55, align 4 + %289 = add nsw i32 %288, 1 + store i32 %289, i32* %55, align 4 + br label %272 + +290: ; preds = %272 + %291 = bitcast [4 x float]* %56 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %291, i8 0, i64 16, i1 false) + store i32 0, i32* %57, align 4 + br label %292 + +292: ; preds = %299, %290 + %293 = load i32, i32* %57, align 4 + %294 = icmp slt i32 %293, 4 + br i1 %294, label %295, label %302 + +295: ; preds = %292 + %296 = load i32, i32* %57, align 4 + %297 = sext i32 %296 to i64 + %298 = getelementptr inbounds [4 x float], [4 x float]* %56, i64 0, i64 %297 + store float 0.000000e+00, float* %298, align 4 + br label %299 + +299: ; preds = %295 + %300 = load i32, i32* %57, align 4 + %301 = add nsw i32 %300, 1 + store i32 %301, i32* %57, align 4 + br label %292 + +302: ; preds = 
%292 + store i32 0, i32* %58, align 4 + br label %303 + +303: ; preds = %341, %302 + %304 = load i32, i32* %58, align 4 + %305 = load i32, i32* %43, align 4 + %306 = icmp slt i32 %304, %305 + br i1 %306, label %307, label %344 + +307: ; preds = %303 + store i32 0, i32* %59, align 4 + br label %308 + +308: ; preds = %337, %307 + %309 = load i32, i32* %59, align 4 + %310 = load i32, i32* %43, align 4 + %311 = icmp slt i32 %309, %310 + br i1 %311, label %312, label %340 + +312: ; preds = %308 + %313 = load i32, i32* %58, align 4 + %314 = load i32, i32* %59, align 4 + %315 = icmp eq i32 %313, %314 + %316 = zext i1 %315 to i64 + %317 = select i1 %315, float 1.000000e+00, float 0.000000e+00 + %318 = load i32, i32* %58, align 4 + %319 = sext i32 %318 to i64 + %320 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %319 + %321 = load float, float* %320, align 4 + %322 = fmul float 2.000000e+00, %321 + %323 = load i32, i32* %59, align 4 + %324 = sext i32 %323 to i64 + %325 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %324 + %326 = load float, float* %325, align 4 + %327 = fmul float %322, %326 + %328 = fsub float %317, %327 + store float %328, float* %60, align 4 + %329 = load float, float* %60, align 4 + %330 = load i32, i32* %58, align 4 + %331 = load i32, i32* %43, align 4 + %332 = mul nsw i32 %330, %331 + %333 = load i32, i32* %59, align 4 + %334 = add nsw i32 %332, %333 + %335 = sext i32 %334 to i64 + %336 = getelementptr inbounds [4 x float], [4 x float]* %56, i64 0, i64 %335 + store float %329, float* %336, align 4 + br label %337 + +337: ; preds = %312 + %338 = load i32, i32* %59, align 4 + %339 = add nsw i32 %338, 1 + store i32 %339, i32* %59, align 4 + br label %308 + +340: ; preds = %308 + br label %341 + +341: ; preds = %340 + %342 = load i32, i32* %58, align 4 + %343 = add nsw i32 %342, 1 + store i32 %343, i32* %58, align 4 + br label %303 + +344: ; preds = %303 + %345 = bitcast [4 x float]* %61 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %345, i8 0, i64 16, i1 false) + store i32 0, i32* %62, align 4 + br label %346 + +346: ; preds = %353, %344 + %347 = load i32, i32* %62, align 4 + %348 = icmp slt i32 %347, 4 + br i1 %348, label %349, label %356 + +349: ; preds = %346 + %350 = load i32, i32* %62, align 4 + %351 = sext i32 %350 to i64 + %352 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 %351 + store float 0.000000e+00, float* %352, align 4 + br label %353 + +353: ; preds = %349 + %354 = load i32, i32* %62, align 4 + %355 = add nsw i32 %354, 1 + store i32 %355, i32* %62, align 4 + br label %346 + +356: ; preds = %346 + store i32 0, i32* %63, align 4 + br label %357 + +357: ; preds = %403, %356 + %358 = load i32, i32* %63, align 4 + %359 = icmp slt i32 %358, 2 + br i1 %359, label %360, label %406 + +360: ; preds = %357 + store i32 0, i32* %64, align 4 + br label %361 + +361: ; preds = %399, %360 + %362 = load i32, i32* %64, align 4 + %363 = icmp slt i32 %362, 2 + br i1 %363, label %364, label %402 + +364: ; preds = %361 + %365 = load i32, i32* %63, align 4 + %366 = load i32, i32* %42, align 4 + %367 = icmp slt i32 %365, %366 + br i1 %367, label %372, label %368 + +368: ; preds = %364 + %369 = load i32, i32* %64, align 4 + %370 = load i32, i32* %42, align 4 + %371 = icmp slt i32 %369, %370 + br i1 %371, label %372, label %378 + +372: ; preds = %368, %364 + %373 = load i32, i32* %63, align 4 + %374 = load i32, i32* %64, align 4 + %375 = icmp eq i32 %373, %374 + %376 = zext i1 %375 to i64 + %377 = select i1 %375, float 
1.000000e+00, float 0.000000e+00 + store float %377, float* %65, align 4 + br label %391 + +378: ; preds = %368 + %379 = load i32, i32* %63, align 4 + %380 = load i32, i32* %42, align 4 + %381 = sub nsw i32 %379, %380 + %382 = load i32, i32* %43, align 4 + %383 = mul nsw i32 %381, %382 + %384 = load i32, i32* %64, align 4 + %385 = load i32, i32* %42, align 4 + %386 = sub nsw i32 %384, %385 + %387 = add nsw i32 %383, %386 + %388 = sext i32 %387 to i64 + %389 = getelementptr inbounds [4 x float], [4 x float]* %56, i64 0, i64 %388 + %390 = load float, float* %389, align 4 + store float %390, float* %65, align 4 + br label %391 + +391: ; preds = %378, %372 + %392 = load float, float* %65, align 4 + %393 = load i32, i32* %63, align 4 + %394 = mul nsw i32 %393, 2 + %395 = load i32, i32* %64, align 4 + %396 = add nsw i32 %394, %395 + %397 = sext i32 %396 to i64 + %398 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 %397 + store float %392, float* %398, align 4 + br label %399 + +399: ; preds = %391 + %400 = load i32, i32* %64, align 4 + %401 = add nsw i32 %400, 1 + store i32 %401, i32* %64, align 4 + br label %361 + +402: ; preds = %361 + br label %403 + +403: ; preds = %402 + %404 = load i32, i32* %63, align 4 + %405 = add nsw i32 %404, 1 + store i32 %405, i32* %63, align 4 + br label %357 + +406: ; preds = %357 + %407 = load i32, i32* %42, align 4 + %408 = icmp eq i32 %407, 0 + br i1 %408, label %409, label %483 + +409: ; preds = %406 + store i32 0, i32* %66, align 4 + br label %410 + +410: ; preds = %422, %409 + %411 = load i32, i32* %66, align 4 + %412 = icmp slt i32 %411, 4 + br i1 %412, label %413, label %425 + +413: ; preds = %410 + %414 = load i32, i32* %66, align 4 + %415 = sext i32 %414 to i64 + %416 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 %415 + %417 = load float, float* %416, align 4 + %418 = load float*, float** %36, align 8 + %419 = load i32, i32* %66, align 4 + %420 = sext i32 %419 to i64 + %421 = getelementptr inbounds float, float* %418, i64 %420 + store float %417, float* %421, align 4 + br label %422 + +422: ; preds = %413 + %423 = load i32, i32* %66, align 4 + %424 = add nsw i32 %423, 1 + store i32 %424, i32* %66, align 4 + br label %410 + +425: ; preds = %410 + %426 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 0 + %427 = load float*, float** %35, align 8 + %428 = load float*, float** %37, align 8 + store float* %426, float** %12, align 8 + store float* %427, float** %13, align 8 + store float* %428, float** %14, align 8 + store i32 0, i32* %15, align 4 + br label %429 + +429: ; preds = %479, %425 + %430 = load i32, i32* %15, align 4 + %431 = icmp slt i32 %430, 2 + br i1 %431, label %432, label %482 + +432: ; preds = %429 + store i32 0, i32* %16, align 4 + br label %433 + +433: ; preds = %476, %432 + %434 = load i32, i32* %16, align 4 + %435 = icmp slt i32 %434, 2 + br i1 %435, label %436, label %479 + +436: ; preds = %433 + %437 = load float*, float** %14, align 8 + %438 = load i32, i32* %15, align 4 + %439 = mul nsw i32 2, %438 + %440 = load i32, i32* %16, align 4 + %441 = add nsw i32 %439, %440 + %442 = sext i32 %441 to i64 + %443 = getelementptr inbounds float, float* %437, i64 %442 + store float 0.000000e+00, float* %443, align 4 + store i32 0, i32* %17, align 4 + br label %444 + +444: ; preds = %447, %436 + %445 = load i32, i32* %17, align 4 + %446 = icmp slt i32 %445, 2 + br i1 %446, label %447, label %476 + +447: ; preds = %444 + %448 = load float*, float** %12, align 8 + %449 = load i32, i32* %15, 
align 4 + %450 = mul nsw i32 2, %449 + %451 = load i32, i32* %17, align 4 + %452 = add nsw i32 %450, %451 + %453 = sext i32 %452 to i64 + %454 = getelementptr inbounds float, float* %448, i64 %453 + %455 = load float, float* %454, align 4 + %456 = load float*, float** %13, align 8 + %457 = load i32, i32* %17, align 4 + %458 = mul nsw i32 2, %457 + %459 = load i32, i32* %16, align 4 + %460 = add nsw i32 %458, %459 + %461 = sext i32 %460 to i64 + %462 = getelementptr inbounds float, float* %456, i64 %461 + %463 = load float, float* %462, align 4 + %464 = fmul float %455, %463 + %465 = load float*, float** %14, align 8 + %466 = load i32, i32* %15, align 4 + %467 = mul nsw i32 2, %466 + %468 = load i32, i32* %16, align 4 + %469 = add nsw i32 %467, %468 + %470 = sext i32 %469 to i64 + %471 = getelementptr inbounds float, float* %465, i64 %470 + %472 = load float, float* %471, align 4 + %473 = fadd float %472, %464 + store float %473, float* %471, align 4 + %474 = load i32, i32* %17, align 4 + %475 = add nsw i32 %474, 1 + store i32 %475, i32* %17, align 4 + br label %444 + +476: ; preds = %444 + %477 = load i32, i32* %16, align 4 + %478 = add nsw i32 %477, 1 + store i32 %478, i32* %16, align 4 + br label %433 + +479: ; preds = %433 + %480 = load i32, i32* %15, align 4 + %481 = add nsw i32 %480, 1 + store i32 %481, i32* %15, align 4 + br label %429 + +482: ; preds = %429 + br label %642 + +483: ; preds = %406 + %484 = bitcast [4 x float]* %67 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %484, i8 0, i64 16, i1 false) + store i32 0, i32* %68, align 4 + br label %485 + +485: ; preds = %492, %483 + %486 = load i32, i32* %68, align 4 + %487 = icmp slt i32 %486, 4 + br i1 %487, label %488, label %495 + +488: ; preds = %485 + %489 = load i32, i32* %68, align 4 + %490 = sext i32 %489 to i64 + %491 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 %490 + store float 0.000000e+00, float* %491, align 4 + br label %492 + +492: ; preds = %488 + %493 = load i32, i32* %68, align 4 + %494 = add nsw i32 %493, 1 + store i32 %494, i32* %68, align 4 + br label %485 + +495: ; preds = %485 + %496 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 0 + %497 = load float*, float** %36, align 8 + %498 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 0 + store float* %496, float** %18, align 8 + store float* %497, float** %19, align 8 + store float* %498, float** %20, align 8 + store i32 0, i32* %21, align 4 + br label %499 + +499: ; preds = %549, %495 + %500 = load i32, i32* %21, align 4 + %501 = icmp slt i32 %500, 2 + br i1 %501, label %502, label %552 + +502: ; preds = %499 + store i32 0, i32* %22, align 4 + br label %503 + +503: ; preds = %546, %502 + %504 = load i32, i32* %22, align 4 + %505 = icmp slt i32 %504, 2 + br i1 %505, label %506, label %549 + +506: ; preds = %503 + %507 = load float*, float** %20, align 8 + %508 = load i32, i32* %21, align 4 + %509 = mul nsw i32 2, %508 + %510 = load i32, i32* %22, align 4 + %511 = add nsw i32 %509, %510 + %512 = sext i32 %511 to i64 + %513 = getelementptr inbounds float, float* %507, i64 %512 + store float 0.000000e+00, float* %513, align 4 + store i32 0, i32* %23, align 4 + br label %514 + +514: ; preds = %517, %506 + %515 = load i32, i32* %23, align 4 + %516 = icmp slt i32 %515, 2 + br i1 %516, label %517, label %546 + +517: ; preds = %514 + %518 = load float*, float** %18, align 8 + %519 = load i32, i32* %21, align 4 + %520 = mul nsw i32 2, %519 + %521 = load i32, i32* %23, align 4 + %522 = add nsw i32 %520, 
%521 + %523 = sext i32 %522 to i64 + %524 = getelementptr inbounds float, float* %518, i64 %523 + %525 = load float, float* %524, align 4 + %526 = load float*, float** %19, align 8 + %527 = load i32, i32* %23, align 4 + %528 = mul nsw i32 2, %527 + %529 = load i32, i32* %22, align 4 + %530 = add nsw i32 %528, %529 + %531 = sext i32 %530 to i64 + %532 = getelementptr inbounds float, float* %526, i64 %531 + %533 = load float, float* %532, align 4 + %534 = fmul float %525, %533 + %535 = load float*, float** %20, align 8 + %536 = load i32, i32* %21, align 4 + %537 = mul nsw i32 2, %536 + %538 = load i32, i32* %22, align 4 + %539 = add nsw i32 %537, %538 + %540 = sext i32 %539 to i64 + %541 = getelementptr inbounds float, float* %535, i64 %540 + %542 = load float, float* %541, align 4 + %543 = fadd float %542, %534 + store float %543, float* %541, align 4 + %544 = load i32, i32* %23, align 4 + %545 = add nsw i32 %544, 1 + store i32 %545, i32* %23, align 4 + br label %514 + +546: ; preds = %514 + %547 = load i32, i32* %22, align 4 + %548 = add nsw i32 %547, 1 + store i32 %548, i32* %22, align 4 + br label %503 + +549: ; preds = %503 + %550 = load i32, i32* %21, align 4 + %551 = add nsw i32 %550, 1 + store i32 %551, i32* %21, align 4 + br label %499 + +552: ; preds = %499 + store i32 0, i32* %69, align 4 + br label %553 + +553: ; preds = %565, %552 + %554 = load i32, i32* %69, align 4 + %555 = icmp slt i32 %554, 4 + br i1 %555, label %556, label %568 + +556: ; preds = %553 + %557 = load i32, i32* %69, align 4 + %558 = sext i32 %557 to i64 + %559 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 %558 + %560 = load float, float* %559, align 4 + %561 = load float*, float** %36, align 8 + %562 = load i32, i32* %69, align 4 + %563 = sext i32 %562 to i64 + %564 = getelementptr inbounds float, float* %561, i64 %563 + store float %560, float* %564, align 4 + br label %565 + +565: ; preds = %556 + %566 = load i32, i32* %69, align 4 + %567 = add nsw i32 %566, 1 + store i32 %567, i32* %69, align 4 + br label %553 + +568: ; preds = %553 + %569 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 0 + %570 = load float*, float** %37, align 8 + %571 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 0 + store float* %569, float** %24, align 8 + store float* %570, float** %25, align 8 + store float* %571, float** %26, align 8 + store i32 0, i32* %27, align 4 + br label %572 + +572: ; preds = %622, %568 + %573 = load i32, i32* %27, align 4 + %574 = icmp slt i32 %573, 2 + br i1 %574, label %575, label %625 + +575: ; preds = %572 + store i32 0, i32* %28, align 4 + br label %576 + +576: ; preds = %619, %575 + %577 = load i32, i32* %28, align 4 + %578 = icmp slt i32 %577, 2 + br i1 %578, label %579, label %622 + +579: ; preds = %576 + %580 = load float*, float** %26, align 8 + %581 = load i32, i32* %27, align 4 + %582 = mul nsw i32 2, %581 + %583 = load i32, i32* %28, align 4 + %584 = add nsw i32 %582, %583 + %585 = sext i32 %584 to i64 + %586 = getelementptr inbounds float, float* %580, i64 %585 + store float 0.000000e+00, float* %586, align 4 + store i32 0, i32* %29, align 4 + br label %587 + +587: ; preds = %590, %579 + %588 = load i32, i32* %29, align 4 + %589 = icmp slt i32 %588, 2 + br i1 %589, label %590, label %619 + +590: ; preds = %587 + %591 = load float*, float** %24, align 8 + %592 = load i32, i32* %27, align 4 + %593 = mul nsw i32 2, %592 + %594 = load i32, i32* %29, align 4 + %595 = add nsw i32 %593, %594 + %596 = sext i32 %595 to i64 + %597 = 
getelementptr inbounds float, float* %591, i64 %596 + %598 = load float, float* %597, align 4 + %599 = load float*, float** %25, align 8 + %600 = load i32, i32* %29, align 4 + %601 = mul nsw i32 2, %600 + %602 = load i32, i32* %28, align 4 + %603 = add nsw i32 %601, %602 + %604 = sext i32 %603 to i64 + %605 = getelementptr inbounds float, float* %599, i64 %604 + %606 = load float, float* %605, align 4 + %607 = fmul float %598, %606 + %608 = load float*, float** %26, align 8 + %609 = load i32, i32* %27, align 4 + %610 = mul nsw i32 2, %609 + %611 = load i32, i32* %28, align 4 + %612 = add nsw i32 %610, %611 + %613 = sext i32 %612 to i64 + %614 = getelementptr inbounds float, float* %608, i64 %613 + %615 = load float, float* %614, align 4 + %616 = fadd float %615, %607 + store float %616, float* %614, align 4 + %617 = load i32, i32* %29, align 4 + %618 = add nsw i32 %617, 1 + store i32 %618, i32* %29, align 4 + br label %587 + +619: ; preds = %587 + %620 = load i32, i32* %28, align 4 + %621 = add nsw i32 %620, 1 + store i32 %621, i32* %28, align 4 + br label %576 + +622: ; preds = %576 + %623 = load i32, i32* %27, align 4 + %624 = add nsw i32 %623, 1 + store i32 %624, i32* %27, align 4 + br label %572 + +625: ; preds = %572 + store i32 0, i32* %70, align 4 + br label %626 + +626: ; preds = %638, %625 + %627 = load i32, i32* %70, align 4 + %628 = icmp slt i32 %627, 4 + br i1 %628, label %629, label %641 + +629: ; preds = %626 + %630 = load i32, i32* %70, align 4 + %631 = sext i32 %630 to i64 + %632 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 %631 + %633 = load float, float* %632, align 4 + %634 = load float*, float** %37, align 8 + %635 = load i32, i32* %70, align 4 + %636 = sext i32 %635 to i64 + %637 = getelementptr inbounds float, float* %634, i64 %636 + store float %633, float* %637, align 4 + br label %638 + +638: ; preds = %629 + %639 = load i32, i32* %70, align 4 + %640 = add nsw i32 %639, 1 + store i32 %640, i32* %70, align 4 + br label %626 + +641: ; preds = %626 + br label %642 + +642: ; preds = %641, %482 + br label %643 + +643: ; preds = %642 + %644 = load i32, i32* %42, align 4 + %645 = add nsw i32 %644, 1 + store i32 %645, i32* %42, align 4 + br label %116 + +646: ; preds = %116 + %647 = load float*, float** %36, align 8 + store float* %647, float** %30, align 8 + store i32 0, i32* %31, align 4 + br label %648 + +648: ; preds = %691, %646 + %649 = load i32, i32* %31, align 4 + %650 = icmp slt i32 %649, 2 + br i1 %650, label %651, label %694 + +651: ; preds = %648 + %652 = load i32, i32* %31, align 4 + %653 = add nsw i32 %652, 1 + store i32 %653, i32* %32, align 4 + br label %654 + +654: ; preds = %657, %651 + %655 = load i32, i32* %32, align 4 + %656 = icmp slt i32 %655, 2 + br i1 %656, label %657, label %691 + +657: ; preds = %654 + %658 = load float*, float** %30, align 8 + %659 = load i32, i32* %31, align 4 + %660 = mul nsw i32 %659, 2 + %661 = load i32, i32* %32, align 4 + %662 = add nsw i32 %660, %661 + %663 = sext i32 %662 to i64 + %664 = getelementptr inbounds float, float* %658, i64 %663 + %665 = load float, float* %664, align 4 + store float %665, float* %33, align 4 + %666 = load float*, float** %30, align 8 + %667 = load i32, i32* %32, align 4 + %668 = mul nsw i32 %667, 2 + %669 = load i32, i32* %31, align 4 + %670 = add nsw i32 %668, %669 + %671 = sext i32 %670 to i64 + %672 = getelementptr inbounds float, float* %666, i64 %671 + %673 = load float, float* %672, align 4 + %674 = load float*, float** %30, align 8 + %675 = load i32, i32* %31, 
align 4 + %676 = mul nsw i32 %675, 2 + %677 = load i32, i32* %32, align 4 + %678 = add nsw i32 %676, %677 + %679 = sext i32 %678 to i64 + %680 = getelementptr inbounds float, float* %674, i64 %679 + store float %673, float* %680, align 4 + %681 = load float, float* %33, align 4 + %682 = load float*, float** %30, align 8 + %683 = load i32, i32* %32, align 4 + %684 = mul nsw i32 %683, 2 + %685 = load i32, i32* %31, align 4 + %686 = add nsw i32 %684, %685 + %687 = sext i32 %686 to i64 + %688 = getelementptr inbounds float, float* %682, i64 %687 + store float %681, float* %688, align 4 + %689 = load i32, i32* %32, align 4 + %690 = add nsw i32 %689, 1 + store i32 %690, i32* %32, align 4 + br label %654 + +691: ; preds = %654 + %692 = load i32, i32* %31, align 4 + %693 = add nsw i32 %692, 1 + store i32 %693, i32* %31, align 4 + br label %648 + +694: ; preds = %648 + ret void +} + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { + %4 = alloca float*, align 8 + %5 = alloca float*, align 8 + %6 = alloca float*, align 8 + %7 = alloca float*, align 8 + %8 = alloca i32, align 4 + %9 = alloca i32, align 4 + %10 = alloca i32, align 4 + %11 = alloca i32, align 4 + %12 = alloca float*, align 8 + %13 = alloca float*, align 8 + %14 = alloca i32, align 4 + %15 = alloca i32, align 4 + %16 = alloca float, align 4 + %17 = alloca float*, align 8 + %18 = alloca float*, align 8 + %19 = alloca i32, align 4 + %20 = alloca float, align 4 + %21 = alloca i32, align 4 + %22 = alloca float*, align 8 + %23 = alloca i32, align 4 + %24 = alloca i32, align 4 + %25 = alloca float, align 4 + %26 = alloca float*, align 8 + %27 = alloca i32, align 4 + %28 = alloca i32, align 4 + %29 = alloca float, align 4 + %30 = alloca float*, align 8 + store float* %0, float** %4, align 8 + store float* %1, float** %5, align 8 + store float* %2, float** %6, align 8 + %31 = load float*, float** %6, align 8 + %32 = bitcast float* %31 to i8* + %33 = load float*, float** %4, align 8 + %34 = bitcast float* %33 to i8* + %35 = load float*, float** %6, align 8 + %36 = bitcast float* %35 to i8* + %37 = call i64 @llvm.objectsize.i64.p0i8(i8* %36, i1 false, i1 true, i1 false) + %38 = call i8* @__memcpy_chk(i8* %32, i8* %34, i64 16, i64 %37) #7 + %39 = call i8* @calloc(i64 4, i64 4) #8 + %40 = bitcast i8* %39 to float* + store float* %40, float** %7, align 8 + store i32 0, i32* %8, align 4 + br label %41 + +41: ; preds = %65, %3 + %42 = load i32, i32* %8, align 4 + %43 = icmp slt i32 %42, 2 + br i1 %43, label %44, label %68 + +44: ; preds = %41 + store i32 0, i32* %9, align 4 + br label %45 + +45: ; preds = %61, %44 + %46 = load i32, i32* %9, align 4 + %47 = icmp slt i32 %46, 2 + br i1 %47, label %48, label %64 + +48: ; preds = %45 + %49 = load i32, i32* %8, align 4 + %50 = load i32, i32* %9, align 4 + %51 = icmp eq i32 %49, %50 + %52 = zext i1 %51 to i32 + %53 = sitofp i32 %52 to float + %54 = load float*, float** %7, align 8 + %55 = load i32, i32* %8, align 4 + %56 = mul nsw i32 %55, 2 + %57 = load i32, i32* %9, align 4 + %58 = add nsw i32 %56, %57 + %59 = sext i32 %58 to i64 + %60 = getelementptr inbounds float, float* %54, i64 %59 + store float %53, float* %60, align 4 + br label %61 + +61: ; preds = %48 + %62 = load i32, i32* %9, align 4 + %63 = add nsw i32 %62, 1 + store i32 %63, i32* %9, align 4 + br label %45 + +64: ; preds 
= %45 + br label %65 + +65: ; preds = %64 + %66 = load i32, i32* %8, align 4 + %67 = add nsw i32 %66, 1 + store i32 %67, i32* %8, align 4 + br label %41 + +68: ; preds = %41 + store i32 0, i32* %10, align 4 + br label %69 + +69: ; preds = %343, %68 + %70 = load i32, i32* %10, align 4 + %71 = icmp slt i32 %70, 1 + br i1 %71, label %72, label %346 + +72: ; preds = %69 + %73 = load i32, i32* %10, align 4 + %74 = sub nsw i32 2, %73 + store i32 %74, i32* %11, align 4 + %75 = load i32, i32* %11, align 4 + %76 = sext i32 %75 to i64 + %77 = call i8* @calloc(i64 4, i64 %76) #8 + %78 = bitcast i8* %77 to float* + store float* %78, float** %12, align 8 + %79 = load i32, i32* %11, align 4 + %80 = sext i32 %79 to i64 + %81 = call i8* @calloc(i64 4, i64 %80) #8 + %82 = bitcast i8* %81 to float* + store float* %82, float** %13, align 8 + store i32 0, i32* %14, align 4 + br label %83 + +83: ; preds = %115, %72 + %84 = load i32, i32* %14, align 4 + %85 = load i32, i32* %11, align 4 + %86 = icmp slt i32 %84, %85 + br i1 %86, label %87, label %118 + +87: ; preds = %83 + %88 = load i32, i32* %10, align 4 + %89 = load i32, i32* %14, align 4 + %90 = add nsw i32 %88, %89 + store i32 %90, i32* %15, align 4 + %91 = load float*, float** %6, align 8 + %92 = load i32, i32* %15, align 4 + %93 = mul nsw i32 %92, 2 + %94 = load i32, i32* %10, align 4 + %95 = add nsw i32 %93, %94 + %96 = sext i32 %95 to i64 + %97 = getelementptr inbounds float, float* %91, i64 %96 + %98 = load float, float* %97, align 4 + %99 = load float*, float** %12, align 8 + %100 = load i32, i32* %14, align 4 + %101 = sext i32 %100 to i64 + %102 = getelementptr inbounds float, float* %99, i64 %101 + store float %98, float* %102, align 4 + %103 = load float*, float** %7, align 8 + %104 = load i32, i32* %15, align 4 + %105 = mul nsw i32 %104, 2 + %106 = load i32, i32* %10, align 4 + %107 = add nsw i32 %105, %106 + %108 = sext i32 %107 to i64 + %109 = getelementptr inbounds float, float* %103, i64 %108 + %110 = load float, float* %109, align 4 + %111 = load float*, float** %13, align 8 + %112 = load i32, i32* %14, align 4 + %113 = sext i32 %112 to i64 + %114 = getelementptr inbounds float, float* %111, i64 %113 + store float %110, float* %114, align 4 + br label %115 + +115: ; preds = %87 + %116 = load i32, i32* %14, align 4 + %117 = add nsw i32 %116, 1 + store i32 %117, i32* %14, align 4 + br label %83 + +118: ; preds = %83 + %119 = load float*, float** %12, align 8 + %120 = getelementptr inbounds float, float* %119, i64 0 + %121 = load float, float* %120, align 4 + %122 = call float @no_opt_sgn(float %121) + %123 = fneg float %122 + %124 = load float*, float** %12, align 8 + %125 = load i32, i32* %11, align 4 + %126 = call float @no_opt_naive_norm(float* %124, i32 %125) + %127 = fmul float %123, %126 + store float %127, float* %16, align 4 + %128 = load i32, i32* %11, align 4 + %129 = sext i32 %128 to i64 + %130 = call i8* @calloc(i64 4, i64 %129) #8 + %131 = bitcast i8* %130 to float* + store float* %131, float** %17, align 8 + %132 = load i32, i32* %11, align 4 + %133 = sext i32 %132 to i64 + %134 = call i8* @calloc(i64 4, i64 %133) #8 + %135 = bitcast i8* %134 to float* + store float* %135, float** %18, align 8 + store i32 0, i32* %19, align 4 + br label %136 + +136: ; preds = %158, %118 + %137 = load i32, i32* %19, align 4 + %138 = load i32, i32* %11, align 4 + %139 = icmp slt i32 %137, %138 + br i1 %139, label %140, label %161 + +140: ; preds = %136 + %141 = load float*, float** %12, align 8 + %142 = load i32, i32* %19, align 4 + %143 = sext i32 
%142 to i64 + %144 = getelementptr inbounds float, float* %141, i64 %143 + %145 = load float, float* %144, align 4 + %146 = load float, float* %16, align 4 + %147 = load float*, float** %13, align 8 + %148 = load i32, i32* %19, align 4 + %149 = sext i32 %148 to i64 + %150 = getelementptr inbounds float, float* %147, i64 %149 + %151 = load float, float* %150, align 4 + %152 = fmul float %146, %151 + %153 = fadd float %145, %152 + %154 = load float*, float** %17, align 8 + %155 = load i32, i32* %19, align 4 + %156 = sext i32 %155 to i64 + %157 = getelementptr inbounds float, float* %154, i64 %156 + store float %153, float* %157, align 4 + br label %158 + +158: ; preds = %140 + %159 = load i32, i32* %19, align 4 + %160 = add nsw i32 %159, 1 + store i32 %160, i32* %19, align 4 + br label %136 + +161: ; preds = %136 + %162 = load float*, float** %17, align 8 + %163 = load i32, i32* %11, align 4 + %164 = call float @no_opt_naive_norm(float* %162, i32 %163) + store float %164, float* %20, align 4 + store i32 0, i32* %21, align 4 + br label %165 + +165: ; preds = %182, %161 + %166 = load i32, i32* %21, align 4 + %167 = load i32, i32* %11, align 4 + %168 = icmp slt i32 %166, %167 + br i1 %168, label %169, label %185 + +169: ; preds = %165 + %170 = load float*, float** %17, align 8 + %171 = load i32, i32* %21, align 4 + %172 = sext i32 %171 to i64 + %173 = getelementptr inbounds float, float* %170, i64 %172 + %174 = load float, float* %173, align 4 + %175 = load float, float* %20, align 4 + %176 = fadd float %175, 0x3EE4F8B580000000 + %177 = fdiv float %174, %176 + %178 = load float*, float** %18, align 8 + %179 = load i32, i32* %21, align 4 + %180 = sext i32 %179 to i64 + %181 = getelementptr inbounds float, float* %178, i64 %180 + store float %177, float* %181, align 4 + br label %182 + +182: ; preds = %169 + %183 = load i32, i32* %21, align 4 + %184 = add nsw i32 %183, 1 + store i32 %184, i32* %21, align 4 + br label %165 + +185: ; preds = %165 + %186 = load i32, i32* %11, align 4 + %187 = load i32, i32* %11, align 4 + %188 = mul nsw i32 %186, %187 + %189 = sext i32 %188 to i64 + %190 = call i8* @calloc(i64 4, i64 %189) #8 + %191 = bitcast i8* %190 to float* + store float* %191, float** %22, align 8 + store i32 0, i32* %23, align 4 + br label %192 + +192: ; preds = %233, %185 + %193 = load i32, i32* %23, align 4 + %194 = load i32, i32* %11, align 4 + %195 = icmp slt i32 %193, %194 + br i1 %195, label %196, label %236 + +196: ; preds = %192 + store i32 0, i32* %24, align 4 + br label %197 + +197: ; preds = %229, %196 + %198 = load i32, i32* %24, align 4 + %199 = load i32, i32* %11, align 4 + %200 = icmp slt i32 %198, %199 + br i1 %200, label %201, label %232 + +201: ; preds = %197 + %202 = load i32, i32* %23, align 4 + %203 = load i32, i32* %24, align 4 + %204 = icmp eq i32 %202, %203 + %205 = zext i1 %204 to i64 + %206 = select i1 %204, float 1.000000e+00, float 0.000000e+00 + %207 = load float*, float** %18, align 8 + %208 = load i32, i32* %23, align 4 + %209 = sext i32 %208 to i64 + %210 = getelementptr inbounds float, float* %207, i64 %209 + %211 = load float, float* %210, align 4 + %212 = fmul float 2.000000e+00, %211 + %213 = load float*, float** %18, align 8 + %214 = load i32, i32* %24, align 4 + %215 = sext i32 %214 to i64 + %216 = getelementptr inbounds float, float* %213, i64 %215 + %217 = load float, float* %216, align 4 + %218 = fmul float %212, %217 + %219 = fsub float %206, %218 + store float %219, float* %25, align 4 + %220 = load float, float* %25, align 4 + %221 = load float*, 
float** %22, align 8 + %222 = load i32, i32* %23, align 4 + %223 = load i32, i32* %11, align 4 + %224 = mul nsw i32 %222, %223 + %225 = load i32, i32* %24, align 4 + %226 = add nsw i32 %224, %225 + %227 = sext i32 %226 to i64 + %228 = getelementptr inbounds float, float* %221, i64 %227 + store float %220, float* %228, align 4 + br label %229 + +229: ; preds = %201 + %230 = load i32, i32* %24, align 4 + %231 = add nsw i32 %230, 1 + store i32 %231, i32* %24, align 4 + br label %197 + +232: ; preds = %197 + br label %233 + +233: ; preds = %232 + %234 = load i32, i32* %23, align 4 + %235 = add nsw i32 %234, 1 + store i32 %235, i32* %23, align 4 + br label %192 + +236: ; preds = %192 + %237 = call i8* @calloc(i64 4, i64 4) #8 + %238 = bitcast i8* %237 to float* + store float* %238, float** %26, align 8 + store i32 0, i32* %27, align 4 + br label %239 + +239: ; preds = %287, %236 + %240 = load i32, i32* %27, align 4 + %241 = icmp slt i32 %240, 2 + br i1 %241, label %242, label %290 + +242: ; preds = %239 + store i32 0, i32* %28, align 4 + br label %243 + +243: ; preds = %283, %242 + %244 = load i32, i32* %28, align 4 + %245 = icmp slt i32 %244, 2 + br i1 %245, label %246, label %286 + +246: ; preds = %243 + %247 = load i32, i32* %27, align 4 + %248 = load i32, i32* %10, align 4 + %249 = icmp slt i32 %247, %248 + br i1 %249, label %254, label %250 + +250: ; preds = %246 + %251 = load i32, i32* %28, align 4 + %252 = load i32, i32* %10, align 4 + %253 = icmp slt i32 %251, %252 + br i1 %253, label %254, label %260 + +254: ; preds = %250, %246 + %255 = load i32, i32* %27, align 4 + %256 = load i32, i32* %28, align 4 + %257 = icmp eq i32 %255, %256 + %258 = zext i1 %257 to i64 + %259 = select i1 %257, float 1.000000e+00, float 0.000000e+00 + store float %259, float* %29, align 4 + br label %274 + +260: ; preds = %250 + %261 = load float*, float** %22, align 8 + %262 = load i32, i32* %27, align 4 + %263 = load i32, i32* %10, align 4 + %264 = sub nsw i32 %262, %263 + %265 = load i32, i32* %11, align 4 + %266 = mul nsw i32 %264, %265 + %267 = load i32, i32* %28, align 4 + %268 = load i32, i32* %10, align 4 + %269 = sub nsw i32 %267, %268 + %270 = add nsw i32 %266, %269 + %271 = sext i32 %270 to i64 + %272 = getelementptr inbounds float, float* %261, i64 %271 + %273 = load float, float* %272, align 4 + store float %273, float* %29, align 4 + br label %274 + +274: ; preds = %260, %254 + %275 = load float, float* %29, align 4 + %276 = load float*, float** %26, align 8 + %277 = load i32, i32* %27, align 4 + %278 = mul nsw i32 %277, 2 + %279 = load i32, i32* %28, align 4 + %280 = add nsw i32 %278, %279 + %281 = sext i32 %280 to i64 + %282 = getelementptr inbounds float, float* %276, i64 %281 + store float %275, float* %282, align 4 + br label %283 + +283: ; preds = %274 + %284 = load i32, i32* %28, align 4 + %285 = add nsw i32 %284, 1 + store i32 %285, i32* %28, align 4 + br label %243 + +286: ; preds = %243 + br label %287 + +287: ; preds = %286 + %288 = load i32, i32* %27, align 4 + %289 = add nsw i32 %288, 1 + store i32 %289, i32* %27, align 4 + br label %239 + +290: ; preds = %239 + %291 = load i32, i32* %10, align 4 + %292 = icmp eq i32 %291, 0 + br i1 %292, label %293, label %305 + +293: ; preds = %290 + %294 = load float*, float** %5, align 8 + %295 = bitcast float* %294 to i8* + %296 = load float*, float** %26, align 8 + %297 = bitcast float* %296 to i8* + %298 = load float*, float** %5, align 8 + %299 = bitcast float* %298 to i8* + %300 = call i64 @llvm.objectsize.i64.p0i8(i8* %299, i1 false, i1 
true, i1 false) + %301 = call i8* @__memcpy_chk(i8* %295, i8* %297, i64 16, i64 %300) #7 + %302 = load float*, float** %26, align 8 + %303 = load float*, float** %4, align 8 + %304 = load float*, float** %6, align 8 + call void @no_opt_naive_fixed_matrix_multiply(float* %302, float* %303, float* %304) + br label %330 + +305: ; preds = %290 + %306 = call i8* @calloc(i64 4, i64 4) #8 + %307 = bitcast i8* %306 to float* + store float* %307, float** %30, align 8 + %308 = load float*, float** %26, align 8 + %309 = load float*, float** %5, align 8 + %310 = load float*, float** %30, align 8 + call void @no_opt_naive_fixed_matrix_multiply(float* %308, float* %309, float* %310) + %311 = load float*, float** %5, align 8 + %312 = bitcast float* %311 to i8* + %313 = load float*, float** %30, align 8 + %314 = bitcast float* %313 to i8* + %315 = load float*, float** %5, align 8 + %316 = bitcast float* %315 to i8* + %317 = call i64 @llvm.objectsize.i64.p0i8(i8* %316, i1 false, i1 true, i1 false) + %318 = call i8* @__memcpy_chk(i8* %312, i8* %314, i64 16, i64 %317) #7 + %319 = load float*, float** %26, align 8 + %320 = load float*, float** %6, align 8 + %321 = load float*, float** %30, align 8 + call void @no_opt_naive_fixed_matrix_multiply(float* %319, float* %320, float* %321) + %322 = load float*, float** %6, align 8 + %323 = bitcast float* %322 to i8* + %324 = load float*, float** %30, align 8 + %325 = bitcast float* %324 to i8* + %326 = load float*, float** %6, align 8 + %327 = bitcast float* %326 to i8* + %328 = call i64 @llvm.objectsize.i64.p0i8(i8* %327, i1 false, i1 true, i1 false) + %329 = call i8* @__memcpy_chk(i8* %323, i8* %325, i64 16, i64 %328) #7 + br label %330 + +330: ; preds = %305, %293 + %331 = load float*, float** %12, align 8 + %332 = bitcast float* %331 to i8* + call void @free(i8* %332) + %333 = load float*, float** %13, align 8 + %334 = bitcast float* %333 to i8* + call void @free(i8* %334) + %335 = load float*, float** %17, align 8 + %336 = bitcast float* %335 to i8* + call void @free(i8* %336) + %337 = load float*, float** %18, align 8 + %338 = bitcast float* %337 to i8* + call void @free(i8* %338) + %339 = load float*, float** %22, align 8 + %340 = bitcast float* %339 to i8* + call void @free(i8* %340) + %341 = load float*, float** %26, align 8 + %342 = bitcast float* %341 to i8* + call void @free(i8* %342) + br label %343 + +343: ; preds = %330 + %344 = load i32, i32* %10, align 4 + %345 = add nsw i32 %344, 1 + store i32 %345, i32* %10, align 4 + br label %69 + +346: ; preds = %69 + %347 = load float*, float** %5, align 8 + call void @no_opt_naive_fixed_transpose(float* %347) + ret void +} + +; Function Attrs: nounwind +declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 + +; Function Attrs: nounwind readnone speculatable willreturn +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 + +; Function Attrs: allocsize(0,1) +declare i8* @calloc(i64, i64) #5 + +declare void @free(i8*) #6 + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #1 { + %1 = alloca i32, align 4 + %2 = alloca i64, align 8 + %3 = alloca [4 x float], align 16 + %4 = alloca i32, align 4 + %5 = alloca [4 x float], align 16 + %6 = alloca [4 x float], align 16 + %7 = alloca [4 x float], align 16 + %8 = alloca [4 x float], align 16 + %9 = alloca i32, align 4 + %10 = alloca i32, align 4 + %11 = alloca i32, align 4 + %12 = alloca i32, align 4 + store i32 0, i32* %1, align 4 + %13 = call i64 @time(i64* null) + store i64 %13, i64* %2, align 8 + %14 = call i64 @time(i64* %2) 
+ %15 = trunc i64 %14 to i32 + call void @srand(i32 %15) + %16 = bitcast [4 x float]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %16, i8 0, i64 16, i1 false) + store i32 0, i32* %4, align 4 + br label %17 + +17: ; preds = %33, %0 + %18 = load i32, i32* %4, align 4 + %19 = icmp slt i32 %18, 4 + br i1 %19, label %20, label %36 + +20: ; preds = %17 + %21 = call i32 @rand() + %22 = sitofp i32 %21 to float + %23 = fdiv float %22, 0x41747AE140000000 + %24 = load i32, i32* %4, align 4 + %25 = sext i32 %24 to i64 + %26 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 %25 + store float %23, float* %26, align 4 + %27 = load i32, i32* %4, align 4 + %28 = sext i32 %27 to i64 + %29 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 %28 + %30 = load float, float* %29, align 4 + %31 = fpext float %30 to double + %32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %31) + br label %33 + +33: ; preds = %20 + %34 = load i32, i32* %4, align 4 + %35 = add nsw i32 %34, 1 + store i32 %35, i32* %4, align 4 + br label %17 + +36: ; preds = %17 + %37 = bitcast [4 x float]* %5 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %37, i8 0, i64 16, i1 false) + %38 = bitcast [4 x float]* %6 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %38, i8 0, i64 16, i1 false) + %39 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 + %40 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 + %41 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 + call void @naive_fixed_qr_decomp(float* %39, float* %40, float* %41) + %42 = bitcast [4 x float]* %7 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %42, i8 0, i64 16, i1 false) + %43 = bitcast [4 x float]* %8 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %43, i8 0, i64 16, i1 false) + %44 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 + %45 = getelementptr inbounds [4 x float], [4 x float]* %7, i64 0, i64 0 + %46 = getelementptr inbounds [4 x float], [4 x float]* %8, i64 0, i64 0 + call void @no_opt_naive_fixed_qr_decomp(float* %44, float* %45, float* %46) + store i32 0, i32* %9, align 4 + br label %47 + +47: ; preds = %77, %36 + %48 = load i32, i32* %9, align 4 + %49 = icmp slt i32 %48, 2 + br i1 %49, label %50, label %80 + +50: ; preds = %47 + store i32 0, i32* %10, align 4 + br label %51 + +51: ; preds = %73, %50 + %52 = load i32, i32* %10, align 4 + %53 = icmp slt i32 %52, 2 + br i1 %53, label %54, label %76 + +54: ; preds = %51 + %55 = load i32, i32* %9, align 4 + %56 = mul nsw i32 %55, 2 + %57 = load i32, i32* %10, align 4 + %58 = add nsw i32 %56, %57 + %59 = sext i32 %58 to i64 + %60 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 %59 + %61 = load float, float* %60, align 4 + %62 = fpext float %61 to double + %63 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %62) + %64 = load i32, i32* %9, align 4 + %65 = mul nsw i32 %64, 2 + %66 = load i32, i32* %10, align 4 + %67 = add nsw i32 %65, %66 + %68 = sext i32 %67 to i64 + %69 = getelementptr inbounds [4 x float], [4 x float]* %7, i64 0, i64 %68 + %70 = load float, float* %69, align 4 + %71 = fpext float %70 to double + %72 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %71) + br label %73 + +73: ; preds = %54 + %74 = load i32, i32* %10, align 4 + %75 = add nsw i32 %74, 1 + store i32 %75, i32* %10, align 4 + br label %51 + +76: ; preds = %51 + br label %77 + +77: ; preds = %76 + %78 = load i32, i32* %9, align 4 + %79 = add nsw i32 %78, 1 + store i32 %79, i32* %9, align 4 + br label %47 + +80: ; preds = %47 + store i32 0, i32* %11, align 4 + br label %81 + +81: ; preds = %111, %80 + %82 = load i32, i32* %11, align 4 + %83 = icmp slt i32 %82, 2 + br i1 %83, label %84, label %114 + +84: ; preds = %81 + store i32 0, i32* %12, align 4 + br label %85 + +85: ; preds = %107, %84 + %86 = load i32, i32* %12, align 4 + %87 = icmp slt i32 %86, 2 + br i1 %87, label %88, label %110 + +88: ; preds = %85 + %89 = load i32, i32* %11, align 4 + %90 = mul nsw i32 %89, 2 + %91 = load i32, i32* %12, align 4 + %92 = add nsw i32 %90, %91 + %93 = sext i32 %92 to i64 + %94 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 %93 + %95 = load float, float* %94, align 4 + %96 = fpext float %95 to double + %97 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %96) + %98 = load i32, i32* %11, align 4 + %99 = mul nsw i32 %98, 2 + %100 = load i32, i32* %12, align 4 + %101 = add nsw i32 %99, %100 + %102 = sext i32 %101 to i64 + %103 = getelementptr inbounds [4 x float], [4 x float]* %8, i64 0, i64 %102 + %104 = load float, float* %103, align 4 + %105 = fpext float %104 to double + %106 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %105) + br label %107 + +107: ; preds = %88 + %108 = load i32, i32* %12, align 4 + %109 = add nsw i32 %108, 1 + store i32 %109, i32* %12, align 4 + br label %85 + +110: ; preds = %85 + br label %111 + +111: ; preds = %110 + %112 = load i32, i32* %11, align 4 + %113 = add nsw i32 %112, 1 + store i32 %113, i32* %11, align 4 + br label %81 + +114: ; preds = %81 + %115 = load i32, i32* %1, align 4 + ret i32 %115 +} + +declare i64 @time(i64*) #6 + +declare void @srand(i32) #6 + +declare i32 @rand() #6 + +declare i32 @printf(i8*, ...) 
#6 + +attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone speculatable willreturn } +attributes #3 = { argmemonly nounwind willreturn writeonly } +attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #7 = { nounwind } +attributes #8 = { allocsize(0,1) } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/failed-test/dce.ll b/src/dios-egraphs/Diospyros/failed-test/dce.ll new file mode 100644 index 00000000..19512d60 --- /dev/null +++ b/src/dios-egraphs/Diospyros/failed-test/dce.ll @@ -0,0 +1,4711 @@ +; ModuleID = 'build/diospyros.ll' +source_filename = "fail-tests/qr-decomp-local-arrays.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 +@.str.1 = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 +@.str.2 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 +@.str.3 = private 
unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 +@.str.4 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @sgn(float %0) #0 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_sgn(float %0) #1 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @naive_norm(float* %0, i32 %1) #0 { + %3 = icmp sgt i32 %1, 0 + %smax = select i1 %3, i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 3 + br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 + %indvars.iv.next = or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + 
%epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32(float) #2 + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_naive_norm(float* %0, i32 %1) #1 { + %3 = icmp sgt i32 %1, 0 + %smax = select i1 %3, i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 3 + br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 + %indvars.iv.next = or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], 
[ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_transpose(float* %0) #0 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast float* %4 to i32* + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_transpose(float* %0) #1 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast float* %4 to i32* + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { +.preheader: + store float 0.000000e+00, float* %2, align 4 + %3 = getelementptr float, float* %0, i32 0 + %4 = load float, float* %3, align 4 + %5 = insertelement <4 x float> zeroinitializer, float %4, i32 0 + %6 = insertelement <4 x float> %5, float 0.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 0.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 0.000000e+00, i32 3 + %9 = getelementptr float, float* %1, i32 0 + %10 = load float, float* %9, align 4 + %11 = insertelement <4 x float> zeroinitializer, float %10, i32 0 + %12 = insertelement <4 x float> %11, float 0.000000e+00, i32 1 + %13 = insertelement <4 x float> %12, float 0.000000e+00, i32 2 + %14 = insertelement <4 x float> %13, float 0.000000e+00, i32 3 + %15 = call <4 x float> @llvm.fma.v4f32(<4 x float> %8, <4 x float> %14, <4 x float> zeroinitializer) + %16 = extractelement <4 x float> %15, i32 0 + store float %16, float* %2, align 4 + %17 = getelementptr float, float* %0, i32 0 + %18 = load float, float* %17, align 4 + %19 = insertelement <4 x float> zeroinitializer, float %18, i32 0 + %20 = insertelement <4 x float> %19, float 1.000000e+00, i32 1 + %21 = insertelement <4 x float> %20, float 1.000000e+00, i32 2 + %22 = insertelement <4 x float> %21, float 1.000000e+00, i32 3 + %23 = insertelement <4 x float> zeroinitializer, float %10, i32 0 + %24 = insertelement <4 x float> %23, float 0.000000e+00, i32 1 + %25 = insertelement <4 x float> %24, float 0.000000e+00, i32 2 + %26 = insertelement <4 x float> %25, float 0.000000e+00, i32 3 + %27 = fmul <4 x float> %22, %26 + %28 = fadd <4 x float> %27, zeroinitializer + %29 = getelementptr float, float* %0, i32 0 + %30 = getelementptr inbounds float, float* %29, i64 1 + %31 = load float, float* %30, align 4 + %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 + %33 = insertelement <4 x float> %32, float 0.000000e+00, i32 1 + %34 = insertelement <4 x float> %33, float 0.000000e+00, i32 2 + %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 3 + %36 = getelementptr float, float* %1, i32 0 + %37 = getelementptr inbounds float, float* %36, i64 2 + %38 = load float, float* %37, align 4 + %39 = insertelement <4 x float> zeroinitializer, float %38, i32 0 + %40 = insertelement <4 x float> %39, float 0.000000e+00, i32 1 + %41 = insertelement <4 x float> %40, float 0.000000e+00, i32 2 + %42 = insertelement <4 x float> 
%41, float 0.000000e+00, i32 3 + %43 = call <4 x float> @llvm.fma.v4f32(<4 x float> %35, <4 x float> %42, <4 x float> %28) + %44 = extractelement <4 x float> %43, i32 0 + store float %44, float* %2, align 4 + %45 = extractelement <4 x float> %43, i32 1 + %46 = getelementptr float, float* %2, i32 0 + %47 = getelementptr inbounds float, float* %46, i64 1 + store float %45, float* %47, align 4 + %48 = getelementptr float, float* %0, i32 0 + %49 = load float, float* %48, align 4 + %50 = insertelement <4 x float> zeroinitializer, float %49, i32 0 + %51 = insertelement <4 x float> %50, float 0.000000e+00, i32 1 + %52 = insertelement <4 x float> %51, float 0.000000e+00, i32 2 + %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 3 + %54 = getelementptr float, float* %1, i32 0 + %55 = getelementptr inbounds float, float* %54, i64 1 + %56 = load float, float* %55, align 4 + %57 = insertelement <4 x float> zeroinitializer, float %56, i32 0 + %58 = insertelement <4 x float> %57, float 0.000000e+00, i32 1 + %59 = insertelement <4 x float> %58, float 0.000000e+00, i32 2 + %60 = insertelement <4 x float> %59, float 0.000000e+00, i32 3 + %61 = call <4 x float> @llvm.fma.v4f32(<4 x float> %53, <4 x float> %60, <4 x float> zeroinitializer) + %62 = extractelement <4 x float> %61, i32 0 + %63 = getelementptr float, float* %2, i32 0 + %64 = getelementptr inbounds float, float* %63, i64 1 + store float %62, float* %64, align 4 + %65 = insertelement <4 x float> zeroinitializer, float %49, i32 0 + %66 = insertelement <4 x float> %65, float 1.000000e+00, i32 1 + %67 = insertelement <4 x float> %66, float 1.000000e+00, i32 2 + %68 = insertelement <4 x float> %67, float 1.000000e+00, i32 3 + %69 = insertelement <4 x float> zeroinitializer, float %56, i32 0 + %70 = insertelement <4 x float> %69, float 0.000000e+00, i32 1 + %71 = insertelement <4 x float> %70, float 0.000000e+00, i32 2 + %72 = insertelement <4 x float> %71, float 0.000000e+00, i32 3 + %73 = fmul <4 x float> %68, %72 + %74 = fadd <4 x float> %73, zeroinitializer + %75 = getelementptr float, float* %0, i32 0 + %76 = getelementptr inbounds float, float* %75, i64 1 + %77 = load float, float* %76, align 4 + %78 = insertelement <4 x float> zeroinitializer, float %77, i32 0 + %79 = insertelement <4 x float> %78, float 0.000000e+00, i32 1 + %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 2 + %81 = insertelement <4 x float> %80, float 0.000000e+00, i32 3 + %82 = getelementptr float, float* %1, i32 0 + %83 = getelementptr inbounds float, float* %82, i64 3 + %84 = load float, float* %83, align 4 + %85 = insertelement <4 x float> zeroinitializer, float %84, i32 0 + %86 = insertelement <4 x float> %85, float 0.000000e+00, i32 1 + %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 2 + %88 = insertelement <4 x float> %87, float 0.000000e+00, i32 3 + %89 = call <4 x float> @llvm.fma.v4f32(<4 x float> %81, <4 x float> %88, <4 x float> %74) + %90 = extractelement <4 x float> %89, i32 0 + %91 = getelementptr float, float* %2, i32 0 + %92 = getelementptr inbounds float, float* %91, i64 1 + store float %90, float* %92, align 4 + %93 = extractelement <4 x float> %89, i32 1 + %94 = getelementptr float, float* %2, i32 0 + %95 = getelementptr inbounds float, float* %94, i64 2 + store float %93, float* %95, align 4 + %96 = getelementptr float, float* %0, i32 0 + %97 = getelementptr inbounds float, float* %96, i64 2 + %98 = load float, float* %97, align 4 + %99 = insertelement <4 x float> zeroinitializer, float %98, i32 0 + %100 = insertelement 
<4 x float> %99, float 0.000000e+00, i32 1 + %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 2 + %102 = insertelement <4 x float> %101, float 0.000000e+00, i32 3 + %103 = getelementptr float, float* %1, i32 0 + %104 = load float, float* %103, align 4 + %105 = insertelement <4 x float> zeroinitializer, float %104, i32 0 + %106 = insertelement <4 x float> %105, float 0.000000e+00, i32 1 + %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 2 + %108 = insertelement <4 x float> %107, float 0.000000e+00, i32 3 + %109 = call <4 x float> @llvm.fma.v4f32(<4 x float> %102, <4 x float> %108, <4 x float> zeroinitializer) + %110 = extractelement <4 x float> %109, i32 0 + %111 = getelementptr float, float* %2, i32 0 + %112 = getelementptr inbounds float, float* %111, i64 2 + store float %110, float* %112, align 4 + %113 = insertelement <4 x float> zeroinitializer, float %98, i32 0 + %114 = insertelement <4 x float> %113, float 1.000000e+00, i32 1 + %115 = insertelement <4 x float> %114, float 1.000000e+00, i32 2 + %116 = insertelement <4 x float> %115, float 1.000000e+00, i32 3 + %117 = insertelement <4 x float> zeroinitializer, float %104, i32 0 + %118 = insertelement <4 x float> %117, float 0.000000e+00, i32 1 + %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 2 + %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 3 + %121 = fmul <4 x float> %116, %120 + %122 = fadd <4 x float> %121, zeroinitializer + %123 = getelementptr float, float* %0, i32 0 + %124 = getelementptr inbounds float, float* %123, i64 3 + %125 = load float, float* %124, align 4 + %126 = insertelement <4 x float> zeroinitializer, float %125, i32 0 + %127 = insertelement <4 x float> %126, float 0.000000e+00, i32 1 + %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 2 + %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 3 + %130 = getelementptr float, float* %1, i32 0 + %131 = getelementptr inbounds float, float* %130, i64 2 + %132 = load float, float* %131, align 4 + %133 = insertelement <4 x float> zeroinitializer, float %132, i32 0 + %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 1 + %135 = insertelement <4 x float> %134, float 0.000000e+00, i32 2 + %136 = insertelement <4 x float> %135, float 0.000000e+00, i32 3 + %137 = call <4 x float> @llvm.fma.v4f32(<4 x float> %129, <4 x float> %136, <4 x float> %122) + %138 = extractelement <4 x float> %137, i32 0 + %139 = getelementptr float, float* %2, i32 0 + %140 = getelementptr inbounds float, float* %139, i64 2 + store float %138, float* %140, align 4 + %141 = extractelement <4 x float> %137, i32 1 + %142 = getelementptr float, float* %2, i32 0 + %143 = getelementptr inbounds float, float* %142, i64 3 + store float %141, float* %143, align 4 + %144 = getelementptr float, float* %0, i32 0 + %145 = getelementptr inbounds float, float* %144, i64 2 + %146 = load float, float* %145, align 4 + %147 = insertelement <4 x float> zeroinitializer, float %146, i32 0 + %148 = insertelement <4 x float> %147, float 0.000000e+00, i32 1 + %149 = insertelement <4 x float> %148, float 0.000000e+00, i32 2 + %150 = insertelement <4 x float> %149, float 0.000000e+00, i32 3 + %151 = getelementptr float, float* %1, i32 0 + %152 = getelementptr inbounds float, float* %151, i64 1 + %153 = load float, float* %152, align 4 + %154 = insertelement <4 x float> zeroinitializer, float %153, i32 0 + %155 = insertelement <4 x float> %154, float 0.000000e+00, i32 1 + %156 = insertelement <4 x float> %155, float 0.000000e+00, i32 2 + 
%157 = insertelement <4 x float> %156, float 0.000000e+00, i32 3 + %158 = call <4 x float> @llvm.fma.v4f32(<4 x float> %150, <4 x float> %157, <4 x float> zeroinitializer) + %159 = extractelement <4 x float> %158, i32 0 + %160 = getelementptr float, float* %2, i32 0 + %161 = getelementptr inbounds float, float* %160, i64 3 + store float %159, float* %161, align 4 + %162 = insertelement <4 x float> zeroinitializer, float %146, i32 0 + %163 = insertelement <4 x float> %162, float 1.000000e+00, i32 1 + %164 = insertelement <4 x float> %163, float 1.000000e+00, i32 2 + %165 = insertelement <4 x float> %164, float 1.000000e+00, i32 3 + %166 = insertelement <4 x float> zeroinitializer, float %153, i32 0 + %167 = insertelement <4 x float> %166, float 0.000000e+00, i32 1 + %168 = insertelement <4 x float> %167, float 0.000000e+00, i32 2 + %169 = insertelement <4 x float> %168, float 0.000000e+00, i32 3 + %170 = fmul <4 x float> %165, %169 + %171 = fadd <4 x float> %170, zeroinitializer + %172 = getelementptr float, float* %0, i32 0 + %173 = getelementptr inbounds float, float* %172, i64 3 + %174 = load float, float* %173, align 4 + %175 = insertelement <4 x float> zeroinitializer, float %174, i32 0 + %176 = insertelement <4 x float> %175, float 0.000000e+00, i32 1 + %177 = insertelement <4 x float> %176, float 0.000000e+00, i32 2 + %178 = insertelement <4 x float> %177, float 0.000000e+00, i32 3 + %179 = getelementptr float, float* %1, i32 0 + %180 = getelementptr inbounds float, float* %179, i64 3 + %181 = load float, float* %180, align 4 + %182 = insertelement <4 x float> zeroinitializer, float %181, i32 0 + %183 = insertelement <4 x float> %182, float 0.000000e+00, i32 1 + %184 = insertelement <4 x float> %183, float 0.000000e+00, i32 2 + %185 = insertelement <4 x float> %184, float 0.000000e+00, i32 3 + %186 = call <4 x float> @llvm.fma.v4f32(<4 x float> %178, <4 x float> %185, <4 x float> %171) + %187 = extractelement <4 x float> %186, i32 0 + %188 = getelementptr float, float* %2, i32 0 + %189 = getelementptr inbounds float, float* %188, i64 3 + store float %187, float* %189, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { +.preheader: + store float 0.000000e+00, float* %2, align 4 + %3 = load float, float* %0, align 4 + %4 = load float, float* %1, align 4 + %5 = fmul float %3, %4 + %6 = fadd float %5, 0.000000e+00 + store float %6, float* %2, align 4 + %7 = getelementptr inbounds float, float* %0, i64 1 + %8 = load float, float* %7, align 4 + %9 = getelementptr inbounds float, float* %1, i64 2 + %10 = load float, float* %9, align 4 + %11 = fmul float %8, %10 + %12 = fadd float %6, %11 + store float %12, float* %2, align 4 + %13 = getelementptr inbounds float, float* %2, i64 1 + store float 0.000000e+00, float* %13, align 4 + %14 = load float, float* %0, align 4 + %15 = getelementptr inbounds float, float* %1, i64 1 + %16 = load float, float* %15, align 4 + %17 = fmul float %14, %16 + %18 = fadd float %17, 0.000000e+00 + store float %18, float* %13, align 4 + %19 = load float, float* %7, align 4 + %20 = getelementptr inbounds float, float* %1, i64 3 + %21 = load float, float* %20, align 4 + %22 = fmul float %19, %21 + %23 = fadd float %18, %22 + store float %23, float* %13, align 4 + %24 = getelementptr inbounds float, float* %0, i64 2 + %25 = getelementptr inbounds float, float* %2, i64 2 + store float 0.000000e+00, float* %25, align 4 + %26 = load float, float* %24, align 4 + %27 
= load float, float* %1, align 4 + %28 = fmul float %26, %27 + %29 = fadd float %28, 0.000000e+00 + store float %29, float* %25, align 4 + %30 = getelementptr inbounds float, float* %0, i64 3 + %31 = load float, float* %30, align 4 + %32 = load float, float* %9, align 4 + %33 = fmul float %31, %32 + %34 = fadd float %29, %33 + store float %34, float* %25, align 4 + %35 = getelementptr inbounds float, float* %2, i64 3 + store float 0.000000e+00, float* %35, align 4 + %36 = load float, float* %24, align 4 + %37 = load float, float* %15, align 4 + %38 = fmul float %36, %37 + %39 = fadd float %38, 0.000000e+00 + store float %39, float* %35, align 4 + %40 = load float, float* %30, align 4 + %41 = load float, float* %20, align 4 + %42 = fmul float %40, %41 + %43 = fadd float %39, %42 + store float %43, float* %35, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader49: + %3 = getelementptr float, float* %0, i32 0 + %4 = bitcast float* %3 to i32* + %5 = load i32, i32* %4, align 4 + %6 = bitcast i32 %5 to float + %7 = insertelement <4 x float> zeroinitializer, float %6, i32 0 + %8 = insertelement <4 x float> %7, float 0.000000e+00, i32 1 + %9 = insertelement <4 x float> %8, float 0.000000e+00, i32 2 + %10 = insertelement <4 x float> %9, float 0.000000e+00, i32 3 + %11 = extractelement <4 x float> %10, i32 0 + %12 = getelementptr float, float* %2, i32 0 + %13 = bitcast float* %12 to i32* + %14 = bitcast i32* %13 to float* + store float %11, float* %14, align 4 + %15 = getelementptr float, float* %0, i32 0 + %16 = getelementptr inbounds float, float* %15, i64 1 + %17 = bitcast float* %16 to i32* + %18 = load i32, i32* %17, align 4 + %19 = bitcast i32 %18 to float + %20 = insertelement <4 x float> zeroinitializer, float %19, i32 0 + %21 = insertelement <4 x float> %20, float 0.000000e+00, i32 1 + %22 = insertelement <4 x float> %21, float 0.000000e+00, i32 2 + %23 = insertelement <4 x float> %22, float 0.000000e+00, i32 3 + %24 = extractelement <4 x float> %23, i32 0 + %25 = getelementptr float, float* %2, i32 0 + %26 = getelementptr inbounds float, float* %25, i64 1 + %27 = bitcast float* %26 to i32* + %28 = bitcast i32* %27 to float* + store float %24, float* %28, align 4 + %29 = getelementptr float, float* %0, i32 0 + %30 = getelementptr inbounds float, float* %29, i64 2 + %31 = bitcast float* %30 to i32* + %32 = load i32, i32* %31, align 4 + %33 = bitcast i32 %32 to float + %34 = insertelement <4 x float> zeroinitializer, float %33, i32 0 + %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 1 + %36 = insertelement <4 x float> %35, float 0.000000e+00, i32 2 + %37 = insertelement <4 x float> %36, float 0.000000e+00, i32 3 + %38 = extractelement <4 x float> %37, i32 0 + %39 = getelementptr float, float* %2, i32 0 + %40 = getelementptr inbounds float, float* %39, i64 2 + %41 = bitcast float* %40 to i32* + %42 = bitcast i32* %41 to float* + store float %38, float* %42, align 4 + %43 = getelementptr float, float* %0, i32 0 + %44 = getelementptr inbounds float, float* %43, i64 3 + %45 = bitcast float* %44 to i32* + %46 = load i32, i32* %45, align 4 + %47 = bitcast i32 %46 to float + %48 = fneg float %47 + %49 = insertelement <4 x float> zeroinitializer, float %48, i32 0 + %50 = bitcast i32 %5 to float + %51 = bitcast i32 %5 to float + %52 = fmul float %50, %51 + %53 = fadd float %52, 0.000000e+00 + %54 = bitcast i32 %32 to float + %55 = bitcast i32 %32 to float + %56 = fmul float %54, %55 
+ %57 = fadd float %53, %56 + %58 = call float @llvm.sqrt.f32(float %57) + %59 = bitcast i32 %5 to float + %60 = fcmp olt float %59, 0.000000e+00 + %61 = sext i1 %60 to i32 + %62 = bitcast i32 %5 to float + %63 = fcmp ogt float %62, 0.000000e+00 + %64 = zext i1 %63 to i32 + %65 = add nsw i32 %61, %64 + %66 = sitofp i32 %65 to float + %67 = fneg float %66 + %68 = fmul float %58, %67 + %69 = bitcast i32 %5 to float + %70 = fadd float %69, %68 + %71 = bitcast i32 %5 to float + %72 = bitcast i32 %5 to float + %73 = fmul float %71, %72 + %74 = fadd float %73, 0.000000e+00 + %75 = bitcast i32 %32 to float + %76 = bitcast i32 %32 to float + %77 = fmul float %75, %76 + %78 = fadd float %74, %77 + %79 = call float @llvm.sqrt.f32(float %78) + %80 = bitcast i32 %5 to float + %81 = fcmp olt float %80, 0.000000e+00 + %82 = sext i1 %81 to i32 + %83 = bitcast i32 %5 to float + %84 = fcmp ogt float %83, 0.000000e+00 + %85 = zext i1 %84 to i32 + %86 = add nsw i32 %82, %85 + %87 = sitofp i32 %86 to float + %88 = fneg float %87 + %89 = fmul float %79, %88 + %90 = bitcast i32 %5 to float + %91 = fadd float %90, %89 + %92 = bitcast i32 %5 to float + %93 = bitcast i32 %5 to float + %94 = fmul float %92, %93 + %95 = fadd float %94, 0.000000e+00 + %96 = bitcast i32 %32 to float + %97 = bitcast i32 %32 to float + %98 = fmul float %96, %97 + %99 = fadd float %95, %98 + %100 = call float @llvm.sqrt.f32(float %99) + %101 = bitcast i32 %5 to float + %102 = fcmp olt float %101, 0.000000e+00 + %103 = sext i1 %102 to i32 + %104 = bitcast i32 %5 to float + %105 = fcmp ogt float %104, 0.000000e+00 + %106 = zext i1 %105 to i32 + %107 = add nsw i32 %103, %106 + %108 = sitofp i32 %107 to float + %109 = fneg float %108 + %110 = fmul float %100, %109 + %111 = bitcast i32 %5 to float + %112 = fadd float %111, %110 + %113 = fmul float %91, %112 + %114 = fadd float %113, 0.000000e+00 + %115 = bitcast i32 %5 to float + %116 = bitcast i32 %5 to float + %117 = fmul float %115, %116 + %118 = fadd float %117, 0.000000e+00 + %119 = bitcast i32 %32 to float + %120 = bitcast i32 %32 to float + %121 = fmul float %119, %120 + %122 = fadd float %118, %121 + %123 = call float @llvm.sqrt.f32(float %122) + %124 = bitcast i32 %5 to float + %125 = fcmp olt float %124, 0.000000e+00 + %126 = sext i1 %125 to i32 + %127 = bitcast i32 %5 to float + %128 = fcmp ogt float %127, 0.000000e+00 + %129 = zext i1 %128 to i32 + %130 = add nsw i32 %126, %129 + %131 = sitofp i32 %130 to float + %132 = fneg float %131 + %133 = fmul float %123, %132 + %134 = fmul float %133, 0.000000e+00 + %135 = bitcast i32 %32 to float + %136 = fadd float %135, %134 + %137 = bitcast i32 %5 to float + %138 = bitcast i32 %5 to float + %139 = fmul float %137, %138 + %140 = fadd float %139, 0.000000e+00 + %141 = bitcast i32 %32 to float + %142 = bitcast i32 %32 to float + %143 = fmul float %141, %142 + %144 = fadd float %140, %143 + %145 = call float @llvm.sqrt.f32(float %144) + %146 = bitcast i32 %5 to float + %147 = fcmp olt float %146, 0.000000e+00 + %148 = sext i1 %147 to i32 + %149 = bitcast i32 %5 to float + %150 = fcmp ogt float %149, 0.000000e+00 + %151 = zext i1 %150 to i32 + %152 = add nsw i32 %148, %151 + %153 = sitofp i32 %152 to float + %154 = fneg float %153 + %155 = fmul float %145, %154 + %156 = fmul float %155, 0.000000e+00 + %157 = bitcast i32 %32 to float + %158 = fadd float %157, %156 + %159 = fmul float %136, %158 + %160 = fadd float %114, %159 + %161 = call float @llvm.sqrt.f32(float %160) + %162 = fadd float %161, 0.000000e+00 + %163 = fdiv float %70, %162 + 
%164 = fmul float %163, 2.000000e+00 + %165 = bitcast i32 %5 to float + %166 = bitcast i32 %5 to float + %167 = fmul float %165, %166 + %168 = fadd float %167, 0.000000e+00 + %169 = bitcast i32 %32 to float + %170 = bitcast i32 %32 to float + %171 = fmul float %169, %170 + %172 = fadd float %168, %171 + %173 = call float @llvm.sqrt.f32(float %172) + %174 = bitcast i32 %5 to float + %175 = fcmp olt float %174, 0.000000e+00 + %176 = sext i1 %175 to i32 + %177 = bitcast i32 %5 to float + %178 = fcmp ogt float %177, 0.000000e+00 + %179 = zext i1 %178 to i32 + %180 = add nsw i32 %176, %179 + %181 = sitofp i32 %180 to float + %182 = fneg float %181 + %183 = fmul float %173, %182 + %184 = bitcast i32 %5 to float + %185 = fadd float %184, %183 + %186 = bitcast i32 %5 to float + %187 = bitcast i32 %5 to float + %188 = fmul float %186, %187 + %189 = fadd float %188, 0.000000e+00 + %190 = bitcast i32 %32 to float + %191 = bitcast i32 %32 to float + %192 = fmul float %190, %191 + %193 = fadd float %189, %192 + %194 = call float @llvm.sqrt.f32(float %193) + %195 = bitcast i32 %5 to float + %196 = fcmp olt float %195, 0.000000e+00 + %197 = sext i1 %196 to i32 + %198 = bitcast i32 %5 to float + %199 = fcmp ogt float %198, 0.000000e+00 + %200 = zext i1 %199 to i32 + %201 = add nsw i32 %197, %200 + %202 = sitofp i32 %201 to float + %203 = fneg float %202 + %204 = fmul float %194, %203 + %205 = bitcast i32 %5 to float + %206 = fadd float %205, %204 + %207 = bitcast i32 %5 to float + %208 = bitcast i32 %5 to float + %209 = fmul float %207, %208 + %210 = fadd float %209, 0.000000e+00 + %211 = bitcast i32 %32 to float + %212 = bitcast i32 %32 to float + %213 = fmul float %211, %212 + %214 = fadd float %210, %213 + %215 = call float @llvm.sqrt.f32(float %214) + %216 = bitcast i32 %5 to float + %217 = fcmp olt float %216, 0.000000e+00 + %218 = sext i1 %217 to i32 + %219 = bitcast i32 %5 to float + %220 = fcmp ogt float %219, 0.000000e+00 + %221 = zext i1 %220 to i32 + %222 = add nsw i32 %218, %221 + %223 = sitofp i32 %222 to float + %224 = fneg float %223 + %225 = fmul float %215, %224 + %226 = bitcast i32 %5 to float + %227 = fadd float %226, %225 + %228 = fmul float %206, %227 + %229 = fadd float %228, 0.000000e+00 + %230 = bitcast i32 %5 to float + %231 = bitcast i32 %5 to float + %232 = fmul float %230, %231 + %233 = fadd float %232, 0.000000e+00 + %234 = bitcast i32 %32 to float + %235 = bitcast i32 %32 to float + %236 = fmul float %234, %235 + %237 = fadd float %233, %236 + %238 = call float @llvm.sqrt.f32(float %237) + %239 = bitcast i32 %5 to float + %240 = fcmp olt float %239, 0.000000e+00 + %241 = sext i1 %240 to i32 + %242 = bitcast i32 %5 to float + %243 = fcmp ogt float %242, 0.000000e+00 + %244 = zext i1 %243 to i32 + %245 = add nsw i32 %241, %244 + %246 = sitofp i32 %245 to float + %247 = fneg float %246 + %248 = fmul float %238, %247 + %249 = fmul float %248, 0.000000e+00 + %250 = bitcast i32 %32 to float + %251 = fadd float %250, %249 + %252 = bitcast i32 %5 to float + %253 = bitcast i32 %5 to float + %254 = fmul float %252, %253 + %255 = fadd float %254, 0.000000e+00 + %256 = bitcast i32 %32 to float + %257 = bitcast i32 %32 to float + %258 = fmul float %256, %257 + %259 = fadd float %255, %258 + %260 = call float @llvm.sqrt.f32(float %259) + %261 = bitcast i32 %5 to float + %262 = fcmp olt float %261, 0.000000e+00 + %263 = sext i1 %262 to i32 + %264 = bitcast i32 %5 to float + %265 = fcmp ogt float %264, 0.000000e+00 + %266 = zext i1 %265 to i32 + %267 = add nsw i32 %263, %266 + %268 = sitofp 
i32 %267 to float + %269 = fneg float %268 + %270 = fmul float %260, %269 + %271 = fmul float %270, 0.000000e+00 + %272 = bitcast i32 %32 to float + %273 = fadd float %272, %271 + %274 = fmul float %251, %273 + %275 = fadd float %229, %274 + %276 = call float @llvm.sqrt.f32(float %275) + %277 = fadd float %276, 0.000000e+00 + %278 = fdiv float %185, %277 + %279 = fmul float %164, %278 + %280 = insertelement <4 x float> %49, float %279, i32 1 + %281 = bitcast i32 %5 to float + %282 = bitcast i32 %5 to float + %283 = fmul float %281, %282 + %284 = fadd float %283, 0.000000e+00 + %285 = bitcast i32 %32 to float + %286 = bitcast i32 %32 to float + %287 = fmul float %285, %286 + %288 = fadd float %284, %287 + %289 = call float @llvm.sqrt.f32(float %288) + %290 = bitcast i32 %5 to float + %291 = fcmp olt float %290, 0.000000e+00 + %292 = sext i1 %291 to i32 + %293 = bitcast i32 %5 to float + %294 = fcmp ogt float %293, 0.000000e+00 + %295 = zext i1 %294 to i32 + %296 = add nsw i32 %292, %295 + %297 = sitofp i32 %296 to float + %298 = fneg float %297 + %299 = fmul float %289, %298 + %300 = bitcast i32 %5 to float + %301 = fadd float %300, %299 + %302 = bitcast i32 %5 to float + %303 = bitcast i32 %5 to float + %304 = fmul float %302, %303 + %305 = fadd float %304, 0.000000e+00 + %306 = bitcast i32 %32 to float + %307 = bitcast i32 %32 to float + %308 = fmul float %306, %307 + %309 = fadd float %305, %308 + %310 = call float @llvm.sqrt.f32(float %309) + %311 = bitcast i32 %5 to float + %312 = fcmp olt float %311, 0.000000e+00 + %313 = sext i1 %312 to i32 + %314 = bitcast i32 %5 to float + %315 = fcmp ogt float %314, 0.000000e+00 + %316 = zext i1 %315 to i32 + %317 = add nsw i32 %313, %316 + %318 = sitofp i32 %317 to float + %319 = fneg float %318 + %320 = fmul float %310, %319 + %321 = bitcast i32 %5 to float + %322 = fadd float %321, %320 + %323 = bitcast i32 %5 to float + %324 = bitcast i32 %5 to float + %325 = fmul float %323, %324 + %326 = fadd float %325, 0.000000e+00 + %327 = bitcast i32 %32 to float + %328 = bitcast i32 %32 to float + %329 = fmul float %327, %328 + %330 = fadd float %326, %329 + %331 = call float @llvm.sqrt.f32(float %330) + %332 = bitcast i32 %5 to float + %333 = fcmp olt float %332, 0.000000e+00 + %334 = sext i1 %333 to i32 + %335 = bitcast i32 %5 to float + %336 = fcmp ogt float %335, 0.000000e+00 + %337 = zext i1 %336 to i32 + %338 = add nsw i32 %334, %337 + %339 = sitofp i32 %338 to float + %340 = fneg float %339 + %341 = fmul float %331, %340 + %342 = bitcast i32 %5 to float + %343 = fadd float %342, %341 + %344 = fmul float %322, %343 + %345 = fadd float %344, 0.000000e+00 + %346 = bitcast i32 %5 to float + %347 = bitcast i32 %5 to float + %348 = fmul float %346, %347 + %349 = fadd float %348, 0.000000e+00 + %350 = bitcast i32 %32 to float + %351 = bitcast i32 %32 to float + %352 = fmul float %350, %351 + %353 = fadd float %349, %352 + %354 = call float @llvm.sqrt.f32(float %353) + %355 = bitcast i32 %5 to float + %356 = fcmp olt float %355, 0.000000e+00 + %357 = sext i1 %356 to i32 + %358 = bitcast i32 %5 to float + %359 = fcmp ogt float %358, 0.000000e+00 + %360 = zext i1 %359 to i32 + %361 = add nsw i32 %357, %360 + %362 = sitofp i32 %361 to float + %363 = fneg float %362 + %364 = fmul float %354, %363 + %365 = fmul float %364, 0.000000e+00 + %366 = bitcast i32 %32 to float + %367 = fadd float %366, %365 + %368 = bitcast i32 %5 to float + %369 = bitcast i32 %5 to float + %370 = fmul float %368, %369 + %371 = fadd float %370, 0.000000e+00 + %372 = bitcast i32 %32 
to float + %373 = bitcast i32 %32 to float + %374 = fmul float %372, %373 + %375 = fadd float %371, %374 + %376 = call float @llvm.sqrt.f32(float %375) + %377 = bitcast i32 %5 to float + %378 = fcmp olt float %377, 0.000000e+00 + %379 = sext i1 %378 to i32 + %380 = bitcast i32 %5 to float + %381 = fcmp ogt float %380, 0.000000e+00 + %382 = zext i1 %381 to i32 + %383 = add nsw i32 %379, %382 + %384 = sitofp i32 %383 to float + %385 = fneg float %384 + %386 = fmul float %376, %385 + %387 = fmul float %386, 0.000000e+00 + %388 = bitcast i32 %32 to float + %389 = fadd float %388, %387 + %390 = fmul float %367, %389 + %391 = fadd float %345, %390 + %392 = call float @llvm.sqrt.f32(float %391) + %393 = fadd float %392, 0.000000e+00 + %394 = fdiv float %301, %393 + %395 = fmul float %394, 2.000000e+00 + %396 = bitcast i32 %5 to float + %397 = bitcast i32 %5 to float + %398 = fmul float %396, %397 + %399 = fadd float %398, 0.000000e+00 + %400 = bitcast i32 %32 to float + %401 = bitcast i32 %32 to float + %402 = fmul float %400, %401 + %403 = fadd float %399, %402 + %404 = call float @llvm.sqrt.f32(float %403) + %405 = bitcast i32 %5 to float + %406 = fcmp olt float %405, 0.000000e+00 + %407 = sext i1 %406 to i32 + %408 = bitcast i32 %5 to float + %409 = fcmp ogt float %408, 0.000000e+00 + %410 = zext i1 %409 to i32 + %411 = add nsw i32 %407, %410 + %412 = sitofp i32 %411 to float + %413 = fneg float %412 + %414 = fmul float %404, %413 + %415 = fmul float %414, 0.000000e+00 + %416 = bitcast i32 %32 to float + %417 = fadd float %416, %415 + %418 = bitcast i32 %5 to float + %419 = bitcast i32 %5 to float + %420 = fmul float %418, %419 + %421 = fadd float %420, 0.000000e+00 + %422 = bitcast i32 %32 to float + %423 = bitcast i32 %32 to float + %424 = fmul float %422, %423 + %425 = fadd float %421, %424 + %426 = call float @llvm.sqrt.f32(float %425) + %427 = bitcast i32 %5 to float + %428 = fcmp olt float %427, 0.000000e+00 + %429 = sext i1 %428 to i32 + %430 = bitcast i32 %5 to float + %431 = fcmp ogt float %430, 0.000000e+00 + %432 = zext i1 %431 to i32 + %433 = add nsw i32 %429, %432 + %434 = sitofp i32 %433 to float + %435 = fneg float %434 + %436 = fmul float %426, %435 + %437 = bitcast i32 %5 to float + %438 = fadd float %437, %436 + %439 = bitcast i32 %5 to float + %440 = bitcast i32 %5 to float + %441 = fmul float %439, %440 + %442 = fadd float %441, 0.000000e+00 + %443 = bitcast i32 %32 to float + %444 = bitcast i32 %32 to float + %445 = fmul float %443, %444 + %446 = fadd float %442, %445 + %447 = call float @llvm.sqrt.f32(float %446) + %448 = bitcast i32 %5 to float + %449 = fcmp olt float %448, 0.000000e+00 + %450 = sext i1 %449 to i32 + %451 = bitcast i32 %5 to float + %452 = fcmp ogt float %451, 0.000000e+00 + %453 = zext i1 %452 to i32 + %454 = add nsw i32 %450, %453 + %455 = sitofp i32 %454 to float + %456 = fneg float %455 + %457 = fmul float %447, %456 + %458 = bitcast i32 %5 to float + %459 = fadd float %458, %457 + %460 = fmul float %438, %459 + %461 = fadd float %460, 0.000000e+00 + %462 = bitcast i32 %5 to float + %463 = bitcast i32 %5 to float + %464 = fmul float %462, %463 + %465 = fadd float %464, 0.000000e+00 + %466 = bitcast i32 %32 to float + %467 = bitcast i32 %32 to float + %468 = fmul float %466, %467 + %469 = fadd float %465, %468 + %470 = call float @llvm.sqrt.f32(float %469) + %471 = bitcast i32 %5 to float + %472 = fcmp olt float %471, 0.000000e+00 + %473 = sext i1 %472 to i32 + %474 = bitcast i32 %5 to float + %475 = fcmp ogt float %474, 0.000000e+00 + %476 = zext i1 
%475 to i32 + %477 = add nsw i32 %473, %476 + %478 = sitofp i32 %477 to float + %479 = fneg float %478 + %480 = fmul float %470, %479 + %481 = fmul float %480, 0.000000e+00 + %482 = bitcast i32 %32 to float + %483 = fadd float %482, %481 + %484 = bitcast i32 %5 to float + %485 = bitcast i32 %5 to float + %486 = fmul float %484, %485 + %487 = fadd float %486, 0.000000e+00 + %488 = bitcast i32 %32 to float + %489 = bitcast i32 %32 to float + %490 = fmul float %488, %489 + %491 = fadd float %487, %490 + %492 = call float @llvm.sqrt.f32(float %491) + %493 = bitcast i32 %5 to float + %494 = fcmp olt float %493, 0.000000e+00 + %495 = sext i1 %494 to i32 + %496 = bitcast i32 %5 to float + %497 = fcmp ogt float %496, 0.000000e+00 + %498 = zext i1 %497 to i32 + %499 = add nsw i32 %495, %498 + %500 = sitofp i32 %499 to float + %501 = fneg float %500 + %502 = fmul float %492, %501 + %503 = fmul float %502, 0.000000e+00 + %504 = bitcast i32 %32 to float + %505 = fadd float %504, %503 + %506 = fmul float %483, %505 + %507 = fadd float %461, %506 + %508 = call float @llvm.sqrt.f32(float %507) + %509 = fadd float %508, 0.000000e+00 + %510 = fdiv float %417, %509 + %511 = fmul float %395, %510 + %512 = insertelement <4 x float> %280, float %511, i32 2 + %513 = bitcast i32 %5 to float + %514 = bitcast i32 %5 to float + %515 = fmul float %513, %514 + %516 = fadd float %515, 0.000000e+00 + %517 = bitcast i32 %32 to float + %518 = bitcast i32 %32 to float + %519 = fmul float %517, %518 + %520 = fadd float %516, %519 + %521 = call float @llvm.sqrt.f32(float %520) + %522 = bitcast i32 %5 to float + %523 = fcmp olt float %522, 0.000000e+00 + %524 = sext i1 %523 to i32 + %525 = bitcast i32 %5 to float + %526 = fcmp ogt float %525, 0.000000e+00 + %527 = zext i1 %526 to i32 + %528 = add nsw i32 %524, %527 + %529 = sitofp i32 %528 to float + %530 = fneg float %529 + %531 = fmul float %521, %530 + %532 = fmul float %531, 0.000000e+00 + %533 = bitcast i32 %32 to float + %534 = fadd float %533, %532 + %535 = bitcast i32 %5 to float + %536 = bitcast i32 %5 to float + %537 = fmul float %535, %536 + %538 = fadd float %537, 0.000000e+00 + %539 = bitcast i32 %32 to float + %540 = bitcast i32 %32 to float + %541 = fmul float %539, %540 + %542 = fadd float %538, %541 + %543 = call float @llvm.sqrt.f32(float %542) + %544 = bitcast i32 %5 to float + %545 = fcmp olt float %544, 0.000000e+00 + %546 = sext i1 %545 to i32 + %547 = bitcast i32 %5 to float + %548 = fcmp ogt float %547, 0.000000e+00 + %549 = zext i1 %548 to i32 + %550 = add nsw i32 %546, %549 + %551 = sitofp i32 %550 to float + %552 = fneg float %551 + %553 = fmul float %543, %552 + %554 = bitcast i32 %5 to float + %555 = fadd float %554, %553 + %556 = bitcast i32 %5 to float + %557 = bitcast i32 %5 to float + %558 = fmul float %556, %557 + %559 = fadd float %558, 0.000000e+00 + %560 = bitcast i32 %32 to float + %561 = bitcast i32 %32 to float + %562 = fmul float %560, %561 + %563 = fadd float %559, %562 + %564 = call float @llvm.sqrt.f32(float %563) + %565 = bitcast i32 %5 to float + %566 = fcmp olt float %565, 0.000000e+00 + %567 = sext i1 %566 to i32 + %568 = bitcast i32 %5 to float + %569 = fcmp ogt float %568, 0.000000e+00 + %570 = zext i1 %569 to i32 + %571 = add nsw i32 %567, %570 + %572 = sitofp i32 %571 to float + %573 = fneg float %572 + %574 = fmul float %564, %573 + %575 = bitcast i32 %5 to float + %576 = fadd float %575, %574 + %577 = fmul float %555, %576 + %578 = fadd float %577, 0.000000e+00 + %579 = bitcast i32 %5 to float + %580 = bitcast i32 %5 to 
float + %581 = fmul float %579, %580 + %582 = fadd float %581, 0.000000e+00 + %583 = bitcast i32 %32 to float + %584 = bitcast i32 %32 to float + %585 = fmul float %583, %584 + %586 = fadd float %582, %585 + %587 = call float @llvm.sqrt.f32(float %586) + %588 = bitcast i32 %5 to float + %589 = fcmp olt float %588, 0.000000e+00 + %590 = sext i1 %589 to i32 + %591 = bitcast i32 %5 to float + %592 = fcmp ogt float %591, 0.000000e+00 + %593 = zext i1 %592 to i32 + %594 = add nsw i32 %590, %593 + %595 = sitofp i32 %594 to float + %596 = fneg float %595 + %597 = fmul float %587, %596 + %598 = fmul float %597, 0.000000e+00 + %599 = bitcast i32 %32 to float + %600 = fadd float %599, %598 + %601 = bitcast i32 %5 to float + %602 = bitcast i32 %5 to float + %603 = fmul float %601, %602 + %604 = fadd float %603, 0.000000e+00 + %605 = bitcast i32 %32 to float + %606 = bitcast i32 %32 to float + %607 = fmul float %605, %606 + %608 = fadd float %604, %607 + %609 = call float @llvm.sqrt.f32(float %608) + %610 = bitcast i32 %5 to float + %611 = fcmp olt float %610, 0.000000e+00 + %612 = sext i1 %611 to i32 + %613 = bitcast i32 %5 to float + %614 = fcmp ogt float %613, 0.000000e+00 + %615 = zext i1 %614 to i32 + %616 = add nsw i32 %612, %615 + %617 = sitofp i32 %616 to float + %618 = fneg float %617 + %619 = fmul float %609, %618 + %620 = fmul float %619, 0.000000e+00 + %621 = bitcast i32 %32 to float + %622 = fadd float %621, %620 + %623 = fmul float %600, %622 + %624 = fadd float %578, %623 + %625 = call float @llvm.sqrt.f32(float %624) + %626 = fadd float %625, 0.000000e+00 + %627 = fdiv float %534, %626 + %628 = fmul float %627, 2.000000e+00 + %629 = bitcast i32 %5 to float + %630 = bitcast i32 %5 to float + %631 = fmul float %629, %630 + %632 = fadd float %631, 0.000000e+00 + %633 = bitcast i32 %32 to float + %634 = bitcast i32 %32 to float + %635 = fmul float %633, %634 + %636 = fadd float %632, %635 + %637 = call float @llvm.sqrt.f32(float %636) + %638 = bitcast i32 %5 to float + %639 = fcmp olt float %638, 0.000000e+00 + %640 = sext i1 %639 to i32 + %641 = bitcast i32 %5 to float + %642 = fcmp ogt float %641, 0.000000e+00 + %643 = zext i1 %642 to i32 + %644 = add nsw i32 %640, %643 + %645 = sitofp i32 %644 to float + %646 = fneg float %645 + %647 = fmul float %637, %646 + %648 = bitcast i32 %5 to float + %649 = fadd float %648, %647 + %650 = bitcast i32 %5 to float + %651 = bitcast i32 %5 to float + %652 = fmul float %650, %651 + %653 = fadd float %652, 0.000000e+00 + %654 = bitcast i32 %32 to float + %655 = bitcast i32 %32 to float + %656 = fmul float %654, %655 + %657 = fadd float %653, %656 + %658 = call float @llvm.sqrt.f32(float %657) + %659 = bitcast i32 %5 to float + %660 = fcmp olt float %659, 0.000000e+00 + %661 = sext i1 %660 to i32 + %662 = bitcast i32 %5 to float + %663 = fcmp ogt float %662, 0.000000e+00 + %664 = zext i1 %663 to i32 + %665 = add nsw i32 %661, %664 + %666 = sitofp i32 %665 to float + %667 = fneg float %666 + %668 = fmul float %658, %667 + %669 = bitcast i32 %5 to float + %670 = fadd float %669, %668 + %671 = bitcast i32 %5 to float + %672 = bitcast i32 %5 to float + %673 = fmul float %671, %672 + %674 = fadd float %673, 0.000000e+00 + %675 = bitcast i32 %32 to float + %676 = bitcast i32 %32 to float + %677 = fmul float %675, %676 + %678 = fadd float %674, %677 + %679 = call float @llvm.sqrt.f32(float %678) + %680 = bitcast i32 %5 to float + %681 = fcmp olt float %680, 0.000000e+00 + %682 = sext i1 %681 to i32 + %683 = bitcast i32 %5 to float + %684 = fcmp ogt float %683, 
0.000000e+00 + %685 = zext i1 %684 to i32 + %686 = add nsw i32 %682, %685 + %687 = sitofp i32 %686 to float + %688 = fneg float %687 + %689 = fmul float %679, %688 + %690 = bitcast i32 %5 to float + %691 = fadd float %690, %689 + %692 = fmul float %670, %691 + %693 = fadd float %692, 0.000000e+00 + %694 = bitcast i32 %5 to float + %695 = bitcast i32 %5 to float + %696 = fmul float %694, %695 + %697 = fadd float %696, 0.000000e+00 + %698 = bitcast i32 %32 to float + %699 = bitcast i32 %32 to float + %700 = fmul float %698, %699 + %701 = fadd float %697, %700 + %702 = call float @llvm.sqrt.f32(float %701) + %703 = bitcast i32 %5 to float + %704 = fcmp olt float %703, 0.000000e+00 + %705 = sext i1 %704 to i32 + %706 = bitcast i32 %5 to float + %707 = fcmp ogt float %706, 0.000000e+00 + %708 = zext i1 %707 to i32 + %709 = add nsw i32 %705, %708 + %710 = sitofp i32 %709 to float + %711 = fneg float %710 + %712 = fmul float %702, %711 + %713 = fmul float %712, 0.000000e+00 + %714 = bitcast i32 %32 to float + %715 = fadd float %714, %713 + %716 = bitcast i32 %5 to float + %717 = bitcast i32 %5 to float + %718 = fmul float %716, %717 + %719 = fadd float %718, 0.000000e+00 + %720 = bitcast i32 %32 to float + %721 = bitcast i32 %32 to float + %722 = fmul float %720, %721 + %723 = fadd float %719, %722 + %724 = call float @llvm.sqrt.f32(float %723) + %725 = bitcast i32 %5 to float + %726 = fcmp olt float %725, 0.000000e+00 + %727 = sext i1 %726 to i32 + %728 = bitcast i32 %5 to float + %729 = fcmp ogt float %728, 0.000000e+00 + %730 = zext i1 %729 to i32 + %731 = add nsw i32 %727, %730 + %732 = sitofp i32 %731 to float + %733 = fneg float %732 + %734 = fmul float %724, %733 + %735 = fmul float %734, 0.000000e+00 + %736 = bitcast i32 %32 to float + %737 = fadd float %736, %735 + %738 = fmul float %715, %737 + %739 = fadd float %693, %738 + %740 = call float @llvm.sqrt.f32(float %739) + %741 = fadd float %740, 0.000000e+00 + %742 = fdiv float %649, %741 + %743 = fmul float %628, %742 + %744 = insertelement <4 x float> %512, float %743, i32 3 + %745 = fsub <4 x float> , %744 + %746 = bitcast i32 %5 to float + %747 = bitcast i32 %5 to float + %748 = fmul float %746, %747 + %749 = fadd float %748, 0.000000e+00 + %750 = bitcast i32 %32 to float + %751 = bitcast i32 %32 to float + %752 = fmul float %750, %751 + %753 = fadd float %749, %752 + %754 = call float @llvm.sqrt.f32(float %753) + %755 = bitcast i32 %5 to float + %756 = fcmp olt float %755, 0.000000e+00 + %757 = sext i1 %756 to i32 + %758 = bitcast i32 %5 to float + %759 = fcmp ogt float %758, 0.000000e+00 + %760 = zext i1 %759 to i32 + %761 = add nsw i32 %757, %760 + %762 = sitofp i32 %761 to float + %763 = fneg float %762 + %764 = fmul float %754, %763 + %765 = fmul float %764, 0.000000e+00 + %766 = bitcast i32 %32 to float + %767 = fadd float %766, %765 + %768 = bitcast i32 %5 to float + %769 = bitcast i32 %5 to float + %770 = fmul float %768, %769 + %771 = fadd float %770, 0.000000e+00 + %772 = bitcast i32 %32 to float + %773 = bitcast i32 %32 to float + %774 = fmul float %772, %773 + %775 = fadd float %771, %774 + %776 = call float @llvm.sqrt.f32(float %775) + %777 = bitcast i32 %5 to float + %778 = fcmp olt float %777, 0.000000e+00 + %779 = sext i1 %778 to i32 + %780 = bitcast i32 %5 to float + %781 = fcmp ogt float %780, 0.000000e+00 + %782 = zext i1 %781 to i32 + %783 = add nsw i32 %779, %782 + %784 = sitofp i32 %783 to float + %785 = fneg float %784 + %786 = fmul float %776, %785 + %787 = bitcast i32 %5 to float + %788 = fadd float %787, 
%786 + %789 = bitcast i32 %5 to float + %790 = bitcast i32 %5 to float + %791 = fmul float %789, %790 + %792 = fadd float %791, 0.000000e+00 + %793 = bitcast i32 %32 to float + %794 = bitcast i32 %32 to float + %795 = fmul float %793, %794 + %796 = fadd float %792, %795 + %797 = call float @llvm.sqrt.f32(float %796) + %798 = bitcast i32 %5 to float + %799 = fcmp olt float %798, 0.000000e+00 + %800 = sext i1 %799 to i32 + %801 = bitcast i32 %5 to float + %802 = fcmp ogt float %801, 0.000000e+00 + %803 = zext i1 %802 to i32 + %804 = add nsw i32 %800, %803 + %805 = sitofp i32 %804 to float + %806 = fneg float %805 + %807 = fmul float %797, %806 + %808 = bitcast i32 %5 to float + %809 = fadd float %808, %807 + %810 = fmul float %788, %809 + %811 = fadd float %810, 0.000000e+00 + %812 = bitcast i32 %5 to float + %813 = bitcast i32 %5 to float + %814 = fmul float %812, %813 + %815 = fadd float %814, 0.000000e+00 + %816 = bitcast i32 %32 to float + %817 = bitcast i32 %32 to float + %818 = fmul float %816, %817 + %819 = fadd float %815, %818 + %820 = call float @llvm.sqrt.f32(float %819) + %821 = bitcast i32 %5 to float + %822 = fcmp olt float %821, 0.000000e+00 + %823 = sext i1 %822 to i32 + %824 = bitcast i32 %5 to float + %825 = fcmp ogt float %824, 0.000000e+00 + %826 = zext i1 %825 to i32 + %827 = add nsw i32 %823, %826 + %828 = sitofp i32 %827 to float + %829 = fneg float %828 + %830 = fmul float %820, %829 + %831 = fmul float %830, 0.000000e+00 + %832 = bitcast i32 %32 to float + %833 = fadd float %832, %831 + %834 = bitcast i32 %5 to float + %835 = bitcast i32 %5 to float + %836 = fmul float %834, %835 + %837 = fadd float %836, 0.000000e+00 + %838 = bitcast i32 %32 to float + %839 = bitcast i32 %32 to float + %840 = fmul float %838, %839 + %841 = fadd float %837, %840 + %842 = call float @llvm.sqrt.f32(float %841) + %843 = bitcast i32 %5 to float + %844 = fcmp olt float %843, 0.000000e+00 + %845 = sext i1 %844 to i32 + %846 = bitcast i32 %5 to float + %847 = fcmp ogt float %846, 0.000000e+00 + %848 = zext i1 %847 to i32 + %849 = add nsw i32 %845, %848 + %850 = sitofp i32 %849 to float + %851 = fneg float %850 + %852 = fmul float %842, %851 + %853 = fmul float %852, 0.000000e+00 + %854 = bitcast i32 %32 to float + %855 = fadd float %854, %853 + %856 = fmul float %833, %855 + %857 = fadd float %811, %856 + %858 = call float @llvm.sqrt.f32(float %857) + %859 = fadd float %858, 0.000000e+00 + %860 = fdiv float %767, %859 + %861 = fmul float %860, 2.000000e+00 + %862 = bitcast i32 %5 to float + %863 = bitcast i32 %5 to float + %864 = fmul float %862, %863 + %865 = fadd float %864, 0.000000e+00 + %866 = bitcast i32 %32 to float + %867 = bitcast i32 %32 to float + %868 = fmul float %866, %867 + %869 = fadd float %865, %868 + %870 = call float @llvm.sqrt.f32(float %869) + %871 = bitcast i32 %5 to float + %872 = fcmp olt float %871, 0.000000e+00 + %873 = sext i1 %872 to i32 + %874 = bitcast i32 %5 to float + %875 = fcmp ogt float %874, 0.000000e+00 + %876 = zext i1 %875 to i32 + %877 = add nsw i32 %873, %876 + %878 = sitofp i32 %877 to float + %879 = fneg float %878 + %880 = fmul float %870, %879 + %881 = fmul float %880, 0.000000e+00 + %882 = bitcast i32 %32 to float + %883 = fadd float %882, %881 + %884 = bitcast i32 %5 to float + %885 = bitcast i32 %5 to float + %886 = fmul float %884, %885 + %887 = fadd float %886, 0.000000e+00 + %888 = bitcast i32 %32 to float + %889 = bitcast i32 %32 to float + %890 = fmul float %888, %889 + %891 = fadd float %887, %890 + %892 = call float 
@llvm.sqrt.f32(float %891) + %893 = bitcast i32 %5 to float + %894 = fcmp olt float %893, 0.000000e+00 + %895 = sext i1 %894 to i32 + %896 = bitcast i32 %5 to float + %897 = fcmp ogt float %896, 0.000000e+00 + %898 = zext i1 %897 to i32 + %899 = add nsw i32 %895, %898 + %900 = sitofp i32 %899 to float + %901 = fneg float %900 + %902 = fmul float %892, %901 + %903 = bitcast i32 %5 to float + %904 = fadd float %903, %902 + %905 = bitcast i32 %5 to float + %906 = bitcast i32 %5 to float + %907 = fmul float %905, %906 + %908 = fadd float %907, 0.000000e+00 + %909 = bitcast i32 %32 to float + %910 = bitcast i32 %32 to float + %911 = fmul float %909, %910 + %912 = fadd float %908, %911 + %913 = call float @llvm.sqrt.f32(float %912) + %914 = bitcast i32 %5 to float + %915 = fcmp olt float %914, 0.000000e+00 + %916 = sext i1 %915 to i32 + %917 = bitcast i32 %5 to float + %918 = fcmp ogt float %917, 0.000000e+00 + %919 = zext i1 %918 to i32 + %920 = add nsw i32 %916, %919 + %921 = sitofp i32 %920 to float + %922 = fneg float %921 + %923 = fmul float %913, %922 + %924 = bitcast i32 %5 to float + %925 = fadd float %924, %923 + %926 = fmul float %904, %925 + %927 = fadd float %926, 0.000000e+00 + %928 = bitcast i32 %5 to float + %929 = bitcast i32 %5 to float + %930 = fmul float %928, %929 + %931 = fadd float %930, 0.000000e+00 + %932 = bitcast i32 %32 to float + %933 = bitcast i32 %32 to float + %934 = fmul float %932, %933 + %935 = fadd float %931, %934 + %936 = call float @llvm.sqrt.f32(float %935) + %937 = bitcast i32 %5 to float + %938 = fcmp olt float %937, 0.000000e+00 + %939 = sext i1 %938 to i32 + %940 = bitcast i32 %5 to float + %941 = fcmp ogt float %940, 0.000000e+00 + %942 = zext i1 %941 to i32 + %943 = add nsw i32 %939, %942 + %944 = sitofp i32 %943 to float + %945 = fneg float %944 + %946 = fmul float %936, %945 + %947 = fmul float %946, 0.000000e+00 + %948 = bitcast i32 %32 to float + %949 = fadd float %948, %947 + %950 = bitcast i32 %5 to float + %951 = bitcast i32 %5 to float + %952 = fmul float %950, %951 + %953 = fadd float %952, 0.000000e+00 + %954 = bitcast i32 %32 to float + %955 = bitcast i32 %32 to float + %956 = fmul float %954, %955 + %957 = fadd float %953, %956 + %958 = call float @llvm.sqrt.f32(float %957) + %959 = bitcast i32 %5 to float + %960 = fcmp olt float %959, 0.000000e+00 + %961 = sext i1 %960 to i32 + %962 = bitcast i32 %5 to float + %963 = fcmp ogt float %962, 0.000000e+00 + %964 = zext i1 %963 to i32 + %965 = add nsw i32 %961, %964 + %966 = sitofp i32 %965 to float + %967 = fneg float %966 + %968 = fmul float %958, %967 + %969 = fmul float %968, 0.000000e+00 + %970 = bitcast i32 %32 to float + %971 = fadd float %970, %969 + %972 = fmul float %949, %971 + %973 = fadd float %927, %972 + %974 = call float @llvm.sqrt.f32(float %973) + %975 = fadd float %974, 0.000000e+00 + %976 = fdiv float %883, %975 + %977 = fmul float %861, %976 + %978 = fsub float 1.000000e+00, %977 + %979 = insertelement <4 x float> zeroinitializer, float %978, i32 0 + %980 = insertelement <4 x float> %979, float 0.000000e+00, i32 1 + %981 = insertelement <4 x float> %980, float 0.000000e+00, i32 2 + %982 = insertelement <4 x float> %981, float 0.000000e+00, i32 3 + %983 = shufflevector <4 x float> %745, <4 x float> %982, <8 x i32> + %984 = extractelement <8 x float> %983, i32 0 + %985 = getelementptr float, float* %2, i32 0 + %986 = getelementptr inbounds float, float* %985, i64 3 + %987 = bitcast float* %986 to i32* + %988 = bitcast i32* %987 to float* + store float %984, float* %988, align 
4 + %989 = bitcast float* %1 to i8* + %990 = alloca [4 x float], align 16 + %991 = bitcast [4 x float]* %990 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %989, i8* nonnull align 16 dereferenceable(16) %991, i64 16, i1 false) + store float 0.000000e+00, float* %2, align 4 + %992 = bitcast i32 %5 to float + %993 = bitcast i32 %5 to float + %994 = fmul float %992, %993 + %995 = fadd float %994, 0.000000e+00 + %996 = bitcast i32 %32 to float + %997 = bitcast i32 %32 to float + %998 = fmul float %996, %997 + %999 = fadd float %995, %998 + %1000 = call float @llvm.sqrt.f32(float %999) + %1001 = bitcast i32 %5 to float + %1002 = fcmp olt float %1001, 0.000000e+00 + %1003 = sext i1 %1002 to i32 + %1004 = bitcast i32 %5 to float + %1005 = fcmp ogt float %1004, 0.000000e+00 + %1006 = zext i1 %1005 to i32 + %1007 = add nsw i32 %1003, %1006 + %1008 = sitofp i32 %1007 to float + %1009 = fneg float %1008 + %1010 = fmul float %1000, %1009 + %1011 = bitcast i32 %5 to float + %1012 = fadd float %1011, %1010 + %1013 = bitcast i32 %5 to float + %1014 = bitcast i32 %5 to float + %1015 = fmul float %1013, %1014 + %1016 = fadd float %1015, 0.000000e+00 + %1017 = bitcast i32 %32 to float + %1018 = bitcast i32 %32 to float + %1019 = fmul float %1017, %1018 + %1020 = fadd float %1016, %1019 + %1021 = call float @llvm.sqrt.f32(float %1020) + %1022 = bitcast i32 %5 to float + %1023 = fcmp olt float %1022, 0.000000e+00 + %1024 = sext i1 %1023 to i32 + %1025 = bitcast i32 %5 to float + %1026 = fcmp ogt float %1025, 0.000000e+00 + %1027 = zext i1 %1026 to i32 + %1028 = add nsw i32 %1024, %1027 + %1029 = sitofp i32 %1028 to float + %1030 = fneg float %1029 + %1031 = fmul float %1021, %1030 + %1032 = bitcast i32 %5 to float + %1033 = fadd float %1032, %1031 + %1034 = bitcast i32 %5 to float + %1035 = bitcast i32 %5 to float + %1036 = fmul float %1034, %1035 + %1037 = fadd float %1036, 0.000000e+00 + %1038 = bitcast i32 %32 to float + %1039 = bitcast i32 %32 to float + %1040 = fmul float %1038, %1039 + %1041 = fadd float %1037, %1040 + %1042 = call float @llvm.sqrt.f32(float %1041) + %1043 = bitcast i32 %5 to float + %1044 = fcmp olt float %1043, 0.000000e+00 + %1045 = sext i1 %1044 to i32 + %1046 = bitcast i32 %5 to float + %1047 = fcmp ogt float %1046, 0.000000e+00 + %1048 = zext i1 %1047 to i32 + %1049 = add nsw i32 %1045, %1048 + %1050 = sitofp i32 %1049 to float + %1051 = fneg float %1050 + %1052 = fmul float %1042, %1051 + %1053 = bitcast i32 %5 to float + %1054 = fadd float %1053, %1052 + %1055 = fmul float %1033, %1054 + %1056 = fadd float %1055, 0.000000e+00 + %1057 = bitcast i32 %5 to float + %1058 = bitcast i32 %5 to float + %1059 = fmul float %1057, %1058 + %1060 = fadd float %1059, 0.000000e+00 + %1061 = bitcast i32 %32 to float + %1062 = bitcast i32 %32 to float + %1063 = fmul float %1061, %1062 + %1064 = fadd float %1060, %1063 + %1065 = call float @llvm.sqrt.f32(float %1064) + %1066 = bitcast i32 %5 to float + %1067 = fcmp olt float %1066, 0.000000e+00 + %1068 = sext i1 %1067 to i32 + %1069 = bitcast i32 %5 to float + %1070 = fcmp ogt float %1069, 0.000000e+00 + %1071 = zext i1 %1070 to i32 + %1072 = add nsw i32 %1068, %1071 + %1073 = sitofp i32 %1072 to float + %1074 = fneg float %1073 + %1075 = fmul float %1065, %1074 + %1076 = fmul float %1075, 0.000000e+00 + %1077 = bitcast i32 %32 to float + %1078 = fadd float %1077, %1076 + %1079 = bitcast i32 %5 to float + %1080 = bitcast i32 %5 to float + %1081 = fmul float %1079, %1080 + %1082 = fadd float %1081, 
0.000000e+00 + %1083 = bitcast i32 %32 to float + %1084 = bitcast i32 %32 to float + %1085 = fmul float %1083, %1084 + %1086 = fadd float %1082, %1085 + %1087 = call float @llvm.sqrt.f32(float %1086) + %1088 = bitcast i32 %5 to float + %1089 = fcmp olt float %1088, 0.000000e+00 + %1090 = sext i1 %1089 to i32 + %1091 = bitcast i32 %5 to float + %1092 = fcmp ogt float %1091, 0.000000e+00 + %1093 = zext i1 %1092 to i32 + %1094 = add nsw i32 %1090, %1093 + %1095 = sitofp i32 %1094 to float + %1096 = fneg float %1095 + %1097 = fmul float %1087, %1096 + %1098 = fmul float %1097, 0.000000e+00 + %1099 = bitcast i32 %32 to float + %1100 = fadd float %1099, %1098 + %1101 = fmul float %1078, %1100 + %1102 = fadd float %1056, %1101 + %1103 = call float @llvm.sqrt.f32(float %1102) + %1104 = fadd float %1103, 0.000000e+00 + %1105 = fdiv float %1012, %1104 + %1106 = fmul float %1105, 2.000000e+00 + %1107 = bitcast i32 %5 to float + %1108 = bitcast i32 %5 to float + %1109 = fmul float %1107, %1108 + %1110 = fadd float %1109, 0.000000e+00 + %1111 = bitcast i32 %32 to float + %1112 = bitcast i32 %32 to float + %1113 = fmul float %1111, %1112 + %1114 = fadd float %1110, %1113 + %1115 = call float @llvm.sqrt.f32(float %1114) + %1116 = bitcast i32 %5 to float + %1117 = fcmp olt float %1116, 0.000000e+00 + %1118 = sext i1 %1117 to i32 + %1119 = bitcast i32 %5 to float + %1120 = fcmp ogt float %1119, 0.000000e+00 + %1121 = zext i1 %1120 to i32 + %1122 = add nsw i32 %1118, %1121 + %1123 = sitofp i32 %1122 to float + %1124 = fneg float %1123 + %1125 = fmul float %1115, %1124 + %1126 = bitcast i32 %5 to float + %1127 = fadd float %1126, %1125 + %1128 = bitcast i32 %5 to float + %1129 = bitcast i32 %5 to float + %1130 = fmul float %1128, %1129 + %1131 = fadd float %1130, 0.000000e+00 + %1132 = bitcast i32 %32 to float + %1133 = bitcast i32 %32 to float + %1134 = fmul float %1132, %1133 + %1135 = fadd float %1131, %1134 + %1136 = call float @llvm.sqrt.f32(float %1135) + %1137 = bitcast i32 %5 to float + %1138 = fcmp olt float %1137, 0.000000e+00 + %1139 = sext i1 %1138 to i32 + %1140 = bitcast i32 %5 to float + %1141 = fcmp ogt float %1140, 0.000000e+00 + %1142 = zext i1 %1141 to i32 + %1143 = add nsw i32 %1139, %1142 + %1144 = sitofp i32 %1143 to float + %1145 = fneg float %1144 + %1146 = fmul float %1136, %1145 + %1147 = bitcast i32 %5 to float + %1148 = fadd float %1147, %1146 + %1149 = bitcast i32 %5 to float + %1150 = bitcast i32 %5 to float + %1151 = fmul float %1149, %1150 + %1152 = fadd float %1151, 0.000000e+00 + %1153 = bitcast i32 %32 to float + %1154 = bitcast i32 %32 to float + %1155 = fmul float %1153, %1154 + %1156 = fadd float %1152, %1155 + %1157 = call float @llvm.sqrt.f32(float %1156) + %1158 = bitcast i32 %5 to float + %1159 = fcmp olt float %1158, 0.000000e+00 + %1160 = sext i1 %1159 to i32 + %1161 = bitcast i32 %5 to float + %1162 = fcmp ogt float %1161, 0.000000e+00 + %1163 = zext i1 %1162 to i32 + %1164 = add nsw i32 %1160, %1163 + %1165 = sitofp i32 %1164 to float + %1166 = fneg float %1165 + %1167 = fmul float %1157, %1166 + %1168 = bitcast i32 %5 to float + %1169 = fadd float %1168, %1167 + %1170 = fmul float %1148, %1169 + %1171 = fadd float %1170, 0.000000e+00 + %1172 = bitcast i32 %5 to float + %1173 = bitcast i32 %5 to float + %1174 = fmul float %1172, %1173 + %1175 = fadd float %1174, 0.000000e+00 + %1176 = bitcast i32 %32 to float + %1177 = bitcast i32 %32 to float + %1178 = fmul float %1176, %1177 + %1179 = fadd float %1175, %1178 + %1180 = call float @llvm.sqrt.f32(float %1179) + 
%1181 = bitcast i32 %5 to float + %1182 = fcmp olt float %1181, 0.000000e+00 + %1183 = sext i1 %1182 to i32 + %1184 = bitcast i32 %5 to float + %1185 = fcmp ogt float %1184, 0.000000e+00 + %1186 = zext i1 %1185 to i32 + %1187 = add nsw i32 %1183, %1186 + %1188 = sitofp i32 %1187 to float + %1189 = fneg float %1188 + %1190 = fmul float %1180, %1189 + %1191 = fmul float %1190, 0.000000e+00 + %1192 = bitcast i32 %32 to float + %1193 = fadd float %1192, %1191 + %1194 = bitcast i32 %5 to float + %1195 = bitcast i32 %5 to float + %1196 = fmul float %1194, %1195 + %1197 = fadd float %1196, 0.000000e+00 + %1198 = bitcast i32 %32 to float + %1199 = bitcast i32 %32 to float + %1200 = fmul float %1198, %1199 + %1201 = fadd float %1197, %1200 + %1202 = call float @llvm.sqrt.f32(float %1201) + %1203 = bitcast i32 %5 to float + %1204 = fcmp olt float %1203, 0.000000e+00 + %1205 = sext i1 %1204 to i32 + %1206 = bitcast i32 %5 to float + %1207 = fcmp ogt float %1206, 0.000000e+00 + %1208 = zext i1 %1207 to i32 + %1209 = add nsw i32 %1205, %1208 + %1210 = sitofp i32 %1209 to float + %1211 = fneg float %1210 + %1212 = fmul float %1202, %1211 + %1213 = fmul float %1212, 0.000000e+00 + %1214 = bitcast i32 %32 to float + %1215 = fadd float %1214, %1213 + %1216 = fmul float %1193, %1215 + %1217 = fadd float %1171, %1216 + %1218 = call float @llvm.sqrt.f32(float %1217) + %1219 = fadd float %1218, 0.000000e+00 + %1220 = fdiv float %1127, %1219 + %1221 = fmul float %1106, %1220 + %1222 = fsub float 1.000000e+00, %1221 + %1223 = insertelement <4 x float> zeroinitializer, float %1222, i32 0 + %1224 = insertelement <4 x float> %1223, float 0.000000e+00, i32 1 + %1225 = insertelement <4 x float> %1224, float 0.000000e+00, i32 2 + %1226 = insertelement <4 x float> %1225, float 0.000000e+00, i32 3 + %1227 = getelementptr float, float* %0, i32 0 + %1228 = load float, float* %1227, align 4 + %1229 = insertelement <4 x float> zeroinitializer, float %1228, i32 0 + %1230 = insertelement <4 x float> %1229, float 0.000000e+00, i32 1 + %1231 = insertelement <4 x float> %1230, float 0.000000e+00, i32 2 + %1232 = insertelement <4 x float> %1231, float 0.000000e+00, i32 3 + %1233 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1226, <4 x float> %1232, <4 x float> zeroinitializer) + %1234 = extractelement <4 x float> %1233, i32 0 + store float %1234, float* %2, align 4 + %1235 = bitcast i32 %5 to float + %1236 = bitcast i32 %5 to float + %1237 = fmul float %1235, %1236 + %1238 = fadd float %1237, 0.000000e+00 + %1239 = bitcast i32 %32 to float + %1240 = bitcast i32 %32 to float + %1241 = fmul float %1239, %1240 + %1242 = fadd float %1238, %1241 + %1243 = call float @llvm.sqrt.f32(float %1242) + %1244 = bitcast i32 %5 to float + %1245 = fcmp olt float %1244, 0.000000e+00 + %1246 = sext i1 %1245 to i32 + %1247 = bitcast i32 %5 to float + %1248 = fcmp ogt float %1247, 0.000000e+00 + %1249 = zext i1 %1248 to i32 + %1250 = add nsw i32 %1246, %1249 + %1251 = sitofp i32 %1250 to float + %1252 = fneg float %1251 + %1253 = fmul float %1243, %1252 + %1254 = bitcast i32 %5 to float + %1255 = fadd float %1254, %1253 + %1256 = bitcast i32 %5 to float + %1257 = bitcast i32 %5 to float + %1258 = fmul float %1256, %1257 + %1259 = fadd float %1258, 0.000000e+00 + %1260 = bitcast i32 %32 to float + %1261 = bitcast i32 %32 to float + %1262 = fmul float %1260, %1261 + %1263 = fadd float %1259, %1262 + %1264 = call float @llvm.sqrt.f32(float %1263) + %1265 = bitcast i32 %5 to float + %1266 = fcmp olt float %1265, 0.000000e+00 + %1267 = sext i1 %1266 to 
i32 + %1268 = bitcast i32 %5 to float + %1269 = fcmp ogt float %1268, 0.000000e+00 + %1270 = zext i1 %1269 to i32 + %1271 = add nsw i32 %1267, %1270 + %1272 = sitofp i32 %1271 to float + %1273 = fneg float %1272 + %1274 = fmul float %1264, %1273 + %1275 = bitcast i32 %5 to float + %1276 = fadd float %1275, %1274 + %1277 = bitcast i32 %5 to float + %1278 = bitcast i32 %5 to float + %1279 = fmul float %1277, %1278 + %1280 = fadd float %1279, 0.000000e+00 + %1281 = bitcast i32 %32 to float + %1282 = bitcast i32 %32 to float + %1283 = fmul float %1281, %1282 + %1284 = fadd float %1280, %1283 + %1285 = call float @llvm.sqrt.f32(float %1284) + %1286 = bitcast i32 %5 to float + %1287 = fcmp olt float %1286, 0.000000e+00 + %1288 = sext i1 %1287 to i32 + %1289 = bitcast i32 %5 to float + %1290 = fcmp ogt float %1289, 0.000000e+00 + %1291 = zext i1 %1290 to i32 + %1292 = add nsw i32 %1288, %1291 + %1293 = sitofp i32 %1292 to float + %1294 = fneg float %1293 + %1295 = fmul float %1285, %1294 + %1296 = bitcast i32 %5 to float + %1297 = fadd float %1296, %1295 + %1298 = fmul float %1276, %1297 + %1299 = fadd float %1298, 0.000000e+00 + %1300 = bitcast i32 %5 to float + %1301 = bitcast i32 %5 to float + %1302 = fmul float %1300, %1301 + %1303 = fadd float %1302, 0.000000e+00 + %1304 = bitcast i32 %32 to float + %1305 = bitcast i32 %32 to float + %1306 = fmul float %1304, %1305 + %1307 = fadd float %1303, %1306 + %1308 = call float @llvm.sqrt.f32(float %1307) + %1309 = bitcast i32 %5 to float + %1310 = fcmp olt float %1309, 0.000000e+00 + %1311 = sext i1 %1310 to i32 + %1312 = bitcast i32 %5 to float + %1313 = fcmp ogt float %1312, 0.000000e+00 + %1314 = zext i1 %1313 to i32 + %1315 = add nsw i32 %1311, %1314 + %1316 = sitofp i32 %1315 to float + %1317 = fneg float %1316 + %1318 = fmul float %1308, %1317 + %1319 = fmul float %1318, 0.000000e+00 + %1320 = bitcast i32 %32 to float + %1321 = fadd float %1320, %1319 + %1322 = bitcast i32 %5 to float + %1323 = bitcast i32 %5 to float + %1324 = fmul float %1322, %1323 + %1325 = fadd float %1324, 0.000000e+00 + %1326 = bitcast i32 %32 to float + %1327 = bitcast i32 %32 to float + %1328 = fmul float %1326, %1327 + %1329 = fadd float %1325, %1328 + %1330 = call float @llvm.sqrt.f32(float %1329) + %1331 = bitcast i32 %5 to float + %1332 = fcmp olt float %1331, 0.000000e+00 + %1333 = sext i1 %1332 to i32 + %1334 = bitcast i32 %5 to float + %1335 = fcmp ogt float %1334, 0.000000e+00 + %1336 = zext i1 %1335 to i32 + %1337 = add nsw i32 %1333, %1336 + %1338 = sitofp i32 %1337 to float + %1339 = fneg float %1338 + %1340 = fmul float %1330, %1339 + %1341 = fmul float %1340, 0.000000e+00 + %1342 = bitcast i32 %32 to float + %1343 = fadd float %1342, %1341 + %1344 = fmul float %1321, %1343 + %1345 = fadd float %1299, %1344 + %1346 = call float @llvm.sqrt.f32(float %1345) + %1347 = fadd float %1346, 0.000000e+00 + %1348 = fdiv float %1255, %1347 + %1349 = fmul float %1348, 2.000000e+00 + %1350 = bitcast i32 %5 to float + %1351 = bitcast i32 %5 to float + %1352 = fmul float %1350, %1351 + %1353 = fadd float %1352, 0.000000e+00 + %1354 = bitcast i32 %32 to float + %1355 = bitcast i32 %32 to float + %1356 = fmul float %1354, %1355 + %1357 = fadd float %1353, %1356 + %1358 = call float @llvm.sqrt.f32(float %1357) + %1359 = bitcast i32 %5 to float + %1360 = fcmp olt float %1359, 0.000000e+00 + %1361 = sext i1 %1360 to i32 + %1362 = bitcast i32 %5 to float + %1363 = fcmp ogt float %1362, 0.000000e+00 + %1364 = zext i1 %1363 to i32 + %1365 = add nsw i32 %1361, %1364 + %1366 = 
sitofp i32 %1365 to float + %1367 = fneg float %1366 + %1368 = fmul float %1358, %1367 + %1369 = bitcast i32 %5 to float + %1370 = fadd float %1369, %1368 + %1371 = bitcast i32 %5 to float + %1372 = bitcast i32 %5 to float + %1373 = fmul float %1371, %1372 + %1374 = fadd float %1373, 0.000000e+00 + %1375 = bitcast i32 %32 to float + %1376 = bitcast i32 %32 to float + %1377 = fmul float %1375, %1376 + %1378 = fadd float %1374, %1377 + %1379 = call float @llvm.sqrt.f32(float %1378) + %1380 = bitcast i32 %5 to float + %1381 = fcmp olt float %1380, 0.000000e+00 + %1382 = sext i1 %1381 to i32 + %1383 = bitcast i32 %5 to float + %1384 = fcmp ogt float %1383, 0.000000e+00 + %1385 = zext i1 %1384 to i32 + %1386 = add nsw i32 %1382, %1385 + %1387 = sitofp i32 %1386 to float + %1388 = fneg float %1387 + %1389 = fmul float %1379, %1388 + %1390 = bitcast i32 %5 to float + %1391 = fadd float %1390, %1389 + %1392 = bitcast i32 %5 to float + %1393 = bitcast i32 %5 to float + %1394 = fmul float %1392, %1393 + %1395 = fadd float %1394, 0.000000e+00 + %1396 = bitcast i32 %32 to float + %1397 = bitcast i32 %32 to float + %1398 = fmul float %1396, %1397 + %1399 = fadd float %1395, %1398 + %1400 = call float @llvm.sqrt.f32(float %1399) + %1401 = bitcast i32 %5 to float + %1402 = fcmp olt float %1401, 0.000000e+00 + %1403 = sext i1 %1402 to i32 + %1404 = bitcast i32 %5 to float + %1405 = fcmp ogt float %1404, 0.000000e+00 + %1406 = zext i1 %1405 to i32 + %1407 = add nsw i32 %1403, %1406 + %1408 = sitofp i32 %1407 to float + %1409 = fneg float %1408 + %1410 = fmul float %1400, %1409 + %1411 = bitcast i32 %5 to float + %1412 = fadd float %1411, %1410 + %1413 = fmul float %1391, %1412 + %1414 = fadd float %1413, 0.000000e+00 + %1415 = bitcast i32 %5 to float + %1416 = bitcast i32 %5 to float + %1417 = fmul float %1415, %1416 + %1418 = fadd float %1417, 0.000000e+00 + %1419 = bitcast i32 %32 to float + %1420 = bitcast i32 %32 to float + %1421 = fmul float %1419, %1420 + %1422 = fadd float %1418, %1421 + %1423 = call float @llvm.sqrt.f32(float %1422) + %1424 = bitcast i32 %5 to float + %1425 = fcmp olt float %1424, 0.000000e+00 + %1426 = sext i1 %1425 to i32 + %1427 = bitcast i32 %5 to float + %1428 = fcmp ogt float %1427, 0.000000e+00 + %1429 = zext i1 %1428 to i32 + %1430 = add nsw i32 %1426, %1429 + %1431 = sitofp i32 %1430 to float + %1432 = fneg float %1431 + %1433 = fmul float %1423, %1432 + %1434 = fmul float %1433, 0.000000e+00 + %1435 = bitcast i32 %32 to float + %1436 = fadd float %1435, %1434 + %1437 = bitcast i32 %5 to float + %1438 = bitcast i32 %5 to float + %1439 = fmul float %1437, %1438 + %1440 = fadd float %1439, 0.000000e+00 + %1441 = bitcast i32 %32 to float + %1442 = bitcast i32 %32 to float + %1443 = fmul float %1441, %1442 + %1444 = fadd float %1440, %1443 + %1445 = call float @llvm.sqrt.f32(float %1444) + %1446 = bitcast i32 %5 to float + %1447 = fcmp olt float %1446, 0.000000e+00 + %1448 = sext i1 %1447 to i32 + %1449 = bitcast i32 %5 to float + %1450 = fcmp ogt float %1449, 0.000000e+00 + %1451 = zext i1 %1450 to i32 + %1452 = add nsw i32 %1448, %1451 + %1453 = sitofp i32 %1452 to float + %1454 = fneg float %1453 + %1455 = fmul float %1445, %1454 + %1456 = fmul float %1455, 0.000000e+00 + %1457 = bitcast i32 %32 to float + %1458 = fadd float %1457, %1456 + %1459 = fmul float %1436, %1458 + %1460 = fadd float %1414, %1459 + %1461 = call float @llvm.sqrt.f32(float %1460) + %1462 = fadd float %1461, 0.000000e+00 + %1463 = fdiv float %1370, %1462 + %1464 = fmul float %1349, %1463 + %1465 = fsub 
float 1.000000e+00, %1464 + %1466 = fmul float %1465, %1228 + %1467 = fadd float %1466, 0.000000e+00 + %1468 = bitcast i32 %5 to float + %1469 = bitcast i32 %5 to float + %1470 = fmul float %1468, %1469 + %1471 = fadd float %1470, 0.000000e+00 + %1472 = bitcast i32 %32 to float + %1473 = bitcast i32 %32 to float + %1474 = fmul float %1472, %1473 + %1475 = fadd float %1471, %1474 + %1476 = call float @llvm.sqrt.f32(float %1475) + %1477 = bitcast i32 %5 to float + %1478 = fcmp olt float %1477, 0.000000e+00 + %1479 = sext i1 %1478 to i32 + %1480 = bitcast i32 %5 to float + %1481 = fcmp ogt float %1480, 0.000000e+00 + %1482 = zext i1 %1481 to i32 + %1483 = add nsw i32 %1479, %1482 + %1484 = sitofp i32 %1483 to float + %1485 = fneg float %1484 + %1486 = fmul float %1476, %1485 + %1487 = bitcast i32 %5 to float + %1488 = fadd float %1487, %1486 + %1489 = bitcast i32 %5 to float + %1490 = bitcast i32 %5 to float + %1491 = fmul float %1489, %1490 + %1492 = fadd float %1491, 0.000000e+00 + %1493 = bitcast i32 %32 to float + %1494 = bitcast i32 %32 to float + %1495 = fmul float %1493, %1494 + %1496 = fadd float %1492, %1495 + %1497 = call float @llvm.sqrt.f32(float %1496) + %1498 = bitcast i32 %5 to float + %1499 = fcmp olt float %1498, 0.000000e+00 + %1500 = sext i1 %1499 to i32 + %1501 = bitcast i32 %5 to float + %1502 = fcmp ogt float %1501, 0.000000e+00 + %1503 = zext i1 %1502 to i32 + %1504 = add nsw i32 %1500, %1503 + %1505 = sitofp i32 %1504 to float + %1506 = fneg float %1505 + %1507 = fmul float %1497, %1506 + %1508 = bitcast i32 %5 to float + %1509 = fadd float %1508, %1507 + %1510 = bitcast i32 %5 to float + %1511 = bitcast i32 %5 to float + %1512 = fmul float %1510, %1511 + %1513 = fadd float %1512, 0.000000e+00 + %1514 = bitcast i32 %32 to float + %1515 = bitcast i32 %32 to float + %1516 = fmul float %1514, %1515 + %1517 = fadd float %1513, %1516 + %1518 = call float @llvm.sqrt.f32(float %1517) + %1519 = bitcast i32 %5 to float + %1520 = fcmp olt float %1519, 0.000000e+00 + %1521 = sext i1 %1520 to i32 + %1522 = bitcast i32 %5 to float + %1523 = fcmp ogt float %1522, 0.000000e+00 + %1524 = zext i1 %1523 to i32 + %1525 = add nsw i32 %1521, %1524 + %1526 = sitofp i32 %1525 to float + %1527 = fneg float %1526 + %1528 = fmul float %1518, %1527 + %1529 = bitcast i32 %5 to float + %1530 = fadd float %1529, %1528 + %1531 = fmul float %1509, %1530 + %1532 = fadd float %1531, 0.000000e+00 + %1533 = bitcast i32 %5 to float + %1534 = bitcast i32 %5 to float + %1535 = fmul float %1533, %1534 + %1536 = fadd float %1535, 0.000000e+00 + %1537 = bitcast i32 %32 to float + %1538 = bitcast i32 %32 to float + %1539 = fmul float %1537, %1538 + %1540 = fadd float %1536, %1539 + %1541 = call float @llvm.sqrt.f32(float %1540) + %1542 = bitcast i32 %5 to float + %1543 = fcmp olt float %1542, 0.000000e+00 + %1544 = sext i1 %1543 to i32 + %1545 = bitcast i32 %5 to float + %1546 = fcmp ogt float %1545, 0.000000e+00 + %1547 = zext i1 %1546 to i32 + %1548 = add nsw i32 %1544, %1547 + %1549 = sitofp i32 %1548 to float + %1550 = fneg float %1549 + %1551 = fmul float %1541, %1550 + %1552 = fmul float %1551, 0.000000e+00 + %1553 = bitcast i32 %32 to float + %1554 = fadd float %1553, %1552 + %1555 = bitcast i32 %5 to float + %1556 = bitcast i32 %5 to float + %1557 = fmul float %1555, %1556 + %1558 = fadd float %1557, 0.000000e+00 + %1559 = bitcast i32 %32 to float + %1560 = bitcast i32 %32 to float + %1561 = fmul float %1559, %1560 + %1562 = fadd float %1558, %1561 + %1563 = call float @llvm.sqrt.f32(float %1562) + 
%1564 = bitcast i32 %5 to float + %1565 = fcmp olt float %1564, 0.000000e+00 + %1566 = sext i1 %1565 to i32 + %1567 = bitcast i32 %5 to float + %1568 = fcmp ogt float %1567, 0.000000e+00 + %1569 = zext i1 %1568 to i32 + %1570 = add nsw i32 %1566, %1569 + %1571 = sitofp i32 %1570 to float + %1572 = fneg float %1571 + %1573 = fmul float %1563, %1572 + %1574 = fmul float %1573, 0.000000e+00 + %1575 = bitcast i32 %32 to float + %1576 = fadd float %1575, %1574 + %1577 = fmul float %1554, %1576 + %1578 = fadd float %1532, %1577 + %1579 = call float @llvm.sqrt.f32(float %1578) + %1580 = fadd float %1579, 0.000000e+00 + %1581 = fdiv float %1488, %1580 + %1582 = fmul float %1581, 2.000000e+00 + %1583 = bitcast i32 %5 to float + %1584 = bitcast i32 %5 to float + %1585 = fmul float %1583, %1584 + %1586 = fadd float %1585, 0.000000e+00 + %1587 = bitcast i32 %32 to float + %1588 = bitcast i32 %32 to float + %1589 = fmul float %1587, %1588 + %1590 = fadd float %1586, %1589 + %1591 = call float @llvm.sqrt.f32(float %1590) + %1592 = bitcast i32 %5 to float + %1593 = fcmp olt float %1592, 0.000000e+00 + %1594 = sext i1 %1593 to i32 + %1595 = bitcast i32 %5 to float + %1596 = fcmp ogt float %1595, 0.000000e+00 + %1597 = zext i1 %1596 to i32 + %1598 = add nsw i32 %1594, %1597 + %1599 = sitofp i32 %1598 to float + %1600 = fneg float %1599 + %1601 = fmul float %1591, %1600 + %1602 = fmul float %1601, 0.000000e+00 + %1603 = bitcast i32 %32 to float + %1604 = fadd float %1603, %1602 + %1605 = bitcast i32 %5 to float + %1606 = bitcast i32 %5 to float + %1607 = fmul float %1605, %1606 + %1608 = fadd float %1607, 0.000000e+00 + %1609 = bitcast i32 %32 to float + %1610 = bitcast i32 %32 to float + %1611 = fmul float %1609, %1610 + %1612 = fadd float %1608, %1611 + %1613 = call float @llvm.sqrt.f32(float %1612) + %1614 = bitcast i32 %5 to float + %1615 = fcmp olt float %1614, 0.000000e+00 + %1616 = sext i1 %1615 to i32 + %1617 = bitcast i32 %5 to float + %1618 = fcmp ogt float %1617, 0.000000e+00 + %1619 = zext i1 %1618 to i32 + %1620 = add nsw i32 %1616, %1619 + %1621 = sitofp i32 %1620 to float + %1622 = fneg float %1621 + %1623 = fmul float %1613, %1622 + %1624 = bitcast i32 %5 to float + %1625 = fadd float %1624, %1623 + %1626 = bitcast i32 %5 to float + %1627 = bitcast i32 %5 to float + %1628 = fmul float %1626, %1627 + %1629 = fadd float %1628, 0.000000e+00 + %1630 = bitcast i32 %32 to float + %1631 = bitcast i32 %32 to float + %1632 = fmul float %1630, %1631 + %1633 = fadd float %1629, %1632 + %1634 = call float @llvm.sqrt.f32(float %1633) + %1635 = bitcast i32 %5 to float + %1636 = fcmp olt float %1635, 0.000000e+00 + %1637 = sext i1 %1636 to i32 + %1638 = bitcast i32 %5 to float + %1639 = fcmp ogt float %1638, 0.000000e+00 + %1640 = zext i1 %1639 to i32 + %1641 = add nsw i32 %1637, %1640 + %1642 = sitofp i32 %1641 to float + %1643 = fneg float %1642 + %1644 = fmul float %1634, %1643 + %1645 = bitcast i32 %5 to float + %1646 = fadd float %1645, %1644 + %1647 = fmul float %1625, %1646 + %1648 = fadd float %1647, 0.000000e+00 + %1649 = bitcast i32 %5 to float + %1650 = bitcast i32 %5 to float + %1651 = fmul float %1649, %1650 + %1652 = fadd float %1651, 0.000000e+00 + %1653 = bitcast i32 %32 to float + %1654 = bitcast i32 %32 to float + %1655 = fmul float %1653, %1654 + %1656 = fadd float %1652, %1655 + %1657 = call float @llvm.sqrt.f32(float %1656) + %1658 = bitcast i32 %5 to float + %1659 = fcmp olt float %1658, 0.000000e+00 + %1660 = sext i1 %1659 to i32 + %1661 = bitcast i32 %5 to float + %1662 = fcmp ogt 
float %1661, 0.000000e+00 + %1663 = zext i1 %1662 to i32 + %1664 = add nsw i32 %1660, %1663 + %1665 = sitofp i32 %1664 to float + %1666 = fneg float %1665 + %1667 = fmul float %1657, %1666 + %1668 = fmul float %1667, 0.000000e+00 + %1669 = bitcast i32 %32 to float + %1670 = fadd float %1669, %1668 + %1671 = bitcast i32 %5 to float + %1672 = bitcast i32 %5 to float + %1673 = fmul float %1671, %1672 + %1674 = fadd float %1673, 0.000000e+00 + %1675 = bitcast i32 %32 to float + %1676 = bitcast i32 %32 to float + %1677 = fmul float %1675, %1676 + %1678 = fadd float %1674, %1677 + %1679 = call float @llvm.sqrt.f32(float %1678) + %1680 = bitcast i32 %5 to float + %1681 = fcmp olt float %1680, 0.000000e+00 + %1682 = sext i1 %1681 to i32 + %1683 = bitcast i32 %5 to float + %1684 = fcmp ogt float %1683, 0.000000e+00 + %1685 = zext i1 %1684 to i32 + %1686 = add nsw i32 %1682, %1685 + %1687 = sitofp i32 %1686 to float + %1688 = fneg float %1687 + %1689 = fmul float %1679, %1688 + %1690 = fmul float %1689, 0.000000e+00 + %1691 = bitcast i32 %32 to float + %1692 = fadd float %1691, %1690 + %1693 = fmul float %1670, %1692 + %1694 = fadd float %1648, %1693 + %1695 = call float @llvm.sqrt.f32(float %1694) + %1696 = fadd float %1695, 0.000000e+00 + %1697 = fdiv float %1604, %1696 + %1698 = fmul float %1582, %1697 + %1699 = fneg float %1698 + %1700 = getelementptr float, float* %0, i32 0 + %1701 = getelementptr inbounds float, float* %1700, i64 2 + %1702 = load float, float* %1701, align 4 + %1703 = fmul float %1699, %1702 + %1704 = fadd float %1467, %1703 + %1705 = insertelement <4 x float> zeroinitializer, float %1704, i32 0 + %1706 = insertelement <4 x float> %1705, float 0.000000e+00, i32 1 + %1707 = insertelement <4 x float> %1706, float 0.000000e+00, i32 2 + %1708 = insertelement <4 x float> %1707, float 0.000000e+00, i32 3 + %1709 = extractelement <4 x float> %1708, i32 0 + store float %1709, float* %2, align 4 + %1710 = extractelement <4 x float> %1708, i32 1 + %1711 = getelementptr float, float* %2, i32 0 + %1712 = getelementptr inbounds float, float* %1711, i64 1 + store float %1710, float* %1712, align 4 + %1713 = bitcast i32 %5 to float + %1714 = bitcast i32 %5 to float + %1715 = fmul float %1713, %1714 + %1716 = fadd float %1715, 0.000000e+00 + %1717 = bitcast i32 %32 to float + %1718 = bitcast i32 %32 to float + %1719 = fmul float %1717, %1718 + %1720 = fadd float %1716, %1719 + %1721 = call float @llvm.sqrt.f32(float %1720) + %1722 = bitcast i32 %5 to float + %1723 = fcmp olt float %1722, 0.000000e+00 + %1724 = sext i1 %1723 to i32 + %1725 = bitcast i32 %5 to float + %1726 = fcmp ogt float %1725, 0.000000e+00 + %1727 = zext i1 %1726 to i32 + %1728 = add nsw i32 %1724, %1727 + %1729 = sitofp i32 %1728 to float + %1730 = fneg float %1729 + %1731 = fmul float %1721, %1730 + %1732 = bitcast i32 %5 to float + %1733 = fadd float %1732, %1731 + %1734 = bitcast i32 %5 to float + %1735 = bitcast i32 %5 to float + %1736 = fmul float %1734, %1735 + %1737 = fadd float %1736, 0.000000e+00 + %1738 = bitcast i32 %32 to float + %1739 = bitcast i32 %32 to float + %1740 = fmul float %1738, %1739 + %1741 = fadd float %1737, %1740 + %1742 = call float @llvm.sqrt.f32(float %1741) + %1743 = bitcast i32 %5 to float + %1744 = fcmp olt float %1743, 0.000000e+00 + %1745 = sext i1 %1744 to i32 + %1746 = bitcast i32 %5 to float + %1747 = fcmp ogt float %1746, 0.000000e+00 + %1748 = zext i1 %1747 to i32 + %1749 = add nsw i32 %1745, %1748 + %1750 = sitofp i32 %1749 to float + %1751 = fneg float %1750 + %1752 = fmul float 
%1742, %1751 + %1753 = bitcast i32 %5 to float + %1754 = fadd float %1753, %1752 + %1755 = bitcast i32 %5 to float + %1756 = bitcast i32 %5 to float + %1757 = fmul float %1755, %1756 + %1758 = fadd float %1757, 0.000000e+00 + %1759 = bitcast i32 %32 to float + %1760 = bitcast i32 %32 to float + %1761 = fmul float %1759, %1760 + %1762 = fadd float %1758, %1761 + %1763 = call float @llvm.sqrt.f32(float %1762) + %1764 = bitcast i32 %5 to float + %1765 = fcmp olt float %1764, 0.000000e+00 + %1766 = sext i1 %1765 to i32 + %1767 = bitcast i32 %5 to float + %1768 = fcmp ogt float %1767, 0.000000e+00 + %1769 = zext i1 %1768 to i32 + %1770 = add nsw i32 %1766, %1769 + %1771 = sitofp i32 %1770 to float + %1772 = fneg float %1771 + %1773 = fmul float %1763, %1772 + %1774 = bitcast i32 %5 to float + %1775 = fadd float %1774, %1773 + %1776 = fmul float %1754, %1775 + %1777 = fadd float %1776, 0.000000e+00 + %1778 = bitcast i32 %5 to float + %1779 = bitcast i32 %5 to float + %1780 = fmul float %1778, %1779 + %1781 = fadd float %1780, 0.000000e+00 + %1782 = bitcast i32 %32 to float + %1783 = bitcast i32 %32 to float + %1784 = fmul float %1782, %1783 + %1785 = fadd float %1781, %1784 + %1786 = call float @llvm.sqrt.f32(float %1785) + %1787 = bitcast i32 %5 to float + %1788 = fcmp olt float %1787, 0.000000e+00 + %1789 = sext i1 %1788 to i32 + %1790 = bitcast i32 %5 to float + %1791 = fcmp ogt float %1790, 0.000000e+00 + %1792 = zext i1 %1791 to i32 + %1793 = add nsw i32 %1789, %1792 + %1794 = sitofp i32 %1793 to float + %1795 = fneg float %1794 + %1796 = fmul float %1786, %1795 + %1797 = fmul float %1796, 0.000000e+00 + %1798 = bitcast i32 %32 to float + %1799 = fadd float %1798, %1797 + %1800 = bitcast i32 %5 to float + %1801 = bitcast i32 %5 to float + %1802 = fmul float %1800, %1801 + %1803 = fadd float %1802, 0.000000e+00 + %1804 = bitcast i32 %32 to float + %1805 = bitcast i32 %32 to float + %1806 = fmul float %1804, %1805 + %1807 = fadd float %1803, %1806 + %1808 = call float @llvm.sqrt.f32(float %1807) + %1809 = bitcast i32 %5 to float + %1810 = fcmp olt float %1809, 0.000000e+00 + %1811 = sext i1 %1810 to i32 + %1812 = bitcast i32 %5 to float + %1813 = fcmp ogt float %1812, 0.000000e+00 + %1814 = zext i1 %1813 to i32 + %1815 = add nsw i32 %1811, %1814 + %1816 = sitofp i32 %1815 to float + %1817 = fneg float %1816 + %1818 = fmul float %1808, %1817 + %1819 = fmul float %1818, 0.000000e+00 + %1820 = bitcast i32 %32 to float + %1821 = fadd float %1820, %1819 + %1822 = fmul float %1799, %1821 + %1823 = fadd float %1777, %1822 + %1824 = call float @llvm.sqrt.f32(float %1823) + %1825 = fadd float %1824, 0.000000e+00 + %1826 = fdiv float %1733, %1825 + %1827 = fmul float %1826, 2.000000e+00 + %1828 = bitcast i32 %5 to float + %1829 = bitcast i32 %5 to float + %1830 = fmul float %1828, %1829 + %1831 = fadd float %1830, 0.000000e+00 + %1832 = bitcast i32 %32 to float + %1833 = bitcast i32 %32 to float + %1834 = fmul float %1832, %1833 + %1835 = fadd float %1831, %1834 + %1836 = call float @llvm.sqrt.f32(float %1835) + %1837 = bitcast i32 %5 to float + %1838 = fcmp olt float %1837, 0.000000e+00 + %1839 = sext i1 %1838 to i32 + %1840 = bitcast i32 %5 to float + %1841 = fcmp ogt float %1840, 0.000000e+00 + %1842 = zext i1 %1841 to i32 + %1843 = add nsw i32 %1839, %1842 + %1844 = sitofp i32 %1843 to float + %1845 = fneg float %1844 + %1846 = fmul float %1836, %1845 + %1847 = bitcast i32 %5 to float + %1848 = fadd float %1847, %1846 + %1849 = bitcast i32 %5 to float + %1850 = bitcast i32 %5 to float + %1851 = 
fmul float %1849, %1850 + %1852 = fadd float %1851, 0.000000e+00 + %1853 = bitcast i32 %32 to float + %1854 = bitcast i32 %32 to float + %1855 = fmul float %1853, %1854 + %1856 = fadd float %1852, %1855 + %1857 = call float @llvm.sqrt.f32(float %1856) + %1858 = bitcast i32 %5 to float + %1859 = fcmp olt float %1858, 0.000000e+00 + %1860 = sext i1 %1859 to i32 + %1861 = bitcast i32 %5 to float + %1862 = fcmp ogt float %1861, 0.000000e+00 + %1863 = zext i1 %1862 to i32 + %1864 = add nsw i32 %1860, %1863 + %1865 = sitofp i32 %1864 to float + %1866 = fneg float %1865 + %1867 = fmul float %1857, %1866 + %1868 = bitcast i32 %5 to float + %1869 = fadd float %1868, %1867 + %1870 = bitcast i32 %5 to float + %1871 = bitcast i32 %5 to float + %1872 = fmul float %1870, %1871 + %1873 = fadd float %1872, 0.000000e+00 + %1874 = bitcast i32 %32 to float + %1875 = bitcast i32 %32 to float + %1876 = fmul float %1874, %1875 + %1877 = fadd float %1873, %1876 + %1878 = call float @llvm.sqrt.f32(float %1877) + %1879 = bitcast i32 %5 to float + %1880 = fcmp olt float %1879, 0.000000e+00 + %1881 = sext i1 %1880 to i32 + %1882 = bitcast i32 %5 to float + %1883 = fcmp ogt float %1882, 0.000000e+00 + %1884 = zext i1 %1883 to i32 + %1885 = add nsw i32 %1881, %1884 + %1886 = sitofp i32 %1885 to float + %1887 = fneg float %1886 + %1888 = fmul float %1878, %1887 + %1889 = bitcast i32 %5 to float + %1890 = fadd float %1889, %1888 + %1891 = fmul float %1869, %1890 + %1892 = fadd float %1891, 0.000000e+00 + %1893 = bitcast i32 %5 to float + %1894 = bitcast i32 %5 to float + %1895 = fmul float %1893, %1894 + %1896 = fadd float %1895, 0.000000e+00 + %1897 = bitcast i32 %32 to float + %1898 = bitcast i32 %32 to float + %1899 = fmul float %1897, %1898 + %1900 = fadd float %1896, %1899 + %1901 = call float @llvm.sqrt.f32(float %1900) + %1902 = bitcast i32 %5 to float + %1903 = fcmp olt float %1902, 0.000000e+00 + %1904 = sext i1 %1903 to i32 + %1905 = bitcast i32 %5 to float + %1906 = fcmp ogt float %1905, 0.000000e+00 + %1907 = zext i1 %1906 to i32 + %1908 = add nsw i32 %1904, %1907 + %1909 = sitofp i32 %1908 to float + %1910 = fneg float %1909 + %1911 = fmul float %1901, %1910 + %1912 = fmul float %1911, 0.000000e+00 + %1913 = bitcast i32 %32 to float + %1914 = fadd float %1913, %1912 + %1915 = bitcast i32 %5 to float + %1916 = bitcast i32 %5 to float + %1917 = fmul float %1915, %1916 + %1918 = fadd float %1917, 0.000000e+00 + %1919 = bitcast i32 %32 to float + %1920 = bitcast i32 %32 to float + %1921 = fmul float %1919, %1920 + %1922 = fadd float %1918, %1921 + %1923 = call float @llvm.sqrt.f32(float %1922) + %1924 = bitcast i32 %5 to float + %1925 = fcmp olt float %1924, 0.000000e+00 + %1926 = sext i1 %1925 to i32 + %1927 = bitcast i32 %5 to float + %1928 = fcmp ogt float %1927, 0.000000e+00 + %1929 = zext i1 %1928 to i32 + %1930 = add nsw i32 %1926, %1929 + %1931 = sitofp i32 %1930 to float + %1932 = fneg float %1931 + %1933 = fmul float %1923, %1932 + %1934 = fmul float %1933, 0.000000e+00 + %1935 = bitcast i32 %32 to float + %1936 = fadd float %1935, %1934 + %1937 = fmul float %1914, %1936 + %1938 = fadd float %1892, %1937 + %1939 = call float @llvm.sqrt.f32(float %1938) + %1940 = fadd float %1939, 0.000000e+00 + %1941 = fdiv float %1848, %1940 + %1942 = fmul float %1827, %1941 + %1943 = fsub float 1.000000e+00, %1942 + %1944 = insertelement <4 x float> zeroinitializer, float %1943, i32 0 + %1945 = insertelement <4 x float> %1944, float 0.000000e+00, i32 1 + %1946 = insertelement <4 x float> %1945, float 0.000000e+00, 
i32 2 + %1947 = insertelement <4 x float> %1946, float 0.000000e+00, i32 3 + %1948 = getelementptr float, float* %0, i32 0 + %1949 = getelementptr inbounds float, float* %1948, i64 1 + %1950 = load float, float* %1949, align 4 + %1951 = insertelement <4 x float> zeroinitializer, float %1950, i32 0 + %1952 = insertelement <4 x float> %1951, float 0.000000e+00, i32 1 + %1953 = insertelement <4 x float> %1952, float 0.000000e+00, i32 2 + %1954 = insertelement <4 x float> %1953, float 0.000000e+00, i32 3 + %1955 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1947, <4 x float> %1954, <4 x float> zeroinitializer) + %1956 = extractelement <4 x float> %1955, i32 0 + %1957 = getelementptr float, float* %2, i32 0 + %1958 = getelementptr inbounds float, float* %1957, i64 1 + store float %1956, float* %1958, align 4 + %1959 = bitcast i32 %5 to float + %1960 = bitcast i32 %5 to float + %1961 = fmul float %1959, %1960 + %1962 = fadd float %1961, 0.000000e+00 + %1963 = bitcast i32 %32 to float + %1964 = bitcast i32 %32 to float + %1965 = fmul float %1963, %1964 + %1966 = fadd float %1962, %1965 + %1967 = call float @llvm.sqrt.f32(float %1966) + %1968 = bitcast i32 %5 to float + %1969 = fcmp olt float %1968, 0.000000e+00 + %1970 = sext i1 %1969 to i32 + %1971 = bitcast i32 %5 to float + %1972 = fcmp ogt float %1971, 0.000000e+00 + %1973 = zext i1 %1972 to i32 + %1974 = add nsw i32 %1970, %1973 + %1975 = sitofp i32 %1974 to float + %1976 = fneg float %1975 + %1977 = fmul float %1967, %1976 + %1978 = bitcast i32 %5 to float + %1979 = fadd float %1978, %1977 + %1980 = bitcast i32 %5 to float + %1981 = bitcast i32 %5 to float + %1982 = fmul float %1980, %1981 + %1983 = fadd float %1982, 0.000000e+00 + %1984 = bitcast i32 %32 to float + %1985 = bitcast i32 %32 to float + %1986 = fmul float %1984, %1985 + %1987 = fadd float %1983, %1986 + %1988 = call float @llvm.sqrt.f32(float %1987) + %1989 = bitcast i32 %5 to float + %1990 = fcmp olt float %1989, 0.000000e+00 + %1991 = sext i1 %1990 to i32 + %1992 = bitcast i32 %5 to float + %1993 = fcmp ogt float %1992, 0.000000e+00 + %1994 = zext i1 %1993 to i32 + %1995 = add nsw i32 %1991, %1994 + %1996 = sitofp i32 %1995 to float + %1997 = fneg float %1996 + %1998 = fmul float %1988, %1997 + %1999 = bitcast i32 %5 to float + %2000 = fadd float %1999, %1998 + %2001 = bitcast i32 %5 to float + %2002 = bitcast i32 %5 to float + %2003 = fmul float %2001, %2002 + %2004 = fadd float %2003, 0.000000e+00 + %2005 = bitcast i32 %32 to float + %2006 = bitcast i32 %32 to float + %2007 = fmul float %2005, %2006 + %2008 = fadd float %2004, %2007 + %2009 = call float @llvm.sqrt.f32(float %2008) + %2010 = bitcast i32 %5 to float + %2011 = fcmp olt float %2010, 0.000000e+00 + %2012 = sext i1 %2011 to i32 + %2013 = bitcast i32 %5 to float + %2014 = fcmp ogt float %2013, 0.000000e+00 + %2015 = zext i1 %2014 to i32 + %2016 = add nsw i32 %2012, %2015 + %2017 = sitofp i32 %2016 to float + %2018 = fneg float %2017 + %2019 = fmul float %2009, %2018 + %2020 = bitcast i32 %5 to float + %2021 = fadd float %2020, %2019 + %2022 = fmul float %2000, %2021 + %2023 = fadd float %2022, 0.000000e+00 + %2024 = bitcast i32 %5 to float + %2025 = bitcast i32 %5 to float + %2026 = fmul float %2024, %2025 + %2027 = fadd float %2026, 0.000000e+00 + %2028 = bitcast i32 %32 to float + %2029 = bitcast i32 %32 to float + %2030 = fmul float %2028, %2029 + %2031 = fadd float %2027, %2030 + %2032 = call float @llvm.sqrt.f32(float %2031) + %2033 = bitcast i32 %5 to float + %2034 = fcmp olt float %2033, 0.000000e+00 + 
%2035 = sext i1 %2034 to i32 + %2036 = bitcast i32 %5 to float + %2037 = fcmp ogt float %2036, 0.000000e+00 + %2038 = zext i1 %2037 to i32 + %2039 = add nsw i32 %2035, %2038 + %2040 = sitofp i32 %2039 to float + %2041 = fneg float %2040 + %2042 = fmul float %2032, %2041 + %2043 = fmul float %2042, 0.000000e+00 + %2044 = bitcast i32 %32 to float + %2045 = fadd float %2044, %2043 + %2046 = bitcast i32 %5 to float + %2047 = bitcast i32 %5 to float + %2048 = fmul float %2046, %2047 + %2049 = fadd float %2048, 0.000000e+00 + %2050 = bitcast i32 %32 to float + %2051 = bitcast i32 %32 to float + %2052 = fmul float %2050, %2051 + %2053 = fadd float %2049, %2052 + %2054 = call float @llvm.sqrt.f32(float %2053) + %2055 = bitcast i32 %5 to float + %2056 = fcmp olt float %2055, 0.000000e+00 + %2057 = sext i1 %2056 to i32 + %2058 = bitcast i32 %5 to float + %2059 = fcmp ogt float %2058, 0.000000e+00 + %2060 = zext i1 %2059 to i32 + %2061 = add nsw i32 %2057, %2060 + %2062 = sitofp i32 %2061 to float + %2063 = fneg float %2062 + %2064 = fmul float %2054, %2063 + %2065 = fmul float %2064, 0.000000e+00 + %2066 = bitcast i32 %32 to float + %2067 = fadd float %2066, %2065 + %2068 = fmul float %2045, %2067 + %2069 = fadd float %2023, %2068 + %2070 = call float @llvm.sqrt.f32(float %2069) + %2071 = fadd float %2070, 0.000000e+00 + %2072 = fdiv float %1979, %2071 + %2073 = fmul float %2072, 2.000000e+00 + %2074 = bitcast i32 %5 to float + %2075 = bitcast i32 %5 to float + %2076 = fmul float %2074, %2075 + %2077 = fadd float %2076, 0.000000e+00 + %2078 = bitcast i32 %32 to float + %2079 = bitcast i32 %32 to float + %2080 = fmul float %2078, %2079 + %2081 = fadd float %2077, %2080 + %2082 = call float @llvm.sqrt.f32(float %2081) + %2083 = bitcast i32 %5 to float + %2084 = fcmp olt float %2083, 0.000000e+00 + %2085 = sext i1 %2084 to i32 + %2086 = bitcast i32 %5 to float + %2087 = fcmp ogt float %2086, 0.000000e+00 + %2088 = zext i1 %2087 to i32 + %2089 = add nsw i32 %2085, %2088 + %2090 = sitofp i32 %2089 to float + %2091 = fneg float %2090 + %2092 = fmul float %2082, %2091 + %2093 = bitcast i32 %5 to float + %2094 = fadd float %2093, %2092 + %2095 = bitcast i32 %5 to float + %2096 = bitcast i32 %5 to float + %2097 = fmul float %2095, %2096 + %2098 = fadd float %2097, 0.000000e+00 + %2099 = bitcast i32 %32 to float + %2100 = bitcast i32 %32 to float + %2101 = fmul float %2099, %2100 + %2102 = fadd float %2098, %2101 + %2103 = call float @llvm.sqrt.f32(float %2102) + %2104 = bitcast i32 %5 to float + %2105 = fcmp olt float %2104, 0.000000e+00 + %2106 = sext i1 %2105 to i32 + %2107 = bitcast i32 %5 to float + %2108 = fcmp ogt float %2107, 0.000000e+00 + %2109 = zext i1 %2108 to i32 + %2110 = add nsw i32 %2106, %2109 + %2111 = sitofp i32 %2110 to float + %2112 = fneg float %2111 + %2113 = fmul float %2103, %2112 + %2114 = bitcast i32 %5 to float + %2115 = fadd float %2114, %2113 + %2116 = bitcast i32 %5 to float + %2117 = bitcast i32 %5 to float + %2118 = fmul float %2116, %2117 + %2119 = fadd float %2118, 0.000000e+00 + %2120 = bitcast i32 %32 to float + %2121 = bitcast i32 %32 to float + %2122 = fmul float %2120, %2121 + %2123 = fadd float %2119, %2122 + %2124 = call float @llvm.sqrt.f32(float %2123) + %2125 = bitcast i32 %5 to float + %2126 = fcmp olt float %2125, 0.000000e+00 + %2127 = sext i1 %2126 to i32 + %2128 = bitcast i32 %5 to float + %2129 = fcmp ogt float %2128, 0.000000e+00 + %2130 = zext i1 %2129 to i32 + %2131 = add nsw i32 %2127, %2130 + %2132 = sitofp i32 %2131 to float + %2133 = fneg float %2132 
+ %2134 = fmul float %2124, %2133 + %2135 = bitcast i32 %5 to float + %2136 = fadd float %2135, %2134 + %2137 = fmul float %2115, %2136 + %2138 = fadd float %2137, 0.000000e+00 + %2139 = bitcast i32 %5 to float + %2140 = bitcast i32 %5 to float + %2141 = fmul float %2139, %2140 + %2142 = fadd float %2141, 0.000000e+00 + %2143 = bitcast i32 %32 to float + %2144 = bitcast i32 %32 to float + %2145 = fmul float %2143, %2144 + %2146 = fadd float %2142, %2145 + %2147 = call float @llvm.sqrt.f32(float %2146) + %2148 = bitcast i32 %5 to float + %2149 = fcmp olt float %2148, 0.000000e+00 + %2150 = sext i1 %2149 to i32 + %2151 = bitcast i32 %5 to float + %2152 = fcmp ogt float %2151, 0.000000e+00 + %2153 = zext i1 %2152 to i32 + %2154 = add nsw i32 %2150, %2153 + %2155 = sitofp i32 %2154 to float + %2156 = fneg float %2155 + %2157 = fmul float %2147, %2156 + %2158 = fmul float %2157, 0.000000e+00 + %2159 = bitcast i32 %32 to float + %2160 = fadd float %2159, %2158 + %2161 = bitcast i32 %5 to float + %2162 = bitcast i32 %5 to float + %2163 = fmul float %2161, %2162 + %2164 = fadd float %2163, 0.000000e+00 + %2165 = bitcast i32 %32 to float + %2166 = bitcast i32 %32 to float + %2167 = fmul float %2165, %2166 + %2168 = fadd float %2164, %2167 + %2169 = call float @llvm.sqrt.f32(float %2168) + %2170 = bitcast i32 %5 to float + %2171 = fcmp olt float %2170, 0.000000e+00 + %2172 = sext i1 %2171 to i32 + %2173 = bitcast i32 %5 to float + %2174 = fcmp ogt float %2173, 0.000000e+00 + %2175 = zext i1 %2174 to i32 + %2176 = add nsw i32 %2172, %2175 + %2177 = sitofp i32 %2176 to float + %2178 = fneg float %2177 + %2179 = fmul float %2169, %2178 + %2180 = fmul float %2179, 0.000000e+00 + %2181 = bitcast i32 %32 to float + %2182 = fadd float %2181, %2180 + %2183 = fmul float %2160, %2182 + %2184 = fadd float %2138, %2183 + %2185 = call float @llvm.sqrt.f32(float %2184) + %2186 = fadd float %2185, 0.000000e+00 + %2187 = fdiv float %2094, %2186 + %2188 = fmul float %2073, %2187 + %2189 = fsub float 1.000000e+00, %2188 + %2190 = fmul float %2189, %1950 + %2191 = fadd float %2190, 0.000000e+00 + %2192 = bitcast i32 %5 to float + %2193 = bitcast i32 %5 to float + %2194 = fmul float %2192, %2193 + %2195 = fadd float %2194, 0.000000e+00 + %2196 = bitcast i32 %32 to float + %2197 = bitcast i32 %32 to float + %2198 = fmul float %2196, %2197 + %2199 = fadd float %2195, %2198 + %2200 = call float @llvm.sqrt.f32(float %2199) + %2201 = bitcast i32 %5 to float + %2202 = fcmp olt float %2201, 0.000000e+00 + %2203 = sext i1 %2202 to i32 + %2204 = bitcast i32 %5 to float + %2205 = fcmp ogt float %2204, 0.000000e+00 + %2206 = zext i1 %2205 to i32 + %2207 = add nsw i32 %2203, %2206 + %2208 = sitofp i32 %2207 to float + %2209 = fneg float %2208 + %2210 = fmul float %2200, %2209 + %2211 = bitcast i32 %5 to float + %2212 = fadd float %2211, %2210 + %2213 = bitcast i32 %5 to float + %2214 = bitcast i32 %5 to float + %2215 = fmul float %2213, %2214 + %2216 = fadd float %2215, 0.000000e+00 + %2217 = bitcast i32 %32 to float + %2218 = bitcast i32 %32 to float + %2219 = fmul float %2217, %2218 + %2220 = fadd float %2216, %2219 + %2221 = call float @llvm.sqrt.f32(float %2220) + %2222 = bitcast i32 %5 to float + %2223 = fcmp olt float %2222, 0.000000e+00 + %2224 = sext i1 %2223 to i32 + %2225 = bitcast i32 %5 to float + %2226 = fcmp ogt float %2225, 0.000000e+00 + %2227 = zext i1 %2226 to i32 + %2228 = add nsw i32 %2224, %2227 + %2229 = sitofp i32 %2228 to float + %2230 = fneg float %2229 + %2231 = fmul float %2221, %2230 + %2232 = bitcast 
i32 %5 to float + %2233 = fadd float %2232, %2231 + %2234 = bitcast i32 %5 to float + %2235 = bitcast i32 %5 to float + %2236 = fmul float %2234, %2235 + %2237 = fadd float %2236, 0.000000e+00 + %2238 = bitcast i32 %32 to float + %2239 = bitcast i32 %32 to float + %2240 = fmul float %2238, %2239 + %2241 = fadd float %2237, %2240 + %2242 = call float @llvm.sqrt.f32(float %2241) + %2243 = bitcast i32 %5 to float + %2244 = fcmp olt float %2243, 0.000000e+00 + %2245 = sext i1 %2244 to i32 + %2246 = bitcast i32 %5 to float + %2247 = fcmp ogt float %2246, 0.000000e+00 + %2248 = zext i1 %2247 to i32 + %2249 = add nsw i32 %2245, %2248 + %2250 = sitofp i32 %2249 to float + %2251 = fneg float %2250 + %2252 = fmul float %2242, %2251 + %2253 = bitcast i32 %5 to float + %2254 = fadd float %2253, %2252 + %2255 = fmul float %2233, %2254 + %2256 = fadd float %2255, 0.000000e+00 + %2257 = bitcast i32 %5 to float + %2258 = bitcast i32 %5 to float + %2259 = fmul float %2257, %2258 + %2260 = fadd float %2259, 0.000000e+00 + %2261 = bitcast i32 %32 to float + %2262 = bitcast i32 %32 to float + %2263 = fmul float %2261, %2262 + %2264 = fadd float %2260, %2263 + %2265 = call float @llvm.sqrt.f32(float %2264) + %2266 = bitcast i32 %5 to float + %2267 = fcmp olt float %2266, 0.000000e+00 + %2268 = sext i1 %2267 to i32 + %2269 = bitcast i32 %5 to float + %2270 = fcmp ogt float %2269, 0.000000e+00 + %2271 = zext i1 %2270 to i32 + %2272 = add nsw i32 %2268, %2271 + %2273 = sitofp i32 %2272 to float + %2274 = fneg float %2273 + %2275 = fmul float %2265, %2274 + %2276 = fmul float %2275, 0.000000e+00 + %2277 = bitcast i32 %32 to float + %2278 = fadd float %2277, %2276 + %2279 = bitcast i32 %5 to float + %2280 = bitcast i32 %5 to float + %2281 = fmul float %2279, %2280 + %2282 = fadd float %2281, 0.000000e+00 + %2283 = bitcast i32 %32 to float + %2284 = bitcast i32 %32 to float + %2285 = fmul float %2283, %2284 + %2286 = fadd float %2282, %2285 + %2287 = call float @llvm.sqrt.f32(float %2286) + %2288 = bitcast i32 %5 to float + %2289 = fcmp olt float %2288, 0.000000e+00 + %2290 = sext i1 %2289 to i32 + %2291 = bitcast i32 %5 to float + %2292 = fcmp ogt float %2291, 0.000000e+00 + %2293 = zext i1 %2292 to i32 + %2294 = add nsw i32 %2290, %2293 + %2295 = sitofp i32 %2294 to float + %2296 = fneg float %2295 + %2297 = fmul float %2287, %2296 + %2298 = fmul float %2297, 0.000000e+00 + %2299 = bitcast i32 %32 to float + %2300 = fadd float %2299, %2298 + %2301 = fmul float %2278, %2300 + %2302 = fadd float %2256, %2301 + %2303 = call float @llvm.sqrt.f32(float %2302) + %2304 = fadd float %2303, 0.000000e+00 + %2305 = fdiv float %2212, %2304 + %2306 = fmul float %2305, 2.000000e+00 + %2307 = bitcast i32 %5 to float + %2308 = bitcast i32 %5 to float + %2309 = fmul float %2307, %2308 + %2310 = fadd float %2309, 0.000000e+00 + %2311 = bitcast i32 %32 to float + %2312 = bitcast i32 %32 to float + %2313 = fmul float %2311, %2312 + %2314 = fadd float %2310, %2313 + %2315 = call float @llvm.sqrt.f32(float %2314) + %2316 = bitcast i32 %5 to float + %2317 = fcmp olt float %2316, 0.000000e+00 + %2318 = sext i1 %2317 to i32 + %2319 = bitcast i32 %5 to float + %2320 = fcmp ogt float %2319, 0.000000e+00 + %2321 = zext i1 %2320 to i32 + %2322 = add nsw i32 %2318, %2321 + %2323 = sitofp i32 %2322 to float + %2324 = fneg float %2323 + %2325 = fmul float %2315, %2324 + %2326 = fmul float %2325, 0.000000e+00 + %2327 = bitcast i32 %32 to float + %2328 = fadd float %2327, %2326 + %2329 = bitcast i32 %5 to float + %2330 = bitcast i32 %5 to float + 
%2331 = fmul float %2329, %2330 + %2332 = fadd float %2331, 0.000000e+00 + %2333 = bitcast i32 %32 to float + %2334 = bitcast i32 %32 to float + %2335 = fmul float %2333, %2334 + %2336 = fadd float %2332, %2335 + %2337 = call float @llvm.sqrt.f32(float %2336) + %2338 = bitcast i32 %5 to float + %2339 = fcmp olt float %2338, 0.000000e+00 + %2340 = sext i1 %2339 to i32 + %2341 = bitcast i32 %5 to float + %2342 = fcmp ogt float %2341, 0.000000e+00 + %2343 = zext i1 %2342 to i32 + %2344 = add nsw i32 %2340, %2343 + %2345 = sitofp i32 %2344 to float + %2346 = fneg float %2345 + %2347 = fmul float %2337, %2346 + %2348 = bitcast i32 %5 to float + %2349 = fadd float %2348, %2347 + %2350 = bitcast i32 %5 to float + %2351 = bitcast i32 %5 to float + %2352 = fmul float %2350, %2351 + %2353 = fadd float %2352, 0.000000e+00 + %2354 = bitcast i32 %32 to float + %2355 = bitcast i32 %32 to float + %2356 = fmul float %2354, %2355 + %2357 = fadd float %2353, %2356 + %2358 = call float @llvm.sqrt.f32(float %2357) + %2359 = bitcast i32 %5 to float + %2360 = fcmp olt float %2359, 0.000000e+00 + %2361 = sext i1 %2360 to i32 + %2362 = bitcast i32 %5 to float + %2363 = fcmp ogt float %2362, 0.000000e+00 + %2364 = zext i1 %2363 to i32 + %2365 = add nsw i32 %2361, %2364 + %2366 = sitofp i32 %2365 to float + %2367 = fneg float %2366 + %2368 = fmul float %2358, %2367 + %2369 = bitcast i32 %5 to float + %2370 = fadd float %2369, %2368 + %2371 = fmul float %2349, %2370 + %2372 = fadd float %2371, 0.000000e+00 + %2373 = bitcast i32 %5 to float + %2374 = bitcast i32 %5 to float + %2375 = fmul float %2373, %2374 + %2376 = fadd float %2375, 0.000000e+00 + %2377 = bitcast i32 %32 to float + %2378 = bitcast i32 %32 to float + %2379 = fmul float %2377, %2378 + %2380 = fadd float %2376, %2379 + %2381 = call float @llvm.sqrt.f32(float %2380) + %2382 = bitcast i32 %5 to float + %2383 = fcmp olt float %2382, 0.000000e+00 + %2384 = sext i1 %2383 to i32 + %2385 = bitcast i32 %5 to float + %2386 = fcmp ogt float %2385, 0.000000e+00 + %2387 = zext i1 %2386 to i32 + %2388 = add nsw i32 %2384, %2387 + %2389 = sitofp i32 %2388 to float + %2390 = fneg float %2389 + %2391 = fmul float %2381, %2390 + %2392 = fmul float %2391, 0.000000e+00 + %2393 = bitcast i32 %32 to float + %2394 = fadd float %2393, %2392 + %2395 = bitcast i32 %5 to float + %2396 = bitcast i32 %5 to float + %2397 = fmul float %2395, %2396 + %2398 = fadd float %2397, 0.000000e+00 + %2399 = bitcast i32 %32 to float + %2400 = bitcast i32 %32 to float + %2401 = fmul float %2399, %2400 + %2402 = fadd float %2398, %2401 + %2403 = call float @llvm.sqrt.f32(float %2402) + %2404 = bitcast i32 %5 to float + %2405 = fcmp olt float %2404, 0.000000e+00 + %2406 = sext i1 %2405 to i32 + %2407 = bitcast i32 %5 to float + %2408 = fcmp ogt float %2407, 0.000000e+00 + %2409 = zext i1 %2408 to i32 + %2410 = add nsw i32 %2406, %2409 + %2411 = sitofp i32 %2410 to float + %2412 = fneg float %2411 + %2413 = fmul float %2403, %2412 + %2414 = fmul float %2413, 0.000000e+00 + %2415 = bitcast i32 %32 to float + %2416 = fadd float %2415, %2414 + %2417 = fmul float %2394, %2416 + %2418 = fadd float %2372, %2417 + %2419 = call float @llvm.sqrt.f32(float %2418) + %2420 = fadd float %2419, 0.000000e+00 + %2421 = fdiv float %2328, %2420 + %2422 = fmul float %2306, %2421 + %2423 = fneg float %2422 + %2424 = getelementptr float, float* %0, i32 0 + %2425 = getelementptr inbounds float, float* %2424, i64 3 + %2426 = load float, float* %2425, align 4 + %2427 = fmul float %2423, %2426 + %2428 = fadd float 
%2191, %2427 + %2429 = insertelement <4 x float> zeroinitializer, float %2428, i32 0 + %2430 = insertelement <4 x float> %2429, float 0.000000e+00, i32 1 + %2431 = insertelement <4 x float> %2430, float 0.000000e+00, i32 2 + %2432 = insertelement <4 x float> %2431, float 0.000000e+00, i32 3 + %2433 = extractelement <4 x float> %2432, i32 0 + %2434 = getelementptr float, float* %2, i32 0 + %2435 = getelementptr inbounds float, float* %2434, i64 1 + store float %2433, float* %2435, align 4 + %2436 = extractelement <4 x float> %2432, i32 1 + %2437 = getelementptr float, float* %2, i32 0 + %2438 = getelementptr inbounds float, float* %2437, i64 2 + store float %2436, float* %2438, align 4 + %2439 = bitcast i32 %5 to float + %2440 = bitcast i32 %5 to float + %2441 = fmul float %2439, %2440 + %2442 = fadd float %2441, 0.000000e+00 + %2443 = bitcast i32 %32 to float + %2444 = bitcast i32 %32 to float + %2445 = fmul float %2443, %2444 + %2446 = fadd float %2442, %2445 + %2447 = call float @llvm.sqrt.f32(float %2446) + %2448 = bitcast i32 %5 to float + %2449 = fcmp olt float %2448, 0.000000e+00 + %2450 = sext i1 %2449 to i32 + %2451 = bitcast i32 %5 to float + %2452 = fcmp ogt float %2451, 0.000000e+00 + %2453 = zext i1 %2452 to i32 + %2454 = add nsw i32 %2450, %2453 + %2455 = sitofp i32 %2454 to float + %2456 = fneg float %2455 + %2457 = fmul float %2447, %2456 + %2458 = fmul float %2457, 0.000000e+00 + %2459 = bitcast i32 %32 to float + %2460 = fadd float %2459, %2458 + %2461 = bitcast i32 %5 to float + %2462 = bitcast i32 %5 to float + %2463 = fmul float %2461, %2462 + %2464 = fadd float %2463, 0.000000e+00 + %2465 = bitcast i32 %32 to float + %2466 = bitcast i32 %32 to float + %2467 = fmul float %2465, %2466 + %2468 = fadd float %2464, %2467 + %2469 = call float @llvm.sqrt.f32(float %2468) + %2470 = bitcast i32 %5 to float + %2471 = fcmp olt float %2470, 0.000000e+00 + %2472 = sext i1 %2471 to i32 + %2473 = bitcast i32 %5 to float + %2474 = fcmp ogt float %2473, 0.000000e+00 + %2475 = zext i1 %2474 to i32 + %2476 = add nsw i32 %2472, %2475 + %2477 = sitofp i32 %2476 to float + %2478 = fneg float %2477 + %2479 = fmul float %2469, %2478 + %2480 = bitcast i32 %5 to float + %2481 = fadd float %2480, %2479 + %2482 = bitcast i32 %5 to float + %2483 = bitcast i32 %5 to float + %2484 = fmul float %2482, %2483 + %2485 = fadd float %2484, 0.000000e+00 + %2486 = bitcast i32 %32 to float + %2487 = bitcast i32 %32 to float + %2488 = fmul float %2486, %2487 + %2489 = fadd float %2485, %2488 + %2490 = call float @llvm.sqrt.f32(float %2489) + %2491 = bitcast i32 %5 to float + %2492 = fcmp olt float %2491, 0.000000e+00 + %2493 = sext i1 %2492 to i32 + %2494 = bitcast i32 %5 to float + %2495 = fcmp ogt float %2494, 0.000000e+00 + %2496 = zext i1 %2495 to i32 + %2497 = add nsw i32 %2493, %2496 + %2498 = sitofp i32 %2497 to float + %2499 = fneg float %2498 + %2500 = fmul float %2490, %2499 + %2501 = bitcast i32 %5 to float + %2502 = fadd float %2501, %2500 + %2503 = fmul float %2481, %2502 + %2504 = fadd float %2503, 0.000000e+00 + %2505 = bitcast i32 %5 to float + %2506 = bitcast i32 %5 to float + %2507 = fmul float %2505, %2506 + %2508 = fadd float %2507, 0.000000e+00 + %2509 = bitcast i32 %32 to float + %2510 = bitcast i32 %32 to float + %2511 = fmul float %2509, %2510 + %2512 = fadd float %2508, %2511 + %2513 = call float @llvm.sqrt.f32(float %2512) + %2514 = bitcast i32 %5 to float + %2515 = fcmp olt float %2514, 0.000000e+00 + %2516 = sext i1 %2515 to i32 + %2517 = bitcast i32 %5 to float + %2518 = fcmp ogt 
float %2517, 0.000000e+00 + %2519 = zext i1 %2518 to i32 + %2520 = add nsw i32 %2516, %2519 + %2521 = sitofp i32 %2520 to float + %2522 = fneg float %2521 + %2523 = fmul float %2513, %2522 + %2524 = fmul float %2523, 0.000000e+00 + %2525 = bitcast i32 %32 to float + %2526 = fadd float %2525, %2524 + %2527 = bitcast i32 %5 to float + %2528 = bitcast i32 %5 to float + %2529 = fmul float %2527, %2528 + %2530 = fadd float %2529, 0.000000e+00 + %2531 = bitcast i32 %32 to float + %2532 = bitcast i32 %32 to float + %2533 = fmul float %2531, %2532 + %2534 = fadd float %2530, %2533 + %2535 = call float @llvm.sqrt.f32(float %2534) + %2536 = bitcast i32 %5 to float + %2537 = fcmp olt float %2536, 0.000000e+00 + %2538 = sext i1 %2537 to i32 + %2539 = bitcast i32 %5 to float + %2540 = fcmp ogt float %2539, 0.000000e+00 + %2541 = zext i1 %2540 to i32 + %2542 = add nsw i32 %2538, %2541 + %2543 = sitofp i32 %2542 to float + %2544 = fneg float %2543 + %2545 = fmul float %2535, %2544 + %2546 = fmul float %2545, 0.000000e+00 + %2547 = bitcast i32 %32 to float + %2548 = fadd float %2547, %2546 + %2549 = fmul float %2526, %2548 + %2550 = fadd float %2504, %2549 + %2551 = call float @llvm.sqrt.f32(float %2550) + %2552 = fadd float %2551, 0.000000e+00 + %2553 = fdiv float %2460, %2552 + %2554 = fmul float %2553, 2.000000e+00 + %2555 = bitcast i32 %5 to float + %2556 = bitcast i32 %5 to float + %2557 = fmul float %2555, %2556 + %2558 = fadd float %2557, 0.000000e+00 + %2559 = bitcast i32 %32 to float + %2560 = bitcast i32 %32 to float + %2561 = fmul float %2559, %2560 + %2562 = fadd float %2558, %2561 + %2563 = call float @llvm.sqrt.f32(float %2562) + %2564 = bitcast i32 %5 to float + %2565 = fcmp olt float %2564, 0.000000e+00 + %2566 = sext i1 %2565 to i32 + %2567 = bitcast i32 %5 to float + %2568 = fcmp ogt float %2567, 0.000000e+00 + %2569 = zext i1 %2568 to i32 + %2570 = add nsw i32 %2566, %2569 + %2571 = sitofp i32 %2570 to float + %2572 = fneg float %2571 + %2573 = fmul float %2563, %2572 + %2574 = bitcast i32 %5 to float + %2575 = fadd float %2574, %2573 + %2576 = bitcast i32 %5 to float + %2577 = bitcast i32 %5 to float + %2578 = fmul float %2576, %2577 + %2579 = fadd float %2578, 0.000000e+00 + %2580 = bitcast i32 %32 to float + %2581 = bitcast i32 %32 to float + %2582 = fmul float %2580, %2581 + %2583 = fadd float %2579, %2582 + %2584 = call float @llvm.sqrt.f32(float %2583) + %2585 = bitcast i32 %5 to float + %2586 = fcmp olt float %2585, 0.000000e+00 + %2587 = sext i1 %2586 to i32 + %2588 = bitcast i32 %5 to float + %2589 = fcmp ogt float %2588, 0.000000e+00 + %2590 = zext i1 %2589 to i32 + %2591 = add nsw i32 %2587, %2590 + %2592 = sitofp i32 %2591 to float + %2593 = fneg float %2592 + %2594 = fmul float %2584, %2593 + %2595 = bitcast i32 %5 to float + %2596 = fadd float %2595, %2594 + %2597 = bitcast i32 %5 to float + %2598 = bitcast i32 %5 to float + %2599 = fmul float %2597, %2598 + %2600 = fadd float %2599, 0.000000e+00 + %2601 = bitcast i32 %32 to float + %2602 = bitcast i32 %32 to float + %2603 = fmul float %2601, %2602 + %2604 = fadd float %2600, %2603 + %2605 = call float @llvm.sqrt.f32(float %2604) + %2606 = bitcast i32 %5 to float + %2607 = fcmp olt float %2606, 0.000000e+00 + %2608 = sext i1 %2607 to i32 + %2609 = bitcast i32 %5 to float + %2610 = fcmp ogt float %2609, 0.000000e+00 + %2611 = zext i1 %2610 to i32 + %2612 = add nsw i32 %2608, %2611 + %2613 = sitofp i32 %2612 to float + %2614 = fneg float %2613 + %2615 = fmul float %2605, %2614 + %2616 = bitcast i32 %5 to float + %2617 = fadd 
float %2616, %2615 + %2618 = fmul float %2596, %2617 + %2619 = fadd float %2618, 0.000000e+00 + %2620 = bitcast i32 %5 to float + %2621 = bitcast i32 %5 to float + %2622 = fmul float %2620, %2621 + %2623 = fadd float %2622, 0.000000e+00 + %2624 = bitcast i32 %32 to float + %2625 = bitcast i32 %32 to float + %2626 = fmul float %2624, %2625 + %2627 = fadd float %2623, %2626 + %2628 = call float @llvm.sqrt.f32(float %2627) + %2629 = bitcast i32 %5 to float + %2630 = fcmp olt float %2629, 0.000000e+00 + %2631 = sext i1 %2630 to i32 + %2632 = bitcast i32 %5 to float + %2633 = fcmp ogt float %2632, 0.000000e+00 + %2634 = zext i1 %2633 to i32 + %2635 = add nsw i32 %2631, %2634 + %2636 = sitofp i32 %2635 to float + %2637 = fneg float %2636 + %2638 = fmul float %2628, %2637 + %2639 = fmul float %2638, 0.000000e+00 + %2640 = bitcast i32 %32 to float + %2641 = fadd float %2640, %2639 + %2642 = bitcast i32 %5 to float + %2643 = bitcast i32 %5 to float + %2644 = fmul float %2642, %2643 + %2645 = fadd float %2644, 0.000000e+00 + %2646 = bitcast i32 %32 to float + %2647 = bitcast i32 %32 to float + %2648 = fmul float %2646, %2647 + %2649 = fadd float %2645, %2648 + %2650 = call float @llvm.sqrt.f32(float %2649) + %2651 = bitcast i32 %5 to float + %2652 = fcmp olt float %2651, 0.000000e+00 + %2653 = sext i1 %2652 to i32 + %2654 = bitcast i32 %5 to float + %2655 = fcmp ogt float %2654, 0.000000e+00 + %2656 = zext i1 %2655 to i32 + %2657 = add nsw i32 %2653, %2656 + %2658 = sitofp i32 %2657 to float + %2659 = fneg float %2658 + %2660 = fmul float %2650, %2659 + %2661 = fmul float %2660, 0.000000e+00 + %2662 = bitcast i32 %32 to float + %2663 = fadd float %2662, %2661 + %2664 = fmul float %2641, %2663 + %2665 = fadd float %2619, %2664 + %2666 = call float @llvm.sqrt.f32(float %2665) + %2667 = fadd float %2666, 0.000000e+00 + %2668 = fdiv float %2575, %2667 + %2669 = fmul float %2554, %2668 + %2670 = fneg float %2669 + %2671 = insertelement <4 x float> zeroinitializer, float %2670, i32 0 + %2672 = insertelement <4 x float> %2671, float 0.000000e+00, i32 1 + %2673 = insertelement <4 x float> %2672, float 0.000000e+00, i32 2 + %2674 = insertelement <4 x float> %2673, float 0.000000e+00, i32 3 + %2675 = getelementptr float, float* %0, i32 0 + %2676 = load float, float* %2675, align 4 + %2677 = insertelement <4 x float> zeroinitializer, float %2676, i32 0 + %2678 = insertelement <4 x float> %2677, float 0.000000e+00, i32 1 + %2679 = insertelement <4 x float> %2678, float 0.000000e+00, i32 2 + %2680 = insertelement <4 x float> %2679, float 0.000000e+00, i32 3 + %2681 = call <4 x float> @llvm.fma.v4f32(<4 x float> %2674, <4 x float> %2680, <4 x float> zeroinitializer) + %2682 = extractelement <4 x float> %2681, i32 0 + %2683 = getelementptr float, float* %2, i32 0 + %2684 = getelementptr inbounds float, float* %2683, i64 2 + store float %2682, float* %2684, align 4 + %2685 = bitcast i32 %5 to float + %2686 = bitcast i32 %5 to float + %2687 = fmul float %2685, %2686 + %2688 = fadd float %2687, 0.000000e+00 + %2689 = bitcast i32 %32 to float + %2690 = bitcast i32 %32 to float + %2691 = fmul float %2689, %2690 + %2692 = fadd float %2688, %2691 + %2693 = call float @llvm.sqrt.f32(float %2692) + %2694 = bitcast i32 %5 to float + %2695 = fcmp olt float %2694, 0.000000e+00 + %2696 = sext i1 %2695 to i32 + %2697 = bitcast i32 %5 to float + %2698 = fcmp ogt float %2697, 0.000000e+00 + %2699 = zext i1 %2698 to i32 + %2700 = add nsw i32 %2696, %2699 + %2701 = sitofp i32 %2700 to float + %2702 = fneg float %2701 + %2703 = fmul 
float %2693, %2702 + %2704 = fmul float %2703, 0.000000e+00 + %2705 = bitcast i32 %32 to float + %2706 = fadd float %2705, %2704 + %2707 = bitcast i32 %5 to float + %2708 = bitcast i32 %5 to float + %2709 = fmul float %2707, %2708 + %2710 = fadd float %2709, 0.000000e+00 + %2711 = bitcast i32 %32 to float + %2712 = bitcast i32 %32 to float + %2713 = fmul float %2711, %2712 + %2714 = fadd float %2710, %2713 + %2715 = call float @llvm.sqrt.f32(float %2714) + %2716 = bitcast i32 %5 to float + %2717 = fcmp olt float %2716, 0.000000e+00 + %2718 = sext i1 %2717 to i32 + %2719 = bitcast i32 %5 to float + %2720 = fcmp ogt float %2719, 0.000000e+00 + %2721 = zext i1 %2720 to i32 + %2722 = add nsw i32 %2718, %2721 + %2723 = sitofp i32 %2722 to float + %2724 = fneg float %2723 + %2725 = fmul float %2715, %2724 + %2726 = bitcast i32 %5 to float + %2727 = fadd float %2726, %2725 + %2728 = bitcast i32 %5 to float + %2729 = bitcast i32 %5 to float + %2730 = fmul float %2728, %2729 + %2731 = fadd float %2730, 0.000000e+00 + %2732 = bitcast i32 %32 to float + %2733 = bitcast i32 %32 to float + %2734 = fmul float %2732, %2733 + %2735 = fadd float %2731, %2734 + %2736 = call float @llvm.sqrt.f32(float %2735) + %2737 = bitcast i32 %5 to float + %2738 = fcmp olt float %2737, 0.000000e+00 + %2739 = sext i1 %2738 to i32 + %2740 = bitcast i32 %5 to float + %2741 = fcmp ogt float %2740, 0.000000e+00 + %2742 = zext i1 %2741 to i32 + %2743 = add nsw i32 %2739, %2742 + %2744 = sitofp i32 %2743 to float + %2745 = fneg float %2744 + %2746 = fmul float %2736, %2745 + %2747 = bitcast i32 %5 to float + %2748 = fadd float %2747, %2746 + %2749 = fmul float %2727, %2748 + %2750 = fadd float %2749, 0.000000e+00 + %2751 = bitcast i32 %5 to float + %2752 = bitcast i32 %5 to float + %2753 = fmul float %2751, %2752 + %2754 = fadd float %2753, 0.000000e+00 + %2755 = bitcast i32 %32 to float + %2756 = bitcast i32 %32 to float + %2757 = fmul float %2755, %2756 + %2758 = fadd float %2754, %2757 + %2759 = call float @llvm.sqrt.f32(float %2758) + %2760 = bitcast i32 %5 to float + %2761 = fcmp olt float %2760, 0.000000e+00 + %2762 = sext i1 %2761 to i32 + %2763 = bitcast i32 %5 to float + %2764 = fcmp ogt float %2763, 0.000000e+00 + %2765 = zext i1 %2764 to i32 + %2766 = add nsw i32 %2762, %2765 + %2767 = sitofp i32 %2766 to float + %2768 = fneg float %2767 + %2769 = fmul float %2759, %2768 + %2770 = fmul float %2769, 0.000000e+00 + %2771 = bitcast i32 %32 to float + %2772 = fadd float %2771, %2770 + %2773 = bitcast i32 %5 to float + %2774 = bitcast i32 %5 to float + %2775 = fmul float %2773, %2774 + %2776 = fadd float %2775, 0.000000e+00 + %2777 = bitcast i32 %32 to float + %2778 = bitcast i32 %32 to float + %2779 = fmul float %2777, %2778 + %2780 = fadd float %2776, %2779 + %2781 = call float @llvm.sqrt.f32(float %2780) + %2782 = bitcast i32 %5 to float + %2783 = fcmp olt float %2782, 0.000000e+00 + %2784 = sext i1 %2783 to i32 + %2785 = bitcast i32 %5 to float + %2786 = fcmp ogt float %2785, 0.000000e+00 + %2787 = zext i1 %2786 to i32 + %2788 = add nsw i32 %2784, %2787 + %2789 = sitofp i32 %2788 to float + %2790 = fneg float %2789 + %2791 = fmul float %2781, %2790 + %2792 = fmul float %2791, 0.000000e+00 + %2793 = bitcast i32 %32 to float + %2794 = fadd float %2793, %2792 + %2795 = fmul float %2772, %2794 + %2796 = fadd float %2750, %2795 + %2797 = call float @llvm.sqrt.f32(float %2796) + %2798 = fadd float %2797, 0.000000e+00 + %2799 = fdiv float %2706, %2798 + %2800 = fmul float %2799, 2.000000e+00 + %2801 = bitcast i32 %5 to float 
+ %2802 = bitcast i32 %5 to float + %2803 = fmul float %2801, %2802 + %2804 = fadd float %2803, 0.000000e+00 + %2805 = bitcast i32 %32 to float + %2806 = bitcast i32 %32 to float + %2807 = fmul float %2805, %2806 + %2808 = fadd float %2804, %2807 + %2809 = call float @llvm.sqrt.f32(float %2808) + %2810 = bitcast i32 %5 to float + %2811 = fcmp olt float %2810, 0.000000e+00 + %2812 = sext i1 %2811 to i32 + %2813 = bitcast i32 %5 to float + %2814 = fcmp ogt float %2813, 0.000000e+00 + %2815 = zext i1 %2814 to i32 + %2816 = add nsw i32 %2812, %2815 + %2817 = sitofp i32 %2816 to float + %2818 = fneg float %2817 + %2819 = fmul float %2809, %2818 + %2820 = bitcast i32 %5 to float + %2821 = fadd float %2820, %2819 + %2822 = bitcast i32 %5 to float + %2823 = bitcast i32 %5 to float + %2824 = fmul float %2822, %2823 + %2825 = fadd float %2824, 0.000000e+00 + %2826 = bitcast i32 %32 to float + %2827 = bitcast i32 %32 to float + %2828 = fmul float %2826, %2827 + %2829 = fadd float %2825, %2828 + %2830 = call float @llvm.sqrt.f32(float %2829) + %2831 = bitcast i32 %5 to float + %2832 = fcmp olt float %2831, 0.000000e+00 + %2833 = sext i1 %2832 to i32 + %2834 = bitcast i32 %5 to float + %2835 = fcmp ogt float %2834, 0.000000e+00 + %2836 = zext i1 %2835 to i32 + %2837 = add nsw i32 %2833, %2836 + %2838 = sitofp i32 %2837 to float + %2839 = fneg float %2838 + %2840 = fmul float %2830, %2839 + %2841 = bitcast i32 %5 to float + %2842 = fadd float %2841, %2840 + %2843 = bitcast i32 %5 to float + %2844 = bitcast i32 %5 to float + %2845 = fmul float %2843, %2844 + %2846 = fadd float %2845, 0.000000e+00 + %2847 = bitcast i32 %32 to float + %2848 = bitcast i32 %32 to float + %2849 = fmul float %2847, %2848 + %2850 = fadd float %2846, %2849 + %2851 = call float @llvm.sqrt.f32(float %2850) + %2852 = bitcast i32 %5 to float + %2853 = fcmp olt float %2852, 0.000000e+00 + %2854 = sext i1 %2853 to i32 + %2855 = bitcast i32 %5 to float + %2856 = fcmp ogt float %2855, 0.000000e+00 + %2857 = zext i1 %2856 to i32 + %2858 = add nsw i32 %2854, %2857 + %2859 = sitofp i32 %2858 to float + %2860 = fneg float %2859 + %2861 = fmul float %2851, %2860 + %2862 = bitcast i32 %5 to float + %2863 = fadd float %2862, %2861 + %2864 = fmul float %2842, %2863 + %2865 = fadd float %2864, 0.000000e+00 + %2866 = bitcast i32 %5 to float + %2867 = bitcast i32 %5 to float + %2868 = fmul float %2866, %2867 + %2869 = fadd float %2868, 0.000000e+00 + %2870 = bitcast i32 %32 to float + %2871 = bitcast i32 %32 to float + %2872 = fmul float %2870, %2871 + %2873 = fadd float %2869, %2872 + %2874 = call float @llvm.sqrt.f32(float %2873) + %2875 = bitcast i32 %5 to float + %2876 = fcmp olt float %2875, 0.000000e+00 + %2877 = sext i1 %2876 to i32 + %2878 = bitcast i32 %5 to float + %2879 = fcmp ogt float %2878, 0.000000e+00 + %2880 = zext i1 %2879 to i32 + %2881 = add nsw i32 %2877, %2880 + %2882 = sitofp i32 %2881 to float + %2883 = fneg float %2882 + %2884 = fmul float %2874, %2883 + %2885 = fmul float %2884, 0.000000e+00 + %2886 = bitcast i32 %32 to float + %2887 = fadd float %2886, %2885 + %2888 = bitcast i32 %5 to float + %2889 = bitcast i32 %5 to float + %2890 = fmul float %2888, %2889 + %2891 = fadd float %2890, 0.000000e+00 + %2892 = bitcast i32 %32 to float + %2893 = bitcast i32 %32 to float + %2894 = fmul float %2892, %2893 + %2895 = fadd float %2891, %2894 + %2896 = call float @llvm.sqrt.f32(float %2895) + %2897 = bitcast i32 %5 to float + %2898 = fcmp olt float %2897, 0.000000e+00 + %2899 = sext i1 %2898 to i32 + %2900 = bitcast i32 %5 to 
float + %2901 = fcmp ogt float %2900, 0.000000e+00 + %2902 = zext i1 %2901 to i32 + %2903 = add nsw i32 %2899, %2902 + %2904 = sitofp i32 %2903 to float + %2905 = fneg float %2904 + %2906 = fmul float %2896, %2905 + %2907 = fmul float %2906, 0.000000e+00 + %2908 = bitcast i32 %32 to float + %2909 = fadd float %2908, %2907 + %2910 = fmul float %2887, %2909 + %2911 = fadd float %2865, %2910 + %2912 = call float @llvm.sqrt.f32(float %2911) + %2913 = fadd float %2912, 0.000000e+00 + %2914 = fdiv float %2821, %2913 + %2915 = fmul float %2800, %2914 + %2916 = fneg float %2915 + %2917 = fmul float %2916, %2676 + %2918 = fadd float %2917, 0.000000e+00 + %2919 = bitcast i32 %5 to float + %2920 = bitcast i32 %5 to float + %2921 = fmul float %2919, %2920 + %2922 = fadd float %2921, 0.000000e+00 + %2923 = bitcast i32 %32 to float + %2924 = bitcast i32 %32 to float + %2925 = fmul float %2923, %2924 + %2926 = fadd float %2922, %2925 + %2927 = call float @llvm.sqrt.f32(float %2926) + %2928 = bitcast i32 %5 to float + %2929 = fcmp olt float %2928, 0.000000e+00 + %2930 = sext i1 %2929 to i32 + %2931 = bitcast i32 %5 to float + %2932 = fcmp ogt float %2931, 0.000000e+00 + %2933 = zext i1 %2932 to i32 + %2934 = add nsw i32 %2930, %2933 + %2935 = sitofp i32 %2934 to float + %2936 = fneg float %2935 + %2937 = fmul float %2927, %2936 + %2938 = fmul float %2937, 0.000000e+00 + %2939 = bitcast i32 %32 to float + %2940 = fadd float %2939, %2938 + %2941 = bitcast i32 %5 to float + %2942 = bitcast i32 %5 to float + %2943 = fmul float %2941, %2942 + %2944 = fadd float %2943, 0.000000e+00 + %2945 = bitcast i32 %32 to float + %2946 = bitcast i32 %32 to float + %2947 = fmul float %2945, %2946 + %2948 = fadd float %2944, %2947 + %2949 = call float @llvm.sqrt.f32(float %2948) + %2950 = bitcast i32 %5 to float + %2951 = fcmp olt float %2950, 0.000000e+00 + %2952 = sext i1 %2951 to i32 + %2953 = bitcast i32 %5 to float + %2954 = fcmp ogt float %2953, 0.000000e+00 + %2955 = zext i1 %2954 to i32 + %2956 = add nsw i32 %2952, %2955 + %2957 = sitofp i32 %2956 to float + %2958 = fneg float %2957 + %2959 = fmul float %2949, %2958 + %2960 = bitcast i32 %5 to float + %2961 = fadd float %2960, %2959 + %2962 = bitcast i32 %5 to float + %2963 = bitcast i32 %5 to float + %2964 = fmul float %2962, %2963 + %2965 = fadd float %2964, 0.000000e+00 + %2966 = bitcast i32 %32 to float + %2967 = bitcast i32 %32 to float + %2968 = fmul float %2966, %2967 + %2969 = fadd float %2965, %2968 + %2970 = call float @llvm.sqrt.f32(float %2969) + %2971 = bitcast i32 %5 to float + %2972 = fcmp olt float %2971, 0.000000e+00 + %2973 = sext i1 %2972 to i32 + %2974 = bitcast i32 %5 to float + %2975 = fcmp ogt float %2974, 0.000000e+00 + %2976 = zext i1 %2975 to i32 + %2977 = add nsw i32 %2973, %2976 + %2978 = sitofp i32 %2977 to float + %2979 = fneg float %2978 + %2980 = fmul float %2970, %2979 + %2981 = bitcast i32 %5 to float + %2982 = fadd float %2981, %2980 + %2983 = fmul float %2961, %2982 + %2984 = fadd float %2983, 0.000000e+00 + %2985 = bitcast i32 %5 to float + %2986 = bitcast i32 %5 to float + %2987 = fmul float %2985, %2986 + %2988 = fadd float %2987, 0.000000e+00 + %2989 = bitcast i32 %32 to float + %2990 = bitcast i32 %32 to float + %2991 = fmul float %2989, %2990 + %2992 = fadd float %2988, %2991 + %2993 = call float @llvm.sqrt.f32(float %2992) + %2994 = bitcast i32 %5 to float + %2995 = fcmp olt float %2994, 0.000000e+00 + %2996 = sext i1 %2995 to i32 + %2997 = bitcast i32 %5 to float + %2998 = fcmp ogt float %2997, 0.000000e+00 + %2999 = zext 
i1 %2998 to i32 + %3000 = add nsw i32 %2996, %2999 + %3001 = sitofp i32 %3000 to float + %3002 = fneg float %3001 + %3003 = fmul float %2993, %3002 + %3004 = fmul float %3003, 0.000000e+00 + %3005 = bitcast i32 %32 to float + %3006 = fadd float %3005, %3004 + %3007 = bitcast i32 %5 to float + %3008 = bitcast i32 %5 to float + %3009 = fmul float %3007, %3008 + %3010 = fadd float %3009, 0.000000e+00 + %3011 = bitcast i32 %32 to float + %3012 = bitcast i32 %32 to float + %3013 = fmul float %3011, %3012 + %3014 = fadd float %3010, %3013 + %3015 = call float @llvm.sqrt.f32(float %3014) + %3016 = bitcast i32 %5 to float + %3017 = fcmp olt float %3016, 0.000000e+00 + %3018 = sext i1 %3017 to i32 + %3019 = bitcast i32 %5 to float + %3020 = fcmp ogt float %3019, 0.000000e+00 + %3021 = zext i1 %3020 to i32 + %3022 = add nsw i32 %3018, %3021 + %3023 = sitofp i32 %3022 to float + %3024 = fneg float %3023 + %3025 = fmul float %3015, %3024 + %3026 = fmul float %3025, 0.000000e+00 + %3027 = bitcast i32 %32 to float + %3028 = fadd float %3027, %3026 + %3029 = fmul float %3006, %3028 + %3030 = fadd float %2984, %3029 + %3031 = call float @llvm.sqrt.f32(float %3030) + %3032 = fadd float %3031, 0.000000e+00 + %3033 = fdiv float %2940, %3032 + %3034 = fmul float %3033, 2.000000e+00 + %3035 = bitcast i32 %5 to float + %3036 = bitcast i32 %5 to float + %3037 = fmul float %3035, %3036 + %3038 = fadd float %3037, 0.000000e+00 + %3039 = bitcast i32 %32 to float + %3040 = bitcast i32 %32 to float + %3041 = fmul float %3039, %3040 + %3042 = fadd float %3038, %3041 + %3043 = call float @llvm.sqrt.f32(float %3042) + %3044 = bitcast i32 %5 to float + %3045 = fcmp olt float %3044, 0.000000e+00 + %3046 = sext i1 %3045 to i32 + %3047 = bitcast i32 %5 to float + %3048 = fcmp ogt float %3047, 0.000000e+00 + %3049 = zext i1 %3048 to i32 + %3050 = add nsw i32 %3046, %3049 + %3051 = sitofp i32 %3050 to float + %3052 = fneg float %3051 + %3053 = fmul float %3043, %3052 + %3054 = fmul float %3053, 0.000000e+00 + %3055 = bitcast i32 %32 to float + %3056 = fadd float %3055, %3054 + %3057 = bitcast i32 %5 to float + %3058 = bitcast i32 %5 to float + %3059 = fmul float %3057, %3058 + %3060 = fadd float %3059, 0.000000e+00 + %3061 = bitcast i32 %32 to float + %3062 = bitcast i32 %32 to float + %3063 = fmul float %3061, %3062 + %3064 = fadd float %3060, %3063 + %3065 = call float @llvm.sqrt.f32(float %3064) + %3066 = bitcast i32 %5 to float + %3067 = fcmp olt float %3066, 0.000000e+00 + %3068 = sext i1 %3067 to i32 + %3069 = bitcast i32 %5 to float + %3070 = fcmp ogt float %3069, 0.000000e+00 + %3071 = zext i1 %3070 to i32 + %3072 = add nsw i32 %3068, %3071 + %3073 = sitofp i32 %3072 to float + %3074 = fneg float %3073 + %3075 = fmul float %3065, %3074 + %3076 = bitcast i32 %5 to float + %3077 = fadd float %3076, %3075 + %3078 = bitcast i32 %5 to float + %3079 = bitcast i32 %5 to float + %3080 = fmul float %3078, %3079 + %3081 = fadd float %3080, 0.000000e+00 + %3082 = bitcast i32 %32 to float + %3083 = bitcast i32 %32 to float + %3084 = fmul float %3082, %3083 + %3085 = fadd float %3081, %3084 + %3086 = call float @llvm.sqrt.f32(float %3085) + %3087 = bitcast i32 %5 to float + %3088 = fcmp olt float %3087, 0.000000e+00 + %3089 = sext i1 %3088 to i32 + %3090 = bitcast i32 %5 to float + %3091 = fcmp ogt float %3090, 0.000000e+00 + %3092 = zext i1 %3091 to i32 + %3093 = add nsw i32 %3089, %3092 + %3094 = sitofp i32 %3093 to float + %3095 = fneg float %3094 + %3096 = fmul float %3086, %3095 + %3097 = bitcast i32 %5 to float + %3098 = 
fadd float %3097, %3096 + %3099 = fmul float %3077, %3098 + %3100 = fadd float %3099, 0.000000e+00 + %3101 = bitcast i32 %5 to float + %3102 = bitcast i32 %5 to float + %3103 = fmul float %3101, %3102 + %3104 = fadd float %3103, 0.000000e+00 + %3105 = bitcast i32 %32 to float + %3106 = bitcast i32 %32 to float + %3107 = fmul float %3105, %3106 + %3108 = fadd float %3104, %3107 + %3109 = call float @llvm.sqrt.f32(float %3108) + %3110 = bitcast i32 %5 to float + %3111 = fcmp olt float %3110, 0.000000e+00 + %3112 = sext i1 %3111 to i32 + %3113 = bitcast i32 %5 to float + %3114 = fcmp ogt float %3113, 0.000000e+00 + %3115 = zext i1 %3114 to i32 + %3116 = add nsw i32 %3112, %3115 + %3117 = sitofp i32 %3116 to float + %3118 = fneg float %3117 + %3119 = fmul float %3109, %3118 + %3120 = fmul float %3119, 0.000000e+00 + %3121 = bitcast i32 %32 to float + %3122 = fadd float %3121, %3120 + %3123 = bitcast i32 %5 to float + %3124 = bitcast i32 %5 to float + %3125 = fmul float %3123, %3124 + %3126 = fadd float %3125, 0.000000e+00 + %3127 = bitcast i32 %32 to float + %3128 = bitcast i32 %32 to float + %3129 = fmul float %3127, %3128 + %3130 = fadd float %3126, %3129 + %3131 = call float @llvm.sqrt.f32(float %3130) + %3132 = bitcast i32 %5 to float + %3133 = fcmp olt float %3132, 0.000000e+00 + %3134 = sext i1 %3133 to i32 + %3135 = bitcast i32 %5 to float + %3136 = fcmp ogt float %3135, 0.000000e+00 + %3137 = zext i1 %3136 to i32 + %3138 = add nsw i32 %3134, %3137 + %3139 = sitofp i32 %3138 to float + %3140 = fneg float %3139 + %3141 = fmul float %3131, %3140 + %3142 = fmul float %3141, 0.000000e+00 + %3143 = bitcast i32 %32 to float + %3144 = fadd float %3143, %3142 + %3145 = fmul float %3122, %3144 + %3146 = fadd float %3100, %3145 + %3147 = call float @llvm.sqrt.f32(float %3146) + %3148 = fadd float %3147, 0.000000e+00 + %3149 = fdiv float %3056, %3148 + %3150 = fmul float %3034, %3149 + %3151 = fsub float 1.000000e+00, %3150 + %3152 = getelementptr float, float* %0, i32 0 + %3153 = getelementptr inbounds float, float* %3152, i64 2 + %3154 = load float, float* %3153, align 4 + %3155 = fmul float %3151, %3154 + %3156 = fadd float %2918, %3155 + %3157 = insertelement <4 x float> zeroinitializer, float %3156, i32 0 + %3158 = insertelement <4 x float> %3157, float 0.000000e+00, i32 1 + %3159 = insertelement <4 x float> %3158, float 0.000000e+00, i32 2 + %3160 = insertelement <4 x float> %3159, float 0.000000e+00, i32 3 + %3161 = extractelement <4 x float> %3160, i32 0 + %3162 = getelementptr float, float* %2, i32 0 + %3163 = getelementptr inbounds float, float* %3162, i64 2 + store float %3161, float* %3163, align 4 + %3164 = extractelement <4 x float> %3160, i32 1 + %3165 = getelementptr float, float* %2, i32 0 + %3166 = getelementptr inbounds float, float* %3165, i64 3 + store float %3164, float* %3166, align 4 + %3167 = bitcast i32 %5 to float + %3168 = bitcast i32 %5 to float + %3169 = fmul float %3167, %3168 + %3170 = fadd float %3169, 0.000000e+00 + %3171 = bitcast i32 %32 to float + %3172 = bitcast i32 %32 to float + %3173 = fmul float %3171, %3172 + %3174 = fadd float %3170, %3173 + %3175 = call float @llvm.sqrt.f32(float %3174) + %3176 = bitcast i32 %5 to float + %3177 = fcmp olt float %3176, 0.000000e+00 + %3178 = sext i1 %3177 to i32 + %3179 = bitcast i32 %5 to float + %3180 = fcmp ogt float %3179, 0.000000e+00 + %3181 = zext i1 %3180 to i32 + %3182 = add nsw i32 %3178, %3181 + %3183 = sitofp i32 %3182 to float + %3184 = fneg float %3183 + %3185 = fmul float %3175, %3184 + %3186 = fmul float 
%3185, 0.000000e+00 + %3187 = bitcast i32 %32 to float + %3188 = fadd float %3187, %3186 + %3189 = bitcast i32 %5 to float + %3190 = bitcast i32 %5 to float + %3191 = fmul float %3189, %3190 + %3192 = fadd float %3191, 0.000000e+00 + %3193 = bitcast i32 %32 to float + %3194 = bitcast i32 %32 to float + %3195 = fmul float %3193, %3194 + %3196 = fadd float %3192, %3195 + %3197 = call float @llvm.sqrt.f32(float %3196) + %3198 = bitcast i32 %5 to float + %3199 = fcmp olt float %3198, 0.000000e+00 + %3200 = sext i1 %3199 to i32 + %3201 = bitcast i32 %5 to float + %3202 = fcmp ogt float %3201, 0.000000e+00 + %3203 = zext i1 %3202 to i32 + %3204 = add nsw i32 %3200, %3203 + %3205 = sitofp i32 %3204 to float + %3206 = fneg float %3205 + %3207 = fmul float %3197, %3206 + %3208 = bitcast i32 %5 to float + %3209 = fadd float %3208, %3207 + %3210 = bitcast i32 %5 to float + %3211 = bitcast i32 %5 to float + %3212 = fmul float %3210, %3211 + %3213 = fadd float %3212, 0.000000e+00 + %3214 = bitcast i32 %32 to float + %3215 = bitcast i32 %32 to float + %3216 = fmul float %3214, %3215 + %3217 = fadd float %3213, %3216 + %3218 = call float @llvm.sqrt.f32(float %3217) + %3219 = bitcast i32 %5 to float + %3220 = fcmp olt float %3219, 0.000000e+00 + %3221 = sext i1 %3220 to i32 + %3222 = bitcast i32 %5 to float + %3223 = fcmp ogt float %3222, 0.000000e+00 + %3224 = zext i1 %3223 to i32 + %3225 = add nsw i32 %3221, %3224 + %3226 = sitofp i32 %3225 to float + %3227 = fneg float %3226 + %3228 = fmul float %3218, %3227 + %3229 = bitcast i32 %5 to float + %3230 = fadd float %3229, %3228 + %3231 = fmul float %3209, %3230 + %3232 = fadd float %3231, 0.000000e+00 + %3233 = bitcast i32 %5 to float + %3234 = bitcast i32 %5 to float + %3235 = fmul float %3233, %3234 + %3236 = fadd float %3235, 0.000000e+00 + %3237 = bitcast i32 %32 to float + %3238 = bitcast i32 %32 to float + %3239 = fmul float %3237, %3238 + %3240 = fadd float %3236, %3239 + %3241 = call float @llvm.sqrt.f32(float %3240) + %3242 = bitcast i32 %5 to float + %3243 = fcmp olt float %3242, 0.000000e+00 + %3244 = sext i1 %3243 to i32 + %3245 = bitcast i32 %5 to float + %3246 = fcmp ogt float %3245, 0.000000e+00 + %3247 = zext i1 %3246 to i32 + %3248 = add nsw i32 %3244, %3247 + %3249 = sitofp i32 %3248 to float + %3250 = fneg float %3249 + %3251 = fmul float %3241, %3250 + %3252 = fmul float %3251, 0.000000e+00 + %3253 = bitcast i32 %32 to float + %3254 = fadd float %3253, %3252 + %3255 = bitcast i32 %5 to float + %3256 = bitcast i32 %5 to float + %3257 = fmul float %3255, %3256 + %3258 = fadd float %3257, 0.000000e+00 + %3259 = bitcast i32 %32 to float + %3260 = bitcast i32 %32 to float + %3261 = fmul float %3259, %3260 + %3262 = fadd float %3258, %3261 + %3263 = call float @llvm.sqrt.f32(float %3262) + %3264 = bitcast i32 %5 to float + %3265 = fcmp olt float %3264, 0.000000e+00 + %3266 = sext i1 %3265 to i32 + %3267 = bitcast i32 %5 to float + %3268 = fcmp ogt float %3267, 0.000000e+00 + %3269 = zext i1 %3268 to i32 + %3270 = add nsw i32 %3266, %3269 + %3271 = sitofp i32 %3270 to float + %3272 = fneg float %3271 + %3273 = fmul float %3263, %3272 + %3274 = fmul float %3273, 0.000000e+00 + %3275 = bitcast i32 %32 to float + %3276 = fadd float %3275, %3274 + %3277 = fmul float %3254, %3276 + %3278 = fadd float %3232, %3277 + %3279 = call float @llvm.sqrt.f32(float %3278) + %3280 = fadd float %3279, 0.000000e+00 + %3281 = fdiv float %3188, %3280 + %3282 = fmul float %3281, 2.000000e+00 + %3283 = bitcast i32 %5 to float + %3284 = bitcast i32 %5 to float + 
%3285 = fmul float %3283, %3284 + %3286 = fadd float %3285, 0.000000e+00 + %3287 = bitcast i32 %32 to float + %3288 = bitcast i32 %32 to float + %3289 = fmul float %3287, %3288 + %3290 = fadd float %3286, %3289 + %3291 = call float @llvm.sqrt.f32(float %3290) + %3292 = bitcast i32 %5 to float + %3293 = fcmp olt float %3292, 0.000000e+00 + %3294 = sext i1 %3293 to i32 + %3295 = bitcast i32 %5 to float + %3296 = fcmp ogt float %3295, 0.000000e+00 + %3297 = zext i1 %3296 to i32 + %3298 = add nsw i32 %3294, %3297 + %3299 = sitofp i32 %3298 to float + %3300 = fneg float %3299 + %3301 = fmul float %3291, %3300 + %3302 = bitcast i32 %5 to float + %3303 = fadd float %3302, %3301 + %3304 = bitcast i32 %5 to float + %3305 = bitcast i32 %5 to float + %3306 = fmul float %3304, %3305 + %3307 = fadd float %3306, 0.000000e+00 + %3308 = bitcast i32 %32 to float + %3309 = bitcast i32 %32 to float + %3310 = fmul float %3308, %3309 + %3311 = fadd float %3307, %3310 + %3312 = call float @llvm.sqrt.f32(float %3311) + %3313 = bitcast i32 %5 to float + %3314 = fcmp olt float %3313, 0.000000e+00 + %3315 = sext i1 %3314 to i32 + %3316 = bitcast i32 %5 to float + %3317 = fcmp ogt float %3316, 0.000000e+00 + %3318 = zext i1 %3317 to i32 + %3319 = add nsw i32 %3315, %3318 + %3320 = sitofp i32 %3319 to float + %3321 = fneg float %3320 + %3322 = fmul float %3312, %3321 + %3323 = bitcast i32 %5 to float + %3324 = fadd float %3323, %3322 + %3325 = bitcast i32 %5 to float + %3326 = bitcast i32 %5 to float + %3327 = fmul float %3325, %3326 + %3328 = fadd float %3327, 0.000000e+00 + %3329 = bitcast i32 %32 to float + %3330 = bitcast i32 %32 to float + %3331 = fmul float %3329, %3330 + %3332 = fadd float %3328, %3331 + %3333 = call float @llvm.sqrt.f32(float %3332) + %3334 = bitcast i32 %5 to float + %3335 = fcmp olt float %3334, 0.000000e+00 + %3336 = sext i1 %3335 to i32 + %3337 = bitcast i32 %5 to float + %3338 = fcmp ogt float %3337, 0.000000e+00 + %3339 = zext i1 %3338 to i32 + %3340 = add nsw i32 %3336, %3339 + %3341 = sitofp i32 %3340 to float + %3342 = fneg float %3341 + %3343 = fmul float %3333, %3342 + %3344 = bitcast i32 %5 to float + %3345 = fadd float %3344, %3343 + %3346 = fmul float %3324, %3345 + %3347 = fadd float %3346, 0.000000e+00 + %3348 = bitcast i32 %5 to float + %3349 = bitcast i32 %5 to float + %3350 = fmul float %3348, %3349 + %3351 = fadd float %3350, 0.000000e+00 + %3352 = bitcast i32 %32 to float + %3353 = bitcast i32 %32 to float + %3354 = fmul float %3352, %3353 + %3355 = fadd float %3351, %3354 + %3356 = call float @llvm.sqrt.f32(float %3355) + %3357 = bitcast i32 %5 to float + %3358 = fcmp olt float %3357, 0.000000e+00 + %3359 = sext i1 %3358 to i32 + %3360 = bitcast i32 %5 to float + %3361 = fcmp ogt float %3360, 0.000000e+00 + %3362 = zext i1 %3361 to i32 + %3363 = add nsw i32 %3359, %3362 + %3364 = sitofp i32 %3363 to float + %3365 = fneg float %3364 + %3366 = fmul float %3356, %3365 + %3367 = fmul float %3366, 0.000000e+00 + %3368 = bitcast i32 %32 to float + %3369 = fadd float %3368, %3367 + %3370 = bitcast i32 %5 to float + %3371 = bitcast i32 %5 to float + %3372 = fmul float %3370, %3371 + %3373 = fadd float %3372, 0.000000e+00 + %3374 = bitcast i32 %32 to float + %3375 = bitcast i32 %32 to float + %3376 = fmul float %3374, %3375 + %3377 = fadd float %3373, %3376 + %3378 = call float @llvm.sqrt.f32(float %3377) + %3379 = bitcast i32 %5 to float + %3380 = fcmp olt float %3379, 0.000000e+00 + %3381 = sext i1 %3380 to i32 + %3382 = bitcast i32 %5 to float + %3383 = fcmp ogt float %3382, 
0.000000e+00 + %3384 = zext i1 %3383 to i32 + %3385 = add nsw i32 %3381, %3384 + %3386 = sitofp i32 %3385 to float + %3387 = fneg float %3386 + %3388 = fmul float %3378, %3387 + %3389 = fmul float %3388, 0.000000e+00 + %3390 = bitcast i32 %32 to float + %3391 = fadd float %3390, %3389 + %3392 = fmul float %3369, %3391 + %3393 = fadd float %3347, %3392 + %3394 = call float @llvm.sqrt.f32(float %3393) + %3395 = fadd float %3394, 0.000000e+00 + %3396 = fdiv float %3303, %3395 + %3397 = fmul float %3282, %3396 + %3398 = fneg float %3397 + %3399 = insertelement <4 x float> zeroinitializer, float %3398, i32 0 + %3400 = insertelement <4 x float> %3399, float 0.000000e+00, i32 1 + %3401 = insertelement <4 x float> %3400, float 0.000000e+00, i32 2 + %3402 = insertelement <4 x float> %3401, float 0.000000e+00, i32 3 + %3403 = getelementptr float, float* %0, i32 0 + %3404 = getelementptr inbounds float, float* %3403, i64 1 + %3405 = load float, float* %3404, align 4 + %3406 = insertelement <4 x float> zeroinitializer, float %3405, i32 0 + %3407 = insertelement <4 x float> %3406, float 0.000000e+00, i32 1 + %3408 = insertelement <4 x float> %3407, float 0.000000e+00, i32 2 + %3409 = insertelement <4 x float> %3408, float 0.000000e+00, i32 3 + %3410 = call <4 x float> @llvm.fma.v4f32(<4 x float> %3402, <4 x float> %3409, <4 x float> zeroinitializer) + %3411 = extractelement <4 x float> %3410, i32 0 + %3412 = getelementptr float, float* %2, i32 0 + %3413 = getelementptr inbounds float, float* %3412, i64 3 + store float %3411, float* %3413, align 4 + %3414 = bitcast i32 %5 to float + %3415 = bitcast i32 %5 to float + %3416 = fmul float %3414, %3415 + %3417 = fadd float %3416, 0.000000e+00 + %3418 = bitcast i32 %32 to float + %3419 = bitcast i32 %32 to float + %3420 = fmul float %3418, %3419 + %3421 = fadd float %3417, %3420 + %3422 = call float @llvm.sqrt.f32(float %3421) + %3423 = bitcast i32 %5 to float + %3424 = fcmp olt float %3423, 0.000000e+00 + %3425 = sext i1 %3424 to i32 + %3426 = bitcast i32 %5 to float + %3427 = fcmp ogt float %3426, 0.000000e+00 + %3428 = zext i1 %3427 to i32 + %3429 = add nsw i32 %3425, %3428 + %3430 = sitofp i32 %3429 to float + %3431 = fneg float %3430 + %3432 = fmul float %3422, %3431 + %3433 = fmul float %3432, 0.000000e+00 + %3434 = bitcast i32 %32 to float + %3435 = fadd float %3434, %3433 + %3436 = bitcast i32 %5 to float + %3437 = bitcast i32 %5 to float + %3438 = fmul float %3436, %3437 + %3439 = fadd float %3438, 0.000000e+00 + %3440 = bitcast i32 %32 to float + %3441 = bitcast i32 %32 to float + %3442 = fmul float %3440, %3441 + %3443 = fadd float %3439, %3442 + %3444 = call float @llvm.sqrt.f32(float %3443) + %3445 = bitcast i32 %5 to float + %3446 = fcmp olt float %3445, 0.000000e+00 + %3447 = sext i1 %3446 to i32 + %3448 = bitcast i32 %5 to float + %3449 = fcmp ogt float %3448, 0.000000e+00 + %3450 = zext i1 %3449 to i32 + %3451 = add nsw i32 %3447, %3450 + %3452 = sitofp i32 %3451 to float + %3453 = fneg float %3452 + %3454 = fmul float %3444, %3453 + %3455 = bitcast i32 %5 to float + %3456 = fadd float %3455, %3454 + %3457 = bitcast i32 %5 to float + %3458 = bitcast i32 %5 to float + %3459 = fmul float %3457, %3458 + %3460 = fadd float %3459, 0.000000e+00 + %3461 = bitcast i32 %32 to float + %3462 = bitcast i32 %32 to float + %3463 = fmul float %3461, %3462 + %3464 = fadd float %3460, %3463 + %3465 = call float @llvm.sqrt.f32(float %3464) + %3466 = bitcast i32 %5 to float + %3467 = fcmp olt float %3466, 0.000000e+00 + %3468 = sext i1 %3467 to i32 + %3469 = 
bitcast i32 %5 to float + %3470 = fcmp ogt float %3469, 0.000000e+00 + %3471 = zext i1 %3470 to i32 + %3472 = add nsw i32 %3468, %3471 + %3473 = sitofp i32 %3472 to float + %3474 = fneg float %3473 + %3475 = fmul float %3465, %3474 + %3476 = bitcast i32 %5 to float + %3477 = fadd float %3476, %3475 + %3478 = fmul float %3456, %3477 + %3479 = fadd float %3478, 0.000000e+00 + %3480 = bitcast i32 %5 to float + %3481 = bitcast i32 %5 to float + %3482 = fmul float %3480, %3481 + %3483 = fadd float %3482, 0.000000e+00 + %3484 = bitcast i32 %32 to float + %3485 = bitcast i32 %32 to float + %3486 = fmul float %3484, %3485 + %3487 = fadd float %3483, %3486 + %3488 = call float @llvm.sqrt.f32(float %3487) + %3489 = bitcast i32 %5 to float + %3490 = fcmp olt float %3489, 0.000000e+00 + %3491 = sext i1 %3490 to i32 + %3492 = bitcast i32 %5 to float + %3493 = fcmp ogt float %3492, 0.000000e+00 + %3494 = zext i1 %3493 to i32 + %3495 = add nsw i32 %3491, %3494 + %3496 = sitofp i32 %3495 to float + %3497 = fneg float %3496 + %3498 = fmul float %3488, %3497 + %3499 = fmul float %3498, 0.000000e+00 + %3500 = bitcast i32 %32 to float + %3501 = fadd float %3500, %3499 + %3502 = bitcast i32 %5 to float + %3503 = bitcast i32 %5 to float + %3504 = fmul float %3502, %3503 + %3505 = fadd float %3504, 0.000000e+00 + %3506 = bitcast i32 %32 to float + %3507 = bitcast i32 %32 to float + %3508 = fmul float %3506, %3507 + %3509 = fadd float %3505, %3508 + %3510 = call float @llvm.sqrt.f32(float %3509) + %3511 = bitcast i32 %5 to float + %3512 = fcmp olt float %3511, 0.000000e+00 + %3513 = sext i1 %3512 to i32 + %3514 = bitcast i32 %5 to float + %3515 = fcmp ogt float %3514, 0.000000e+00 + %3516 = zext i1 %3515 to i32 + %3517 = add nsw i32 %3513, %3516 + %3518 = sitofp i32 %3517 to float + %3519 = fneg float %3518 + %3520 = fmul float %3510, %3519 + %3521 = fmul float %3520, 0.000000e+00 + %3522 = bitcast i32 %32 to float + %3523 = fadd float %3522, %3521 + %3524 = fmul float %3501, %3523 + %3525 = fadd float %3479, %3524 + %3526 = call float @llvm.sqrt.f32(float %3525) + %3527 = fadd float %3526, 0.000000e+00 + %3528 = fdiv float %3435, %3527 + %3529 = fmul float %3528, 2.000000e+00 + %3530 = bitcast i32 %5 to float + %3531 = bitcast i32 %5 to float + %3532 = fmul float %3530, %3531 + %3533 = fadd float %3532, 0.000000e+00 + %3534 = bitcast i32 %32 to float + %3535 = bitcast i32 %32 to float + %3536 = fmul float %3534, %3535 + %3537 = fadd float %3533, %3536 + %3538 = call float @llvm.sqrt.f32(float %3537) + %3539 = bitcast i32 %5 to float + %3540 = fcmp olt float %3539, 0.000000e+00 + %3541 = sext i1 %3540 to i32 + %3542 = bitcast i32 %5 to float + %3543 = fcmp ogt float %3542, 0.000000e+00 + %3544 = zext i1 %3543 to i32 + %3545 = add nsw i32 %3541, %3544 + %3546 = sitofp i32 %3545 to float + %3547 = fneg float %3546 + %3548 = fmul float %3538, %3547 + %3549 = bitcast i32 %5 to float + %3550 = fadd float %3549, %3548 + %3551 = bitcast i32 %5 to float + %3552 = bitcast i32 %5 to float + %3553 = fmul float %3551, %3552 + %3554 = fadd float %3553, 0.000000e+00 + %3555 = bitcast i32 %32 to float + %3556 = bitcast i32 %32 to float + %3557 = fmul float %3555, %3556 + %3558 = fadd float %3554, %3557 + %3559 = call float @llvm.sqrt.f32(float %3558) + %3560 = bitcast i32 %5 to float + %3561 = fcmp olt float %3560, 0.000000e+00 + %3562 = sext i1 %3561 to i32 + %3563 = bitcast i32 %5 to float + %3564 = fcmp ogt float %3563, 0.000000e+00 + %3565 = zext i1 %3564 to i32 + %3566 = add nsw i32 %3562, %3565 + %3567 = sitofp i32 %3566 
to float + %3568 = fneg float %3567 + %3569 = fmul float %3559, %3568 + %3570 = bitcast i32 %5 to float + %3571 = fadd float %3570, %3569 + %3572 = bitcast i32 %5 to float + %3573 = bitcast i32 %5 to float + %3574 = fmul float %3572, %3573 + %3575 = fadd float %3574, 0.000000e+00 + %3576 = bitcast i32 %32 to float + %3577 = bitcast i32 %32 to float + %3578 = fmul float %3576, %3577 + %3579 = fadd float %3575, %3578 + %3580 = call float @llvm.sqrt.f32(float %3579) + %3581 = bitcast i32 %5 to float + %3582 = fcmp olt float %3581, 0.000000e+00 + %3583 = sext i1 %3582 to i32 + %3584 = bitcast i32 %5 to float + %3585 = fcmp ogt float %3584, 0.000000e+00 + %3586 = zext i1 %3585 to i32 + %3587 = add nsw i32 %3583, %3586 + %3588 = sitofp i32 %3587 to float + %3589 = fneg float %3588 + %3590 = fmul float %3580, %3589 + %3591 = bitcast i32 %5 to float + %3592 = fadd float %3591, %3590 + %3593 = fmul float %3571, %3592 + %3594 = fadd float %3593, 0.000000e+00 + %3595 = bitcast i32 %5 to float + %3596 = bitcast i32 %5 to float + %3597 = fmul float %3595, %3596 + %3598 = fadd float %3597, 0.000000e+00 + %3599 = bitcast i32 %32 to float + %3600 = bitcast i32 %32 to float + %3601 = fmul float %3599, %3600 + %3602 = fadd float %3598, %3601 + %3603 = call float @llvm.sqrt.f32(float %3602) + %3604 = bitcast i32 %5 to float + %3605 = fcmp olt float %3604, 0.000000e+00 + %3606 = sext i1 %3605 to i32 + %3607 = bitcast i32 %5 to float + %3608 = fcmp ogt float %3607, 0.000000e+00 + %3609 = zext i1 %3608 to i32 + %3610 = add nsw i32 %3606, %3609 + %3611 = sitofp i32 %3610 to float + %3612 = fneg float %3611 + %3613 = fmul float %3603, %3612 + %3614 = fmul float %3613, 0.000000e+00 + %3615 = bitcast i32 %32 to float + %3616 = fadd float %3615, %3614 + %3617 = bitcast i32 %5 to float + %3618 = bitcast i32 %5 to float + %3619 = fmul float %3617, %3618 + %3620 = fadd float %3619, 0.000000e+00 + %3621 = bitcast i32 %32 to float + %3622 = bitcast i32 %32 to float + %3623 = fmul float %3621, %3622 + %3624 = fadd float %3620, %3623 + %3625 = call float @llvm.sqrt.f32(float %3624) + %3626 = bitcast i32 %5 to float + %3627 = fcmp olt float %3626, 0.000000e+00 + %3628 = sext i1 %3627 to i32 + %3629 = bitcast i32 %5 to float + %3630 = fcmp ogt float %3629, 0.000000e+00 + %3631 = zext i1 %3630 to i32 + %3632 = add nsw i32 %3628, %3631 + %3633 = sitofp i32 %3632 to float + %3634 = fneg float %3633 + %3635 = fmul float %3625, %3634 + %3636 = fmul float %3635, 0.000000e+00 + %3637 = bitcast i32 %32 to float + %3638 = fadd float %3637, %3636 + %3639 = fmul float %3616, %3638 + %3640 = fadd float %3594, %3639 + %3641 = call float @llvm.sqrt.f32(float %3640) + %3642 = fadd float %3641, 0.000000e+00 + %3643 = fdiv float %3550, %3642 + %3644 = fmul float %3529, %3643 + %3645 = fneg float %3644 + %3646 = fmul float %3645, %3405 + %3647 = fadd float %3646, 0.000000e+00 + %3648 = bitcast i32 %5 to float + %3649 = bitcast i32 %5 to float + %3650 = fmul float %3648, %3649 + %3651 = fadd float %3650, 0.000000e+00 + %3652 = bitcast i32 %32 to float + %3653 = bitcast i32 %32 to float + %3654 = fmul float %3652, %3653 + %3655 = fadd float %3651, %3654 + %3656 = call float @llvm.sqrt.f32(float %3655) + %3657 = bitcast i32 %5 to float + %3658 = fcmp olt float %3657, 0.000000e+00 + %3659 = sext i1 %3658 to i32 + %3660 = bitcast i32 %5 to float + %3661 = fcmp ogt float %3660, 0.000000e+00 + %3662 = zext i1 %3661 to i32 + %3663 = add nsw i32 %3659, %3662 + %3664 = sitofp i32 %3663 to float + %3665 = fneg float %3664 + %3666 = fmul float %3656, 
%3665 + %3667 = fmul float %3666, 0.000000e+00 + %3668 = bitcast i32 %32 to float + %3669 = fadd float %3668, %3667 + %3670 = bitcast i32 %5 to float + %3671 = bitcast i32 %5 to float + %3672 = fmul float %3670, %3671 + %3673 = fadd float %3672, 0.000000e+00 + %3674 = bitcast i32 %32 to float + %3675 = bitcast i32 %32 to float + %3676 = fmul float %3674, %3675 + %3677 = fadd float %3673, %3676 + %3678 = call float @llvm.sqrt.f32(float %3677) + %3679 = bitcast i32 %5 to float + %3680 = fcmp olt float %3679, 0.000000e+00 + %3681 = sext i1 %3680 to i32 + %3682 = bitcast i32 %5 to float + %3683 = fcmp ogt float %3682, 0.000000e+00 + %3684 = zext i1 %3683 to i32 + %3685 = add nsw i32 %3681, %3684 + %3686 = sitofp i32 %3685 to float + %3687 = fneg float %3686 + %3688 = fmul float %3678, %3687 + %3689 = bitcast i32 %5 to float + %3690 = fadd float %3689, %3688 + %3691 = bitcast i32 %5 to float + %3692 = bitcast i32 %5 to float + %3693 = fmul float %3691, %3692 + %3694 = fadd float %3693, 0.000000e+00 + %3695 = bitcast i32 %32 to float + %3696 = bitcast i32 %32 to float + %3697 = fmul float %3695, %3696 + %3698 = fadd float %3694, %3697 + %3699 = call float @llvm.sqrt.f32(float %3698) + %3700 = bitcast i32 %5 to float + %3701 = fcmp olt float %3700, 0.000000e+00 + %3702 = sext i1 %3701 to i32 + %3703 = bitcast i32 %5 to float + %3704 = fcmp ogt float %3703, 0.000000e+00 + %3705 = zext i1 %3704 to i32 + %3706 = add nsw i32 %3702, %3705 + %3707 = sitofp i32 %3706 to float + %3708 = fneg float %3707 + %3709 = fmul float %3699, %3708 + %3710 = bitcast i32 %5 to float + %3711 = fadd float %3710, %3709 + %3712 = fmul float %3690, %3711 + %3713 = fadd float %3712, 0.000000e+00 + %3714 = bitcast i32 %5 to float + %3715 = bitcast i32 %5 to float + %3716 = fmul float %3714, %3715 + %3717 = fadd float %3716, 0.000000e+00 + %3718 = bitcast i32 %32 to float + %3719 = bitcast i32 %32 to float + %3720 = fmul float %3718, %3719 + %3721 = fadd float %3717, %3720 + %3722 = call float @llvm.sqrt.f32(float %3721) + %3723 = bitcast i32 %5 to float + %3724 = fcmp olt float %3723, 0.000000e+00 + %3725 = sext i1 %3724 to i32 + %3726 = bitcast i32 %5 to float + %3727 = fcmp ogt float %3726, 0.000000e+00 + %3728 = zext i1 %3727 to i32 + %3729 = add nsw i32 %3725, %3728 + %3730 = sitofp i32 %3729 to float + %3731 = fneg float %3730 + %3732 = fmul float %3722, %3731 + %3733 = fmul float %3732, 0.000000e+00 + %3734 = bitcast i32 %32 to float + %3735 = fadd float %3734, %3733 + %3736 = bitcast i32 %5 to float + %3737 = bitcast i32 %5 to float + %3738 = fmul float %3736, %3737 + %3739 = fadd float %3738, 0.000000e+00 + %3740 = bitcast i32 %32 to float + %3741 = bitcast i32 %32 to float + %3742 = fmul float %3740, %3741 + %3743 = fadd float %3739, %3742 + %3744 = call float @llvm.sqrt.f32(float %3743) + %3745 = bitcast i32 %5 to float + %3746 = fcmp olt float %3745, 0.000000e+00 + %3747 = sext i1 %3746 to i32 + %3748 = bitcast i32 %5 to float + %3749 = fcmp ogt float %3748, 0.000000e+00 + %3750 = zext i1 %3749 to i32 + %3751 = add nsw i32 %3747, %3750 + %3752 = sitofp i32 %3751 to float + %3753 = fneg float %3752 + %3754 = fmul float %3744, %3753 + %3755 = fmul float %3754, 0.000000e+00 + %3756 = bitcast i32 %32 to float + %3757 = fadd float %3756, %3755 + %3758 = fmul float %3735, %3757 + %3759 = fadd float %3713, %3758 + %3760 = call float @llvm.sqrt.f32(float %3759) + %3761 = fadd float %3760, 0.000000e+00 + %3762 = fdiv float %3669, %3761 + %3763 = fmul float %3762, 2.000000e+00 + %3764 = bitcast i32 %5 to float + %3765 = 
bitcast i32 %5 to float + %3766 = fmul float %3764, %3765 + %3767 = fadd float %3766, 0.000000e+00 + %3768 = bitcast i32 %32 to float + %3769 = bitcast i32 %32 to float + %3770 = fmul float %3768, %3769 + %3771 = fadd float %3767, %3770 + %3772 = call float @llvm.sqrt.f32(float %3771) + %3773 = bitcast i32 %5 to float + %3774 = fcmp olt float %3773, 0.000000e+00 + %3775 = sext i1 %3774 to i32 + %3776 = bitcast i32 %5 to float + %3777 = fcmp ogt float %3776, 0.000000e+00 + %3778 = zext i1 %3777 to i32 + %3779 = add nsw i32 %3775, %3778 + %3780 = sitofp i32 %3779 to float + %3781 = fneg float %3780 + %3782 = fmul float %3772, %3781 + %3783 = fmul float %3782, 0.000000e+00 + %3784 = bitcast i32 %32 to float + %3785 = fadd float %3784, %3783 + %3786 = bitcast i32 %5 to float + %3787 = bitcast i32 %5 to float + %3788 = fmul float %3786, %3787 + %3789 = fadd float %3788, 0.000000e+00 + %3790 = bitcast i32 %32 to float + %3791 = bitcast i32 %32 to float + %3792 = fmul float %3790, %3791 + %3793 = fadd float %3789, %3792 + %3794 = call float @llvm.sqrt.f32(float %3793) + %3795 = bitcast i32 %5 to float + %3796 = fcmp olt float %3795, 0.000000e+00 + %3797 = sext i1 %3796 to i32 + %3798 = bitcast i32 %5 to float + %3799 = fcmp ogt float %3798, 0.000000e+00 + %3800 = zext i1 %3799 to i32 + %3801 = add nsw i32 %3797, %3800 + %3802 = sitofp i32 %3801 to float + %3803 = fneg float %3802 + %3804 = fmul float %3794, %3803 + %3805 = bitcast i32 %5 to float + %3806 = fadd float %3805, %3804 + %3807 = bitcast i32 %5 to float + %3808 = bitcast i32 %5 to float + %3809 = fmul float %3807, %3808 + %3810 = fadd float %3809, 0.000000e+00 + %3811 = bitcast i32 %32 to float + %3812 = bitcast i32 %32 to float + %3813 = fmul float %3811, %3812 + %3814 = fadd float %3810, %3813 + %3815 = call float @llvm.sqrt.f32(float %3814) + %3816 = bitcast i32 %5 to float + %3817 = fcmp olt float %3816, 0.000000e+00 + %3818 = sext i1 %3817 to i32 + %3819 = bitcast i32 %5 to float + %3820 = fcmp ogt float %3819, 0.000000e+00 + %3821 = zext i1 %3820 to i32 + %3822 = add nsw i32 %3818, %3821 + %3823 = sitofp i32 %3822 to float + %3824 = fneg float %3823 + %3825 = fmul float %3815, %3824 + %3826 = bitcast i32 %5 to float + %3827 = fadd float %3826, %3825 + %3828 = fmul float %3806, %3827 + %3829 = fadd float %3828, 0.000000e+00 + %3830 = bitcast i32 %5 to float + %3831 = bitcast i32 %5 to float + %3832 = fmul float %3830, %3831 + %3833 = fadd float %3832, 0.000000e+00 + %3834 = bitcast i32 %32 to float + %3835 = bitcast i32 %32 to float + %3836 = fmul float %3834, %3835 + %3837 = fadd float %3833, %3836 + %3838 = call float @llvm.sqrt.f32(float %3837) + %3839 = bitcast i32 %5 to float + %3840 = fcmp olt float %3839, 0.000000e+00 + %3841 = sext i1 %3840 to i32 + %3842 = bitcast i32 %5 to float + %3843 = fcmp ogt float %3842, 0.000000e+00 + %3844 = zext i1 %3843 to i32 + %3845 = add nsw i32 %3841, %3844 + %3846 = sitofp i32 %3845 to float + %3847 = fneg float %3846 + %3848 = fmul float %3838, %3847 + %3849 = fmul float %3848, 0.000000e+00 + %3850 = bitcast i32 %32 to float + %3851 = fadd float %3850, %3849 + %3852 = bitcast i32 %5 to float + %3853 = bitcast i32 %5 to float + %3854 = fmul float %3852, %3853 + %3855 = fadd float %3854, 0.000000e+00 + %3856 = bitcast i32 %32 to float + %3857 = bitcast i32 %32 to float + %3858 = fmul float %3856, %3857 + %3859 = fadd float %3855, %3858 + %3860 = call float @llvm.sqrt.f32(float %3859) + %3861 = bitcast i32 %5 to float + %3862 = fcmp olt float %3861, 0.000000e+00 + %3863 = sext i1 %3862 to i32 
+ %3864 = bitcast i32 %5 to float + %3865 = fcmp ogt float %3864, 0.000000e+00 + %3866 = zext i1 %3865 to i32 + %3867 = add nsw i32 %3863, %3866 + %3868 = sitofp i32 %3867 to float + %3869 = fneg float %3868 + %3870 = fmul float %3860, %3869 + %3871 = fmul float %3870, 0.000000e+00 + %3872 = bitcast i32 %32 to float + %3873 = fadd float %3872, %3871 + %3874 = fmul float %3851, %3873 + %3875 = fadd float %3829, %3874 + %3876 = call float @llvm.sqrt.f32(float %3875) + %3877 = fadd float %3876, 0.000000e+00 + %3878 = fdiv float %3785, %3877 + %3879 = fmul float %3763, %3878 + %3880 = fsub float 1.000000e+00, %3879 + %3881 = getelementptr float, float* %0, i32 0 + %3882 = getelementptr inbounds float, float* %3881, i64 3 + %3883 = load float, float* %3882, align 4 + %3884 = fmul float %3880, %3883 + %3885 = fadd float %3647, %3884 + %3886 = insertelement <4 x float> zeroinitializer, float %3885, i32 0 + %3887 = insertelement <4 x float> %3886, float 0.000000e+00, i32 1 + %3888 = insertelement <4 x float> %3887, float 0.000000e+00, i32 2 + %3889 = insertelement <4 x float> %3888, float 0.000000e+00, i32 3 + %3890 = extractelement <4 x float> %3889, i32 0 + %3891 = getelementptr float, float* %2, i32 0 + %3892 = getelementptr inbounds float, float* %3891, i64 3 + store float %3890, float* %3892, align 4 + %3893 = getelementptr float, float* %1, i32 0 + %3894 = getelementptr inbounds float, float* %3893, i64 2 + %3895 = bitcast float* %3894 to i32* + %3896 = load i32, i32* %3895, align 4 + %3897 = bitcast i32 %3896 to float + %3898 = insertelement <4 x float> zeroinitializer, float %3897, i32 0 + %3899 = getelementptr float, float* %1, i32 0 + %3900 = getelementptr inbounds float, float* %3899, i64 1 + %3901 = bitcast float* %3900 to i32* + %3902 = load i32, i32* %3901, align 4 + %3903 = bitcast i32 %3902 to float + %3904 = insertelement <4 x float> %3898, float %3903, i32 1 + %3905 = insertelement <4 x float> %3904, float 0.000000e+00, i32 2 + %3906 = insertelement <4 x float> %3905, float 0.000000e+00, i32 3 + %3907 = extractelement <4 x float> %3906, i32 0 + %3908 = getelementptr float, float* %1, i32 0 + %3909 = getelementptr inbounds float, float* %3908, i64 1 + %3910 = bitcast float* %3909 to i32* + %3911 = bitcast i32* %3910 to float* + store float %3907, float* %3911, align 4 + %3912 = extractelement <4 x float> %3906, i32 1 + %3913 = getelementptr float, float* %1, i32 0 + %3914 = getelementptr inbounds float, float* %3913, i64 2 + %3915 = bitcast float* %3914 to i32* + %3916 = bitcast i32* %3915 to float* + store float %3912, float* %3916, align 4 + ret void +} + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader13: + %3 = bitcast float* %2 to i8* + %4 = bitcast float* %0 to i8* + %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) + %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #8 + %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 + %8 = bitcast i8* %7 to float* + store float 1.000000e+00, float* %8, align 4 + %9 = getelementptr inbounds i8, i8* %7, i64 8 + %10 = getelementptr inbounds i8, i8* %7, i64 12 + %11 = bitcast i8* %10 to float* + store float 1.000000e+00, float* %11, align 4 + %12 = bitcast float* %1 to i8* + %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) 
+ %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %15 = bitcast i8* %14 to float* + %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %17 = bitcast i8* %16 to float* + %18 = bitcast float* %2 to i32* + %19 = load i32, i32* %18, align 4 + %20 = bitcast i8* %14 to i32* + store i32 %19, i32* %20, align 4 + %21 = bitcast i8* %7 to i32* + %22 = load i32, i32* %21, align 4 + %23 = bitcast i8* %16 to i32* + store i32 %22, i32* %23, align 4 + %24 = getelementptr inbounds float, float* %2, i64 2 + %25 = bitcast float* %24 to i32* + %26 = load i32, i32* %25, align 4 + %27 = getelementptr inbounds i8, i8* %14, i64 4 + %28 = bitcast i8* %27 to i32* + store i32 %26, i32* %28, align 4 + %29 = bitcast i8* %9 to i32* + %30 = load i32, i32* %29, align 4 + %31 = getelementptr inbounds i8, i8* %16, i64 4 + %32 = bitcast i8* %31 to i32* + store i32 %30, i32* %32, align 4 + %33 = load float, float* %15, align 4 + %34 = call float @no_opt_sgn(float %33) + %35 = fneg float %34 + %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) + %37 = fmul float %36, %35 + %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %39 = bitcast i8* %38 to float* + %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %41 = load float, float* %15, align 4 + %42 = load float, float* %17, align 4 + %43 = fmul float %37, %42 + %44 = fadd float %41, %43 + store float %44, float* %39, align 4 + %45 = bitcast i8* %27 to float* + %46 = load float, float* %45, align 4 + %47 = bitcast i8* %31 to float* + %48 = load float, float* %47, align 4 + %49 = fmul float %37, %48 + %50 = fadd float %46, %49 + %51 = getelementptr inbounds i8, i8* %38, i64 4 + %52 = bitcast i8* %51 to float* + store float %50, float* %52, align 4 + %53 = bitcast i8* %40 to float* + %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) + %55 = fadd float %54, 0x3EE4F8B580000000 + %56 = load float, float* %39, align 4 + %57 = fdiv float %56, %55 + store float %57, float* %53, align 4 + %58 = load float, float* %52, align 4 + %59 = fdiv float %58, %55 + %60 = getelementptr inbounds i8, i8* %40, i64 4 + %61 = bitcast i8* %60 to float* + store float %59, float* %61, align 4 + %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 + %63 = bitcast i8* %62 to float* + %64 = load float, float* %53, align 4 + %65 = fmul float %64, 2.000000e+00 + %66 = fmul float %65, %64 + %67 = fsub float 1.000000e+00, %66 + store float %67, float* %63, align 4 + %68 = load float, float* %53, align 4 + %69 = fmul float %68, 2.000000e+00 + %70 = load float, float* %61, align 4 + %71 = fmul float %69, %70 + %72 = fsub float 0.000000e+00, %71 + %73 = getelementptr inbounds i8, i8* %62, i64 4 + %74 = bitcast i8* %73 to float* + store float %72, float* %74, align 4 + %75 = load float, float* %61, align 4 + %76 = fmul float %75, 2.000000e+00 + %77 = load float, float* %53, align 4 + %78 = fmul float %76, %77 + %79 = fsub float 0.000000e+00, %78 + %80 = getelementptr inbounds i8, i8* %62, i64 8 + %81 = bitcast i8* %80 to float* + store float %79, float* %81, align 4 + %82 = load float, float* %61, align 4 + %83 = fmul float %82, 2.000000e+00 + %84 = fmul float %83, %82 + %85 = fsub float 1.000000e+00, %84 + %86 = getelementptr inbounds i8, i8* %62, i64 12 + %87 = bitcast i8* %86 to float* + store float %85, float* %87, align 4 + %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 + %89 = bitcast i8* %88 to float* + %90 = bitcast i8* %62 to i32* + %91 = load i32, i32* %90, align 4 + %92 
= bitcast i8* %88 to i32* + store i32 %91, i32* %92, align 4 + %93 = bitcast i8* %73 to i32* + %94 = load i32, i32* %93, align 4 + %95 = getelementptr inbounds i8, i8* %88, i64 4 + %96 = bitcast i8* %95 to i32* + store i32 %94, i32* %96, align 4 + %97 = bitcast i8* %80 to i32* + %98 = load i32, i32* %97, align 4 + %99 = getelementptr inbounds i8, i8* %88, i64 8 + %100 = bitcast i8* %99 to i32* + store i32 %98, i32* %100, align 4 + %101 = bitcast i8* %86 to i32* + %102 = load i32, i32* %101, align 4 + %103 = getelementptr inbounds i8, i8* %88, i64 12 + %104 = bitcast i8* %103 to i32* + store i32 %102, i32* %104, align 4 + %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #8 + call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) + call void @free(i8* %14) + call void @free(i8* %16) + call void @free(i8* %38) + call void @free(i8* %40) + call void @free(i8* %62) + call void @free(i8* %88) + call void @no_opt_naive_fixed_transpose(float* %1) + ret void +} + +; Function Attrs: nounwind +declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 + +; Function Attrs: nounwind readnone speculatable willreturn +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 + +; Function Attrs: allocsize(0,1) +declare i8* @calloc(i64, i64) #5 + +declare void @free(i8*) #6 + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #1 { +.preheader6: + %0 = alloca i64, align 8 + %1 = alloca [4 x float], align 16 + %2 = alloca [4 x float], align 16 + %3 = alloca [4 x float], align 16 + %4 = alloca [4 x float], align 16 + %5 = alloca [4 x float], align 16 + %6 = call i64 @time(i64* null) #8 + store i64 %6, i64* %0, align 8 + %7 = call i64 @time(i64* nonnull %0) #8 + %8 = trunc i64 %7 to i32 + call void @srand(i32 %8) #8 + %9 = call i32 @rand() #8 + %10 = sitofp i32 %9 to float + %11 = fdiv float %10, 0x41747AE140000000 + %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 + store float %11, float* %12, align 16 + %13 = fpext float %11 to double + %14 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %13) #8 + %15 = call i32 @rand() #8 + %16 = sitofp i32 %15 to float + %17 = fdiv float %16, 0x41747AE140000000 + %18 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 + store float %17, float* %18, align 4 + %19 = fpext float %17 to double + %20 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %19) #8 + %21 = call i32 @rand() #8 + %22 = sitofp i32 %21 to float + %23 = fdiv float %22, 0x41747AE140000000 + %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 + store float %23, float* %24, align 8 + %25 = fpext float %23 to double + %26 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %25) #8 + %27 = call i32 @rand() #8 + %28 = sitofp i32 %27 to float + %29 = fdiv float %28, 0x41747AE140000000 + %30 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 + store float %29, float* %30, align 4 + %31 = fpext float %29 to double + %32 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %31) #8 + %33 = bitcast [4 x float]* %2 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %33, i8 0, i64 16, i1 false) + %34 = bitcast [4 x float]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %34, i8 0, i64 16, i1 false) + %35 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 + %36 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 + call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %35, float* nonnull %36) + %37 = bitcast [4 x float]* %4 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %37, i8 0, i64 16, i1 false) + %38 = bitcast [4 x float]* %5 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %38, i8 0, i64 16, i1 false) + %39 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 + %40 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 + call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %39, float* nonnull %40) + %41 = load float, float* %35, align 16 + %42 = fpext float %41 to double + %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %42) #8 + %44 = load float, float* %39, align 16 + %45 = fpext float %44 to double + %46 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %45) #8 + %47 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 + %48 = load float, float* %47, align 4 + %49 = fpext float %48 to double + %50 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %49) #8 + %51 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 + %52 = load float, float* %51, align 4 + %53 = fpext float %52 to double + %54 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %53) #8 + %55 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 + %56 = load float, float* %55, align 8 + %57 = fpext float %56 to double + %58 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %57) #8 + %59 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 + %60 = load float, float* %59, align 8 + %61 = fpext float %60 to double + %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %61) #8 + %63 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 + %64 = load float, float* %63, align 4 + %65 = fpext float %64 to double + %66 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %65) #8 + %67 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 + %68 = load float, float* %67, align 4 + %69 = fpext float %68 to double + %70 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %69) #8 + %71 = load float, float* %36, align 16 + %72 = fpext float %71 to double + %73 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %72) #8 + %74 = load float, float* %40, align 16 + %75 = fpext float %74 to double + %76 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %75) #8 + %77 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 + %78 = load float, float* %77, align 4 + %79 = fpext float %78 to double + %80 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %79) #8 + %81 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 + %82 = load float, float* %81, align 4 + %83 = fpext float %82 to double + %84 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %83) #8 + %85 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 + %86 = load float, float* %85, align 8 + %87 = fpext float %86 to double + %88 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %87) #8 + %89 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 + %90 = load float, float* %89, align 8 + %91 = fpext float %90 to double + %92 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %91) #8 + %93 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 + %94 = load float, float* %93, align 4 + %95 = fpext float %94 to double + %96 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %95) #8 + %97 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 + %98 = load float, float* %97, align 4 + %99 = fpext float %98 to double + %100 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %99) #8 + ret i32 0 +} + +declare i64 @time(i64*) #6 + +declare void @srand(i32) #6 + +declare i32 @rand() #6 + +declare i32 @printf(i8*, ...) 
#6 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #7 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #2 + +attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone speculatable willreturn } +attributes #3 = { argmemonly nounwind willreturn writeonly } +attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #7 = { argmemonly nounwind willreturn } +attributes #8 = { nounwind } +attributes #9 = { nounwind allocsize(0,1) } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} +!3 = distinct !{!3, !4} +!4 = !{!"llvm.loop.unroll.disable"} +!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/failed-test/diospyros.ll b/src/dios-egraphs/Diospyros/failed-test/diospyros.ll new file mode 100644 index 00000000..548f014e --- /dev/null +++ b/src/dios-egraphs/Diospyros/failed-test/diospyros.ll @@ -0,0 +1,5489 @@ +; ModuleID = 
'build/aa.ll' +source_filename = "fail-tests/qr-decomp-local-arrays.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 +@.str.1 = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 +@.str.2 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 +@.str.3 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 +@.str.4 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @sgn(float %0) #0 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_sgn(float %0) #1 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @naive_norm(float* %0, i32 %1) #0 { + %3 = icmp sgt i32 %1, 0 + %smax = select i1 %3, i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 3 + br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 + %indvars.iv.next = or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi 
i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32(float) #2 + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_naive_norm(float* %0, i32 %1) #1 { + %3 = icmp sgt i32 %1, 0 + %smax = select i1 %3, i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 3 + br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 + %indvars.iv.next = or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 
%indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_transpose(float* %0) #0 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast float* %4 to i32* + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_transpose(float* %0) #1 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast float* %4 to i32* + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { +.preheader: + %3 = load float, float* %0, align 4 + %4 = load float, float* %1, align 4 + %5 = fmul float %3, %4 + %6 = fadd float %5, 0.000000e+00 + %7 = getelementptr inbounds float, float* %0, i64 1 + %8 = load float, float* %7, align 4 + %9 = getelementptr inbounds float, float* %1, i64 2 + %10 = load float, float* %9, align 4 + %11 = fmul float %8, %10 + %12 = fadd float %6, %11 + %13 = getelementptr inbounds float, float* %2, i64 1 + %14 = load float, float* %0, align 4 + %15 = getelementptr inbounds float, float* %1, i64 1 + %16 = load float, float* %15, align 4 + %17 = fmul float %14, %16 + %18 = fadd float %17, 0.000000e+00 + %19 = load float, float* %7, align 4 + %20 = getelementptr inbounds float, float* %1, i64 3 + %21 = load float, float* %20, align 4 + %22 = fmul float %19, %21 + %23 = fadd float %18, %22 + %24 = getelementptr inbounds float, float* %0, i64 2 + %25 = getelementptr inbounds float, float* %2, i64 2 + %26 = load float, float* %24, align 4 + %27 = load float, float* %1, align 4 + %28 = fmul float %26, %27 + %29 = fadd float %28, 0.000000e+00 + %30 = getelementptr inbounds float, float* %0, i64 3 + %31 = load float, float* %30, align 4 + %32 = load float, float* %9, align 4 + %33 = fmul float %31, %32 + %34 = fadd float %29, %33 + %35 = getelementptr inbounds float, float* %2, i64 3 + %36 = load float, float* %24, align 4 + %37 = load float, float* %15, align 4 + %38 = fmul float %36, %37 + %39 = fadd float %38, 0.000000e+00 + %40 = load float, float* %30, align 4 + %41 = load float, float* %20, align 4 + %42 = fmul float %40, %41 + %43 = fadd float %39, %42 + store float 0.000000e+00, float* %2, align 4 + %44 = getelementptr float, float* %0, i32 0 + %45 = load float, float* %44, align 4 + %46 = insertelement <4 x float> zeroinitializer, float %45, i32 0 + %47 = insertelement <4 x float> %46, float 0.000000e+00, i32 1 + %48 = insertelement <4 x float> %47, 
float 0.000000e+00, i32 2 + %49 = insertelement <4 x float> %48, float 0.000000e+00, i32 3 + %50 = getelementptr float, float* %1, i32 0 + %51 = load float, float* %50, align 4 + %52 = insertelement <4 x float> zeroinitializer, float %51, i32 0 + %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 1 + %54 = insertelement <4 x float> %53, float 0.000000e+00, i32 2 + %55 = insertelement <4 x float> %54, float 0.000000e+00, i32 3 + %56 = call <4 x float> @llvm.fma.f32(<4 x float> %49, <4 x float> %55, <4 x float> zeroinitializer) + %57 = extractelement <4 x float> %56, i32 0 + store float %57, float* %2, align 4 + %58 = getelementptr float, float* %0, i32 0 + %59 = load float, float* %58, align 4 + %60 = insertelement <4 x float> zeroinitializer, float %59, i32 0 + %61 = insertelement <4 x float> %60, float 1.000000e+00, i32 1 + %62 = insertelement <4 x float> %61, float 1.000000e+00, i32 2 + %63 = insertelement <4 x float> %62, float 1.000000e+00, i32 3 + %64 = insertelement <4 x float> zeroinitializer, float %51, i32 0 + %65 = insertelement <4 x float> %64, float 0.000000e+00, i32 1 + %66 = insertelement <4 x float> %65, float 0.000000e+00, i32 2 + %67 = insertelement <4 x float> %66, float 0.000000e+00, i32 3 + %68 = fmul <4 x float> %63, %67 + %69 = fadd <4 x float> %68, zeroinitializer + %70 = getelementptr float, float* %0, i32 0 + %71 = getelementptr inbounds float, float* %70, i64 1 + %72 = load float, float* %71, align 4 + %73 = insertelement <4 x float> zeroinitializer, float %72, i32 0 + %74 = insertelement <4 x float> %73, float 0.000000e+00, i32 1 + %75 = insertelement <4 x float> %74, float 0.000000e+00, i32 2 + %76 = insertelement <4 x float> %75, float 0.000000e+00, i32 3 + %77 = getelementptr float, float* %1, i32 0 + %78 = getelementptr inbounds float, float* %77, i64 2 + %79 = load float, float* %78, align 4 + %80 = insertelement <4 x float> zeroinitializer, float %79, i32 0 + %81 = insertelement <4 x float> %80, float 0.000000e+00, i32 1 + %82 = insertelement <4 x float> %81, float 0.000000e+00, i32 2 + %83 = insertelement <4 x float> %82, float 0.000000e+00, i32 3 + %84 = call <4 x float> @llvm.fma.f32.1(<4 x float> %76, <4 x float> %83, <4 x float> %69) + %85 = extractelement <4 x float> %84, i32 0 + store float %85, float* %2, align 4 + %86 = extractelement <4 x float> %84, i32 1 + %87 = getelementptr float, float* %2, i32 0 + %88 = getelementptr inbounds float, float* %87, i64 1 + store float %86, float* %88, align 4 + %89 = getelementptr float, float* %0, i32 0 + %90 = load float, float* %89, align 4 + %91 = insertelement <4 x float> zeroinitializer, float %90, i32 0 + %92 = insertelement <4 x float> %91, float 0.000000e+00, i32 1 + %93 = insertelement <4 x float> %92, float 0.000000e+00, i32 2 + %94 = insertelement <4 x float> %93, float 0.000000e+00, i32 3 + %95 = getelementptr float, float* %1, i32 0 + %96 = getelementptr inbounds float, float* %95, i64 1 + %97 = load float, float* %96, align 4 + %98 = insertelement <4 x float> zeroinitializer, float %97, i32 0 + %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 1 + %100 = insertelement <4 x float> %99, float 0.000000e+00, i32 2 + %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 3 + %102 = call <4 x float> @llvm.fma.f32.2(<4 x float> %94, <4 x float> %101, <4 x float> zeroinitializer) + %103 = extractelement <4 x float> %102, i32 0 + %104 = getelementptr float, float* %2, i32 0 + %105 = getelementptr inbounds float, float* %104, i64 1 + store float %103, float* %105, align 4 + %106 
= insertelement <4 x float> zeroinitializer, float %90, i32 0 + %107 = insertelement <4 x float> %106, float 1.000000e+00, i32 1 + %108 = insertelement <4 x float> %107, float 1.000000e+00, i32 2 + %109 = insertelement <4 x float> %108, float 1.000000e+00, i32 3 + %110 = insertelement <4 x float> zeroinitializer, float %97, i32 0 + %111 = insertelement <4 x float> %110, float 0.000000e+00, i32 1 + %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 2 + %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 3 + %114 = fmul <4 x float> %109, %113 + %115 = fadd <4 x float> %114, zeroinitializer + %116 = getelementptr float, float* %0, i32 0 + %117 = getelementptr inbounds float, float* %116, i64 1 + %118 = load float, float* %117, align 4 + %119 = insertelement <4 x float> zeroinitializer, float %118, i32 0 + %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 1 + %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 2 + %122 = insertelement <4 x float> %121, float 0.000000e+00, i32 3 + %123 = getelementptr float, float* %1, i32 0 + %124 = getelementptr inbounds float, float* %123, i64 3 + %125 = load float, float* %124, align 4 + %126 = insertelement <4 x float> zeroinitializer, float %125, i32 0 + %127 = insertelement <4 x float> %126, float 0.000000e+00, i32 1 + %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 2 + %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 3 + %130 = call <4 x float> @llvm.fma.f32.3(<4 x float> %122, <4 x float> %129, <4 x float> %115) + %131 = extractelement <4 x float> %130, i32 0 + %132 = getelementptr float, float* %2, i32 0 + %133 = getelementptr inbounds float, float* %132, i64 1 + store float %131, float* %133, align 4 + %134 = extractelement <4 x float> %130, i32 1 + %135 = getelementptr float, float* %2, i32 0 + %136 = getelementptr inbounds float, float* %135, i64 2 + store float %134, float* %136, align 4 + %137 = getelementptr float, float* %0, i32 0 + %138 = getelementptr inbounds float, float* %137, i64 2 + %139 = load float, float* %138, align 4 + %140 = insertelement <4 x float> zeroinitializer, float %139, i32 0 + %141 = insertelement <4 x float> %140, float 0.000000e+00, i32 1 + %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 2 + %143 = insertelement <4 x float> %142, float 0.000000e+00, i32 3 + %144 = getelementptr float, float* %1, i32 0 + %145 = load float, float* %144, align 4 + %146 = insertelement <4 x float> zeroinitializer, float %145, i32 0 + %147 = insertelement <4 x float> %146, float 0.000000e+00, i32 1 + %148 = insertelement <4 x float> %147, float 0.000000e+00, i32 2 + %149 = insertelement <4 x float> %148, float 0.000000e+00, i32 3 + %150 = call <4 x float> @llvm.fma.f32.4(<4 x float> %143, <4 x float> %149, <4 x float> zeroinitializer) + %151 = extractelement <4 x float> %150, i32 0 + %152 = getelementptr float, float* %2, i32 0 + %153 = getelementptr inbounds float, float* %152, i64 2 + store float %151, float* %153, align 4 + %154 = insertelement <4 x float> zeroinitializer, float %139, i32 0 + %155 = insertelement <4 x float> %154, float 1.000000e+00, i32 1 + %156 = insertelement <4 x float> %155, float 1.000000e+00, i32 2 + %157 = insertelement <4 x float> %156, float 1.000000e+00, i32 3 + %158 = insertelement <4 x float> zeroinitializer, float %145, i32 0 + %159 = insertelement <4 x float> %158, float 0.000000e+00, i32 1 + %160 = insertelement <4 x float> %159, float 0.000000e+00, i32 2 + %161 = insertelement <4 x float> %160, float 0.000000e+00, 
i32 3 + %162 = fmul <4 x float> %157, %161 + %163 = fadd <4 x float> %162, zeroinitializer + %164 = getelementptr float, float* %0, i32 0 + %165 = getelementptr inbounds float, float* %164, i64 3 + %166 = load float, float* %165, align 4 + %167 = insertelement <4 x float> zeroinitializer, float %166, i32 0 + %168 = insertelement <4 x float> %167, float 0.000000e+00, i32 1 + %169 = insertelement <4 x float> %168, float 0.000000e+00, i32 2 + %170 = insertelement <4 x float> %169, float 0.000000e+00, i32 3 + %171 = getelementptr float, float* %1, i32 0 + %172 = getelementptr inbounds float, float* %171, i64 2 + %173 = load float, float* %172, align 4 + %174 = insertelement <4 x float> zeroinitializer, float %173, i32 0 + %175 = insertelement <4 x float> %174, float 0.000000e+00, i32 1 + %176 = insertelement <4 x float> %175, float 0.000000e+00, i32 2 + %177 = insertelement <4 x float> %176, float 0.000000e+00, i32 3 + %178 = call <4 x float> @llvm.fma.f32.5(<4 x float> %170, <4 x float> %177, <4 x float> %163) + %179 = extractelement <4 x float> %178, i32 0 + %180 = getelementptr float, float* %2, i32 0 + %181 = getelementptr inbounds float, float* %180, i64 2 + store float %179, float* %181, align 4 + %182 = extractelement <4 x float> %178, i32 1 + %183 = getelementptr float, float* %2, i32 0 + %184 = getelementptr inbounds float, float* %183, i64 3 + store float %182, float* %184, align 4 + %185 = getelementptr float, float* %0, i32 0 + %186 = getelementptr inbounds float, float* %185, i64 2 + %187 = load float, float* %186, align 4 + %188 = insertelement <4 x float> zeroinitializer, float %187, i32 0 + %189 = insertelement <4 x float> %188, float 0.000000e+00, i32 1 + %190 = insertelement <4 x float> %189, float 0.000000e+00, i32 2 + %191 = insertelement <4 x float> %190, float 0.000000e+00, i32 3 + %192 = getelementptr float, float* %1, i32 0 + %193 = getelementptr inbounds float, float* %192, i64 1 + %194 = load float, float* %193, align 4 + %195 = insertelement <4 x float> zeroinitializer, float %194, i32 0 + %196 = insertelement <4 x float> %195, float 0.000000e+00, i32 1 + %197 = insertelement <4 x float> %196, float 0.000000e+00, i32 2 + %198 = insertelement <4 x float> %197, float 0.000000e+00, i32 3 + %199 = call <4 x float> @llvm.fma.f32.6(<4 x float> %191, <4 x float> %198, <4 x float> zeroinitializer) + %200 = extractelement <4 x float> %199, i32 0 + %201 = getelementptr float, float* %2, i32 0 + %202 = getelementptr inbounds float, float* %201, i64 3 + store float %200, float* %202, align 4 + %203 = insertelement <4 x float> zeroinitializer, float %187, i32 0 + %204 = insertelement <4 x float> %203, float 1.000000e+00, i32 1 + %205 = insertelement <4 x float> %204, float 1.000000e+00, i32 2 + %206 = insertelement <4 x float> %205, float 1.000000e+00, i32 3 + %207 = insertelement <4 x float> zeroinitializer, float %194, i32 0 + %208 = insertelement <4 x float> %207, float 0.000000e+00, i32 1 + %209 = insertelement <4 x float> %208, float 0.000000e+00, i32 2 + %210 = insertelement <4 x float> %209, float 0.000000e+00, i32 3 + %211 = fmul <4 x float> %206, %210 + %212 = fadd <4 x float> %211, zeroinitializer + %213 = getelementptr float, float* %0, i32 0 + %214 = getelementptr inbounds float, float* %213, i64 3 + %215 = load float, float* %214, align 4 + %216 = insertelement <4 x float> zeroinitializer, float %215, i32 0 + %217 = insertelement <4 x float> %216, float 0.000000e+00, i32 1 + %218 = insertelement <4 x float> %217, float 0.000000e+00, i32 2 + %219 = insertelement <4 x 
float> %218, float 0.000000e+00, i32 3 + %220 = getelementptr float, float* %1, i32 0 + %221 = getelementptr inbounds float, float* %220, i64 3 + %222 = load float, float* %221, align 4 + %223 = insertelement <4 x float> zeroinitializer, float %222, i32 0 + %224 = insertelement <4 x float> %223, float 0.000000e+00, i32 1 + %225 = insertelement <4 x float> %224, float 0.000000e+00, i32 2 + %226 = insertelement <4 x float> %225, float 0.000000e+00, i32 3 + %227 = call <4 x float> @llvm.fma.f32.7(<4 x float> %219, <4 x float> %226, <4 x float> %212) + %228 = extractelement <4 x float> %227, i32 0 + %229 = getelementptr float, float* %2, i32 0 + %230 = getelementptr inbounds float, float* %229, i64 3 + store float %228, float* %230, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { +.preheader: + store float 0.000000e+00, float* %2, align 4 + %3 = load float, float* %0, align 4 + %4 = load float, float* %1, align 4 + %5 = fmul float %3, %4 + %6 = fadd float %5, 0.000000e+00 + store float %6, float* %2, align 4 + %7 = getelementptr inbounds float, float* %0, i64 1 + %8 = load float, float* %7, align 4 + %9 = getelementptr inbounds float, float* %1, i64 2 + %10 = load float, float* %9, align 4 + %11 = fmul float %8, %10 + %12 = fadd float %6, %11 + store float %12, float* %2, align 4 + %13 = getelementptr inbounds float, float* %2, i64 1 + store float 0.000000e+00, float* %13, align 4 + %14 = load float, float* %0, align 4 + %15 = getelementptr inbounds float, float* %1, i64 1 + %16 = load float, float* %15, align 4 + %17 = fmul float %14, %16 + %18 = fadd float %17, 0.000000e+00 + store float %18, float* %13, align 4 + %19 = load float, float* %7, align 4 + %20 = getelementptr inbounds float, float* %1, i64 3 + %21 = load float, float* %20, align 4 + %22 = fmul float %19, %21 + %23 = fadd float %18, %22 + store float %23, float* %13, align 4 + %24 = getelementptr inbounds float, float* %0, i64 2 + %25 = getelementptr inbounds float, float* %2, i64 2 + store float 0.000000e+00, float* %25, align 4 + %26 = load float, float* %24, align 4 + %27 = load float, float* %1, align 4 + %28 = fmul float %26, %27 + %29 = fadd float %28, 0.000000e+00 + store float %29, float* %25, align 4 + %30 = getelementptr inbounds float, float* %0, i64 3 + %31 = load float, float* %30, align 4 + %32 = load float, float* %9, align 4 + %33 = fmul float %31, %32 + %34 = fadd float %29, %33 + store float %34, float* %25, align 4 + %35 = getelementptr inbounds float, float* %2, i64 3 + store float 0.000000e+00, float* %35, align 4 + %36 = load float, float* %24, align 4 + %37 = load float, float* %15, align 4 + %38 = fmul float %36, %37 + %39 = fadd float %38, 0.000000e+00 + store float %39, float* %35, align 4 + %40 = load float, float* %30, align 4 + %41 = load float, float* %20, align 4 + %42 = fmul float %40, %41 + %43 = fadd float %39, %42 + store float %43, float* %35, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader49: + %3 = bitcast float* %1 to i8* + %4 = alloca [4 x float], align 16 + %5 = bitcast [4 x float]* %4 to i8* + %6 = bitcast float* %0 to i32* + %7 = load i32, i32* %6, align 4 + %8 = bitcast float* %2 to i32* + %9 = getelementptr inbounds float, float* %0, i64 1 + %10 = bitcast float* %9 to i32* + %11 = load i32, i32* %10, align 4 + %12 = getelementptr inbounds float, float* %2, i64 1 + 
%13 = bitcast float* %12 to i32* + %14 = getelementptr inbounds float, float* %0, i64 2 + %15 = bitcast float* %14 to i32* + %16 = load i32, i32* %15, align 4 + %17 = getelementptr inbounds float, float* %2, i64 2 + %18 = bitcast float* %17 to i32* + %19 = getelementptr inbounds float, float* %0, i64 3 + %20 = bitcast float* %19 to i32* + %21 = load i32, i32* %20, align 4 + %22 = getelementptr inbounds float, float* %2, i64 3 + %23 = bitcast float* %22 to i32* + %24 = bitcast i32 %7 to float + %25 = fcmp ogt float %24, 0.000000e+00 + %26 = zext i1 %25 to i32 + %27 = fcmp olt float %24, 0.000000e+00 + %.neg = sext i1 %27 to i32 + %28 = add nsw i32 %.neg, %26 + %29 = sitofp i32 %28 to float + %30 = fmul float %24, %24 + %31 = fadd float %30, 0.000000e+00 + %32 = bitcast i32 %16 to float + %33 = fmul float %32, %32 + %34 = fadd float %31, %33 + %35 = call float @llvm.sqrt.f32(float %34) #8 + %36 = fneg float %29 + %37 = fmul float %35, %36 + %38 = fadd float %24, %37 + %39 = fmul float %37, 0.000000e+00 + %40 = fadd float %32, %39 + %41 = fmul float %38, %38 + %42 = fadd float %41, 0.000000e+00 + %43 = fmul float %40, %40 + %44 = fadd float %42, %43 + %45 = call float @llvm.sqrt.f32(float %44) #8 + %46 = fadd float %45, 0x3EE4F8B580000000 + %47 = fdiv float %38, %46 + %48 = fdiv float %40, %46 + %49 = fmul float %47, 2.000000e+00 + %50 = fmul float %49, %47 + %51 = fsub float 1.000000e+00, %50 + %52 = fmul float %49, %48 + %53 = fsub float 0.000000e+00, %52 + %54 = fmul float %48, 2.000000e+00 + %55 = fmul float %54, %47 + %56 = fsub float 0.000000e+00, %55 + %57 = fmul float %54, %48 + %58 = fsub float 1.000000e+00, %57 + %59 = bitcast float %51 to i32 + %60 = bitcast [4 x float]* %4 to i32* + %61 = bitcast float %53 to i32 + %62 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 + %63 = bitcast float* %62 to i32* + %64 = bitcast float %56 to i32 + %65 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 + %66 = bitcast float* %65 to i32* + %67 = bitcast float %58 to i32 + %68 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 + %69 = bitcast float* %68 to i32* + %70 = load float, float* %0, align 4 + %71 = fmul float %51, %70 + %72 = fadd float %71, 0.000000e+00 + %73 = load float, float* %14, align 4 + %74 = fmul float %53, %73 + %75 = fadd float %72, %74 + %76 = load float, float* %9, align 4 + %77 = fmul float %51, %76 + %78 = fadd float %77, 0.000000e+00 + %79 = load float, float* %19, align 4 + %80 = fmul float %53, %79 + %81 = fadd float %78, %80 + %82 = load float, float* %0, align 4 + %83 = fmul float %56, %82 + %84 = fadd float %83, 0.000000e+00 + %85 = load float, float* %14, align 4 + %86 = fmul float %58, %85 + %87 = fadd float %84, %86 + %88 = load float, float* %9, align 4 + %89 = fmul float %56, %88 + %90 = fadd float %89, 0.000000e+00 + %91 = load float, float* %19, align 4 + %92 = fmul float %58, %91 + %93 = fadd float %90, %92 + %94 = getelementptr inbounds float, float* %1, i64 1 + %95 = bitcast float* %94 to i32* + %96 = load i32, i32* %95, align 4 + %97 = getelementptr inbounds float, float* %1, i64 2 + %98 = bitcast float* %97 to i32* + %99 = load i32, i32* %98, align 4 + %100 = getelementptr float, float* %0, i32 0 + %101 = bitcast float* %100 to i32* + %102 = load i32, i32* %101, align 4 + %103 = bitcast i32 %102 to float + %104 = insertelement <4 x float> zeroinitializer, float %103, i32 0 + %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 1 + %106 = insertelement <4 x float> %105, float 0.000000e+00, 
i32 2 + %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 3 + %108 = extractelement <4 x float> %107, i32 0 + %109 = bitcast i32* %8 to float* + %110 = getelementptr float, float* %2, i32 0 + %111 = bitcast float* %110 to i32* + %112 = bitcast i32* %111 to float* + store float %108, float* %112, align 4 + %113 = getelementptr float, float* %0, i32 0 + %114 = getelementptr inbounds float, float* %113, i64 1 + %115 = bitcast float* %114 to i32* + %116 = load i32, i32* %115, align 4 + %117 = bitcast i32 %116 to float + %118 = insertelement <4 x float> zeroinitializer, float %117, i32 0 + %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 1 + %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 2 + %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 3 + %122 = extractelement <4 x float> %121, i32 0 + %123 = bitcast i32* %13 to float* + %124 = getelementptr float, float* %2, i32 0 + %125 = getelementptr inbounds float, float* %124, i64 1 + %126 = bitcast float* %125 to i32* + %127 = bitcast i32* %126 to float* + store float %122, float* %127, align 4 + %128 = getelementptr float, float* %0, i32 0 + %129 = getelementptr inbounds float, float* %128, i64 2 + %130 = bitcast float* %129 to i32* + %131 = load i32, i32* %130, align 4 + %132 = bitcast i32 %131 to float + %133 = insertelement <4 x float> zeroinitializer, float %132, i32 0 + %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 1 + %135 = insertelement <4 x float> %134, float 0.000000e+00, i32 2 + %136 = insertelement <4 x float> %135, float 0.000000e+00, i32 3 + %137 = extractelement <4 x float> %136, i32 0 + %138 = bitcast i32* %18 to float* + %139 = getelementptr float, float* %2, i32 0 + %140 = getelementptr inbounds float, float* %139, i64 2 + %141 = bitcast float* %140 to i32* + %142 = bitcast i32* %141 to float* + store float %137, float* %142, align 4 + %143 = getelementptr float, float* %0, i32 0 + %144 = getelementptr inbounds float, float* %143, i64 3 + %145 = bitcast float* %144 to i32* + %146 = load i32, i32* %145, align 4 + %147 = bitcast i32 %146 to float + %148 = fneg float %147 + %149 = insertelement <4 x float> zeroinitializer, float %148, i32 0 + %150 = bitcast i32 %102 to float + %151 = bitcast i32 %102 to float + %152 = fmul float %150, %151 + %153 = fadd float %152, 0.000000e+00 + %154 = bitcast i32 %131 to float + %155 = bitcast i32 %131 to float + %156 = fmul float %154, %155 + %157 = fadd float %153, %156 + %158 = call float @llvm.sqrt.f32.8(float %157) + %159 = bitcast i32 %102 to float + %160 = fcmp olt float %159, 0.000000e+00 + %161 = sext i1 %160 to i32 + %162 = bitcast i32 %102 to float + %163 = fcmp ogt float %162, 0.000000e+00 + %164 = zext i1 %163 to i32 + %165 = add nsw i32 %161, %164 + %166 = sitofp i32 %165 to float + %167 = fneg float %166 + %168 = fmul float %158, %167 + %169 = bitcast i32 %102 to float + %170 = fadd float %169, %168 + %171 = bitcast i32 %102 to float + %172 = bitcast i32 %102 to float + %173 = fmul float %171, %172 + %174 = fadd float %173, 0.000000e+00 + %175 = bitcast i32 %131 to float + %176 = bitcast i32 %131 to float + %177 = fmul float %175, %176 + %178 = fadd float %174, %177 + %179 = call float @llvm.sqrt.f32.9(float %178) + %180 = bitcast i32 %102 to float + %181 = fcmp olt float %180, 0.000000e+00 + %182 = sext i1 %181 to i32 + %183 = bitcast i32 %102 to float + %184 = fcmp ogt float %183, 0.000000e+00 + %185 = zext i1 %184 to i32 + %186 = add nsw i32 %182, %185 + %187 = sitofp i32 %186 to float + %188 = fneg float 
%187 + %189 = fmul float %179, %188 + %190 = bitcast i32 %102 to float + %191 = fadd float %190, %189 + %192 = bitcast i32 %102 to float + %193 = bitcast i32 %102 to float + %194 = fmul float %192, %193 + %195 = fadd float %194, 0.000000e+00 + %196 = bitcast i32 %131 to float + %197 = bitcast i32 %131 to float + %198 = fmul float %196, %197 + %199 = fadd float %195, %198 + %200 = call float @llvm.sqrt.f32.10(float %199) + %201 = bitcast i32 %102 to float + %202 = fcmp olt float %201, 0.000000e+00 + %203 = sext i1 %202 to i32 + %204 = bitcast i32 %102 to float + %205 = fcmp ogt float %204, 0.000000e+00 + %206 = zext i1 %205 to i32 + %207 = add nsw i32 %203, %206 + %208 = sitofp i32 %207 to float + %209 = fneg float %208 + %210 = fmul float %200, %209 + %211 = bitcast i32 %102 to float + %212 = fadd float %211, %210 + %213 = fmul float %191, %212 + %214 = fadd float %213, 0.000000e+00 + %215 = bitcast i32 %102 to float + %216 = bitcast i32 %102 to float + %217 = fmul float %215, %216 + %218 = fadd float %217, 0.000000e+00 + %219 = bitcast i32 %131 to float + %220 = bitcast i32 %131 to float + %221 = fmul float %219, %220 + %222 = fadd float %218, %221 + %223 = call float @llvm.sqrt.f32.11(float %222) + %224 = bitcast i32 %102 to float + %225 = fcmp olt float %224, 0.000000e+00 + %226 = sext i1 %225 to i32 + %227 = bitcast i32 %102 to float + %228 = fcmp ogt float %227, 0.000000e+00 + %229 = zext i1 %228 to i32 + %230 = add nsw i32 %226, %229 + %231 = sitofp i32 %230 to float + %232 = fneg float %231 + %233 = fmul float %223, %232 + %234 = fmul float %233, 0.000000e+00 + %235 = bitcast i32 %131 to float + %236 = fadd float %235, %234 + %237 = bitcast i32 %102 to float + %238 = bitcast i32 %102 to float + %239 = fmul float %237, %238 + %240 = fadd float %239, 0.000000e+00 + %241 = bitcast i32 %131 to float + %242 = bitcast i32 %131 to float + %243 = fmul float %241, %242 + %244 = fadd float %240, %243 + %245 = call float @llvm.sqrt.f32.12(float %244) + %246 = bitcast i32 %102 to float + %247 = fcmp olt float %246, 0.000000e+00 + %248 = sext i1 %247 to i32 + %249 = bitcast i32 %102 to float + %250 = fcmp ogt float %249, 0.000000e+00 + %251 = zext i1 %250 to i32 + %252 = add nsw i32 %248, %251 + %253 = sitofp i32 %252 to float + %254 = fneg float %253 + %255 = fmul float %245, %254 + %256 = fmul float %255, 0.000000e+00 + %257 = bitcast i32 %131 to float + %258 = fadd float %257, %256 + %259 = fmul float %236, %258 + %260 = fadd float %214, %259 + %261 = call float @llvm.sqrt.f32.13(float %260) + %262 = fadd float %261, 0.000000e+00 + %263 = fdiv float %170, %262 + %264 = fmul float %263, 2.000000e+00 + %265 = bitcast i32 %102 to float + %266 = bitcast i32 %102 to float + %267 = fmul float %265, %266 + %268 = fadd float %267, 0.000000e+00 + %269 = bitcast i32 %131 to float + %270 = bitcast i32 %131 to float + %271 = fmul float %269, %270 + %272 = fadd float %268, %271 + %273 = call float @llvm.sqrt.f32.14(float %272) + %274 = bitcast i32 %102 to float + %275 = fcmp olt float %274, 0.000000e+00 + %276 = sext i1 %275 to i32 + %277 = bitcast i32 %102 to float + %278 = fcmp ogt float %277, 0.000000e+00 + %279 = zext i1 %278 to i32 + %280 = add nsw i32 %276, %279 + %281 = sitofp i32 %280 to float + %282 = fneg float %281 + %283 = fmul float %273, %282 + %284 = bitcast i32 %102 to float + %285 = fadd float %284, %283 + %286 = bitcast i32 %102 to float + %287 = bitcast i32 %102 to float + %288 = fmul float %286, %287 + %289 = fadd float %288, 0.000000e+00 + %290 = bitcast i32 %131 to float + %291 = 
bitcast i32 %131 to float + %292 = fmul float %290, %291 + %293 = fadd float %289, %292 + %294 = call float @llvm.sqrt.f32.15(float %293) + %295 = bitcast i32 %102 to float + %296 = fcmp olt float %295, 0.000000e+00 + %297 = sext i1 %296 to i32 + %298 = bitcast i32 %102 to float + %299 = fcmp ogt float %298, 0.000000e+00 + %300 = zext i1 %299 to i32 + %301 = add nsw i32 %297, %300 + %302 = sitofp i32 %301 to float + %303 = fneg float %302 + %304 = fmul float %294, %303 + %305 = bitcast i32 %102 to float + %306 = fadd float %305, %304 + %307 = bitcast i32 %102 to float + %308 = bitcast i32 %102 to float + %309 = fmul float %307, %308 + %310 = fadd float %309, 0.000000e+00 + %311 = bitcast i32 %131 to float + %312 = bitcast i32 %131 to float + %313 = fmul float %311, %312 + %314 = fadd float %310, %313 + %315 = call float @llvm.sqrt.f32.16(float %314) + %316 = bitcast i32 %102 to float + %317 = fcmp olt float %316, 0.000000e+00 + %318 = sext i1 %317 to i32 + %319 = bitcast i32 %102 to float + %320 = fcmp ogt float %319, 0.000000e+00 + %321 = zext i1 %320 to i32 + %322 = add nsw i32 %318, %321 + %323 = sitofp i32 %322 to float + %324 = fneg float %323 + %325 = fmul float %315, %324 + %326 = bitcast i32 %102 to float + %327 = fadd float %326, %325 + %328 = fmul float %306, %327 + %329 = fadd float %328, 0.000000e+00 + %330 = bitcast i32 %102 to float + %331 = bitcast i32 %102 to float + %332 = fmul float %330, %331 + %333 = fadd float %332, 0.000000e+00 + %334 = bitcast i32 %131 to float + %335 = bitcast i32 %131 to float + %336 = fmul float %334, %335 + %337 = fadd float %333, %336 + %338 = call float @llvm.sqrt.f32.17(float %337) + %339 = bitcast i32 %102 to float + %340 = fcmp olt float %339, 0.000000e+00 + %341 = sext i1 %340 to i32 + %342 = bitcast i32 %102 to float + %343 = fcmp ogt float %342, 0.000000e+00 + %344 = zext i1 %343 to i32 + %345 = add nsw i32 %341, %344 + %346 = sitofp i32 %345 to float + %347 = fneg float %346 + %348 = fmul float %338, %347 + %349 = fmul float %348, 0.000000e+00 + %350 = bitcast i32 %131 to float + %351 = fadd float %350, %349 + %352 = bitcast i32 %102 to float + %353 = bitcast i32 %102 to float + %354 = fmul float %352, %353 + %355 = fadd float %354, 0.000000e+00 + %356 = bitcast i32 %131 to float + %357 = bitcast i32 %131 to float + %358 = fmul float %356, %357 + %359 = fadd float %355, %358 + %360 = call float @llvm.sqrt.f32.18(float %359) + %361 = bitcast i32 %102 to float + %362 = fcmp olt float %361, 0.000000e+00 + %363 = sext i1 %362 to i32 + %364 = bitcast i32 %102 to float + %365 = fcmp ogt float %364, 0.000000e+00 + %366 = zext i1 %365 to i32 + %367 = add nsw i32 %363, %366 + %368 = sitofp i32 %367 to float + %369 = fneg float %368 + %370 = fmul float %360, %369 + %371 = fmul float %370, 0.000000e+00 + %372 = bitcast i32 %131 to float + %373 = fadd float %372, %371 + %374 = fmul float %351, %373 + %375 = fadd float %329, %374 + %376 = call float @llvm.sqrt.f32.19(float %375) + %377 = fadd float %376, 0.000000e+00 + %378 = fdiv float %285, %377 + %379 = fmul float %264, %378 + %380 = insertelement <4 x float> %149, float %379, i32 1 + %381 = bitcast i32 %102 to float + %382 = bitcast i32 %102 to float + %383 = fmul float %381, %382 + %384 = fadd float %383, 0.000000e+00 + %385 = bitcast i32 %131 to float + %386 = bitcast i32 %131 to float + %387 = fmul float %385, %386 + %388 = fadd float %384, %387 + %389 = call float @llvm.sqrt.f32.20(float %388) + %390 = bitcast i32 %102 to float + %391 = fcmp olt float %390, 0.000000e+00 + %392 = sext i1 %391 
to i32 + %393 = bitcast i32 %102 to float + %394 = fcmp ogt float %393, 0.000000e+00 + %395 = zext i1 %394 to i32 + %396 = add nsw i32 %392, %395 + %397 = sitofp i32 %396 to float + %398 = fneg float %397 + %399 = fmul float %389, %398 + %400 = bitcast i32 %102 to float + %401 = fadd float %400, %399 + %402 = bitcast i32 %102 to float + %403 = bitcast i32 %102 to float + %404 = fmul float %402, %403 + %405 = fadd float %404, 0.000000e+00 + %406 = bitcast i32 %131 to float + %407 = bitcast i32 %131 to float + %408 = fmul float %406, %407 + %409 = fadd float %405, %408 + %410 = call float @llvm.sqrt.f32.21(float %409) + %411 = bitcast i32 %102 to float + %412 = fcmp olt float %411, 0.000000e+00 + %413 = sext i1 %412 to i32 + %414 = bitcast i32 %102 to float + %415 = fcmp ogt float %414, 0.000000e+00 + %416 = zext i1 %415 to i32 + %417 = add nsw i32 %413, %416 + %418 = sitofp i32 %417 to float + %419 = fneg float %418 + %420 = fmul float %410, %419 + %421 = bitcast i32 %102 to float + %422 = fadd float %421, %420 + %423 = bitcast i32 %102 to float + %424 = bitcast i32 %102 to float + %425 = fmul float %423, %424 + %426 = fadd float %425, 0.000000e+00 + %427 = bitcast i32 %131 to float + %428 = bitcast i32 %131 to float + %429 = fmul float %427, %428 + %430 = fadd float %426, %429 + %431 = call float @llvm.sqrt.f32.22(float %430) + %432 = bitcast i32 %102 to float + %433 = fcmp olt float %432, 0.000000e+00 + %434 = sext i1 %433 to i32 + %435 = bitcast i32 %102 to float + %436 = fcmp ogt float %435, 0.000000e+00 + %437 = zext i1 %436 to i32 + %438 = add nsw i32 %434, %437 + %439 = sitofp i32 %438 to float + %440 = fneg float %439 + %441 = fmul float %431, %440 + %442 = bitcast i32 %102 to float + %443 = fadd float %442, %441 + %444 = fmul float %422, %443 + %445 = fadd float %444, 0.000000e+00 + %446 = bitcast i32 %102 to float + %447 = bitcast i32 %102 to float + %448 = fmul float %446, %447 + %449 = fadd float %448, 0.000000e+00 + %450 = bitcast i32 %131 to float + %451 = bitcast i32 %131 to float + %452 = fmul float %450, %451 + %453 = fadd float %449, %452 + %454 = call float @llvm.sqrt.f32.23(float %453) + %455 = bitcast i32 %102 to float + %456 = fcmp olt float %455, 0.000000e+00 + %457 = sext i1 %456 to i32 + %458 = bitcast i32 %102 to float + %459 = fcmp ogt float %458, 0.000000e+00 + %460 = zext i1 %459 to i32 + %461 = add nsw i32 %457, %460 + %462 = sitofp i32 %461 to float + %463 = fneg float %462 + %464 = fmul float %454, %463 + %465 = fmul float %464, 0.000000e+00 + %466 = bitcast i32 %131 to float + %467 = fadd float %466, %465 + %468 = bitcast i32 %102 to float + %469 = bitcast i32 %102 to float + %470 = fmul float %468, %469 + %471 = fadd float %470, 0.000000e+00 + %472 = bitcast i32 %131 to float + %473 = bitcast i32 %131 to float + %474 = fmul float %472, %473 + %475 = fadd float %471, %474 + %476 = call float @llvm.sqrt.f32.24(float %475) + %477 = bitcast i32 %102 to float + %478 = fcmp olt float %477, 0.000000e+00 + %479 = sext i1 %478 to i32 + %480 = bitcast i32 %102 to float + %481 = fcmp ogt float %480, 0.000000e+00 + %482 = zext i1 %481 to i32 + %483 = add nsw i32 %479, %482 + %484 = sitofp i32 %483 to float + %485 = fneg float %484 + %486 = fmul float %476, %485 + %487 = fmul float %486, 0.000000e+00 + %488 = bitcast i32 %131 to float + %489 = fadd float %488, %487 + %490 = fmul float %467, %489 + %491 = fadd float %445, %490 + %492 = call float @llvm.sqrt.f32.25(float %491) + %493 = fadd float %492, 0.000000e+00 + %494 = fdiv float %401, %493 + %495 = fmul float %494, 
2.000000e+00 + %496 = bitcast i32 %102 to float + %497 = bitcast i32 %102 to float + %498 = fmul float %496, %497 + %499 = fadd float %498, 0.000000e+00 + %500 = bitcast i32 %131 to float + %501 = bitcast i32 %131 to float + %502 = fmul float %500, %501 + %503 = fadd float %499, %502 + %504 = call float @llvm.sqrt.f32.26(float %503) + %505 = bitcast i32 %102 to float + %506 = fcmp olt float %505, 0.000000e+00 + %507 = sext i1 %506 to i32 + %508 = bitcast i32 %102 to float + %509 = fcmp ogt float %508, 0.000000e+00 + %510 = zext i1 %509 to i32 + %511 = add nsw i32 %507, %510 + %512 = sitofp i32 %511 to float + %513 = fneg float %512 + %514 = fmul float %504, %513 + %515 = fmul float %514, 0.000000e+00 + %516 = bitcast i32 %131 to float + %517 = fadd float %516, %515 + %518 = bitcast i32 %102 to float + %519 = bitcast i32 %102 to float + %520 = fmul float %518, %519 + %521 = fadd float %520, 0.000000e+00 + %522 = bitcast i32 %131 to float + %523 = bitcast i32 %131 to float + %524 = fmul float %522, %523 + %525 = fadd float %521, %524 + %526 = call float @llvm.sqrt.f32.27(float %525) + %527 = bitcast i32 %102 to float + %528 = fcmp olt float %527, 0.000000e+00 + %529 = sext i1 %528 to i32 + %530 = bitcast i32 %102 to float + %531 = fcmp ogt float %530, 0.000000e+00 + %532 = zext i1 %531 to i32 + %533 = add nsw i32 %529, %532 + %534 = sitofp i32 %533 to float + %535 = fneg float %534 + %536 = fmul float %526, %535 + %537 = bitcast i32 %102 to float + %538 = fadd float %537, %536 + %539 = bitcast i32 %102 to float + %540 = bitcast i32 %102 to float + %541 = fmul float %539, %540 + %542 = fadd float %541, 0.000000e+00 + %543 = bitcast i32 %131 to float + %544 = bitcast i32 %131 to float + %545 = fmul float %543, %544 + %546 = fadd float %542, %545 + %547 = call float @llvm.sqrt.f32.28(float %546) + %548 = bitcast i32 %102 to float + %549 = fcmp olt float %548, 0.000000e+00 + %550 = sext i1 %549 to i32 + %551 = bitcast i32 %102 to float + %552 = fcmp ogt float %551, 0.000000e+00 + %553 = zext i1 %552 to i32 + %554 = add nsw i32 %550, %553 + %555 = sitofp i32 %554 to float + %556 = fneg float %555 + %557 = fmul float %547, %556 + %558 = bitcast i32 %102 to float + %559 = fadd float %558, %557 + %560 = fmul float %538, %559 + %561 = fadd float %560, 0.000000e+00 + %562 = bitcast i32 %102 to float + %563 = bitcast i32 %102 to float + %564 = fmul float %562, %563 + %565 = fadd float %564, 0.000000e+00 + %566 = bitcast i32 %131 to float + %567 = bitcast i32 %131 to float + %568 = fmul float %566, %567 + %569 = fadd float %565, %568 + %570 = call float @llvm.sqrt.f32.29(float %569) + %571 = bitcast i32 %102 to float + %572 = fcmp olt float %571, 0.000000e+00 + %573 = sext i1 %572 to i32 + %574 = bitcast i32 %102 to float + %575 = fcmp ogt float %574, 0.000000e+00 + %576 = zext i1 %575 to i32 + %577 = add nsw i32 %573, %576 + %578 = sitofp i32 %577 to float + %579 = fneg float %578 + %580 = fmul float %570, %579 + %581 = fmul float %580, 0.000000e+00 + %582 = bitcast i32 %131 to float + %583 = fadd float %582, %581 + %584 = bitcast i32 %102 to float + %585 = bitcast i32 %102 to float + %586 = fmul float %584, %585 + %587 = fadd float %586, 0.000000e+00 + %588 = bitcast i32 %131 to float + %589 = bitcast i32 %131 to float + %590 = fmul float %588, %589 + %591 = fadd float %587, %590 + %592 = call float @llvm.sqrt.f32.30(float %591) + %593 = bitcast i32 %102 to float + %594 = fcmp olt float %593, 0.000000e+00 + %595 = sext i1 %594 to i32 + %596 = bitcast i32 %102 to float + %597 = fcmp ogt float %596, 
0.000000e+00 + %598 = zext i1 %597 to i32 + %599 = add nsw i32 %595, %598 + %600 = sitofp i32 %599 to float + %601 = fneg float %600 + %602 = fmul float %592, %601 + %603 = fmul float %602, 0.000000e+00 + %604 = bitcast i32 %131 to float + %605 = fadd float %604, %603 + %606 = fmul float %583, %605 + %607 = fadd float %561, %606 + %608 = call float @llvm.sqrt.f32.31(float %607) + %609 = fadd float %608, 0.000000e+00 + %610 = fdiv float %517, %609 + %611 = fmul float %495, %610 + %612 = insertelement <4 x float> %380, float %611, i32 2 + %613 = bitcast i32 %102 to float + %614 = bitcast i32 %102 to float + %615 = fmul float %613, %614 + %616 = fadd float %615, 0.000000e+00 + %617 = bitcast i32 %131 to float + %618 = bitcast i32 %131 to float + %619 = fmul float %617, %618 + %620 = fadd float %616, %619 + %621 = call float @llvm.sqrt.f32.32(float %620) + %622 = bitcast i32 %102 to float + %623 = fcmp olt float %622, 0.000000e+00 + %624 = sext i1 %623 to i32 + %625 = bitcast i32 %102 to float + %626 = fcmp ogt float %625, 0.000000e+00 + %627 = zext i1 %626 to i32 + %628 = add nsw i32 %624, %627 + %629 = sitofp i32 %628 to float + %630 = fneg float %629 + %631 = fmul float %621, %630 + %632 = fmul float %631, 0.000000e+00 + %633 = bitcast i32 %131 to float + %634 = fadd float %633, %632 + %635 = bitcast i32 %102 to float + %636 = bitcast i32 %102 to float + %637 = fmul float %635, %636 + %638 = fadd float %637, 0.000000e+00 + %639 = bitcast i32 %131 to float + %640 = bitcast i32 %131 to float + %641 = fmul float %639, %640 + %642 = fadd float %638, %641 + %643 = call float @llvm.sqrt.f32.33(float %642) + %644 = bitcast i32 %102 to float + %645 = fcmp olt float %644, 0.000000e+00 + %646 = sext i1 %645 to i32 + %647 = bitcast i32 %102 to float + %648 = fcmp ogt float %647, 0.000000e+00 + %649 = zext i1 %648 to i32 + %650 = add nsw i32 %646, %649 + %651 = sitofp i32 %650 to float + %652 = fneg float %651 + %653 = fmul float %643, %652 + %654 = bitcast i32 %102 to float + %655 = fadd float %654, %653 + %656 = bitcast i32 %102 to float + %657 = bitcast i32 %102 to float + %658 = fmul float %656, %657 + %659 = fadd float %658, 0.000000e+00 + %660 = bitcast i32 %131 to float + %661 = bitcast i32 %131 to float + %662 = fmul float %660, %661 + %663 = fadd float %659, %662 + %664 = call float @llvm.sqrt.f32.34(float %663) + %665 = bitcast i32 %102 to float + %666 = fcmp olt float %665, 0.000000e+00 + %667 = sext i1 %666 to i32 + %668 = bitcast i32 %102 to float + %669 = fcmp ogt float %668, 0.000000e+00 + %670 = zext i1 %669 to i32 + %671 = add nsw i32 %667, %670 + %672 = sitofp i32 %671 to float + %673 = fneg float %672 + %674 = fmul float %664, %673 + %675 = bitcast i32 %102 to float + %676 = fadd float %675, %674 + %677 = fmul float %655, %676 + %678 = fadd float %677, 0.000000e+00 + %679 = bitcast i32 %102 to float + %680 = bitcast i32 %102 to float + %681 = fmul float %679, %680 + %682 = fadd float %681, 0.000000e+00 + %683 = bitcast i32 %131 to float + %684 = bitcast i32 %131 to float + %685 = fmul float %683, %684 + %686 = fadd float %682, %685 + %687 = call float @llvm.sqrt.f32.35(float %686) + %688 = bitcast i32 %102 to float + %689 = fcmp olt float %688, 0.000000e+00 + %690 = sext i1 %689 to i32 + %691 = bitcast i32 %102 to float + %692 = fcmp ogt float %691, 0.000000e+00 + %693 = zext i1 %692 to i32 + %694 = add nsw i32 %690, %693 + %695 = sitofp i32 %694 to float + %696 = fneg float %695 + %697 = fmul float %687, %696 + %698 = fmul float %697, 0.000000e+00 + %699 = bitcast i32 %131 to float 
+ %700 = fadd float %699, %698 + %701 = bitcast i32 %102 to float + %702 = bitcast i32 %102 to float + %703 = fmul float %701, %702 + %704 = fadd float %703, 0.000000e+00 + %705 = bitcast i32 %131 to float + %706 = bitcast i32 %131 to float + %707 = fmul float %705, %706 + %708 = fadd float %704, %707 + %709 = call float @llvm.sqrt.f32.36(float %708) + %710 = bitcast i32 %102 to float + %711 = fcmp olt float %710, 0.000000e+00 + %712 = sext i1 %711 to i32 + %713 = bitcast i32 %102 to float + %714 = fcmp ogt float %713, 0.000000e+00 + %715 = zext i1 %714 to i32 + %716 = add nsw i32 %712, %715 + %717 = sitofp i32 %716 to float + %718 = fneg float %717 + %719 = fmul float %709, %718 + %720 = fmul float %719, 0.000000e+00 + %721 = bitcast i32 %131 to float + %722 = fadd float %721, %720 + %723 = fmul float %700, %722 + %724 = fadd float %678, %723 + %725 = call float @llvm.sqrt.f32.37(float %724) + %726 = fadd float %725, 0.000000e+00 + %727 = fdiv float %634, %726 + %728 = fmul float %727, 2.000000e+00 + %729 = bitcast i32 %102 to float + %730 = bitcast i32 %102 to float + %731 = fmul float %729, %730 + %732 = fadd float %731, 0.000000e+00 + %733 = bitcast i32 %131 to float + %734 = bitcast i32 %131 to float + %735 = fmul float %733, %734 + %736 = fadd float %732, %735 + %737 = call float @llvm.sqrt.f32.38(float %736) + %738 = bitcast i32 %102 to float + %739 = fcmp olt float %738, 0.000000e+00 + %740 = sext i1 %739 to i32 + %741 = bitcast i32 %102 to float + %742 = fcmp ogt float %741, 0.000000e+00 + %743 = zext i1 %742 to i32 + %744 = add nsw i32 %740, %743 + %745 = sitofp i32 %744 to float + %746 = fneg float %745 + %747 = fmul float %737, %746 + %748 = bitcast i32 %102 to float + %749 = fadd float %748, %747 + %750 = bitcast i32 %102 to float + %751 = bitcast i32 %102 to float + %752 = fmul float %750, %751 + %753 = fadd float %752, 0.000000e+00 + %754 = bitcast i32 %131 to float + %755 = bitcast i32 %131 to float + %756 = fmul float %754, %755 + %757 = fadd float %753, %756 + %758 = call float @llvm.sqrt.f32.39(float %757) + %759 = bitcast i32 %102 to float + %760 = fcmp olt float %759, 0.000000e+00 + %761 = sext i1 %760 to i32 + %762 = bitcast i32 %102 to float + %763 = fcmp ogt float %762, 0.000000e+00 + %764 = zext i1 %763 to i32 + %765 = add nsw i32 %761, %764 + %766 = sitofp i32 %765 to float + %767 = fneg float %766 + %768 = fmul float %758, %767 + %769 = bitcast i32 %102 to float + %770 = fadd float %769, %768 + %771 = bitcast i32 %102 to float + %772 = bitcast i32 %102 to float + %773 = fmul float %771, %772 + %774 = fadd float %773, 0.000000e+00 + %775 = bitcast i32 %131 to float + %776 = bitcast i32 %131 to float + %777 = fmul float %775, %776 + %778 = fadd float %774, %777 + %779 = call float @llvm.sqrt.f32.40(float %778) + %780 = bitcast i32 %102 to float + %781 = fcmp olt float %780, 0.000000e+00 + %782 = sext i1 %781 to i32 + %783 = bitcast i32 %102 to float + %784 = fcmp ogt float %783, 0.000000e+00 + %785 = zext i1 %784 to i32 + %786 = add nsw i32 %782, %785 + %787 = sitofp i32 %786 to float + %788 = fneg float %787 + %789 = fmul float %779, %788 + %790 = bitcast i32 %102 to float + %791 = fadd float %790, %789 + %792 = fmul float %770, %791 + %793 = fadd float %792, 0.000000e+00 + %794 = bitcast i32 %102 to float + %795 = bitcast i32 %102 to float + %796 = fmul float %794, %795 + %797 = fadd float %796, 0.000000e+00 + %798 = bitcast i32 %131 to float + %799 = bitcast i32 %131 to float + %800 = fmul float %798, %799 + %801 = fadd float %797, %800 + %802 = call float 
@llvm.sqrt.f32.41(float %801) + %803 = bitcast i32 %102 to float + %804 = fcmp olt float %803, 0.000000e+00 + %805 = sext i1 %804 to i32 + %806 = bitcast i32 %102 to float + %807 = fcmp ogt float %806, 0.000000e+00 + %808 = zext i1 %807 to i32 + %809 = add nsw i32 %805, %808 + %810 = sitofp i32 %809 to float + %811 = fneg float %810 + %812 = fmul float %802, %811 + %813 = fmul float %812, 0.000000e+00 + %814 = bitcast i32 %131 to float + %815 = fadd float %814, %813 + %816 = bitcast i32 %102 to float + %817 = bitcast i32 %102 to float + %818 = fmul float %816, %817 + %819 = fadd float %818, 0.000000e+00 + %820 = bitcast i32 %131 to float + %821 = bitcast i32 %131 to float + %822 = fmul float %820, %821 + %823 = fadd float %819, %822 + %824 = call float @llvm.sqrt.f32.42(float %823) + %825 = bitcast i32 %102 to float + %826 = fcmp olt float %825, 0.000000e+00 + %827 = sext i1 %826 to i32 + %828 = bitcast i32 %102 to float + %829 = fcmp ogt float %828, 0.000000e+00 + %830 = zext i1 %829 to i32 + %831 = add nsw i32 %827, %830 + %832 = sitofp i32 %831 to float + %833 = fneg float %832 + %834 = fmul float %824, %833 + %835 = fmul float %834, 0.000000e+00 + %836 = bitcast i32 %131 to float + %837 = fadd float %836, %835 + %838 = fmul float %815, %837 + %839 = fadd float %793, %838 + %840 = call float @llvm.sqrt.f32.43(float %839) + %841 = fadd float %840, 0.000000e+00 + %842 = fdiv float %749, %841 + %843 = fmul float %728, %842 + %844 = insertelement <4 x float> %612, float %843, i32 3 + %845 = fsub <4 x float> , %844 + %846 = bitcast i32 %102 to float + %847 = bitcast i32 %102 to float + %848 = fmul float %846, %847 + %849 = fadd float %848, 0.000000e+00 + %850 = bitcast i32 %131 to float + %851 = bitcast i32 %131 to float + %852 = fmul float %850, %851 + %853 = fadd float %849, %852 + %854 = call float @llvm.sqrt.f32.44(float %853) + %855 = bitcast i32 %102 to float + %856 = fcmp olt float %855, 0.000000e+00 + %857 = sext i1 %856 to i32 + %858 = bitcast i32 %102 to float + %859 = fcmp ogt float %858, 0.000000e+00 + %860 = zext i1 %859 to i32 + %861 = add nsw i32 %857, %860 + %862 = sitofp i32 %861 to float + %863 = fneg float %862 + %864 = fmul float %854, %863 + %865 = fmul float %864, 0.000000e+00 + %866 = bitcast i32 %131 to float + %867 = fadd float %866, %865 + %868 = bitcast i32 %102 to float + %869 = bitcast i32 %102 to float + %870 = fmul float %868, %869 + %871 = fadd float %870, 0.000000e+00 + %872 = bitcast i32 %131 to float + %873 = bitcast i32 %131 to float + %874 = fmul float %872, %873 + %875 = fadd float %871, %874 + %876 = call float @llvm.sqrt.f32.45(float %875) + %877 = bitcast i32 %102 to float + %878 = fcmp olt float %877, 0.000000e+00 + %879 = sext i1 %878 to i32 + %880 = bitcast i32 %102 to float + %881 = fcmp ogt float %880, 0.000000e+00 + %882 = zext i1 %881 to i32 + %883 = add nsw i32 %879, %882 + %884 = sitofp i32 %883 to float + %885 = fneg float %884 + %886 = fmul float %876, %885 + %887 = bitcast i32 %102 to float + %888 = fadd float %887, %886 + %889 = bitcast i32 %102 to float + %890 = bitcast i32 %102 to float + %891 = fmul float %889, %890 + %892 = fadd float %891, 0.000000e+00 + %893 = bitcast i32 %131 to float + %894 = bitcast i32 %131 to float + %895 = fmul float %893, %894 + %896 = fadd float %892, %895 + %897 = call float @llvm.sqrt.f32.46(float %896) + %898 = bitcast i32 %102 to float + %899 = fcmp olt float %898, 0.000000e+00 + %900 = sext i1 %899 to i32 + %901 = bitcast i32 %102 to float + %902 = fcmp ogt float %901, 0.000000e+00 + %903 = zext i1 %902 
to i32 + %904 = add nsw i32 %900, %903 + %905 = sitofp i32 %904 to float + %906 = fneg float %905 + %907 = fmul float %897, %906 + %908 = bitcast i32 %102 to float + %909 = fadd float %908, %907 + %910 = fmul float %888, %909 + %911 = fadd float %910, 0.000000e+00 + %912 = bitcast i32 %102 to float + %913 = bitcast i32 %102 to float + %914 = fmul float %912, %913 + %915 = fadd float %914, 0.000000e+00 + %916 = bitcast i32 %131 to float + %917 = bitcast i32 %131 to float + %918 = fmul float %916, %917 + %919 = fadd float %915, %918 + %920 = call float @llvm.sqrt.f32.47(float %919) + %921 = bitcast i32 %102 to float + %922 = fcmp olt float %921, 0.000000e+00 + %923 = sext i1 %922 to i32 + %924 = bitcast i32 %102 to float + %925 = fcmp ogt float %924, 0.000000e+00 + %926 = zext i1 %925 to i32 + %927 = add nsw i32 %923, %926 + %928 = sitofp i32 %927 to float + %929 = fneg float %928 + %930 = fmul float %920, %929 + %931 = fmul float %930, 0.000000e+00 + %932 = bitcast i32 %131 to float + %933 = fadd float %932, %931 + %934 = bitcast i32 %102 to float + %935 = bitcast i32 %102 to float + %936 = fmul float %934, %935 + %937 = fadd float %936, 0.000000e+00 + %938 = bitcast i32 %131 to float + %939 = bitcast i32 %131 to float + %940 = fmul float %938, %939 + %941 = fadd float %937, %940 + %942 = call float @llvm.sqrt.f32.48(float %941) + %943 = bitcast i32 %102 to float + %944 = fcmp olt float %943, 0.000000e+00 + %945 = sext i1 %944 to i32 + %946 = bitcast i32 %102 to float + %947 = fcmp ogt float %946, 0.000000e+00 + %948 = zext i1 %947 to i32 + %949 = add nsw i32 %945, %948 + %950 = sitofp i32 %949 to float + %951 = fneg float %950 + %952 = fmul float %942, %951 + %953 = fmul float %952, 0.000000e+00 + %954 = bitcast i32 %131 to float + %955 = fadd float %954, %953 + %956 = fmul float %933, %955 + %957 = fadd float %911, %956 + %958 = call float @llvm.sqrt.f32.49(float %957) + %959 = fadd float %958, 0.000000e+00 + %960 = fdiv float %867, %959 + %961 = fmul float %960, 2.000000e+00 + %962 = bitcast i32 %102 to float + %963 = bitcast i32 %102 to float + %964 = fmul float %962, %963 + %965 = fadd float %964, 0.000000e+00 + %966 = bitcast i32 %131 to float + %967 = bitcast i32 %131 to float + %968 = fmul float %966, %967 + %969 = fadd float %965, %968 + %970 = call float @llvm.sqrt.f32.50(float %969) + %971 = bitcast i32 %102 to float + %972 = fcmp olt float %971, 0.000000e+00 + %973 = sext i1 %972 to i32 + %974 = bitcast i32 %102 to float + %975 = fcmp ogt float %974, 0.000000e+00 + %976 = zext i1 %975 to i32 + %977 = add nsw i32 %973, %976 + %978 = sitofp i32 %977 to float + %979 = fneg float %978 + %980 = fmul float %970, %979 + %981 = fmul float %980, 0.000000e+00 + %982 = bitcast i32 %131 to float + %983 = fadd float %982, %981 + %984 = bitcast i32 %102 to float + %985 = bitcast i32 %102 to float + %986 = fmul float %984, %985 + %987 = fadd float %986, 0.000000e+00 + %988 = bitcast i32 %131 to float + %989 = bitcast i32 %131 to float + %990 = fmul float %988, %989 + %991 = fadd float %987, %990 + %992 = call float @llvm.sqrt.f32.51(float %991) + %993 = bitcast i32 %102 to float + %994 = fcmp olt float %993, 0.000000e+00 + %995 = sext i1 %994 to i32 + %996 = bitcast i32 %102 to float + %997 = fcmp ogt float %996, 0.000000e+00 + %998 = zext i1 %997 to i32 + %999 = add nsw i32 %995, %998 + %1000 = sitofp i32 %999 to float + %1001 = fneg float %1000 + %1002 = fmul float %992, %1001 + %1003 = bitcast i32 %102 to float + %1004 = fadd float %1003, %1002 + %1005 = bitcast i32 %102 to float + %1006 = 
bitcast i32 %102 to float + %1007 = fmul float %1005, %1006 + %1008 = fadd float %1007, 0.000000e+00 + %1009 = bitcast i32 %131 to float + %1010 = bitcast i32 %131 to float + %1011 = fmul float %1009, %1010 + %1012 = fadd float %1008, %1011 + %1013 = call float @llvm.sqrt.f32.52(float %1012) + %1014 = bitcast i32 %102 to float + %1015 = fcmp olt float %1014, 0.000000e+00 + %1016 = sext i1 %1015 to i32 + %1017 = bitcast i32 %102 to float + %1018 = fcmp ogt float %1017, 0.000000e+00 + %1019 = zext i1 %1018 to i32 + %1020 = add nsw i32 %1016, %1019 + %1021 = sitofp i32 %1020 to float + %1022 = fneg float %1021 + %1023 = fmul float %1013, %1022 + %1024 = bitcast i32 %102 to float + %1025 = fadd float %1024, %1023 + %1026 = fmul float %1004, %1025 + %1027 = fadd float %1026, 0.000000e+00 + %1028 = bitcast i32 %102 to float + %1029 = bitcast i32 %102 to float + %1030 = fmul float %1028, %1029 + %1031 = fadd float %1030, 0.000000e+00 + %1032 = bitcast i32 %131 to float + %1033 = bitcast i32 %131 to float + %1034 = fmul float %1032, %1033 + %1035 = fadd float %1031, %1034 + %1036 = call float @llvm.sqrt.f32.53(float %1035) + %1037 = bitcast i32 %102 to float + %1038 = fcmp olt float %1037, 0.000000e+00 + %1039 = sext i1 %1038 to i32 + %1040 = bitcast i32 %102 to float + %1041 = fcmp ogt float %1040, 0.000000e+00 + %1042 = zext i1 %1041 to i32 + %1043 = add nsw i32 %1039, %1042 + %1044 = sitofp i32 %1043 to float + %1045 = fneg float %1044 + %1046 = fmul float %1036, %1045 + %1047 = fmul float %1046, 0.000000e+00 + %1048 = bitcast i32 %131 to float + %1049 = fadd float %1048, %1047 + %1050 = bitcast i32 %102 to float + %1051 = bitcast i32 %102 to float + %1052 = fmul float %1050, %1051 + %1053 = fadd float %1052, 0.000000e+00 + %1054 = bitcast i32 %131 to float + %1055 = bitcast i32 %131 to float + %1056 = fmul float %1054, %1055 + %1057 = fadd float %1053, %1056 + %1058 = call float @llvm.sqrt.f32.54(float %1057) + %1059 = bitcast i32 %102 to float + %1060 = fcmp olt float %1059, 0.000000e+00 + %1061 = sext i1 %1060 to i32 + %1062 = bitcast i32 %102 to float + %1063 = fcmp ogt float %1062, 0.000000e+00 + %1064 = zext i1 %1063 to i32 + %1065 = add nsw i32 %1061, %1064 + %1066 = sitofp i32 %1065 to float + %1067 = fneg float %1066 + %1068 = fmul float %1058, %1067 + %1069 = fmul float %1068, 0.000000e+00 + %1070 = bitcast i32 %131 to float + %1071 = fadd float %1070, %1069 + %1072 = fmul float %1049, %1071 + %1073 = fadd float %1027, %1072 + %1074 = call float @llvm.sqrt.f32.55(float %1073) + %1075 = fadd float %1074, 0.000000e+00 + %1076 = fdiv float %983, %1075 + %1077 = fmul float %961, %1076 + %1078 = fsub float 1.000000e+00, %1077 + %1079 = insertelement <4 x float> zeroinitializer, float %1078, i32 0 + %1080 = insertelement <4 x float> %1079, float 0.000000e+00, i32 1 + %1081 = insertelement <4 x float> %1080, float 0.000000e+00, i32 2 + %1082 = insertelement <4 x float> %1081, float 0.000000e+00, i32 3 + %1083 = shufflevector <4 x float> %845, <4 x float> %1082, <8 x i32> + %1084 = extractelement <8 x float> %1083, i32 0 + %1085 = bitcast i32* %23 to float* + %1086 = getelementptr float, float* %2, i32 0 + %1087 = getelementptr inbounds float, float* %1086, i64 3 + %1088 = bitcast float* %1087 to i32* + %1089 = bitcast i32* %1088 to float* + store float %1084, float* %1089, align 4 + %1090 = extractelement <8 x float> %1083, i32 1 + %1091 = bitcast i32* %60 to float* + %1092 = alloca [4 x float], align 16 + %1093 = bitcast [4 x float]* %1092 to i32* + %1094 = bitcast i32* %1093 to float* + 
store float %1090, float* %1094, align 4 + %1095 = extractelement <8 x float> %1083, i32 2 + %1096 = bitcast i32* %63 to float* + %1097 = getelementptr inbounds [4 x float], [4 x float]* %1092, i64 0, i64 1 + %1098 = bitcast float* %1097 to i32* + %1099 = bitcast i32* %1098 to float* + store float %1095, float* %1099, align 4 + %1100 = extractelement <8 x float> %1083, i32 3 + %1101 = bitcast i32* %66 to float* + %1102 = getelementptr inbounds [4 x float], [4 x float]* %1092, i64 0, i64 2 + %1103 = bitcast float* %1102 to i32* + %1104 = bitcast i32* %1103 to float* + store float %1100, float* %1104, align 4 + %1105 = extractelement <8 x float> %1083, i32 4 + %1106 = bitcast i32* %69 to float* + %1107 = getelementptr inbounds [4 x float], [4 x float]* %1092, i64 0, i64 3 + %1108 = bitcast float* %1107 to i32* + %1109 = bitcast i32* %1108 to float* + store float %1105, float* %1109, align 4 + %1110 = bitcast float* %1 to i8* + %1111 = alloca [4 x float], align 16 + %1112 = bitcast [4 x float]* %1111 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %1110, i8* nonnull align 16 dereferenceable(16) %1112, i64 16, i1 false) + store float 0.000000e+00, float* %2, align 4 + %1113 = bitcast i32 %102 to float + %1114 = bitcast i32 %102 to float + %1115 = fmul float %1113, %1114 + %1116 = fadd float %1115, 0.000000e+00 + %1117 = bitcast i32 %131 to float + %1118 = bitcast i32 %131 to float + %1119 = fmul float %1117, %1118 + %1120 = fadd float %1116, %1119 + %1121 = call float @llvm.sqrt.f32.56(float %1120) + %1122 = bitcast i32 %102 to float + %1123 = fcmp olt float %1122, 0.000000e+00 + %1124 = sext i1 %1123 to i32 + %1125 = bitcast i32 %102 to float + %1126 = fcmp ogt float %1125, 0.000000e+00 + %1127 = zext i1 %1126 to i32 + %1128 = add nsw i32 %1124, %1127 + %1129 = sitofp i32 %1128 to float + %1130 = fneg float %1129 + %1131 = fmul float %1121, %1130 + %1132 = bitcast i32 %102 to float + %1133 = fadd float %1132, %1131 + %1134 = bitcast i32 %102 to float + %1135 = bitcast i32 %102 to float + %1136 = fmul float %1134, %1135 + %1137 = fadd float %1136, 0.000000e+00 + %1138 = bitcast i32 %131 to float + %1139 = bitcast i32 %131 to float + %1140 = fmul float %1138, %1139 + %1141 = fadd float %1137, %1140 + %1142 = call float @llvm.sqrt.f32.57(float %1141) + %1143 = bitcast i32 %102 to float + %1144 = fcmp olt float %1143, 0.000000e+00 + %1145 = sext i1 %1144 to i32 + %1146 = bitcast i32 %102 to float + %1147 = fcmp ogt float %1146, 0.000000e+00 + %1148 = zext i1 %1147 to i32 + %1149 = add nsw i32 %1145, %1148 + %1150 = sitofp i32 %1149 to float + %1151 = fneg float %1150 + %1152 = fmul float %1142, %1151 + %1153 = bitcast i32 %102 to float + %1154 = fadd float %1153, %1152 + %1155 = bitcast i32 %102 to float + %1156 = bitcast i32 %102 to float + %1157 = fmul float %1155, %1156 + %1158 = fadd float %1157, 0.000000e+00 + %1159 = bitcast i32 %131 to float + %1160 = bitcast i32 %131 to float + %1161 = fmul float %1159, %1160 + %1162 = fadd float %1158, %1161 + %1163 = call float @llvm.sqrt.f32.58(float %1162) + %1164 = bitcast i32 %102 to float + %1165 = fcmp olt float %1164, 0.000000e+00 + %1166 = sext i1 %1165 to i32 + %1167 = bitcast i32 %102 to float + %1168 = fcmp ogt float %1167, 0.000000e+00 + %1169 = zext i1 %1168 to i32 + %1170 = add nsw i32 %1166, %1169 + %1171 = sitofp i32 %1170 to float + %1172 = fneg float %1171 + %1173 = fmul float %1163, %1172 + %1174 = bitcast i32 %102 to float + %1175 = fadd float %1174, %1173 + %1176 = fmul float %1154, %1175 + 
%1177 = fadd float %1176, 0.000000e+00 + %1178 = bitcast i32 %102 to float + %1179 = bitcast i32 %102 to float + %1180 = fmul float %1178, %1179 + %1181 = fadd float %1180, 0.000000e+00 + %1182 = bitcast i32 %131 to float + %1183 = bitcast i32 %131 to float + %1184 = fmul float %1182, %1183 + %1185 = fadd float %1181, %1184 + %1186 = call float @llvm.sqrt.f32.59(float %1185) + %1187 = bitcast i32 %102 to float + %1188 = fcmp olt float %1187, 0.000000e+00 + %1189 = sext i1 %1188 to i32 + %1190 = bitcast i32 %102 to float + %1191 = fcmp ogt float %1190, 0.000000e+00 + %1192 = zext i1 %1191 to i32 + %1193 = add nsw i32 %1189, %1192 + %1194 = sitofp i32 %1193 to float + %1195 = fneg float %1194 + %1196 = fmul float %1186, %1195 + %1197 = fmul float %1196, 0.000000e+00 + %1198 = bitcast i32 %131 to float + %1199 = fadd float %1198, %1197 + %1200 = bitcast i32 %102 to float + %1201 = bitcast i32 %102 to float + %1202 = fmul float %1200, %1201 + %1203 = fadd float %1202, 0.000000e+00 + %1204 = bitcast i32 %131 to float + %1205 = bitcast i32 %131 to float + %1206 = fmul float %1204, %1205 + %1207 = fadd float %1203, %1206 + %1208 = call float @llvm.sqrt.f32.60(float %1207) + %1209 = bitcast i32 %102 to float + %1210 = fcmp olt float %1209, 0.000000e+00 + %1211 = sext i1 %1210 to i32 + %1212 = bitcast i32 %102 to float + %1213 = fcmp ogt float %1212, 0.000000e+00 + %1214 = zext i1 %1213 to i32 + %1215 = add nsw i32 %1211, %1214 + %1216 = sitofp i32 %1215 to float + %1217 = fneg float %1216 + %1218 = fmul float %1208, %1217 + %1219 = fmul float %1218, 0.000000e+00 + %1220 = bitcast i32 %131 to float + %1221 = fadd float %1220, %1219 + %1222 = fmul float %1199, %1221 + %1223 = fadd float %1177, %1222 + %1224 = call float @llvm.sqrt.f32.61(float %1223) + %1225 = fadd float %1224, 0.000000e+00 + %1226 = fdiv float %1133, %1225 + %1227 = fmul float %1226, 2.000000e+00 + %1228 = bitcast i32 %102 to float + %1229 = bitcast i32 %102 to float + %1230 = fmul float %1228, %1229 + %1231 = fadd float %1230, 0.000000e+00 + %1232 = bitcast i32 %131 to float + %1233 = bitcast i32 %131 to float + %1234 = fmul float %1232, %1233 + %1235 = fadd float %1231, %1234 + %1236 = call float @llvm.sqrt.f32.62(float %1235) + %1237 = bitcast i32 %102 to float + %1238 = fcmp olt float %1237, 0.000000e+00 + %1239 = sext i1 %1238 to i32 + %1240 = bitcast i32 %102 to float + %1241 = fcmp ogt float %1240, 0.000000e+00 + %1242 = zext i1 %1241 to i32 + %1243 = add nsw i32 %1239, %1242 + %1244 = sitofp i32 %1243 to float + %1245 = fneg float %1244 + %1246 = fmul float %1236, %1245 + %1247 = bitcast i32 %102 to float + %1248 = fadd float %1247, %1246 + %1249 = bitcast i32 %102 to float + %1250 = bitcast i32 %102 to float + %1251 = fmul float %1249, %1250 + %1252 = fadd float %1251, 0.000000e+00 + %1253 = bitcast i32 %131 to float + %1254 = bitcast i32 %131 to float + %1255 = fmul float %1253, %1254 + %1256 = fadd float %1252, %1255 + %1257 = call float @llvm.sqrt.f32.63(float %1256) + %1258 = bitcast i32 %102 to float + %1259 = fcmp olt float %1258, 0.000000e+00 + %1260 = sext i1 %1259 to i32 + %1261 = bitcast i32 %102 to float + %1262 = fcmp ogt float %1261, 0.000000e+00 + %1263 = zext i1 %1262 to i32 + %1264 = add nsw i32 %1260, %1263 + %1265 = sitofp i32 %1264 to float + %1266 = fneg float %1265 + %1267 = fmul float %1257, %1266 + %1268 = bitcast i32 %102 to float + %1269 = fadd float %1268, %1267 + %1270 = bitcast i32 %102 to float + %1271 = bitcast i32 %102 to float + %1272 = fmul float %1270, %1271 + %1273 = fadd float %1272, 
0.000000e+00 + %1274 = bitcast i32 %131 to float + %1275 = bitcast i32 %131 to float + %1276 = fmul float %1274, %1275 + %1277 = fadd float %1273, %1276 + %1278 = call float @llvm.sqrt.f32.64(float %1277) + %1279 = bitcast i32 %102 to float + %1280 = fcmp olt float %1279, 0.000000e+00 + %1281 = sext i1 %1280 to i32 + %1282 = bitcast i32 %102 to float + %1283 = fcmp ogt float %1282, 0.000000e+00 + %1284 = zext i1 %1283 to i32 + %1285 = add nsw i32 %1281, %1284 + %1286 = sitofp i32 %1285 to float + %1287 = fneg float %1286 + %1288 = fmul float %1278, %1287 + %1289 = bitcast i32 %102 to float + %1290 = fadd float %1289, %1288 + %1291 = fmul float %1269, %1290 + %1292 = fadd float %1291, 0.000000e+00 + %1293 = bitcast i32 %102 to float + %1294 = bitcast i32 %102 to float + %1295 = fmul float %1293, %1294 + %1296 = fadd float %1295, 0.000000e+00 + %1297 = bitcast i32 %131 to float + %1298 = bitcast i32 %131 to float + %1299 = fmul float %1297, %1298 + %1300 = fadd float %1296, %1299 + %1301 = call float @llvm.sqrt.f32.65(float %1300) + %1302 = bitcast i32 %102 to float + %1303 = fcmp olt float %1302, 0.000000e+00 + %1304 = sext i1 %1303 to i32 + %1305 = bitcast i32 %102 to float + %1306 = fcmp ogt float %1305, 0.000000e+00 + %1307 = zext i1 %1306 to i32 + %1308 = add nsw i32 %1304, %1307 + %1309 = sitofp i32 %1308 to float + %1310 = fneg float %1309 + %1311 = fmul float %1301, %1310 + %1312 = fmul float %1311, 0.000000e+00 + %1313 = bitcast i32 %131 to float + %1314 = fadd float %1313, %1312 + %1315 = bitcast i32 %102 to float + %1316 = bitcast i32 %102 to float + %1317 = fmul float %1315, %1316 + %1318 = fadd float %1317, 0.000000e+00 + %1319 = bitcast i32 %131 to float + %1320 = bitcast i32 %131 to float + %1321 = fmul float %1319, %1320 + %1322 = fadd float %1318, %1321 + %1323 = call float @llvm.sqrt.f32.66(float %1322) + %1324 = bitcast i32 %102 to float + %1325 = fcmp olt float %1324, 0.000000e+00 + %1326 = sext i1 %1325 to i32 + %1327 = bitcast i32 %102 to float + %1328 = fcmp ogt float %1327, 0.000000e+00 + %1329 = zext i1 %1328 to i32 + %1330 = add nsw i32 %1326, %1329 + %1331 = sitofp i32 %1330 to float + %1332 = fneg float %1331 + %1333 = fmul float %1323, %1332 + %1334 = fmul float %1333, 0.000000e+00 + %1335 = bitcast i32 %131 to float + %1336 = fadd float %1335, %1334 + %1337 = fmul float %1314, %1336 + %1338 = fadd float %1292, %1337 + %1339 = call float @llvm.sqrt.f32.67(float %1338) + %1340 = fadd float %1339, 0.000000e+00 + %1341 = fdiv float %1248, %1340 + %1342 = fmul float %1227, %1341 + %1343 = fsub float 1.000000e+00, %1342 + %1344 = insertelement <4 x float> zeroinitializer, float %1343, i32 0 + %1345 = insertelement <4 x float> %1344, float 0.000000e+00, i32 1 + %1346 = insertelement <4 x float> %1345, float 0.000000e+00, i32 2 + %1347 = insertelement <4 x float> %1346, float 0.000000e+00, i32 3 + %1348 = getelementptr float, float* %0, i32 0 + %1349 = load float, float* %1348, align 4 + %1350 = insertelement <4 x float> zeroinitializer, float %1349, i32 0 + %1351 = insertelement <4 x float> %1350, float 0.000000e+00, i32 1 + %1352 = insertelement <4 x float> %1351, float 0.000000e+00, i32 2 + %1353 = insertelement <4 x float> %1352, float 0.000000e+00, i32 3 + %1354 = call <4 x float> @llvm.fma.f32.68(<4 x float> %1347, <4 x float> %1353, <4 x float> zeroinitializer) + %1355 = extractelement <4 x float> %1354, i32 0 + store float %1355, float* %2, align 4 + %1356 = bitcast i32 %102 to float + %1357 = bitcast i32 %102 to float + %1358 = fmul float %1356, %1357 + %1359 = 
fadd float %1358, 0.000000e+00 + %1360 = bitcast i32 %131 to float + %1361 = bitcast i32 %131 to float + %1362 = fmul float %1360, %1361 + %1363 = fadd float %1359, %1362 + %1364 = call float @llvm.sqrt.f32.69(float %1363) + %1365 = bitcast i32 %102 to float + %1366 = fcmp olt float %1365, 0.000000e+00 + %1367 = sext i1 %1366 to i32 + %1368 = bitcast i32 %102 to float + %1369 = fcmp ogt float %1368, 0.000000e+00 + %1370 = zext i1 %1369 to i32 + %1371 = add nsw i32 %1367, %1370 + %1372 = sitofp i32 %1371 to float + %1373 = fneg float %1372 + %1374 = fmul float %1364, %1373 + %1375 = bitcast i32 %102 to float + %1376 = fadd float %1375, %1374 + %1377 = bitcast i32 %102 to float + %1378 = bitcast i32 %102 to float + %1379 = fmul float %1377, %1378 + %1380 = fadd float %1379, 0.000000e+00 + %1381 = bitcast i32 %131 to float + %1382 = bitcast i32 %131 to float + %1383 = fmul float %1381, %1382 + %1384 = fadd float %1380, %1383 + %1385 = call float @llvm.sqrt.f32.70(float %1384) + %1386 = bitcast i32 %102 to float + %1387 = fcmp olt float %1386, 0.000000e+00 + %1388 = sext i1 %1387 to i32 + %1389 = bitcast i32 %102 to float + %1390 = fcmp ogt float %1389, 0.000000e+00 + %1391 = zext i1 %1390 to i32 + %1392 = add nsw i32 %1388, %1391 + %1393 = sitofp i32 %1392 to float + %1394 = fneg float %1393 + %1395 = fmul float %1385, %1394 + %1396 = bitcast i32 %102 to float + %1397 = fadd float %1396, %1395 + %1398 = bitcast i32 %102 to float + %1399 = bitcast i32 %102 to float + %1400 = fmul float %1398, %1399 + %1401 = fadd float %1400, 0.000000e+00 + %1402 = bitcast i32 %131 to float + %1403 = bitcast i32 %131 to float + %1404 = fmul float %1402, %1403 + %1405 = fadd float %1401, %1404 + %1406 = call float @llvm.sqrt.f32.71(float %1405) + %1407 = bitcast i32 %102 to float + %1408 = fcmp olt float %1407, 0.000000e+00 + %1409 = sext i1 %1408 to i32 + %1410 = bitcast i32 %102 to float + %1411 = fcmp ogt float %1410, 0.000000e+00 + %1412 = zext i1 %1411 to i32 + %1413 = add nsw i32 %1409, %1412 + %1414 = sitofp i32 %1413 to float + %1415 = fneg float %1414 + %1416 = fmul float %1406, %1415 + %1417 = bitcast i32 %102 to float + %1418 = fadd float %1417, %1416 + %1419 = fmul float %1397, %1418 + %1420 = fadd float %1419, 0.000000e+00 + %1421 = bitcast i32 %102 to float + %1422 = bitcast i32 %102 to float + %1423 = fmul float %1421, %1422 + %1424 = fadd float %1423, 0.000000e+00 + %1425 = bitcast i32 %131 to float + %1426 = bitcast i32 %131 to float + %1427 = fmul float %1425, %1426 + %1428 = fadd float %1424, %1427 + %1429 = call float @llvm.sqrt.f32.72(float %1428) + %1430 = bitcast i32 %102 to float + %1431 = fcmp olt float %1430, 0.000000e+00 + %1432 = sext i1 %1431 to i32 + %1433 = bitcast i32 %102 to float + %1434 = fcmp ogt float %1433, 0.000000e+00 + %1435 = zext i1 %1434 to i32 + %1436 = add nsw i32 %1432, %1435 + %1437 = sitofp i32 %1436 to float + %1438 = fneg float %1437 + %1439 = fmul float %1429, %1438 + %1440 = fmul float %1439, 0.000000e+00 + %1441 = bitcast i32 %131 to float + %1442 = fadd float %1441, %1440 + %1443 = bitcast i32 %102 to float + %1444 = bitcast i32 %102 to float + %1445 = fmul float %1443, %1444 + %1446 = fadd float %1445, 0.000000e+00 + %1447 = bitcast i32 %131 to float + %1448 = bitcast i32 %131 to float + %1449 = fmul float %1447, %1448 + %1450 = fadd float %1446, %1449 + %1451 = call float @llvm.sqrt.f32.73(float %1450) + %1452 = bitcast i32 %102 to float + %1453 = fcmp olt float %1452, 0.000000e+00 + %1454 = sext i1 %1453 to i32 + %1455 = bitcast i32 %102 to float + %1456 
= fcmp ogt float %1455, 0.000000e+00 + %1457 = zext i1 %1456 to i32 + %1458 = add nsw i32 %1454, %1457 + %1459 = sitofp i32 %1458 to float + %1460 = fneg float %1459 + %1461 = fmul float %1451, %1460 + %1462 = fmul float %1461, 0.000000e+00 + %1463 = bitcast i32 %131 to float + %1464 = fadd float %1463, %1462 + %1465 = fmul float %1442, %1464 + %1466 = fadd float %1420, %1465 + %1467 = call float @llvm.sqrt.f32.74(float %1466) + %1468 = fadd float %1467, 0.000000e+00 + %1469 = fdiv float %1376, %1468 + %1470 = fmul float %1469, 2.000000e+00 + %1471 = bitcast i32 %102 to float + %1472 = bitcast i32 %102 to float + %1473 = fmul float %1471, %1472 + %1474 = fadd float %1473, 0.000000e+00 + %1475 = bitcast i32 %131 to float + %1476 = bitcast i32 %131 to float + %1477 = fmul float %1475, %1476 + %1478 = fadd float %1474, %1477 + %1479 = call float @llvm.sqrt.f32.75(float %1478) + %1480 = bitcast i32 %102 to float + %1481 = fcmp olt float %1480, 0.000000e+00 + %1482 = sext i1 %1481 to i32 + %1483 = bitcast i32 %102 to float + %1484 = fcmp ogt float %1483, 0.000000e+00 + %1485 = zext i1 %1484 to i32 + %1486 = add nsw i32 %1482, %1485 + %1487 = sitofp i32 %1486 to float + %1488 = fneg float %1487 + %1489 = fmul float %1479, %1488 + %1490 = bitcast i32 %102 to float + %1491 = fadd float %1490, %1489 + %1492 = bitcast i32 %102 to float + %1493 = bitcast i32 %102 to float + %1494 = fmul float %1492, %1493 + %1495 = fadd float %1494, 0.000000e+00 + %1496 = bitcast i32 %131 to float + %1497 = bitcast i32 %131 to float + %1498 = fmul float %1496, %1497 + %1499 = fadd float %1495, %1498 + %1500 = call float @llvm.sqrt.f32.76(float %1499) + %1501 = bitcast i32 %102 to float + %1502 = fcmp olt float %1501, 0.000000e+00 + %1503 = sext i1 %1502 to i32 + %1504 = bitcast i32 %102 to float + %1505 = fcmp ogt float %1504, 0.000000e+00 + %1506 = zext i1 %1505 to i32 + %1507 = add nsw i32 %1503, %1506 + %1508 = sitofp i32 %1507 to float + %1509 = fneg float %1508 + %1510 = fmul float %1500, %1509 + %1511 = bitcast i32 %102 to float + %1512 = fadd float %1511, %1510 + %1513 = bitcast i32 %102 to float + %1514 = bitcast i32 %102 to float + %1515 = fmul float %1513, %1514 + %1516 = fadd float %1515, 0.000000e+00 + %1517 = bitcast i32 %131 to float + %1518 = bitcast i32 %131 to float + %1519 = fmul float %1517, %1518 + %1520 = fadd float %1516, %1519 + %1521 = call float @llvm.sqrt.f32.77(float %1520) + %1522 = bitcast i32 %102 to float + %1523 = fcmp olt float %1522, 0.000000e+00 + %1524 = sext i1 %1523 to i32 + %1525 = bitcast i32 %102 to float + %1526 = fcmp ogt float %1525, 0.000000e+00 + %1527 = zext i1 %1526 to i32 + %1528 = add nsw i32 %1524, %1527 + %1529 = sitofp i32 %1528 to float + %1530 = fneg float %1529 + %1531 = fmul float %1521, %1530 + %1532 = bitcast i32 %102 to float + %1533 = fadd float %1532, %1531 + %1534 = fmul float %1512, %1533 + %1535 = fadd float %1534, 0.000000e+00 + %1536 = bitcast i32 %102 to float + %1537 = bitcast i32 %102 to float + %1538 = fmul float %1536, %1537 + %1539 = fadd float %1538, 0.000000e+00 + %1540 = bitcast i32 %131 to float + %1541 = bitcast i32 %131 to float + %1542 = fmul float %1540, %1541 + %1543 = fadd float %1539, %1542 + %1544 = call float @llvm.sqrt.f32.78(float %1543) + %1545 = bitcast i32 %102 to float + %1546 = fcmp olt float %1545, 0.000000e+00 + %1547 = sext i1 %1546 to i32 + %1548 = bitcast i32 %102 to float + %1549 = fcmp ogt float %1548, 0.000000e+00 + %1550 = zext i1 %1549 to i32 + %1551 = add nsw i32 %1547, %1550 + %1552 = sitofp i32 %1551 to float + 
%1553 = fneg float %1552 + %1554 = fmul float %1544, %1553 + %1555 = fmul float %1554, 0.000000e+00 + %1556 = bitcast i32 %131 to float + %1557 = fadd float %1556, %1555 + %1558 = bitcast i32 %102 to float + %1559 = bitcast i32 %102 to float + %1560 = fmul float %1558, %1559 + %1561 = fadd float %1560, 0.000000e+00 + %1562 = bitcast i32 %131 to float + %1563 = bitcast i32 %131 to float + %1564 = fmul float %1562, %1563 + %1565 = fadd float %1561, %1564 + %1566 = call float @llvm.sqrt.f32.79(float %1565) + %1567 = bitcast i32 %102 to float + %1568 = fcmp olt float %1567, 0.000000e+00 + %1569 = sext i1 %1568 to i32 + %1570 = bitcast i32 %102 to float + %1571 = fcmp ogt float %1570, 0.000000e+00 + %1572 = zext i1 %1571 to i32 + %1573 = add nsw i32 %1569, %1572 + %1574 = sitofp i32 %1573 to float + %1575 = fneg float %1574 + %1576 = fmul float %1566, %1575 + %1577 = fmul float %1576, 0.000000e+00 + %1578 = bitcast i32 %131 to float + %1579 = fadd float %1578, %1577 + %1580 = fmul float %1557, %1579 + %1581 = fadd float %1535, %1580 + %1582 = call float @llvm.sqrt.f32.80(float %1581) + %1583 = fadd float %1582, 0.000000e+00 + %1584 = fdiv float %1491, %1583 + %1585 = fmul float %1470, %1584 + %1586 = fsub float 1.000000e+00, %1585 + %1587 = fmul float %1586, %1349 + %1588 = fadd float %1587, 0.000000e+00 + %1589 = bitcast i32 %102 to float + %1590 = bitcast i32 %102 to float + %1591 = fmul float %1589, %1590 + %1592 = fadd float %1591, 0.000000e+00 + %1593 = bitcast i32 %131 to float + %1594 = bitcast i32 %131 to float + %1595 = fmul float %1593, %1594 + %1596 = fadd float %1592, %1595 + %1597 = call float @llvm.sqrt.f32.81(float %1596) + %1598 = bitcast i32 %102 to float + %1599 = fcmp olt float %1598, 0.000000e+00 + %1600 = sext i1 %1599 to i32 + %1601 = bitcast i32 %102 to float + %1602 = fcmp ogt float %1601, 0.000000e+00 + %1603 = zext i1 %1602 to i32 + %1604 = add nsw i32 %1600, %1603 + %1605 = sitofp i32 %1604 to float + %1606 = fneg float %1605 + %1607 = fmul float %1597, %1606 + %1608 = bitcast i32 %102 to float + %1609 = fadd float %1608, %1607 + %1610 = bitcast i32 %102 to float + %1611 = bitcast i32 %102 to float + %1612 = fmul float %1610, %1611 + %1613 = fadd float %1612, 0.000000e+00 + %1614 = bitcast i32 %131 to float + %1615 = bitcast i32 %131 to float + %1616 = fmul float %1614, %1615 + %1617 = fadd float %1613, %1616 + %1618 = call float @llvm.sqrt.f32.82(float %1617) + %1619 = bitcast i32 %102 to float + %1620 = fcmp olt float %1619, 0.000000e+00 + %1621 = sext i1 %1620 to i32 + %1622 = bitcast i32 %102 to float + %1623 = fcmp ogt float %1622, 0.000000e+00 + %1624 = zext i1 %1623 to i32 + %1625 = add nsw i32 %1621, %1624 + %1626 = sitofp i32 %1625 to float + %1627 = fneg float %1626 + %1628 = fmul float %1618, %1627 + %1629 = bitcast i32 %102 to float + %1630 = fadd float %1629, %1628 + %1631 = bitcast i32 %102 to float + %1632 = bitcast i32 %102 to float + %1633 = fmul float %1631, %1632 + %1634 = fadd float %1633, 0.000000e+00 + %1635 = bitcast i32 %131 to float + %1636 = bitcast i32 %131 to float + %1637 = fmul float %1635, %1636 + %1638 = fadd float %1634, %1637 + %1639 = call float @llvm.sqrt.f32.83(float %1638) + %1640 = bitcast i32 %102 to float + %1641 = fcmp olt float %1640, 0.000000e+00 + %1642 = sext i1 %1641 to i32 + %1643 = bitcast i32 %102 to float + %1644 = fcmp ogt float %1643, 0.000000e+00 + %1645 = zext i1 %1644 to i32 + %1646 = add nsw i32 %1642, %1645 + %1647 = sitofp i32 %1646 to float + %1648 = fneg float %1647 + %1649 = fmul float %1639, %1648 + %1650 
= bitcast i32 %102 to float + %1651 = fadd float %1650, %1649 + %1652 = fmul float %1630, %1651 + %1653 = fadd float %1652, 0.000000e+00 + %1654 = bitcast i32 %102 to float + %1655 = bitcast i32 %102 to float + %1656 = fmul float %1654, %1655 + %1657 = fadd float %1656, 0.000000e+00 + %1658 = bitcast i32 %131 to float + %1659 = bitcast i32 %131 to float + %1660 = fmul float %1658, %1659 + %1661 = fadd float %1657, %1660 + %1662 = call float @llvm.sqrt.f32.84(float %1661) + %1663 = bitcast i32 %102 to float + %1664 = fcmp olt float %1663, 0.000000e+00 + %1665 = sext i1 %1664 to i32 + %1666 = bitcast i32 %102 to float + %1667 = fcmp ogt float %1666, 0.000000e+00 + %1668 = zext i1 %1667 to i32 + %1669 = add nsw i32 %1665, %1668 + %1670 = sitofp i32 %1669 to float + %1671 = fneg float %1670 + %1672 = fmul float %1662, %1671 + %1673 = fmul float %1672, 0.000000e+00 + %1674 = bitcast i32 %131 to float + %1675 = fadd float %1674, %1673 + %1676 = bitcast i32 %102 to float + %1677 = bitcast i32 %102 to float + %1678 = fmul float %1676, %1677 + %1679 = fadd float %1678, 0.000000e+00 + %1680 = bitcast i32 %131 to float + %1681 = bitcast i32 %131 to float + %1682 = fmul float %1680, %1681 + %1683 = fadd float %1679, %1682 + %1684 = call float @llvm.sqrt.f32.85(float %1683) + %1685 = bitcast i32 %102 to float + %1686 = fcmp olt float %1685, 0.000000e+00 + %1687 = sext i1 %1686 to i32 + %1688 = bitcast i32 %102 to float + %1689 = fcmp ogt float %1688, 0.000000e+00 + %1690 = zext i1 %1689 to i32 + %1691 = add nsw i32 %1687, %1690 + %1692 = sitofp i32 %1691 to float + %1693 = fneg float %1692 + %1694 = fmul float %1684, %1693 + %1695 = fmul float %1694, 0.000000e+00 + %1696 = bitcast i32 %131 to float + %1697 = fadd float %1696, %1695 + %1698 = fmul float %1675, %1697 + %1699 = fadd float %1653, %1698 + %1700 = call float @llvm.sqrt.f32.86(float %1699) + %1701 = fadd float %1700, 0.000000e+00 + %1702 = fdiv float %1609, %1701 + %1703 = fmul float %1702, 2.000000e+00 + %1704 = bitcast i32 %102 to float + %1705 = bitcast i32 %102 to float + %1706 = fmul float %1704, %1705 + %1707 = fadd float %1706, 0.000000e+00 + %1708 = bitcast i32 %131 to float + %1709 = bitcast i32 %131 to float + %1710 = fmul float %1708, %1709 + %1711 = fadd float %1707, %1710 + %1712 = call float @llvm.sqrt.f32.87(float %1711) + %1713 = bitcast i32 %102 to float + %1714 = fcmp olt float %1713, 0.000000e+00 + %1715 = sext i1 %1714 to i32 + %1716 = bitcast i32 %102 to float + %1717 = fcmp ogt float %1716, 0.000000e+00 + %1718 = zext i1 %1717 to i32 + %1719 = add nsw i32 %1715, %1718 + %1720 = sitofp i32 %1719 to float + %1721 = fneg float %1720 + %1722 = fmul float %1712, %1721 + %1723 = fmul float %1722, 0.000000e+00 + %1724 = bitcast i32 %131 to float + %1725 = fadd float %1724, %1723 + %1726 = bitcast i32 %102 to float + %1727 = bitcast i32 %102 to float + %1728 = fmul float %1726, %1727 + %1729 = fadd float %1728, 0.000000e+00 + %1730 = bitcast i32 %131 to float + %1731 = bitcast i32 %131 to float + %1732 = fmul float %1730, %1731 + %1733 = fadd float %1729, %1732 + %1734 = call float @llvm.sqrt.f32.88(float %1733) + %1735 = bitcast i32 %102 to float + %1736 = fcmp olt float %1735, 0.000000e+00 + %1737 = sext i1 %1736 to i32 + %1738 = bitcast i32 %102 to float + %1739 = fcmp ogt float %1738, 0.000000e+00 + %1740 = zext i1 %1739 to i32 + %1741 = add nsw i32 %1737, %1740 + %1742 = sitofp i32 %1741 to float + %1743 = fneg float %1742 + %1744 = fmul float %1734, %1743 + %1745 = bitcast i32 %102 to float + %1746 = fadd float %1745, 
%1744 + %1747 = bitcast i32 %102 to float + %1748 = bitcast i32 %102 to float + %1749 = fmul float %1747, %1748 + %1750 = fadd float %1749, 0.000000e+00 + %1751 = bitcast i32 %131 to float + %1752 = bitcast i32 %131 to float + %1753 = fmul float %1751, %1752 + %1754 = fadd float %1750, %1753 + %1755 = call float @llvm.sqrt.f32.89(float %1754) + %1756 = bitcast i32 %102 to float + %1757 = fcmp olt float %1756, 0.000000e+00 + %1758 = sext i1 %1757 to i32 + %1759 = bitcast i32 %102 to float + %1760 = fcmp ogt float %1759, 0.000000e+00 + %1761 = zext i1 %1760 to i32 + %1762 = add nsw i32 %1758, %1761 + %1763 = sitofp i32 %1762 to float + %1764 = fneg float %1763 + %1765 = fmul float %1755, %1764 + %1766 = bitcast i32 %102 to float + %1767 = fadd float %1766, %1765 + %1768 = fmul float %1746, %1767 + %1769 = fadd float %1768, 0.000000e+00 + %1770 = bitcast i32 %102 to float + %1771 = bitcast i32 %102 to float + %1772 = fmul float %1770, %1771 + %1773 = fadd float %1772, 0.000000e+00 + %1774 = bitcast i32 %131 to float + %1775 = bitcast i32 %131 to float + %1776 = fmul float %1774, %1775 + %1777 = fadd float %1773, %1776 + %1778 = call float @llvm.sqrt.f32.90(float %1777) + %1779 = bitcast i32 %102 to float + %1780 = fcmp olt float %1779, 0.000000e+00 + %1781 = sext i1 %1780 to i32 + %1782 = bitcast i32 %102 to float + %1783 = fcmp ogt float %1782, 0.000000e+00 + %1784 = zext i1 %1783 to i32 + %1785 = add nsw i32 %1781, %1784 + %1786 = sitofp i32 %1785 to float + %1787 = fneg float %1786 + %1788 = fmul float %1778, %1787 + %1789 = fmul float %1788, 0.000000e+00 + %1790 = bitcast i32 %131 to float + %1791 = fadd float %1790, %1789 + %1792 = bitcast i32 %102 to float + %1793 = bitcast i32 %102 to float + %1794 = fmul float %1792, %1793 + %1795 = fadd float %1794, 0.000000e+00 + %1796 = bitcast i32 %131 to float + %1797 = bitcast i32 %131 to float + %1798 = fmul float %1796, %1797 + %1799 = fadd float %1795, %1798 + %1800 = call float @llvm.sqrt.f32.91(float %1799) + %1801 = bitcast i32 %102 to float + %1802 = fcmp olt float %1801, 0.000000e+00 + %1803 = sext i1 %1802 to i32 + %1804 = bitcast i32 %102 to float + %1805 = fcmp ogt float %1804, 0.000000e+00 + %1806 = zext i1 %1805 to i32 + %1807 = add nsw i32 %1803, %1806 + %1808 = sitofp i32 %1807 to float + %1809 = fneg float %1808 + %1810 = fmul float %1800, %1809 + %1811 = fmul float %1810, 0.000000e+00 + %1812 = bitcast i32 %131 to float + %1813 = fadd float %1812, %1811 + %1814 = fmul float %1791, %1813 + %1815 = fadd float %1769, %1814 + %1816 = call float @llvm.sqrt.f32.92(float %1815) + %1817 = fadd float %1816, 0.000000e+00 + %1818 = fdiv float %1725, %1817 + %1819 = fmul float %1703, %1818 + %1820 = fneg float %1819 + %1821 = getelementptr float, float* %0, i32 0 + %1822 = getelementptr inbounds float, float* %1821, i64 2 + %1823 = load float, float* %1822, align 4 + %1824 = fmul float %1820, %1823 + %1825 = fadd float %1588, %1824 + %1826 = insertelement <4 x float> zeroinitializer, float %1825, i32 0 + %1827 = insertelement <4 x float> %1826, float 0.000000e+00, i32 1 + %1828 = insertelement <4 x float> %1827, float 0.000000e+00, i32 2 + %1829 = insertelement <4 x float> %1828, float 0.000000e+00, i32 3 + %1830 = extractelement <4 x float> %1829, i32 0 + store float %1830, float* %2, align 4 + %1831 = extractelement <4 x float> %1829, i32 1 + %1832 = getelementptr float, float* %2, i32 0 + %1833 = getelementptr inbounds float, float* %1832, i64 1 + store float %1831, float* %1833, align 4 + %1834 = bitcast i32 %102 to float + %1835 = 
bitcast i32 %102 to float + %1836 = fmul float %1834, %1835 + %1837 = fadd float %1836, 0.000000e+00 + %1838 = bitcast i32 %131 to float + %1839 = bitcast i32 %131 to float + %1840 = fmul float %1838, %1839 + %1841 = fadd float %1837, %1840 + %1842 = call float @llvm.sqrt.f32.93(float %1841) + %1843 = bitcast i32 %102 to float + %1844 = fcmp olt float %1843, 0.000000e+00 + %1845 = sext i1 %1844 to i32 + %1846 = bitcast i32 %102 to float + %1847 = fcmp ogt float %1846, 0.000000e+00 + %1848 = zext i1 %1847 to i32 + %1849 = add nsw i32 %1845, %1848 + %1850 = sitofp i32 %1849 to float + %1851 = fneg float %1850 + %1852 = fmul float %1842, %1851 + %1853 = bitcast i32 %102 to float + %1854 = fadd float %1853, %1852 + %1855 = bitcast i32 %102 to float + %1856 = bitcast i32 %102 to float + %1857 = fmul float %1855, %1856 + %1858 = fadd float %1857, 0.000000e+00 + %1859 = bitcast i32 %131 to float + %1860 = bitcast i32 %131 to float + %1861 = fmul float %1859, %1860 + %1862 = fadd float %1858, %1861 + %1863 = call float @llvm.sqrt.f32.94(float %1862) + %1864 = bitcast i32 %102 to float + %1865 = fcmp olt float %1864, 0.000000e+00 + %1866 = sext i1 %1865 to i32 + %1867 = bitcast i32 %102 to float + %1868 = fcmp ogt float %1867, 0.000000e+00 + %1869 = zext i1 %1868 to i32 + %1870 = add nsw i32 %1866, %1869 + %1871 = sitofp i32 %1870 to float + %1872 = fneg float %1871 + %1873 = fmul float %1863, %1872 + %1874 = bitcast i32 %102 to float + %1875 = fadd float %1874, %1873 + %1876 = bitcast i32 %102 to float + %1877 = bitcast i32 %102 to float + %1878 = fmul float %1876, %1877 + %1879 = fadd float %1878, 0.000000e+00 + %1880 = bitcast i32 %131 to float + %1881 = bitcast i32 %131 to float + %1882 = fmul float %1880, %1881 + %1883 = fadd float %1879, %1882 + %1884 = call float @llvm.sqrt.f32.95(float %1883) + %1885 = bitcast i32 %102 to float + %1886 = fcmp olt float %1885, 0.000000e+00 + %1887 = sext i1 %1886 to i32 + %1888 = bitcast i32 %102 to float + %1889 = fcmp ogt float %1888, 0.000000e+00 + %1890 = zext i1 %1889 to i32 + %1891 = add nsw i32 %1887, %1890 + %1892 = sitofp i32 %1891 to float + %1893 = fneg float %1892 + %1894 = fmul float %1884, %1893 + %1895 = bitcast i32 %102 to float + %1896 = fadd float %1895, %1894 + %1897 = fmul float %1875, %1896 + %1898 = fadd float %1897, 0.000000e+00 + %1899 = bitcast i32 %102 to float + %1900 = bitcast i32 %102 to float + %1901 = fmul float %1899, %1900 + %1902 = fadd float %1901, 0.000000e+00 + %1903 = bitcast i32 %131 to float + %1904 = bitcast i32 %131 to float + %1905 = fmul float %1903, %1904 + %1906 = fadd float %1902, %1905 + %1907 = call float @llvm.sqrt.f32.96(float %1906) + %1908 = bitcast i32 %102 to float + %1909 = fcmp olt float %1908, 0.000000e+00 + %1910 = sext i1 %1909 to i32 + %1911 = bitcast i32 %102 to float + %1912 = fcmp ogt float %1911, 0.000000e+00 + %1913 = zext i1 %1912 to i32 + %1914 = add nsw i32 %1910, %1913 + %1915 = sitofp i32 %1914 to float + %1916 = fneg float %1915 + %1917 = fmul float %1907, %1916 + %1918 = fmul float %1917, 0.000000e+00 + %1919 = bitcast i32 %131 to float + %1920 = fadd float %1919, %1918 + %1921 = bitcast i32 %102 to float + %1922 = bitcast i32 %102 to float + %1923 = fmul float %1921, %1922 + %1924 = fadd float %1923, 0.000000e+00 + %1925 = bitcast i32 %131 to float + %1926 = bitcast i32 %131 to float + %1927 = fmul float %1925, %1926 + %1928 = fadd float %1924, %1927 + %1929 = call float @llvm.sqrt.f32.97(float %1928) + %1930 = bitcast i32 %102 to float + %1931 = fcmp olt float %1930, 0.000000e+00 + 
%1932 = sext i1 %1931 to i32 + %1933 = bitcast i32 %102 to float + %1934 = fcmp ogt float %1933, 0.000000e+00 + %1935 = zext i1 %1934 to i32 + %1936 = add nsw i32 %1932, %1935 + %1937 = sitofp i32 %1936 to float + %1938 = fneg float %1937 + %1939 = fmul float %1929, %1938 + %1940 = fmul float %1939, 0.000000e+00 + %1941 = bitcast i32 %131 to float + %1942 = fadd float %1941, %1940 + %1943 = fmul float %1920, %1942 + %1944 = fadd float %1898, %1943 + %1945 = call float @llvm.sqrt.f32.98(float %1944) + %1946 = fadd float %1945, 0.000000e+00 + %1947 = fdiv float %1854, %1946 + %1948 = fmul float %1947, 2.000000e+00 + %1949 = bitcast i32 %102 to float + %1950 = bitcast i32 %102 to float + %1951 = fmul float %1949, %1950 + %1952 = fadd float %1951, 0.000000e+00 + %1953 = bitcast i32 %131 to float + %1954 = bitcast i32 %131 to float + %1955 = fmul float %1953, %1954 + %1956 = fadd float %1952, %1955 + %1957 = call float @llvm.sqrt.f32.99(float %1956) + %1958 = bitcast i32 %102 to float + %1959 = fcmp olt float %1958, 0.000000e+00 + %1960 = sext i1 %1959 to i32 + %1961 = bitcast i32 %102 to float + %1962 = fcmp ogt float %1961, 0.000000e+00 + %1963 = zext i1 %1962 to i32 + %1964 = add nsw i32 %1960, %1963 + %1965 = sitofp i32 %1964 to float + %1966 = fneg float %1965 + %1967 = fmul float %1957, %1966 + %1968 = bitcast i32 %102 to float + %1969 = fadd float %1968, %1967 + %1970 = bitcast i32 %102 to float + %1971 = bitcast i32 %102 to float + %1972 = fmul float %1970, %1971 + %1973 = fadd float %1972, 0.000000e+00 + %1974 = bitcast i32 %131 to float + %1975 = bitcast i32 %131 to float + %1976 = fmul float %1974, %1975 + %1977 = fadd float %1973, %1976 + %1978 = call float @llvm.sqrt.f32.100(float %1977) + %1979 = bitcast i32 %102 to float + %1980 = fcmp olt float %1979, 0.000000e+00 + %1981 = sext i1 %1980 to i32 + %1982 = bitcast i32 %102 to float + %1983 = fcmp ogt float %1982, 0.000000e+00 + %1984 = zext i1 %1983 to i32 + %1985 = add nsw i32 %1981, %1984 + %1986 = sitofp i32 %1985 to float + %1987 = fneg float %1986 + %1988 = fmul float %1978, %1987 + %1989 = bitcast i32 %102 to float + %1990 = fadd float %1989, %1988 + %1991 = bitcast i32 %102 to float + %1992 = bitcast i32 %102 to float + %1993 = fmul float %1991, %1992 + %1994 = fadd float %1993, 0.000000e+00 + %1995 = bitcast i32 %131 to float + %1996 = bitcast i32 %131 to float + %1997 = fmul float %1995, %1996 + %1998 = fadd float %1994, %1997 + %1999 = call float @llvm.sqrt.f32.101(float %1998) + %2000 = bitcast i32 %102 to float + %2001 = fcmp olt float %2000, 0.000000e+00 + %2002 = sext i1 %2001 to i32 + %2003 = bitcast i32 %102 to float + %2004 = fcmp ogt float %2003, 0.000000e+00 + %2005 = zext i1 %2004 to i32 + %2006 = add nsw i32 %2002, %2005 + %2007 = sitofp i32 %2006 to float + %2008 = fneg float %2007 + %2009 = fmul float %1999, %2008 + %2010 = bitcast i32 %102 to float + %2011 = fadd float %2010, %2009 + %2012 = fmul float %1990, %2011 + %2013 = fadd float %2012, 0.000000e+00 + %2014 = bitcast i32 %102 to float + %2015 = bitcast i32 %102 to float + %2016 = fmul float %2014, %2015 + %2017 = fadd float %2016, 0.000000e+00 + %2018 = bitcast i32 %131 to float + %2019 = bitcast i32 %131 to float + %2020 = fmul float %2018, %2019 + %2021 = fadd float %2017, %2020 + %2022 = call float @llvm.sqrt.f32.102(float %2021) + %2023 = bitcast i32 %102 to float + %2024 = fcmp olt float %2023, 0.000000e+00 + %2025 = sext i1 %2024 to i32 + %2026 = bitcast i32 %102 to float + %2027 = fcmp ogt float %2026, 0.000000e+00 + %2028 = zext i1 %2027 to 
i32 + %2029 = add nsw i32 %2025, %2028 + %2030 = sitofp i32 %2029 to float + %2031 = fneg float %2030 + %2032 = fmul float %2022, %2031 + %2033 = fmul float %2032, 0.000000e+00 + %2034 = bitcast i32 %131 to float + %2035 = fadd float %2034, %2033 + %2036 = bitcast i32 %102 to float + %2037 = bitcast i32 %102 to float + %2038 = fmul float %2036, %2037 + %2039 = fadd float %2038, 0.000000e+00 + %2040 = bitcast i32 %131 to float + %2041 = bitcast i32 %131 to float + %2042 = fmul float %2040, %2041 + %2043 = fadd float %2039, %2042 + %2044 = call float @llvm.sqrt.f32.103(float %2043) + %2045 = bitcast i32 %102 to float + %2046 = fcmp olt float %2045, 0.000000e+00 + %2047 = sext i1 %2046 to i32 + %2048 = bitcast i32 %102 to float + %2049 = fcmp ogt float %2048, 0.000000e+00 + %2050 = zext i1 %2049 to i32 + %2051 = add nsw i32 %2047, %2050 + %2052 = sitofp i32 %2051 to float + %2053 = fneg float %2052 + %2054 = fmul float %2044, %2053 + %2055 = fmul float %2054, 0.000000e+00 + %2056 = bitcast i32 %131 to float + %2057 = fadd float %2056, %2055 + %2058 = fmul float %2035, %2057 + %2059 = fadd float %2013, %2058 + %2060 = call float @llvm.sqrt.f32.104(float %2059) + %2061 = fadd float %2060, 0.000000e+00 + %2062 = fdiv float %1969, %2061 + %2063 = fmul float %1948, %2062 + %2064 = fsub float 1.000000e+00, %2063 + %2065 = insertelement <4 x float> zeroinitializer, float %2064, i32 0 + %2066 = insertelement <4 x float> %2065, float 0.000000e+00, i32 1 + %2067 = insertelement <4 x float> %2066, float 0.000000e+00, i32 2 + %2068 = insertelement <4 x float> %2067, float 0.000000e+00, i32 3 + %2069 = getelementptr float, float* %0, i32 0 + %2070 = getelementptr inbounds float, float* %2069, i64 1 + %2071 = load float, float* %2070, align 4 + %2072 = insertelement <4 x float> zeroinitializer, float %2071, i32 0 + %2073 = insertelement <4 x float> %2072, float 0.000000e+00, i32 1 + %2074 = insertelement <4 x float> %2073, float 0.000000e+00, i32 2 + %2075 = insertelement <4 x float> %2074, float 0.000000e+00, i32 3 + %2076 = call <4 x float> @llvm.fma.f32.105(<4 x float> %2068, <4 x float> %2075, <4 x float> zeroinitializer) + %2077 = extractelement <4 x float> %2076, i32 0 + %2078 = getelementptr float, float* %2, i32 0 + %2079 = getelementptr inbounds float, float* %2078, i64 1 + store float %2077, float* %2079, align 4 + %2080 = bitcast i32 %102 to float + %2081 = bitcast i32 %102 to float + %2082 = fmul float %2080, %2081 + %2083 = fadd float %2082, 0.000000e+00 + %2084 = bitcast i32 %131 to float + %2085 = bitcast i32 %131 to float + %2086 = fmul float %2084, %2085 + %2087 = fadd float %2083, %2086 + %2088 = call float @llvm.sqrt.f32.106(float %2087) + %2089 = bitcast i32 %102 to float + %2090 = fcmp olt float %2089, 0.000000e+00 + %2091 = sext i1 %2090 to i32 + %2092 = bitcast i32 %102 to float + %2093 = fcmp ogt float %2092, 0.000000e+00 + %2094 = zext i1 %2093 to i32 + %2095 = add nsw i32 %2091, %2094 + %2096 = sitofp i32 %2095 to float + %2097 = fneg float %2096 + %2098 = fmul float %2088, %2097 + %2099 = bitcast i32 %102 to float + %2100 = fadd float %2099, %2098 + %2101 = bitcast i32 %102 to float + %2102 = bitcast i32 %102 to float + %2103 = fmul float %2101, %2102 + %2104 = fadd float %2103, 0.000000e+00 + %2105 = bitcast i32 %131 to float + %2106 = bitcast i32 %131 to float + %2107 = fmul float %2105, %2106 + %2108 = fadd float %2104, %2107 + %2109 = call float @llvm.sqrt.f32.107(float %2108) + %2110 = bitcast i32 %102 to float + %2111 = fcmp olt float %2110, 0.000000e+00 + %2112 = sext i1 
%2111 to i32 + %2113 = bitcast i32 %102 to float + %2114 = fcmp ogt float %2113, 0.000000e+00 + %2115 = zext i1 %2114 to i32 + %2116 = add nsw i32 %2112, %2115 + %2117 = sitofp i32 %2116 to float + %2118 = fneg float %2117 + %2119 = fmul float %2109, %2118 + %2120 = bitcast i32 %102 to float + %2121 = fadd float %2120, %2119 + %2122 = bitcast i32 %102 to float + %2123 = bitcast i32 %102 to float + %2124 = fmul float %2122, %2123 + %2125 = fadd float %2124, 0.000000e+00 + %2126 = bitcast i32 %131 to float + %2127 = bitcast i32 %131 to float + %2128 = fmul float %2126, %2127 + %2129 = fadd float %2125, %2128 + %2130 = call float @llvm.sqrt.f32.108(float %2129) + %2131 = bitcast i32 %102 to float + %2132 = fcmp olt float %2131, 0.000000e+00 + %2133 = sext i1 %2132 to i32 + %2134 = bitcast i32 %102 to float + %2135 = fcmp ogt float %2134, 0.000000e+00 + %2136 = zext i1 %2135 to i32 + %2137 = add nsw i32 %2133, %2136 + %2138 = sitofp i32 %2137 to float + %2139 = fneg float %2138 + %2140 = fmul float %2130, %2139 + %2141 = bitcast i32 %102 to float + %2142 = fadd float %2141, %2140 + %2143 = fmul float %2121, %2142 + %2144 = fadd float %2143, 0.000000e+00 + %2145 = bitcast i32 %102 to float + %2146 = bitcast i32 %102 to float + %2147 = fmul float %2145, %2146 + %2148 = fadd float %2147, 0.000000e+00 + %2149 = bitcast i32 %131 to float + %2150 = bitcast i32 %131 to float + %2151 = fmul float %2149, %2150 + %2152 = fadd float %2148, %2151 + %2153 = call float @llvm.sqrt.f32.109(float %2152) + %2154 = bitcast i32 %102 to float + %2155 = fcmp olt float %2154, 0.000000e+00 + %2156 = sext i1 %2155 to i32 + %2157 = bitcast i32 %102 to float + %2158 = fcmp ogt float %2157, 0.000000e+00 + %2159 = zext i1 %2158 to i32 + %2160 = add nsw i32 %2156, %2159 + %2161 = sitofp i32 %2160 to float + %2162 = fneg float %2161 + %2163 = fmul float %2153, %2162 + %2164 = fmul float %2163, 0.000000e+00 + %2165 = bitcast i32 %131 to float + %2166 = fadd float %2165, %2164 + %2167 = bitcast i32 %102 to float + %2168 = bitcast i32 %102 to float + %2169 = fmul float %2167, %2168 + %2170 = fadd float %2169, 0.000000e+00 + %2171 = bitcast i32 %131 to float + %2172 = bitcast i32 %131 to float + %2173 = fmul float %2171, %2172 + %2174 = fadd float %2170, %2173 + %2175 = call float @llvm.sqrt.f32.110(float %2174) + %2176 = bitcast i32 %102 to float + %2177 = fcmp olt float %2176, 0.000000e+00 + %2178 = sext i1 %2177 to i32 + %2179 = bitcast i32 %102 to float + %2180 = fcmp ogt float %2179, 0.000000e+00 + %2181 = zext i1 %2180 to i32 + %2182 = add nsw i32 %2178, %2181 + %2183 = sitofp i32 %2182 to float + %2184 = fneg float %2183 + %2185 = fmul float %2175, %2184 + %2186 = fmul float %2185, 0.000000e+00 + %2187 = bitcast i32 %131 to float + %2188 = fadd float %2187, %2186 + %2189 = fmul float %2166, %2188 + %2190 = fadd float %2144, %2189 + %2191 = call float @llvm.sqrt.f32.111(float %2190) + %2192 = fadd float %2191, 0.000000e+00 + %2193 = fdiv float %2100, %2192 + %2194 = fmul float %2193, 2.000000e+00 + %2195 = bitcast i32 %102 to float + %2196 = bitcast i32 %102 to float + %2197 = fmul float %2195, %2196 + %2198 = fadd float %2197, 0.000000e+00 + %2199 = bitcast i32 %131 to float + %2200 = bitcast i32 %131 to float + %2201 = fmul float %2199, %2200 + %2202 = fadd float %2198, %2201 + %2203 = call float @llvm.sqrt.f32.112(float %2202) + %2204 = bitcast i32 %102 to float + %2205 = fcmp olt float %2204, 0.000000e+00 + %2206 = sext i1 %2205 to i32 + %2207 = bitcast i32 %102 to float + %2208 = fcmp ogt float %2207, 0.000000e+00 + 
%2209 = zext i1 %2208 to i32 + %2210 = add nsw i32 %2206, %2209 + %2211 = sitofp i32 %2210 to float + %2212 = fneg float %2211 + %2213 = fmul float %2203, %2212 + %2214 = bitcast i32 %102 to float + %2215 = fadd float %2214, %2213 + %2216 = bitcast i32 %102 to float + %2217 = bitcast i32 %102 to float + %2218 = fmul float %2216, %2217 + %2219 = fadd float %2218, 0.000000e+00 + %2220 = bitcast i32 %131 to float + %2221 = bitcast i32 %131 to float + %2222 = fmul float %2220, %2221 + %2223 = fadd float %2219, %2222 + %2224 = call float @llvm.sqrt.f32.113(float %2223) + %2225 = bitcast i32 %102 to float + %2226 = fcmp olt float %2225, 0.000000e+00 + %2227 = sext i1 %2226 to i32 + %2228 = bitcast i32 %102 to float + %2229 = fcmp ogt float %2228, 0.000000e+00 + %2230 = zext i1 %2229 to i32 + %2231 = add nsw i32 %2227, %2230 + %2232 = sitofp i32 %2231 to float + %2233 = fneg float %2232 + %2234 = fmul float %2224, %2233 + %2235 = bitcast i32 %102 to float + %2236 = fadd float %2235, %2234 + %2237 = bitcast i32 %102 to float + %2238 = bitcast i32 %102 to float + %2239 = fmul float %2237, %2238 + %2240 = fadd float %2239, 0.000000e+00 + %2241 = bitcast i32 %131 to float + %2242 = bitcast i32 %131 to float + %2243 = fmul float %2241, %2242 + %2244 = fadd float %2240, %2243 + %2245 = call float @llvm.sqrt.f32.114(float %2244) + %2246 = bitcast i32 %102 to float + %2247 = fcmp olt float %2246, 0.000000e+00 + %2248 = sext i1 %2247 to i32 + %2249 = bitcast i32 %102 to float + %2250 = fcmp ogt float %2249, 0.000000e+00 + %2251 = zext i1 %2250 to i32 + %2252 = add nsw i32 %2248, %2251 + %2253 = sitofp i32 %2252 to float + %2254 = fneg float %2253 + %2255 = fmul float %2245, %2254 + %2256 = bitcast i32 %102 to float + %2257 = fadd float %2256, %2255 + %2258 = fmul float %2236, %2257 + %2259 = fadd float %2258, 0.000000e+00 + %2260 = bitcast i32 %102 to float + %2261 = bitcast i32 %102 to float + %2262 = fmul float %2260, %2261 + %2263 = fadd float %2262, 0.000000e+00 + %2264 = bitcast i32 %131 to float + %2265 = bitcast i32 %131 to float + %2266 = fmul float %2264, %2265 + %2267 = fadd float %2263, %2266 + %2268 = call float @llvm.sqrt.f32.115(float %2267) + %2269 = bitcast i32 %102 to float + %2270 = fcmp olt float %2269, 0.000000e+00 + %2271 = sext i1 %2270 to i32 + %2272 = bitcast i32 %102 to float + %2273 = fcmp ogt float %2272, 0.000000e+00 + %2274 = zext i1 %2273 to i32 + %2275 = add nsw i32 %2271, %2274 + %2276 = sitofp i32 %2275 to float + %2277 = fneg float %2276 + %2278 = fmul float %2268, %2277 + %2279 = fmul float %2278, 0.000000e+00 + %2280 = bitcast i32 %131 to float + %2281 = fadd float %2280, %2279 + %2282 = bitcast i32 %102 to float + %2283 = bitcast i32 %102 to float + %2284 = fmul float %2282, %2283 + %2285 = fadd float %2284, 0.000000e+00 + %2286 = bitcast i32 %131 to float + %2287 = bitcast i32 %131 to float + %2288 = fmul float %2286, %2287 + %2289 = fadd float %2285, %2288 + %2290 = call float @llvm.sqrt.f32.116(float %2289) + %2291 = bitcast i32 %102 to float + %2292 = fcmp olt float %2291, 0.000000e+00 + %2293 = sext i1 %2292 to i32 + %2294 = bitcast i32 %102 to float + %2295 = fcmp ogt float %2294, 0.000000e+00 + %2296 = zext i1 %2295 to i32 + %2297 = add nsw i32 %2293, %2296 + %2298 = sitofp i32 %2297 to float + %2299 = fneg float %2298 + %2300 = fmul float %2290, %2299 + %2301 = fmul float %2300, 0.000000e+00 + %2302 = bitcast i32 %131 to float + %2303 = fadd float %2302, %2301 + %2304 = fmul float %2281, %2303 + %2305 = fadd float %2259, %2304 + %2306 = call float 
@llvm.sqrt.f32.117(float %2305) + %2307 = fadd float %2306, 0.000000e+00 + %2308 = fdiv float %2215, %2307 + %2309 = fmul float %2194, %2308 + %2310 = fsub float 1.000000e+00, %2309 + %2311 = fmul float %2310, %2071 + %2312 = fadd float %2311, 0.000000e+00 + %2313 = bitcast i32 %102 to float + %2314 = bitcast i32 %102 to float + %2315 = fmul float %2313, %2314 + %2316 = fadd float %2315, 0.000000e+00 + %2317 = bitcast i32 %131 to float + %2318 = bitcast i32 %131 to float + %2319 = fmul float %2317, %2318 + %2320 = fadd float %2316, %2319 + %2321 = call float @llvm.sqrt.f32.118(float %2320) + %2322 = bitcast i32 %102 to float + %2323 = fcmp olt float %2322, 0.000000e+00 + %2324 = sext i1 %2323 to i32 + %2325 = bitcast i32 %102 to float + %2326 = fcmp ogt float %2325, 0.000000e+00 + %2327 = zext i1 %2326 to i32 + %2328 = add nsw i32 %2324, %2327 + %2329 = sitofp i32 %2328 to float + %2330 = fneg float %2329 + %2331 = fmul float %2321, %2330 + %2332 = bitcast i32 %102 to float + %2333 = fadd float %2332, %2331 + %2334 = bitcast i32 %102 to float + %2335 = bitcast i32 %102 to float + %2336 = fmul float %2334, %2335 + %2337 = fadd float %2336, 0.000000e+00 + %2338 = bitcast i32 %131 to float + %2339 = bitcast i32 %131 to float + %2340 = fmul float %2338, %2339 + %2341 = fadd float %2337, %2340 + %2342 = call float @llvm.sqrt.f32.119(float %2341) + %2343 = bitcast i32 %102 to float + %2344 = fcmp olt float %2343, 0.000000e+00 + %2345 = sext i1 %2344 to i32 + %2346 = bitcast i32 %102 to float + %2347 = fcmp ogt float %2346, 0.000000e+00 + %2348 = zext i1 %2347 to i32 + %2349 = add nsw i32 %2345, %2348 + %2350 = sitofp i32 %2349 to float + %2351 = fneg float %2350 + %2352 = fmul float %2342, %2351 + %2353 = bitcast i32 %102 to float + %2354 = fadd float %2353, %2352 + %2355 = bitcast i32 %102 to float + %2356 = bitcast i32 %102 to float + %2357 = fmul float %2355, %2356 + %2358 = fadd float %2357, 0.000000e+00 + %2359 = bitcast i32 %131 to float + %2360 = bitcast i32 %131 to float + %2361 = fmul float %2359, %2360 + %2362 = fadd float %2358, %2361 + %2363 = call float @llvm.sqrt.f32.120(float %2362) + %2364 = bitcast i32 %102 to float + %2365 = fcmp olt float %2364, 0.000000e+00 + %2366 = sext i1 %2365 to i32 + %2367 = bitcast i32 %102 to float + %2368 = fcmp ogt float %2367, 0.000000e+00 + %2369 = zext i1 %2368 to i32 + %2370 = add nsw i32 %2366, %2369 + %2371 = sitofp i32 %2370 to float + %2372 = fneg float %2371 + %2373 = fmul float %2363, %2372 + %2374 = bitcast i32 %102 to float + %2375 = fadd float %2374, %2373 + %2376 = fmul float %2354, %2375 + %2377 = fadd float %2376, 0.000000e+00 + %2378 = bitcast i32 %102 to float + %2379 = bitcast i32 %102 to float + %2380 = fmul float %2378, %2379 + %2381 = fadd float %2380, 0.000000e+00 + %2382 = bitcast i32 %131 to float + %2383 = bitcast i32 %131 to float + %2384 = fmul float %2382, %2383 + %2385 = fadd float %2381, %2384 + %2386 = call float @llvm.sqrt.f32.121(float %2385) + %2387 = bitcast i32 %102 to float + %2388 = fcmp olt float %2387, 0.000000e+00 + %2389 = sext i1 %2388 to i32 + %2390 = bitcast i32 %102 to float + %2391 = fcmp ogt float %2390, 0.000000e+00 + %2392 = zext i1 %2391 to i32 + %2393 = add nsw i32 %2389, %2392 + %2394 = sitofp i32 %2393 to float + %2395 = fneg float %2394 + %2396 = fmul float %2386, %2395 + %2397 = fmul float %2396, 0.000000e+00 + %2398 = bitcast i32 %131 to float + %2399 = fadd float %2398, %2397 + %2400 = bitcast i32 %102 to float + %2401 = bitcast i32 %102 to float + %2402 = fmul float %2400, %2401 + %2403 = 
fadd float %2402, 0.000000e+00 + %2404 = bitcast i32 %131 to float + %2405 = bitcast i32 %131 to float + %2406 = fmul float %2404, %2405 + %2407 = fadd float %2403, %2406 + %2408 = call float @llvm.sqrt.f32.122(float %2407) + %2409 = bitcast i32 %102 to float + %2410 = fcmp olt float %2409, 0.000000e+00 + %2411 = sext i1 %2410 to i32 + %2412 = bitcast i32 %102 to float + %2413 = fcmp ogt float %2412, 0.000000e+00 + %2414 = zext i1 %2413 to i32 + %2415 = add nsw i32 %2411, %2414 + %2416 = sitofp i32 %2415 to float + %2417 = fneg float %2416 + %2418 = fmul float %2408, %2417 + %2419 = fmul float %2418, 0.000000e+00 + %2420 = bitcast i32 %131 to float + %2421 = fadd float %2420, %2419 + %2422 = fmul float %2399, %2421 + %2423 = fadd float %2377, %2422 + %2424 = call float @llvm.sqrt.f32.123(float %2423) + %2425 = fadd float %2424, 0.000000e+00 + %2426 = fdiv float %2333, %2425 + %2427 = fmul float %2426, 2.000000e+00 + %2428 = bitcast i32 %102 to float + %2429 = bitcast i32 %102 to float + %2430 = fmul float %2428, %2429 + %2431 = fadd float %2430, 0.000000e+00 + %2432 = bitcast i32 %131 to float + %2433 = bitcast i32 %131 to float + %2434 = fmul float %2432, %2433 + %2435 = fadd float %2431, %2434 + %2436 = call float @llvm.sqrt.f32.124(float %2435) + %2437 = bitcast i32 %102 to float + %2438 = fcmp olt float %2437, 0.000000e+00 + %2439 = sext i1 %2438 to i32 + %2440 = bitcast i32 %102 to float + %2441 = fcmp ogt float %2440, 0.000000e+00 + %2442 = zext i1 %2441 to i32 + %2443 = add nsw i32 %2439, %2442 + %2444 = sitofp i32 %2443 to float + %2445 = fneg float %2444 + %2446 = fmul float %2436, %2445 + %2447 = fmul float %2446, 0.000000e+00 + %2448 = bitcast i32 %131 to float + %2449 = fadd float %2448, %2447 + %2450 = bitcast i32 %102 to float + %2451 = bitcast i32 %102 to float + %2452 = fmul float %2450, %2451 + %2453 = fadd float %2452, 0.000000e+00 + %2454 = bitcast i32 %131 to float + %2455 = bitcast i32 %131 to float + %2456 = fmul float %2454, %2455 + %2457 = fadd float %2453, %2456 + %2458 = call float @llvm.sqrt.f32.125(float %2457) + %2459 = bitcast i32 %102 to float + %2460 = fcmp olt float %2459, 0.000000e+00 + %2461 = sext i1 %2460 to i32 + %2462 = bitcast i32 %102 to float + %2463 = fcmp ogt float %2462, 0.000000e+00 + %2464 = zext i1 %2463 to i32 + %2465 = add nsw i32 %2461, %2464 + %2466 = sitofp i32 %2465 to float + %2467 = fneg float %2466 + %2468 = fmul float %2458, %2467 + %2469 = bitcast i32 %102 to float + %2470 = fadd float %2469, %2468 + %2471 = bitcast i32 %102 to float + %2472 = bitcast i32 %102 to float + %2473 = fmul float %2471, %2472 + %2474 = fadd float %2473, 0.000000e+00 + %2475 = bitcast i32 %131 to float + %2476 = bitcast i32 %131 to float + %2477 = fmul float %2475, %2476 + %2478 = fadd float %2474, %2477 + %2479 = call float @llvm.sqrt.f32.126(float %2478) + %2480 = bitcast i32 %102 to float + %2481 = fcmp olt float %2480, 0.000000e+00 + %2482 = sext i1 %2481 to i32 + %2483 = bitcast i32 %102 to float + %2484 = fcmp ogt float %2483, 0.000000e+00 + %2485 = zext i1 %2484 to i32 + %2486 = add nsw i32 %2482, %2485 + %2487 = sitofp i32 %2486 to float + %2488 = fneg float %2487 + %2489 = fmul float %2479, %2488 + %2490 = bitcast i32 %102 to float + %2491 = fadd float %2490, %2489 + %2492 = fmul float %2470, %2491 + %2493 = fadd float %2492, 0.000000e+00 + %2494 = bitcast i32 %102 to float + %2495 = bitcast i32 %102 to float + %2496 = fmul float %2494, %2495 + %2497 = fadd float %2496, 0.000000e+00 + %2498 = bitcast i32 %131 to float + %2499 = bitcast i32 %131 to 
float + %2500 = fmul float %2498, %2499 + %2501 = fadd float %2497, %2500 + %2502 = call float @llvm.sqrt.f32.127(float %2501) + %2503 = bitcast i32 %102 to float + %2504 = fcmp olt float %2503, 0.000000e+00 + %2505 = sext i1 %2504 to i32 + %2506 = bitcast i32 %102 to float + %2507 = fcmp ogt float %2506, 0.000000e+00 + %2508 = zext i1 %2507 to i32 + %2509 = add nsw i32 %2505, %2508 + %2510 = sitofp i32 %2509 to float + %2511 = fneg float %2510 + %2512 = fmul float %2502, %2511 + %2513 = fmul float %2512, 0.000000e+00 + %2514 = bitcast i32 %131 to float + %2515 = fadd float %2514, %2513 + %2516 = bitcast i32 %102 to float + %2517 = bitcast i32 %102 to float + %2518 = fmul float %2516, %2517 + %2519 = fadd float %2518, 0.000000e+00 + %2520 = bitcast i32 %131 to float + %2521 = bitcast i32 %131 to float + %2522 = fmul float %2520, %2521 + %2523 = fadd float %2519, %2522 + %2524 = call float @llvm.sqrt.f32.128(float %2523) + %2525 = bitcast i32 %102 to float + %2526 = fcmp olt float %2525, 0.000000e+00 + %2527 = sext i1 %2526 to i32 + %2528 = bitcast i32 %102 to float + %2529 = fcmp ogt float %2528, 0.000000e+00 + %2530 = zext i1 %2529 to i32 + %2531 = add nsw i32 %2527, %2530 + %2532 = sitofp i32 %2531 to float + %2533 = fneg float %2532 + %2534 = fmul float %2524, %2533 + %2535 = fmul float %2534, 0.000000e+00 + %2536 = bitcast i32 %131 to float + %2537 = fadd float %2536, %2535 + %2538 = fmul float %2515, %2537 + %2539 = fadd float %2493, %2538 + %2540 = call float @llvm.sqrt.f32.129(float %2539) + %2541 = fadd float %2540, 0.000000e+00 + %2542 = fdiv float %2449, %2541 + %2543 = fmul float %2427, %2542 + %2544 = fneg float %2543 + %2545 = getelementptr float, float* %0, i32 0 + %2546 = getelementptr inbounds float, float* %2545, i64 3 + %2547 = load float, float* %2546, align 4 + %2548 = fmul float %2544, %2547 + %2549 = fadd float %2312, %2548 + %2550 = insertelement <4 x float> zeroinitializer, float %2549, i32 0 + %2551 = insertelement <4 x float> %2550, float 0.000000e+00, i32 1 + %2552 = insertelement <4 x float> %2551, float 0.000000e+00, i32 2 + %2553 = insertelement <4 x float> %2552, float 0.000000e+00, i32 3 + %2554 = extractelement <4 x float> %2553, i32 0 + %2555 = getelementptr float, float* %2, i32 0 + %2556 = getelementptr inbounds float, float* %2555, i64 1 + store float %2554, float* %2556, align 4 + %2557 = extractelement <4 x float> %2553, i32 1 + %2558 = getelementptr float, float* %2, i32 0 + %2559 = getelementptr inbounds float, float* %2558, i64 2 + store float %2557, float* %2559, align 4 + %2560 = bitcast i32 %102 to float + %2561 = bitcast i32 %102 to float + %2562 = fmul float %2560, %2561 + %2563 = fadd float %2562, 0.000000e+00 + %2564 = bitcast i32 %131 to float + %2565 = bitcast i32 %131 to float + %2566 = fmul float %2564, %2565 + %2567 = fadd float %2563, %2566 + %2568 = call float @llvm.sqrt.f32.130(float %2567) + %2569 = bitcast i32 %102 to float + %2570 = fcmp olt float %2569, 0.000000e+00 + %2571 = sext i1 %2570 to i32 + %2572 = bitcast i32 %102 to float + %2573 = fcmp ogt float %2572, 0.000000e+00 + %2574 = zext i1 %2573 to i32 + %2575 = add nsw i32 %2571, %2574 + %2576 = sitofp i32 %2575 to float + %2577 = fneg float %2576 + %2578 = fmul float %2568, %2577 + %2579 = fmul float %2578, 0.000000e+00 + %2580 = bitcast i32 %131 to float + %2581 = fadd float %2580, %2579 + %2582 = bitcast i32 %102 to float + %2583 = bitcast i32 %102 to float + %2584 = fmul float %2582, %2583 + %2585 = fadd float %2584, 0.000000e+00 + %2586 = bitcast i32 %131 to float + 
%2587 = bitcast i32 %131 to float + %2588 = fmul float %2586, %2587 + %2589 = fadd float %2585, %2588 + %2590 = call float @llvm.sqrt.f32.131(float %2589) + %2591 = bitcast i32 %102 to float + %2592 = fcmp olt float %2591, 0.000000e+00 + %2593 = sext i1 %2592 to i32 + %2594 = bitcast i32 %102 to float + %2595 = fcmp ogt float %2594, 0.000000e+00 + %2596 = zext i1 %2595 to i32 + %2597 = add nsw i32 %2593, %2596 + %2598 = sitofp i32 %2597 to float + %2599 = fneg float %2598 + %2600 = fmul float %2590, %2599 + %2601 = bitcast i32 %102 to float + %2602 = fadd float %2601, %2600 + %2603 = bitcast i32 %102 to float + %2604 = bitcast i32 %102 to float + %2605 = fmul float %2603, %2604 + %2606 = fadd float %2605, 0.000000e+00 + %2607 = bitcast i32 %131 to float + %2608 = bitcast i32 %131 to float + %2609 = fmul float %2607, %2608 + %2610 = fadd float %2606, %2609 + %2611 = call float @llvm.sqrt.f32.132(float %2610) + %2612 = bitcast i32 %102 to float + %2613 = fcmp olt float %2612, 0.000000e+00 + %2614 = sext i1 %2613 to i32 + %2615 = bitcast i32 %102 to float + %2616 = fcmp ogt float %2615, 0.000000e+00 + %2617 = zext i1 %2616 to i32 + %2618 = add nsw i32 %2614, %2617 + %2619 = sitofp i32 %2618 to float + %2620 = fneg float %2619 + %2621 = fmul float %2611, %2620 + %2622 = bitcast i32 %102 to float + %2623 = fadd float %2622, %2621 + %2624 = fmul float %2602, %2623 + %2625 = fadd float %2624, 0.000000e+00 + %2626 = bitcast i32 %102 to float + %2627 = bitcast i32 %102 to float + %2628 = fmul float %2626, %2627 + %2629 = fadd float %2628, 0.000000e+00 + %2630 = bitcast i32 %131 to float + %2631 = bitcast i32 %131 to float + %2632 = fmul float %2630, %2631 + %2633 = fadd float %2629, %2632 + %2634 = call float @llvm.sqrt.f32.133(float %2633) + %2635 = bitcast i32 %102 to float + %2636 = fcmp olt float %2635, 0.000000e+00 + %2637 = sext i1 %2636 to i32 + %2638 = bitcast i32 %102 to float + %2639 = fcmp ogt float %2638, 0.000000e+00 + %2640 = zext i1 %2639 to i32 + %2641 = add nsw i32 %2637, %2640 + %2642 = sitofp i32 %2641 to float + %2643 = fneg float %2642 + %2644 = fmul float %2634, %2643 + %2645 = fmul float %2644, 0.000000e+00 + %2646 = bitcast i32 %131 to float + %2647 = fadd float %2646, %2645 + %2648 = bitcast i32 %102 to float + %2649 = bitcast i32 %102 to float + %2650 = fmul float %2648, %2649 + %2651 = fadd float %2650, 0.000000e+00 + %2652 = bitcast i32 %131 to float + %2653 = bitcast i32 %131 to float + %2654 = fmul float %2652, %2653 + %2655 = fadd float %2651, %2654 + %2656 = call float @llvm.sqrt.f32.134(float %2655) + %2657 = bitcast i32 %102 to float + %2658 = fcmp olt float %2657, 0.000000e+00 + %2659 = sext i1 %2658 to i32 + %2660 = bitcast i32 %102 to float + %2661 = fcmp ogt float %2660, 0.000000e+00 + %2662 = zext i1 %2661 to i32 + %2663 = add nsw i32 %2659, %2662 + %2664 = sitofp i32 %2663 to float + %2665 = fneg float %2664 + %2666 = fmul float %2656, %2665 + %2667 = fmul float %2666, 0.000000e+00 + %2668 = bitcast i32 %131 to float + %2669 = fadd float %2668, %2667 + %2670 = fmul float %2647, %2669 + %2671 = fadd float %2625, %2670 + %2672 = call float @llvm.sqrt.f32.135(float %2671) + %2673 = fadd float %2672, 0.000000e+00 + %2674 = fdiv float %2581, %2673 + %2675 = fmul float %2674, 2.000000e+00 + %2676 = bitcast i32 %102 to float + %2677 = bitcast i32 %102 to float + %2678 = fmul float %2676, %2677 + %2679 = fadd float %2678, 0.000000e+00 + %2680 = bitcast i32 %131 to float + %2681 = bitcast i32 %131 to float + %2682 = fmul float %2680, %2681 + %2683 = fadd float %2679, 
%2682 + %2684 = call float @llvm.sqrt.f32.136(float %2683) + %2685 = bitcast i32 %102 to float + %2686 = fcmp olt float %2685, 0.000000e+00 + %2687 = sext i1 %2686 to i32 + %2688 = bitcast i32 %102 to float + %2689 = fcmp ogt float %2688, 0.000000e+00 + %2690 = zext i1 %2689 to i32 + %2691 = add nsw i32 %2687, %2690 + %2692 = sitofp i32 %2691 to float + %2693 = fneg float %2692 + %2694 = fmul float %2684, %2693 + %2695 = bitcast i32 %102 to float + %2696 = fadd float %2695, %2694 + %2697 = bitcast i32 %102 to float + %2698 = bitcast i32 %102 to float + %2699 = fmul float %2697, %2698 + %2700 = fadd float %2699, 0.000000e+00 + %2701 = bitcast i32 %131 to float + %2702 = bitcast i32 %131 to float + %2703 = fmul float %2701, %2702 + %2704 = fadd float %2700, %2703 + %2705 = call float @llvm.sqrt.f32.137(float %2704) + %2706 = bitcast i32 %102 to float + %2707 = fcmp olt float %2706, 0.000000e+00 + %2708 = sext i1 %2707 to i32 + %2709 = bitcast i32 %102 to float + %2710 = fcmp ogt float %2709, 0.000000e+00 + %2711 = zext i1 %2710 to i32 + %2712 = add nsw i32 %2708, %2711 + %2713 = sitofp i32 %2712 to float + %2714 = fneg float %2713 + %2715 = fmul float %2705, %2714 + %2716 = bitcast i32 %102 to float + %2717 = fadd float %2716, %2715 + %2718 = bitcast i32 %102 to float + %2719 = bitcast i32 %102 to float + %2720 = fmul float %2718, %2719 + %2721 = fadd float %2720, 0.000000e+00 + %2722 = bitcast i32 %131 to float + %2723 = bitcast i32 %131 to float + %2724 = fmul float %2722, %2723 + %2725 = fadd float %2721, %2724 + %2726 = call float @llvm.sqrt.f32.138(float %2725) + %2727 = bitcast i32 %102 to float + %2728 = fcmp olt float %2727, 0.000000e+00 + %2729 = sext i1 %2728 to i32 + %2730 = bitcast i32 %102 to float + %2731 = fcmp ogt float %2730, 0.000000e+00 + %2732 = zext i1 %2731 to i32 + %2733 = add nsw i32 %2729, %2732 + %2734 = sitofp i32 %2733 to float + %2735 = fneg float %2734 + %2736 = fmul float %2726, %2735 + %2737 = bitcast i32 %102 to float + %2738 = fadd float %2737, %2736 + %2739 = fmul float %2717, %2738 + %2740 = fadd float %2739, 0.000000e+00 + %2741 = bitcast i32 %102 to float + %2742 = bitcast i32 %102 to float + %2743 = fmul float %2741, %2742 + %2744 = fadd float %2743, 0.000000e+00 + %2745 = bitcast i32 %131 to float + %2746 = bitcast i32 %131 to float + %2747 = fmul float %2745, %2746 + %2748 = fadd float %2744, %2747 + %2749 = call float @llvm.sqrt.f32.139(float %2748) + %2750 = bitcast i32 %102 to float + %2751 = fcmp olt float %2750, 0.000000e+00 + %2752 = sext i1 %2751 to i32 + %2753 = bitcast i32 %102 to float + %2754 = fcmp ogt float %2753, 0.000000e+00 + %2755 = zext i1 %2754 to i32 + %2756 = add nsw i32 %2752, %2755 + %2757 = sitofp i32 %2756 to float + %2758 = fneg float %2757 + %2759 = fmul float %2749, %2758 + %2760 = fmul float %2759, 0.000000e+00 + %2761 = bitcast i32 %131 to float + %2762 = fadd float %2761, %2760 + %2763 = bitcast i32 %102 to float + %2764 = bitcast i32 %102 to float + %2765 = fmul float %2763, %2764 + %2766 = fadd float %2765, 0.000000e+00 + %2767 = bitcast i32 %131 to float + %2768 = bitcast i32 %131 to float + %2769 = fmul float %2767, %2768 + %2770 = fadd float %2766, %2769 + %2771 = call float @llvm.sqrt.f32.140(float %2770) + %2772 = bitcast i32 %102 to float + %2773 = fcmp olt float %2772, 0.000000e+00 + %2774 = sext i1 %2773 to i32 + %2775 = bitcast i32 %102 to float + %2776 = fcmp ogt float %2775, 0.000000e+00 + %2777 = zext i1 %2776 to i32 + %2778 = add nsw i32 %2774, %2777 + %2779 = sitofp i32 %2778 to float + %2780 = fneg float 
%2779 + %2781 = fmul float %2771, %2780 + %2782 = fmul float %2781, 0.000000e+00 + %2783 = bitcast i32 %131 to float + %2784 = fadd float %2783, %2782 + %2785 = fmul float %2762, %2784 + %2786 = fadd float %2740, %2785 + %2787 = call float @llvm.sqrt.f32.141(float %2786) + %2788 = fadd float %2787, 0.000000e+00 + %2789 = fdiv float %2696, %2788 + %2790 = fmul float %2675, %2789 + %2791 = fneg float %2790 + %2792 = insertelement <4 x float> zeroinitializer, float %2791, i32 0 + %2793 = insertelement <4 x float> %2792, float 0.000000e+00, i32 1 + %2794 = insertelement <4 x float> %2793, float 0.000000e+00, i32 2 + %2795 = insertelement <4 x float> %2794, float 0.000000e+00, i32 3 + %2796 = getelementptr float, float* %0, i32 0 + %2797 = load float, float* %2796, align 4 + %2798 = insertelement <4 x float> zeroinitializer, float %2797, i32 0 + %2799 = insertelement <4 x float> %2798, float 0.000000e+00, i32 1 + %2800 = insertelement <4 x float> %2799, float 0.000000e+00, i32 2 + %2801 = insertelement <4 x float> %2800, float 0.000000e+00, i32 3 + %2802 = call <4 x float> @llvm.fma.f32.142(<4 x float> %2795, <4 x float> %2801, <4 x float> zeroinitializer) + %2803 = extractelement <4 x float> %2802, i32 0 + %2804 = getelementptr float, float* %2, i32 0 + %2805 = getelementptr inbounds float, float* %2804, i64 2 + store float %2803, float* %2805, align 4 + %2806 = bitcast i32 %102 to float + %2807 = bitcast i32 %102 to float + %2808 = fmul float %2806, %2807 + %2809 = fadd float %2808, 0.000000e+00 + %2810 = bitcast i32 %131 to float + %2811 = bitcast i32 %131 to float + %2812 = fmul float %2810, %2811 + %2813 = fadd float %2809, %2812 + %2814 = call float @llvm.sqrt.f32.143(float %2813) + %2815 = bitcast i32 %102 to float + %2816 = fcmp olt float %2815, 0.000000e+00 + %2817 = sext i1 %2816 to i32 + %2818 = bitcast i32 %102 to float + %2819 = fcmp ogt float %2818, 0.000000e+00 + %2820 = zext i1 %2819 to i32 + %2821 = add nsw i32 %2817, %2820 + %2822 = sitofp i32 %2821 to float + %2823 = fneg float %2822 + %2824 = fmul float %2814, %2823 + %2825 = fmul float %2824, 0.000000e+00 + %2826 = bitcast i32 %131 to float + %2827 = fadd float %2826, %2825 + %2828 = bitcast i32 %102 to float + %2829 = bitcast i32 %102 to float + %2830 = fmul float %2828, %2829 + %2831 = fadd float %2830, 0.000000e+00 + %2832 = bitcast i32 %131 to float + %2833 = bitcast i32 %131 to float + %2834 = fmul float %2832, %2833 + %2835 = fadd float %2831, %2834 + %2836 = call float @llvm.sqrt.f32.144(float %2835) + %2837 = bitcast i32 %102 to float + %2838 = fcmp olt float %2837, 0.000000e+00 + %2839 = sext i1 %2838 to i32 + %2840 = bitcast i32 %102 to float + %2841 = fcmp ogt float %2840, 0.000000e+00 + %2842 = zext i1 %2841 to i32 + %2843 = add nsw i32 %2839, %2842 + %2844 = sitofp i32 %2843 to float + %2845 = fneg float %2844 + %2846 = fmul float %2836, %2845 + %2847 = bitcast i32 %102 to float + %2848 = fadd float %2847, %2846 + %2849 = bitcast i32 %102 to float + %2850 = bitcast i32 %102 to float + %2851 = fmul float %2849, %2850 + %2852 = fadd float %2851, 0.000000e+00 + %2853 = bitcast i32 %131 to float + %2854 = bitcast i32 %131 to float + %2855 = fmul float %2853, %2854 + %2856 = fadd float %2852, %2855 + %2857 = call float @llvm.sqrt.f32.145(float %2856) + %2858 = bitcast i32 %102 to float + %2859 = fcmp olt float %2858, 0.000000e+00 + %2860 = sext i1 %2859 to i32 + %2861 = bitcast i32 %102 to float + %2862 = fcmp ogt float %2861, 0.000000e+00 + %2863 = zext i1 %2862 to i32 + %2864 = add nsw i32 %2860, %2863 + %2865 = 
sitofp i32 %2864 to float + %2866 = fneg float %2865 + %2867 = fmul float %2857, %2866 + %2868 = bitcast i32 %102 to float + %2869 = fadd float %2868, %2867 + %2870 = fmul float %2848, %2869 + %2871 = fadd float %2870, 0.000000e+00 + %2872 = bitcast i32 %102 to float + %2873 = bitcast i32 %102 to float + %2874 = fmul float %2872, %2873 + %2875 = fadd float %2874, 0.000000e+00 + %2876 = bitcast i32 %131 to float + %2877 = bitcast i32 %131 to float + %2878 = fmul float %2876, %2877 + %2879 = fadd float %2875, %2878 + %2880 = call float @llvm.sqrt.f32.146(float %2879) + %2881 = bitcast i32 %102 to float + %2882 = fcmp olt float %2881, 0.000000e+00 + %2883 = sext i1 %2882 to i32 + %2884 = bitcast i32 %102 to float + %2885 = fcmp ogt float %2884, 0.000000e+00 + %2886 = zext i1 %2885 to i32 + %2887 = add nsw i32 %2883, %2886 + %2888 = sitofp i32 %2887 to float + %2889 = fneg float %2888 + %2890 = fmul float %2880, %2889 + %2891 = fmul float %2890, 0.000000e+00 + %2892 = bitcast i32 %131 to float + %2893 = fadd float %2892, %2891 + %2894 = bitcast i32 %102 to float + %2895 = bitcast i32 %102 to float + %2896 = fmul float %2894, %2895 + %2897 = fadd float %2896, 0.000000e+00 + %2898 = bitcast i32 %131 to float + %2899 = bitcast i32 %131 to float + %2900 = fmul float %2898, %2899 + %2901 = fadd float %2897, %2900 + %2902 = call float @llvm.sqrt.f32.147(float %2901) + %2903 = bitcast i32 %102 to float + %2904 = fcmp olt float %2903, 0.000000e+00 + %2905 = sext i1 %2904 to i32 + %2906 = bitcast i32 %102 to float + %2907 = fcmp ogt float %2906, 0.000000e+00 + %2908 = zext i1 %2907 to i32 + %2909 = add nsw i32 %2905, %2908 + %2910 = sitofp i32 %2909 to float + %2911 = fneg float %2910 + %2912 = fmul float %2902, %2911 + %2913 = fmul float %2912, 0.000000e+00 + %2914 = bitcast i32 %131 to float + %2915 = fadd float %2914, %2913 + %2916 = fmul float %2893, %2915 + %2917 = fadd float %2871, %2916 + %2918 = call float @llvm.sqrt.f32.148(float %2917) + %2919 = fadd float %2918, 0.000000e+00 + %2920 = fdiv float %2827, %2919 + %2921 = fmul float %2920, 2.000000e+00 + %2922 = bitcast i32 %102 to float + %2923 = bitcast i32 %102 to float + %2924 = fmul float %2922, %2923 + %2925 = fadd float %2924, 0.000000e+00 + %2926 = bitcast i32 %131 to float + %2927 = bitcast i32 %131 to float + %2928 = fmul float %2926, %2927 + %2929 = fadd float %2925, %2928 + %2930 = call float @llvm.sqrt.f32.149(float %2929) + %2931 = bitcast i32 %102 to float + %2932 = fcmp olt float %2931, 0.000000e+00 + %2933 = sext i1 %2932 to i32 + %2934 = bitcast i32 %102 to float + %2935 = fcmp ogt float %2934, 0.000000e+00 + %2936 = zext i1 %2935 to i32 + %2937 = add nsw i32 %2933, %2936 + %2938 = sitofp i32 %2937 to float + %2939 = fneg float %2938 + %2940 = fmul float %2930, %2939 + %2941 = bitcast i32 %102 to float + %2942 = fadd float %2941, %2940 + %2943 = bitcast i32 %102 to float + %2944 = bitcast i32 %102 to float + %2945 = fmul float %2943, %2944 + %2946 = fadd float %2945, 0.000000e+00 + %2947 = bitcast i32 %131 to float + %2948 = bitcast i32 %131 to float + %2949 = fmul float %2947, %2948 + %2950 = fadd float %2946, %2949 + %2951 = call float @llvm.sqrt.f32.150(float %2950) + %2952 = bitcast i32 %102 to float + %2953 = fcmp olt float %2952, 0.000000e+00 + %2954 = sext i1 %2953 to i32 + %2955 = bitcast i32 %102 to float + %2956 = fcmp ogt float %2955, 0.000000e+00 + %2957 = zext i1 %2956 to i32 + %2958 = add nsw i32 %2954, %2957 + %2959 = sitofp i32 %2958 to float + %2960 = fneg float %2959 + %2961 = fmul float %2951, %2960 + %2962 = 
bitcast i32 %102 to float + %2963 = fadd float %2962, %2961 + %2964 = bitcast i32 %102 to float + %2965 = bitcast i32 %102 to float + %2966 = fmul float %2964, %2965 + %2967 = fadd float %2966, 0.000000e+00 + %2968 = bitcast i32 %131 to float + %2969 = bitcast i32 %131 to float + %2970 = fmul float %2968, %2969 + %2971 = fadd float %2967, %2970 + %2972 = call float @llvm.sqrt.f32.151(float %2971) + %2973 = bitcast i32 %102 to float + %2974 = fcmp olt float %2973, 0.000000e+00 + %2975 = sext i1 %2974 to i32 + %2976 = bitcast i32 %102 to float + %2977 = fcmp ogt float %2976, 0.000000e+00 + %2978 = zext i1 %2977 to i32 + %2979 = add nsw i32 %2975, %2978 + %2980 = sitofp i32 %2979 to float + %2981 = fneg float %2980 + %2982 = fmul float %2972, %2981 + %2983 = bitcast i32 %102 to float + %2984 = fadd float %2983, %2982 + %2985 = fmul float %2963, %2984 + %2986 = fadd float %2985, 0.000000e+00 + %2987 = bitcast i32 %102 to float + %2988 = bitcast i32 %102 to float + %2989 = fmul float %2987, %2988 + %2990 = fadd float %2989, 0.000000e+00 + %2991 = bitcast i32 %131 to float + %2992 = bitcast i32 %131 to float + %2993 = fmul float %2991, %2992 + %2994 = fadd float %2990, %2993 + %2995 = call float @llvm.sqrt.f32.152(float %2994) + %2996 = bitcast i32 %102 to float + %2997 = fcmp olt float %2996, 0.000000e+00 + %2998 = sext i1 %2997 to i32 + %2999 = bitcast i32 %102 to float + %3000 = fcmp ogt float %2999, 0.000000e+00 + %3001 = zext i1 %3000 to i32 + %3002 = add nsw i32 %2998, %3001 + %3003 = sitofp i32 %3002 to float + %3004 = fneg float %3003 + %3005 = fmul float %2995, %3004 + %3006 = fmul float %3005, 0.000000e+00 + %3007 = bitcast i32 %131 to float + %3008 = fadd float %3007, %3006 + %3009 = bitcast i32 %102 to float + %3010 = bitcast i32 %102 to float + %3011 = fmul float %3009, %3010 + %3012 = fadd float %3011, 0.000000e+00 + %3013 = bitcast i32 %131 to float + %3014 = bitcast i32 %131 to float + %3015 = fmul float %3013, %3014 + %3016 = fadd float %3012, %3015 + %3017 = call float @llvm.sqrt.f32.153(float %3016) + %3018 = bitcast i32 %102 to float + %3019 = fcmp olt float %3018, 0.000000e+00 + %3020 = sext i1 %3019 to i32 + %3021 = bitcast i32 %102 to float + %3022 = fcmp ogt float %3021, 0.000000e+00 + %3023 = zext i1 %3022 to i32 + %3024 = add nsw i32 %3020, %3023 + %3025 = sitofp i32 %3024 to float + %3026 = fneg float %3025 + %3027 = fmul float %3017, %3026 + %3028 = fmul float %3027, 0.000000e+00 + %3029 = bitcast i32 %131 to float + %3030 = fadd float %3029, %3028 + %3031 = fmul float %3008, %3030 + %3032 = fadd float %2986, %3031 + %3033 = call float @llvm.sqrt.f32.154(float %3032) + %3034 = fadd float %3033, 0.000000e+00 + %3035 = fdiv float %2942, %3034 + %3036 = fmul float %2921, %3035 + %3037 = fneg float %3036 + %3038 = fmul float %3037, %2797 + %3039 = fadd float %3038, 0.000000e+00 + %3040 = bitcast i32 %102 to float + %3041 = bitcast i32 %102 to float + %3042 = fmul float %3040, %3041 + %3043 = fadd float %3042, 0.000000e+00 + %3044 = bitcast i32 %131 to float + %3045 = bitcast i32 %131 to float + %3046 = fmul float %3044, %3045 + %3047 = fadd float %3043, %3046 + %3048 = call float @llvm.sqrt.f32.155(float %3047) + %3049 = bitcast i32 %102 to float + %3050 = fcmp olt float %3049, 0.000000e+00 + %3051 = sext i1 %3050 to i32 + %3052 = bitcast i32 %102 to float + %3053 = fcmp ogt float %3052, 0.000000e+00 + %3054 = zext i1 %3053 to i32 + %3055 = add nsw i32 %3051, %3054 + %3056 = sitofp i32 %3055 to float + %3057 = fneg float %3056 + %3058 = fmul float %3048, %3057 + %3059 = 
fmul float %3058, 0.000000e+00 + %3060 = bitcast i32 %131 to float + %3061 = fadd float %3060, %3059 + %3062 = bitcast i32 %102 to float + %3063 = bitcast i32 %102 to float + %3064 = fmul float %3062, %3063 + %3065 = fadd float %3064, 0.000000e+00 + %3066 = bitcast i32 %131 to float + %3067 = bitcast i32 %131 to float + %3068 = fmul float %3066, %3067 + %3069 = fadd float %3065, %3068 + %3070 = call float @llvm.sqrt.f32.156(float %3069) + %3071 = bitcast i32 %102 to float + %3072 = fcmp olt float %3071, 0.000000e+00 + %3073 = sext i1 %3072 to i32 + %3074 = bitcast i32 %102 to float + %3075 = fcmp ogt float %3074, 0.000000e+00 + %3076 = zext i1 %3075 to i32 + %3077 = add nsw i32 %3073, %3076 + %3078 = sitofp i32 %3077 to float + %3079 = fneg float %3078 + %3080 = fmul float %3070, %3079 + %3081 = bitcast i32 %102 to float + %3082 = fadd float %3081, %3080 + %3083 = bitcast i32 %102 to float + %3084 = bitcast i32 %102 to float + %3085 = fmul float %3083, %3084 + %3086 = fadd float %3085, 0.000000e+00 + %3087 = bitcast i32 %131 to float + %3088 = bitcast i32 %131 to float + %3089 = fmul float %3087, %3088 + %3090 = fadd float %3086, %3089 + %3091 = call float @llvm.sqrt.f32.157(float %3090) + %3092 = bitcast i32 %102 to float + %3093 = fcmp olt float %3092, 0.000000e+00 + %3094 = sext i1 %3093 to i32 + %3095 = bitcast i32 %102 to float + %3096 = fcmp ogt float %3095, 0.000000e+00 + %3097 = zext i1 %3096 to i32 + %3098 = add nsw i32 %3094, %3097 + %3099 = sitofp i32 %3098 to float + %3100 = fneg float %3099 + %3101 = fmul float %3091, %3100 + %3102 = bitcast i32 %102 to float + %3103 = fadd float %3102, %3101 + %3104 = fmul float %3082, %3103 + %3105 = fadd float %3104, 0.000000e+00 + %3106 = bitcast i32 %102 to float + %3107 = bitcast i32 %102 to float + %3108 = fmul float %3106, %3107 + %3109 = fadd float %3108, 0.000000e+00 + %3110 = bitcast i32 %131 to float + %3111 = bitcast i32 %131 to float + %3112 = fmul float %3110, %3111 + %3113 = fadd float %3109, %3112 + %3114 = call float @llvm.sqrt.f32.158(float %3113) + %3115 = bitcast i32 %102 to float + %3116 = fcmp olt float %3115, 0.000000e+00 + %3117 = sext i1 %3116 to i32 + %3118 = bitcast i32 %102 to float + %3119 = fcmp ogt float %3118, 0.000000e+00 + %3120 = zext i1 %3119 to i32 + %3121 = add nsw i32 %3117, %3120 + %3122 = sitofp i32 %3121 to float + %3123 = fneg float %3122 + %3124 = fmul float %3114, %3123 + %3125 = fmul float %3124, 0.000000e+00 + %3126 = bitcast i32 %131 to float + %3127 = fadd float %3126, %3125 + %3128 = bitcast i32 %102 to float + %3129 = bitcast i32 %102 to float + %3130 = fmul float %3128, %3129 + %3131 = fadd float %3130, 0.000000e+00 + %3132 = bitcast i32 %131 to float + %3133 = bitcast i32 %131 to float + %3134 = fmul float %3132, %3133 + %3135 = fadd float %3131, %3134 + %3136 = call float @llvm.sqrt.f32.159(float %3135) + %3137 = bitcast i32 %102 to float + %3138 = fcmp olt float %3137, 0.000000e+00 + %3139 = sext i1 %3138 to i32 + %3140 = bitcast i32 %102 to float + %3141 = fcmp ogt float %3140, 0.000000e+00 + %3142 = zext i1 %3141 to i32 + %3143 = add nsw i32 %3139, %3142 + %3144 = sitofp i32 %3143 to float + %3145 = fneg float %3144 + %3146 = fmul float %3136, %3145 + %3147 = fmul float %3146, 0.000000e+00 + %3148 = bitcast i32 %131 to float + %3149 = fadd float %3148, %3147 + %3150 = fmul float %3127, %3149 + %3151 = fadd float %3105, %3150 + %3152 = call float @llvm.sqrt.f32.160(float %3151) + %3153 = fadd float %3152, 0.000000e+00 + %3154 = fdiv float %3061, %3153 + %3155 = fmul float %3154, 
2.000000e+00 + %3156 = bitcast i32 %102 to float + %3157 = bitcast i32 %102 to float + %3158 = fmul float %3156, %3157 + %3159 = fadd float %3158, 0.000000e+00 + %3160 = bitcast i32 %131 to float + %3161 = bitcast i32 %131 to float + %3162 = fmul float %3160, %3161 + %3163 = fadd float %3159, %3162 + %3164 = call float @llvm.sqrt.f32.161(float %3163) + %3165 = bitcast i32 %102 to float + %3166 = fcmp olt float %3165, 0.000000e+00 + %3167 = sext i1 %3166 to i32 + %3168 = bitcast i32 %102 to float + %3169 = fcmp ogt float %3168, 0.000000e+00 + %3170 = zext i1 %3169 to i32 + %3171 = add nsw i32 %3167, %3170 + %3172 = sitofp i32 %3171 to float + %3173 = fneg float %3172 + %3174 = fmul float %3164, %3173 + %3175 = fmul float %3174, 0.000000e+00 + %3176 = bitcast i32 %131 to float + %3177 = fadd float %3176, %3175 + %3178 = bitcast i32 %102 to float + %3179 = bitcast i32 %102 to float + %3180 = fmul float %3178, %3179 + %3181 = fadd float %3180, 0.000000e+00 + %3182 = bitcast i32 %131 to float + %3183 = bitcast i32 %131 to float + %3184 = fmul float %3182, %3183 + %3185 = fadd float %3181, %3184 + %3186 = call float @llvm.sqrt.f32.162(float %3185) + %3187 = bitcast i32 %102 to float + %3188 = fcmp olt float %3187, 0.000000e+00 + %3189 = sext i1 %3188 to i32 + %3190 = bitcast i32 %102 to float + %3191 = fcmp ogt float %3190, 0.000000e+00 + %3192 = zext i1 %3191 to i32 + %3193 = add nsw i32 %3189, %3192 + %3194 = sitofp i32 %3193 to float + %3195 = fneg float %3194 + %3196 = fmul float %3186, %3195 + %3197 = bitcast i32 %102 to float + %3198 = fadd float %3197, %3196 + %3199 = bitcast i32 %102 to float + %3200 = bitcast i32 %102 to float + %3201 = fmul float %3199, %3200 + %3202 = fadd float %3201, 0.000000e+00 + %3203 = bitcast i32 %131 to float + %3204 = bitcast i32 %131 to float + %3205 = fmul float %3203, %3204 + %3206 = fadd float %3202, %3205 + %3207 = call float @llvm.sqrt.f32.163(float %3206) + %3208 = bitcast i32 %102 to float + %3209 = fcmp olt float %3208, 0.000000e+00 + %3210 = sext i1 %3209 to i32 + %3211 = bitcast i32 %102 to float + %3212 = fcmp ogt float %3211, 0.000000e+00 + %3213 = zext i1 %3212 to i32 + %3214 = add nsw i32 %3210, %3213 + %3215 = sitofp i32 %3214 to float + %3216 = fneg float %3215 + %3217 = fmul float %3207, %3216 + %3218 = bitcast i32 %102 to float + %3219 = fadd float %3218, %3217 + %3220 = fmul float %3198, %3219 + %3221 = fadd float %3220, 0.000000e+00 + %3222 = bitcast i32 %102 to float + %3223 = bitcast i32 %102 to float + %3224 = fmul float %3222, %3223 + %3225 = fadd float %3224, 0.000000e+00 + %3226 = bitcast i32 %131 to float + %3227 = bitcast i32 %131 to float + %3228 = fmul float %3226, %3227 + %3229 = fadd float %3225, %3228 + %3230 = call float @llvm.sqrt.f32.164(float %3229) + %3231 = bitcast i32 %102 to float + %3232 = fcmp olt float %3231, 0.000000e+00 + %3233 = sext i1 %3232 to i32 + %3234 = bitcast i32 %102 to float + %3235 = fcmp ogt float %3234, 0.000000e+00 + %3236 = zext i1 %3235 to i32 + %3237 = add nsw i32 %3233, %3236 + %3238 = sitofp i32 %3237 to float + %3239 = fneg float %3238 + %3240 = fmul float %3230, %3239 + %3241 = fmul float %3240, 0.000000e+00 + %3242 = bitcast i32 %131 to float + %3243 = fadd float %3242, %3241 + %3244 = bitcast i32 %102 to float + %3245 = bitcast i32 %102 to float + %3246 = fmul float %3244, %3245 + %3247 = fadd float %3246, 0.000000e+00 + %3248 = bitcast i32 %131 to float + %3249 = bitcast i32 %131 to float + %3250 = fmul float %3248, %3249 + %3251 = fadd float %3247, %3250 + %3252 = call float 
@llvm.sqrt.f32.165(float %3251) + %3253 = bitcast i32 %102 to float + %3254 = fcmp olt float %3253, 0.000000e+00 + %3255 = sext i1 %3254 to i32 + %3256 = bitcast i32 %102 to float + %3257 = fcmp ogt float %3256, 0.000000e+00 + %3258 = zext i1 %3257 to i32 + %3259 = add nsw i32 %3255, %3258 + %3260 = sitofp i32 %3259 to float + %3261 = fneg float %3260 + %3262 = fmul float %3252, %3261 + %3263 = fmul float %3262, 0.000000e+00 + %3264 = bitcast i32 %131 to float + %3265 = fadd float %3264, %3263 + %3266 = fmul float %3243, %3265 + %3267 = fadd float %3221, %3266 + %3268 = call float @llvm.sqrt.f32.166(float %3267) + %3269 = fadd float %3268, 0.000000e+00 + %3270 = fdiv float %3177, %3269 + %3271 = fmul float %3155, %3270 + %3272 = fsub float 1.000000e+00, %3271 + %3273 = getelementptr float, float* %0, i32 0 + %3274 = getelementptr inbounds float, float* %3273, i64 2 + %3275 = load float, float* %3274, align 4 + %3276 = fmul float %3272, %3275 + %3277 = fadd float %3039, %3276 + %3278 = insertelement <4 x float> zeroinitializer, float %3277, i32 0 + %3279 = insertelement <4 x float> %3278, float 0.000000e+00, i32 1 + %3280 = insertelement <4 x float> %3279, float 0.000000e+00, i32 2 + %3281 = insertelement <4 x float> %3280, float 0.000000e+00, i32 3 + %3282 = extractelement <4 x float> %3281, i32 0 + %3283 = getelementptr float, float* %2, i32 0 + %3284 = getelementptr inbounds float, float* %3283, i64 2 + store float %3282, float* %3284, align 4 + %3285 = extractelement <4 x float> %3281, i32 1 + %3286 = getelementptr float, float* %2, i32 0 + %3287 = getelementptr inbounds float, float* %3286, i64 3 + store float %3285, float* %3287, align 4 + %3288 = bitcast i32 %102 to float + %3289 = bitcast i32 %102 to float + %3290 = fmul float %3288, %3289 + %3291 = fadd float %3290, 0.000000e+00 + %3292 = bitcast i32 %131 to float + %3293 = bitcast i32 %131 to float + %3294 = fmul float %3292, %3293 + %3295 = fadd float %3291, %3294 + %3296 = call float @llvm.sqrt.f32.167(float %3295) + %3297 = bitcast i32 %102 to float + %3298 = fcmp olt float %3297, 0.000000e+00 + %3299 = sext i1 %3298 to i32 + %3300 = bitcast i32 %102 to float + %3301 = fcmp ogt float %3300, 0.000000e+00 + %3302 = zext i1 %3301 to i32 + %3303 = add nsw i32 %3299, %3302 + %3304 = sitofp i32 %3303 to float + %3305 = fneg float %3304 + %3306 = fmul float %3296, %3305 + %3307 = fmul float %3306, 0.000000e+00 + %3308 = bitcast i32 %131 to float + %3309 = fadd float %3308, %3307 + %3310 = bitcast i32 %102 to float + %3311 = bitcast i32 %102 to float + %3312 = fmul float %3310, %3311 + %3313 = fadd float %3312, 0.000000e+00 + %3314 = bitcast i32 %131 to float + %3315 = bitcast i32 %131 to float + %3316 = fmul float %3314, %3315 + %3317 = fadd float %3313, %3316 + %3318 = call float @llvm.sqrt.f32.168(float %3317) + %3319 = bitcast i32 %102 to float + %3320 = fcmp olt float %3319, 0.000000e+00 + %3321 = sext i1 %3320 to i32 + %3322 = bitcast i32 %102 to float + %3323 = fcmp ogt float %3322, 0.000000e+00 + %3324 = zext i1 %3323 to i32 + %3325 = add nsw i32 %3321, %3324 + %3326 = sitofp i32 %3325 to float + %3327 = fneg float %3326 + %3328 = fmul float %3318, %3327 + %3329 = bitcast i32 %102 to float + %3330 = fadd float %3329, %3328 + %3331 = bitcast i32 %102 to float + %3332 = bitcast i32 %102 to float + %3333 = fmul float %3331, %3332 + %3334 = fadd float %3333, 0.000000e+00 + %3335 = bitcast i32 %131 to float + %3336 = bitcast i32 %131 to float + %3337 = fmul float %3335, %3336 + %3338 = fadd float %3334, %3337 + %3339 = call float 
@llvm.sqrt.f32.169(float %3338) + %3340 = bitcast i32 %102 to float + %3341 = fcmp olt float %3340, 0.000000e+00 + %3342 = sext i1 %3341 to i32 + %3343 = bitcast i32 %102 to float + %3344 = fcmp ogt float %3343, 0.000000e+00 + %3345 = zext i1 %3344 to i32 + %3346 = add nsw i32 %3342, %3345 + %3347 = sitofp i32 %3346 to float + %3348 = fneg float %3347 + %3349 = fmul float %3339, %3348 + %3350 = bitcast i32 %102 to float + %3351 = fadd float %3350, %3349 + %3352 = fmul float %3330, %3351 + %3353 = fadd float %3352, 0.000000e+00 + %3354 = bitcast i32 %102 to float + %3355 = bitcast i32 %102 to float + %3356 = fmul float %3354, %3355 + %3357 = fadd float %3356, 0.000000e+00 + %3358 = bitcast i32 %131 to float + %3359 = bitcast i32 %131 to float + %3360 = fmul float %3358, %3359 + %3361 = fadd float %3357, %3360 + %3362 = call float @llvm.sqrt.f32.170(float %3361) + %3363 = bitcast i32 %102 to float + %3364 = fcmp olt float %3363, 0.000000e+00 + %3365 = sext i1 %3364 to i32 + %3366 = bitcast i32 %102 to float + %3367 = fcmp ogt float %3366, 0.000000e+00 + %3368 = zext i1 %3367 to i32 + %3369 = add nsw i32 %3365, %3368 + %3370 = sitofp i32 %3369 to float + %3371 = fneg float %3370 + %3372 = fmul float %3362, %3371 + %3373 = fmul float %3372, 0.000000e+00 + %3374 = bitcast i32 %131 to float + %3375 = fadd float %3374, %3373 + %3376 = bitcast i32 %102 to float + %3377 = bitcast i32 %102 to float + %3378 = fmul float %3376, %3377 + %3379 = fadd float %3378, 0.000000e+00 + %3380 = bitcast i32 %131 to float + %3381 = bitcast i32 %131 to float + %3382 = fmul float %3380, %3381 + %3383 = fadd float %3379, %3382 + %3384 = call float @llvm.sqrt.f32.171(float %3383) + %3385 = bitcast i32 %102 to float + %3386 = fcmp olt float %3385, 0.000000e+00 + %3387 = sext i1 %3386 to i32 + %3388 = bitcast i32 %102 to float + %3389 = fcmp ogt float %3388, 0.000000e+00 + %3390 = zext i1 %3389 to i32 + %3391 = add nsw i32 %3387, %3390 + %3392 = sitofp i32 %3391 to float + %3393 = fneg float %3392 + %3394 = fmul float %3384, %3393 + %3395 = fmul float %3394, 0.000000e+00 + %3396 = bitcast i32 %131 to float + %3397 = fadd float %3396, %3395 + %3398 = fmul float %3375, %3397 + %3399 = fadd float %3353, %3398 + %3400 = call float @llvm.sqrt.f32.172(float %3399) + %3401 = fadd float %3400, 0.000000e+00 + %3402 = fdiv float %3309, %3401 + %3403 = fmul float %3402, 2.000000e+00 + %3404 = bitcast i32 %102 to float + %3405 = bitcast i32 %102 to float + %3406 = fmul float %3404, %3405 + %3407 = fadd float %3406, 0.000000e+00 + %3408 = bitcast i32 %131 to float + %3409 = bitcast i32 %131 to float + %3410 = fmul float %3408, %3409 + %3411 = fadd float %3407, %3410 + %3412 = call float @llvm.sqrt.f32.173(float %3411) + %3413 = bitcast i32 %102 to float + %3414 = fcmp olt float %3413, 0.000000e+00 + %3415 = sext i1 %3414 to i32 + %3416 = bitcast i32 %102 to float + %3417 = fcmp ogt float %3416, 0.000000e+00 + %3418 = zext i1 %3417 to i32 + %3419 = add nsw i32 %3415, %3418 + %3420 = sitofp i32 %3419 to float + %3421 = fneg float %3420 + %3422 = fmul float %3412, %3421 + %3423 = bitcast i32 %102 to float + %3424 = fadd float %3423, %3422 + %3425 = bitcast i32 %102 to float + %3426 = bitcast i32 %102 to float + %3427 = fmul float %3425, %3426 + %3428 = fadd float %3427, 0.000000e+00 + %3429 = bitcast i32 %131 to float + %3430 = bitcast i32 %131 to float + %3431 = fmul float %3429, %3430 + %3432 = fadd float %3428, %3431 + %3433 = call float @llvm.sqrt.f32.174(float %3432) + %3434 = bitcast i32 %102 to float + %3435 = fcmp olt float 
%3434, 0.000000e+00 + %3436 = sext i1 %3435 to i32 + %3437 = bitcast i32 %102 to float + %3438 = fcmp ogt float %3437, 0.000000e+00 + %3439 = zext i1 %3438 to i32 + %3440 = add nsw i32 %3436, %3439 + %3441 = sitofp i32 %3440 to float + %3442 = fneg float %3441 + %3443 = fmul float %3433, %3442 + %3444 = bitcast i32 %102 to float + %3445 = fadd float %3444, %3443 + %3446 = bitcast i32 %102 to float + %3447 = bitcast i32 %102 to float + %3448 = fmul float %3446, %3447 + %3449 = fadd float %3448, 0.000000e+00 + %3450 = bitcast i32 %131 to float + %3451 = bitcast i32 %131 to float + %3452 = fmul float %3450, %3451 + %3453 = fadd float %3449, %3452 + %3454 = call float @llvm.sqrt.f32.175(float %3453) + %3455 = bitcast i32 %102 to float + %3456 = fcmp olt float %3455, 0.000000e+00 + %3457 = sext i1 %3456 to i32 + %3458 = bitcast i32 %102 to float + %3459 = fcmp ogt float %3458, 0.000000e+00 + %3460 = zext i1 %3459 to i32 + %3461 = add nsw i32 %3457, %3460 + %3462 = sitofp i32 %3461 to float + %3463 = fneg float %3462 + %3464 = fmul float %3454, %3463 + %3465 = bitcast i32 %102 to float + %3466 = fadd float %3465, %3464 + %3467 = fmul float %3445, %3466 + %3468 = fadd float %3467, 0.000000e+00 + %3469 = bitcast i32 %102 to float + %3470 = bitcast i32 %102 to float + %3471 = fmul float %3469, %3470 + %3472 = fadd float %3471, 0.000000e+00 + %3473 = bitcast i32 %131 to float + %3474 = bitcast i32 %131 to float + %3475 = fmul float %3473, %3474 + %3476 = fadd float %3472, %3475 + %3477 = call float @llvm.sqrt.f32.176(float %3476) + %3478 = bitcast i32 %102 to float + %3479 = fcmp olt float %3478, 0.000000e+00 + %3480 = sext i1 %3479 to i32 + %3481 = bitcast i32 %102 to float + %3482 = fcmp ogt float %3481, 0.000000e+00 + %3483 = zext i1 %3482 to i32 + %3484 = add nsw i32 %3480, %3483 + %3485 = sitofp i32 %3484 to float + %3486 = fneg float %3485 + %3487 = fmul float %3477, %3486 + %3488 = fmul float %3487, 0.000000e+00 + %3489 = bitcast i32 %131 to float + %3490 = fadd float %3489, %3488 + %3491 = bitcast i32 %102 to float + %3492 = bitcast i32 %102 to float + %3493 = fmul float %3491, %3492 + %3494 = fadd float %3493, 0.000000e+00 + %3495 = bitcast i32 %131 to float + %3496 = bitcast i32 %131 to float + %3497 = fmul float %3495, %3496 + %3498 = fadd float %3494, %3497 + %3499 = call float @llvm.sqrt.f32.177(float %3498) + %3500 = bitcast i32 %102 to float + %3501 = fcmp olt float %3500, 0.000000e+00 + %3502 = sext i1 %3501 to i32 + %3503 = bitcast i32 %102 to float + %3504 = fcmp ogt float %3503, 0.000000e+00 + %3505 = zext i1 %3504 to i32 + %3506 = add nsw i32 %3502, %3505 + %3507 = sitofp i32 %3506 to float + %3508 = fneg float %3507 + %3509 = fmul float %3499, %3508 + %3510 = fmul float %3509, 0.000000e+00 + %3511 = bitcast i32 %131 to float + %3512 = fadd float %3511, %3510 + %3513 = fmul float %3490, %3512 + %3514 = fadd float %3468, %3513 + %3515 = call float @llvm.sqrt.f32.178(float %3514) + %3516 = fadd float %3515, 0.000000e+00 + %3517 = fdiv float %3424, %3516 + %3518 = fmul float %3403, %3517 + %3519 = fneg float %3518 + %3520 = insertelement <4 x float> zeroinitializer, float %3519, i32 0 + %3521 = insertelement <4 x float> %3520, float 0.000000e+00, i32 1 + %3522 = insertelement <4 x float> %3521, float 0.000000e+00, i32 2 + %3523 = insertelement <4 x float> %3522, float 0.000000e+00, i32 3 + %3524 = getelementptr float, float* %0, i32 0 + %3525 = getelementptr inbounds float, float* %3524, i64 1 + %3526 = load float, float* %3525, align 4 + %3527 = insertelement <4 x float> 
zeroinitializer, float %3526, i32 0 + %3528 = insertelement <4 x float> %3527, float 0.000000e+00, i32 1 + %3529 = insertelement <4 x float> %3528, float 0.000000e+00, i32 2 + %3530 = insertelement <4 x float> %3529, float 0.000000e+00, i32 3 + %3531 = call <4 x float> @llvm.fma.f32.179(<4 x float> %3523, <4 x float> %3530, <4 x float> zeroinitializer) + %3532 = extractelement <4 x float> %3531, i32 0 + %3533 = getelementptr float, float* %2, i32 0 + %3534 = getelementptr inbounds float, float* %3533, i64 3 + store float %3532, float* %3534, align 4 + %3535 = bitcast i32 %102 to float + %3536 = bitcast i32 %102 to float + %3537 = fmul float %3535, %3536 + %3538 = fadd float %3537, 0.000000e+00 + %3539 = bitcast i32 %131 to float + %3540 = bitcast i32 %131 to float + %3541 = fmul float %3539, %3540 + %3542 = fadd float %3538, %3541 + %3543 = call float @llvm.sqrt.f32.180(float %3542) + %3544 = bitcast i32 %102 to float + %3545 = fcmp olt float %3544, 0.000000e+00 + %3546 = sext i1 %3545 to i32 + %3547 = bitcast i32 %102 to float + %3548 = fcmp ogt float %3547, 0.000000e+00 + %3549 = zext i1 %3548 to i32 + %3550 = add nsw i32 %3546, %3549 + %3551 = sitofp i32 %3550 to float + %3552 = fneg float %3551 + %3553 = fmul float %3543, %3552 + %3554 = fmul float %3553, 0.000000e+00 + %3555 = bitcast i32 %131 to float + %3556 = fadd float %3555, %3554 + %3557 = bitcast i32 %102 to float + %3558 = bitcast i32 %102 to float + %3559 = fmul float %3557, %3558 + %3560 = fadd float %3559, 0.000000e+00 + %3561 = bitcast i32 %131 to float + %3562 = bitcast i32 %131 to float + %3563 = fmul float %3561, %3562 + %3564 = fadd float %3560, %3563 + %3565 = call float @llvm.sqrt.f32.181(float %3564) + %3566 = bitcast i32 %102 to float + %3567 = fcmp olt float %3566, 0.000000e+00 + %3568 = sext i1 %3567 to i32 + %3569 = bitcast i32 %102 to float + %3570 = fcmp ogt float %3569, 0.000000e+00 + %3571 = zext i1 %3570 to i32 + %3572 = add nsw i32 %3568, %3571 + %3573 = sitofp i32 %3572 to float + %3574 = fneg float %3573 + %3575 = fmul float %3565, %3574 + %3576 = bitcast i32 %102 to float + %3577 = fadd float %3576, %3575 + %3578 = bitcast i32 %102 to float + %3579 = bitcast i32 %102 to float + %3580 = fmul float %3578, %3579 + %3581 = fadd float %3580, 0.000000e+00 + %3582 = bitcast i32 %131 to float + %3583 = bitcast i32 %131 to float + %3584 = fmul float %3582, %3583 + %3585 = fadd float %3581, %3584 + %3586 = call float @llvm.sqrt.f32.182(float %3585) + %3587 = bitcast i32 %102 to float + %3588 = fcmp olt float %3587, 0.000000e+00 + %3589 = sext i1 %3588 to i32 + %3590 = bitcast i32 %102 to float + %3591 = fcmp ogt float %3590, 0.000000e+00 + %3592 = zext i1 %3591 to i32 + %3593 = add nsw i32 %3589, %3592 + %3594 = sitofp i32 %3593 to float + %3595 = fneg float %3594 + %3596 = fmul float %3586, %3595 + %3597 = bitcast i32 %102 to float + %3598 = fadd float %3597, %3596 + %3599 = fmul float %3577, %3598 + %3600 = fadd float %3599, 0.000000e+00 + %3601 = bitcast i32 %102 to float + %3602 = bitcast i32 %102 to float + %3603 = fmul float %3601, %3602 + %3604 = fadd float %3603, 0.000000e+00 + %3605 = bitcast i32 %131 to float + %3606 = bitcast i32 %131 to float + %3607 = fmul float %3605, %3606 + %3608 = fadd float %3604, %3607 + %3609 = call float @llvm.sqrt.f32.183(float %3608) + %3610 = bitcast i32 %102 to float + %3611 = fcmp olt float %3610, 0.000000e+00 + %3612 = sext i1 %3611 to i32 + %3613 = bitcast i32 %102 to float + %3614 = fcmp ogt float %3613, 0.000000e+00 + %3615 = zext i1 %3614 to i32 + %3616 = add nsw 
i32 %3612, %3615 + %3617 = sitofp i32 %3616 to float + %3618 = fneg float %3617 + %3619 = fmul float %3609, %3618 + %3620 = fmul float %3619, 0.000000e+00 + %3621 = bitcast i32 %131 to float + %3622 = fadd float %3621, %3620 + %3623 = bitcast i32 %102 to float + %3624 = bitcast i32 %102 to float + %3625 = fmul float %3623, %3624 + %3626 = fadd float %3625, 0.000000e+00 + %3627 = bitcast i32 %131 to float + %3628 = bitcast i32 %131 to float + %3629 = fmul float %3627, %3628 + %3630 = fadd float %3626, %3629 + %3631 = call float @llvm.sqrt.f32.184(float %3630) + %3632 = bitcast i32 %102 to float + %3633 = fcmp olt float %3632, 0.000000e+00 + %3634 = sext i1 %3633 to i32 + %3635 = bitcast i32 %102 to float + %3636 = fcmp ogt float %3635, 0.000000e+00 + %3637 = zext i1 %3636 to i32 + %3638 = add nsw i32 %3634, %3637 + %3639 = sitofp i32 %3638 to float + %3640 = fneg float %3639 + %3641 = fmul float %3631, %3640 + %3642 = fmul float %3641, 0.000000e+00 + %3643 = bitcast i32 %131 to float + %3644 = fadd float %3643, %3642 + %3645 = fmul float %3622, %3644 + %3646 = fadd float %3600, %3645 + %3647 = call float @llvm.sqrt.f32.185(float %3646) + %3648 = fadd float %3647, 0.000000e+00 + %3649 = fdiv float %3556, %3648 + %3650 = fmul float %3649, 2.000000e+00 + %3651 = bitcast i32 %102 to float + %3652 = bitcast i32 %102 to float + %3653 = fmul float %3651, %3652 + %3654 = fadd float %3653, 0.000000e+00 + %3655 = bitcast i32 %131 to float + %3656 = bitcast i32 %131 to float + %3657 = fmul float %3655, %3656 + %3658 = fadd float %3654, %3657 + %3659 = call float @llvm.sqrt.f32.186(float %3658) + %3660 = bitcast i32 %102 to float + %3661 = fcmp olt float %3660, 0.000000e+00 + %3662 = sext i1 %3661 to i32 + %3663 = bitcast i32 %102 to float + %3664 = fcmp ogt float %3663, 0.000000e+00 + %3665 = zext i1 %3664 to i32 + %3666 = add nsw i32 %3662, %3665 + %3667 = sitofp i32 %3666 to float + %3668 = fneg float %3667 + %3669 = fmul float %3659, %3668 + %3670 = bitcast i32 %102 to float + %3671 = fadd float %3670, %3669 + %3672 = bitcast i32 %102 to float + %3673 = bitcast i32 %102 to float + %3674 = fmul float %3672, %3673 + %3675 = fadd float %3674, 0.000000e+00 + %3676 = bitcast i32 %131 to float + %3677 = bitcast i32 %131 to float + %3678 = fmul float %3676, %3677 + %3679 = fadd float %3675, %3678 + %3680 = call float @llvm.sqrt.f32.187(float %3679) + %3681 = bitcast i32 %102 to float + %3682 = fcmp olt float %3681, 0.000000e+00 + %3683 = sext i1 %3682 to i32 + %3684 = bitcast i32 %102 to float + %3685 = fcmp ogt float %3684, 0.000000e+00 + %3686 = zext i1 %3685 to i32 + %3687 = add nsw i32 %3683, %3686 + %3688 = sitofp i32 %3687 to float + %3689 = fneg float %3688 + %3690 = fmul float %3680, %3689 + %3691 = bitcast i32 %102 to float + %3692 = fadd float %3691, %3690 + %3693 = bitcast i32 %102 to float + %3694 = bitcast i32 %102 to float + %3695 = fmul float %3693, %3694 + %3696 = fadd float %3695, 0.000000e+00 + %3697 = bitcast i32 %131 to float + %3698 = bitcast i32 %131 to float + %3699 = fmul float %3697, %3698 + %3700 = fadd float %3696, %3699 + %3701 = call float @llvm.sqrt.f32.188(float %3700) + %3702 = bitcast i32 %102 to float + %3703 = fcmp olt float %3702, 0.000000e+00 + %3704 = sext i1 %3703 to i32 + %3705 = bitcast i32 %102 to float + %3706 = fcmp ogt float %3705, 0.000000e+00 + %3707 = zext i1 %3706 to i32 + %3708 = add nsw i32 %3704, %3707 + %3709 = sitofp i32 %3708 to float + %3710 = fneg float %3709 + %3711 = fmul float %3701, %3710 + %3712 = bitcast i32 %102 to float + %3713 = fadd float 
%3712, %3711 + %3714 = fmul float %3692, %3713 + %3715 = fadd float %3714, 0.000000e+00 + %3716 = bitcast i32 %102 to float + %3717 = bitcast i32 %102 to float + %3718 = fmul float %3716, %3717 + %3719 = fadd float %3718, 0.000000e+00 + %3720 = bitcast i32 %131 to float + %3721 = bitcast i32 %131 to float + %3722 = fmul float %3720, %3721 + %3723 = fadd float %3719, %3722 + %3724 = call float @llvm.sqrt.f32.189(float %3723) + %3725 = bitcast i32 %102 to float + %3726 = fcmp olt float %3725, 0.000000e+00 + %3727 = sext i1 %3726 to i32 + %3728 = bitcast i32 %102 to float + %3729 = fcmp ogt float %3728, 0.000000e+00 + %3730 = zext i1 %3729 to i32 + %3731 = add nsw i32 %3727, %3730 + %3732 = sitofp i32 %3731 to float + %3733 = fneg float %3732 + %3734 = fmul float %3724, %3733 + %3735 = fmul float %3734, 0.000000e+00 + %3736 = bitcast i32 %131 to float + %3737 = fadd float %3736, %3735 + %3738 = bitcast i32 %102 to float + %3739 = bitcast i32 %102 to float + %3740 = fmul float %3738, %3739 + %3741 = fadd float %3740, 0.000000e+00 + %3742 = bitcast i32 %131 to float + %3743 = bitcast i32 %131 to float + %3744 = fmul float %3742, %3743 + %3745 = fadd float %3741, %3744 + %3746 = call float @llvm.sqrt.f32.190(float %3745) + %3747 = bitcast i32 %102 to float + %3748 = fcmp olt float %3747, 0.000000e+00 + %3749 = sext i1 %3748 to i32 + %3750 = bitcast i32 %102 to float + %3751 = fcmp ogt float %3750, 0.000000e+00 + %3752 = zext i1 %3751 to i32 + %3753 = add nsw i32 %3749, %3752 + %3754 = sitofp i32 %3753 to float + %3755 = fneg float %3754 + %3756 = fmul float %3746, %3755 + %3757 = fmul float %3756, 0.000000e+00 + %3758 = bitcast i32 %131 to float + %3759 = fadd float %3758, %3757 + %3760 = fmul float %3737, %3759 + %3761 = fadd float %3715, %3760 + %3762 = call float @llvm.sqrt.f32.191(float %3761) + %3763 = fadd float %3762, 0.000000e+00 + %3764 = fdiv float %3671, %3763 + %3765 = fmul float %3650, %3764 + %3766 = fneg float %3765 + %3767 = fmul float %3766, %3526 + %3768 = fadd float %3767, 0.000000e+00 + %3769 = bitcast i32 %102 to float + %3770 = bitcast i32 %102 to float + %3771 = fmul float %3769, %3770 + %3772 = fadd float %3771, 0.000000e+00 + %3773 = bitcast i32 %131 to float + %3774 = bitcast i32 %131 to float + %3775 = fmul float %3773, %3774 + %3776 = fadd float %3772, %3775 + %3777 = call float @llvm.sqrt.f32.192(float %3776) + %3778 = bitcast i32 %102 to float + %3779 = fcmp olt float %3778, 0.000000e+00 + %3780 = sext i1 %3779 to i32 + %3781 = bitcast i32 %102 to float + %3782 = fcmp ogt float %3781, 0.000000e+00 + %3783 = zext i1 %3782 to i32 + %3784 = add nsw i32 %3780, %3783 + %3785 = sitofp i32 %3784 to float + %3786 = fneg float %3785 + %3787 = fmul float %3777, %3786 + %3788 = fmul float %3787, 0.000000e+00 + %3789 = bitcast i32 %131 to float + %3790 = fadd float %3789, %3788 + %3791 = bitcast i32 %102 to float + %3792 = bitcast i32 %102 to float + %3793 = fmul float %3791, %3792 + %3794 = fadd float %3793, 0.000000e+00 + %3795 = bitcast i32 %131 to float + %3796 = bitcast i32 %131 to float + %3797 = fmul float %3795, %3796 + %3798 = fadd float %3794, %3797 + %3799 = call float @llvm.sqrt.f32.193(float %3798) + %3800 = bitcast i32 %102 to float + %3801 = fcmp olt float %3800, 0.000000e+00 + %3802 = sext i1 %3801 to i32 + %3803 = bitcast i32 %102 to float + %3804 = fcmp ogt float %3803, 0.000000e+00 + %3805 = zext i1 %3804 to i32 + %3806 = add nsw i32 %3802, %3805 + %3807 = sitofp i32 %3806 to float + %3808 = fneg float %3807 + %3809 = fmul float %3799, %3808 + %3810 = bitcast 
i32 %102 to float + %3811 = fadd float %3810, %3809 + %3812 = bitcast i32 %102 to float + %3813 = bitcast i32 %102 to float + %3814 = fmul float %3812, %3813 + %3815 = fadd float %3814, 0.000000e+00 + %3816 = bitcast i32 %131 to float + %3817 = bitcast i32 %131 to float + %3818 = fmul float %3816, %3817 + %3819 = fadd float %3815, %3818 + %3820 = call float @llvm.sqrt.f32.194(float %3819) + %3821 = bitcast i32 %102 to float + %3822 = fcmp olt float %3821, 0.000000e+00 + %3823 = sext i1 %3822 to i32 + %3824 = bitcast i32 %102 to float + %3825 = fcmp ogt float %3824, 0.000000e+00 + %3826 = zext i1 %3825 to i32 + %3827 = add nsw i32 %3823, %3826 + %3828 = sitofp i32 %3827 to float + %3829 = fneg float %3828 + %3830 = fmul float %3820, %3829 + %3831 = bitcast i32 %102 to float + %3832 = fadd float %3831, %3830 + %3833 = fmul float %3811, %3832 + %3834 = fadd float %3833, 0.000000e+00 + %3835 = bitcast i32 %102 to float + %3836 = bitcast i32 %102 to float + %3837 = fmul float %3835, %3836 + %3838 = fadd float %3837, 0.000000e+00 + %3839 = bitcast i32 %131 to float + %3840 = bitcast i32 %131 to float + %3841 = fmul float %3839, %3840 + %3842 = fadd float %3838, %3841 + %3843 = call float @llvm.sqrt.f32.195(float %3842) + %3844 = bitcast i32 %102 to float + %3845 = fcmp olt float %3844, 0.000000e+00 + %3846 = sext i1 %3845 to i32 + %3847 = bitcast i32 %102 to float + %3848 = fcmp ogt float %3847, 0.000000e+00 + %3849 = zext i1 %3848 to i32 + %3850 = add nsw i32 %3846, %3849 + %3851 = sitofp i32 %3850 to float + %3852 = fneg float %3851 + %3853 = fmul float %3843, %3852 + %3854 = fmul float %3853, 0.000000e+00 + %3855 = bitcast i32 %131 to float + %3856 = fadd float %3855, %3854 + %3857 = bitcast i32 %102 to float + %3858 = bitcast i32 %102 to float + %3859 = fmul float %3857, %3858 + %3860 = fadd float %3859, 0.000000e+00 + %3861 = bitcast i32 %131 to float + %3862 = bitcast i32 %131 to float + %3863 = fmul float %3861, %3862 + %3864 = fadd float %3860, %3863 + %3865 = call float @llvm.sqrt.f32.196(float %3864) + %3866 = bitcast i32 %102 to float + %3867 = fcmp olt float %3866, 0.000000e+00 + %3868 = sext i1 %3867 to i32 + %3869 = bitcast i32 %102 to float + %3870 = fcmp ogt float %3869, 0.000000e+00 + %3871 = zext i1 %3870 to i32 + %3872 = add nsw i32 %3868, %3871 + %3873 = sitofp i32 %3872 to float + %3874 = fneg float %3873 + %3875 = fmul float %3865, %3874 + %3876 = fmul float %3875, 0.000000e+00 + %3877 = bitcast i32 %131 to float + %3878 = fadd float %3877, %3876 + %3879 = fmul float %3856, %3878 + %3880 = fadd float %3834, %3879 + %3881 = call float @llvm.sqrt.f32.197(float %3880) + %3882 = fadd float %3881, 0.000000e+00 + %3883 = fdiv float %3790, %3882 + %3884 = fmul float %3883, 2.000000e+00 + %3885 = bitcast i32 %102 to float + %3886 = bitcast i32 %102 to float + %3887 = fmul float %3885, %3886 + %3888 = fadd float %3887, 0.000000e+00 + %3889 = bitcast i32 %131 to float + %3890 = bitcast i32 %131 to float + %3891 = fmul float %3889, %3890 + %3892 = fadd float %3888, %3891 + %3893 = call float @llvm.sqrt.f32.198(float %3892) + %3894 = bitcast i32 %102 to float + %3895 = fcmp olt float %3894, 0.000000e+00 + %3896 = sext i1 %3895 to i32 + %3897 = bitcast i32 %102 to float + %3898 = fcmp ogt float %3897, 0.000000e+00 + %3899 = zext i1 %3898 to i32 + %3900 = add nsw i32 %3896, %3899 + %3901 = sitofp i32 %3900 to float + %3902 = fneg float %3901 + %3903 = fmul float %3893, %3902 + %3904 = fmul float %3903, 0.000000e+00 + %3905 = bitcast i32 %131 to float + %3906 = fadd float %3905, %3904 + 
%3907 = bitcast i32 %102 to float + %3908 = bitcast i32 %102 to float + %3909 = fmul float %3907, %3908 + %3910 = fadd float %3909, 0.000000e+00 + %3911 = bitcast i32 %131 to float + %3912 = bitcast i32 %131 to float + %3913 = fmul float %3911, %3912 + %3914 = fadd float %3910, %3913 + %3915 = call float @llvm.sqrt.f32.199(float %3914) + %3916 = bitcast i32 %102 to float + %3917 = fcmp olt float %3916, 0.000000e+00 + %3918 = sext i1 %3917 to i32 + %3919 = bitcast i32 %102 to float + %3920 = fcmp ogt float %3919, 0.000000e+00 + %3921 = zext i1 %3920 to i32 + %3922 = add nsw i32 %3918, %3921 + %3923 = sitofp i32 %3922 to float + %3924 = fneg float %3923 + %3925 = fmul float %3915, %3924 + %3926 = bitcast i32 %102 to float + %3927 = fadd float %3926, %3925 + %3928 = bitcast i32 %102 to float + %3929 = bitcast i32 %102 to float + %3930 = fmul float %3928, %3929 + %3931 = fadd float %3930, 0.000000e+00 + %3932 = bitcast i32 %131 to float + %3933 = bitcast i32 %131 to float + %3934 = fmul float %3932, %3933 + %3935 = fadd float %3931, %3934 + %3936 = call float @llvm.sqrt.f32.200(float %3935) + %3937 = bitcast i32 %102 to float + %3938 = fcmp olt float %3937, 0.000000e+00 + %3939 = sext i1 %3938 to i32 + %3940 = bitcast i32 %102 to float + %3941 = fcmp ogt float %3940, 0.000000e+00 + %3942 = zext i1 %3941 to i32 + %3943 = add nsw i32 %3939, %3942 + %3944 = sitofp i32 %3943 to float + %3945 = fneg float %3944 + %3946 = fmul float %3936, %3945 + %3947 = bitcast i32 %102 to float + %3948 = fadd float %3947, %3946 + %3949 = fmul float %3927, %3948 + %3950 = fadd float %3949, 0.000000e+00 + %3951 = bitcast i32 %102 to float + %3952 = bitcast i32 %102 to float + %3953 = fmul float %3951, %3952 + %3954 = fadd float %3953, 0.000000e+00 + %3955 = bitcast i32 %131 to float + %3956 = bitcast i32 %131 to float + %3957 = fmul float %3955, %3956 + %3958 = fadd float %3954, %3957 + %3959 = call float @llvm.sqrt.f32.201(float %3958) + %3960 = bitcast i32 %102 to float + %3961 = fcmp olt float %3960, 0.000000e+00 + %3962 = sext i1 %3961 to i32 + %3963 = bitcast i32 %102 to float + %3964 = fcmp ogt float %3963, 0.000000e+00 + %3965 = zext i1 %3964 to i32 + %3966 = add nsw i32 %3962, %3965 + %3967 = sitofp i32 %3966 to float + %3968 = fneg float %3967 + %3969 = fmul float %3959, %3968 + %3970 = fmul float %3969, 0.000000e+00 + %3971 = bitcast i32 %131 to float + %3972 = fadd float %3971, %3970 + %3973 = bitcast i32 %102 to float + %3974 = bitcast i32 %102 to float + %3975 = fmul float %3973, %3974 + %3976 = fadd float %3975, 0.000000e+00 + %3977 = bitcast i32 %131 to float + %3978 = bitcast i32 %131 to float + %3979 = fmul float %3977, %3978 + %3980 = fadd float %3976, %3979 + %3981 = call float @llvm.sqrt.f32.202(float %3980) + %3982 = bitcast i32 %102 to float + %3983 = fcmp olt float %3982, 0.000000e+00 + %3984 = sext i1 %3983 to i32 + %3985 = bitcast i32 %102 to float + %3986 = fcmp ogt float %3985, 0.000000e+00 + %3987 = zext i1 %3986 to i32 + %3988 = add nsw i32 %3984, %3987 + %3989 = sitofp i32 %3988 to float + %3990 = fneg float %3989 + %3991 = fmul float %3981, %3990 + %3992 = fmul float %3991, 0.000000e+00 + %3993 = bitcast i32 %131 to float + %3994 = fadd float %3993, %3992 + %3995 = fmul float %3972, %3994 + %3996 = fadd float %3950, %3995 + %3997 = call float @llvm.sqrt.f32.203(float %3996) + %3998 = fadd float %3997, 0.000000e+00 + %3999 = fdiv float %3906, %3998 + %4000 = fmul float %3884, %3999 + %4001 = fsub float 1.000000e+00, %4000 + %4002 = getelementptr float, float* %0, i32 0 + %4003 = 
getelementptr inbounds float, float* %4002, i64 3 + %4004 = load float, float* %4003, align 4 + %4005 = fmul float %4001, %4004 + %4006 = fadd float %3768, %4005 + %4007 = insertelement <4 x float> zeroinitializer, float %4006, i32 0 + %4008 = insertelement <4 x float> %4007, float 0.000000e+00, i32 1 + %4009 = insertelement <4 x float> %4008, float 0.000000e+00, i32 2 + %4010 = insertelement <4 x float> %4009, float 0.000000e+00, i32 3 + %4011 = extractelement <4 x float> %4010, i32 0 + %4012 = getelementptr float, float* %2, i32 0 + %4013 = getelementptr inbounds float, float* %4012, i64 3 + store float %4011, float* %4013, align 4 + %4014 = getelementptr float, float* %1, i32 0 + %4015 = getelementptr inbounds float, float* %4014, i64 2 + %4016 = bitcast float* %4015 to i32* + %4017 = load i32, i32* %4016, align 4 + %4018 = bitcast i32 %4017 to float + %4019 = insertelement <4 x float> zeroinitializer, float %4018, i32 0 + %4020 = getelementptr float, float* %1, i32 0 + %4021 = getelementptr inbounds float, float* %4020, i64 1 + %4022 = bitcast float* %4021 to i32* + %4023 = load i32, i32* %4022, align 4 + %4024 = bitcast i32 %4023 to float + %4025 = insertelement <4 x float> %4019, float %4024, i32 1 + %4026 = insertelement <4 x float> %4025, float 0.000000e+00, i32 2 + %4027 = insertelement <4 x float> %4026, float 0.000000e+00, i32 3 + %4028 = extractelement <4 x float> %4027, i32 0 + %4029 = bitcast i32* %95 to float* + %4030 = getelementptr float, float* %1, i32 0 + %4031 = getelementptr inbounds float, float* %4030, i64 1 + %4032 = bitcast float* %4031 to i32* + %4033 = bitcast i32* %4032 to float* + store float %4028, float* %4033, align 4 + %4034 = extractelement <4 x float> %4027, i32 1 + %4035 = bitcast i32* %98 to float* + %4036 = getelementptr float, float* %1, i32 0 + %4037 = getelementptr inbounds float, float* %4036, i64 2 + %4038 = bitcast float* %4037 to i32* + %4039 = bitcast i32* %4038 to float* + store float %4034, float* %4039, align 4 + ret void +} + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader13: + %3 = bitcast float* %2 to i8* + %4 = bitcast float* %0 to i8* + %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) + %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #8 + %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 + %8 = bitcast i8* %7 to float* + store float 1.000000e+00, float* %8, align 4 + %9 = getelementptr inbounds i8, i8* %7, i64 8 + %10 = getelementptr inbounds i8, i8* %7, i64 12 + %11 = bitcast i8* %10 to float* + store float 1.000000e+00, float* %11, align 4 + %12 = bitcast float* %1 to i8* + %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) + %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %15 = bitcast i8* %14 to float* + %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %17 = bitcast i8* %16 to float* + %18 = bitcast float* %2 to i32* + %19 = load i32, i32* %18, align 4 + %20 = bitcast i8* %14 to i32* + store i32 %19, i32* %20, align 4 + %21 = bitcast i8* %7 to i32* + %22 = load i32, i32* %21, align 4 + %23 = bitcast i8* %16 to i32* + store i32 %22, i32* %23, align 4 + %24 = getelementptr inbounds float, float* %2, i64 2 + %25 = bitcast float* %24 to i32* + %26 = load i32, i32* %25, align 4 
+ %27 = getelementptr inbounds i8, i8* %14, i64 4 + %28 = bitcast i8* %27 to i32* + store i32 %26, i32* %28, align 4 + %29 = bitcast i8* %9 to i32* + %30 = load i32, i32* %29, align 4 + %31 = getelementptr inbounds i8, i8* %16, i64 4 + %32 = bitcast i8* %31 to i32* + store i32 %30, i32* %32, align 4 + %33 = load float, float* %15, align 4 + %34 = call float @no_opt_sgn(float %33) + %35 = fneg float %34 + %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) + %37 = fmul float %36, %35 + %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %39 = bitcast i8* %38 to float* + %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %41 = load float, float* %15, align 4 + %42 = load float, float* %17, align 4 + %43 = fmul float %37, %42 + %44 = fadd float %41, %43 + store float %44, float* %39, align 4 + %45 = bitcast i8* %27 to float* + %46 = load float, float* %45, align 4 + %47 = bitcast i8* %31 to float* + %48 = load float, float* %47, align 4 + %49 = fmul float %37, %48 + %50 = fadd float %46, %49 + %51 = getelementptr inbounds i8, i8* %38, i64 4 + %52 = bitcast i8* %51 to float* + store float %50, float* %52, align 4 + %53 = bitcast i8* %40 to float* + %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) + %55 = fadd float %54, 0x3EE4F8B580000000 + %56 = load float, float* %39, align 4 + %57 = fdiv float %56, %55 + store float %57, float* %53, align 4 + %58 = load float, float* %52, align 4 + %59 = fdiv float %58, %55 + %60 = getelementptr inbounds i8, i8* %40, i64 4 + %61 = bitcast i8* %60 to float* + store float %59, float* %61, align 4 + %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 + %63 = bitcast i8* %62 to float* + %64 = load float, float* %53, align 4 + %65 = fmul float %64, 2.000000e+00 + %66 = fmul float %65, %64 + %67 = fsub float 1.000000e+00, %66 + store float %67, float* %63, align 4 + %68 = load float, float* %53, align 4 + %69 = fmul float %68, 2.000000e+00 + %70 = load float, float* %61, align 4 + %71 = fmul float %69, %70 + %72 = fsub float 0.000000e+00, %71 + %73 = getelementptr inbounds i8, i8* %62, i64 4 + %74 = bitcast i8* %73 to float* + store float %72, float* %74, align 4 + %75 = load float, float* %61, align 4 + %76 = fmul float %75, 2.000000e+00 + %77 = load float, float* %53, align 4 + %78 = fmul float %76, %77 + %79 = fsub float 0.000000e+00, %78 + %80 = getelementptr inbounds i8, i8* %62, i64 8 + %81 = bitcast i8* %80 to float* + store float %79, float* %81, align 4 + %82 = load float, float* %61, align 4 + %83 = fmul float %82, 2.000000e+00 + %84 = fmul float %83, %82 + %85 = fsub float 1.000000e+00, %84 + %86 = getelementptr inbounds i8, i8* %62, i64 12 + %87 = bitcast i8* %86 to float* + store float %85, float* %87, align 4 + %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 + %89 = bitcast i8* %88 to float* + %90 = bitcast i8* %62 to i32* + %91 = load i32, i32* %90, align 4 + %92 = bitcast i8* %88 to i32* + store i32 %91, i32* %92, align 4 + %93 = bitcast i8* %73 to i32* + %94 = load i32, i32* %93, align 4 + %95 = getelementptr inbounds i8, i8* %88, i64 4 + %96 = bitcast i8* %95 to i32* + store i32 %94, i32* %96, align 4 + %97 = bitcast i8* %80 to i32* + %98 = load i32, i32* %97, align 4 + %99 = getelementptr inbounds i8, i8* %88, i64 8 + %100 = bitcast i8* %99 to i32* + store i32 %98, i32* %100, align 4 + %101 = bitcast i8* %86 to i32* + %102 = load i32, i32* %101, align 4 + %103 = getelementptr inbounds i8, i8* %88, i64 12 + %104 = bitcast i8* %103 to i32* + store i32 
%102, i32* %104, align 4 + %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #8 + call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) + call void @free(i8* %14) + call void @free(i8* %16) + call void @free(i8* %38) + call void @free(i8* %40) + call void @free(i8* %62) + call void @free(i8* %88) + call void @no_opt_naive_fixed_transpose(float* %1) + ret void +} + +; Function Attrs: nounwind +declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 + +; Function Attrs: nounwind readnone speculatable willreturn +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 + +; Function Attrs: allocsize(0,1) +declare i8* @calloc(i64, i64) #5 + +declare void @free(i8*) #6 + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #1 { +.preheader6: + %0 = alloca i64, align 8 + %1 = alloca [4 x float], align 16 + %2 = alloca [4 x float], align 16 + %3 = alloca [4 x float], align 16 + %4 = alloca [4 x float], align 16 + %5 = alloca [4 x float], align 16 + %6 = call i64 @time(i64* null) #8 + store i64 %6, i64* %0, align 8 + %7 = call i64 @time(i64* nonnull %0) #8 + %8 = trunc i64 %7 to i32 + call void @srand(i32 %8) #8 + %9 = call i32 @rand() #8 + %10 = sitofp i32 %9 to float + %11 = fdiv float %10, 0x41747AE140000000 + %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 + store float %11, float* %12, align 16 + %13 = fpext float %11 to double + %14 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %13) #8 + %15 = call i32 @rand() #8 + %16 = sitofp i32 %15 to float + %17 = fdiv float %16, 0x41747AE140000000 + %18 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 + store float %17, float* %18, align 4 + %19 = fpext float %17 to double + %20 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %19) #8 + %21 = call i32 @rand() #8 + %22 = sitofp i32 %21 to float + %23 = fdiv float %22, 0x41747AE140000000 + %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 + store float %23, float* %24, align 8 + %25 = fpext float %23 to double + %26 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %25) #8 + %27 = call i32 @rand() #8 + %28 = sitofp i32 %27 to float + %29 = fdiv float %28, 0x41747AE140000000 + %30 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 + store float %29, float* %30, align 4 + %31 = fpext float %29 to double + %32 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %31) #8 + %33 = bitcast [4 x float]* %2 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %33, i8 0, i64 16, i1 false) + %34 = bitcast [4 x float]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %34, i8 0, i64 16, i1 false) + %35 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 + %36 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 + call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %35, float* nonnull %36) + %37 = bitcast [4 x float]* %4 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %37, i8 0, i64 16, i1 false) + %38 = bitcast [4 x float]* %5 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %38, i8 0, i64 16, i1 false) + %39 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 + %40 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 + call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %39, float* nonnull %40) + %41 = load float, float* %35, align 16 + %42 = fpext float %41 to double + %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %42) #8 + %44 = load float, float* %39, align 16 + %45 = fpext float %44 to double + %46 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %45) #8 + %47 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 + %48 = load float, float* %47, align 4 + %49 = fpext float %48 to double + %50 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %49) #8 + %51 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 + %52 = load float, float* %51, align 4 + %53 = fpext float %52 to double + %54 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %53) #8 + %55 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 + %56 = load float, float* %55, align 8 + %57 = fpext float %56 to double + %58 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %57) #8 + %59 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 + %60 = load float, float* %59, align 8 + %61 = fpext float %60 to double + %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %61) #8 + %63 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 + %64 = load float, float* %63, align 4 + %65 = fpext float %64 to double + %66 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %65) #8 + %67 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 + %68 = load float, float* %67, align 4 + %69 = fpext float %68 to double + %70 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %69) #8 + %71 = load float, float* %36, align 16 + %72 = fpext float %71 to double + %73 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %72) #8 + %74 = load float, float* %40, align 16 + %75 = fpext float %74 to double + %76 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %75) #8 + %77 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 + %78 = load float, float* %77, align 4 + %79 = fpext float %78 to double + %80 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %79) #8 + %81 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 + %82 = load float, float* %81, align 4 + %83 = fpext float %82 to double + %84 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %83) #8 + %85 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 + %86 = load float, float* %85, align 8 + %87 = fpext float %86 to double + %88 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %87) #8 + %89 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 + %90 = load float, float* %89, align 8 + %91 = fpext float %90 to double + %92 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %91) #8 + %93 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 + %94 = load float, float* %93, align 4 + %95 = fpext float %94 to double + %96 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %95) #8 + %97 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 + %98 = load float, float* %97, align 4 + %99 = fpext float %98 to double + %100 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %99) #8 + ret i32 0 +} + +declare i64 @time(i64*) #6 + +declare void @srand(i32) #6 + +declare i32 @rand() #6 + +declare i32 @printf(i8*, ...) 
#6 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #7 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.1(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.2(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.3(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.4(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.5(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.6(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.7(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.8(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.9(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.10(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.11(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.12(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.13(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.14(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.15(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.16(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.17(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.18(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.19(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.20(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.21(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.22(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.23(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.24(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.25(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.26(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.27(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.28(float) #2 + +; Function 
Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.29(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.30(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.31(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.32(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.33(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.34(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.35(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.36(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.37(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.38(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.39(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.40(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.41(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.42(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.43(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.44(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.45(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.46(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.47(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.48(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.49(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.50(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.51(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.52(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.53(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.54(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.55(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.56(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.57(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.58(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.59(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.60(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.61(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn 
+declare float @llvm.sqrt.f32.62(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.63(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.64(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.65(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.66(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.67(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.68(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.69(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.70(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.71(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.72(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.73(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.74(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.75(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.76(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.77(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.78(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.79(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.80(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.81(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.82(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.83(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.84(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.85(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.86(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.87(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.88(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.89(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.90(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.91(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.92(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.93(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.94(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float 
@llvm.sqrt.f32.95(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.96(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.97(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.98(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.99(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.100(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.101(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.102(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.103(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.104(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.105(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.106(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.107(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.108(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.109(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.110(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.111(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.112(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.113(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.114(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.115(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.116(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.117(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.118(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.119(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.120(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.121(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.122(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.123(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.124(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.125(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.126(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.127(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn 
+declare float @llvm.sqrt.f32.128(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.129(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.130(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.131(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.132(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.133(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.134(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.135(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.136(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.137(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.138(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.139(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.140(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.141(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.142(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.143(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.144(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.145(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.146(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.147(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.148(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.149(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.150(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.151(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.152(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.153(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.154(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.155(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.156(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.157(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.158(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.159(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.160(float) #2 + +; Function Attrs: nounwind readnone 
speculatable willreturn +declare float @llvm.sqrt.f32.161(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.162(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.163(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.164(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.165(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.166(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.167(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.168(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.169(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.170(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.171(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.172(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.173(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.174(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.175(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.176(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.177(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.178(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.179(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.180(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.181(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.182(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.183(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.184(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.185(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.186(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.187(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.188(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.189(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.190(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.191(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.192(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.193(float) #2 + +; Function Attrs: 
nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.194(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.195(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.196(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.197(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.198(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.199(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.200(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.201(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.202(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.203(float) #2 + +attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone speculatable willreturn } +attributes #3 = { argmemonly nounwind willreturn writeonly } +attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" 
"target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #7 = { argmemonly nounwind willreturn } +attributes #8 = { nounwind } +attributes #9 = { nounwind allocsize(0,1) } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} +!3 = distinct !{!3, !4} +!4 = !{!"llvm.loop.unroll.disable"} +!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/failed-test/final b/src/dios-egraphs/Diospyros/failed-test/final new file mode 100755 index 0000000000000000000000000000000000000000..daadd8533ef4771d868093d840ed881038ca0639 GIT binary patch literal 13628 zcmeHOdvH|M9lje93>ZAS)&g3U)sc3?SRTHLj@Avc8!njTB`F(9xP-8QjL9Y@yDivK zLt$HPm&|0WDASRewswX({*iW&QU~oOAPGt(Rx4EOm@0KHBWk8pjcBFcch1@DBh+>} z)6R5e@8sO`d%w=(KKAC=xpzLj$S_>v45OpSFbsjVro=E3h8K`wT#jadCDM3Wp;X(;#7BG9hDcLuqzyv(_EOqjItAja z`{*$yzWSDEds=#|J^wTnpH~|oo|EsWVFg%LeS55}F}hyU^W$5i?Jd%L;@L+|tYBDH zJo?SX=sK%0+R!5L-0|(v_Be-l5YM-vIO@!KG8Y4AS%He>6gn;dX>E;T8DVHJ*-({lIF2v-PAg*TCnJ@U|_*=&77TAH{d*O=fc9l zwZfJ@uEjneMQ~zmO3`Z-DKZzxEk_o=1543Jw`_Vg zyaD=CvI!c#1icrH`m^HgZL^vh*UnnEsc9W>9$!FX9FI<~tc-hRuX*PE;{%ud@bbks zfaXO*NQQBV8d1WkmX{XI0PH4DA3o$yJE<$u6xGx3Tr~2Sv+LX`EHhUo7Q}P+ruJB* zdFH&Ch^uiegm8QjjrIq1eabb?dKHdB1qu}?RG?6SLInyH`0rGp+O+>7RvZyCd&R@4 z>s(^rfS5TT!4FP|X9iu*ofR|sY7aJuY1?D~|B3vZ)YeZ8@n|p4ix1l=yYGWumspW9 z?ad>mT|InOc!Fm{f_Ge}qqzB)ld^lwWc9GQrSCirGZ_c?7Vk*Rt>?x`#rP1ELa$Nt zglUHc#dn_uJEQoCb{v=E4H5R=s-0Lp%FR@59hE5f~|{erpW z1>XCR{Ht6tZ)q+-W=y%{ExmlYFiYV7yt#G65WWEz7QR8#hI2a(=bq3}oT{^ePpp+` z1_xm)cmk);Ylw5m-1-r$dxCqRy9Y^hQ}>M6wvZl@!EVUxp~l{^xoqiWB4&xKUpfqE z*wpZZ_R=g$z7Kp_ICEAkP1PP0(+rMpVZ8po#w$6$IJvmgUR-R?FU3-wnO>`6nqQ9R zzv7!)&aA<7>E|TpbiN210=mg)SBn*=M0sdf_?{EKm$6_S%;6y{rW2yPmUB6Bz-h*F z7N#TStp|O`ftm|#k_*oy4dS+Idfno1Z?W)Ir$kL3x?vIS1HYyi({>2J+S0RPGPU9d zM0v1V_zuHkm+)cL`~fy=chJptXFS{p9~XhUdWR@av|{cGA1&2xheT(S#GePglc4ho z9!b(+37-^4G*kS=*+EXwF)S;F-pXQ8-nm4Y!M5kFMmW3uG?mPl@*|Oik_mNXXSb7* znFMtqMw47vh9sG7Gpr_NODE4V?CT}?J5(3gZXexU33tNRNg-xK8g$%JkooSW4t2{U zNv2HDc@s~SI%OFrRb6|N2Yxjauo%N+au0YPipR$yrIS&DK9?r5=iky{9Ue1&p zn{$~qzmNkvn)9G^B6B_}!F0|8bmxORM-XgBuVB>(0LEftm&K? 
zCskz5Wtp7Q@nDcWi9OoTW3tC6J5OYpK~-(aBpMci7P7QUNLPvAcB-n-C&eE6X7Ia>V;dR!*M&kNrl0iFc-HY`#K=y7>N0bXRUD35moXnq9FJi%SgYP=SL z+5MmyutE#&5WLm&0;tB7H{6H$h%nqMF*xqXBVfIWpbk-?AKR1Kgylr2rTgH6%mhXr zbc!;z{hSuc_=6WT&JoTr?l125Ltk$3!Ck^v+l{Kz@U?V;4fiPe$PQ;W-ks?L{ERTT z3#ZVIEGMf5bp~<{)^hC8!dMlaC8hm-z6)aNGKK0cTHmgA>d-pV9mW`XTkJL?QKZbM|H){L&E((`7T;}@B{$A{#=bdjs@i+3W&OXMQL_0X-w1Wos zC08L4xM%Zo>lpqm1)t%S6ms&`)R_-i)9%gTUu-eiKN#SkQo7gv?F^lH&um z5lB{)2JCsofn+6mIod@qx!<|(Z`j}}#CK8^TZZAL18_8DCO4;~R~CD(bIS*E8|8go zO_pOadz@e6Mms;AJ-oeic9W9~4IndK!9zWW3iG%I8x!u2lkq(;FdmEw&b?3%INXWC znAo05T+KIkSL|oz0sAl_-R1PWFQb@b$YRk#5Ek4Rx77o1gK6cN0NjL))$r+AQ2dlC z^KsLD#c?ATa7&(2{iJ;=IB2;&!uKkib5D2`w`y2CoO*8I!@y#K>~&iA``I_fu*lWvf8 zoPfASoL6W9LuGbu-sKztV)r`-&^f;(qcb$b0SLk&wrY%!93>K0_L`k{RDj;&f}Jks z+f)tiaPC7-l}qzfPHB}(q>2-g;@h3OwD_B{&vV~79IVsFp)BeytJnAYeP4~gc5=dA(w`Y9=mSl6kdc z@+KAm-L07iG*d|Cb&7e5W~MZAf@FFX^K#AX*35Ij?ZlOe`3a^z^E{-P$0T#AV!p1K zk7;H~GA~oiKF!4JmeKB%Ot)hGTr>A*=JzC%x7tW$yJjBM%&$vksbb!vnNMhDm1Gtx zCVxA`Li`12W|B`JBj&9yiUjVputu?vxma<*;^j+R+;x6>7x44bACTt<<@q6b-X+gJ zlIKU{`N#6yEzdv2*)Xa)ueudL#ogzpSD^3OK;r4Q5Xgqh2)#z=dP2V^bQ_@oLY0J` zA+(IpUP5aKJx(Y}sGHDzgdQUFeL_0`q0c%x?gZk0;@K~KW#aLl-37QKyG?JHXe{+E zj>lT#vD>`Ff*V^S^|8o0Z!US+g@tm->UXRw;ebbWkEvwvx9*$Jv+;Lo9)z)N;CC|@ zeut7JBY6}ca-m~T|4#j??oa9dUEPo9z6ihP(pZ`9C+ps;`x&~wkdFebP=P`P3Kb|+ zpiqHA1qu}?RG?6SLInyHC{&$Ex3OkF`Et*S5}B z+E&`-=SQ~kBYA5rzVpUM?R*bwv`v*7&5`E%)=g9axy_qrrJ=5|iF6<6^>s~6E%n4^ z18ZomYal$Aa9boo*bf-?+9NSyCa_j~s2-DYOMtc2Mc0uN2Hc)T)&Pk$Hq(GQTo(Rc zpxA2OU_~}Y>f^BpzQwO&*fZMKM~#_LK)!q0oIzj>4bED7>Ae@IL)s!?<}$ z`EVL8$7ygD(VOuO=2eTC}zPDHJYbHxvedo;gH++{Q z01QEd_^lcr##=PvOEliC`Rg_A$5#vF^FJu4@7MAVYkUwNbCdt1##37VC5;c`BMS20 z(zsjO`>V$JPK*2x(a;l=M~Om4@hN%uj6D2?Je>byp#GG-ujb*EdHA9{e0d(ux1;J$ z*{jRL`I{k9lmB0X5*YmVluOY*hlY|FpGV`E;P9g)@*DVnSI3M0CKx;4EjiCEHH*~* z&7X=n%*?FJA!nwtCd+kTMb(=5Z0loXvYm|OX8X$K=D6hAwB!j{XSSc5?MrSvITW>_ frFS8<|EQfOz2~HN8@12q9Y*afW41s!PR2g~z+`Np literal 0 HcmV?d00001 diff --git a/src/dios-egraphs/Diospyros/failed-test/opt.ll b/src/dios-egraphs/Diospyros/failed-test/opt.ll new file mode 100644 index 00000000..4f8a871a --- /dev/null +++ b/src/dios-egraphs/Diospyros/failed-test/opt.ll @@ -0,0 +1,750 @@ +; ModuleID = 'build/clang.ll' +source_filename = "fail-tests/qr-decomp-local-arrays.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 +@.str.1 = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 +@.str.2 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 +@.str.3 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 +@.str.4 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @sgn(float %0) #0 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_sgn(float %0) #1 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @naive_norm(float* %0, i32 %1) #0 { + %3 = icmp sgt i32 %1, 0 + %smax = select i1 %3, 
i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 3 + br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 + %indvars.iv.next = or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32(float) #2 + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_naive_norm(float* %0, i32 %1) #1 { + %3 = icmp sgt i32 %1, 0 + %smax = select i1 %3, i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 3 + br i1 %5, label 
%._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 + %indvars.iv.next = or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_transpose(float* %0) #0 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast float* %4 to i32* + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_transpose(float* %0) #1 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast float* %4 to i32* + 
%6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { +.preheader: + store float 0.000000e+00, float* %2, align 4 + %3 = load float, float* %0, align 4 + %4 = load float, float* %1, align 4 + %5 = fmul float %3, %4 + %6 = fadd float %5, 0.000000e+00 + store float %6, float* %2, align 4 + %7 = getelementptr inbounds float, float* %0, i64 1 + %8 = load float, float* %7, align 4 + %9 = getelementptr inbounds float, float* %1, i64 2 + %10 = load float, float* %9, align 4 + %11 = fmul float %8, %10 + %12 = fadd float %6, %11 + store float %12, float* %2, align 4 + %13 = getelementptr inbounds float, float* %2, i64 1 + store float 0.000000e+00, float* %13, align 4 + %14 = load float, float* %0, align 4 + %15 = getelementptr inbounds float, float* %1, i64 1 + %16 = load float, float* %15, align 4 + %17 = fmul float %14, %16 + %18 = fadd float %17, 0.000000e+00 + store float %18, float* %13, align 4 + %19 = load float, float* %7, align 4 + %20 = getelementptr inbounds float, float* %1, i64 3 + %21 = load float, float* %20, align 4 + %22 = fmul float %19, %21 + %23 = fadd float %18, %22 + store float %23, float* %13, align 4 + %24 = getelementptr inbounds float, float* %0, i64 2 + %25 = getelementptr inbounds float, float* %2, i64 2 + store float 0.000000e+00, float* %25, align 4 + %26 = load float, float* %24, align 4 + %27 = load float, float* %1, align 4 + %28 = fmul float %26, %27 + %29 = fadd float %28, 0.000000e+00 + store float %29, float* %25, align 4 + %30 = getelementptr inbounds float, float* %0, i64 3 + %31 = load float, float* %30, align 4 + %32 = load float, float* %9, align 4 + %33 = fmul float %31, %32 + %34 = fadd float %29, %33 + store float %34, float* %25, align 4 + %35 = getelementptr inbounds float, float* %2, i64 3 + store float 0.000000e+00, float* %35, align 4 + %36 = load float, float* %24, align 4 + %37 = load float, float* %15, align 4 + %38 = fmul float %36, %37 + %39 = fadd float %38, 0.000000e+00 + store float %39, float* %35, align 4 + %40 = load float, float* %30, align 4 + %41 = load float, float* %20, align 4 + %42 = fmul float %40, %41 + %43 = fadd float %39, %42 + store float %43, float* %35, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { +.preheader: + store float 0.000000e+00, float* %2, align 4 + %3 = load float, float* %0, align 4 + %4 = load float, float* %1, align 4 + %5 = fmul float %3, %4 + %6 = fadd float %5, 0.000000e+00 + store float %6, float* %2, align 4 + %7 = getelementptr inbounds float, float* %0, i64 1 + %8 = load float, float* %7, align 4 + %9 = getelementptr inbounds float, float* %1, i64 2 + %10 = load float, float* %9, align 4 + %11 = fmul float %8, %10 + %12 = fadd float %6, %11 + store float %12, float* %2, align 4 + %13 = getelementptr inbounds float, float* %2, i64 1 + store float 0.000000e+00, float* %13, align 4 + %14 = load float, float* %0, align 4 + %15 = getelementptr inbounds float, float* %1, i64 1 + %16 = load float, float* %15, align 4 + %17 = fmul float %14, %16 + %18 = fadd float %17, 0.000000e+00 + store float %18, float* %13, align 4 + %19 = load float, float* %7, align 4 + %20 = getelementptr inbounds float, float* %1, i64 3 + %21 = load float, float* %20, align 4 + %22 = fmul float %19, %21 + %23 = fadd float %18, %22 + 
store float %23, float* %13, align 4 + %24 = getelementptr inbounds float, float* %0, i64 2 + %25 = getelementptr inbounds float, float* %2, i64 2 + store float 0.000000e+00, float* %25, align 4 + %26 = load float, float* %24, align 4 + %27 = load float, float* %1, align 4 + %28 = fmul float %26, %27 + %29 = fadd float %28, 0.000000e+00 + store float %29, float* %25, align 4 + %30 = getelementptr inbounds float, float* %0, i64 3 + %31 = load float, float* %30, align 4 + %32 = load float, float* %9, align 4 + %33 = fmul float %31, %32 + %34 = fadd float %29, %33 + store float %34, float* %25, align 4 + %35 = getelementptr inbounds float, float* %2, i64 3 + store float 0.000000e+00, float* %35, align 4 + %36 = load float, float* %24, align 4 + %37 = load float, float* %15, align 4 + %38 = fmul float %36, %37 + %39 = fadd float %38, 0.000000e+00 + store float %39, float* %35, align 4 + %40 = load float, float* %30, align 4 + %41 = load float, float* %20, align 4 + %42 = fmul float %40, %41 + %43 = fadd float %39, %42 + store float %43, float* %35, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader49: + %3 = bitcast float* %1 to i8* + %4 = alloca [4 x float], align 16 + %5 = bitcast [4 x float]* %4 to i8* + %6 = bitcast float* %0 to i32* + %7 = load i32, i32* %6, align 4 + %8 = bitcast float* %2 to i32* + store i32 %7, i32* %8, align 4 + %9 = getelementptr inbounds float, float* %0, i64 1 + %10 = bitcast float* %9 to i32* + %11 = load i32, i32* %10, align 4 + %12 = getelementptr inbounds float, float* %2, i64 1 + %13 = bitcast float* %12 to i32* + store i32 %11, i32* %13, align 4 + %14 = getelementptr inbounds float, float* %0, i64 2 + %15 = bitcast float* %14 to i32* + %16 = load i32, i32* %15, align 4 + %17 = getelementptr inbounds float, float* %2, i64 2 + %18 = bitcast float* %17 to i32* + store i32 %16, i32* %18, align 4 + %19 = getelementptr inbounds float, float* %0, i64 3 + %20 = bitcast float* %19 to i32* + %21 = load i32, i32* %20, align 4 + %22 = getelementptr inbounds float, float* %2, i64 3 + %23 = bitcast float* %22 to i32* + store i32 %21, i32* %23, align 4 + %24 = bitcast i32 %7 to float + %25 = fcmp ogt float %24, 0.000000e+00 + %26 = zext i1 %25 to i32 + %27 = fcmp olt float %24, 0.000000e+00 + %.neg = sext i1 %27 to i32 + %28 = add nsw i32 %.neg, %26 + %29 = sitofp i32 %28 to float + %30 = fmul float %24, %24 + %31 = fadd float %30, 0.000000e+00 + %32 = bitcast i32 %16 to float + %33 = fmul float %32, %32 + %34 = fadd float %31, %33 + %35 = call float @llvm.sqrt.f32(float %34) #8 + %36 = fneg float %29 + %37 = fmul float %35, %36 + %38 = fadd float %24, %37 + %39 = fmul float %37, 0.000000e+00 + %40 = fadd float %32, %39 + %41 = fmul float %38, %38 + %42 = fadd float %41, 0.000000e+00 + %43 = fmul float %40, %40 + %44 = fadd float %42, %43 + %45 = call float @llvm.sqrt.f32(float %44) #8 + %46 = fadd float %45, 0x3EE4F8B580000000 + %47 = fdiv float %38, %46 + %48 = fdiv float %40, %46 + %49 = fmul float %47, 2.000000e+00 + %50 = fmul float %49, %47 + %51 = fsub float 1.000000e+00, %50 + %52 = fmul float %49, %48 + %53 = fsub float 0.000000e+00, %52 + %54 = fmul float %48, 2.000000e+00 + %55 = fmul float %54, %47 + %56 = fsub float 0.000000e+00, %55 + %57 = fmul float %54, %48 + %58 = fsub float 1.000000e+00, %57 + %59 = bitcast float %51 to i32 + %60 = bitcast [4 x float]* %4 to i32* + store i32 %59, i32* %60, align 16 + %61 = bitcast float %53 to i32 + %62 = 
getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 + %63 = bitcast float* %62 to i32* + store i32 %61, i32* %63, align 4 + %64 = bitcast float %56 to i32 + %65 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 + %66 = bitcast float* %65 to i32* + store i32 %64, i32* %66, align 8 + %67 = bitcast float %58 to i32 + %68 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 + %69 = bitcast float* %68 to i32* + store i32 %67, i32* %69, align 4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %3, i8* nonnull align 16 dereferenceable(16) %5, i64 16, i1 false) + store float 0.000000e+00, float* %2, align 4 + %70 = load float, float* %0, align 4 + %71 = fmul float %51, %70 + %72 = fadd float %71, 0.000000e+00 + store float %72, float* %2, align 4 + %73 = load float, float* %14, align 4 + %74 = fmul float %53, %73 + %75 = fadd float %72, %74 + store float %75, float* %2, align 4 + store float 0.000000e+00, float* %12, align 4 + %76 = load float, float* %9, align 4 + %77 = fmul float %51, %76 + %78 = fadd float %77, 0.000000e+00 + store float %78, float* %12, align 4 + %79 = load float, float* %19, align 4 + %80 = fmul float %53, %79 + %81 = fadd float %78, %80 + store float %81, float* %12, align 4 + store float 0.000000e+00, float* %17, align 4 + %82 = load float, float* %0, align 4 + %83 = fmul float %56, %82 + %84 = fadd float %83, 0.000000e+00 + store float %84, float* %17, align 4 + %85 = load float, float* %14, align 4 + %86 = fmul float %58, %85 + %87 = fadd float %84, %86 + store float %87, float* %17, align 4 + store float 0.000000e+00, float* %22, align 4 + %88 = load float, float* %9, align 4 + %89 = fmul float %56, %88 + %90 = fadd float %89, 0.000000e+00 + store float %90, float* %22, align 4 + %91 = load float, float* %19, align 4 + %92 = fmul float %58, %91 + %93 = fadd float %90, %92 + store float %93, float* %22, align 4 + %94 = getelementptr inbounds float, float* %1, i64 1 + %95 = bitcast float* %94 to i32* + %96 = load i32, i32* %95, align 4 + %97 = getelementptr inbounds float, float* %1, i64 2 + %98 = bitcast float* %97 to i32* + %99 = load i32, i32* %98, align 4 + store i32 %99, i32* %95, align 4 + store i32 %96, i32* %98, align 4 + ret void +} + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader13: + %3 = bitcast float* %2 to i8* + %4 = bitcast float* %0 to i8* + %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) + %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #8 + %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 + %8 = bitcast i8* %7 to float* + store float 1.000000e+00, float* %8, align 4 + %9 = getelementptr inbounds i8, i8* %7, i64 8 + %10 = getelementptr inbounds i8, i8* %7, i64 12 + %11 = bitcast i8* %10 to float* + store float 1.000000e+00, float* %11, align 4 + %12 = bitcast float* %1 to i8* + %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) + %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %15 = bitcast i8* %14 to float* + %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %17 = bitcast i8* %16 to float* + %18 = bitcast float* %2 to i32* + %19 = load i32, i32* %18, align 4 + %20 = bitcast i8* %14 to i32* + store i32 %19, i32* %20, 
align 4 + %21 = bitcast i8* %7 to i32* + %22 = load i32, i32* %21, align 4 + %23 = bitcast i8* %16 to i32* + store i32 %22, i32* %23, align 4 + %24 = getelementptr inbounds float, float* %2, i64 2 + %25 = bitcast float* %24 to i32* + %26 = load i32, i32* %25, align 4 + %27 = getelementptr inbounds i8, i8* %14, i64 4 + %28 = bitcast i8* %27 to i32* + store i32 %26, i32* %28, align 4 + %29 = bitcast i8* %9 to i32* + %30 = load i32, i32* %29, align 4 + %31 = getelementptr inbounds i8, i8* %16, i64 4 + %32 = bitcast i8* %31 to i32* + store i32 %30, i32* %32, align 4 + %33 = load float, float* %15, align 4 + %34 = call float @no_opt_sgn(float %33) + %35 = fneg float %34 + %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) + %37 = fmul float %36, %35 + %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %39 = bitcast i8* %38 to float* + %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 + %41 = load float, float* %15, align 4 + %42 = load float, float* %17, align 4 + %43 = fmul float %37, %42 + %44 = fadd float %41, %43 + store float %44, float* %39, align 4 + %45 = bitcast i8* %27 to float* + %46 = load float, float* %45, align 4 + %47 = bitcast i8* %31 to float* + %48 = load float, float* %47, align 4 + %49 = fmul float %37, %48 + %50 = fadd float %46, %49 + %51 = getelementptr inbounds i8, i8* %38, i64 4 + %52 = bitcast i8* %51 to float* + store float %50, float* %52, align 4 + %53 = bitcast i8* %40 to float* + %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) + %55 = fadd float %54, 0x3EE4F8B580000000 + %56 = load float, float* %39, align 4 + %57 = fdiv float %56, %55 + store float %57, float* %53, align 4 + %58 = load float, float* %52, align 4 + %59 = fdiv float %58, %55 + %60 = getelementptr inbounds i8, i8* %40, i64 4 + %61 = bitcast i8* %60 to float* + store float %59, float* %61, align 4 + %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 + %63 = bitcast i8* %62 to float* + %64 = load float, float* %53, align 4 + %65 = fmul float %64, 2.000000e+00 + %66 = fmul float %65, %64 + %67 = fsub float 1.000000e+00, %66 + store float %67, float* %63, align 4 + %68 = load float, float* %53, align 4 + %69 = fmul float %68, 2.000000e+00 + %70 = load float, float* %61, align 4 + %71 = fmul float %69, %70 + %72 = fsub float 0.000000e+00, %71 + %73 = getelementptr inbounds i8, i8* %62, i64 4 + %74 = bitcast i8* %73 to float* + store float %72, float* %74, align 4 + %75 = load float, float* %61, align 4 + %76 = fmul float %75, 2.000000e+00 + %77 = load float, float* %53, align 4 + %78 = fmul float %76, %77 + %79 = fsub float 0.000000e+00, %78 + %80 = getelementptr inbounds i8, i8* %62, i64 8 + %81 = bitcast i8* %80 to float* + store float %79, float* %81, align 4 + %82 = load float, float* %61, align 4 + %83 = fmul float %82, 2.000000e+00 + %84 = fmul float %83, %82 + %85 = fsub float 1.000000e+00, %84 + %86 = getelementptr inbounds i8, i8* %62, i64 12 + %87 = bitcast i8* %86 to float* + store float %85, float* %87, align 4 + %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 + %89 = bitcast i8* %88 to float* + %90 = bitcast i8* %62 to i32* + %91 = load i32, i32* %90, align 4 + %92 = bitcast i8* %88 to i32* + store i32 %91, i32* %92, align 4 + %93 = bitcast i8* %73 to i32* + %94 = load i32, i32* %93, align 4 + %95 = getelementptr inbounds i8, i8* %88, i64 4 + %96 = bitcast i8* %95 to i32* + store i32 %94, i32* %96, align 4 + %97 = bitcast i8* %80 to i32* + %98 = load i32, i32* %97, align 4 + %99 = 
getelementptr inbounds i8, i8* %88, i64 8 + %100 = bitcast i8* %99 to i32* + store i32 %98, i32* %100, align 4 + %101 = bitcast i8* %86 to i32* + %102 = load i32, i32* %101, align 4 + %103 = getelementptr inbounds i8, i8* %88, i64 12 + %104 = bitcast i8* %103 to i32* + store i32 %102, i32* %104, align 4 + %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #8 + call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) + call void @free(i8* %14) + call void @free(i8* %16) + call void @free(i8* %38) + call void @free(i8* %40) + call void @free(i8* %62) + call void @free(i8* %88) + call void @no_opt_naive_fixed_transpose(float* %1) + ret void +} + +; Function Attrs: nounwind +declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 + +; Function Attrs: nounwind readnone speculatable willreturn +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 + +; Function Attrs: allocsize(0,1) +declare i8* @calloc(i64, i64) #5 + +declare void @free(i8*) #6 + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #1 { +.preheader6: + %0 = alloca i64, align 8 + %1 = alloca [4 x float], align 16 + %2 = alloca [4 x float], align 16 + %3 = alloca [4 x float], align 16 + %4 = alloca [4 x float], align 16 + %5 = alloca [4 x float], align 16 + %6 = call i64 @time(i64* null) #8 + store i64 %6, i64* %0, align 8 + %7 = call i64 @time(i64* nonnull %0) #8 + %8 = trunc i64 %7 to i32 + call void @srand(i32 %8) #8 + %9 = call i32 @rand() #8 + %10 = sitofp i32 %9 to float + %11 = fdiv float %10, 0x41747AE140000000 + %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 + store float %11, float* %12, align 16 + %13 = fpext float %11 to double + %14 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %13) #8 + %15 = call i32 @rand() #8 + %16 = sitofp i32 %15 to float + %17 = fdiv float %16, 0x41747AE140000000 + %18 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 + store float %17, float* %18, align 4 + %19 = fpext float %17 to double + %20 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %19) #8 + %21 = call i32 @rand() #8 + %22 = sitofp i32 %21 to float + %23 = fdiv float %22, 0x41747AE140000000 + %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 + store float %23, float* %24, align 8 + %25 = fpext float %23 to double + %26 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %25) #8 + %27 = call i32 @rand() #8 + %28 = sitofp i32 %27 to float + %29 = fdiv float %28, 0x41747AE140000000 + %30 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 + store float %29, float* %30, align 4 + %31 = fpext float %29 to double + %32 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %31) #8 + %33 = bitcast [4 x float]* %2 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %33, i8 0, i64 16, i1 false) + %34 = bitcast [4 x float]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %34, i8 0, i64 16, i1 false) + %35 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 + %36 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 + call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %35, float* nonnull %36) + %37 = bitcast [4 x float]* %4 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %37, i8 0, i64 16, i1 false) + %38 = bitcast [4 x float]* %5 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %38, i8 0, i64 16, i1 false) + %39 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 + %40 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 + call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %39, float* nonnull %40) + %41 = load float, float* %35, align 16 + %42 = fpext float %41 to double + %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %42) #8 + %44 = load float, float* %39, align 16 + %45 = fpext float %44 to double + %46 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %45) #8 + %47 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 + %48 = load float, float* %47, align 4 + %49 = fpext float %48 to double + %50 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %49) #8 + %51 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 + %52 = load float, float* %51, align 4 + %53 = fpext float %52 to double + %54 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %53) #8 + %55 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 + %56 = load float, float* %55, align 8 + %57 = fpext float %56 to double + %58 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %57) #8 + %59 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 + %60 = load float, float* %59, align 8 + %61 = fpext float %60 to double + %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %61) #8 + %63 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 + %64 = load float, float* %63, align 4 + %65 = fpext float %64 to double + %66 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %65) #8 + %67 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 + %68 = load float, float* %67, align 4 + %69 = fpext float %68 to double + %70 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %69) #8 + %71 = load float, float* %36, align 16 + %72 = fpext float %71 to double + %73 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %72) #8 + %74 = load float, float* %40, align 16 + %75 = fpext float %74 to double + %76 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %75) #8 + %77 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 + %78 = load float, float* %77, align 4 + %79 = fpext float %78 to double + %80 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %79) #8 + %81 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 + %82 = load float, float* %81, align 4 + %83 = fpext float %82 to double + %84 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %83) #8 + %85 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 + %86 = load float, float* %85, align 8 + %87 = fpext float %86 to double + %88 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %87) #8 + %89 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 + %90 = load float, float* %89, align 8 + %91 = fpext float %90 to double + %92 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %91) #8 + %93 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 + %94 = load float, float* %93, align 4 + %95 = fpext float %94 to double + %96 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %95) #8 + %97 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 + %98 = load float, float* %97, align 4 + %99 = fpext float %98 to double + %100 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %99) #8 + ret i32 0 +} + +declare i64 @time(i64*) #6 + +declare void @srand(i32) #6 + +declare i32 @rand() #6 + +declare i32 @printf(i8*, ...) 
#6 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #7 + +attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone speculatable willreturn } +attributes #3 = { argmemonly nounwind willreturn writeonly } +attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #7 = { argmemonly nounwind willreturn } +attributes #8 = { nounwind } +attributes #9 = { nounwind allocsize(0,1) } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} +!3 = distinct !{!3, !4} +!4 = !{!"llvm.loop.unroll.disable"} +!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/diff-aa.txt b/src/dios-egraphs/Diospyros/flaky-outputs/diff-aa.txt new file mode 100644 index 00000000..e69de29b diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/diff-clang.txt b/src/dios-egraphs/Diospyros/flaky-outputs/diff-clang.txt new file mode 100644 index 00000000..e69de29b diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/diff-dce.txt 
b/src/dios-egraphs/Diospyros/flaky-outputs/diff-dce.txt new file mode 100644 index 00000000..f0b20f07 --- /dev/null +++ b/src/dios-egraphs/Diospyros/flaky-outputs/diff-dce.txt @@ -0,0 +1,5416 @@ +229,235c229,235 +< %17 = getelementptr float, float* %0, i32 0 +< %18 = load float, float* %17, align 4 +< %19 = insertelement <4 x float> zeroinitializer, float %18, i32 0 +< %20 = insertelement <4 x float> %19, float 1.000000e+00, i32 1 +< %21 = insertelement <4 x float> %20, float 1.000000e+00, i32 2 +< %22 = insertelement <4 x float> %21, float 1.000000e+00, i32 3 +< %23 = insertelement <4 x float> zeroinitializer, float %10, i32 0 +--- +> %17 = insertelement <4 x float> zeroinitializer, float %4, i32 0 +> %18 = insertelement <4 x float> %17, float 1.000000e+00, i32 1 +> %19 = insertelement <4 x float> %18, float 1.000000e+00, i32 2 +> %20 = insertelement <4 x float> %19, float 1.000000e+00, i32 3 +> %21 = getelementptr float, float* %1, i32 0 +> %22 = load float, float* %21, align 4 +> %23 = insertelement <4 x float> zeroinitializer, float %22, i32 0 +239c239 +< %27 = fmul <4 x float> %22, %26 +--- +> %27 = fmul <4 x float> %20, %26 +284,307c284,306 +< %69 = insertelement <4 x float> zeroinitializer, float %56, i32 0 +< %70 = insertelement <4 x float> %69, float 0.000000e+00, i32 1 +< %71 = insertelement <4 x float> %70, float 0.000000e+00, i32 2 +< %72 = insertelement <4 x float> %71, float 0.000000e+00, i32 3 +< %73 = fmul <4 x float> %68, %72 +< %74 = fadd <4 x float> %73, zeroinitializer +< %75 = getelementptr float, float* %0, i32 0 +< %76 = getelementptr inbounds float, float* %75, i64 1 +< %77 = load float, float* %76, align 4 +< %78 = insertelement <4 x float> zeroinitializer, float %77, i32 0 +< %79 = insertelement <4 x float> %78, float 0.000000e+00, i32 1 +< %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 2 +< %81 = insertelement <4 x float> %80, float 0.000000e+00, i32 3 +< %82 = getelementptr float, float* %1, i32 0 +< %83 = getelementptr inbounds float, float* %82, i64 3 +< %84 = load float, float* %83, align 4 +< %85 = insertelement <4 x float> zeroinitializer, float %84, i32 0 +< %86 = insertelement <4 x float> %85, float 0.000000e+00, i32 1 +< %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 2 +< %88 = insertelement <4 x float> %87, float 0.000000e+00, i32 3 +< %89 = call <4 x float> @llvm.fma.v4f32(<4 x float> %81, <4 x float> %88, <4 x float> %74) +< %90 = extractelement <4 x float> %89, i32 0 +< store float %90, float* %64, align 4 +< %91 = extractelement <4 x float> %89, i32 1 +--- +> %69 = load float, float* %55, align 4 +> %70 = insertelement <4 x float> zeroinitializer, float %69, i32 0 +> %71 = insertelement <4 x float> %70, float 0.000000e+00, i32 1 +> %72 = insertelement <4 x float> %71, float 0.000000e+00, i32 2 +> %73 = insertelement <4 x float> %72, float 0.000000e+00, i32 3 +> %74 = fmul <4 x float> %68, %73 +> %75 = fadd <4 x float> %74, zeroinitializer +> %76 = getelementptr float, float* %0, i32 0 +> %77 = getelementptr inbounds float, float* %76, i64 1 +> %78 = load float, float* %77, align 4 +> %79 = insertelement <4 x float> zeroinitializer, float %78, i32 0 +> %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 1 +> %81 = insertelement <4 x float> %80, float 0.000000e+00, i32 2 +> %82 = insertelement <4 x float> %81, float 0.000000e+00, i32 3 +> %83 = getelementptr float, float* %1, i32 0 +> %84 = getelementptr inbounds float, float* %83, i64 3 +> %85 = load float, float* %84, align 4 +> %86 = insertelement <4 x float> 
zeroinitializer, float %85, i32 0 +> %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 1 +> %88 = insertelement <4 x float> %87, float 0.000000e+00, i32 2 +> %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3 +> %90 = call <4 x float> @llvm.fma.v4f32(<4 x float> %82, <4 x float> %89, <4 x float> %75) +> %91 = extractelement <4 x float> %90, i32 0 +309c308 +< %93 = getelementptr inbounds float, float* %92, i64 2 +--- +> %93 = getelementptr inbounds float, float* %92, i64 1 +311,344c310,344 +< %94 = getelementptr float, float* %0, i32 0 +< %95 = getelementptr inbounds float, float* %94, i64 2 +< %96 = load float, float* %95, align 4 +< %97 = insertelement <4 x float> zeroinitializer, float %96, i32 0 +< %98 = insertelement <4 x float> %97, float 0.000000e+00, i32 1 +< %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 2 +< %100 = insertelement <4 x float> %99, float 0.000000e+00, i32 3 +< %101 = getelementptr float, float* %1, i32 0 +< %102 = load float, float* %101, align 4 +< %103 = insertelement <4 x float> zeroinitializer, float %102, i32 0 +< %104 = insertelement <4 x float> %103, float 0.000000e+00, i32 1 +< %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 2 +< %106 = insertelement <4 x float> %105, float 0.000000e+00, i32 3 +< %107 = call <4 x float> @llvm.fma.v4f32(<4 x float> %100, <4 x float> %106, <4 x float> zeroinitializer) +< %108 = extractelement <4 x float> %107, i32 0 +< store float %108, float* %93, align 4 +< %109 = insertelement <4 x float> zeroinitializer, float %96, i32 0 +< %110 = insertelement <4 x float> %109, float 1.000000e+00, i32 1 +< %111 = insertelement <4 x float> %110, float 1.000000e+00, i32 2 +< %112 = insertelement <4 x float> %111, float 1.000000e+00, i32 3 +< %113 = insertelement <4 x float> zeroinitializer, float %102, i32 0 +< %114 = insertelement <4 x float> %113, float 0.000000e+00, i32 1 +< %115 = insertelement <4 x float> %114, float 0.000000e+00, i32 2 +< %116 = insertelement <4 x float> %115, float 0.000000e+00, i32 3 +< %117 = fmul <4 x float> %112, %116 +< %118 = fadd <4 x float> %117, zeroinitializer +< %119 = getelementptr float, float* %0, i32 0 +< %120 = getelementptr inbounds float, float* %119, i64 3 +< %121 = load float, float* %120, align 4 +< %122 = insertelement <4 x float> zeroinitializer, float %121, i32 0 +< %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 1 +< %124 = insertelement <4 x float> %123, float 0.000000e+00, i32 2 +< %125 = insertelement <4 x float> %124, float 0.000000e+00, i32 3 +< %126 = load float, float* %37, align 4 +--- +> %94 = extractelement <4 x float> %90, i32 1 +> %95 = getelementptr float, float* %2, i32 0 +> %96 = getelementptr inbounds float, float* %95, i64 2 +> store float %94, float* %96, align 4 +> %97 = getelementptr float, float* %0, i32 0 +> %98 = getelementptr inbounds float, float* %97, i64 2 +> %99 = load float, float* %98, align 4 +> %100 = insertelement <4 x float> zeroinitializer, float %99, i32 0 +> %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 1 +> %102 = insertelement <4 x float> %101, float 0.000000e+00, i32 2 +> %103 = insertelement <4 x float> %102, float 0.000000e+00, i32 3 +> %104 = getelementptr float, float* %1, i32 0 +> %105 = load float, float* %104, align 4 +> %106 = insertelement <4 x float> zeroinitializer, float %105, i32 0 +> %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 1 +> %108 = insertelement <4 x float> %107, float 0.000000e+00, i32 2 +> %109 = insertelement <4 x float> %108, 
float 0.000000e+00, i32 3 +> %110 = call <4 x float> @llvm.fma.v4f32(<4 x float> %103, <4 x float> %109, <4 x float> zeroinitializer) +> %111 = extractelement <4 x float> %110, i32 0 +> %112 = getelementptr float, float* %2, i32 0 +> %113 = getelementptr inbounds float, float* %112, i64 2 +> store float %111, float* %113, align 4 +> %114 = insertelement <4 x float> zeroinitializer, float %99, i32 0 +> %115 = insertelement <4 x float> %114, float 1.000000e+00, i32 1 +> %116 = insertelement <4 x float> %115, float 1.000000e+00, i32 2 +> %117 = insertelement <4 x float> %116, float 1.000000e+00, i32 3 +> %118 = insertelement <4 x float> zeroinitializer, float %105, i32 0 +> %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 1 +> %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 2 +> %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 3 +> %122 = fmul <4 x float> %117, %121 +> %123 = fadd <4 x float> %122, zeroinitializer +> %124 = getelementptr float, float* %0, i32 0 +> %125 = getelementptr inbounds float, float* %124, i64 3 +> %126 = load float, float* %125, align 4 +349,361c349,361 +< %131 = call <4 x float> @llvm.fma.v4f32(<4 x float> %125, <4 x float> %130, <4 x float> %118) +< %132 = extractelement <4 x float> %131, i32 0 +< store float %132, float* %93, align 4 +< %133 = extractelement <4 x float> %131, i32 1 +< %134 = getelementptr float, float* %2, i32 0 +< %135 = getelementptr inbounds float, float* %134, i64 3 +< store float %133, float* %135, align 4 +< %136 = load float, float* %95, align 4 +< %137 = insertelement <4 x float> zeroinitializer, float %136, i32 0 +< %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 1 +< %139 = insertelement <4 x float> %138, float 0.000000e+00, i32 2 +< %140 = insertelement <4 x float> %139, float 0.000000e+00, i32 3 +< %141 = load float, float* %55, align 4 +--- +> %131 = load float, float* %37, align 4 +> %132 = insertelement <4 x float> zeroinitializer, float %131, i32 0 +> %133 = insertelement <4 x float> %132, float 0.000000e+00, i32 1 +> %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 2 +> %135 = insertelement <4 x float> %134, float 0.000000e+00, i32 3 +> %136 = call <4 x float> @llvm.fma.v4f32(<4 x float> %130, <4 x float> %135, <4 x float> %123) +> %137 = extractelement <4 x float> %136, i32 0 +> store float %137, float* %113, align 4 +> %138 = extractelement <4 x float> %136, i32 1 +> %139 = getelementptr float, float* %2, i32 0 +> %140 = getelementptr inbounds float, float* %139, i64 3 +> store float %138, float* %140, align 4 +> %141 = load float, float* %98, align 4 +366,386c366,386 +< %146 = call <4 x float> @llvm.fma.v4f32(<4 x float> %140, <4 x float> %145, <4 x float> zeroinitializer) +< %147 = extractelement <4 x float> %146, i32 0 +< store float %147, float* %135, align 4 +< %148 = insertelement <4 x float> zeroinitializer, float %136, i32 0 +< %149 = insertelement <4 x float> %148, float 1.000000e+00, i32 1 +< %150 = insertelement <4 x float> %149, float 1.000000e+00, i32 2 +< %151 = insertelement <4 x float> %150, float 1.000000e+00, i32 3 +< %152 = insertelement <4 x float> zeroinitializer, float %141, i32 0 +< %153 = insertelement <4 x float> %152, float 0.000000e+00, i32 1 +< %154 = insertelement <4 x float> %153, float 0.000000e+00, i32 2 +< %155 = insertelement <4 x float> %154, float 0.000000e+00, i32 3 +< %156 = fmul <4 x float> %151, %155 +< %157 = fadd <4 x float> %156, zeroinitializer +< %158 = getelementptr float, float* %0, i32 0 +< %159 = getelementptr 
inbounds float, float* %158, i64 3 +< %160 = load float, float* %159, align 4 +< %161 = insertelement <4 x float> zeroinitializer, float %160, i32 0 +< %162 = insertelement <4 x float> %161, float 0.000000e+00, i32 1 +< %163 = insertelement <4 x float> %162, float 0.000000e+00, i32 2 +< %164 = insertelement <4 x float> %163, float 0.000000e+00, i32 3 +< %165 = load float, float* %83, align 4 +--- +> %146 = load float, float* %55, align 4 +> %147 = insertelement <4 x float> zeroinitializer, float %146, i32 0 +> %148 = insertelement <4 x float> %147, float 0.000000e+00, i32 1 +> %149 = insertelement <4 x float> %148, float 0.000000e+00, i32 2 +> %150 = insertelement <4 x float> %149, float 0.000000e+00, i32 3 +> %151 = call <4 x float> @llvm.fma.v4f32(<4 x float> %145, <4 x float> %150, <4 x float> zeroinitializer) +> %152 = extractelement <4 x float> %151, i32 0 +> store float %152, float* %140, align 4 +> %153 = insertelement <4 x float> zeroinitializer, float %141, i32 0 +> %154 = insertelement <4 x float> %153, float 1.000000e+00, i32 1 +> %155 = insertelement <4 x float> %154, float 1.000000e+00, i32 2 +> %156 = insertelement <4 x float> %155, float 1.000000e+00, i32 3 +> %157 = insertelement <4 x float> zeroinitializer, float %146, i32 0 +> %158 = insertelement <4 x float> %157, float 0.000000e+00, i32 1 +> %159 = insertelement <4 x float> %158, float 0.000000e+00, i32 2 +> %160 = insertelement <4 x float> %159, float 0.000000e+00, i32 3 +> %161 = fmul <4 x float> %156, %160 +> %162 = fadd <4 x float> %161, zeroinitializer +> %163 = getelementptr float, float* %0, i32 0 +> %164 = getelementptr inbounds float, float* %163, i64 3 +> %165 = load float, float* %164, align 4 +391,393c391,398 +< %170 = call <4 x float> @llvm.fma.v4f32(<4 x float> %164, <4 x float> %169, <4 x float> %157) +< %171 = extractelement <4 x float> %170, i32 0 +< store float %171, float* %135, align 4 +--- +> %170 = load float, float* %84, align 4 +> %171 = insertelement <4 x float> zeroinitializer, float %170, i32 0 +> %172 = insertelement <4 x float> %171, float 0.000000e+00, i32 1 +> %173 = insertelement <4 x float> %172, float 0.000000e+00, i32 2 +> %174 = insertelement <4 x float> %173, float 0.000000e+00, i32 3 +> %175 = call <4 x float> @llvm.fma.v4f32(<4 x float> %169, <4 x float> %174, <4 x float> %162) +> %176 = extractelement <4 x float> %175, i32 0 +> store float %176, float* %140, align 4 +516,532c521,537 +< %57 = load i32, i32* %31, align 4 +< %58 = bitcast i32 %57 to float +< %59 = bitcast i32 %57 to float +< %60 = fmul float %58, %59 +< %61 = fadd float %56, %60 +< %62 = call float @llvm.sqrt.f32(float %61) +< %63 = bitcast i32 %52 to float +< %64 = fcmp olt float %63, 0.000000e+00 +< %65 = sext i1 %64 to i32 +< %66 = fcmp ogt float %63, 0.000000e+00 +< %67 = zext i1 %66 to i32 +< %68 = add nsw i32 %65, %67 +< %69 = sitofp i32 %68 to float +< %70 = fneg float %69 +< %71 = fmul float %62, %70 +< %72 = bitcast i32 %52 to float +< %73 = fadd float %72, %71 +--- +> %57 = bitcast i32 %32 to float +> %58 = bitcast i32 %32 to float +> %59 = fmul float %57, %58 +> %60 = fadd float %56, %59 +> %61 = call float @llvm.sqrt.f32(float %60) +> %62 = bitcast i32 %52 to float +> %63 = fcmp olt float %62, 0.000000e+00 +> %64 = sext i1 %63 to i32 +> %65 = fcmp ogt float %62, 0.000000e+00 +> %66 = zext i1 %65 to i32 +> %67 = add nsw i32 %64, %66 +> %68 = sitofp i32 %67 to float +> %69 = fneg float %68 +> %70 = fmul float %61, %69 +> %71 = bitcast i32 %52 to float +> %72 = fadd float %71, %70 +> %73 = bitcast i32 %52 to 
float +534,545c539,550 +< %75 = bitcast i32 %52 to float +< %76 = fmul float %74, %75 +< %77 = fadd float %76, 0.000000e+00 +< %78 = bitcast i32 %57 to float +< %79 = bitcast i32 %57 to float +< %80 = fmul float %78, %79 +< %81 = fadd float %77, %80 +< %82 = call float @llvm.sqrt.f32(float %81) +< %83 = fneg float %69 +< %84 = fmul float %82, %83 +< %85 = bitcast i32 %52 to float +< %86 = fadd float %85, %84 +--- +> %75 = fmul float %73, %74 +> %76 = fadd float %75, 0.000000e+00 +> %77 = bitcast i32 %32 to float +> %78 = bitcast i32 %32 to float +> %79 = fmul float %77, %78 +> %80 = fadd float %76, %79 +> %81 = call float @llvm.sqrt.f32(float %80) +> %82 = fneg float %68 +> %83 = fmul float %81, %82 +> %84 = bitcast i32 %52 to float +> %85 = fadd float %84, %83 +> %86 = bitcast i32 %52 to float +547,560c552,565 +< %88 = bitcast i32 %52 to float +< %89 = fmul float %87, %88 +< %90 = fadd float %89, 0.000000e+00 +< %91 = bitcast i32 %57 to float +< %92 = bitcast i32 %57 to float +< %93 = fmul float %91, %92 +< %94 = fadd float %90, %93 +< %95 = call float @llvm.sqrt.f32(float %94) +< %96 = fneg float %69 +< %97 = fmul float %95, %96 +< %98 = bitcast i32 %52 to float +< %99 = fadd float %98, %97 +< %100 = fmul float %86, %99 +< %101 = fadd float %100, 0.000000e+00 +--- +> %88 = fmul float %86, %87 +> %89 = fadd float %88, 0.000000e+00 +> %90 = bitcast i32 %32 to float +> %91 = bitcast i32 %32 to float +> %92 = fmul float %90, %91 +> %93 = fadd float %89, %92 +> %94 = call float @llvm.sqrt.f32(float %93) +> %95 = fneg float %68 +> %96 = fmul float %94, %95 +> %97 = bitcast i32 %52 to float +> %98 = fadd float %97, %96 +> %99 = fmul float %85, %98 +> %100 = fadd float %99, 0.000000e+00 +> %101 = bitcast i32 %52 to float +562,574c567,579 +< %103 = bitcast i32 %52 to float +< %104 = fmul float %102, %103 +< %105 = fadd float %104, 0.000000e+00 +< %106 = bitcast i32 %57 to float +< %107 = bitcast i32 %57 to float +< %108 = fmul float %106, %107 +< %109 = fadd float %105, %108 +< %110 = call float @llvm.sqrt.f32(float %109) +< %111 = fneg float %69 +< %112 = fmul float %110, %111 +< %113 = fmul float %112, 0.000000e+00 +< %114 = bitcast i32 %57 to float +< %115 = fadd float %114, %113 +--- +> %103 = fmul float %101, %102 +> %104 = fadd float %103, 0.000000e+00 +> %105 = bitcast i32 %32 to float +> %106 = bitcast i32 %32 to float +> %107 = fmul float %105, %106 +> %108 = fadd float %104, %107 +> %109 = call float @llvm.sqrt.f32(float %108) +> %110 = fneg float %68 +> %111 = fmul float %109, %110 +> %112 = fmul float %111, 0.000000e+00 +> %113 = bitcast i32 %32 to float +> %114 = fadd float %113, %112 +> %115 = bitcast i32 %52 to float +576,594c581,599 +< %117 = bitcast i32 %52 to float +< %118 = fmul float %116, %117 +< %119 = fadd float %118, 0.000000e+00 +< %120 = bitcast i32 %57 to float +< %121 = bitcast i32 %57 to float +< %122 = fmul float %120, %121 +< %123 = fadd float %119, %122 +< %124 = call float @llvm.sqrt.f32(float %123) +< %125 = fneg float %69 +< %126 = fmul float %124, %125 +< %127 = fmul float %126, 0.000000e+00 +< %128 = bitcast i32 %57 to float +< %129 = fadd float %128, %127 +< %130 = fmul float %115, %129 +< %131 = fadd float %101, %130 +< %132 = call float @llvm.sqrt.f32(float %131) +< %133 = fadd float %132, 0.000000e+00 +< %134 = fdiv float %73, %133 +< %135 = fmul float %134, 2.000000e+00 +--- +> %117 = fmul float %115, %116 +> %118 = fadd float %117, 0.000000e+00 +> %119 = bitcast i32 %32 to float +> %120 = bitcast i32 %32 to float +> %121 = fmul float %119, %120 +> %122 = 
fadd float %118, %121 +> %123 = call float @llvm.sqrt.f32(float %122) +> %124 = fneg float %68 +> %125 = fmul float %123, %124 +> %126 = fmul float %125, 0.000000e+00 +> %127 = bitcast i32 %32 to float +> %128 = fadd float %127, %126 +> %129 = fmul float %114, %128 +> %130 = fadd float %100, %129 +> %131 = call float @llvm.sqrt.f32(float %130) +> %132 = fadd float %131, 0.000000e+00 +> %133 = fdiv float %72, %132 +> %134 = fmul float %133, 2.000000e+00 +> %135 = bitcast i32 %52 to float +596,607c601,612 +< %137 = bitcast i32 %52 to float +< %138 = fmul float %136, %137 +< %139 = fadd float %138, 0.000000e+00 +< %140 = bitcast i32 %57 to float +< %141 = bitcast i32 %57 to float +< %142 = fmul float %140, %141 +< %143 = fadd float %139, %142 +< %144 = call float @llvm.sqrt.f32(float %143) +< %145 = fneg float %69 +< %146 = fmul float %144, %145 +< %147 = bitcast i32 %52 to float +< %148 = fadd float %147, %146 +--- +> %137 = fmul float %135, %136 +> %138 = fadd float %137, 0.000000e+00 +> %139 = bitcast i32 %32 to float +> %140 = bitcast i32 %32 to float +> %141 = fmul float %139, %140 +> %142 = fadd float %138, %141 +> %143 = call float @llvm.sqrt.f32(float %142) +> %144 = fneg float %68 +> %145 = fmul float %143, %144 +> %146 = bitcast i32 %52 to float +> %147 = fadd float %146, %145 +> %148 = bitcast i32 %52 to float +609,620c614,625 +< %150 = bitcast i32 %52 to float +< %151 = fmul float %149, %150 +< %152 = fadd float %151, 0.000000e+00 +< %153 = bitcast i32 %57 to float +< %154 = bitcast i32 %57 to float +< %155 = fmul float %153, %154 +< %156 = fadd float %152, %155 +< %157 = call float @llvm.sqrt.f32(float %156) +< %158 = fneg float %69 +< %159 = fmul float %157, %158 +< %160 = bitcast i32 %52 to float +< %161 = fadd float %160, %159 +--- +> %150 = fmul float %148, %149 +> %151 = fadd float %150, 0.000000e+00 +> %152 = bitcast i32 %32 to float +> %153 = bitcast i32 %32 to float +> %154 = fmul float %152, %153 +> %155 = fadd float %151, %154 +> %156 = call float @llvm.sqrt.f32(float %155) +> %157 = fneg float %68 +> %158 = fmul float %156, %157 +> %159 = bitcast i32 %52 to float +> %160 = fadd float %159, %158 +> %161 = bitcast i32 %52 to float +622,635c627,640 +< %163 = bitcast i32 %52 to float +< %164 = fmul float %162, %163 +< %165 = fadd float %164, 0.000000e+00 +< %166 = bitcast i32 %57 to float +< %167 = bitcast i32 %57 to float +< %168 = fmul float %166, %167 +< %169 = fadd float %165, %168 +< %170 = call float @llvm.sqrt.f32(float %169) +< %171 = fneg float %69 +< %172 = fmul float %170, %171 +< %173 = bitcast i32 %52 to float +< %174 = fadd float %173, %172 +< %175 = fmul float %161, %174 +< %176 = fadd float %175, 0.000000e+00 +--- +> %163 = fmul float %161, %162 +> %164 = fadd float %163, 0.000000e+00 +> %165 = bitcast i32 %32 to float +> %166 = bitcast i32 %32 to float +> %167 = fmul float %165, %166 +> %168 = fadd float %164, %167 +> %169 = call float @llvm.sqrt.f32(float %168) +> %170 = fneg float %68 +> %171 = fmul float %169, %170 +> %172 = bitcast i32 %52 to float +> %173 = fadd float %172, %171 +> %174 = fmul float %160, %173 +> %175 = fadd float %174, 0.000000e+00 +> %176 = bitcast i32 %52 to float +637,649c642,654 +< %178 = bitcast i32 %52 to float +< %179 = fmul float %177, %178 +< %180 = fadd float %179, 0.000000e+00 +< %181 = bitcast i32 %57 to float +< %182 = bitcast i32 %57 to float +< %183 = fmul float %181, %182 +< %184 = fadd float %180, %183 +< %185 = call float @llvm.sqrt.f32(float %184) +< %186 = fneg float %69 +< %187 = fmul float %185, %186 +< %188 = 
fmul float %187, 0.000000e+00 +< %189 = bitcast i32 %57 to float +< %190 = fadd float %189, %188 +--- +> %178 = fmul float %176, %177 +> %179 = fadd float %178, 0.000000e+00 +> %180 = bitcast i32 %32 to float +> %181 = bitcast i32 %32 to float +> %182 = fmul float %180, %181 +> %183 = fadd float %179, %182 +> %184 = call float @llvm.sqrt.f32(float %183) +> %185 = fneg float %68 +> %186 = fmul float %184, %185 +> %187 = fmul float %186, 0.000000e+00 +> %188 = bitcast i32 %32 to float +> %189 = fadd float %188, %187 +> %190 = bitcast i32 %52 to float +651,670c656,675 +< %192 = bitcast i32 %52 to float +< %193 = fmul float %191, %192 +< %194 = fadd float %193, 0.000000e+00 +< %195 = bitcast i32 %57 to float +< %196 = bitcast i32 %57 to float +< %197 = fmul float %195, %196 +< %198 = fadd float %194, %197 +< %199 = call float @llvm.sqrt.f32(float %198) +< %200 = fneg float %69 +< %201 = fmul float %199, %200 +< %202 = fmul float %201, 0.000000e+00 +< %203 = bitcast i32 %57 to float +< %204 = fadd float %203, %202 +< %205 = fmul float %190, %204 +< %206 = fadd float %176, %205 +< %207 = call float @llvm.sqrt.f32(float %206) +< %208 = fadd float %207, 0.000000e+00 +< %209 = fdiv float %148, %208 +< %210 = fmul float %135, %209 +< %211 = insertelement <4 x float> %49, float %210, i32 1 +--- +> %192 = fmul float %190, %191 +> %193 = fadd float %192, 0.000000e+00 +> %194 = bitcast i32 %32 to float +> %195 = bitcast i32 %32 to float +> %196 = fmul float %194, %195 +> %197 = fadd float %193, %196 +> %198 = call float @llvm.sqrt.f32(float %197) +> %199 = fneg float %68 +> %200 = fmul float %198, %199 +> %201 = fmul float %200, 0.000000e+00 +> %202 = bitcast i32 %32 to float +> %203 = fadd float %202, %201 +> %204 = fmul float %189, %203 +> %205 = fadd float %175, %204 +> %206 = call float @llvm.sqrt.f32(float %205) +> %207 = fadd float %206, 0.000000e+00 +> %208 = fdiv float %147, %207 +> %209 = fmul float %134, %208 +> %210 = insertelement <4 x float> %49, float %209, i32 1 +> %211 = bitcast i32 %52 to float +672,683c677,688 +< %213 = bitcast i32 %52 to float +< %214 = fmul float %212, %213 +< %215 = fadd float %214, 0.000000e+00 +< %216 = bitcast i32 %57 to float +< %217 = bitcast i32 %57 to float +< %218 = fmul float %216, %217 +< %219 = fadd float %215, %218 +< %220 = call float @llvm.sqrt.f32(float %219) +< %221 = fneg float %69 +< %222 = fmul float %220, %221 +< %223 = bitcast i32 %52 to float +< %224 = fadd float %223, %222 +--- +> %213 = fmul float %211, %212 +> %214 = fadd float %213, 0.000000e+00 +> %215 = bitcast i32 %32 to float +> %216 = bitcast i32 %32 to float +> %217 = fmul float %215, %216 +> %218 = fadd float %214, %217 +> %219 = call float @llvm.sqrt.f32(float %218) +> %220 = fneg float %68 +> %221 = fmul float %219, %220 +> %222 = bitcast i32 %52 to float +> %223 = fadd float %222, %221 +> %224 = bitcast i32 %52 to float +685,696c690,701 +< %226 = bitcast i32 %52 to float +< %227 = fmul float %225, %226 +< %228 = fadd float %227, 0.000000e+00 +< %229 = bitcast i32 %57 to float +< %230 = bitcast i32 %57 to float +< %231 = fmul float %229, %230 +< %232 = fadd float %228, %231 +< %233 = call float @llvm.sqrt.f32(float %232) +< %234 = fneg float %69 +< %235 = fmul float %233, %234 +< %236 = bitcast i32 %52 to float +< %237 = fadd float %236, %235 +--- +> %226 = fmul float %224, %225 +> %227 = fadd float %226, 0.000000e+00 +> %228 = bitcast i32 %32 to float +> %229 = bitcast i32 %32 to float +> %230 = fmul float %228, %229 +> %231 = fadd float %227, %230 +> %232 = call float 
@llvm.sqrt.f32(float %231) +> %233 = fneg float %68 +> %234 = fmul float %232, %233 +> %235 = bitcast i32 %52 to float +> %236 = fadd float %235, %234 +> %237 = bitcast i32 %52 to float +698,711c703,716 +< %239 = bitcast i32 %52 to float +< %240 = fmul float %238, %239 +< %241 = fadd float %240, 0.000000e+00 +< %242 = bitcast i32 %57 to float +< %243 = bitcast i32 %57 to float +< %244 = fmul float %242, %243 +< %245 = fadd float %241, %244 +< %246 = call float @llvm.sqrt.f32(float %245) +< %247 = fneg float %69 +< %248 = fmul float %246, %247 +< %249 = bitcast i32 %52 to float +< %250 = fadd float %249, %248 +< %251 = fmul float %237, %250 +< %252 = fadd float %251, 0.000000e+00 +--- +> %239 = fmul float %237, %238 +> %240 = fadd float %239, 0.000000e+00 +> %241 = bitcast i32 %32 to float +> %242 = bitcast i32 %32 to float +> %243 = fmul float %241, %242 +> %244 = fadd float %240, %243 +> %245 = call float @llvm.sqrt.f32(float %244) +> %246 = fneg float %68 +> %247 = fmul float %245, %246 +> %248 = bitcast i32 %52 to float +> %249 = fadd float %248, %247 +> %250 = fmul float %236, %249 +> %251 = fadd float %250, 0.000000e+00 +> %252 = bitcast i32 %52 to float +713,725c718,730 +< %254 = bitcast i32 %52 to float +< %255 = fmul float %253, %254 +< %256 = fadd float %255, 0.000000e+00 +< %257 = bitcast i32 %57 to float +< %258 = bitcast i32 %57 to float +< %259 = fmul float %257, %258 +< %260 = fadd float %256, %259 +< %261 = call float @llvm.sqrt.f32(float %260) +< %262 = fneg float %69 +< %263 = fmul float %261, %262 +< %264 = fmul float %263, 0.000000e+00 +< %265 = bitcast i32 %57 to float +< %266 = fadd float %265, %264 +--- +> %254 = fmul float %252, %253 +> %255 = fadd float %254, 0.000000e+00 +> %256 = bitcast i32 %32 to float +> %257 = bitcast i32 %32 to float +> %258 = fmul float %256, %257 +> %259 = fadd float %255, %258 +> %260 = call float @llvm.sqrt.f32(float %259) +> %261 = fneg float %68 +> %262 = fmul float %260, %261 +> %263 = fmul float %262, 0.000000e+00 +> %264 = bitcast i32 %32 to float +> %265 = fadd float %264, %263 +> %266 = bitcast i32 %52 to float +727,745c732,750 +< %268 = bitcast i32 %52 to float +< %269 = fmul float %267, %268 +< %270 = fadd float %269, 0.000000e+00 +< %271 = bitcast i32 %57 to float +< %272 = bitcast i32 %57 to float +< %273 = fmul float %271, %272 +< %274 = fadd float %270, %273 +< %275 = call float @llvm.sqrt.f32(float %274) +< %276 = fneg float %69 +< %277 = fmul float %275, %276 +< %278 = fmul float %277, 0.000000e+00 +< %279 = bitcast i32 %57 to float +< %280 = fadd float %279, %278 +< %281 = fmul float %266, %280 +< %282 = fadd float %252, %281 +< %283 = call float @llvm.sqrt.f32(float %282) +< %284 = fadd float %283, 0.000000e+00 +< %285 = fdiv float %224, %284 +< %286 = fmul float %285, 2.000000e+00 +--- +> %268 = fmul float %266, %267 +> %269 = fadd float %268, 0.000000e+00 +> %270 = bitcast i32 %32 to float +> %271 = bitcast i32 %32 to float +> %272 = fmul float %270, %271 +> %273 = fadd float %269, %272 +> %274 = call float @llvm.sqrt.f32(float %273) +> %275 = fneg float %68 +> %276 = fmul float %274, %275 +> %277 = fmul float %276, 0.000000e+00 +> %278 = bitcast i32 %32 to float +> %279 = fadd float %278, %277 +> %280 = fmul float %265, %279 +> %281 = fadd float %251, %280 +> %282 = call float @llvm.sqrt.f32(float %281) +> %283 = fadd float %282, 0.000000e+00 +> %284 = fdiv float %223, %283 +> %285 = fmul float %284, 2.000000e+00 +> %286 = bitcast i32 %52 to float +747,759c752,764 +< %288 = bitcast i32 %52 to float +< %289 = fmul float 
%287, %288 +< %290 = fadd float %289, 0.000000e+00 +< %291 = bitcast i32 %57 to float +< %292 = bitcast i32 %57 to float +< %293 = fmul float %291, %292 +< %294 = fadd float %290, %293 +< %295 = call float @llvm.sqrt.f32(float %294) +< %296 = fneg float %69 +< %297 = fmul float %295, %296 +< %298 = fmul float %297, 0.000000e+00 +< %299 = bitcast i32 %57 to float +< %300 = fadd float %299, %298 +--- +> %288 = fmul float %286, %287 +> %289 = fadd float %288, 0.000000e+00 +> %290 = bitcast i32 %32 to float +> %291 = bitcast i32 %32 to float +> %292 = fmul float %290, %291 +> %293 = fadd float %289, %292 +> %294 = call float @llvm.sqrt.f32(float %293) +> %295 = fneg float %68 +> %296 = fmul float %294, %295 +> %297 = fmul float %296, 0.000000e+00 +> %298 = bitcast i32 %32 to float +> %299 = fadd float %298, %297 +> %300 = bitcast i32 %52 to float +761,772c766,777 +< %302 = bitcast i32 %52 to float +< %303 = fmul float %301, %302 +< %304 = fadd float %303, 0.000000e+00 +< %305 = bitcast i32 %57 to float +< %306 = bitcast i32 %57 to float +< %307 = fmul float %305, %306 +< %308 = fadd float %304, %307 +< %309 = call float @llvm.sqrt.f32(float %308) +< %310 = fneg float %69 +< %311 = fmul float %309, %310 +< %312 = bitcast i32 %52 to float +< %313 = fadd float %312, %311 +--- +> %302 = fmul float %300, %301 +> %303 = fadd float %302, 0.000000e+00 +> %304 = bitcast i32 %32 to float +> %305 = bitcast i32 %32 to float +> %306 = fmul float %304, %305 +> %307 = fadd float %303, %306 +> %308 = call float @llvm.sqrt.f32(float %307) +> %309 = fneg float %68 +> %310 = fmul float %308, %309 +> %311 = bitcast i32 %52 to float +> %312 = fadd float %311, %310 +> %313 = bitcast i32 %52 to float +774,787c779,792 +< %315 = bitcast i32 %52 to float +< %316 = fmul float %314, %315 +< %317 = fadd float %316, 0.000000e+00 +< %318 = bitcast i32 %57 to float +< %319 = bitcast i32 %57 to float +< %320 = fmul float %318, %319 +< %321 = fadd float %317, %320 +< %322 = call float @llvm.sqrt.f32(float %321) +< %323 = fneg float %69 +< %324 = fmul float %322, %323 +< %325 = bitcast i32 %52 to float +< %326 = fadd float %325, %324 +< %327 = fmul float %313, %326 +< %328 = fadd float %327, 0.000000e+00 +--- +> %315 = fmul float %313, %314 +> %316 = fadd float %315, 0.000000e+00 +> %317 = bitcast i32 %32 to float +> %318 = bitcast i32 %32 to float +> %319 = fmul float %317, %318 +> %320 = fadd float %316, %319 +> %321 = call float @llvm.sqrt.f32(float %320) +> %322 = fneg float %68 +> %323 = fmul float %321, %322 +> %324 = bitcast i32 %52 to float +> %325 = fadd float %324, %323 +> %326 = fmul float %312, %325 +> %327 = fadd float %326, 0.000000e+00 +> %328 = bitcast i32 %52 to float +789,801c794,806 +< %330 = bitcast i32 %52 to float +< %331 = fmul float %329, %330 +< %332 = fadd float %331, 0.000000e+00 +< %333 = bitcast i32 %57 to float +< %334 = bitcast i32 %57 to float +< %335 = fmul float %333, %334 +< %336 = fadd float %332, %335 +< %337 = call float @llvm.sqrt.f32(float %336) +< %338 = fneg float %69 +< %339 = fmul float %337, %338 +< %340 = fmul float %339, 0.000000e+00 +< %341 = bitcast i32 %57 to float +< %342 = fadd float %341, %340 +--- +> %330 = fmul float %328, %329 +> %331 = fadd float %330, 0.000000e+00 +> %332 = bitcast i32 %32 to float +> %333 = bitcast i32 %32 to float +> %334 = fmul float %332, %333 +> %335 = fadd float %331, %334 +> %336 = call float @llvm.sqrt.f32(float %335) +> %337 = fneg float %68 +> %338 = fmul float %336, %337 +> %339 = fmul float %338, 0.000000e+00 +> %340 = bitcast i32 %32 to 
float +> %341 = fadd float %340, %339 +> %342 = bitcast i32 %52 to float +803,822c808,827 +< %344 = bitcast i32 %52 to float +< %345 = fmul float %343, %344 +< %346 = fadd float %345, 0.000000e+00 +< %347 = bitcast i32 %57 to float +< %348 = bitcast i32 %57 to float +< %349 = fmul float %347, %348 +< %350 = fadd float %346, %349 +< %351 = call float @llvm.sqrt.f32(float %350) +< %352 = fneg float %69 +< %353 = fmul float %351, %352 +< %354 = fmul float %353, 0.000000e+00 +< %355 = bitcast i32 %57 to float +< %356 = fadd float %355, %354 +< %357 = fmul float %342, %356 +< %358 = fadd float %328, %357 +< %359 = call float @llvm.sqrt.f32(float %358) +< %360 = fadd float %359, 0.000000e+00 +< %361 = fdiv float %300, %360 +< %362 = fmul float %286, %361 +< %363 = insertelement <4 x float> %211, float %362, i32 2 +--- +> %344 = fmul float %342, %343 +> %345 = fadd float %344, 0.000000e+00 +> %346 = bitcast i32 %32 to float +> %347 = bitcast i32 %32 to float +> %348 = fmul float %346, %347 +> %349 = fadd float %345, %348 +> %350 = call float @llvm.sqrt.f32(float %349) +> %351 = fneg float %68 +> %352 = fmul float %350, %351 +> %353 = fmul float %352, 0.000000e+00 +> %354 = bitcast i32 %32 to float +> %355 = fadd float %354, %353 +> %356 = fmul float %341, %355 +> %357 = fadd float %327, %356 +> %358 = call float @llvm.sqrt.f32(float %357) +> %359 = fadd float %358, 0.000000e+00 +> %360 = fdiv float %299, %359 +> %361 = fmul float %285, %360 +> %362 = insertelement <4 x float> %210, float %361, i32 2 +> %363 = bitcast i32 %52 to float +824,836c829,841 +< %365 = bitcast i32 %52 to float +< %366 = fmul float %364, %365 +< %367 = fadd float %366, 0.000000e+00 +< %368 = bitcast i32 %57 to float +< %369 = bitcast i32 %57 to float +< %370 = fmul float %368, %369 +< %371 = fadd float %367, %370 +< %372 = call float @llvm.sqrt.f32(float %371) +< %373 = fneg float %69 +< %374 = fmul float %372, %373 +< %375 = fmul float %374, 0.000000e+00 +< %376 = bitcast i32 %57 to float +< %377 = fadd float %376, %375 +--- +> %365 = fmul float %363, %364 +> %366 = fadd float %365, 0.000000e+00 +> %367 = bitcast i32 %32 to float +> %368 = bitcast i32 %32 to float +> %369 = fmul float %367, %368 +> %370 = fadd float %366, %369 +> %371 = call float @llvm.sqrt.f32(float %370) +> %372 = fneg float %68 +> %373 = fmul float %371, %372 +> %374 = fmul float %373, 0.000000e+00 +> %375 = bitcast i32 %32 to float +> %376 = fadd float %375, %374 +> %377 = bitcast i32 %52 to float +838,849c843,854 +< %379 = bitcast i32 %52 to float +< %380 = fmul float %378, %379 +< %381 = fadd float %380, 0.000000e+00 +< %382 = bitcast i32 %57 to float +< %383 = bitcast i32 %57 to float +< %384 = fmul float %382, %383 +< %385 = fadd float %381, %384 +< %386 = call float @llvm.sqrt.f32(float %385) +< %387 = fneg float %69 +< %388 = fmul float %386, %387 +< %389 = bitcast i32 %52 to float +< %390 = fadd float %389, %388 +--- +> %379 = fmul float %377, %378 +> %380 = fadd float %379, 0.000000e+00 +> %381 = bitcast i32 %32 to float +> %382 = bitcast i32 %32 to float +> %383 = fmul float %381, %382 +> %384 = fadd float %380, %383 +> %385 = call float @llvm.sqrt.f32(float %384) +> %386 = fneg float %68 +> %387 = fmul float %385, %386 +> %388 = bitcast i32 %52 to float +> %389 = fadd float %388, %387 +> %390 = bitcast i32 %52 to float +851,864c856,869 +< %392 = bitcast i32 %52 to float +< %393 = fmul float %391, %392 +< %394 = fadd float %393, 0.000000e+00 +< %395 = bitcast i32 %57 to float +< %396 = bitcast i32 %57 to float +< %397 = fmul float %395, %396 
+< %398 = fadd float %394, %397 +< %399 = call float @llvm.sqrt.f32(float %398) +< %400 = fneg float %69 +< %401 = fmul float %399, %400 +< %402 = bitcast i32 %52 to float +< %403 = fadd float %402, %401 +< %404 = fmul float %390, %403 +< %405 = fadd float %404, 0.000000e+00 +--- +> %392 = fmul float %390, %391 +> %393 = fadd float %392, 0.000000e+00 +> %394 = bitcast i32 %32 to float +> %395 = bitcast i32 %32 to float +> %396 = fmul float %394, %395 +> %397 = fadd float %393, %396 +> %398 = call float @llvm.sqrt.f32(float %397) +> %399 = fneg float %68 +> %400 = fmul float %398, %399 +> %401 = bitcast i32 %52 to float +> %402 = fadd float %401, %400 +> %403 = fmul float %389, %402 +> %404 = fadd float %403, 0.000000e+00 +> %405 = bitcast i32 %52 to float +866,878c871,883 +< %407 = bitcast i32 %52 to float +< %408 = fmul float %406, %407 +< %409 = fadd float %408, 0.000000e+00 +< %410 = bitcast i32 %57 to float +< %411 = bitcast i32 %57 to float +< %412 = fmul float %410, %411 +< %413 = fadd float %409, %412 +< %414 = call float @llvm.sqrt.f32(float %413) +< %415 = fneg float %69 +< %416 = fmul float %414, %415 +< %417 = fmul float %416, 0.000000e+00 +< %418 = bitcast i32 %57 to float +< %419 = fadd float %418, %417 +--- +> %407 = fmul float %405, %406 +> %408 = fadd float %407, 0.000000e+00 +> %409 = bitcast i32 %32 to float +> %410 = bitcast i32 %32 to float +> %411 = fmul float %409, %410 +> %412 = fadd float %408, %411 +> %413 = call float @llvm.sqrt.f32(float %412) +> %414 = fneg float %68 +> %415 = fmul float %413, %414 +> %416 = fmul float %415, 0.000000e+00 +> %417 = bitcast i32 %32 to float +> %418 = fadd float %417, %416 +> %419 = bitcast i32 %52 to float +880,898c885,903 +< %421 = bitcast i32 %52 to float +< %422 = fmul float %420, %421 +< %423 = fadd float %422, 0.000000e+00 +< %424 = bitcast i32 %57 to float +< %425 = bitcast i32 %57 to float +< %426 = fmul float %424, %425 +< %427 = fadd float %423, %426 +< %428 = call float @llvm.sqrt.f32(float %427) +< %429 = fneg float %69 +< %430 = fmul float %428, %429 +< %431 = fmul float %430, 0.000000e+00 +< %432 = bitcast i32 %57 to float +< %433 = fadd float %432, %431 +< %434 = fmul float %419, %433 +< %435 = fadd float %405, %434 +< %436 = call float @llvm.sqrt.f32(float %435) +< %437 = fadd float %436, 0.000000e+00 +< %438 = fdiv float %377, %437 +< %439 = fmul float %438, 2.000000e+00 +--- +> %421 = fmul float %419, %420 +> %422 = fadd float %421, 0.000000e+00 +> %423 = bitcast i32 %32 to float +> %424 = bitcast i32 %32 to float +> %425 = fmul float %423, %424 +> %426 = fadd float %422, %425 +> %427 = call float @llvm.sqrt.f32(float %426) +> %428 = fneg float %68 +> %429 = fmul float %427, %428 +> %430 = fmul float %429, 0.000000e+00 +> %431 = bitcast i32 %32 to float +> %432 = fadd float %431, %430 +> %433 = fmul float %418, %432 +> %434 = fadd float %404, %433 +> %435 = call float @llvm.sqrt.f32(float %434) +> %436 = fadd float %435, 0.000000e+00 +> %437 = fdiv float %376, %436 +> %438 = fmul float %437, 2.000000e+00 +> %439 = bitcast i32 %52 to float +900,911c905,916 +< %441 = bitcast i32 %52 to float +< %442 = fmul float %440, %441 +< %443 = fadd float %442, 0.000000e+00 +< %444 = bitcast i32 %57 to float +< %445 = bitcast i32 %57 to float +< %446 = fmul float %444, %445 +< %447 = fadd float %443, %446 +< %448 = call float @llvm.sqrt.f32(float %447) +< %449 = fneg float %69 +< %450 = fmul float %448, %449 +< %451 = bitcast i32 %52 to float +< %452 = fadd float %451, %450 +--- +> %441 = fmul float %439, %440 +> %442 = fadd 
float %441, 0.000000e+00 +> %443 = bitcast i32 %32 to float +> %444 = bitcast i32 %32 to float +> %445 = fmul float %443, %444 +> %446 = fadd float %442, %445 +> %447 = call float @llvm.sqrt.f32(float %446) +> %448 = fneg float %68 +> %449 = fmul float %447, %448 +> %450 = bitcast i32 %52 to float +> %451 = fadd float %450, %449 +> %452 = bitcast i32 %52 to float +913,924c918,929 +< %454 = bitcast i32 %52 to float +< %455 = fmul float %453, %454 +< %456 = fadd float %455, 0.000000e+00 +< %457 = bitcast i32 %57 to float +< %458 = bitcast i32 %57 to float +< %459 = fmul float %457, %458 +< %460 = fadd float %456, %459 +< %461 = call float @llvm.sqrt.f32(float %460) +< %462 = fneg float %69 +< %463 = fmul float %461, %462 +< %464 = bitcast i32 %52 to float +< %465 = fadd float %464, %463 +--- +> %454 = fmul float %452, %453 +> %455 = fadd float %454, 0.000000e+00 +> %456 = bitcast i32 %32 to float +> %457 = bitcast i32 %32 to float +> %458 = fmul float %456, %457 +> %459 = fadd float %455, %458 +> %460 = call float @llvm.sqrt.f32(float %459) +> %461 = fneg float %68 +> %462 = fmul float %460, %461 +> %463 = bitcast i32 %52 to float +> %464 = fadd float %463, %462 +> %465 = bitcast i32 %52 to float +926,939c931,944 +< %467 = bitcast i32 %52 to float +< %468 = fmul float %466, %467 +< %469 = fadd float %468, 0.000000e+00 +< %470 = bitcast i32 %57 to float +< %471 = bitcast i32 %57 to float +< %472 = fmul float %470, %471 +< %473 = fadd float %469, %472 +< %474 = call float @llvm.sqrt.f32(float %473) +< %475 = fneg float %69 +< %476 = fmul float %474, %475 +< %477 = bitcast i32 %52 to float +< %478 = fadd float %477, %476 +< %479 = fmul float %465, %478 +< %480 = fadd float %479, 0.000000e+00 +--- +> %467 = fmul float %465, %466 +> %468 = fadd float %467, 0.000000e+00 +> %469 = bitcast i32 %32 to float +> %470 = bitcast i32 %32 to float +> %471 = fmul float %469, %470 +> %472 = fadd float %468, %471 +> %473 = call float @llvm.sqrt.f32(float %472) +> %474 = fneg float %68 +> %475 = fmul float %473, %474 +> %476 = bitcast i32 %52 to float +> %477 = fadd float %476, %475 +> %478 = fmul float %464, %477 +> %479 = fadd float %478, 0.000000e+00 +> %480 = bitcast i32 %52 to float +941,953c946,958 +< %482 = bitcast i32 %52 to float +< %483 = fmul float %481, %482 +< %484 = fadd float %483, 0.000000e+00 +< %485 = bitcast i32 %57 to float +< %486 = bitcast i32 %57 to float +< %487 = fmul float %485, %486 +< %488 = fadd float %484, %487 +< %489 = call float @llvm.sqrt.f32(float %488) +< %490 = fneg float %69 +< %491 = fmul float %489, %490 +< %492 = fmul float %491, 0.000000e+00 +< %493 = bitcast i32 %57 to float +< %494 = fadd float %493, %492 +--- +> %482 = fmul float %480, %481 +> %483 = fadd float %482, 0.000000e+00 +> %484 = bitcast i32 %32 to float +> %485 = bitcast i32 %32 to float +> %486 = fmul float %484, %485 +> %487 = fadd float %483, %486 +> %488 = call float @llvm.sqrt.f32(float %487) +> %489 = fneg float %68 +> %490 = fmul float %488, %489 +> %491 = fmul float %490, 0.000000e+00 +> %492 = bitcast i32 %32 to float +> %493 = fadd float %492, %491 +> %494 = bitcast i32 %52 to float +955,975c960,980 +< %496 = bitcast i32 %52 to float +< %497 = fmul float %495, %496 +< %498 = fadd float %497, 0.000000e+00 +< %499 = bitcast i32 %57 to float +< %500 = bitcast i32 %57 to float +< %501 = fmul float %499, %500 +< %502 = fadd float %498, %501 +< %503 = call float @llvm.sqrt.f32(float %502) +< %504 = fneg float %69 +< %505 = fmul float %503, %504 +< %506 = fmul float %505, 0.000000e+00 +< %507 = bitcast 
i32 %57 to float +< %508 = fadd float %507, %506 +< %509 = fmul float %494, %508 +< %510 = fadd float %480, %509 +< %511 = call float @llvm.sqrt.f32(float %510) +< %512 = fadd float %511, 0.000000e+00 +< %513 = fdiv float %452, %512 +< %514 = fmul float %439, %513 +< %515 = insertelement <4 x float> %363, float %514, i32 3 +< %516 = fsub <4 x float> , %515 +--- +> %496 = fmul float %494, %495 +> %497 = fadd float %496, 0.000000e+00 +> %498 = bitcast i32 %32 to float +> %499 = bitcast i32 %32 to float +> %500 = fmul float %498, %499 +> %501 = fadd float %497, %500 +> %502 = call float @llvm.sqrt.f32(float %501) +> %503 = fneg float %68 +> %504 = fmul float %502, %503 +> %505 = fmul float %504, 0.000000e+00 +> %506 = bitcast i32 %32 to float +> %507 = fadd float %506, %505 +> %508 = fmul float %493, %507 +> %509 = fadd float %479, %508 +> %510 = call float @llvm.sqrt.f32(float %509) +> %511 = fadd float %510, 0.000000e+00 +> %512 = fdiv float %451, %511 +> %513 = fmul float %438, %512 +> %514 = insertelement <4 x float> %362, float %513, i32 3 +> %515 = fsub <4 x float> , %514 +> %516 = bitcast i32 %52 to float +977,989c982,994 +< %518 = bitcast i32 %52 to float +< %519 = fmul float %517, %518 +< %520 = fadd float %519, 0.000000e+00 +< %521 = bitcast i32 %57 to float +< %522 = bitcast i32 %57 to float +< %523 = fmul float %521, %522 +< %524 = fadd float %520, %523 +< %525 = call float @llvm.sqrt.f32(float %524) +< %526 = fneg float %69 +< %527 = fmul float %525, %526 +< %528 = fmul float %527, 0.000000e+00 +< %529 = bitcast i32 %57 to float +< %530 = fadd float %529, %528 +--- +> %518 = fmul float %516, %517 +> %519 = fadd float %518, 0.000000e+00 +> %520 = bitcast i32 %32 to float +> %521 = bitcast i32 %32 to float +> %522 = fmul float %520, %521 +> %523 = fadd float %519, %522 +> %524 = call float @llvm.sqrt.f32(float %523) +> %525 = fneg float %68 +> %526 = fmul float %524, %525 +> %527 = fmul float %526, 0.000000e+00 +> %528 = bitcast i32 %32 to float +> %529 = fadd float %528, %527 +> %530 = bitcast i32 %52 to float +991,1002c996,1007 +< %532 = bitcast i32 %52 to float +< %533 = fmul float %531, %532 +< %534 = fadd float %533, 0.000000e+00 +< %535 = bitcast i32 %57 to float +< %536 = bitcast i32 %57 to float +< %537 = fmul float %535, %536 +< %538 = fadd float %534, %537 +< %539 = call float @llvm.sqrt.f32(float %538) +< %540 = fneg float %69 +< %541 = fmul float %539, %540 +< %542 = bitcast i32 %52 to float +< %543 = fadd float %542, %541 +--- +> %532 = fmul float %530, %531 +> %533 = fadd float %532, 0.000000e+00 +> %534 = bitcast i32 %32 to float +> %535 = bitcast i32 %32 to float +> %536 = fmul float %534, %535 +> %537 = fadd float %533, %536 +> %538 = call float @llvm.sqrt.f32(float %537) +> %539 = fneg float %68 +> %540 = fmul float %538, %539 +> %541 = bitcast i32 %52 to float +> %542 = fadd float %541, %540 +> %543 = bitcast i32 %52 to float +1004,1017c1009,1022 +< %545 = bitcast i32 %52 to float +< %546 = fmul float %544, %545 +< %547 = fadd float %546, 0.000000e+00 +< %548 = bitcast i32 %57 to float +< %549 = bitcast i32 %57 to float +< %550 = fmul float %548, %549 +< %551 = fadd float %547, %550 +< %552 = call float @llvm.sqrt.f32(float %551) +< %553 = fneg float %69 +< %554 = fmul float %552, %553 +< %555 = bitcast i32 %52 to float +< %556 = fadd float %555, %554 +< %557 = fmul float %543, %556 +< %558 = fadd float %557, 0.000000e+00 +--- +> %545 = fmul float %543, %544 +> %546 = fadd float %545, 0.000000e+00 +> %547 = bitcast i32 %32 to float +> %548 = bitcast i32 %32 to 
float +> %549 = fmul float %547, %548 +> %550 = fadd float %546, %549 +> %551 = call float @llvm.sqrt.f32(float %550) +> %552 = fneg float %68 +> %553 = fmul float %551, %552 +> %554 = bitcast i32 %52 to float +> %555 = fadd float %554, %553 +> %556 = fmul float %542, %555 +> %557 = fadd float %556, 0.000000e+00 +> %558 = bitcast i32 %52 to float +1019,1031c1024,1036 +< %560 = bitcast i32 %52 to float +< %561 = fmul float %559, %560 +< %562 = fadd float %561, 0.000000e+00 +< %563 = bitcast i32 %57 to float +< %564 = bitcast i32 %57 to float +< %565 = fmul float %563, %564 +< %566 = fadd float %562, %565 +< %567 = call float @llvm.sqrt.f32(float %566) +< %568 = fneg float %69 +< %569 = fmul float %567, %568 +< %570 = fmul float %569, 0.000000e+00 +< %571 = bitcast i32 %57 to float +< %572 = fadd float %571, %570 +--- +> %560 = fmul float %558, %559 +> %561 = fadd float %560, 0.000000e+00 +> %562 = bitcast i32 %32 to float +> %563 = bitcast i32 %32 to float +> %564 = fmul float %562, %563 +> %565 = fadd float %561, %564 +> %566 = call float @llvm.sqrt.f32(float %565) +> %567 = fneg float %68 +> %568 = fmul float %566, %567 +> %569 = fmul float %568, 0.000000e+00 +> %570 = bitcast i32 %32 to float +> %571 = fadd float %570, %569 +> %572 = bitcast i32 %52 to float +1033,1051c1038,1056 +< %574 = bitcast i32 %52 to float +< %575 = fmul float %573, %574 +< %576 = fadd float %575, 0.000000e+00 +< %577 = bitcast i32 %57 to float +< %578 = bitcast i32 %57 to float +< %579 = fmul float %577, %578 +< %580 = fadd float %576, %579 +< %581 = call float @llvm.sqrt.f32(float %580) +< %582 = fneg float %69 +< %583 = fmul float %581, %582 +< %584 = fmul float %583, 0.000000e+00 +< %585 = bitcast i32 %57 to float +< %586 = fadd float %585, %584 +< %587 = fmul float %572, %586 +< %588 = fadd float %558, %587 +< %589 = call float @llvm.sqrt.f32(float %588) +< %590 = fadd float %589, 0.000000e+00 +< %591 = fdiv float %530, %590 +< %592 = fmul float %591, 2.000000e+00 +--- +> %574 = fmul float %572, %573 +> %575 = fadd float %574, 0.000000e+00 +> %576 = bitcast i32 %32 to float +> %577 = bitcast i32 %32 to float +> %578 = fmul float %576, %577 +> %579 = fadd float %575, %578 +> %580 = call float @llvm.sqrt.f32(float %579) +> %581 = fneg float %68 +> %582 = fmul float %580, %581 +> %583 = fmul float %582, 0.000000e+00 +> %584 = bitcast i32 %32 to float +> %585 = fadd float %584, %583 +> %586 = fmul float %571, %585 +> %587 = fadd float %557, %586 +> %588 = call float @llvm.sqrt.f32(float %587) +> %589 = fadd float %588, 0.000000e+00 +> %590 = fdiv float %529, %589 +> %591 = fmul float %590, 2.000000e+00 +> %592 = bitcast i32 %52 to float +1053,1065c1058,1070 +< %594 = bitcast i32 %52 to float +< %595 = fmul float %593, %594 +< %596 = fadd float %595, 0.000000e+00 +< %597 = bitcast i32 %57 to float +< %598 = bitcast i32 %57 to float +< %599 = fmul float %597, %598 +< %600 = fadd float %596, %599 +< %601 = call float @llvm.sqrt.f32(float %600) +< %602 = fneg float %69 +< %603 = fmul float %601, %602 +< %604 = fmul float %603, 0.000000e+00 +< %605 = bitcast i32 %57 to float +< %606 = fadd float %605, %604 +--- +> %594 = fmul float %592, %593 +> %595 = fadd float %594, 0.000000e+00 +> %596 = bitcast i32 %32 to float +> %597 = bitcast i32 %32 to float +> %598 = fmul float %596, %597 +> %599 = fadd float %595, %598 +> %600 = call float @llvm.sqrt.f32(float %599) +> %601 = fneg float %68 +> %602 = fmul float %600, %601 +> %603 = fmul float %602, 0.000000e+00 +> %604 = bitcast i32 %32 to float +> %605 = fadd float %604, 
%603 +> %606 = bitcast i32 %52 to float +1067,1078c1072,1083 +< %608 = bitcast i32 %52 to float +< %609 = fmul float %607, %608 +< %610 = fadd float %609, 0.000000e+00 +< %611 = bitcast i32 %57 to float +< %612 = bitcast i32 %57 to float +< %613 = fmul float %611, %612 +< %614 = fadd float %610, %613 +< %615 = call float @llvm.sqrt.f32(float %614) +< %616 = fneg float %69 +< %617 = fmul float %615, %616 +< %618 = bitcast i32 %52 to float +< %619 = fadd float %618, %617 +--- +> %608 = fmul float %606, %607 +> %609 = fadd float %608, 0.000000e+00 +> %610 = bitcast i32 %32 to float +> %611 = bitcast i32 %32 to float +> %612 = fmul float %610, %611 +> %613 = fadd float %609, %612 +> %614 = call float @llvm.sqrt.f32(float %613) +> %615 = fneg float %68 +> %616 = fmul float %614, %615 +> %617 = bitcast i32 %52 to float +> %618 = fadd float %617, %616 +> %619 = bitcast i32 %52 to float +1080,1093c1085,1098 +< %621 = bitcast i32 %52 to float +< %622 = fmul float %620, %621 +< %623 = fadd float %622, 0.000000e+00 +< %624 = bitcast i32 %57 to float +< %625 = bitcast i32 %57 to float +< %626 = fmul float %624, %625 +< %627 = fadd float %623, %626 +< %628 = call float @llvm.sqrt.f32(float %627) +< %629 = fneg float %69 +< %630 = fmul float %628, %629 +< %631 = bitcast i32 %52 to float +< %632 = fadd float %631, %630 +< %633 = fmul float %619, %632 +< %634 = fadd float %633, 0.000000e+00 +--- +> %621 = fmul float %619, %620 +> %622 = fadd float %621, 0.000000e+00 +> %623 = bitcast i32 %32 to float +> %624 = bitcast i32 %32 to float +> %625 = fmul float %623, %624 +> %626 = fadd float %622, %625 +> %627 = call float @llvm.sqrt.f32(float %626) +> %628 = fneg float %68 +> %629 = fmul float %627, %628 +> %630 = bitcast i32 %52 to float +> %631 = fadd float %630, %629 +> %632 = fmul float %618, %631 +> %633 = fadd float %632, 0.000000e+00 +> %634 = bitcast i32 %52 to float +1095,1107c1100,1112 +< %636 = bitcast i32 %52 to float +< %637 = fmul float %635, %636 +< %638 = fadd float %637, 0.000000e+00 +< %639 = bitcast i32 %57 to float +< %640 = bitcast i32 %57 to float +< %641 = fmul float %639, %640 +< %642 = fadd float %638, %641 +< %643 = call float @llvm.sqrt.f32(float %642) +< %644 = fneg float %69 +< %645 = fmul float %643, %644 +< %646 = fmul float %645, 0.000000e+00 +< %647 = bitcast i32 %57 to float +< %648 = fadd float %647, %646 +--- +> %636 = fmul float %634, %635 +> %637 = fadd float %636, 0.000000e+00 +> %638 = bitcast i32 %32 to float +> %639 = bitcast i32 %32 to float +> %640 = fmul float %638, %639 +> %641 = fadd float %637, %640 +> %642 = call float @llvm.sqrt.f32(float %641) +> %643 = fneg float %68 +> %644 = fmul float %642, %643 +> %645 = fmul float %644, 0.000000e+00 +> %646 = bitcast i32 %32 to float +> %647 = fadd float %646, %645 +> %648 = bitcast i32 %52 to float +1109,1162c1114,1147 +< %650 = bitcast i32 %52 to float +< %651 = fmul float %649, %650 +< %652 = fadd float %651, 0.000000e+00 +< %653 = bitcast i32 %57 to float +< %654 = bitcast i32 %57 to float +< %655 = fmul float %653, %654 +< %656 = fadd float %652, %655 +< %657 = call float @llvm.sqrt.f32(float %656) +< %658 = fneg float %69 +< %659 = fmul float %657, %658 +< %660 = fmul float %659, 0.000000e+00 +< %661 = bitcast i32 %57 to float +< %662 = fadd float %661, %660 +< %663 = fmul float %648, %662 +< %664 = fadd float %634, %663 +< %665 = call float @llvm.sqrt.f32(float %664) +< %666 = fadd float %665, 0.000000e+00 +< %667 = fdiv float %606, %666 +< %668 = fmul float %592, %667 +< %669 = fsub float 1.000000e+00, %668 +< 
%670 = insertelement <4 x float> zeroinitializer, float %669, i32 0 +< %671 = insertelement <4 x float> %670, float 0.000000e+00, i32 1 +< %672 = insertelement <4 x float> %671, float 0.000000e+00, i32 2 +< %673 = insertelement <4 x float> %672, float 0.000000e+00, i32 3 +< %674 = shufflevector <4 x float> %516, <4 x float> %673, <8 x i32> +< %675 = extractelement <8 x float> %674, i32 0 +< %676 = getelementptr float, float* %2, i32 0 +< %677 = getelementptr inbounds float, float* %676, i64 3 +< %678 = bitcast float* %677 to i32* +< %679 = bitcast i32* %678 to float* +< store float %675, float* %679, align 4 +< %680 = extractelement <8 x float> %674, i32 1 +< %681 = alloca [4 x float], align 16 +< %682 = bitcast [4 x float]* %681 to i32* +< %683 = bitcast i32* %682 to float* +< store float %680, float* %683, align 4 +< %684 = extractelement <8 x float> %674, i32 2 +< %685 = getelementptr inbounds [4 x float], [4 x float]* %681, i64 0, i64 1 +< %686 = bitcast float* %685 to i32* +< %687 = bitcast i32* %686 to float* +< store float %684, float* %687, align 4 +< %688 = extractelement <8 x float> %674, i32 3 +< %689 = getelementptr inbounds [4 x float], [4 x float]* %681, i64 0, i64 2 +< %690 = bitcast float* %689 to i32* +< %691 = bitcast i32* %690 to float* +< store float %688, float* %691, align 4 +< %692 = extractelement <8 x float> %674, i32 4 +< %693 = getelementptr inbounds [4 x float], [4 x float]* %681, i64 0, i64 3 +< %694 = bitcast float* %693 to i32* +< %695 = bitcast i32* %694 to float* +< store float %692, float* %695, align 4 +< %696 = bitcast float* %1 to i8* +< %697 = bitcast [4 x float]* %681 to i8* +< call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %696, i8* nonnull align 16 dereferenceable(16) %697, i64 16, i1 false) +--- +> %650 = fmul float %648, %649 +> %651 = fadd float %650, 0.000000e+00 +> %652 = bitcast i32 %32 to float +> %653 = bitcast i32 %32 to float +> %654 = fmul float %652, %653 +> %655 = fadd float %651, %654 +> %656 = call float @llvm.sqrt.f32(float %655) +> %657 = fneg float %68 +> %658 = fmul float %656, %657 +> %659 = fmul float %658, 0.000000e+00 +> %660 = bitcast i32 %32 to float +> %661 = fadd float %660, %659 +> %662 = fmul float %647, %661 +> %663 = fadd float %633, %662 +> %664 = call float @llvm.sqrt.f32(float %663) +> %665 = fadd float %664, 0.000000e+00 +> %666 = fdiv float %605, %665 +> %667 = fmul float %591, %666 +> %668 = fsub float 1.000000e+00, %667 +> %669 = insertelement <4 x float> zeroinitializer, float %668, i32 0 +> %670 = insertelement <4 x float> %669, float 0.000000e+00, i32 1 +> %671 = insertelement <4 x float> %670, float 0.000000e+00, i32 2 +> %672 = insertelement <4 x float> %671, float 0.000000e+00, i32 3 +> %673 = shufflevector <4 x float> %515, <4 x float> %672, <8 x i32> +> %674 = extractelement <8 x float> %673, i32 0 +> %675 = getelementptr float, float* %2, i32 0 +> %676 = getelementptr inbounds float, float* %675, i64 3 +> %677 = bitcast float* %676 to i32* +> %678 = bitcast i32* %677 to float* +> store float %674, float* %678, align 4 +> %679 = bitcast float* %1 to i8* +> %680 = alloca [4 x float], align 16 +> %681 = bitcast [4 x float]* %680 to i8* +> call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %679, i8* nonnull align 16 dereferenceable(16) %681, i64 16, i1 false) +1164,1168c1149,1169 +< %698 = getelementptr float, float* %0, i32 0 +< %699 = bitcast float* %698 to i32* +< %700 = load i32, i32* %699, align 4 +< %701 = bitcast i32 %700 to float +< %702 = 
bitcast i32 %700 to float +--- +> %682 = bitcast i32 %52 to float +> %683 = bitcast i32 %52 to float +> %684 = fmul float %682, %683 +> %685 = fadd float %684, 0.000000e+00 +> %686 = load i32, i32* %31, align 4 +> %687 = bitcast i32 %686 to float +> %688 = bitcast i32 %686 to float +> %689 = fmul float %687, %688 +> %690 = fadd float %685, %689 +> %691 = call float @llvm.sqrt.f32(float %690) +> %692 = sitofp i32 %67 to float +> %693 = fneg float %692 +> %694 = fmul float %691, %693 +> %695 = bitcast i32 %52 to float +> %696 = fadd float %695, %694 +> %697 = bitcast i32 %52 to float +> %698 = bitcast i32 %52 to float +> %699 = fmul float %697, %698 +> %700 = fadd float %699, 0.000000e+00 +> %701 = bitcast i32 %686 to float +> %702 = bitcast i32 %686 to float +1170,1172c1171,1173 +< %704 = fadd float %703, 0.000000e+00 +< %705 = bitcast i32 %57 to float +< %706 = bitcast i32 %57 to float +--- +> %704 = fadd float %700, %703 +> %705 = call float @llvm.sqrt.f32(float %704) +> %706 = fneg float %692 +1174,1207c1175,1208 +< %708 = fadd float %704, %707 +< %709 = call float @llvm.sqrt.f32(float %708) +< %710 = sitofp i32 %68 to float +< %711 = fneg float %710 +< %712 = fmul float %709, %711 +< %713 = bitcast i32 %700 to float +< %714 = fadd float %713, %712 +< %715 = bitcast i32 %700 to float +< %716 = bitcast i32 %700 to float +< %717 = fmul float %715, %716 +< %718 = fadd float %717, 0.000000e+00 +< %719 = bitcast i32 %57 to float +< %720 = bitcast i32 %57 to float +< %721 = fmul float %719, %720 +< %722 = fadd float %718, %721 +< %723 = call float @llvm.sqrt.f32(float %722) +< %724 = fneg float %710 +< %725 = fmul float %723, %724 +< %726 = bitcast i32 %700 to float +< %727 = fadd float %726, %725 +< %728 = bitcast i32 %700 to float +< %729 = bitcast i32 %700 to float +< %730 = fmul float %728, %729 +< %731 = fadd float %730, 0.000000e+00 +< %732 = bitcast i32 %57 to float +< %733 = bitcast i32 %57 to float +< %734 = fmul float %732, %733 +< %735 = fadd float %731, %734 +< %736 = call float @llvm.sqrt.f32(float %735) +< %737 = fneg float %710 +< %738 = fmul float %736, %737 +< %739 = bitcast i32 %700 to float +< %740 = fadd float %739, %738 +< %741 = fmul float %727, %740 +--- +> %708 = bitcast i32 %52 to float +> %709 = fadd float %708, %707 +> %710 = bitcast i32 %52 to float +> %711 = bitcast i32 %52 to float +> %712 = fmul float %710, %711 +> %713 = fadd float %712, 0.000000e+00 +> %714 = bitcast i32 %686 to float +> %715 = bitcast i32 %686 to float +> %716 = fmul float %714, %715 +> %717 = fadd float %713, %716 +> %718 = call float @llvm.sqrt.f32(float %717) +> %719 = fneg float %692 +> %720 = fmul float %718, %719 +> %721 = bitcast i32 %52 to float +> %722 = fadd float %721, %720 +> %723 = fmul float %709, %722 +> %724 = fadd float %723, 0.000000e+00 +> %725 = bitcast i32 %52 to float +> %726 = bitcast i32 %52 to float +> %727 = fmul float %725, %726 +> %728 = fadd float %727, 0.000000e+00 +> %729 = bitcast i32 %686 to float +> %730 = bitcast i32 %686 to float +> %731 = fmul float %729, %730 +> %732 = fadd float %728, %731 +> %733 = call float @llvm.sqrt.f32(float %732) +> %734 = fneg float %692 +> %735 = fmul float %733, %734 +> %736 = fmul float %735, 0.000000e+00 +> %737 = bitcast i32 %686 to float +> %738 = fadd float %737, %736 +> %739 = bitcast i32 %52 to float +> %740 = bitcast i32 %52 to float +> %741 = fmul float %739, %740 +1209,1210c1210,1211 +< %743 = bitcast i32 %700 to float +< %744 = bitcast i32 %700 to float +--- +> %743 = bitcast i32 %686 to float +> %744 = bitcast i32 
%686 to float +1212,1214c1213,1215 +< %746 = fadd float %745, 0.000000e+00 +< %747 = bitcast i32 %57 to float +< %748 = bitcast i32 %57 to float +--- +> %746 = fadd float %742, %745 +> %747 = call float @llvm.sqrt.f32(float %746) +> %748 = fneg float %692 +1216,1252c1217,1253 +< %750 = fadd float %746, %749 +< %751 = call float @llvm.sqrt.f32(float %750) +< %752 = fneg float %710 +< %753 = fmul float %751, %752 +< %754 = fmul float %753, 0.000000e+00 +< %755 = bitcast i32 %57 to float +< %756 = fadd float %755, %754 +< %757 = bitcast i32 %700 to float +< %758 = bitcast i32 %700 to float +< %759 = fmul float %757, %758 +< %760 = fadd float %759, 0.000000e+00 +< %761 = bitcast i32 %57 to float +< %762 = bitcast i32 %57 to float +< %763 = fmul float %761, %762 +< %764 = fadd float %760, %763 +< %765 = call float @llvm.sqrt.f32(float %764) +< %766 = fneg float %710 +< %767 = fmul float %765, %766 +< %768 = fmul float %767, 0.000000e+00 +< %769 = bitcast i32 %57 to float +< %770 = fadd float %769, %768 +< %771 = fmul float %756, %770 +< %772 = fadd float %742, %771 +< %773 = call float @llvm.sqrt.f32(float %772) +< %774 = fadd float %773, 0.000000e+00 +< %775 = fdiv float %714, %774 +< %776 = fmul float %775, 2.000000e+00 +< %777 = bitcast i32 %700 to float +< %778 = bitcast i32 %700 to float +< %779 = fmul float %777, %778 +< %780 = fadd float %779, 0.000000e+00 +< %781 = bitcast i32 %57 to float +< %782 = bitcast i32 %57 to float +< %783 = fmul float %781, %782 +< %784 = fadd float %780, %783 +< %785 = call float @llvm.sqrt.f32(float %784) +< %786 = fneg float %710 +--- +> %750 = fmul float %749, 0.000000e+00 +> %751 = bitcast i32 %686 to float +> %752 = fadd float %751, %750 +> %753 = fmul float %738, %752 +> %754 = fadd float %724, %753 +> %755 = call float @llvm.sqrt.f32(float %754) +> %756 = fadd float %755, 0.000000e+00 +> %757 = fdiv float %696, %756 +> %758 = fmul float %757, 2.000000e+00 +> %759 = bitcast i32 %52 to float +> %760 = bitcast i32 %52 to float +> %761 = fmul float %759, %760 +> %762 = fadd float %761, 0.000000e+00 +> %763 = bitcast i32 %686 to float +> %764 = bitcast i32 %686 to float +> %765 = fmul float %763, %764 +> %766 = fadd float %762, %765 +> %767 = call float @llvm.sqrt.f32(float %766) +> %768 = fneg float %692 +> %769 = fmul float %767, %768 +> %770 = bitcast i32 %52 to float +> %771 = fadd float %770, %769 +> %772 = bitcast i32 %52 to float +> %773 = bitcast i32 %52 to float +> %774 = fmul float %772, %773 +> %775 = fadd float %774, 0.000000e+00 +> %776 = bitcast i32 %686 to float +> %777 = bitcast i32 %686 to float +> %778 = fmul float %776, %777 +> %779 = fadd float %775, %778 +> %780 = call float @llvm.sqrt.f32(float %779) +> %781 = fneg float %692 +> %782 = fmul float %780, %781 +> %783 = bitcast i32 %52 to float +> %784 = fadd float %783, %782 +> %785 = bitcast i32 %52 to float +> %786 = bitcast i32 %52 to float +1254,1282c1255,1283 +< %788 = bitcast i32 %700 to float +< %789 = fadd float %788, %787 +< %790 = bitcast i32 %700 to float +< %791 = bitcast i32 %700 to float +< %792 = fmul float %790, %791 +< %793 = fadd float %792, 0.000000e+00 +< %794 = bitcast i32 %57 to float +< %795 = bitcast i32 %57 to float +< %796 = fmul float %794, %795 +< %797 = fadd float %793, %796 +< %798 = call float @llvm.sqrt.f32(float %797) +< %799 = fneg float %710 +< %800 = fmul float %798, %799 +< %801 = bitcast i32 %700 to float +< %802 = fadd float %801, %800 +< %803 = bitcast i32 %700 to float +< %804 = bitcast i32 %700 to float +< %805 = fmul float %803, %804 +< %806 = 
fadd float %805, 0.000000e+00 +< %807 = bitcast i32 %57 to float +< %808 = bitcast i32 %57 to float +< %809 = fmul float %807, %808 +< %810 = fadd float %806, %809 +< %811 = call float @llvm.sqrt.f32(float %810) +< %812 = fneg float %710 +< %813 = fmul float %811, %812 +< %814 = bitcast i32 %700 to float +< %815 = fadd float %814, %813 +< %816 = fmul float %802, %815 +--- +> %788 = fadd float %787, 0.000000e+00 +> %789 = bitcast i32 %686 to float +> %790 = bitcast i32 %686 to float +> %791 = fmul float %789, %790 +> %792 = fadd float %788, %791 +> %793 = call float @llvm.sqrt.f32(float %792) +> %794 = fneg float %692 +> %795 = fmul float %793, %794 +> %796 = bitcast i32 %52 to float +> %797 = fadd float %796, %795 +> %798 = fmul float %784, %797 +> %799 = fadd float %798, 0.000000e+00 +> %800 = bitcast i32 %52 to float +> %801 = bitcast i32 %52 to float +> %802 = fmul float %800, %801 +> %803 = fadd float %802, 0.000000e+00 +> %804 = bitcast i32 %686 to float +> %805 = bitcast i32 %686 to float +> %806 = fmul float %804, %805 +> %807 = fadd float %803, %806 +> %808 = call float @llvm.sqrt.f32(float %807) +> %809 = fneg float %692 +> %810 = fmul float %808, %809 +> %811 = fmul float %810, 0.000000e+00 +> %812 = bitcast i32 %686 to float +> %813 = fadd float %812, %811 +> %814 = bitcast i32 %52 to float +> %815 = bitcast i32 %52 to float +> %816 = fmul float %814, %815 +1284,1285c1285,1286 +< %818 = bitcast i32 %700 to float +< %819 = bitcast i32 %700 to float +--- +> %818 = bitcast i32 %686 to float +> %819 = bitcast i32 %686 to float +1287,1289c1288,1290 +< %821 = fadd float %820, 0.000000e+00 +< %822 = bitcast i32 %57 to float +< %823 = bitcast i32 %57 to float +--- +> %821 = fadd float %817, %820 +> %822 = call float @llvm.sqrt.f32(float %821) +> %823 = fneg float %692 +1291,1360c1292,1361 +< %825 = fadd float %821, %824 +< %826 = call float @llvm.sqrt.f32(float %825) +< %827 = fneg float %710 +< %828 = fmul float %826, %827 +< %829 = fmul float %828, 0.000000e+00 +< %830 = bitcast i32 %57 to float +< %831 = fadd float %830, %829 +< %832 = bitcast i32 %700 to float +< %833 = bitcast i32 %700 to float +< %834 = fmul float %832, %833 +< %835 = fadd float %834, 0.000000e+00 +< %836 = bitcast i32 %57 to float +< %837 = bitcast i32 %57 to float +< %838 = fmul float %836, %837 +< %839 = fadd float %835, %838 +< %840 = call float @llvm.sqrt.f32(float %839) +< %841 = fneg float %710 +< %842 = fmul float %840, %841 +< %843 = fmul float %842, 0.000000e+00 +< %844 = bitcast i32 %57 to float +< %845 = fadd float %844, %843 +< %846 = fmul float %831, %845 +< %847 = fadd float %817, %846 +< %848 = call float @llvm.sqrt.f32(float %847) +< %849 = fadd float %848, 0.000000e+00 +< %850 = fdiv float %789, %849 +< %851 = fmul float %776, %850 +< %852 = fsub float 1.000000e+00, %851 +< %853 = insertelement <4 x float> zeroinitializer, float %852, i32 0 +< %854 = insertelement <4 x float> %853, float 0.000000e+00, i32 1 +< %855 = insertelement <4 x float> %854, float 0.000000e+00, i32 2 +< %856 = insertelement <4 x float> %855, float 0.000000e+00, i32 3 +< %857 = getelementptr float, float* %0, i32 0 +< %858 = load float, float* %857, align 4 +< %859 = insertelement <4 x float> zeroinitializer, float %858, i32 0 +< %860 = insertelement <4 x float> %859, float 0.000000e+00, i32 1 +< %861 = insertelement <4 x float> %860, float 0.000000e+00, i32 2 +< %862 = insertelement <4 x float> %861, float 0.000000e+00, i32 3 +< %863 = call <4 x float> @llvm.fma.v4f32(<4 x float> %856, <4 x float> %862, <4 x float> 
zeroinitializer) +< %864 = extractelement <4 x float> %863, i32 0 +< store float %864, float* %2, align 4 +< %865 = load i32, i32* %699, align 4 +< %866 = bitcast i32 %865 to float +< %867 = bitcast i32 %865 to float +< %868 = fmul float %866, %867 +< %869 = fadd float %868, 0.000000e+00 +< %870 = bitcast i32 %57 to float +< %871 = bitcast i32 %57 to float +< %872 = fmul float %870, %871 +< %873 = fadd float %869, %872 +< %874 = call float @llvm.sqrt.f32(float %873) +< %875 = fneg float %710 +< %876 = fmul float %874, %875 +< %877 = bitcast i32 %865 to float +< %878 = fadd float %877, %876 +< %879 = bitcast i32 %865 to float +< %880 = bitcast i32 %865 to float +< %881 = fmul float %879, %880 +< %882 = fadd float %881, 0.000000e+00 +< %883 = bitcast i32 %57 to float +< %884 = bitcast i32 %57 to float +< %885 = fmul float %883, %884 +< %886 = fadd float %882, %885 +< %887 = call float @llvm.sqrt.f32(float %886) +< %888 = fneg float %710 +< %889 = fmul float %887, %888 +< %890 = bitcast i32 %865 to float +< %891 = fadd float %890, %889 +< %892 = bitcast i32 %865 to float +< %893 = bitcast i32 %865 to float +--- +> %825 = fmul float %824, 0.000000e+00 +> %826 = bitcast i32 %686 to float +> %827 = fadd float %826, %825 +> %828 = fmul float %813, %827 +> %829 = fadd float %799, %828 +> %830 = call float @llvm.sqrt.f32(float %829) +> %831 = fadd float %830, 0.000000e+00 +> %832 = fdiv float %771, %831 +> %833 = fmul float %758, %832 +> %834 = fsub float 1.000000e+00, %833 +> %835 = insertelement <4 x float> zeroinitializer, float %834, i32 0 +> %836 = insertelement <4 x float> %835, float 0.000000e+00, i32 1 +> %837 = insertelement <4 x float> %836, float 0.000000e+00, i32 2 +> %838 = insertelement <4 x float> %837, float 0.000000e+00, i32 3 +> %839 = getelementptr float, float* %0, i32 0 +> %840 = load float, float* %839, align 4 +> %841 = insertelement <4 x float> zeroinitializer, float %840, i32 0 +> %842 = insertelement <4 x float> %841, float 0.000000e+00, i32 1 +> %843 = insertelement <4 x float> %842, float 0.000000e+00, i32 2 +> %844 = insertelement <4 x float> %843, float 0.000000e+00, i32 3 +> %845 = call <4 x float> @llvm.fma.v4f32(<4 x float> %838, <4 x float> %844, <4 x float> zeroinitializer) +> %846 = extractelement <4 x float> %845, i32 0 +> store float %846, float* %2, align 4 +> %847 = bitcast i32 %52 to float +> %848 = bitcast i32 %52 to float +> %849 = fmul float %847, %848 +> %850 = fadd float %849, 0.000000e+00 +> %851 = bitcast i32 %686 to float +> %852 = bitcast i32 %686 to float +> %853 = fmul float %851, %852 +> %854 = fadd float %850, %853 +> %855 = call float @llvm.sqrt.f32(float %854) +> %856 = fneg float %692 +> %857 = fmul float %855, %856 +> %858 = bitcast i32 %52 to float +> %859 = fadd float %858, %857 +> %860 = bitcast i32 %52 to float +> %861 = bitcast i32 %52 to float +> %862 = fmul float %860, %861 +> %863 = fadd float %862, 0.000000e+00 +> %864 = bitcast i32 %686 to float +> %865 = bitcast i32 %686 to float +> %866 = fmul float %864, %865 +> %867 = fadd float %863, %866 +> %868 = call float @llvm.sqrt.f32(float %867) +> %869 = fneg float %692 +> %870 = fmul float %868, %869 +> %871 = bitcast i32 %52 to float +> %872 = fadd float %871, %870 +> %873 = bitcast i32 %52 to float +> %874 = bitcast i32 %52 to float +> %875 = fmul float %873, %874 +> %876 = fadd float %875, 0.000000e+00 +> %877 = bitcast i32 %686 to float +> %878 = bitcast i32 %686 to float +> %879 = fmul float %877, %878 +> %880 = fadd float %876, %879 +> %881 = call float @llvm.sqrt.f32(float 
%880) +> %882 = fneg float %692 +> %883 = fmul float %881, %882 +> %884 = bitcast i32 %52 to float +> %885 = fadd float %884, %883 +> %886 = fmul float %872, %885 +> %887 = fadd float %886, 0.000000e+00 +> %888 = bitcast i32 %52 to float +> %889 = bitcast i32 %52 to float +> %890 = fmul float %888, %889 +> %891 = fadd float %890, 0.000000e+00 +> %892 = bitcast i32 %686 to float +> %893 = bitcast i32 %686 to float +1362,1364c1363,1365 +< %895 = fadd float %894, 0.000000e+00 +< %896 = bitcast i32 %57 to float +< %897 = bitcast i32 %57 to float +--- +> %895 = fadd float %891, %894 +> %896 = call float @llvm.sqrt.f32(float %895) +> %897 = fneg float %692 +1366,1400c1367,1401 +< %899 = fadd float %895, %898 +< %900 = call float @llvm.sqrt.f32(float %899) +< %901 = fneg float %710 +< %902 = fmul float %900, %901 +< %903 = bitcast i32 %865 to float +< %904 = fadd float %903, %902 +< %905 = fmul float %891, %904 +< %906 = fadd float %905, 0.000000e+00 +< %907 = bitcast i32 %865 to float +< %908 = bitcast i32 %865 to float +< %909 = fmul float %907, %908 +< %910 = fadd float %909, 0.000000e+00 +< %911 = bitcast i32 %57 to float +< %912 = bitcast i32 %57 to float +< %913 = fmul float %911, %912 +< %914 = fadd float %910, %913 +< %915 = call float @llvm.sqrt.f32(float %914) +< %916 = fneg float %710 +< %917 = fmul float %915, %916 +< %918 = fmul float %917, 0.000000e+00 +< %919 = bitcast i32 %57 to float +< %920 = fadd float %919, %918 +< %921 = bitcast i32 %865 to float +< %922 = bitcast i32 %865 to float +< %923 = fmul float %921, %922 +< %924 = fadd float %923, 0.000000e+00 +< %925 = bitcast i32 %57 to float +< %926 = bitcast i32 %57 to float +< %927 = fmul float %925, %926 +< %928 = fadd float %924, %927 +< %929 = call float @llvm.sqrt.f32(float %928) +< %930 = fneg float %710 +< %931 = fmul float %929, %930 +< %932 = fmul float %931, 0.000000e+00 +< %933 = bitcast i32 %57 to float +--- +> %899 = fmul float %898, 0.000000e+00 +> %900 = bitcast i32 %686 to float +> %901 = fadd float %900, %899 +> %902 = bitcast i32 %52 to float +> %903 = bitcast i32 %52 to float +> %904 = fmul float %902, %903 +> %905 = fadd float %904, 0.000000e+00 +> %906 = bitcast i32 %686 to float +> %907 = bitcast i32 %686 to float +> %908 = fmul float %906, %907 +> %909 = fadd float %905, %908 +> %910 = call float @llvm.sqrt.f32(float %909) +> %911 = fneg float %692 +> %912 = fmul float %910, %911 +> %913 = fmul float %912, 0.000000e+00 +> %914 = bitcast i32 %686 to float +> %915 = fadd float %914, %913 +> %916 = fmul float %901, %915 +> %917 = fadd float %887, %916 +> %918 = call float @llvm.sqrt.f32(float %917) +> %919 = fadd float %918, 0.000000e+00 +> %920 = fdiv float %859, %919 +> %921 = fmul float %920, 2.000000e+00 +> %922 = bitcast i32 %52 to float +> %923 = bitcast i32 %52 to float +> %924 = fmul float %922, %923 +> %925 = fadd float %924, 0.000000e+00 +> %926 = bitcast i32 %686 to float +> %927 = bitcast i32 %686 to float +> %928 = fmul float %926, %927 +> %929 = fadd float %925, %928 +> %930 = call float @llvm.sqrt.f32(float %929) +> %931 = fneg float %692 +> %932 = fmul float %930, %931 +> %933 = bitcast i32 %52 to float +1402,1404c1403,1405 +< %935 = fmul float %920, %934 +< %936 = fadd float %906, %935 +< %937 = call float @llvm.sqrt.f32(float %936) +--- +> %935 = bitcast i32 %52 to float +> %936 = bitcast i32 %52 to float +> %937 = fmul float %935, %936 +1406,1435c1407,1436 +< %939 = fdiv float %878, %938 +< %940 = fmul float %939, 2.000000e+00 +< %941 = bitcast i32 %865 to float +< %942 = bitcast i32 %865 to 
float +< %943 = fmul float %941, %942 +< %944 = fadd float %943, 0.000000e+00 +< %945 = bitcast i32 %57 to float +< %946 = bitcast i32 %57 to float +< %947 = fmul float %945, %946 +< %948 = fadd float %944, %947 +< %949 = call float @llvm.sqrt.f32(float %948) +< %950 = fneg float %710 +< %951 = fmul float %949, %950 +< %952 = bitcast i32 %865 to float +< %953 = fadd float %952, %951 +< %954 = bitcast i32 %865 to float +< %955 = bitcast i32 %865 to float +< %956 = fmul float %954, %955 +< %957 = fadd float %956, 0.000000e+00 +< %958 = bitcast i32 %57 to float +< %959 = bitcast i32 %57 to float +< %960 = fmul float %958, %959 +< %961 = fadd float %957, %960 +< %962 = call float @llvm.sqrt.f32(float %961) +< %963 = fneg float %710 +< %964 = fmul float %962, %963 +< %965 = bitcast i32 %865 to float +< %966 = fadd float %965, %964 +< %967 = bitcast i32 %865 to float +< %968 = bitcast i32 %865 to float +--- +> %939 = bitcast i32 %686 to float +> %940 = bitcast i32 %686 to float +> %941 = fmul float %939, %940 +> %942 = fadd float %938, %941 +> %943 = call float @llvm.sqrt.f32(float %942) +> %944 = fneg float %692 +> %945 = fmul float %943, %944 +> %946 = bitcast i32 %52 to float +> %947 = fadd float %946, %945 +> %948 = bitcast i32 %52 to float +> %949 = bitcast i32 %52 to float +> %950 = fmul float %948, %949 +> %951 = fadd float %950, 0.000000e+00 +> %952 = bitcast i32 %686 to float +> %953 = bitcast i32 %686 to float +> %954 = fmul float %952, %953 +> %955 = fadd float %951, %954 +> %956 = call float @llvm.sqrt.f32(float %955) +> %957 = fneg float %692 +> %958 = fmul float %956, %957 +> %959 = bitcast i32 %52 to float +> %960 = fadd float %959, %958 +> %961 = fmul float %947, %960 +> %962 = fadd float %961, 0.000000e+00 +> %963 = bitcast i32 %52 to float +> %964 = bitcast i32 %52 to float +> %965 = fmul float %963, %964 +> %966 = fadd float %965, 0.000000e+00 +> %967 = bitcast i32 %686 to float +> %968 = bitcast i32 %686 to float +1437,1439c1438,1440 +< %970 = fadd float %969, 0.000000e+00 +< %971 = bitcast i32 %57 to float +< %972 = bitcast i32 %57 to float +--- +> %970 = fadd float %966, %969 +> %971 = call float @llvm.sqrt.f32(float %970) +> %972 = fneg float %692 +1441,1465c1442,1466 +< %974 = fadd float %970, %973 +< %975 = call float @llvm.sqrt.f32(float %974) +< %976 = fneg float %710 +< %977 = fmul float %975, %976 +< %978 = bitcast i32 %865 to float +< %979 = fadd float %978, %977 +< %980 = fmul float %966, %979 +< %981 = fadd float %980, 0.000000e+00 +< %982 = bitcast i32 %865 to float +< %983 = bitcast i32 %865 to float +< %984 = fmul float %982, %983 +< %985 = fadd float %984, 0.000000e+00 +< %986 = bitcast i32 %57 to float +< %987 = bitcast i32 %57 to float +< %988 = fmul float %986, %987 +< %989 = fadd float %985, %988 +< %990 = call float @llvm.sqrt.f32(float %989) +< %991 = fneg float %710 +< %992 = fmul float %990, %991 +< %993 = fmul float %992, 0.000000e+00 +< %994 = bitcast i32 %57 to float +< %995 = fadd float %994, %993 +< %996 = bitcast i32 %865 to float +< %997 = bitcast i32 %865 to float +< %998 = fmul float %996, %997 +--- +> %974 = fmul float %973, 0.000000e+00 +> %975 = bitcast i32 %686 to float +> %976 = fadd float %975, %974 +> %977 = bitcast i32 %52 to float +> %978 = bitcast i32 %52 to float +> %979 = fmul float %977, %978 +> %980 = fadd float %979, 0.000000e+00 +> %981 = bitcast i32 %686 to float +> %982 = bitcast i32 %686 to float +> %983 = fmul float %981, %982 +> %984 = fadd float %980, %983 +> %985 = call float @llvm.sqrt.f32(float %984) +> %986 = fneg 
float %692 +> %987 = fmul float %985, %986 +> %988 = fmul float %987, 0.000000e+00 +> %989 = bitcast i32 %686 to float +> %990 = fadd float %989, %988 +> %991 = fmul float %976, %990 +> %992 = fadd float %962, %991 +> %993 = call float @llvm.sqrt.f32(float %992) +> %994 = fadd float %993, 0.000000e+00 +> %995 = fdiv float %934, %994 +> %996 = fmul float %921, %995 +> %997 = fsub float 1.000000e+00, %996 +> %998 = fmul float %997, %840 +1467,1468c1468,1469 +< %1000 = bitcast i32 %57 to float +< %1001 = bitcast i32 %57 to float +--- +> %1000 = bitcast i32 %52 to float +> %1001 = bitcast i32 %52 to float +1470,1472c1471,1473 +< %1003 = fadd float %999, %1002 +< %1004 = call float @llvm.sqrt.f32(float %1003) +< %1005 = fneg float %710 +--- +> %1003 = fadd float %1002, 0.000000e+00 +> %1004 = bitcast i32 %686 to float +> %1005 = bitcast i32 %686 to float +1474,1513c1475,1514 +< %1007 = fmul float %1006, 0.000000e+00 +< %1008 = bitcast i32 %57 to float +< %1009 = fadd float %1008, %1007 +< %1010 = fmul float %995, %1009 +< %1011 = fadd float %981, %1010 +< %1012 = call float @llvm.sqrt.f32(float %1011) +< %1013 = fadd float %1012, 0.000000e+00 +< %1014 = fdiv float %953, %1013 +< %1015 = fmul float %940, %1014 +< %1016 = fsub float 1.000000e+00, %1015 +< %1017 = fmul float %1016, %858 +< %1018 = fadd float %1017, 0.000000e+00 +< %1019 = bitcast i32 %865 to float +< %1020 = bitcast i32 %865 to float +< %1021 = fmul float %1019, %1020 +< %1022 = fadd float %1021, 0.000000e+00 +< %1023 = bitcast i32 %57 to float +< %1024 = bitcast i32 %57 to float +< %1025 = fmul float %1023, %1024 +< %1026 = fadd float %1022, %1025 +< %1027 = call float @llvm.sqrt.f32(float %1026) +< %1028 = fneg float %710 +< %1029 = fmul float %1027, %1028 +< %1030 = bitcast i32 %865 to float +< %1031 = fadd float %1030, %1029 +< %1032 = bitcast i32 %865 to float +< %1033 = bitcast i32 %865 to float +< %1034 = fmul float %1032, %1033 +< %1035 = fadd float %1034, 0.000000e+00 +< %1036 = bitcast i32 %57 to float +< %1037 = bitcast i32 %57 to float +< %1038 = fmul float %1036, %1037 +< %1039 = fadd float %1035, %1038 +< %1040 = call float @llvm.sqrt.f32(float %1039) +< %1041 = fneg float %710 +< %1042 = fmul float %1040, %1041 +< %1043 = bitcast i32 %865 to float +< %1044 = fadd float %1043, %1042 +< %1045 = bitcast i32 %865 to float +< %1046 = bitcast i32 %865 to float +--- +> %1007 = fadd float %1003, %1006 +> %1008 = call float @llvm.sqrt.f32(float %1007) +> %1009 = fneg float %692 +> %1010 = fmul float %1008, %1009 +> %1011 = bitcast i32 %52 to float +> %1012 = fadd float %1011, %1010 +> %1013 = bitcast i32 %52 to float +> %1014 = bitcast i32 %52 to float +> %1015 = fmul float %1013, %1014 +> %1016 = fadd float %1015, 0.000000e+00 +> %1017 = bitcast i32 %686 to float +> %1018 = bitcast i32 %686 to float +> %1019 = fmul float %1017, %1018 +> %1020 = fadd float %1016, %1019 +> %1021 = call float @llvm.sqrt.f32(float %1020) +> %1022 = fneg float %692 +> %1023 = fmul float %1021, %1022 +> %1024 = bitcast i32 %52 to float +> %1025 = fadd float %1024, %1023 +> %1026 = bitcast i32 %52 to float +> %1027 = bitcast i32 %52 to float +> %1028 = fmul float %1026, %1027 +> %1029 = fadd float %1028, 0.000000e+00 +> %1030 = bitcast i32 %686 to float +> %1031 = bitcast i32 %686 to float +> %1032 = fmul float %1030, %1031 +> %1033 = fadd float %1029, %1032 +> %1034 = call float @llvm.sqrt.f32(float %1033) +> %1035 = fneg float %692 +> %1036 = fmul float %1034, %1035 +> %1037 = bitcast i32 %52 to float +> %1038 = fadd float %1037, %1036 +> %1039 
= fmul float %1025, %1038 +> %1040 = fadd float %1039, 0.000000e+00 +> %1041 = bitcast i32 %52 to float +> %1042 = bitcast i32 %52 to float +> %1043 = fmul float %1041, %1042 +> %1044 = fadd float %1043, 0.000000e+00 +> %1045 = bitcast i32 %686 to float +> %1046 = bitcast i32 %686 to float +1515,1517c1516,1518 +< %1048 = fadd float %1047, 0.000000e+00 +< %1049 = bitcast i32 %57 to float +< %1050 = bitcast i32 %57 to float +--- +> %1048 = fadd float %1044, %1047 +> %1049 = call float @llvm.sqrt.f32(float %1048) +> %1050 = fneg float %692 +1519,1570c1520,1571 +< %1052 = fadd float %1048, %1051 +< %1053 = call float @llvm.sqrt.f32(float %1052) +< %1054 = fneg float %710 +< %1055 = fmul float %1053, %1054 +< %1056 = bitcast i32 %865 to float +< %1057 = fadd float %1056, %1055 +< %1058 = fmul float %1044, %1057 +< %1059 = fadd float %1058, 0.000000e+00 +< %1060 = bitcast i32 %865 to float +< %1061 = bitcast i32 %865 to float +< %1062 = fmul float %1060, %1061 +< %1063 = fadd float %1062, 0.000000e+00 +< %1064 = bitcast i32 %57 to float +< %1065 = bitcast i32 %57 to float +< %1066 = fmul float %1064, %1065 +< %1067 = fadd float %1063, %1066 +< %1068 = call float @llvm.sqrt.f32(float %1067) +< %1069 = fneg float %710 +< %1070 = fmul float %1068, %1069 +< %1071 = fmul float %1070, 0.000000e+00 +< %1072 = bitcast i32 %57 to float +< %1073 = fadd float %1072, %1071 +< %1074 = bitcast i32 %865 to float +< %1075 = bitcast i32 %865 to float +< %1076 = fmul float %1074, %1075 +< %1077 = fadd float %1076, 0.000000e+00 +< %1078 = bitcast i32 %57 to float +< %1079 = bitcast i32 %57 to float +< %1080 = fmul float %1078, %1079 +< %1081 = fadd float %1077, %1080 +< %1082 = call float @llvm.sqrt.f32(float %1081) +< %1083 = fneg float %710 +< %1084 = fmul float %1082, %1083 +< %1085 = fmul float %1084, 0.000000e+00 +< %1086 = bitcast i32 %57 to float +< %1087 = fadd float %1086, %1085 +< %1088 = fmul float %1073, %1087 +< %1089 = fadd float %1059, %1088 +< %1090 = call float @llvm.sqrt.f32(float %1089) +< %1091 = fadd float %1090, 0.000000e+00 +< %1092 = fdiv float %1031, %1091 +< %1093 = fmul float %1092, 2.000000e+00 +< %1094 = bitcast i32 %865 to float +< %1095 = bitcast i32 %865 to float +< %1096 = fmul float %1094, %1095 +< %1097 = fadd float %1096, 0.000000e+00 +< %1098 = bitcast i32 %57 to float +< %1099 = bitcast i32 %57 to float +< %1100 = fmul float %1098, %1099 +< %1101 = fadd float %1097, %1100 +< %1102 = call float @llvm.sqrt.f32(float %1101) +< %1103 = fneg float %710 +--- +> %1052 = fmul float %1051, 0.000000e+00 +> %1053 = bitcast i32 %686 to float +> %1054 = fadd float %1053, %1052 +> %1055 = bitcast i32 %52 to float +> %1056 = bitcast i32 %52 to float +> %1057 = fmul float %1055, %1056 +> %1058 = fadd float %1057, 0.000000e+00 +> %1059 = bitcast i32 %686 to float +> %1060 = bitcast i32 %686 to float +> %1061 = fmul float %1059, %1060 +> %1062 = fadd float %1058, %1061 +> %1063 = call float @llvm.sqrt.f32(float %1062) +> %1064 = fneg float %692 +> %1065 = fmul float %1063, %1064 +> %1066 = fmul float %1065, 0.000000e+00 +> %1067 = bitcast i32 %686 to float +> %1068 = fadd float %1067, %1066 +> %1069 = fmul float %1054, %1068 +> %1070 = fadd float %1040, %1069 +> %1071 = call float @llvm.sqrt.f32(float %1070) +> %1072 = fadd float %1071, 0.000000e+00 +> %1073 = fdiv float %1012, %1072 +> %1074 = fmul float %1073, 2.000000e+00 +> %1075 = bitcast i32 %52 to float +> %1076 = bitcast i32 %52 to float +> %1077 = fmul float %1075, %1076 +> %1078 = fadd float %1077, 0.000000e+00 +> %1079 = bitcast i32 
%686 to float +> %1080 = bitcast i32 %686 to float +> %1081 = fmul float %1079, %1080 +> %1082 = fadd float %1078, %1081 +> %1083 = call float @llvm.sqrt.f32(float %1082) +> %1084 = fneg float %692 +> %1085 = fmul float %1083, %1084 +> %1086 = fmul float %1085, 0.000000e+00 +> %1087 = bitcast i32 %686 to float +> %1088 = fadd float %1087, %1086 +> %1089 = bitcast i32 %52 to float +> %1090 = bitcast i32 %52 to float +> %1091 = fmul float %1089, %1090 +> %1092 = fadd float %1091, 0.000000e+00 +> %1093 = bitcast i32 %686 to float +> %1094 = bitcast i32 %686 to float +> %1095 = fmul float %1093, %1094 +> %1096 = fadd float %1092, %1095 +> %1097 = call float @llvm.sqrt.f32(float %1096) +> %1098 = fneg float %692 +> %1099 = fmul float %1097, %1098 +> %1100 = bitcast i32 %52 to float +> %1101 = fadd float %1100, %1099 +> %1102 = bitcast i32 %52 to float +> %1103 = bitcast i32 %52 to float +1572,1589c1573,1590 +< %1105 = fmul float %1104, 0.000000e+00 +< %1106 = bitcast i32 %57 to float +< %1107 = fadd float %1106, %1105 +< %1108 = bitcast i32 %865 to float +< %1109 = bitcast i32 %865 to float +< %1110 = fmul float %1108, %1109 +< %1111 = fadd float %1110, 0.000000e+00 +< %1112 = bitcast i32 %57 to float +< %1113 = bitcast i32 %57 to float +< %1114 = fmul float %1112, %1113 +< %1115 = fadd float %1111, %1114 +< %1116 = call float @llvm.sqrt.f32(float %1115) +< %1117 = fneg float %710 +< %1118 = fmul float %1116, %1117 +< %1119 = bitcast i32 %865 to float +< %1120 = fadd float %1119, %1118 +< %1121 = bitcast i32 %865 to float +< %1122 = bitcast i32 %865 to float +--- +> %1105 = fadd float %1104, 0.000000e+00 +> %1106 = bitcast i32 %686 to float +> %1107 = bitcast i32 %686 to float +> %1108 = fmul float %1106, %1107 +> %1109 = fadd float %1105, %1108 +> %1110 = call float @llvm.sqrt.f32(float %1109) +> %1111 = fneg float %692 +> %1112 = fmul float %1110, %1111 +> %1113 = bitcast i32 %52 to float +> %1114 = fadd float %1113, %1112 +> %1115 = fmul float %1101, %1114 +> %1116 = fadd float %1115, 0.000000e+00 +> %1117 = bitcast i32 %52 to float +> %1118 = bitcast i32 %52 to float +> %1119 = fmul float %1117, %1118 +> %1120 = fadd float %1119, 0.000000e+00 +> %1121 = bitcast i32 %686 to float +> %1122 = bitcast i32 %686 to float +1591,1593c1592,1594 +< %1124 = fadd float %1123, 0.000000e+00 +< %1125 = bitcast i32 %57 to float +< %1126 = bitcast i32 %57 to float +--- +> %1124 = fadd float %1120, %1123 +> %1125 = call float @llvm.sqrt.f32(float %1124) +> %1126 = fneg float %692 +1595,1652c1596,1653 +< %1128 = fadd float %1124, %1127 +< %1129 = call float @llvm.sqrt.f32(float %1128) +< %1130 = fneg float %710 +< %1131 = fmul float %1129, %1130 +< %1132 = bitcast i32 %865 to float +< %1133 = fadd float %1132, %1131 +< %1134 = fmul float %1120, %1133 +< %1135 = fadd float %1134, 0.000000e+00 +< %1136 = bitcast i32 %865 to float +< %1137 = bitcast i32 %865 to float +< %1138 = fmul float %1136, %1137 +< %1139 = fadd float %1138, 0.000000e+00 +< %1140 = bitcast i32 %57 to float +< %1141 = bitcast i32 %57 to float +< %1142 = fmul float %1140, %1141 +< %1143 = fadd float %1139, %1142 +< %1144 = call float @llvm.sqrt.f32(float %1143) +< %1145 = fneg float %710 +< %1146 = fmul float %1144, %1145 +< %1147 = fmul float %1146, 0.000000e+00 +< %1148 = bitcast i32 %57 to float +< %1149 = fadd float %1148, %1147 +< %1150 = bitcast i32 %865 to float +< %1151 = bitcast i32 %865 to float +< %1152 = fmul float %1150, %1151 +< %1153 = fadd float %1152, 0.000000e+00 +< %1154 = bitcast i32 %57 to float +< %1155 = bitcast i32 %57 
to float +< %1156 = fmul float %1154, %1155 +< %1157 = fadd float %1153, %1156 +< %1158 = call float @llvm.sqrt.f32(float %1157) +< %1159 = fneg float %710 +< %1160 = fmul float %1158, %1159 +< %1161 = fmul float %1160, 0.000000e+00 +< %1162 = bitcast i32 %57 to float +< %1163 = fadd float %1162, %1161 +< %1164 = fmul float %1149, %1163 +< %1165 = fadd float %1135, %1164 +< %1166 = call float @llvm.sqrt.f32(float %1165) +< %1167 = fadd float %1166, 0.000000e+00 +< %1168 = fdiv float %1107, %1167 +< %1169 = fmul float %1093, %1168 +< %1170 = fneg float %1169 +< %1171 = load float, float* %30, align 4 +< %1172 = fmul float %1170, %1171 +< %1173 = fadd float %1018, %1172 +< %1174 = insertelement <4 x float> zeroinitializer, float %1173, i32 0 +< %1175 = insertelement <4 x float> %1174, float 0.000000e+00, i32 1 +< %1176 = insertelement <4 x float> %1175, float 0.000000e+00, i32 2 +< %1177 = insertelement <4 x float> %1176, float 0.000000e+00, i32 3 +< %1178 = extractelement <4 x float> %1177, i32 0 +< store float %1178, float* %2, align 4 +< %1179 = extractelement <4 x float> %1177, i32 1 +< %1180 = getelementptr float, float* %2, i32 0 +< %1181 = getelementptr inbounds float, float* %1180, i64 1 +< store float %1179, float* %1181, align 4 +< %1182 = bitcast i32 %865 to float +< %1183 = bitcast i32 %865 to float +--- +> %1128 = fmul float %1127, 0.000000e+00 +> %1129 = bitcast i32 %686 to float +> %1130 = fadd float %1129, %1128 +> %1131 = bitcast i32 %52 to float +> %1132 = bitcast i32 %52 to float +> %1133 = fmul float %1131, %1132 +> %1134 = fadd float %1133, 0.000000e+00 +> %1135 = bitcast i32 %686 to float +> %1136 = bitcast i32 %686 to float +> %1137 = fmul float %1135, %1136 +> %1138 = fadd float %1134, %1137 +> %1139 = call float @llvm.sqrt.f32(float %1138) +> %1140 = fneg float %692 +> %1141 = fmul float %1139, %1140 +> %1142 = fmul float %1141, 0.000000e+00 +> %1143 = bitcast i32 %686 to float +> %1144 = fadd float %1143, %1142 +> %1145 = fmul float %1130, %1144 +> %1146 = fadd float %1116, %1145 +> %1147 = call float @llvm.sqrt.f32(float %1146) +> %1148 = fadd float %1147, 0.000000e+00 +> %1149 = fdiv float %1088, %1148 +> %1150 = fmul float %1074, %1149 +> %1151 = fneg float %1150 +> %1152 = getelementptr float, float* %0, i32 0 +> %1153 = getelementptr inbounds float, float* %1152, i64 2 +> %1154 = load float, float* %1153, align 4 +> %1155 = fmul float %1151, %1154 +> %1156 = fadd float %999, %1155 +> %1157 = insertelement <4 x float> zeroinitializer, float %1156, i32 0 +> %1158 = insertelement <4 x float> %1157, float 0.000000e+00, i32 1 +> %1159 = insertelement <4 x float> %1158, float 0.000000e+00, i32 2 +> %1160 = insertelement <4 x float> %1159, float 0.000000e+00, i32 3 +> %1161 = extractelement <4 x float> %1160, i32 0 +> store float %1161, float* %2, align 4 +> %1162 = extractelement <4 x float> %1160, i32 1 +> %1163 = getelementptr float, float* %2, i32 0 +> %1164 = getelementptr inbounds float, float* %1163, i64 1 +> store float %1162, float* %1164, align 4 +> %1165 = bitcast i32 %52 to float +> %1166 = bitcast i32 %52 to float +> %1167 = fmul float %1165, %1166 +> %1168 = fadd float %1167, 0.000000e+00 +> %1169 = bitcast i32 %686 to float +> %1170 = bitcast i32 %686 to float +> %1171 = fmul float %1169, %1170 +> %1172 = fadd float %1168, %1171 +> %1173 = call float @llvm.sqrt.f32(float %1172) +> %1174 = fneg float %692 +> %1175 = fmul float %1173, %1174 +> %1176 = bitcast i32 %52 to float +> %1177 = fadd float %1176, %1175 +> %1178 = bitcast i32 %52 to float +> %1179 
= bitcast i32 %52 to float +> %1180 = fmul float %1178, %1179 +> %1181 = fadd float %1180, 0.000000e+00 +> %1182 = bitcast i32 %686 to float +> %1183 = bitcast i32 %686 to float +1654,1656c1655,1657 +< %1185 = fadd float %1184, 0.000000e+00 +< %1186 = bitcast i32 %57 to float +< %1187 = bitcast i32 %57 to float +--- +> %1185 = fadd float %1181, %1184 +> %1186 = call float @llvm.sqrt.f32(float %1185) +> %1187 = fneg float %692 +1658,1665c1659,1666 +< %1189 = fadd float %1185, %1188 +< %1190 = call float @llvm.sqrt.f32(float %1189) +< %1191 = fneg float %710 +< %1192 = fmul float %1190, %1191 +< %1193 = bitcast i32 %865 to float +< %1194 = fadd float %1193, %1192 +< %1195 = bitcast i32 %865 to float +< %1196 = bitcast i32 %865 to float +--- +> %1189 = bitcast i32 %52 to float +> %1190 = fadd float %1189, %1188 +> %1191 = bitcast i32 %52 to float +> %1192 = bitcast i32 %52 to float +> %1193 = fmul float %1191, %1192 +> %1194 = fadd float %1193, 0.000000e+00 +> %1195 = bitcast i32 %686 to float +> %1196 = bitcast i32 %686 to float +1667,1669c1668,1670 +< %1198 = fadd float %1197, 0.000000e+00 +< %1199 = bitcast i32 %57 to float +< %1200 = bitcast i32 %57 to float +--- +> %1198 = fadd float %1194, %1197 +> %1199 = call float @llvm.sqrt.f32(float %1198) +> %1200 = fneg float %692 +1671,1727c1672,1728 +< %1202 = fadd float %1198, %1201 +< %1203 = call float @llvm.sqrt.f32(float %1202) +< %1204 = fneg float %710 +< %1205 = fmul float %1203, %1204 +< %1206 = bitcast i32 %865 to float +< %1207 = fadd float %1206, %1205 +< %1208 = bitcast i32 %865 to float +< %1209 = bitcast i32 %865 to float +< %1210 = fmul float %1208, %1209 +< %1211 = fadd float %1210, 0.000000e+00 +< %1212 = bitcast i32 %57 to float +< %1213 = bitcast i32 %57 to float +< %1214 = fmul float %1212, %1213 +< %1215 = fadd float %1211, %1214 +< %1216 = call float @llvm.sqrt.f32(float %1215) +< %1217 = fneg float %710 +< %1218 = fmul float %1216, %1217 +< %1219 = bitcast i32 %865 to float +< %1220 = fadd float %1219, %1218 +< %1221 = fmul float %1207, %1220 +< %1222 = fadd float %1221, 0.000000e+00 +< %1223 = bitcast i32 %865 to float +< %1224 = bitcast i32 %865 to float +< %1225 = fmul float %1223, %1224 +< %1226 = fadd float %1225, 0.000000e+00 +< %1227 = bitcast i32 %57 to float +< %1228 = bitcast i32 %57 to float +< %1229 = fmul float %1227, %1228 +< %1230 = fadd float %1226, %1229 +< %1231 = call float @llvm.sqrt.f32(float %1230) +< %1232 = fneg float %710 +< %1233 = fmul float %1231, %1232 +< %1234 = fmul float %1233, 0.000000e+00 +< %1235 = bitcast i32 %57 to float +< %1236 = fadd float %1235, %1234 +< %1237 = bitcast i32 %865 to float +< %1238 = bitcast i32 %865 to float +< %1239 = fmul float %1237, %1238 +< %1240 = fadd float %1239, 0.000000e+00 +< %1241 = bitcast i32 %57 to float +< %1242 = bitcast i32 %57 to float +< %1243 = fmul float %1241, %1242 +< %1244 = fadd float %1240, %1243 +< %1245 = call float @llvm.sqrt.f32(float %1244) +< %1246 = fneg float %710 +< %1247 = fmul float %1245, %1246 +< %1248 = fmul float %1247, 0.000000e+00 +< %1249 = bitcast i32 %57 to float +< %1250 = fadd float %1249, %1248 +< %1251 = fmul float %1236, %1250 +< %1252 = fadd float %1222, %1251 +< %1253 = call float @llvm.sqrt.f32(float %1252) +< %1254 = fadd float %1253, 0.000000e+00 +< %1255 = fdiv float %1194, %1254 +< %1256 = fmul float %1255, 2.000000e+00 +< %1257 = bitcast i32 %865 to float +< %1258 = bitcast i32 %865 to float +--- +> %1202 = bitcast i32 %52 to float +> %1203 = fadd float %1202, %1201 +> %1204 = fmul float %1190, %1203 +> 
%1205 = fadd float %1204, 0.000000e+00 +> %1206 = bitcast i32 %52 to float +> %1207 = bitcast i32 %52 to float +> %1208 = fmul float %1206, %1207 +> %1209 = fadd float %1208, 0.000000e+00 +> %1210 = bitcast i32 %686 to float +> %1211 = bitcast i32 %686 to float +> %1212 = fmul float %1210, %1211 +> %1213 = fadd float %1209, %1212 +> %1214 = call float @llvm.sqrt.f32(float %1213) +> %1215 = fneg float %692 +> %1216 = fmul float %1214, %1215 +> %1217 = fmul float %1216, 0.000000e+00 +> %1218 = bitcast i32 %686 to float +> %1219 = fadd float %1218, %1217 +> %1220 = bitcast i32 %52 to float +> %1221 = bitcast i32 %52 to float +> %1222 = fmul float %1220, %1221 +> %1223 = fadd float %1222, 0.000000e+00 +> %1224 = bitcast i32 %686 to float +> %1225 = bitcast i32 %686 to float +> %1226 = fmul float %1224, %1225 +> %1227 = fadd float %1223, %1226 +> %1228 = call float @llvm.sqrt.f32(float %1227) +> %1229 = fneg float %692 +> %1230 = fmul float %1228, %1229 +> %1231 = fmul float %1230, 0.000000e+00 +> %1232 = bitcast i32 %686 to float +> %1233 = fadd float %1232, %1231 +> %1234 = fmul float %1219, %1233 +> %1235 = fadd float %1205, %1234 +> %1236 = call float @llvm.sqrt.f32(float %1235) +> %1237 = fadd float %1236, 0.000000e+00 +> %1238 = fdiv float %1177, %1237 +> %1239 = fmul float %1238, 2.000000e+00 +> %1240 = bitcast i32 %52 to float +> %1241 = bitcast i32 %52 to float +> %1242 = fmul float %1240, %1241 +> %1243 = fadd float %1242, 0.000000e+00 +> %1244 = bitcast i32 %686 to float +> %1245 = bitcast i32 %686 to float +> %1246 = fmul float %1244, %1245 +> %1247 = fadd float %1243, %1246 +> %1248 = call float @llvm.sqrt.f32(float %1247) +> %1249 = fneg float %692 +> %1250 = fmul float %1248, %1249 +> %1251 = bitcast i32 %52 to float +> %1252 = fadd float %1251, %1250 +> %1253 = bitcast i32 %52 to float +> %1254 = bitcast i32 %52 to float +> %1255 = fmul float %1253, %1254 +> %1256 = fadd float %1255, 0.000000e+00 +> %1257 = bitcast i32 %686 to float +> %1258 = bitcast i32 %686 to float +1729,1731c1730,1732 +< %1260 = fadd float %1259, 0.000000e+00 +< %1261 = bitcast i32 %57 to float +< %1262 = bitcast i32 %57 to float +--- +> %1260 = fadd float %1256, %1259 +> %1261 = call float @llvm.sqrt.f32(float %1260) +> %1262 = fneg float %692 +1733,1740c1734,1741 +< %1264 = fadd float %1260, %1263 +< %1265 = call float @llvm.sqrt.f32(float %1264) +< %1266 = fneg float %710 +< %1267 = fmul float %1265, %1266 +< %1268 = bitcast i32 %865 to float +< %1269 = fadd float %1268, %1267 +< %1270 = bitcast i32 %865 to float +< %1271 = bitcast i32 %865 to float +--- +> %1264 = bitcast i32 %52 to float +> %1265 = fadd float %1264, %1263 +> %1266 = bitcast i32 %52 to float +> %1267 = bitcast i32 %52 to float +> %1268 = fmul float %1266, %1267 +> %1269 = fadd float %1268, 0.000000e+00 +> %1270 = bitcast i32 %686 to float +> %1271 = bitcast i32 %686 to float +1742,1744c1743,1745 +< %1273 = fadd float %1272, 0.000000e+00 +< %1274 = bitcast i32 %57 to float +< %1275 = bitcast i32 %57 to float +--- +> %1273 = fadd float %1269, %1272 +> %1274 = call float @llvm.sqrt.f32(float %1273) +> %1275 = fneg float %692 +1746,1817c1747,1818 +< %1277 = fadd float %1273, %1276 +< %1278 = call float @llvm.sqrt.f32(float %1277) +< %1279 = fneg float %710 +< %1280 = fmul float %1278, %1279 +< %1281 = bitcast i32 %865 to float +< %1282 = fadd float %1281, %1280 +< %1283 = bitcast i32 %865 to float +< %1284 = bitcast i32 %865 to float +< %1285 = fmul float %1283, %1284 +< %1286 = fadd float %1285, 0.000000e+00 +< %1287 = bitcast i32 %57 to 
float +< %1288 = bitcast i32 %57 to float +< %1289 = fmul float %1287, %1288 +< %1290 = fadd float %1286, %1289 +< %1291 = call float @llvm.sqrt.f32(float %1290) +< %1292 = fneg float %710 +< %1293 = fmul float %1291, %1292 +< %1294 = bitcast i32 %865 to float +< %1295 = fadd float %1294, %1293 +< %1296 = fmul float %1282, %1295 +< %1297 = fadd float %1296, 0.000000e+00 +< %1298 = bitcast i32 %865 to float +< %1299 = bitcast i32 %865 to float +< %1300 = fmul float %1298, %1299 +< %1301 = fadd float %1300, 0.000000e+00 +< %1302 = bitcast i32 %57 to float +< %1303 = bitcast i32 %57 to float +< %1304 = fmul float %1302, %1303 +< %1305 = fadd float %1301, %1304 +< %1306 = call float @llvm.sqrt.f32(float %1305) +< %1307 = fneg float %710 +< %1308 = fmul float %1306, %1307 +< %1309 = fmul float %1308, 0.000000e+00 +< %1310 = bitcast i32 %57 to float +< %1311 = fadd float %1310, %1309 +< %1312 = bitcast i32 %865 to float +< %1313 = bitcast i32 %865 to float +< %1314 = fmul float %1312, %1313 +< %1315 = fadd float %1314, 0.000000e+00 +< %1316 = bitcast i32 %57 to float +< %1317 = bitcast i32 %57 to float +< %1318 = fmul float %1316, %1317 +< %1319 = fadd float %1315, %1318 +< %1320 = call float @llvm.sqrt.f32(float %1319) +< %1321 = fneg float %710 +< %1322 = fmul float %1320, %1321 +< %1323 = fmul float %1322, 0.000000e+00 +< %1324 = bitcast i32 %57 to float +< %1325 = fadd float %1324, %1323 +< %1326 = fmul float %1311, %1325 +< %1327 = fadd float %1297, %1326 +< %1328 = call float @llvm.sqrt.f32(float %1327) +< %1329 = fadd float %1328, 0.000000e+00 +< %1330 = fdiv float %1269, %1329 +< %1331 = fmul float %1256, %1330 +< %1332 = fsub float 1.000000e+00, %1331 +< %1333 = insertelement <4 x float> zeroinitializer, float %1332, i32 0 +< %1334 = insertelement <4 x float> %1333, float 0.000000e+00, i32 1 +< %1335 = insertelement <4 x float> %1334, float 0.000000e+00, i32 2 +< %1336 = insertelement <4 x float> %1335, float 0.000000e+00, i32 3 +< %1337 = getelementptr float, float* %0, i32 0 +< %1338 = getelementptr inbounds float, float* %1337, i64 1 +< %1339 = load float, float* %1338, align 4 +< %1340 = insertelement <4 x float> zeroinitializer, float %1339, i32 0 +< %1341 = insertelement <4 x float> %1340, float 0.000000e+00, i32 1 +< %1342 = insertelement <4 x float> %1341, float 0.000000e+00, i32 2 +< %1343 = insertelement <4 x float> %1342, float 0.000000e+00, i32 3 +< %1344 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1336, <4 x float> %1343, <4 x float> zeroinitializer) +< %1345 = extractelement <4 x float> %1344, i32 0 +< store float %1345, float* %1181, align 4 +< %1346 = bitcast i32 %865 to float +< %1347 = bitcast i32 %865 to float +--- +> %1277 = bitcast i32 %52 to float +> %1278 = fadd float %1277, %1276 +> %1279 = fmul float %1265, %1278 +> %1280 = fadd float %1279, 0.000000e+00 +> %1281 = bitcast i32 %52 to float +> %1282 = bitcast i32 %52 to float +> %1283 = fmul float %1281, %1282 +> %1284 = fadd float %1283, 0.000000e+00 +> %1285 = bitcast i32 %686 to float +> %1286 = bitcast i32 %686 to float +> %1287 = fmul float %1285, %1286 +> %1288 = fadd float %1284, %1287 +> %1289 = call float @llvm.sqrt.f32(float %1288) +> %1290 = fneg float %692 +> %1291 = fmul float %1289, %1290 +> %1292 = fmul float %1291, 0.000000e+00 +> %1293 = bitcast i32 %686 to float +> %1294 = fadd float %1293, %1292 +> %1295 = bitcast i32 %52 to float +> %1296 = bitcast i32 %52 to float +> %1297 = fmul float %1295, %1296 +> %1298 = fadd float %1297, 0.000000e+00 +> %1299 = bitcast i32 %686 to float +> %1300 = 
bitcast i32 %686 to float +> %1301 = fmul float %1299, %1300 +> %1302 = fadd float %1298, %1301 +> %1303 = call float @llvm.sqrt.f32(float %1302) +> %1304 = fneg float %692 +> %1305 = fmul float %1303, %1304 +> %1306 = fmul float %1305, 0.000000e+00 +> %1307 = bitcast i32 %686 to float +> %1308 = fadd float %1307, %1306 +> %1309 = fmul float %1294, %1308 +> %1310 = fadd float %1280, %1309 +> %1311 = call float @llvm.sqrt.f32(float %1310) +> %1312 = fadd float %1311, 0.000000e+00 +> %1313 = fdiv float %1252, %1312 +> %1314 = fmul float %1239, %1313 +> %1315 = fsub float 1.000000e+00, %1314 +> %1316 = insertelement <4 x float> zeroinitializer, float %1315, i32 0 +> %1317 = insertelement <4 x float> %1316, float 0.000000e+00, i32 1 +> %1318 = insertelement <4 x float> %1317, float 0.000000e+00, i32 2 +> %1319 = insertelement <4 x float> %1318, float 0.000000e+00, i32 3 +> %1320 = getelementptr float, float* %0, i32 0 +> %1321 = getelementptr inbounds float, float* %1320, i64 1 +> %1322 = load float, float* %1321, align 4 +> %1323 = insertelement <4 x float> zeroinitializer, float %1322, i32 0 +> %1324 = insertelement <4 x float> %1323, float 0.000000e+00, i32 1 +> %1325 = insertelement <4 x float> %1324, float 0.000000e+00, i32 2 +> %1326 = insertelement <4 x float> %1325, float 0.000000e+00, i32 3 +> %1327 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1319, <4 x float> %1326, <4 x float> zeroinitializer) +> %1328 = extractelement <4 x float> %1327, i32 0 +> store float %1328, float* %1164, align 4 +> %1329 = bitcast i32 %52 to float +> %1330 = bitcast i32 %52 to float +> %1331 = fmul float %1329, %1330 +> %1332 = fadd float %1331, 0.000000e+00 +> %1333 = bitcast i32 %686 to float +> %1334 = bitcast i32 %686 to float +> %1335 = fmul float %1333, %1334 +> %1336 = fadd float %1332, %1335 +> %1337 = call float @llvm.sqrt.f32(float %1336) +> %1338 = fneg float %692 +> %1339 = fmul float %1337, %1338 +> %1340 = bitcast i32 %52 to float +> %1341 = fadd float %1340, %1339 +> %1342 = bitcast i32 %52 to float +> %1343 = bitcast i32 %52 to float +> %1344 = fmul float %1342, %1343 +> %1345 = fadd float %1344, 0.000000e+00 +> %1346 = bitcast i32 %686 to float +> %1347 = bitcast i32 %686 to float +1819,1821c1820,1822 +< %1349 = fadd float %1348, 0.000000e+00 +< %1350 = bitcast i32 %57 to float +< %1351 = bitcast i32 %57 to float +--- +> %1349 = fadd float %1345, %1348 +> %1350 = call float @llvm.sqrt.f32(float %1349) +> %1351 = fneg float %692 +1823,1830c1824,1831 +< %1353 = fadd float %1349, %1352 +< %1354 = call float @llvm.sqrt.f32(float %1353) +< %1355 = fneg float %710 +< %1356 = fmul float %1354, %1355 +< %1357 = bitcast i32 %865 to float +< %1358 = fadd float %1357, %1356 +< %1359 = bitcast i32 %865 to float +< %1360 = bitcast i32 %865 to float +--- +> %1353 = bitcast i32 %52 to float +> %1354 = fadd float %1353, %1352 +> %1355 = bitcast i32 %52 to float +> %1356 = bitcast i32 %52 to float +> %1357 = fmul float %1355, %1356 +> %1358 = fadd float %1357, 0.000000e+00 +> %1359 = bitcast i32 %686 to float +> %1360 = bitcast i32 %686 to float +1832,1834c1833,1835 +< %1362 = fadd float %1361, 0.000000e+00 +< %1363 = bitcast i32 %57 to float +< %1364 = bitcast i32 %57 to float +--- +> %1362 = fadd float %1358, %1361 +> %1363 = call float @llvm.sqrt.f32(float %1362) +> %1364 = fneg float %692 +1836,1892c1837,1893 +< %1366 = fadd float %1362, %1365 +< %1367 = call float @llvm.sqrt.f32(float %1366) +< %1368 = fneg float %710 +< %1369 = fmul float %1367, %1368 +< %1370 = bitcast i32 %865 to float +< %1371 = 
fadd float %1370, %1369 +< %1372 = bitcast i32 %865 to float +< %1373 = bitcast i32 %865 to float +< %1374 = fmul float %1372, %1373 +< %1375 = fadd float %1374, 0.000000e+00 +< %1376 = bitcast i32 %57 to float +< %1377 = bitcast i32 %57 to float +< %1378 = fmul float %1376, %1377 +< %1379 = fadd float %1375, %1378 +< %1380 = call float @llvm.sqrt.f32(float %1379) +< %1381 = fneg float %710 +< %1382 = fmul float %1380, %1381 +< %1383 = bitcast i32 %865 to float +< %1384 = fadd float %1383, %1382 +< %1385 = fmul float %1371, %1384 +< %1386 = fadd float %1385, 0.000000e+00 +< %1387 = bitcast i32 %865 to float +< %1388 = bitcast i32 %865 to float +< %1389 = fmul float %1387, %1388 +< %1390 = fadd float %1389, 0.000000e+00 +< %1391 = bitcast i32 %57 to float +< %1392 = bitcast i32 %57 to float +< %1393 = fmul float %1391, %1392 +< %1394 = fadd float %1390, %1393 +< %1395 = call float @llvm.sqrt.f32(float %1394) +< %1396 = fneg float %710 +< %1397 = fmul float %1395, %1396 +< %1398 = fmul float %1397, 0.000000e+00 +< %1399 = bitcast i32 %57 to float +< %1400 = fadd float %1399, %1398 +< %1401 = bitcast i32 %865 to float +< %1402 = bitcast i32 %865 to float +< %1403 = fmul float %1401, %1402 +< %1404 = fadd float %1403, 0.000000e+00 +< %1405 = bitcast i32 %57 to float +< %1406 = bitcast i32 %57 to float +< %1407 = fmul float %1405, %1406 +< %1408 = fadd float %1404, %1407 +< %1409 = call float @llvm.sqrt.f32(float %1408) +< %1410 = fneg float %710 +< %1411 = fmul float %1409, %1410 +< %1412 = fmul float %1411, 0.000000e+00 +< %1413 = bitcast i32 %57 to float +< %1414 = fadd float %1413, %1412 +< %1415 = fmul float %1400, %1414 +< %1416 = fadd float %1386, %1415 +< %1417 = call float @llvm.sqrt.f32(float %1416) +< %1418 = fadd float %1417, 0.000000e+00 +< %1419 = fdiv float %1358, %1418 +< %1420 = fmul float %1419, 2.000000e+00 +< %1421 = bitcast i32 %865 to float +< %1422 = bitcast i32 %865 to float +--- +> %1366 = bitcast i32 %52 to float +> %1367 = fadd float %1366, %1365 +> %1368 = fmul float %1354, %1367 +> %1369 = fadd float %1368, 0.000000e+00 +> %1370 = bitcast i32 %52 to float +> %1371 = bitcast i32 %52 to float +> %1372 = fmul float %1370, %1371 +> %1373 = fadd float %1372, 0.000000e+00 +> %1374 = bitcast i32 %686 to float +> %1375 = bitcast i32 %686 to float +> %1376 = fmul float %1374, %1375 +> %1377 = fadd float %1373, %1376 +> %1378 = call float @llvm.sqrt.f32(float %1377) +> %1379 = fneg float %692 +> %1380 = fmul float %1378, %1379 +> %1381 = fmul float %1380, 0.000000e+00 +> %1382 = bitcast i32 %686 to float +> %1383 = fadd float %1382, %1381 +> %1384 = bitcast i32 %52 to float +> %1385 = bitcast i32 %52 to float +> %1386 = fmul float %1384, %1385 +> %1387 = fadd float %1386, 0.000000e+00 +> %1388 = bitcast i32 %686 to float +> %1389 = bitcast i32 %686 to float +> %1390 = fmul float %1388, %1389 +> %1391 = fadd float %1387, %1390 +> %1392 = call float @llvm.sqrt.f32(float %1391) +> %1393 = fneg float %692 +> %1394 = fmul float %1392, %1393 +> %1395 = fmul float %1394, 0.000000e+00 +> %1396 = bitcast i32 %686 to float +> %1397 = fadd float %1396, %1395 +> %1398 = fmul float %1383, %1397 +> %1399 = fadd float %1369, %1398 +> %1400 = call float @llvm.sqrt.f32(float %1399) +> %1401 = fadd float %1400, 0.000000e+00 +> %1402 = fdiv float %1341, %1401 +> %1403 = fmul float %1402, 2.000000e+00 +> %1404 = bitcast i32 %52 to float +> %1405 = bitcast i32 %52 to float +> %1406 = fmul float %1404, %1405 +> %1407 = fadd float %1406, 0.000000e+00 +> %1408 = bitcast i32 %686 to float +> %1409 = 
bitcast i32 %686 to float +> %1410 = fmul float %1408, %1409 +> %1411 = fadd float %1407, %1410 +> %1412 = call float @llvm.sqrt.f32(float %1411) +> %1413 = fneg float %692 +> %1414 = fmul float %1412, %1413 +> %1415 = bitcast i32 %52 to float +> %1416 = fadd float %1415, %1414 +> %1417 = bitcast i32 %52 to float +> %1418 = bitcast i32 %52 to float +> %1419 = fmul float %1417, %1418 +> %1420 = fadd float %1419, 0.000000e+00 +> %1421 = bitcast i32 %686 to float +> %1422 = bitcast i32 %686 to float +1894,1896c1895,1897 +< %1424 = fadd float %1423, 0.000000e+00 +< %1425 = bitcast i32 %57 to float +< %1426 = bitcast i32 %57 to float +--- +> %1424 = fadd float %1420, %1423 +> %1425 = call float @llvm.sqrt.f32(float %1424) +> %1426 = fneg float %692 +1898,1905c1899,1906 +< %1428 = fadd float %1424, %1427 +< %1429 = call float @llvm.sqrt.f32(float %1428) +< %1430 = fneg float %710 +< %1431 = fmul float %1429, %1430 +< %1432 = bitcast i32 %865 to float +< %1433 = fadd float %1432, %1431 +< %1434 = bitcast i32 %865 to float +< %1435 = bitcast i32 %865 to float +--- +> %1428 = bitcast i32 %52 to float +> %1429 = fadd float %1428, %1427 +> %1430 = bitcast i32 %52 to float +> %1431 = bitcast i32 %52 to float +> %1432 = fmul float %1430, %1431 +> %1433 = fadd float %1432, 0.000000e+00 +> %1434 = bitcast i32 %686 to float +> %1435 = bitcast i32 %686 to float +1907,1909c1908,1910 +< %1437 = fadd float %1436, 0.000000e+00 +< %1438 = bitcast i32 %57 to float +< %1439 = bitcast i32 %57 to float +--- +> %1437 = fadd float %1433, %1436 +> %1438 = call float @llvm.sqrt.f32(float %1437) +> %1439 = fneg float %692 +1911,1967c1912,1968 +< %1441 = fadd float %1437, %1440 +< %1442 = call float @llvm.sqrt.f32(float %1441) +< %1443 = fneg float %710 +< %1444 = fmul float %1442, %1443 +< %1445 = bitcast i32 %865 to float +< %1446 = fadd float %1445, %1444 +< %1447 = bitcast i32 %865 to float +< %1448 = bitcast i32 %865 to float +< %1449 = fmul float %1447, %1448 +< %1450 = fadd float %1449, 0.000000e+00 +< %1451 = bitcast i32 %57 to float +< %1452 = bitcast i32 %57 to float +< %1453 = fmul float %1451, %1452 +< %1454 = fadd float %1450, %1453 +< %1455 = call float @llvm.sqrt.f32(float %1454) +< %1456 = fneg float %710 +< %1457 = fmul float %1455, %1456 +< %1458 = bitcast i32 %865 to float +< %1459 = fadd float %1458, %1457 +< %1460 = fmul float %1446, %1459 +< %1461 = fadd float %1460, 0.000000e+00 +< %1462 = bitcast i32 %865 to float +< %1463 = bitcast i32 %865 to float +< %1464 = fmul float %1462, %1463 +< %1465 = fadd float %1464, 0.000000e+00 +< %1466 = bitcast i32 %57 to float +< %1467 = bitcast i32 %57 to float +< %1468 = fmul float %1466, %1467 +< %1469 = fadd float %1465, %1468 +< %1470 = call float @llvm.sqrt.f32(float %1469) +< %1471 = fneg float %710 +< %1472 = fmul float %1470, %1471 +< %1473 = fmul float %1472, 0.000000e+00 +< %1474 = bitcast i32 %57 to float +< %1475 = fadd float %1474, %1473 +< %1476 = bitcast i32 %865 to float +< %1477 = bitcast i32 %865 to float +< %1478 = fmul float %1476, %1477 +< %1479 = fadd float %1478, 0.000000e+00 +< %1480 = bitcast i32 %57 to float +< %1481 = bitcast i32 %57 to float +< %1482 = fmul float %1480, %1481 +< %1483 = fadd float %1479, %1482 +< %1484 = call float @llvm.sqrt.f32(float %1483) +< %1485 = fneg float %710 +< %1486 = fmul float %1484, %1485 +< %1487 = fmul float %1486, 0.000000e+00 +< %1488 = bitcast i32 %57 to float +< %1489 = fadd float %1488, %1487 +< %1490 = fmul float %1475, %1489 +< %1491 = fadd float %1461, %1490 +< %1492 = call float 
@llvm.sqrt.f32(float %1491) +< %1493 = fadd float %1492, 0.000000e+00 +< %1494 = fdiv float %1433, %1493 +< %1495 = fmul float %1420, %1494 +< %1496 = fsub float 1.000000e+00, %1495 +< %1497 = fmul float %1496, %1339 +--- +> %1441 = bitcast i32 %52 to float +> %1442 = fadd float %1441, %1440 +> %1443 = fmul float %1429, %1442 +> %1444 = fadd float %1443, 0.000000e+00 +> %1445 = bitcast i32 %52 to float +> %1446 = bitcast i32 %52 to float +> %1447 = fmul float %1445, %1446 +> %1448 = fadd float %1447, 0.000000e+00 +> %1449 = bitcast i32 %686 to float +> %1450 = bitcast i32 %686 to float +> %1451 = fmul float %1449, %1450 +> %1452 = fadd float %1448, %1451 +> %1453 = call float @llvm.sqrt.f32(float %1452) +> %1454 = fneg float %692 +> %1455 = fmul float %1453, %1454 +> %1456 = fmul float %1455, 0.000000e+00 +> %1457 = bitcast i32 %686 to float +> %1458 = fadd float %1457, %1456 +> %1459 = bitcast i32 %52 to float +> %1460 = bitcast i32 %52 to float +> %1461 = fmul float %1459, %1460 +> %1462 = fadd float %1461, 0.000000e+00 +> %1463 = bitcast i32 %686 to float +> %1464 = bitcast i32 %686 to float +> %1465 = fmul float %1463, %1464 +> %1466 = fadd float %1462, %1465 +> %1467 = call float @llvm.sqrt.f32(float %1466) +> %1468 = fneg float %692 +> %1469 = fmul float %1467, %1468 +> %1470 = fmul float %1469, 0.000000e+00 +> %1471 = bitcast i32 %686 to float +> %1472 = fadd float %1471, %1470 +> %1473 = fmul float %1458, %1472 +> %1474 = fadd float %1444, %1473 +> %1475 = call float @llvm.sqrt.f32(float %1474) +> %1476 = fadd float %1475, 0.000000e+00 +> %1477 = fdiv float %1416, %1476 +> %1478 = fmul float %1403, %1477 +> %1479 = fsub float 1.000000e+00, %1478 +> %1480 = fmul float %1479, %1322 +> %1481 = fadd float %1480, 0.000000e+00 +> %1482 = bitcast i32 %52 to float +> %1483 = bitcast i32 %52 to float +> %1484 = fmul float %1482, %1483 +> %1485 = fadd float %1484, 0.000000e+00 +> %1486 = bitcast i32 %686 to float +> %1487 = bitcast i32 %686 to float +> %1488 = fmul float %1486, %1487 +> %1489 = fadd float %1485, %1488 +> %1490 = call float @llvm.sqrt.f32(float %1489) +> %1491 = fneg float %692 +> %1492 = fmul float %1490, %1491 +> %1493 = bitcast i32 %52 to float +> %1494 = fadd float %1493, %1492 +> %1495 = bitcast i32 %52 to float +> %1496 = bitcast i32 %52 to float +> %1497 = fmul float %1495, %1496 +1969,1970c1970,1971 +< %1499 = bitcast i32 %865 to float +< %1500 = bitcast i32 %865 to float +--- +> %1499 = bitcast i32 %686 to float +> %1500 = bitcast i32 %686 to float +1972,1974c1973,1975 +< %1502 = fadd float %1501, 0.000000e+00 +< %1503 = bitcast i32 %57 to float +< %1504 = bitcast i32 %57 to float +--- +> %1502 = fadd float %1498, %1501 +> %1503 = call float @llvm.sqrt.f32(float %1502) +> %1504 = fneg float %692 +1976,1983c1977,1984 +< %1506 = fadd float %1502, %1505 +< %1507 = call float @llvm.sqrt.f32(float %1506) +< %1508 = fneg float %710 +< %1509 = fmul float %1507, %1508 +< %1510 = bitcast i32 %865 to float +< %1511 = fadd float %1510, %1509 +< %1512 = bitcast i32 %865 to float +< %1513 = bitcast i32 %865 to float +--- +> %1506 = bitcast i32 %52 to float +> %1507 = fadd float %1506, %1505 +> %1508 = bitcast i32 %52 to float +> %1509 = bitcast i32 %52 to float +> %1510 = fmul float %1508, %1509 +> %1511 = fadd float %1510, 0.000000e+00 +> %1512 = bitcast i32 %686 to float +> %1513 = bitcast i32 %686 to float +1985,1987c1986,1988 +< %1515 = fadd float %1514, 0.000000e+00 +< %1516 = bitcast i32 %57 to float +< %1517 = bitcast i32 %57 to float +--- +> %1515 = fadd float %1511, 
%1514 +> %1516 = call float @llvm.sqrt.f32(float %1515) +> %1517 = fneg float %692 +1989,2059c1990,2060 +< %1519 = fadd float %1515, %1518 +< %1520 = call float @llvm.sqrt.f32(float %1519) +< %1521 = fneg float %710 +< %1522 = fmul float %1520, %1521 +< %1523 = bitcast i32 %865 to float +< %1524 = fadd float %1523, %1522 +< %1525 = bitcast i32 %865 to float +< %1526 = bitcast i32 %865 to float +< %1527 = fmul float %1525, %1526 +< %1528 = fadd float %1527, 0.000000e+00 +< %1529 = bitcast i32 %57 to float +< %1530 = bitcast i32 %57 to float +< %1531 = fmul float %1529, %1530 +< %1532 = fadd float %1528, %1531 +< %1533 = call float @llvm.sqrt.f32(float %1532) +< %1534 = fneg float %710 +< %1535 = fmul float %1533, %1534 +< %1536 = bitcast i32 %865 to float +< %1537 = fadd float %1536, %1535 +< %1538 = fmul float %1524, %1537 +< %1539 = fadd float %1538, 0.000000e+00 +< %1540 = bitcast i32 %865 to float +< %1541 = bitcast i32 %865 to float +< %1542 = fmul float %1540, %1541 +< %1543 = fadd float %1542, 0.000000e+00 +< %1544 = bitcast i32 %57 to float +< %1545 = bitcast i32 %57 to float +< %1546 = fmul float %1544, %1545 +< %1547 = fadd float %1543, %1546 +< %1548 = call float @llvm.sqrt.f32(float %1547) +< %1549 = fneg float %710 +< %1550 = fmul float %1548, %1549 +< %1551 = fmul float %1550, 0.000000e+00 +< %1552 = bitcast i32 %57 to float +< %1553 = fadd float %1552, %1551 +< %1554 = bitcast i32 %865 to float +< %1555 = bitcast i32 %865 to float +< %1556 = fmul float %1554, %1555 +< %1557 = fadd float %1556, 0.000000e+00 +< %1558 = bitcast i32 %57 to float +< %1559 = bitcast i32 %57 to float +< %1560 = fmul float %1558, %1559 +< %1561 = fadd float %1557, %1560 +< %1562 = call float @llvm.sqrt.f32(float %1561) +< %1563 = fneg float %710 +< %1564 = fmul float %1562, %1563 +< %1565 = fmul float %1564, 0.000000e+00 +< %1566 = bitcast i32 %57 to float +< %1567 = fadd float %1566, %1565 +< %1568 = fmul float %1553, %1567 +< %1569 = fadd float %1539, %1568 +< %1570 = call float @llvm.sqrt.f32(float %1569) +< %1571 = fadd float %1570, 0.000000e+00 +< %1572 = fdiv float %1511, %1571 +< %1573 = fmul float %1572, 2.000000e+00 +< %1574 = bitcast i32 %865 to float +< %1575 = bitcast i32 %865 to float +< %1576 = fmul float %1574, %1575 +< %1577 = fadd float %1576, 0.000000e+00 +< %1578 = bitcast i32 %57 to float +< %1579 = bitcast i32 %57 to float +< %1580 = fmul float %1578, %1579 +< %1581 = fadd float %1577, %1580 +< %1582 = call float @llvm.sqrt.f32(float %1581) +< %1583 = fneg float %710 +< %1584 = fmul float %1582, %1583 +< %1585 = fmul float %1584, 0.000000e+00 +< %1586 = bitcast i32 %57 to float +< %1587 = fadd float %1586, %1585 +< %1588 = bitcast i32 %865 to float +< %1589 = bitcast i32 %865 to float +--- +> %1519 = bitcast i32 %52 to float +> %1520 = fadd float %1519, %1518 +> %1521 = fmul float %1507, %1520 +> %1522 = fadd float %1521, 0.000000e+00 +> %1523 = bitcast i32 %52 to float +> %1524 = bitcast i32 %52 to float +> %1525 = fmul float %1523, %1524 +> %1526 = fadd float %1525, 0.000000e+00 +> %1527 = bitcast i32 %686 to float +> %1528 = bitcast i32 %686 to float +> %1529 = fmul float %1527, %1528 +> %1530 = fadd float %1526, %1529 +> %1531 = call float @llvm.sqrt.f32(float %1530) +> %1532 = fneg float %692 +> %1533 = fmul float %1531, %1532 +> %1534 = fmul float %1533, 0.000000e+00 +> %1535 = bitcast i32 %686 to float +> %1536 = fadd float %1535, %1534 +> %1537 = bitcast i32 %52 to float +> %1538 = bitcast i32 %52 to float +> %1539 = fmul float %1537, %1538 +> %1540 = fadd float %1539, 
0.000000e+00 +> %1541 = bitcast i32 %686 to float +> %1542 = bitcast i32 %686 to float +> %1543 = fmul float %1541, %1542 +> %1544 = fadd float %1540, %1543 +> %1545 = call float @llvm.sqrt.f32(float %1544) +> %1546 = fneg float %692 +> %1547 = fmul float %1545, %1546 +> %1548 = fmul float %1547, 0.000000e+00 +> %1549 = bitcast i32 %686 to float +> %1550 = fadd float %1549, %1548 +> %1551 = fmul float %1536, %1550 +> %1552 = fadd float %1522, %1551 +> %1553 = call float @llvm.sqrt.f32(float %1552) +> %1554 = fadd float %1553, 0.000000e+00 +> %1555 = fdiv float %1494, %1554 +> %1556 = fmul float %1555, 2.000000e+00 +> %1557 = bitcast i32 %52 to float +> %1558 = bitcast i32 %52 to float +> %1559 = fmul float %1557, %1558 +> %1560 = fadd float %1559, 0.000000e+00 +> %1561 = bitcast i32 %686 to float +> %1562 = bitcast i32 %686 to float +> %1563 = fmul float %1561, %1562 +> %1564 = fadd float %1560, %1563 +> %1565 = call float @llvm.sqrt.f32(float %1564) +> %1566 = fneg float %692 +> %1567 = fmul float %1565, %1566 +> %1568 = fmul float %1567, 0.000000e+00 +> %1569 = bitcast i32 %686 to float +> %1570 = fadd float %1569, %1568 +> %1571 = bitcast i32 %52 to float +> %1572 = bitcast i32 %52 to float +> %1573 = fmul float %1571, %1572 +> %1574 = fadd float %1573, 0.000000e+00 +> %1575 = bitcast i32 %686 to float +> %1576 = bitcast i32 %686 to float +> %1577 = fmul float %1575, %1576 +> %1578 = fadd float %1574, %1577 +> %1579 = call float @llvm.sqrt.f32(float %1578) +> %1580 = fneg float %692 +> %1581 = fmul float %1579, %1580 +> %1582 = bitcast i32 %52 to float +> %1583 = fadd float %1582, %1581 +> %1584 = bitcast i32 %52 to float +> %1585 = bitcast i32 %52 to float +> %1586 = fmul float %1584, %1585 +> %1587 = fadd float %1586, 0.000000e+00 +> %1588 = bitcast i32 %686 to float +> %1589 = bitcast i32 %686 to float +2061,2063c2062,2064 +< %1591 = fadd float %1590, 0.000000e+00 +< %1592 = bitcast i32 %57 to float +< %1593 = bitcast i32 %57 to float +--- +> %1591 = fadd float %1587, %1590 +> %1592 = call float @llvm.sqrt.f32(float %1591) +> %1593 = fneg float %692 +2065,2149c2066,2150 +< %1595 = fadd float %1591, %1594 +< %1596 = call float @llvm.sqrt.f32(float %1595) +< %1597 = fneg float %710 +< %1598 = fmul float %1596, %1597 +< %1599 = bitcast i32 %865 to float +< %1600 = fadd float %1599, %1598 +< %1601 = bitcast i32 %865 to float +< %1602 = bitcast i32 %865 to float +< %1603 = fmul float %1601, %1602 +< %1604 = fadd float %1603, 0.000000e+00 +< %1605 = bitcast i32 %57 to float +< %1606 = bitcast i32 %57 to float +< %1607 = fmul float %1605, %1606 +< %1608 = fadd float %1604, %1607 +< %1609 = call float @llvm.sqrt.f32(float %1608) +< %1610 = fneg float %710 +< %1611 = fmul float %1609, %1610 +< %1612 = bitcast i32 %865 to float +< %1613 = fadd float %1612, %1611 +< %1614 = fmul float %1600, %1613 +< %1615 = fadd float %1614, 0.000000e+00 +< %1616 = bitcast i32 %865 to float +< %1617 = bitcast i32 %865 to float +< %1618 = fmul float %1616, %1617 +< %1619 = fadd float %1618, 0.000000e+00 +< %1620 = bitcast i32 %57 to float +< %1621 = bitcast i32 %57 to float +< %1622 = fmul float %1620, %1621 +< %1623 = fadd float %1619, %1622 +< %1624 = call float @llvm.sqrt.f32(float %1623) +< %1625 = fneg float %710 +< %1626 = fmul float %1624, %1625 +< %1627 = fmul float %1626, 0.000000e+00 +< %1628 = bitcast i32 %57 to float +< %1629 = fadd float %1628, %1627 +< %1630 = bitcast i32 %865 to float +< %1631 = bitcast i32 %865 to float +< %1632 = fmul float %1630, %1631 +< %1633 = fadd float %1632, 0.000000e+00 
+< %1634 = bitcast i32 %57 to float +< %1635 = bitcast i32 %57 to float +< %1636 = fmul float %1634, %1635 +< %1637 = fadd float %1633, %1636 +< %1638 = call float @llvm.sqrt.f32(float %1637) +< %1639 = fneg float %710 +< %1640 = fmul float %1638, %1639 +< %1641 = fmul float %1640, 0.000000e+00 +< %1642 = bitcast i32 %57 to float +< %1643 = fadd float %1642, %1641 +< %1644 = fmul float %1629, %1643 +< %1645 = fadd float %1615, %1644 +< %1646 = call float @llvm.sqrt.f32(float %1645) +< %1647 = fadd float %1646, 0.000000e+00 +< %1648 = fdiv float %1587, %1647 +< %1649 = fmul float %1573, %1648 +< %1650 = fneg float %1649 +< %1651 = getelementptr float, float* %0, i32 0 +< %1652 = getelementptr inbounds float, float* %1651, i64 3 +< %1653 = load float, float* %1652, align 4 +< %1654 = fmul float %1650, %1653 +< %1655 = fadd float %1498, %1654 +< %1656 = insertelement <4 x float> zeroinitializer, float %1655, i32 0 +< %1657 = insertelement <4 x float> %1656, float 0.000000e+00, i32 1 +< %1658 = insertelement <4 x float> %1657, float 0.000000e+00, i32 2 +< %1659 = insertelement <4 x float> %1658, float 0.000000e+00, i32 3 +< %1660 = extractelement <4 x float> %1659, i32 0 +< store float %1660, float* %1181, align 4 +< %1661 = extractelement <4 x float> %1659, i32 1 +< store float %1661, float* %40, align 4 +< %1662 = bitcast i32 %865 to float +< %1663 = bitcast i32 %865 to float +< %1664 = fmul float %1662, %1663 +< %1665 = fadd float %1664, 0.000000e+00 +< %1666 = bitcast i32 %57 to float +< %1667 = bitcast i32 %57 to float +< %1668 = fmul float %1666, %1667 +< %1669 = fadd float %1665, %1668 +< %1670 = call float @llvm.sqrt.f32(float %1669) +< %1671 = fneg float %710 +< %1672 = fmul float %1670, %1671 +< %1673 = fmul float %1672, 0.000000e+00 +< %1674 = bitcast i32 %57 to float +< %1675 = fadd float %1674, %1673 +< %1676 = bitcast i32 %865 to float +< %1677 = bitcast i32 %865 to float +--- +> %1595 = bitcast i32 %52 to float +> %1596 = fadd float %1595, %1594 +> %1597 = fmul float %1583, %1596 +> %1598 = fadd float %1597, 0.000000e+00 +> %1599 = bitcast i32 %52 to float +> %1600 = bitcast i32 %52 to float +> %1601 = fmul float %1599, %1600 +> %1602 = fadd float %1601, 0.000000e+00 +> %1603 = bitcast i32 %686 to float +> %1604 = bitcast i32 %686 to float +> %1605 = fmul float %1603, %1604 +> %1606 = fadd float %1602, %1605 +> %1607 = call float @llvm.sqrt.f32(float %1606) +> %1608 = fneg float %692 +> %1609 = fmul float %1607, %1608 +> %1610 = fmul float %1609, 0.000000e+00 +> %1611 = bitcast i32 %686 to float +> %1612 = fadd float %1611, %1610 +> %1613 = bitcast i32 %52 to float +> %1614 = bitcast i32 %52 to float +> %1615 = fmul float %1613, %1614 +> %1616 = fadd float %1615, 0.000000e+00 +> %1617 = bitcast i32 %686 to float +> %1618 = bitcast i32 %686 to float +> %1619 = fmul float %1617, %1618 +> %1620 = fadd float %1616, %1619 +> %1621 = call float @llvm.sqrt.f32(float %1620) +> %1622 = fneg float %692 +> %1623 = fmul float %1621, %1622 +> %1624 = fmul float %1623, 0.000000e+00 +> %1625 = bitcast i32 %686 to float +> %1626 = fadd float %1625, %1624 +> %1627 = fmul float %1612, %1626 +> %1628 = fadd float %1598, %1627 +> %1629 = call float @llvm.sqrt.f32(float %1628) +> %1630 = fadd float %1629, 0.000000e+00 +> %1631 = fdiv float %1570, %1630 +> %1632 = fmul float %1556, %1631 +> %1633 = fneg float %1632 +> %1634 = load float, float* %44, align 4 +> %1635 = fmul float %1633, %1634 +> %1636 = fadd float %1481, %1635 +> %1637 = insertelement <4 x float> zeroinitializer, float %1636, i32 0 +> 
%1638 = insertelement <4 x float> %1637, float 0.000000e+00, i32 1 +> %1639 = insertelement <4 x float> %1638, float 0.000000e+00, i32 2 +> %1640 = insertelement <4 x float> %1639, float 0.000000e+00, i32 3 +> %1641 = extractelement <4 x float> %1640, i32 0 +> store float %1641, float* %1164, align 4 +> %1642 = extractelement <4 x float> %1640, i32 1 +> %1643 = getelementptr float, float* %2, i32 0 +> %1644 = getelementptr inbounds float, float* %1643, i64 2 +> store float %1642, float* %1644, align 4 +> %1645 = bitcast i32 %52 to float +> %1646 = bitcast i32 %52 to float +> %1647 = fmul float %1645, %1646 +> %1648 = fadd float %1647, 0.000000e+00 +> %1649 = bitcast i32 %686 to float +> %1650 = bitcast i32 %686 to float +> %1651 = fmul float %1649, %1650 +> %1652 = fadd float %1648, %1651 +> %1653 = call float @llvm.sqrt.f32(float %1652) +> %1654 = fneg float %692 +> %1655 = fmul float %1653, %1654 +> %1656 = fmul float %1655, 0.000000e+00 +> %1657 = bitcast i32 %686 to float +> %1658 = fadd float %1657, %1656 +> %1659 = bitcast i32 %52 to float +> %1660 = bitcast i32 %52 to float +> %1661 = fmul float %1659, %1660 +> %1662 = fadd float %1661, 0.000000e+00 +> %1663 = bitcast i32 %686 to float +> %1664 = bitcast i32 %686 to float +> %1665 = fmul float %1663, %1664 +> %1666 = fadd float %1662, %1665 +> %1667 = call float @llvm.sqrt.f32(float %1666) +> %1668 = fneg float %692 +> %1669 = fmul float %1667, %1668 +> %1670 = bitcast i32 %52 to float +> %1671 = fadd float %1670, %1669 +> %1672 = bitcast i32 %52 to float +> %1673 = bitcast i32 %52 to float +> %1674 = fmul float %1672, %1673 +> %1675 = fadd float %1674, 0.000000e+00 +> %1676 = bitcast i32 %686 to float +> %1677 = bitcast i32 %686 to float +2151,2153c2152,2154 +< %1679 = fadd float %1678, 0.000000e+00 +< %1680 = bitcast i32 %57 to float +< %1681 = bitcast i32 %57 to float +--- +> %1679 = fadd float %1675, %1678 +> %1680 = call float @llvm.sqrt.f32(float %1679) +> %1681 = fneg float %692 +2155,2211c2156,2212 +< %1683 = fadd float %1679, %1682 +< %1684 = call float @llvm.sqrt.f32(float %1683) +< %1685 = fneg float %710 +< %1686 = fmul float %1684, %1685 +< %1687 = bitcast i32 %865 to float +< %1688 = fadd float %1687, %1686 +< %1689 = bitcast i32 %865 to float +< %1690 = bitcast i32 %865 to float +< %1691 = fmul float %1689, %1690 +< %1692 = fadd float %1691, 0.000000e+00 +< %1693 = bitcast i32 %57 to float +< %1694 = bitcast i32 %57 to float +< %1695 = fmul float %1693, %1694 +< %1696 = fadd float %1692, %1695 +< %1697 = call float @llvm.sqrt.f32(float %1696) +< %1698 = fneg float %710 +< %1699 = fmul float %1697, %1698 +< %1700 = bitcast i32 %865 to float +< %1701 = fadd float %1700, %1699 +< %1702 = fmul float %1688, %1701 +< %1703 = fadd float %1702, 0.000000e+00 +< %1704 = bitcast i32 %865 to float +< %1705 = bitcast i32 %865 to float +< %1706 = fmul float %1704, %1705 +< %1707 = fadd float %1706, 0.000000e+00 +< %1708 = bitcast i32 %57 to float +< %1709 = bitcast i32 %57 to float +< %1710 = fmul float %1708, %1709 +< %1711 = fadd float %1707, %1710 +< %1712 = call float @llvm.sqrt.f32(float %1711) +< %1713 = fneg float %710 +< %1714 = fmul float %1712, %1713 +< %1715 = fmul float %1714, 0.000000e+00 +< %1716 = bitcast i32 %57 to float +< %1717 = fadd float %1716, %1715 +< %1718 = bitcast i32 %865 to float +< %1719 = bitcast i32 %865 to float +< %1720 = fmul float %1718, %1719 +< %1721 = fadd float %1720, 0.000000e+00 +< %1722 = bitcast i32 %57 to float +< %1723 = bitcast i32 %57 to float +< %1724 = fmul float %1722, %1723 +< 
%1725 = fadd float %1721, %1724 +< %1726 = call float @llvm.sqrt.f32(float %1725) +< %1727 = fneg float %710 +< %1728 = fmul float %1726, %1727 +< %1729 = fmul float %1728, 0.000000e+00 +< %1730 = bitcast i32 %57 to float +< %1731 = fadd float %1730, %1729 +< %1732 = fmul float %1717, %1731 +< %1733 = fadd float %1703, %1732 +< %1734 = call float @llvm.sqrt.f32(float %1733) +< %1735 = fadd float %1734, 0.000000e+00 +< %1736 = fdiv float %1675, %1735 +< %1737 = fmul float %1736, 2.000000e+00 +< %1738 = bitcast i32 %865 to float +< %1739 = bitcast i32 %865 to float +--- +> %1683 = bitcast i32 %52 to float +> %1684 = fadd float %1683, %1682 +> %1685 = fmul float %1671, %1684 +> %1686 = fadd float %1685, 0.000000e+00 +> %1687 = bitcast i32 %52 to float +> %1688 = bitcast i32 %52 to float +> %1689 = fmul float %1687, %1688 +> %1690 = fadd float %1689, 0.000000e+00 +> %1691 = bitcast i32 %686 to float +> %1692 = bitcast i32 %686 to float +> %1693 = fmul float %1691, %1692 +> %1694 = fadd float %1690, %1693 +> %1695 = call float @llvm.sqrt.f32(float %1694) +> %1696 = fneg float %692 +> %1697 = fmul float %1695, %1696 +> %1698 = fmul float %1697, 0.000000e+00 +> %1699 = bitcast i32 %686 to float +> %1700 = fadd float %1699, %1698 +> %1701 = bitcast i32 %52 to float +> %1702 = bitcast i32 %52 to float +> %1703 = fmul float %1701, %1702 +> %1704 = fadd float %1703, 0.000000e+00 +> %1705 = bitcast i32 %686 to float +> %1706 = bitcast i32 %686 to float +> %1707 = fmul float %1705, %1706 +> %1708 = fadd float %1704, %1707 +> %1709 = call float @llvm.sqrt.f32(float %1708) +> %1710 = fneg float %692 +> %1711 = fmul float %1709, %1710 +> %1712 = fmul float %1711, 0.000000e+00 +> %1713 = bitcast i32 %686 to float +> %1714 = fadd float %1713, %1712 +> %1715 = fmul float %1700, %1714 +> %1716 = fadd float %1686, %1715 +> %1717 = call float @llvm.sqrt.f32(float %1716) +> %1718 = fadd float %1717, 0.000000e+00 +> %1719 = fdiv float %1658, %1718 +> %1720 = fmul float %1719, 2.000000e+00 +> %1721 = bitcast i32 %52 to float +> %1722 = bitcast i32 %52 to float +> %1723 = fmul float %1721, %1722 +> %1724 = fadd float %1723, 0.000000e+00 +> %1725 = bitcast i32 %686 to float +> %1726 = bitcast i32 %686 to float +> %1727 = fmul float %1725, %1726 +> %1728 = fadd float %1724, %1727 +> %1729 = call float @llvm.sqrt.f32(float %1728) +> %1730 = fneg float %692 +> %1731 = fmul float %1729, %1730 +> %1732 = bitcast i32 %52 to float +> %1733 = fadd float %1732, %1731 +> %1734 = bitcast i32 %52 to float +> %1735 = bitcast i32 %52 to float +> %1736 = fmul float %1734, %1735 +> %1737 = fadd float %1736, 0.000000e+00 +> %1738 = bitcast i32 %686 to float +> %1739 = bitcast i32 %686 to float +2213,2215c2214,2216 +< %1741 = fadd float %1740, 0.000000e+00 +< %1742 = bitcast i32 %57 to float +< %1743 = bitcast i32 %57 to float +--- +> %1741 = fadd float %1737, %1740 +> %1742 = call float @llvm.sqrt.f32(float %1741) +> %1743 = fneg float %692 +2217,2224c2218,2225 +< %1745 = fadd float %1741, %1744 +< %1746 = call float @llvm.sqrt.f32(float %1745) +< %1747 = fneg float %710 +< %1748 = fmul float %1746, %1747 +< %1749 = bitcast i32 %865 to float +< %1750 = fadd float %1749, %1748 +< %1751 = bitcast i32 %865 to float +< %1752 = bitcast i32 %865 to float +--- +> %1745 = bitcast i32 %52 to float +> %1746 = fadd float %1745, %1744 +> %1747 = bitcast i32 %52 to float +> %1748 = bitcast i32 %52 to float +> %1749 = fmul float %1747, %1748 +> %1750 = fadd float %1749, 0.000000e+00 +> %1751 = bitcast i32 %686 to float +> %1752 = bitcast i32 %686 
to float +2226,2228c2227,2229 +< %1754 = fadd float %1753, 0.000000e+00 +< %1755 = bitcast i32 %57 to float +< %1756 = bitcast i32 %57 to float +--- +> %1754 = fadd float %1750, %1753 +> %1755 = call float @llvm.sqrt.f32(float %1754) +> %1756 = fneg float %692 +2230,2314c2231,2315 +< %1758 = fadd float %1754, %1757 +< %1759 = call float @llvm.sqrt.f32(float %1758) +< %1760 = fneg float %710 +< %1761 = fmul float %1759, %1760 +< %1762 = bitcast i32 %865 to float +< %1763 = fadd float %1762, %1761 +< %1764 = bitcast i32 %865 to float +< %1765 = bitcast i32 %865 to float +< %1766 = fmul float %1764, %1765 +< %1767 = fadd float %1766, 0.000000e+00 +< %1768 = bitcast i32 %57 to float +< %1769 = bitcast i32 %57 to float +< %1770 = fmul float %1768, %1769 +< %1771 = fadd float %1767, %1770 +< %1772 = call float @llvm.sqrt.f32(float %1771) +< %1773 = fneg float %710 +< %1774 = fmul float %1772, %1773 +< %1775 = bitcast i32 %865 to float +< %1776 = fadd float %1775, %1774 +< %1777 = fmul float %1763, %1776 +< %1778 = fadd float %1777, 0.000000e+00 +< %1779 = bitcast i32 %865 to float +< %1780 = bitcast i32 %865 to float +< %1781 = fmul float %1779, %1780 +< %1782 = fadd float %1781, 0.000000e+00 +< %1783 = bitcast i32 %57 to float +< %1784 = bitcast i32 %57 to float +< %1785 = fmul float %1783, %1784 +< %1786 = fadd float %1782, %1785 +< %1787 = call float @llvm.sqrt.f32(float %1786) +< %1788 = fneg float %710 +< %1789 = fmul float %1787, %1788 +< %1790 = fmul float %1789, 0.000000e+00 +< %1791 = bitcast i32 %57 to float +< %1792 = fadd float %1791, %1790 +< %1793 = bitcast i32 %865 to float +< %1794 = bitcast i32 %865 to float +< %1795 = fmul float %1793, %1794 +< %1796 = fadd float %1795, 0.000000e+00 +< %1797 = bitcast i32 %57 to float +< %1798 = bitcast i32 %57 to float +< %1799 = fmul float %1797, %1798 +< %1800 = fadd float %1796, %1799 +< %1801 = call float @llvm.sqrt.f32(float %1800) +< %1802 = fneg float %710 +< %1803 = fmul float %1801, %1802 +< %1804 = fmul float %1803, 0.000000e+00 +< %1805 = bitcast i32 %57 to float +< %1806 = fadd float %1805, %1804 +< %1807 = fmul float %1792, %1806 +< %1808 = fadd float %1778, %1807 +< %1809 = call float @llvm.sqrt.f32(float %1808) +< %1810 = fadd float %1809, 0.000000e+00 +< %1811 = fdiv float %1750, %1810 +< %1812 = fmul float %1737, %1811 +< %1813 = fneg float %1812 +< %1814 = insertelement <4 x float> zeroinitializer, float %1813, i32 0 +< %1815 = insertelement <4 x float> %1814, float 0.000000e+00, i32 1 +< %1816 = insertelement <4 x float> %1815, float 0.000000e+00, i32 2 +< %1817 = insertelement <4 x float> %1816, float 0.000000e+00, i32 3 +< %1818 = getelementptr float, float* %0, i32 0 +< %1819 = load float, float* %1818, align 4 +< %1820 = insertelement <4 x float> zeroinitializer, float %1819, i32 0 +< %1821 = insertelement <4 x float> %1820, float 0.000000e+00, i32 1 +< %1822 = insertelement <4 x float> %1821, float 0.000000e+00, i32 2 +< %1823 = insertelement <4 x float> %1822, float 0.000000e+00, i32 3 +< %1824 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1817, <4 x float> %1823, <4 x float> zeroinitializer) +< %1825 = extractelement <4 x float> %1824, i32 0 +< store float %1825, float* %40, align 4 +< %1826 = bitcast i32 %865 to float +< %1827 = bitcast i32 %865 to float +< %1828 = fmul float %1826, %1827 +< %1829 = fadd float %1828, 0.000000e+00 +< %1830 = bitcast i32 %57 to float +< %1831 = bitcast i32 %57 to float +< %1832 = fmul float %1830, %1831 +< %1833 = fadd float %1829, %1832 +< %1834 = call float @llvm.sqrt.f32(float 
%1833) +< %1835 = fneg float %710 +< %1836 = fmul float %1834, %1835 +< %1837 = fmul float %1836, 0.000000e+00 +< %1838 = bitcast i32 %57 to float +< %1839 = fadd float %1838, %1837 +< %1840 = bitcast i32 %865 to float +< %1841 = bitcast i32 %865 to float +--- +> %1758 = bitcast i32 %52 to float +> %1759 = fadd float %1758, %1757 +> %1760 = fmul float %1746, %1759 +> %1761 = fadd float %1760, 0.000000e+00 +> %1762 = bitcast i32 %52 to float +> %1763 = bitcast i32 %52 to float +> %1764 = fmul float %1762, %1763 +> %1765 = fadd float %1764, 0.000000e+00 +> %1766 = bitcast i32 %686 to float +> %1767 = bitcast i32 %686 to float +> %1768 = fmul float %1766, %1767 +> %1769 = fadd float %1765, %1768 +> %1770 = call float @llvm.sqrt.f32(float %1769) +> %1771 = fneg float %692 +> %1772 = fmul float %1770, %1771 +> %1773 = fmul float %1772, 0.000000e+00 +> %1774 = bitcast i32 %686 to float +> %1775 = fadd float %1774, %1773 +> %1776 = bitcast i32 %52 to float +> %1777 = bitcast i32 %52 to float +> %1778 = fmul float %1776, %1777 +> %1779 = fadd float %1778, 0.000000e+00 +> %1780 = bitcast i32 %686 to float +> %1781 = bitcast i32 %686 to float +> %1782 = fmul float %1780, %1781 +> %1783 = fadd float %1779, %1782 +> %1784 = call float @llvm.sqrt.f32(float %1783) +> %1785 = fneg float %692 +> %1786 = fmul float %1784, %1785 +> %1787 = fmul float %1786, 0.000000e+00 +> %1788 = bitcast i32 %686 to float +> %1789 = fadd float %1788, %1787 +> %1790 = fmul float %1775, %1789 +> %1791 = fadd float %1761, %1790 +> %1792 = call float @llvm.sqrt.f32(float %1791) +> %1793 = fadd float %1792, 0.000000e+00 +> %1794 = fdiv float %1733, %1793 +> %1795 = fmul float %1720, %1794 +> %1796 = fneg float %1795 +> %1797 = insertelement <4 x float> zeroinitializer, float %1796, i32 0 +> %1798 = insertelement <4 x float> %1797, float 0.000000e+00, i32 1 +> %1799 = insertelement <4 x float> %1798, float 0.000000e+00, i32 2 +> %1800 = insertelement <4 x float> %1799, float 0.000000e+00, i32 3 +> %1801 = getelementptr float, float* %0, i32 0 +> %1802 = load float, float* %1801, align 4 +> %1803 = insertelement <4 x float> zeroinitializer, float %1802, i32 0 +> %1804 = insertelement <4 x float> %1803, float 0.000000e+00, i32 1 +> %1805 = insertelement <4 x float> %1804, float 0.000000e+00, i32 2 +> %1806 = insertelement <4 x float> %1805, float 0.000000e+00, i32 3 +> %1807 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1800, <4 x float> %1806, <4 x float> zeroinitializer) +> %1808 = extractelement <4 x float> %1807, i32 0 +> store float %1808, float* %1644, align 4 +> %1809 = bitcast i32 %52 to float +> %1810 = bitcast i32 %52 to float +> %1811 = fmul float %1809, %1810 +> %1812 = fadd float %1811, 0.000000e+00 +> %1813 = bitcast i32 %686 to float +> %1814 = bitcast i32 %686 to float +> %1815 = fmul float %1813, %1814 +> %1816 = fadd float %1812, %1815 +> %1817 = call float @llvm.sqrt.f32(float %1816) +> %1818 = fneg float %692 +> %1819 = fmul float %1817, %1818 +> %1820 = fmul float %1819, 0.000000e+00 +> %1821 = bitcast i32 %686 to float +> %1822 = fadd float %1821, %1820 +> %1823 = bitcast i32 %52 to float +> %1824 = bitcast i32 %52 to float +> %1825 = fmul float %1823, %1824 +> %1826 = fadd float %1825, 0.000000e+00 +> %1827 = bitcast i32 %686 to float +> %1828 = bitcast i32 %686 to float +> %1829 = fmul float %1827, %1828 +> %1830 = fadd float %1826, %1829 +> %1831 = call float @llvm.sqrt.f32(float %1830) +> %1832 = fneg float %692 +> %1833 = fmul float %1831, %1832 +> %1834 = bitcast i32 %52 to float +> %1835 = fadd float 
%1834, %1833 +> %1836 = bitcast i32 %52 to float +> %1837 = bitcast i32 %52 to float +> %1838 = fmul float %1836, %1837 +> %1839 = fadd float %1838, 0.000000e+00 +> %1840 = bitcast i32 %686 to float +> %1841 = bitcast i32 %686 to float +2316,2318c2317,2319 +< %1843 = fadd float %1842, 0.000000e+00 +< %1844 = bitcast i32 %57 to float +< %1845 = bitcast i32 %57 to float +--- +> %1843 = fadd float %1839, %1842 +> %1844 = call float @llvm.sqrt.f32(float %1843) +> %1845 = fneg float %692 +2320,2376c2321,2377 +< %1847 = fadd float %1843, %1846 +< %1848 = call float @llvm.sqrt.f32(float %1847) +< %1849 = fneg float %710 +< %1850 = fmul float %1848, %1849 +< %1851 = bitcast i32 %865 to float +< %1852 = fadd float %1851, %1850 +< %1853 = bitcast i32 %865 to float +< %1854 = bitcast i32 %865 to float +< %1855 = fmul float %1853, %1854 +< %1856 = fadd float %1855, 0.000000e+00 +< %1857 = bitcast i32 %57 to float +< %1858 = bitcast i32 %57 to float +< %1859 = fmul float %1857, %1858 +< %1860 = fadd float %1856, %1859 +< %1861 = call float @llvm.sqrt.f32(float %1860) +< %1862 = fneg float %710 +< %1863 = fmul float %1861, %1862 +< %1864 = bitcast i32 %865 to float +< %1865 = fadd float %1864, %1863 +< %1866 = fmul float %1852, %1865 +< %1867 = fadd float %1866, 0.000000e+00 +< %1868 = bitcast i32 %865 to float +< %1869 = bitcast i32 %865 to float +< %1870 = fmul float %1868, %1869 +< %1871 = fadd float %1870, 0.000000e+00 +< %1872 = bitcast i32 %57 to float +< %1873 = bitcast i32 %57 to float +< %1874 = fmul float %1872, %1873 +< %1875 = fadd float %1871, %1874 +< %1876 = call float @llvm.sqrt.f32(float %1875) +< %1877 = fneg float %710 +< %1878 = fmul float %1876, %1877 +< %1879 = fmul float %1878, 0.000000e+00 +< %1880 = bitcast i32 %57 to float +< %1881 = fadd float %1880, %1879 +< %1882 = bitcast i32 %865 to float +< %1883 = bitcast i32 %865 to float +< %1884 = fmul float %1882, %1883 +< %1885 = fadd float %1884, 0.000000e+00 +< %1886 = bitcast i32 %57 to float +< %1887 = bitcast i32 %57 to float +< %1888 = fmul float %1886, %1887 +< %1889 = fadd float %1885, %1888 +< %1890 = call float @llvm.sqrt.f32(float %1889) +< %1891 = fneg float %710 +< %1892 = fmul float %1890, %1891 +< %1893 = fmul float %1892, 0.000000e+00 +< %1894 = bitcast i32 %57 to float +< %1895 = fadd float %1894, %1893 +< %1896 = fmul float %1881, %1895 +< %1897 = fadd float %1867, %1896 +< %1898 = call float @llvm.sqrt.f32(float %1897) +< %1899 = fadd float %1898, 0.000000e+00 +< %1900 = fdiv float %1839, %1899 +< %1901 = fmul float %1900, 2.000000e+00 +< %1902 = bitcast i32 %865 to float +< %1903 = bitcast i32 %865 to float +--- +> %1847 = bitcast i32 %52 to float +> %1848 = fadd float %1847, %1846 +> %1849 = fmul float %1835, %1848 +> %1850 = fadd float %1849, 0.000000e+00 +> %1851 = bitcast i32 %52 to float +> %1852 = bitcast i32 %52 to float +> %1853 = fmul float %1851, %1852 +> %1854 = fadd float %1853, 0.000000e+00 +> %1855 = bitcast i32 %686 to float +> %1856 = bitcast i32 %686 to float +> %1857 = fmul float %1855, %1856 +> %1858 = fadd float %1854, %1857 +> %1859 = call float @llvm.sqrt.f32(float %1858) +> %1860 = fneg float %692 +> %1861 = fmul float %1859, %1860 +> %1862 = fmul float %1861, 0.000000e+00 +> %1863 = bitcast i32 %686 to float +> %1864 = fadd float %1863, %1862 +> %1865 = bitcast i32 %52 to float +> %1866 = bitcast i32 %52 to float +> %1867 = fmul float %1865, %1866 +> %1868 = fadd float %1867, 0.000000e+00 +> %1869 = bitcast i32 %686 to float +> %1870 = bitcast i32 %686 to float +> %1871 = fmul float %1869, 
%1870 +> %1872 = fadd float %1868, %1871 +> %1873 = call float @llvm.sqrt.f32(float %1872) +> %1874 = fneg float %692 +> %1875 = fmul float %1873, %1874 +> %1876 = fmul float %1875, 0.000000e+00 +> %1877 = bitcast i32 %686 to float +> %1878 = fadd float %1877, %1876 +> %1879 = fmul float %1864, %1878 +> %1880 = fadd float %1850, %1879 +> %1881 = call float @llvm.sqrt.f32(float %1880) +> %1882 = fadd float %1881, 0.000000e+00 +> %1883 = fdiv float %1822, %1882 +> %1884 = fmul float %1883, 2.000000e+00 +> %1885 = bitcast i32 %52 to float +> %1886 = bitcast i32 %52 to float +> %1887 = fmul float %1885, %1886 +> %1888 = fadd float %1887, 0.000000e+00 +> %1889 = bitcast i32 %686 to float +> %1890 = bitcast i32 %686 to float +> %1891 = fmul float %1889, %1890 +> %1892 = fadd float %1888, %1891 +> %1893 = call float @llvm.sqrt.f32(float %1892) +> %1894 = fneg float %692 +> %1895 = fmul float %1893, %1894 +> %1896 = bitcast i32 %52 to float +> %1897 = fadd float %1896, %1895 +> %1898 = bitcast i32 %52 to float +> %1899 = bitcast i32 %52 to float +> %1900 = fmul float %1898, %1899 +> %1901 = fadd float %1900, 0.000000e+00 +> %1902 = bitcast i32 %686 to float +> %1903 = bitcast i32 %686 to float +2378,2380c2379,2381 +< %1905 = fadd float %1904, 0.000000e+00 +< %1906 = bitcast i32 %57 to float +< %1907 = bitcast i32 %57 to float +--- +> %1905 = fadd float %1901, %1904 +> %1906 = call float @llvm.sqrt.f32(float %1905) +> %1907 = fneg float %692 +2382,2389c2383,2390 +< %1909 = fadd float %1905, %1908 +< %1910 = call float @llvm.sqrt.f32(float %1909) +< %1911 = fneg float %710 +< %1912 = fmul float %1910, %1911 +< %1913 = bitcast i32 %865 to float +< %1914 = fadd float %1913, %1912 +< %1915 = bitcast i32 %865 to float +< %1916 = bitcast i32 %865 to float +--- +> %1909 = bitcast i32 %52 to float +> %1910 = fadd float %1909, %1908 +> %1911 = bitcast i32 %52 to float +> %1912 = bitcast i32 %52 to float +> %1913 = fmul float %1911, %1912 +> %1914 = fadd float %1913, 0.000000e+00 +> %1915 = bitcast i32 %686 to float +> %1916 = bitcast i32 %686 to float +2391,2393c2392,2394 +< %1918 = fadd float %1917, 0.000000e+00 +< %1919 = bitcast i32 %57 to float +< %1920 = bitcast i32 %57 to float +--- +> %1918 = fadd float %1914, %1917 +> %1919 = call float @llvm.sqrt.f32(float %1918) +> %1920 = fneg float %692 +2395,2468c2396,2469 +< %1922 = fadd float %1918, %1921 +< %1923 = call float @llvm.sqrt.f32(float %1922) +< %1924 = fneg float %710 +< %1925 = fmul float %1923, %1924 +< %1926 = bitcast i32 %865 to float +< %1927 = fadd float %1926, %1925 +< %1928 = bitcast i32 %865 to float +< %1929 = bitcast i32 %865 to float +< %1930 = fmul float %1928, %1929 +< %1931 = fadd float %1930, 0.000000e+00 +< %1932 = bitcast i32 %57 to float +< %1933 = bitcast i32 %57 to float +< %1934 = fmul float %1932, %1933 +< %1935 = fadd float %1931, %1934 +< %1936 = call float @llvm.sqrt.f32(float %1935) +< %1937 = fneg float %710 +< %1938 = fmul float %1936, %1937 +< %1939 = bitcast i32 %865 to float +< %1940 = fadd float %1939, %1938 +< %1941 = fmul float %1927, %1940 +< %1942 = fadd float %1941, 0.000000e+00 +< %1943 = bitcast i32 %865 to float +< %1944 = bitcast i32 %865 to float +< %1945 = fmul float %1943, %1944 +< %1946 = fadd float %1945, 0.000000e+00 +< %1947 = bitcast i32 %57 to float +< %1948 = bitcast i32 %57 to float +< %1949 = fmul float %1947, %1948 +< %1950 = fadd float %1946, %1949 +< %1951 = call float @llvm.sqrt.f32(float %1950) +< %1952 = fneg float %710 +< %1953 = fmul float %1951, %1952 +< %1954 = fmul float %1953, 
0.000000e+00 +< %1955 = bitcast i32 %57 to float +< %1956 = fadd float %1955, %1954 +< %1957 = bitcast i32 %865 to float +< %1958 = bitcast i32 %865 to float +< %1959 = fmul float %1957, %1958 +< %1960 = fadd float %1959, 0.000000e+00 +< %1961 = bitcast i32 %57 to float +< %1962 = bitcast i32 %57 to float +< %1963 = fmul float %1961, %1962 +< %1964 = fadd float %1960, %1963 +< %1965 = call float @llvm.sqrt.f32(float %1964) +< %1966 = fneg float %710 +< %1967 = fmul float %1965, %1966 +< %1968 = fmul float %1967, 0.000000e+00 +< %1969 = bitcast i32 %57 to float +< %1970 = fadd float %1969, %1968 +< %1971 = fmul float %1956, %1970 +< %1972 = fadd float %1942, %1971 +< %1973 = call float @llvm.sqrt.f32(float %1972) +< %1974 = fadd float %1973, 0.000000e+00 +< %1975 = fdiv float %1914, %1974 +< %1976 = fmul float %1901, %1975 +< %1977 = fneg float %1976 +< %1978 = fmul float %1977, %1819 +< %1979 = fadd float %1978, 0.000000e+00 +< %1980 = bitcast i32 %865 to float +< %1981 = bitcast i32 %865 to float +< %1982 = fmul float %1980, %1981 +< %1983 = fadd float %1982, 0.000000e+00 +< %1984 = bitcast i32 %57 to float +< %1985 = bitcast i32 %57 to float +< %1986 = fmul float %1984, %1985 +< %1987 = fadd float %1983, %1986 +< %1988 = call float @llvm.sqrt.f32(float %1987) +< %1989 = fneg float %710 +< %1990 = fmul float %1988, %1989 +< %1991 = fmul float %1990, 0.000000e+00 +< %1992 = bitcast i32 %57 to float +< %1993 = fadd float %1992, %1991 +< %1994 = bitcast i32 %865 to float +< %1995 = bitcast i32 %865 to float +--- +> %1922 = bitcast i32 %52 to float +> %1923 = fadd float %1922, %1921 +> %1924 = fmul float %1910, %1923 +> %1925 = fadd float %1924, 0.000000e+00 +> %1926 = bitcast i32 %52 to float +> %1927 = bitcast i32 %52 to float +> %1928 = fmul float %1926, %1927 +> %1929 = fadd float %1928, 0.000000e+00 +> %1930 = bitcast i32 %686 to float +> %1931 = bitcast i32 %686 to float +> %1932 = fmul float %1930, %1931 +> %1933 = fadd float %1929, %1932 +> %1934 = call float @llvm.sqrt.f32(float %1933) +> %1935 = fneg float %692 +> %1936 = fmul float %1934, %1935 +> %1937 = fmul float %1936, 0.000000e+00 +> %1938 = bitcast i32 %686 to float +> %1939 = fadd float %1938, %1937 +> %1940 = bitcast i32 %52 to float +> %1941 = bitcast i32 %52 to float +> %1942 = fmul float %1940, %1941 +> %1943 = fadd float %1942, 0.000000e+00 +> %1944 = bitcast i32 %686 to float +> %1945 = bitcast i32 %686 to float +> %1946 = fmul float %1944, %1945 +> %1947 = fadd float %1943, %1946 +> %1948 = call float @llvm.sqrt.f32(float %1947) +> %1949 = fneg float %692 +> %1950 = fmul float %1948, %1949 +> %1951 = fmul float %1950, 0.000000e+00 +> %1952 = bitcast i32 %686 to float +> %1953 = fadd float %1952, %1951 +> %1954 = fmul float %1939, %1953 +> %1955 = fadd float %1925, %1954 +> %1956 = call float @llvm.sqrt.f32(float %1955) +> %1957 = fadd float %1956, 0.000000e+00 +> %1958 = fdiv float %1897, %1957 +> %1959 = fmul float %1884, %1958 +> %1960 = fneg float %1959 +> %1961 = fmul float %1960, %1802 +> %1962 = fadd float %1961, 0.000000e+00 +> %1963 = bitcast i32 %52 to float +> %1964 = bitcast i32 %52 to float +> %1965 = fmul float %1963, %1964 +> %1966 = fadd float %1965, 0.000000e+00 +> %1967 = bitcast i32 %686 to float +> %1968 = bitcast i32 %686 to float +> %1969 = fmul float %1967, %1968 +> %1970 = fadd float %1966, %1969 +> %1971 = call float @llvm.sqrt.f32(float %1970) +> %1972 = fneg float %692 +> %1973 = fmul float %1971, %1972 +> %1974 = fmul float %1973, 0.000000e+00 +> %1975 = bitcast i32 %686 to float +> %1976 = 
fadd float %1975, %1974 +> %1977 = bitcast i32 %52 to float +> %1978 = bitcast i32 %52 to float +> %1979 = fmul float %1977, %1978 +> %1980 = fadd float %1979, 0.000000e+00 +> %1981 = bitcast i32 %686 to float +> %1982 = bitcast i32 %686 to float +> %1983 = fmul float %1981, %1982 +> %1984 = fadd float %1980, %1983 +> %1985 = call float @llvm.sqrt.f32(float %1984) +> %1986 = fneg float %692 +> %1987 = fmul float %1985, %1986 +> %1988 = bitcast i32 %52 to float +> %1989 = fadd float %1988, %1987 +> %1990 = bitcast i32 %52 to float +> %1991 = bitcast i32 %52 to float +> %1992 = fmul float %1990, %1991 +> %1993 = fadd float %1992, 0.000000e+00 +> %1994 = bitcast i32 %686 to float +> %1995 = bitcast i32 %686 to float +2470,2472c2471,2473 +< %1997 = fadd float %1996, 0.000000e+00 +< %1998 = bitcast i32 %57 to float +< %1999 = bitcast i32 %57 to float +--- +> %1997 = fadd float %1993, %1996 +> %1998 = call float @llvm.sqrt.f32(float %1997) +> %1999 = fneg float %692 +2474,2544c2475,2545 +< %2001 = fadd float %1997, %2000 +< %2002 = call float @llvm.sqrt.f32(float %2001) +< %2003 = fneg float %710 +< %2004 = fmul float %2002, %2003 +< %2005 = bitcast i32 %865 to float +< %2006 = fadd float %2005, %2004 +< %2007 = bitcast i32 %865 to float +< %2008 = bitcast i32 %865 to float +< %2009 = fmul float %2007, %2008 +< %2010 = fadd float %2009, 0.000000e+00 +< %2011 = bitcast i32 %57 to float +< %2012 = bitcast i32 %57 to float +< %2013 = fmul float %2011, %2012 +< %2014 = fadd float %2010, %2013 +< %2015 = call float @llvm.sqrt.f32(float %2014) +< %2016 = fneg float %710 +< %2017 = fmul float %2015, %2016 +< %2018 = bitcast i32 %865 to float +< %2019 = fadd float %2018, %2017 +< %2020 = fmul float %2006, %2019 +< %2021 = fadd float %2020, 0.000000e+00 +< %2022 = bitcast i32 %865 to float +< %2023 = bitcast i32 %865 to float +< %2024 = fmul float %2022, %2023 +< %2025 = fadd float %2024, 0.000000e+00 +< %2026 = bitcast i32 %57 to float +< %2027 = bitcast i32 %57 to float +< %2028 = fmul float %2026, %2027 +< %2029 = fadd float %2025, %2028 +< %2030 = call float @llvm.sqrt.f32(float %2029) +< %2031 = fneg float %710 +< %2032 = fmul float %2030, %2031 +< %2033 = fmul float %2032, 0.000000e+00 +< %2034 = bitcast i32 %57 to float +< %2035 = fadd float %2034, %2033 +< %2036 = bitcast i32 %865 to float +< %2037 = bitcast i32 %865 to float +< %2038 = fmul float %2036, %2037 +< %2039 = fadd float %2038, 0.000000e+00 +< %2040 = bitcast i32 %57 to float +< %2041 = bitcast i32 %57 to float +< %2042 = fmul float %2040, %2041 +< %2043 = fadd float %2039, %2042 +< %2044 = call float @llvm.sqrt.f32(float %2043) +< %2045 = fneg float %710 +< %2046 = fmul float %2044, %2045 +< %2047 = fmul float %2046, 0.000000e+00 +< %2048 = bitcast i32 %57 to float +< %2049 = fadd float %2048, %2047 +< %2050 = fmul float %2035, %2049 +< %2051 = fadd float %2021, %2050 +< %2052 = call float @llvm.sqrt.f32(float %2051) +< %2053 = fadd float %2052, 0.000000e+00 +< %2054 = fdiv float %1993, %2053 +< %2055 = fmul float %2054, 2.000000e+00 +< %2056 = bitcast i32 %865 to float +< %2057 = bitcast i32 %865 to float +< %2058 = fmul float %2056, %2057 +< %2059 = fadd float %2058, 0.000000e+00 +< %2060 = bitcast i32 %57 to float +< %2061 = bitcast i32 %57 to float +< %2062 = fmul float %2060, %2061 +< %2063 = fadd float %2059, %2062 +< %2064 = call float @llvm.sqrt.f32(float %2063) +< %2065 = fneg float %710 +< %2066 = fmul float %2064, %2065 +< %2067 = fmul float %2066, 0.000000e+00 +< %2068 = bitcast i32 %57 to float +< %2069 = fadd float 
%2068, %2067 +< %2070 = bitcast i32 %865 to float +< %2071 = bitcast i32 %865 to float +--- +> %2001 = bitcast i32 %52 to float +> %2002 = fadd float %2001, %2000 +> %2003 = fmul float %1989, %2002 +> %2004 = fadd float %2003, 0.000000e+00 +> %2005 = bitcast i32 %52 to float +> %2006 = bitcast i32 %52 to float +> %2007 = fmul float %2005, %2006 +> %2008 = fadd float %2007, 0.000000e+00 +> %2009 = bitcast i32 %686 to float +> %2010 = bitcast i32 %686 to float +> %2011 = fmul float %2009, %2010 +> %2012 = fadd float %2008, %2011 +> %2013 = call float @llvm.sqrt.f32(float %2012) +> %2014 = fneg float %692 +> %2015 = fmul float %2013, %2014 +> %2016 = fmul float %2015, 0.000000e+00 +> %2017 = bitcast i32 %686 to float +> %2018 = fadd float %2017, %2016 +> %2019 = bitcast i32 %52 to float +> %2020 = bitcast i32 %52 to float +> %2021 = fmul float %2019, %2020 +> %2022 = fadd float %2021, 0.000000e+00 +> %2023 = bitcast i32 %686 to float +> %2024 = bitcast i32 %686 to float +> %2025 = fmul float %2023, %2024 +> %2026 = fadd float %2022, %2025 +> %2027 = call float @llvm.sqrt.f32(float %2026) +> %2028 = fneg float %692 +> %2029 = fmul float %2027, %2028 +> %2030 = fmul float %2029, 0.000000e+00 +> %2031 = bitcast i32 %686 to float +> %2032 = fadd float %2031, %2030 +> %2033 = fmul float %2018, %2032 +> %2034 = fadd float %2004, %2033 +> %2035 = call float @llvm.sqrt.f32(float %2034) +> %2036 = fadd float %2035, 0.000000e+00 +> %2037 = fdiv float %1976, %2036 +> %2038 = fmul float %2037, 2.000000e+00 +> %2039 = bitcast i32 %52 to float +> %2040 = bitcast i32 %52 to float +> %2041 = fmul float %2039, %2040 +> %2042 = fadd float %2041, 0.000000e+00 +> %2043 = bitcast i32 %686 to float +> %2044 = bitcast i32 %686 to float +> %2045 = fmul float %2043, %2044 +> %2046 = fadd float %2042, %2045 +> %2047 = call float @llvm.sqrt.f32(float %2046) +> %2048 = fneg float %692 +> %2049 = fmul float %2047, %2048 +> %2050 = fmul float %2049, 0.000000e+00 +> %2051 = bitcast i32 %686 to float +> %2052 = fadd float %2051, %2050 +> %2053 = bitcast i32 %52 to float +> %2054 = bitcast i32 %52 to float +> %2055 = fmul float %2053, %2054 +> %2056 = fadd float %2055, 0.000000e+00 +> %2057 = bitcast i32 %686 to float +> %2058 = bitcast i32 %686 to float +> %2059 = fmul float %2057, %2058 +> %2060 = fadd float %2056, %2059 +> %2061 = call float @llvm.sqrt.f32(float %2060) +> %2062 = fneg float %692 +> %2063 = fmul float %2061, %2062 +> %2064 = bitcast i32 %52 to float +> %2065 = fadd float %2064, %2063 +> %2066 = bitcast i32 %52 to float +> %2067 = bitcast i32 %52 to float +> %2068 = fmul float %2066, %2067 +> %2069 = fadd float %2068, 0.000000e+00 +> %2070 = bitcast i32 %686 to float +> %2071 = bitcast i32 %686 to float +2546,2548c2547,2549 +< %2073 = fadd float %2072, 0.000000e+00 +< %2074 = bitcast i32 %57 to float +< %2075 = bitcast i32 %57 to float +--- +> %2073 = fadd float %2069, %2072 +> %2074 = call float @llvm.sqrt.f32(float %2073) +> %2075 = fneg float %692 +2550,2634c2551,2635 +< %2077 = fadd float %2073, %2076 +< %2078 = call float @llvm.sqrt.f32(float %2077) +< %2079 = fneg float %710 +< %2080 = fmul float %2078, %2079 +< %2081 = bitcast i32 %865 to float +< %2082 = fadd float %2081, %2080 +< %2083 = bitcast i32 %865 to float +< %2084 = bitcast i32 %865 to float +< %2085 = fmul float %2083, %2084 +< %2086 = fadd float %2085, 0.000000e+00 +< %2087 = bitcast i32 %57 to float +< %2088 = bitcast i32 %57 to float +< %2089 = fmul float %2087, %2088 +< %2090 = fadd float %2086, %2089 +< %2091 = call float 
@llvm.sqrt.f32(float %2090) +< %2092 = fneg float %710 +< %2093 = fmul float %2091, %2092 +< %2094 = bitcast i32 %865 to float +< %2095 = fadd float %2094, %2093 +< %2096 = fmul float %2082, %2095 +< %2097 = fadd float %2096, 0.000000e+00 +< %2098 = bitcast i32 %865 to float +< %2099 = bitcast i32 %865 to float +< %2100 = fmul float %2098, %2099 +< %2101 = fadd float %2100, 0.000000e+00 +< %2102 = bitcast i32 %57 to float +< %2103 = bitcast i32 %57 to float +< %2104 = fmul float %2102, %2103 +< %2105 = fadd float %2101, %2104 +< %2106 = call float @llvm.sqrt.f32(float %2105) +< %2107 = fneg float %710 +< %2108 = fmul float %2106, %2107 +< %2109 = fmul float %2108, 0.000000e+00 +< %2110 = bitcast i32 %57 to float +< %2111 = fadd float %2110, %2109 +< %2112 = bitcast i32 %865 to float +< %2113 = bitcast i32 %865 to float +< %2114 = fmul float %2112, %2113 +< %2115 = fadd float %2114, 0.000000e+00 +< %2116 = bitcast i32 %57 to float +< %2117 = bitcast i32 %57 to float +< %2118 = fmul float %2116, %2117 +< %2119 = fadd float %2115, %2118 +< %2120 = call float @llvm.sqrt.f32(float %2119) +< %2121 = fneg float %710 +< %2122 = fmul float %2120, %2121 +< %2123 = fmul float %2122, 0.000000e+00 +< %2124 = bitcast i32 %57 to float +< %2125 = fadd float %2124, %2123 +< %2126 = fmul float %2111, %2125 +< %2127 = fadd float %2097, %2126 +< %2128 = call float @llvm.sqrt.f32(float %2127) +< %2129 = fadd float %2128, 0.000000e+00 +< %2130 = fdiv float %2069, %2129 +< %2131 = fmul float %2055, %2130 +< %2132 = fsub float 1.000000e+00, %2131 +< %2133 = load float, float* %30, align 4 +< %2134 = fmul float %2132, %2133 +< %2135 = fadd float %1979, %2134 +< %2136 = insertelement <4 x float> zeroinitializer, float %2135, i32 0 +< %2137 = insertelement <4 x float> %2136, float 0.000000e+00, i32 1 +< %2138 = insertelement <4 x float> %2137, float 0.000000e+00, i32 2 +< %2139 = insertelement <4 x float> %2138, float 0.000000e+00, i32 3 +< %2140 = extractelement <4 x float> %2139, i32 0 +< store float %2140, float* %40, align 4 +< %2141 = extractelement <4 x float> %2139, i32 1 +< %2142 = getelementptr float, float* %2, i32 0 +< %2143 = getelementptr inbounds float, float* %2142, i64 3 +< store float %2141, float* %2143, align 4 +< %2144 = bitcast i32 %865 to float +< %2145 = bitcast i32 %865 to float +< %2146 = fmul float %2144, %2145 +< %2147 = fadd float %2146, 0.000000e+00 +< %2148 = bitcast i32 %57 to float +< %2149 = bitcast i32 %57 to float +< %2150 = fmul float %2148, %2149 +< %2151 = fadd float %2147, %2150 +< %2152 = call float @llvm.sqrt.f32(float %2151) +< %2153 = fneg float %710 +< %2154 = fmul float %2152, %2153 +< %2155 = fmul float %2154, 0.000000e+00 +< %2156 = bitcast i32 %57 to float +< %2157 = fadd float %2156, %2155 +< %2158 = bitcast i32 %865 to float +< %2159 = bitcast i32 %865 to float +--- +> %2077 = bitcast i32 %52 to float +> %2078 = fadd float %2077, %2076 +> %2079 = fmul float %2065, %2078 +> %2080 = fadd float %2079, 0.000000e+00 +> %2081 = bitcast i32 %52 to float +> %2082 = bitcast i32 %52 to float +> %2083 = fmul float %2081, %2082 +> %2084 = fadd float %2083, 0.000000e+00 +> %2085 = bitcast i32 %686 to float +> %2086 = bitcast i32 %686 to float +> %2087 = fmul float %2085, %2086 +> %2088 = fadd float %2084, %2087 +> %2089 = call float @llvm.sqrt.f32(float %2088) +> %2090 = fneg float %692 +> %2091 = fmul float %2089, %2090 +> %2092 = fmul float %2091, 0.000000e+00 +> %2093 = bitcast i32 %686 to float +> %2094 = fadd float %2093, %2092 +> %2095 = bitcast i32 %52 to float +> %2096 = 
bitcast i32 %52 to float +> %2097 = fmul float %2095, %2096 +> %2098 = fadd float %2097, 0.000000e+00 +> %2099 = bitcast i32 %686 to float +> %2100 = bitcast i32 %686 to float +> %2101 = fmul float %2099, %2100 +> %2102 = fadd float %2098, %2101 +> %2103 = call float @llvm.sqrt.f32(float %2102) +> %2104 = fneg float %692 +> %2105 = fmul float %2103, %2104 +> %2106 = fmul float %2105, 0.000000e+00 +> %2107 = bitcast i32 %686 to float +> %2108 = fadd float %2107, %2106 +> %2109 = fmul float %2094, %2108 +> %2110 = fadd float %2080, %2109 +> %2111 = call float @llvm.sqrt.f32(float %2110) +> %2112 = fadd float %2111, 0.000000e+00 +> %2113 = fdiv float %2052, %2112 +> %2114 = fmul float %2038, %2113 +> %2115 = fsub float 1.000000e+00, %2114 +> %2116 = load float, float* %1153, align 4 +> %2117 = fmul float %2115, %2116 +> %2118 = fadd float %1962, %2117 +> %2119 = insertelement <4 x float> zeroinitializer, float %2118, i32 0 +> %2120 = insertelement <4 x float> %2119, float 0.000000e+00, i32 1 +> %2121 = insertelement <4 x float> %2120, float 0.000000e+00, i32 2 +> %2122 = insertelement <4 x float> %2121, float 0.000000e+00, i32 3 +> %2123 = extractelement <4 x float> %2122, i32 0 +> store float %2123, float* %1644, align 4 +> %2124 = extractelement <4 x float> %2122, i32 1 +> %2125 = getelementptr float, float* %2, i32 0 +> %2126 = getelementptr inbounds float, float* %2125, i64 3 +> store float %2124, float* %2126, align 4 +> %2127 = bitcast i32 %52 to float +> %2128 = bitcast i32 %52 to float +> %2129 = fmul float %2127, %2128 +> %2130 = fadd float %2129, 0.000000e+00 +> %2131 = bitcast i32 %686 to float +> %2132 = bitcast i32 %686 to float +> %2133 = fmul float %2131, %2132 +> %2134 = fadd float %2130, %2133 +> %2135 = call float @llvm.sqrt.f32(float %2134) +> %2136 = fneg float %692 +> %2137 = fmul float %2135, %2136 +> %2138 = fmul float %2137, 0.000000e+00 +> %2139 = bitcast i32 %686 to float +> %2140 = fadd float %2139, %2138 +> %2141 = bitcast i32 %52 to float +> %2142 = bitcast i32 %52 to float +> %2143 = fmul float %2141, %2142 +> %2144 = fadd float %2143, 0.000000e+00 +> %2145 = bitcast i32 %686 to float +> %2146 = bitcast i32 %686 to float +> %2147 = fmul float %2145, %2146 +> %2148 = fadd float %2144, %2147 +> %2149 = call float @llvm.sqrt.f32(float %2148) +> %2150 = fneg float %692 +> %2151 = fmul float %2149, %2150 +> %2152 = bitcast i32 %52 to float +> %2153 = fadd float %2152, %2151 +> %2154 = bitcast i32 %52 to float +> %2155 = bitcast i32 %52 to float +> %2156 = fmul float %2154, %2155 +> %2157 = fadd float %2156, 0.000000e+00 +> %2158 = bitcast i32 %686 to float +> %2159 = bitcast i32 %686 to float +2636,2638c2637,2639 +< %2161 = fadd float %2160, 0.000000e+00 +< %2162 = bitcast i32 %57 to float +< %2163 = bitcast i32 %57 to float +--- +> %2161 = fadd float %2157, %2160 +> %2162 = call float @llvm.sqrt.f32(float %2161) +> %2163 = fneg float %692 +2640,2696c2641,2697 +< %2165 = fadd float %2161, %2164 +< %2166 = call float @llvm.sqrt.f32(float %2165) +< %2167 = fneg float %710 +< %2168 = fmul float %2166, %2167 +< %2169 = bitcast i32 %865 to float +< %2170 = fadd float %2169, %2168 +< %2171 = bitcast i32 %865 to float +< %2172 = bitcast i32 %865 to float +< %2173 = fmul float %2171, %2172 +< %2174 = fadd float %2173, 0.000000e+00 +< %2175 = bitcast i32 %57 to float +< %2176 = bitcast i32 %57 to float +< %2177 = fmul float %2175, %2176 +< %2178 = fadd float %2174, %2177 +< %2179 = call float @llvm.sqrt.f32(float %2178) +< %2180 = fneg float %710 +< %2181 = fmul float %2179, 
%2180 +< %2182 = bitcast i32 %865 to float +< %2183 = fadd float %2182, %2181 +< %2184 = fmul float %2170, %2183 +< %2185 = fadd float %2184, 0.000000e+00 +< %2186 = bitcast i32 %865 to float +< %2187 = bitcast i32 %865 to float +< %2188 = fmul float %2186, %2187 +< %2189 = fadd float %2188, 0.000000e+00 +< %2190 = bitcast i32 %57 to float +< %2191 = bitcast i32 %57 to float +< %2192 = fmul float %2190, %2191 +< %2193 = fadd float %2189, %2192 +< %2194 = call float @llvm.sqrt.f32(float %2193) +< %2195 = fneg float %710 +< %2196 = fmul float %2194, %2195 +< %2197 = fmul float %2196, 0.000000e+00 +< %2198 = bitcast i32 %57 to float +< %2199 = fadd float %2198, %2197 +< %2200 = bitcast i32 %865 to float +< %2201 = bitcast i32 %865 to float +< %2202 = fmul float %2200, %2201 +< %2203 = fadd float %2202, 0.000000e+00 +< %2204 = bitcast i32 %57 to float +< %2205 = bitcast i32 %57 to float +< %2206 = fmul float %2204, %2205 +< %2207 = fadd float %2203, %2206 +< %2208 = call float @llvm.sqrt.f32(float %2207) +< %2209 = fneg float %710 +< %2210 = fmul float %2208, %2209 +< %2211 = fmul float %2210, 0.000000e+00 +< %2212 = bitcast i32 %57 to float +< %2213 = fadd float %2212, %2211 +< %2214 = fmul float %2199, %2213 +< %2215 = fadd float %2185, %2214 +< %2216 = call float @llvm.sqrt.f32(float %2215) +< %2217 = fadd float %2216, 0.000000e+00 +< %2218 = fdiv float %2157, %2217 +< %2219 = fmul float %2218, 2.000000e+00 +< %2220 = bitcast i32 %865 to float +< %2221 = bitcast i32 %865 to float +--- +> %2165 = bitcast i32 %52 to float +> %2166 = fadd float %2165, %2164 +> %2167 = fmul float %2153, %2166 +> %2168 = fadd float %2167, 0.000000e+00 +> %2169 = bitcast i32 %52 to float +> %2170 = bitcast i32 %52 to float +> %2171 = fmul float %2169, %2170 +> %2172 = fadd float %2171, 0.000000e+00 +> %2173 = bitcast i32 %686 to float +> %2174 = bitcast i32 %686 to float +> %2175 = fmul float %2173, %2174 +> %2176 = fadd float %2172, %2175 +> %2177 = call float @llvm.sqrt.f32(float %2176) +> %2178 = fneg float %692 +> %2179 = fmul float %2177, %2178 +> %2180 = fmul float %2179, 0.000000e+00 +> %2181 = bitcast i32 %686 to float +> %2182 = fadd float %2181, %2180 +> %2183 = bitcast i32 %52 to float +> %2184 = bitcast i32 %52 to float +> %2185 = fmul float %2183, %2184 +> %2186 = fadd float %2185, 0.000000e+00 +> %2187 = bitcast i32 %686 to float +> %2188 = bitcast i32 %686 to float +> %2189 = fmul float %2187, %2188 +> %2190 = fadd float %2186, %2189 +> %2191 = call float @llvm.sqrt.f32(float %2190) +> %2192 = fneg float %692 +> %2193 = fmul float %2191, %2192 +> %2194 = fmul float %2193, 0.000000e+00 +> %2195 = bitcast i32 %686 to float +> %2196 = fadd float %2195, %2194 +> %2197 = fmul float %2182, %2196 +> %2198 = fadd float %2168, %2197 +> %2199 = call float @llvm.sqrt.f32(float %2198) +> %2200 = fadd float %2199, 0.000000e+00 +> %2201 = fdiv float %2140, %2200 +> %2202 = fmul float %2201, 2.000000e+00 +> %2203 = bitcast i32 %52 to float +> %2204 = bitcast i32 %52 to float +> %2205 = fmul float %2203, %2204 +> %2206 = fadd float %2205, 0.000000e+00 +> %2207 = bitcast i32 %686 to float +> %2208 = bitcast i32 %686 to float +> %2209 = fmul float %2207, %2208 +> %2210 = fadd float %2206, %2209 +> %2211 = call float @llvm.sqrt.f32(float %2210) +> %2212 = fneg float %692 +> %2213 = fmul float %2211, %2212 +> %2214 = bitcast i32 %52 to float +> %2215 = fadd float %2214, %2213 +> %2216 = bitcast i32 %52 to float +> %2217 = bitcast i32 %52 to float +> %2218 = fmul float %2216, %2217 +> %2219 = fadd float %2218, 
0.000000e+00 +> %2220 = bitcast i32 %686 to float +> %2221 = bitcast i32 %686 to float +2698,2700c2699,2701 +< %2223 = fadd float %2222, 0.000000e+00 +< %2224 = bitcast i32 %57 to float +< %2225 = bitcast i32 %57 to float +--- +> %2223 = fadd float %2219, %2222 +> %2224 = call float @llvm.sqrt.f32(float %2223) +> %2225 = fneg float %692 +2702,2709c2703,2710 +< %2227 = fadd float %2223, %2226 +< %2228 = call float @llvm.sqrt.f32(float %2227) +< %2229 = fneg float %710 +< %2230 = fmul float %2228, %2229 +< %2231 = bitcast i32 %865 to float +< %2232 = fadd float %2231, %2230 +< %2233 = bitcast i32 %865 to float +< %2234 = bitcast i32 %865 to float +--- +> %2227 = bitcast i32 %52 to float +> %2228 = fadd float %2227, %2226 +> %2229 = bitcast i32 %52 to float +> %2230 = bitcast i32 %52 to float +> %2231 = fmul float %2229, %2230 +> %2232 = fadd float %2231, 0.000000e+00 +> %2233 = bitcast i32 %686 to float +> %2234 = bitcast i32 %686 to float +2711,2713c2712,2714 +< %2236 = fadd float %2235, 0.000000e+00 +< %2237 = bitcast i32 %57 to float +< %2238 = bitcast i32 %57 to float +--- +> %2236 = fadd float %2232, %2235 +> %2237 = call float @llvm.sqrt.f32(float %2236) +> %2238 = fneg float %692 +2715,2798c2716,2799 +< %2240 = fadd float %2236, %2239 +< %2241 = call float @llvm.sqrt.f32(float %2240) +< %2242 = fneg float %710 +< %2243 = fmul float %2241, %2242 +< %2244 = bitcast i32 %865 to float +< %2245 = fadd float %2244, %2243 +< %2246 = bitcast i32 %865 to float +< %2247 = bitcast i32 %865 to float +< %2248 = fmul float %2246, %2247 +< %2249 = fadd float %2248, 0.000000e+00 +< %2250 = bitcast i32 %57 to float +< %2251 = bitcast i32 %57 to float +< %2252 = fmul float %2250, %2251 +< %2253 = fadd float %2249, %2252 +< %2254 = call float @llvm.sqrt.f32(float %2253) +< %2255 = fneg float %710 +< %2256 = fmul float %2254, %2255 +< %2257 = bitcast i32 %865 to float +< %2258 = fadd float %2257, %2256 +< %2259 = fmul float %2245, %2258 +< %2260 = fadd float %2259, 0.000000e+00 +< %2261 = bitcast i32 %865 to float +< %2262 = bitcast i32 %865 to float +< %2263 = fmul float %2261, %2262 +< %2264 = fadd float %2263, 0.000000e+00 +< %2265 = bitcast i32 %57 to float +< %2266 = bitcast i32 %57 to float +< %2267 = fmul float %2265, %2266 +< %2268 = fadd float %2264, %2267 +< %2269 = call float @llvm.sqrt.f32(float %2268) +< %2270 = fneg float %710 +< %2271 = fmul float %2269, %2270 +< %2272 = fmul float %2271, 0.000000e+00 +< %2273 = bitcast i32 %57 to float +< %2274 = fadd float %2273, %2272 +< %2275 = bitcast i32 %865 to float +< %2276 = bitcast i32 %865 to float +< %2277 = fmul float %2275, %2276 +< %2278 = fadd float %2277, 0.000000e+00 +< %2279 = bitcast i32 %57 to float +< %2280 = bitcast i32 %57 to float +< %2281 = fmul float %2279, %2280 +< %2282 = fadd float %2278, %2281 +< %2283 = call float @llvm.sqrt.f32(float %2282) +< %2284 = fneg float %710 +< %2285 = fmul float %2283, %2284 +< %2286 = fmul float %2285, 0.000000e+00 +< %2287 = bitcast i32 %57 to float +< %2288 = fadd float %2287, %2286 +< %2289 = fmul float %2274, %2288 +< %2290 = fadd float %2260, %2289 +< %2291 = call float @llvm.sqrt.f32(float %2290) +< %2292 = fadd float %2291, 0.000000e+00 +< %2293 = fdiv float %2232, %2292 +< %2294 = fmul float %2219, %2293 +< %2295 = fneg float %2294 +< %2296 = insertelement <4 x float> zeroinitializer, float %2295, i32 0 +< %2297 = insertelement <4 x float> %2296, float 0.000000e+00, i32 1 +< %2298 = insertelement <4 x float> %2297, float 0.000000e+00, i32 2 +< %2299 = insertelement <4 x float> %2298, 
float 0.000000e+00, i32 3 +< %2300 = load float, float* %1338, align 4 +< %2301 = insertelement <4 x float> zeroinitializer, float %2300, i32 0 +< %2302 = insertelement <4 x float> %2301, float 0.000000e+00, i32 1 +< %2303 = insertelement <4 x float> %2302, float 0.000000e+00, i32 2 +< %2304 = insertelement <4 x float> %2303, float 0.000000e+00, i32 3 +< %2305 = call <4 x float> @llvm.fma.v4f32(<4 x float> %2299, <4 x float> %2304, <4 x float> zeroinitializer) +< %2306 = extractelement <4 x float> %2305, i32 0 +< store float %2306, float* %2143, align 4 +< %2307 = bitcast i32 %865 to float +< %2308 = bitcast i32 %865 to float +< %2309 = fmul float %2307, %2308 +< %2310 = fadd float %2309, 0.000000e+00 +< %2311 = bitcast i32 %57 to float +< %2312 = bitcast i32 %57 to float +< %2313 = fmul float %2311, %2312 +< %2314 = fadd float %2310, %2313 +< %2315 = call float @llvm.sqrt.f32(float %2314) +< %2316 = fneg float %710 +< %2317 = fmul float %2315, %2316 +< %2318 = fmul float %2317, 0.000000e+00 +< %2319 = bitcast i32 %57 to float +< %2320 = fadd float %2319, %2318 +< %2321 = bitcast i32 %865 to float +< %2322 = bitcast i32 %865 to float +--- +> %2240 = bitcast i32 %52 to float +> %2241 = fadd float %2240, %2239 +> %2242 = fmul float %2228, %2241 +> %2243 = fadd float %2242, 0.000000e+00 +> %2244 = bitcast i32 %52 to float +> %2245 = bitcast i32 %52 to float +> %2246 = fmul float %2244, %2245 +> %2247 = fadd float %2246, 0.000000e+00 +> %2248 = bitcast i32 %686 to float +> %2249 = bitcast i32 %686 to float +> %2250 = fmul float %2248, %2249 +> %2251 = fadd float %2247, %2250 +> %2252 = call float @llvm.sqrt.f32(float %2251) +> %2253 = fneg float %692 +> %2254 = fmul float %2252, %2253 +> %2255 = fmul float %2254, 0.000000e+00 +> %2256 = bitcast i32 %686 to float +> %2257 = fadd float %2256, %2255 +> %2258 = bitcast i32 %52 to float +> %2259 = bitcast i32 %52 to float +> %2260 = fmul float %2258, %2259 +> %2261 = fadd float %2260, 0.000000e+00 +> %2262 = bitcast i32 %686 to float +> %2263 = bitcast i32 %686 to float +> %2264 = fmul float %2262, %2263 +> %2265 = fadd float %2261, %2264 +> %2266 = call float @llvm.sqrt.f32(float %2265) +> %2267 = fneg float %692 +> %2268 = fmul float %2266, %2267 +> %2269 = fmul float %2268, 0.000000e+00 +> %2270 = bitcast i32 %686 to float +> %2271 = fadd float %2270, %2269 +> %2272 = fmul float %2257, %2271 +> %2273 = fadd float %2243, %2272 +> %2274 = call float @llvm.sqrt.f32(float %2273) +> %2275 = fadd float %2274, 0.000000e+00 +> %2276 = fdiv float %2215, %2275 +> %2277 = fmul float %2202, %2276 +> %2278 = fneg float %2277 +> %2279 = insertelement <4 x float> zeroinitializer, float %2278, i32 0 +> %2280 = insertelement <4 x float> %2279, float 0.000000e+00, i32 1 +> %2281 = insertelement <4 x float> %2280, float 0.000000e+00, i32 2 +> %2282 = insertelement <4 x float> %2281, float 0.000000e+00, i32 3 +> %2283 = load float, float* %1321, align 4 +> %2284 = insertelement <4 x float> zeroinitializer, float %2283, i32 0 +> %2285 = insertelement <4 x float> %2284, float 0.000000e+00, i32 1 +> %2286 = insertelement <4 x float> %2285, float 0.000000e+00, i32 2 +> %2287 = insertelement <4 x float> %2286, float 0.000000e+00, i32 3 +> %2288 = call <4 x float> @llvm.fma.v4f32(<4 x float> %2282, <4 x float> %2287, <4 x float> zeroinitializer) +> %2289 = extractelement <4 x float> %2288, i32 0 +> store float %2289, float* %2126, align 4 +> %2290 = bitcast i32 %52 to float +> %2291 = bitcast i32 %52 to float +> %2292 = fmul float %2290, %2291 +> %2293 = fadd float 
%2292, 0.000000e+00 +> %2294 = bitcast i32 %686 to float +> %2295 = bitcast i32 %686 to float +> %2296 = fmul float %2294, %2295 +> %2297 = fadd float %2293, %2296 +> %2298 = call float @llvm.sqrt.f32(float %2297) +> %2299 = fneg float %692 +> %2300 = fmul float %2298, %2299 +> %2301 = fmul float %2300, 0.000000e+00 +> %2302 = bitcast i32 %686 to float +> %2303 = fadd float %2302, %2301 +> %2304 = bitcast i32 %52 to float +> %2305 = bitcast i32 %52 to float +> %2306 = fmul float %2304, %2305 +> %2307 = fadd float %2306, 0.000000e+00 +> %2308 = bitcast i32 %686 to float +> %2309 = bitcast i32 %686 to float +> %2310 = fmul float %2308, %2309 +> %2311 = fadd float %2307, %2310 +> %2312 = call float @llvm.sqrt.f32(float %2311) +> %2313 = fneg float %692 +> %2314 = fmul float %2312, %2313 +> %2315 = bitcast i32 %52 to float +> %2316 = fadd float %2315, %2314 +> %2317 = bitcast i32 %52 to float +> %2318 = bitcast i32 %52 to float +> %2319 = fmul float %2317, %2318 +> %2320 = fadd float %2319, 0.000000e+00 +> %2321 = bitcast i32 %686 to float +> %2322 = bitcast i32 %686 to float +2800,2802c2801,2803 +< %2324 = fadd float %2323, 0.000000e+00 +< %2325 = bitcast i32 %57 to float +< %2326 = bitcast i32 %57 to float +--- +> %2324 = fadd float %2320, %2323 +> %2325 = call float @llvm.sqrt.f32(float %2324) +> %2326 = fneg float %692 +2804,2860c2805,2861 +< %2328 = fadd float %2324, %2327 +< %2329 = call float @llvm.sqrt.f32(float %2328) +< %2330 = fneg float %710 +< %2331 = fmul float %2329, %2330 +< %2332 = bitcast i32 %865 to float +< %2333 = fadd float %2332, %2331 +< %2334 = bitcast i32 %865 to float +< %2335 = bitcast i32 %865 to float +< %2336 = fmul float %2334, %2335 +< %2337 = fadd float %2336, 0.000000e+00 +< %2338 = bitcast i32 %57 to float +< %2339 = bitcast i32 %57 to float +< %2340 = fmul float %2338, %2339 +< %2341 = fadd float %2337, %2340 +< %2342 = call float @llvm.sqrt.f32(float %2341) +< %2343 = fneg float %710 +< %2344 = fmul float %2342, %2343 +< %2345 = bitcast i32 %865 to float +< %2346 = fadd float %2345, %2344 +< %2347 = fmul float %2333, %2346 +< %2348 = fadd float %2347, 0.000000e+00 +< %2349 = bitcast i32 %865 to float +< %2350 = bitcast i32 %865 to float +< %2351 = fmul float %2349, %2350 +< %2352 = fadd float %2351, 0.000000e+00 +< %2353 = bitcast i32 %57 to float +< %2354 = bitcast i32 %57 to float +< %2355 = fmul float %2353, %2354 +< %2356 = fadd float %2352, %2355 +< %2357 = call float @llvm.sqrt.f32(float %2356) +< %2358 = fneg float %710 +< %2359 = fmul float %2357, %2358 +< %2360 = fmul float %2359, 0.000000e+00 +< %2361 = bitcast i32 %57 to float +< %2362 = fadd float %2361, %2360 +< %2363 = bitcast i32 %865 to float +< %2364 = bitcast i32 %865 to float +< %2365 = fmul float %2363, %2364 +< %2366 = fadd float %2365, 0.000000e+00 +< %2367 = bitcast i32 %57 to float +< %2368 = bitcast i32 %57 to float +< %2369 = fmul float %2367, %2368 +< %2370 = fadd float %2366, %2369 +< %2371 = call float @llvm.sqrt.f32(float %2370) +< %2372 = fneg float %710 +< %2373 = fmul float %2371, %2372 +< %2374 = fmul float %2373, 0.000000e+00 +< %2375 = bitcast i32 %57 to float +< %2376 = fadd float %2375, %2374 +< %2377 = fmul float %2362, %2376 +< %2378 = fadd float %2348, %2377 +< %2379 = call float @llvm.sqrt.f32(float %2378) +< %2380 = fadd float %2379, 0.000000e+00 +< %2381 = fdiv float %2320, %2380 +< %2382 = fmul float %2381, 2.000000e+00 +< %2383 = bitcast i32 %865 to float +< %2384 = bitcast i32 %865 to float +--- +> %2328 = bitcast i32 %52 to float +> %2329 = fadd float %2328, 
%2327 +> %2330 = fmul float %2316, %2329 +> %2331 = fadd float %2330, 0.000000e+00 +> %2332 = bitcast i32 %52 to float +> %2333 = bitcast i32 %52 to float +> %2334 = fmul float %2332, %2333 +> %2335 = fadd float %2334, 0.000000e+00 +> %2336 = bitcast i32 %686 to float +> %2337 = bitcast i32 %686 to float +> %2338 = fmul float %2336, %2337 +> %2339 = fadd float %2335, %2338 +> %2340 = call float @llvm.sqrt.f32(float %2339) +> %2341 = fneg float %692 +> %2342 = fmul float %2340, %2341 +> %2343 = fmul float %2342, 0.000000e+00 +> %2344 = bitcast i32 %686 to float +> %2345 = fadd float %2344, %2343 +> %2346 = bitcast i32 %52 to float +> %2347 = bitcast i32 %52 to float +> %2348 = fmul float %2346, %2347 +> %2349 = fadd float %2348, 0.000000e+00 +> %2350 = bitcast i32 %686 to float +> %2351 = bitcast i32 %686 to float +> %2352 = fmul float %2350, %2351 +> %2353 = fadd float %2349, %2352 +> %2354 = call float @llvm.sqrt.f32(float %2353) +> %2355 = fneg float %692 +> %2356 = fmul float %2354, %2355 +> %2357 = fmul float %2356, 0.000000e+00 +> %2358 = bitcast i32 %686 to float +> %2359 = fadd float %2358, %2357 +> %2360 = fmul float %2345, %2359 +> %2361 = fadd float %2331, %2360 +> %2362 = call float @llvm.sqrt.f32(float %2361) +> %2363 = fadd float %2362, 0.000000e+00 +> %2364 = fdiv float %2303, %2363 +> %2365 = fmul float %2364, 2.000000e+00 +> %2366 = bitcast i32 %52 to float +> %2367 = bitcast i32 %52 to float +> %2368 = fmul float %2366, %2367 +> %2369 = fadd float %2368, 0.000000e+00 +> %2370 = bitcast i32 %686 to float +> %2371 = bitcast i32 %686 to float +> %2372 = fmul float %2370, %2371 +> %2373 = fadd float %2369, %2372 +> %2374 = call float @llvm.sqrt.f32(float %2373) +> %2375 = fneg float %692 +> %2376 = fmul float %2374, %2375 +> %2377 = bitcast i32 %52 to float +> %2378 = fadd float %2377, %2376 +> %2379 = bitcast i32 %52 to float +> %2380 = bitcast i32 %52 to float +> %2381 = fmul float %2379, %2380 +> %2382 = fadd float %2381, 0.000000e+00 +> %2383 = bitcast i32 %686 to float +> %2384 = bitcast i32 %686 to float +2862,2864c2863,2865 +< %2386 = fadd float %2385, 0.000000e+00 +< %2387 = bitcast i32 %57 to float +< %2388 = bitcast i32 %57 to float +--- +> %2386 = fadd float %2382, %2385 +> %2387 = call float @llvm.sqrt.f32(float %2386) +> %2388 = fneg float %692 +2866,2873c2867,2874 +< %2390 = fadd float %2386, %2389 +< %2391 = call float @llvm.sqrt.f32(float %2390) +< %2392 = fneg float %710 +< %2393 = fmul float %2391, %2392 +< %2394 = bitcast i32 %865 to float +< %2395 = fadd float %2394, %2393 +< %2396 = bitcast i32 %865 to float +< %2397 = bitcast i32 %865 to float +--- +> %2390 = bitcast i32 %52 to float +> %2391 = fadd float %2390, %2389 +> %2392 = bitcast i32 %52 to float +> %2393 = bitcast i32 %52 to float +> %2394 = fmul float %2392, %2393 +> %2395 = fadd float %2394, 0.000000e+00 +> %2396 = bitcast i32 %686 to float +> %2397 = bitcast i32 %686 to float +2875,2877c2876,2878 +< %2399 = fadd float %2398, 0.000000e+00 +< %2400 = bitcast i32 %57 to float +< %2401 = bitcast i32 %57 to float +--- +> %2399 = fadd float %2395, %2398 +> %2400 = call float @llvm.sqrt.f32(float %2399) +> %2401 = fneg float %692 +2879,2952c2880,2953 +< %2403 = fadd float %2399, %2402 +< %2404 = call float @llvm.sqrt.f32(float %2403) +< %2405 = fneg float %710 +< %2406 = fmul float %2404, %2405 +< %2407 = bitcast i32 %865 to float +< %2408 = fadd float %2407, %2406 +< %2409 = bitcast i32 %865 to float +< %2410 = bitcast i32 %865 to float +< %2411 = fmul float %2409, %2410 +< %2412 = fadd float %2411, 
0.000000e+00 +< %2413 = bitcast i32 %57 to float +< %2414 = bitcast i32 %57 to float +< %2415 = fmul float %2413, %2414 +< %2416 = fadd float %2412, %2415 +< %2417 = call float @llvm.sqrt.f32(float %2416) +< %2418 = fneg float %710 +< %2419 = fmul float %2417, %2418 +< %2420 = bitcast i32 %865 to float +< %2421 = fadd float %2420, %2419 +< %2422 = fmul float %2408, %2421 +< %2423 = fadd float %2422, 0.000000e+00 +< %2424 = bitcast i32 %865 to float +< %2425 = bitcast i32 %865 to float +< %2426 = fmul float %2424, %2425 +< %2427 = fadd float %2426, 0.000000e+00 +< %2428 = bitcast i32 %57 to float +< %2429 = bitcast i32 %57 to float +< %2430 = fmul float %2428, %2429 +< %2431 = fadd float %2427, %2430 +< %2432 = call float @llvm.sqrt.f32(float %2431) +< %2433 = fneg float %710 +< %2434 = fmul float %2432, %2433 +< %2435 = fmul float %2434, 0.000000e+00 +< %2436 = bitcast i32 %57 to float +< %2437 = fadd float %2436, %2435 +< %2438 = bitcast i32 %865 to float +< %2439 = bitcast i32 %865 to float +< %2440 = fmul float %2438, %2439 +< %2441 = fadd float %2440, 0.000000e+00 +< %2442 = bitcast i32 %57 to float +< %2443 = bitcast i32 %57 to float +< %2444 = fmul float %2442, %2443 +< %2445 = fadd float %2441, %2444 +< %2446 = call float @llvm.sqrt.f32(float %2445) +< %2447 = fneg float %710 +< %2448 = fmul float %2446, %2447 +< %2449 = fmul float %2448, 0.000000e+00 +< %2450 = bitcast i32 %57 to float +< %2451 = fadd float %2450, %2449 +< %2452 = fmul float %2437, %2451 +< %2453 = fadd float %2423, %2452 +< %2454 = call float @llvm.sqrt.f32(float %2453) +< %2455 = fadd float %2454, 0.000000e+00 +< %2456 = fdiv float %2395, %2455 +< %2457 = fmul float %2382, %2456 +< %2458 = fneg float %2457 +< %2459 = fmul float %2458, %2300 +< %2460 = fadd float %2459, 0.000000e+00 +< %2461 = bitcast i32 %865 to float +< %2462 = bitcast i32 %865 to float +< %2463 = fmul float %2461, %2462 +< %2464 = fadd float %2463, 0.000000e+00 +< %2465 = bitcast i32 %57 to float +< %2466 = bitcast i32 %57 to float +< %2467 = fmul float %2465, %2466 +< %2468 = fadd float %2464, %2467 +< %2469 = call float @llvm.sqrt.f32(float %2468) +< %2470 = fneg float %710 +< %2471 = fmul float %2469, %2470 +< %2472 = fmul float %2471, 0.000000e+00 +< %2473 = bitcast i32 %57 to float +< %2474 = fadd float %2473, %2472 +< %2475 = bitcast i32 %865 to float +< %2476 = bitcast i32 %865 to float +--- +> %2403 = bitcast i32 %52 to float +> %2404 = fadd float %2403, %2402 +> %2405 = fmul float %2391, %2404 +> %2406 = fadd float %2405, 0.000000e+00 +> %2407 = bitcast i32 %52 to float +> %2408 = bitcast i32 %52 to float +> %2409 = fmul float %2407, %2408 +> %2410 = fadd float %2409, 0.000000e+00 +> %2411 = bitcast i32 %686 to float +> %2412 = bitcast i32 %686 to float +> %2413 = fmul float %2411, %2412 +> %2414 = fadd float %2410, %2413 +> %2415 = call float @llvm.sqrt.f32(float %2414) +> %2416 = fneg float %692 +> %2417 = fmul float %2415, %2416 +> %2418 = fmul float %2417, 0.000000e+00 +> %2419 = bitcast i32 %686 to float +> %2420 = fadd float %2419, %2418 +> %2421 = bitcast i32 %52 to float +> %2422 = bitcast i32 %52 to float +> %2423 = fmul float %2421, %2422 +> %2424 = fadd float %2423, 0.000000e+00 +> %2425 = bitcast i32 %686 to float +> %2426 = bitcast i32 %686 to float +> %2427 = fmul float %2425, %2426 +> %2428 = fadd float %2424, %2427 +> %2429 = call float @llvm.sqrt.f32(float %2428) +> %2430 = fneg float %692 +> %2431 = fmul float %2429, %2430 +> %2432 = fmul float %2431, 0.000000e+00 +> %2433 = bitcast i32 %686 to float +> %2434 = fadd 
float %2433, %2432 +> %2435 = fmul float %2420, %2434 +> %2436 = fadd float %2406, %2435 +> %2437 = call float @llvm.sqrt.f32(float %2436) +> %2438 = fadd float %2437, 0.000000e+00 +> %2439 = fdiv float %2378, %2438 +> %2440 = fmul float %2365, %2439 +> %2441 = fneg float %2440 +> %2442 = fmul float %2441, %2283 +> %2443 = fadd float %2442, 0.000000e+00 +> %2444 = bitcast i32 %52 to float +> %2445 = bitcast i32 %52 to float +> %2446 = fmul float %2444, %2445 +> %2447 = fadd float %2446, 0.000000e+00 +> %2448 = bitcast i32 %686 to float +> %2449 = bitcast i32 %686 to float +> %2450 = fmul float %2448, %2449 +> %2451 = fadd float %2447, %2450 +> %2452 = call float @llvm.sqrt.f32(float %2451) +> %2453 = fneg float %692 +> %2454 = fmul float %2452, %2453 +> %2455 = fmul float %2454, 0.000000e+00 +> %2456 = bitcast i32 %686 to float +> %2457 = fadd float %2456, %2455 +> %2458 = bitcast i32 %52 to float +> %2459 = bitcast i32 %52 to float +> %2460 = fmul float %2458, %2459 +> %2461 = fadd float %2460, 0.000000e+00 +> %2462 = bitcast i32 %686 to float +> %2463 = bitcast i32 %686 to float +> %2464 = fmul float %2462, %2463 +> %2465 = fadd float %2461, %2464 +> %2466 = call float @llvm.sqrt.f32(float %2465) +> %2467 = fneg float %692 +> %2468 = fmul float %2466, %2467 +> %2469 = bitcast i32 %52 to float +> %2470 = fadd float %2469, %2468 +> %2471 = bitcast i32 %52 to float +> %2472 = bitcast i32 %52 to float +> %2473 = fmul float %2471, %2472 +> %2474 = fadd float %2473, 0.000000e+00 +> %2475 = bitcast i32 %686 to float +> %2476 = bitcast i32 %686 to float +2954,2956c2955,2957 +< %2478 = fadd float %2477, 0.000000e+00 +< %2479 = bitcast i32 %57 to float +< %2480 = bitcast i32 %57 to float +--- +> %2478 = fadd float %2474, %2477 +> %2479 = call float @llvm.sqrt.f32(float %2478) +> %2480 = fneg float %692 +2958,3028c2959,3029 +< %2482 = fadd float %2478, %2481 +< %2483 = call float @llvm.sqrt.f32(float %2482) +< %2484 = fneg float %710 +< %2485 = fmul float %2483, %2484 +< %2486 = bitcast i32 %865 to float +< %2487 = fadd float %2486, %2485 +< %2488 = bitcast i32 %865 to float +< %2489 = bitcast i32 %865 to float +< %2490 = fmul float %2488, %2489 +< %2491 = fadd float %2490, 0.000000e+00 +< %2492 = bitcast i32 %57 to float +< %2493 = bitcast i32 %57 to float +< %2494 = fmul float %2492, %2493 +< %2495 = fadd float %2491, %2494 +< %2496 = call float @llvm.sqrt.f32(float %2495) +< %2497 = fneg float %710 +< %2498 = fmul float %2496, %2497 +< %2499 = bitcast i32 %865 to float +< %2500 = fadd float %2499, %2498 +< %2501 = fmul float %2487, %2500 +< %2502 = fadd float %2501, 0.000000e+00 +< %2503 = bitcast i32 %865 to float +< %2504 = bitcast i32 %865 to float +< %2505 = fmul float %2503, %2504 +< %2506 = fadd float %2505, 0.000000e+00 +< %2507 = bitcast i32 %57 to float +< %2508 = bitcast i32 %57 to float +< %2509 = fmul float %2507, %2508 +< %2510 = fadd float %2506, %2509 +< %2511 = call float @llvm.sqrt.f32(float %2510) +< %2512 = fneg float %710 +< %2513 = fmul float %2511, %2512 +< %2514 = fmul float %2513, 0.000000e+00 +< %2515 = bitcast i32 %57 to float +< %2516 = fadd float %2515, %2514 +< %2517 = bitcast i32 %865 to float +< %2518 = bitcast i32 %865 to float +< %2519 = fmul float %2517, %2518 +< %2520 = fadd float %2519, 0.000000e+00 +< %2521 = bitcast i32 %57 to float +< %2522 = bitcast i32 %57 to float +< %2523 = fmul float %2521, %2522 +< %2524 = fadd float %2520, %2523 +< %2525 = call float @llvm.sqrt.f32(float %2524) +< %2526 = fneg float %710 +< %2527 = fmul float %2525, %2526 +< %2528 = 
fmul float %2527, 0.000000e+00 +< %2529 = bitcast i32 %57 to float +< %2530 = fadd float %2529, %2528 +< %2531 = fmul float %2516, %2530 +< %2532 = fadd float %2502, %2531 +< %2533 = call float @llvm.sqrt.f32(float %2532) +< %2534 = fadd float %2533, 0.000000e+00 +< %2535 = fdiv float %2474, %2534 +< %2536 = fmul float %2535, 2.000000e+00 +< %2537 = bitcast i32 %865 to float +< %2538 = bitcast i32 %865 to float +< %2539 = fmul float %2537, %2538 +< %2540 = fadd float %2539, 0.000000e+00 +< %2541 = bitcast i32 %57 to float +< %2542 = bitcast i32 %57 to float +< %2543 = fmul float %2541, %2542 +< %2544 = fadd float %2540, %2543 +< %2545 = call float @llvm.sqrt.f32(float %2544) +< %2546 = fneg float %710 +< %2547 = fmul float %2545, %2546 +< %2548 = fmul float %2547, 0.000000e+00 +< %2549 = bitcast i32 %57 to float +< %2550 = fadd float %2549, %2548 +< %2551 = bitcast i32 %865 to float +< %2552 = bitcast i32 %865 to float +--- +> %2482 = bitcast i32 %52 to float +> %2483 = fadd float %2482, %2481 +> %2484 = fmul float %2470, %2483 +> %2485 = fadd float %2484, 0.000000e+00 +> %2486 = bitcast i32 %52 to float +> %2487 = bitcast i32 %52 to float +> %2488 = fmul float %2486, %2487 +> %2489 = fadd float %2488, 0.000000e+00 +> %2490 = bitcast i32 %686 to float +> %2491 = bitcast i32 %686 to float +> %2492 = fmul float %2490, %2491 +> %2493 = fadd float %2489, %2492 +> %2494 = call float @llvm.sqrt.f32(float %2493) +> %2495 = fneg float %692 +> %2496 = fmul float %2494, %2495 +> %2497 = fmul float %2496, 0.000000e+00 +> %2498 = bitcast i32 %686 to float +> %2499 = fadd float %2498, %2497 +> %2500 = bitcast i32 %52 to float +> %2501 = bitcast i32 %52 to float +> %2502 = fmul float %2500, %2501 +> %2503 = fadd float %2502, 0.000000e+00 +> %2504 = bitcast i32 %686 to float +> %2505 = bitcast i32 %686 to float +> %2506 = fmul float %2504, %2505 +> %2507 = fadd float %2503, %2506 +> %2508 = call float @llvm.sqrt.f32(float %2507) +> %2509 = fneg float %692 +> %2510 = fmul float %2508, %2509 +> %2511 = fmul float %2510, 0.000000e+00 +> %2512 = bitcast i32 %686 to float +> %2513 = fadd float %2512, %2511 +> %2514 = fmul float %2499, %2513 +> %2515 = fadd float %2485, %2514 +> %2516 = call float @llvm.sqrt.f32(float %2515) +> %2517 = fadd float %2516, 0.000000e+00 +> %2518 = fdiv float %2457, %2517 +> %2519 = fmul float %2518, 2.000000e+00 +> %2520 = bitcast i32 %52 to float +> %2521 = bitcast i32 %52 to float +> %2522 = fmul float %2520, %2521 +> %2523 = fadd float %2522, 0.000000e+00 +> %2524 = bitcast i32 %686 to float +> %2525 = bitcast i32 %686 to float +> %2526 = fmul float %2524, %2525 +> %2527 = fadd float %2523, %2526 +> %2528 = call float @llvm.sqrt.f32(float %2527) +> %2529 = fneg float %692 +> %2530 = fmul float %2528, %2529 +> %2531 = fmul float %2530, 0.000000e+00 +> %2532 = bitcast i32 %686 to float +> %2533 = fadd float %2532, %2531 +> %2534 = bitcast i32 %52 to float +> %2535 = bitcast i32 %52 to float +> %2536 = fmul float %2534, %2535 +> %2537 = fadd float %2536, 0.000000e+00 +> %2538 = bitcast i32 %686 to float +> %2539 = bitcast i32 %686 to float +> %2540 = fmul float %2538, %2539 +> %2541 = fadd float %2537, %2540 +> %2542 = call float @llvm.sqrt.f32(float %2541) +> %2543 = fneg float %692 +> %2544 = fmul float %2542, %2543 +> %2545 = bitcast i32 %52 to float +> %2546 = fadd float %2545, %2544 +> %2547 = bitcast i32 %52 to float +> %2548 = bitcast i32 %52 to float +> %2549 = fmul float %2547, %2548 +> %2550 = fadd float %2549, 0.000000e+00 +> %2551 = bitcast i32 %686 to float +> %2552 = 
bitcast i32 %686 to float +3030,3032c3031,3033 +< %2554 = fadd float %2553, 0.000000e+00 +< %2555 = bitcast i32 %57 to float +< %2556 = bitcast i32 %57 to float +--- +> %2554 = fadd float %2550, %2553 +> %2555 = call float @llvm.sqrt.f32(float %2554) +> %2556 = fneg float %692 +3034,3118c3035,3102 +< %2558 = fadd float %2554, %2557 +< %2559 = call float @llvm.sqrt.f32(float %2558) +< %2560 = fneg float %710 +< %2561 = fmul float %2559, %2560 +< %2562 = bitcast i32 %865 to float +< %2563 = fadd float %2562, %2561 +< %2564 = bitcast i32 %865 to float +< %2565 = bitcast i32 %865 to float +< %2566 = fmul float %2564, %2565 +< %2567 = fadd float %2566, 0.000000e+00 +< %2568 = bitcast i32 %57 to float +< %2569 = bitcast i32 %57 to float +< %2570 = fmul float %2568, %2569 +< %2571 = fadd float %2567, %2570 +< %2572 = call float @llvm.sqrt.f32(float %2571) +< %2573 = fneg float %710 +< %2574 = fmul float %2572, %2573 +< %2575 = bitcast i32 %865 to float +< %2576 = fadd float %2575, %2574 +< %2577 = fmul float %2563, %2576 +< %2578 = fadd float %2577, 0.000000e+00 +< %2579 = bitcast i32 %865 to float +< %2580 = bitcast i32 %865 to float +< %2581 = fmul float %2579, %2580 +< %2582 = fadd float %2581, 0.000000e+00 +< %2583 = bitcast i32 %57 to float +< %2584 = bitcast i32 %57 to float +< %2585 = fmul float %2583, %2584 +< %2586 = fadd float %2582, %2585 +< %2587 = call float @llvm.sqrt.f32(float %2586) +< %2588 = fneg float %710 +< %2589 = fmul float %2587, %2588 +< %2590 = fmul float %2589, 0.000000e+00 +< %2591 = bitcast i32 %57 to float +< %2592 = fadd float %2591, %2590 +< %2593 = bitcast i32 %865 to float +< %2594 = bitcast i32 %865 to float +< %2595 = fmul float %2593, %2594 +< %2596 = fadd float %2595, 0.000000e+00 +< %2597 = bitcast i32 %57 to float +< %2598 = bitcast i32 %57 to float +< %2599 = fmul float %2597, %2598 +< %2600 = fadd float %2596, %2599 +< %2601 = call float @llvm.sqrt.f32(float %2600) +< %2602 = fneg float %710 +< %2603 = fmul float %2601, %2602 +< %2604 = fmul float %2603, 0.000000e+00 +< %2605 = bitcast i32 %57 to float +< %2606 = fadd float %2605, %2604 +< %2607 = fmul float %2592, %2606 +< %2608 = fadd float %2578, %2607 +< %2609 = call float @llvm.sqrt.f32(float %2608) +< %2610 = fadd float %2609, 0.000000e+00 +< %2611 = fdiv float %2550, %2610 +< %2612 = fmul float %2536, %2611 +< %2613 = fsub float 1.000000e+00, %2612 +< %2614 = load float, float* %1652, align 4 +< %2615 = fmul float %2613, %2614 +< %2616 = fadd float %2460, %2615 +< %2617 = insertelement <4 x float> zeroinitializer, float %2616, i32 0 +< %2618 = insertelement <4 x float> %2617, float 0.000000e+00, i32 1 +< %2619 = insertelement <4 x float> %2618, float 0.000000e+00, i32 2 +< %2620 = insertelement <4 x float> %2619, float 0.000000e+00, i32 3 +< %2621 = extractelement <4 x float> %2620, i32 0 +< store float %2621, float* %2143, align 4 +< %2622 = getelementptr float, float* %1, i32 0 +< %2623 = getelementptr inbounds float, float* %2622, i64 2 +< %2624 = bitcast float* %2623 to i32* +< %2625 = load i32, i32* %2624, align 4 +< %2626 = bitcast i32 %2625 to float +< %2627 = insertelement <4 x float> zeroinitializer, float %2626, i32 0 +< %2628 = getelementptr float, float* %1, i32 0 +< %2629 = getelementptr inbounds float, float* %2628, i64 1 +< %2630 = bitcast float* %2629 to i32* +< %2631 = load i32, i32* %2630, align 4 +< %2632 = bitcast i32 %2631 to float +< %2633 = insertelement <4 x float> %2627, float %2632, i32 1 +< %2634 = insertelement <4 x float> %2633, float 0.000000e+00, i32 2 +< %2635 = 
insertelement <4 x float> %2634, float 0.000000e+00, i32 3 +< %2636 = extractelement <4 x float> %2635, i32 0 +< %2637 = bitcast i32* %2630 to float* +< store float %2636, float* %2637, align 4 +< %2638 = extractelement <4 x float> %2635, i32 1 +< %2639 = bitcast i32* %2624 to float* +< store float %2638, float* %2639, align 4 +--- +> %2558 = bitcast i32 %52 to float +> %2559 = fadd float %2558, %2557 +> %2560 = fmul float %2546, %2559 +> %2561 = fadd float %2560, 0.000000e+00 +> %2562 = bitcast i32 %52 to float +> %2563 = bitcast i32 %52 to float +> %2564 = fmul float %2562, %2563 +> %2565 = fadd float %2564, 0.000000e+00 +> %2566 = bitcast i32 %686 to float +> %2567 = bitcast i32 %686 to float +> %2568 = fmul float %2566, %2567 +> %2569 = fadd float %2565, %2568 +> %2570 = call float @llvm.sqrt.f32(float %2569) +> %2571 = fneg float %692 +> %2572 = fmul float %2570, %2571 +> %2573 = fmul float %2572, 0.000000e+00 +> %2574 = bitcast i32 %686 to float +> %2575 = fadd float %2574, %2573 +> %2576 = bitcast i32 %52 to float +> %2577 = bitcast i32 %52 to float +> %2578 = fmul float %2576, %2577 +> %2579 = fadd float %2578, 0.000000e+00 +> %2580 = bitcast i32 %686 to float +> %2581 = bitcast i32 %686 to float +> %2582 = fmul float %2580, %2581 +> %2583 = fadd float %2579, %2582 +> %2584 = call float @llvm.sqrt.f32(float %2583) +> %2585 = fneg float %692 +> %2586 = fmul float %2584, %2585 +> %2587 = fmul float %2586, 0.000000e+00 +> %2588 = bitcast i32 %686 to float +> %2589 = fadd float %2588, %2587 +> %2590 = fmul float %2575, %2589 +> %2591 = fadd float %2561, %2590 +> %2592 = call float @llvm.sqrt.f32(float %2591) +> %2593 = fadd float %2592, 0.000000e+00 +> %2594 = fdiv float %2533, %2593 +> %2595 = fmul float %2519, %2594 +> %2596 = fsub float 1.000000e+00, %2595 +> %2597 = load float, float* %44, align 4 +> %2598 = fmul float %2596, %2597 +> %2599 = fadd float %2443, %2598 +> %2600 = insertelement <4 x float> zeroinitializer, float %2599, i32 0 +> %2601 = insertelement <4 x float> %2600, float 0.000000e+00, i32 1 +> %2602 = insertelement <4 x float> %2601, float 0.000000e+00, i32 2 +> %2603 = insertelement <4 x float> %2602, float 0.000000e+00, i32 3 +> %2604 = extractelement <4 x float> %2603, i32 0 +> store float %2604, float* %2126, align 4 +> %2605 = getelementptr float, float* %1, i32 0 +> %2606 = getelementptr inbounds float, float* %2605, i64 2 +> %2607 = bitcast float* %2606 to i32* +> %2608 = load i32, i32* %2607, align 4 +> %2609 = bitcast i32 %2608 to float +> %2610 = insertelement <4 x float> zeroinitializer, float %2609, i32 0 +> %2611 = getelementptr float, float* %1, i32 0 +> %2612 = getelementptr inbounds float, float* %2611, i64 1 +> %2613 = bitcast float* %2612 to i32* +> %2614 = load i32, i32* %2613, align 4 +> %2615 = bitcast i32 %2614 to float +> %2616 = insertelement <4 x float> %2610, float %2615, i32 1 +> %2617 = insertelement <4 x float> %2616, float 0.000000e+00, i32 2 +> %2618 = insertelement <4 x float> %2617, float 0.000000e+00, i32 3 +> %2619 = extractelement <4 x float> %2618, i32 0 +> %2620 = bitcast i32* %2613 to float* +> store float %2619, float* %2620, align 4 +> %2621 = extractelement <4 x float> %2618, i32 1 +> %2622 = bitcast i32* %2607 to float* +> store float %2621, float* %2622, align 4 diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/diff-diospyros.txt b/src/dios-egraphs/Diospyros/flaky-outputs/diff-diospyros.txt new file mode 100644 index 00000000..29bb2e62 --- /dev/null +++ b/src/dios-egraphs/Diospyros/flaky-outputs/diff-diospyros.txt @@ 
-0,0 +1,5450 @@ +270,276c270,276 +< %58 = getelementptr float, float* %0, i32 0 +< %59 = load float, float* %58, align 4 +< %60 = insertelement <4 x float> zeroinitializer, float %59, i32 0 +< %61 = insertelement <4 x float> %60, float 1.000000e+00, i32 1 +< %62 = insertelement <4 x float> %61, float 1.000000e+00, i32 2 +< %63 = insertelement <4 x float> %62, float 1.000000e+00, i32 3 +< %64 = insertelement <4 x float> zeroinitializer, float %51, i32 0 +--- +> %58 = insertelement <4 x float> zeroinitializer, float %45, i32 0 +> %59 = insertelement <4 x float> %58, float 1.000000e+00, i32 1 +> %60 = insertelement <4 x float> %59, float 1.000000e+00, i32 2 +> %61 = insertelement <4 x float> %60, float 1.000000e+00, i32 3 +> %62 = getelementptr float, float* %1, i32 0 +> %63 = load float, float* %62, align 4 +> %64 = insertelement <4 x float> zeroinitializer, float %63, i32 0 +280c280 +< %68 = fmul <4 x float> %63, %67 +--- +> %68 = fmul <4 x float> %61, %67 +325,348c325,347 +< %110 = insertelement <4 x float> zeroinitializer, float %97, i32 0 +< %111 = insertelement <4 x float> %110, float 0.000000e+00, i32 1 +< %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 2 +< %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 3 +< %114 = fmul <4 x float> %109, %113 +< %115 = fadd <4 x float> %114, zeroinitializer +< %116 = getelementptr float, float* %0, i32 0 +< %117 = getelementptr inbounds float, float* %116, i64 1 +< %118 = load float, float* %117, align 4 +< %119 = insertelement <4 x float> zeroinitializer, float %118, i32 0 +< %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 1 +< %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 2 +< %122 = insertelement <4 x float> %121, float 0.000000e+00, i32 3 +< %123 = getelementptr float, float* %1, i32 0 +< %124 = getelementptr inbounds float, float* %123, i64 3 +< %125 = load float, float* %124, align 4 +< %126 = insertelement <4 x float> zeroinitializer, float %125, i32 0 +< %127 = insertelement <4 x float> %126, float 0.000000e+00, i32 1 +< %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 2 +< %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 3 +< %130 = call <4 x float> @llvm.fma.f32.3(<4 x float> %122, <4 x float> %129, <4 x float> %115) +< %131 = extractelement <4 x float> %130, i32 0 +< store float %131, float* %105, align 4 +< %132 = extractelement <4 x float> %130, i32 1 +--- +> %110 = load float, float* %96, align 4 +> %111 = insertelement <4 x float> zeroinitializer, float %110, i32 0 +> %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 1 +> %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 2 +> %114 = insertelement <4 x float> %113, float 0.000000e+00, i32 3 +> %115 = fmul <4 x float> %109, %114 +> %116 = fadd <4 x float> %115, zeroinitializer +> %117 = getelementptr float, float* %0, i32 0 +> %118 = getelementptr inbounds float, float* %117, i64 1 +> %119 = load float, float* %118, align 4 +> %120 = insertelement <4 x float> zeroinitializer, float %119, i32 0 +> %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 1 +> %122 = insertelement <4 x float> %121, float 0.000000e+00, i32 2 +> %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3 +> %124 = getelementptr float, float* %1, i32 0 +> %125 = getelementptr inbounds float, float* %124, i64 3 +> %126 = load float, float* %125, align 4 +> %127 = insertelement <4 x float> zeroinitializer, float %126, i32 0 +> %128 = insertelement <4 x float> %127, float 0.000000e+00, 
i32 1 +> %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 2 +> %130 = insertelement <4 x float> %129, float 0.000000e+00, i32 3 +> %131 = call <4 x float> @llvm.fma.f32.3(<4 x float> %123, <4 x float> %130, <4 x float> %116) +> %132 = extractelement <4 x float> %131, i32 0 +350c349 +< %134 = getelementptr inbounds float, float* %133, i64 2 +--- +> %134 = getelementptr inbounds float, float* %133, i64 1 +352,385c351,385 +< %135 = getelementptr float, float* %0, i32 0 +< %136 = getelementptr inbounds float, float* %135, i64 2 +< %137 = load float, float* %136, align 4 +< %138 = insertelement <4 x float> zeroinitializer, float %137, i32 0 +< %139 = insertelement <4 x float> %138, float 0.000000e+00, i32 1 +< %140 = insertelement <4 x float> %139, float 0.000000e+00, i32 2 +< %141 = insertelement <4 x float> %140, float 0.000000e+00, i32 3 +< %142 = getelementptr float, float* %1, i32 0 +< %143 = load float, float* %142, align 4 +< %144 = insertelement <4 x float> zeroinitializer, float %143, i32 0 +< %145 = insertelement <4 x float> %144, float 0.000000e+00, i32 1 +< %146 = insertelement <4 x float> %145, float 0.000000e+00, i32 2 +< %147 = insertelement <4 x float> %146, float 0.000000e+00, i32 3 +< %148 = call <4 x float> @llvm.fma.f32.4(<4 x float> %141, <4 x float> %147, <4 x float> zeroinitializer) +< %149 = extractelement <4 x float> %148, i32 0 +< store float %149, float* %134, align 4 +< %150 = insertelement <4 x float> zeroinitializer, float %137, i32 0 +< %151 = insertelement <4 x float> %150, float 1.000000e+00, i32 1 +< %152 = insertelement <4 x float> %151, float 1.000000e+00, i32 2 +< %153 = insertelement <4 x float> %152, float 1.000000e+00, i32 3 +< %154 = insertelement <4 x float> zeroinitializer, float %143, i32 0 +< %155 = insertelement <4 x float> %154, float 0.000000e+00, i32 1 +< %156 = insertelement <4 x float> %155, float 0.000000e+00, i32 2 +< %157 = insertelement <4 x float> %156, float 0.000000e+00, i32 3 +< %158 = fmul <4 x float> %153, %157 +< %159 = fadd <4 x float> %158, zeroinitializer +< %160 = getelementptr float, float* %0, i32 0 +< %161 = getelementptr inbounds float, float* %160, i64 3 +< %162 = load float, float* %161, align 4 +< %163 = insertelement <4 x float> zeroinitializer, float %162, i32 0 +< %164 = insertelement <4 x float> %163, float 0.000000e+00, i32 1 +< %165 = insertelement <4 x float> %164, float 0.000000e+00, i32 2 +< %166 = insertelement <4 x float> %165, float 0.000000e+00, i32 3 +< %167 = load float, float* %78, align 4 +--- +> %135 = extractelement <4 x float> %131, i32 1 +> %136 = getelementptr float, float* %2, i32 0 +> %137 = getelementptr inbounds float, float* %136, i64 2 +> store float %135, float* %137, align 4 +> %138 = getelementptr float, float* %0, i32 0 +> %139 = getelementptr inbounds float, float* %138, i64 2 +> %140 = load float, float* %139, align 4 +> %141 = insertelement <4 x float> zeroinitializer, float %140, i32 0 +> %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 1 +> %143 = insertelement <4 x float> %142, float 0.000000e+00, i32 2 +> %144 = insertelement <4 x float> %143, float 0.000000e+00, i32 3 +> %145 = getelementptr float, float* %1, i32 0 +> %146 = load float, float* %145, align 4 +> %147 = insertelement <4 x float> zeroinitializer, float %146, i32 0 +> %148 = insertelement <4 x float> %147, float 0.000000e+00, i32 1 +> %149 = insertelement <4 x float> %148, float 0.000000e+00, i32 2 +> %150 = insertelement <4 x float> %149, float 0.000000e+00, i32 3 +> %151 = call <4 x float> 
@llvm.fma.f32.4(<4 x float> %144, <4 x float> %150, <4 x float> zeroinitializer) +> %152 = extractelement <4 x float> %151, i32 0 +> %153 = getelementptr float, float* %2, i32 0 +> %154 = getelementptr inbounds float, float* %153, i64 2 +> store float %152, float* %154, align 4 +> %155 = insertelement <4 x float> zeroinitializer, float %140, i32 0 +> %156 = insertelement <4 x float> %155, float 1.000000e+00, i32 1 +> %157 = insertelement <4 x float> %156, float 1.000000e+00, i32 2 +> %158 = insertelement <4 x float> %157, float 1.000000e+00, i32 3 +> %159 = insertelement <4 x float> zeroinitializer, float %146, i32 0 +> %160 = insertelement <4 x float> %159, float 0.000000e+00, i32 1 +> %161 = insertelement <4 x float> %160, float 0.000000e+00, i32 2 +> %162 = insertelement <4 x float> %161, float 0.000000e+00, i32 3 +> %163 = fmul <4 x float> %158, %162 +> %164 = fadd <4 x float> %163, zeroinitializer +> %165 = getelementptr float, float* %0, i32 0 +> %166 = getelementptr inbounds float, float* %165, i64 3 +> %167 = load float, float* %166, align 4 +390,402c390,402 +< %172 = call <4 x float> @llvm.fma.f32.5(<4 x float> %166, <4 x float> %171, <4 x float> %159) +< %173 = extractelement <4 x float> %172, i32 0 +< store float %173, float* %134, align 4 +< %174 = extractelement <4 x float> %172, i32 1 +< %175 = getelementptr float, float* %2, i32 0 +< %176 = getelementptr inbounds float, float* %175, i64 3 +< store float %174, float* %176, align 4 +< %177 = load float, float* %136, align 4 +< %178 = insertelement <4 x float> zeroinitializer, float %177, i32 0 +< %179 = insertelement <4 x float> %178, float 0.000000e+00, i32 1 +< %180 = insertelement <4 x float> %179, float 0.000000e+00, i32 2 +< %181 = insertelement <4 x float> %180, float 0.000000e+00, i32 3 +< %182 = load float, float* %96, align 4 +--- +> %172 = load float, float* %78, align 4 +> %173 = insertelement <4 x float> zeroinitializer, float %172, i32 0 +> %174 = insertelement <4 x float> %173, float 0.000000e+00, i32 1 +> %175 = insertelement <4 x float> %174, float 0.000000e+00, i32 2 +> %176 = insertelement <4 x float> %175, float 0.000000e+00, i32 3 +> %177 = call <4 x float> @llvm.fma.f32.5(<4 x float> %171, <4 x float> %176, <4 x float> %164) +> %178 = extractelement <4 x float> %177, i32 0 +> store float %178, float* %154, align 4 +> %179 = extractelement <4 x float> %177, i32 1 +> %180 = getelementptr float, float* %2, i32 0 +> %181 = getelementptr inbounds float, float* %180, i64 3 +> store float %179, float* %181, align 4 +> %182 = load float, float* %139, align 4 +407,427c407,427 +< %187 = call <4 x float> @llvm.fma.f32.6(<4 x float> %181, <4 x float> %186, <4 x float> zeroinitializer) +< %188 = extractelement <4 x float> %187, i32 0 +< store float %188, float* %176, align 4 +< %189 = insertelement <4 x float> zeroinitializer, float %177, i32 0 +< %190 = insertelement <4 x float> %189, float 1.000000e+00, i32 1 +< %191 = insertelement <4 x float> %190, float 1.000000e+00, i32 2 +< %192 = insertelement <4 x float> %191, float 1.000000e+00, i32 3 +< %193 = insertelement <4 x float> zeroinitializer, float %182, i32 0 +< %194 = insertelement <4 x float> %193, float 0.000000e+00, i32 1 +< %195 = insertelement <4 x float> %194, float 0.000000e+00, i32 2 +< %196 = insertelement <4 x float> %195, float 0.000000e+00, i32 3 +< %197 = fmul <4 x float> %192, %196 +< %198 = fadd <4 x float> %197, zeroinitializer +< %199 = getelementptr float, float* %0, i32 0 +< %200 = getelementptr inbounds float, float* %199, i64 3 +< %201 = load 
float, float* %200, align 4 +< %202 = insertelement <4 x float> zeroinitializer, float %201, i32 0 +< %203 = insertelement <4 x float> %202, float 0.000000e+00, i32 1 +< %204 = insertelement <4 x float> %203, float 0.000000e+00, i32 2 +< %205 = insertelement <4 x float> %204, float 0.000000e+00, i32 3 +< %206 = load float, float* %124, align 4 +--- +> %187 = load float, float* %96, align 4 +> %188 = insertelement <4 x float> zeroinitializer, float %187, i32 0 +> %189 = insertelement <4 x float> %188, float 0.000000e+00, i32 1 +> %190 = insertelement <4 x float> %189, float 0.000000e+00, i32 2 +> %191 = insertelement <4 x float> %190, float 0.000000e+00, i32 3 +> %192 = call <4 x float> @llvm.fma.f32.6(<4 x float> %186, <4 x float> %191, <4 x float> zeroinitializer) +> %193 = extractelement <4 x float> %192, i32 0 +> store float %193, float* %181, align 4 +> %194 = insertelement <4 x float> zeroinitializer, float %182, i32 0 +> %195 = insertelement <4 x float> %194, float 1.000000e+00, i32 1 +> %196 = insertelement <4 x float> %195, float 1.000000e+00, i32 2 +> %197 = insertelement <4 x float> %196, float 1.000000e+00, i32 3 +> %198 = insertelement <4 x float> zeroinitializer, float %187, i32 0 +> %199 = insertelement <4 x float> %198, float 0.000000e+00, i32 1 +> %200 = insertelement <4 x float> %199, float 0.000000e+00, i32 2 +> %201 = insertelement <4 x float> %200, float 0.000000e+00, i32 3 +> %202 = fmul <4 x float> %197, %201 +> %203 = fadd <4 x float> %202, zeroinitializer +> %204 = getelementptr float, float* %0, i32 0 +> %205 = getelementptr inbounds float, float* %204, i64 3 +> %206 = load float, float* %205, align 4 +432,434c432,439 +< %211 = call <4 x float> @llvm.fma.f32.7(<4 x float> %205, <4 x float> %210, <4 x float> %198) +< %212 = extractelement <4 x float> %211, i32 0 +< store float %212, float* %176, align 4 +--- +> %211 = load float, float* %125, align 4 +> %212 = insertelement <4 x float> zeroinitializer, float %211, i32 0 +> %213 = insertelement <4 x float> %212, float 0.000000e+00, i32 1 +> %214 = insertelement <4 x float> %213, float 0.000000e+00, i32 2 +> %215 = insertelement <4 x float> %214, float 0.000000e+00, i32 3 +> %216 = call <4 x float> @llvm.fma.f32.7(<4 x float> %210, <4 x float> %215, <4 x float> %203) +> %217 = extractelement <4 x float> %216, i32 0 +> store float %217, float* %181, align 4 +658,674c663,679 +< %157 = load i32, i32* %130, align 4 +< %158 = bitcast i32 %157 to float +< %159 = bitcast i32 %157 to float +< %160 = fmul float %158, %159 +< %161 = fadd float %156, %160 +< %162 = call float @llvm.sqrt.f32.8(float %161) +< %163 = bitcast i32 %152 to float +< %164 = fcmp olt float %163, 0.000000e+00 +< %165 = sext i1 %164 to i32 +< %166 = fcmp ogt float %163, 0.000000e+00 +< %167 = zext i1 %166 to i32 +< %168 = add nsw i32 %165, %167 +< %169 = sitofp i32 %168 to float +< %170 = fneg float %169 +< %171 = fmul float %162, %170 +< %172 = bitcast i32 %152 to float +< %173 = fadd float %172, %171 +--- +> %157 = bitcast i32 %131 to float +> %158 = bitcast i32 %131 to float +> %159 = fmul float %157, %158 +> %160 = fadd float %156, %159 +> %161 = call float @llvm.sqrt.f32.8(float %160) +> %162 = bitcast i32 %152 to float +> %163 = fcmp olt float %162, 0.000000e+00 +> %164 = sext i1 %163 to i32 +> %165 = fcmp ogt float %162, 0.000000e+00 +> %166 = zext i1 %165 to i32 +> %167 = add nsw i32 %164, %166 +> %168 = sitofp i32 %167 to float +> %169 = fneg float %168 +> %170 = fmul float %161, %169 +> %171 = bitcast i32 %152 to float +> %172 = fadd float %171, 
%170 +> %173 = bitcast i32 %152 to float +676,687c681,692 +< %175 = bitcast i32 %152 to float +< %176 = fmul float %174, %175 +< %177 = fadd float %176, 0.000000e+00 +< %178 = bitcast i32 %157 to float +< %179 = bitcast i32 %157 to float +< %180 = fmul float %178, %179 +< %181 = fadd float %177, %180 +< %182 = call float @llvm.sqrt.f32.9(float %181) +< %183 = fneg float %169 +< %184 = fmul float %182, %183 +< %185 = bitcast i32 %152 to float +< %186 = fadd float %185, %184 +--- +> %175 = fmul float %173, %174 +> %176 = fadd float %175, 0.000000e+00 +> %177 = bitcast i32 %131 to float +> %178 = bitcast i32 %131 to float +> %179 = fmul float %177, %178 +> %180 = fadd float %176, %179 +> %181 = call float @llvm.sqrt.f32.9(float %180) +> %182 = fneg float %168 +> %183 = fmul float %181, %182 +> %184 = bitcast i32 %152 to float +> %185 = fadd float %184, %183 +> %186 = bitcast i32 %152 to float +689,702c694,707 +< %188 = bitcast i32 %152 to float +< %189 = fmul float %187, %188 +< %190 = fadd float %189, 0.000000e+00 +< %191 = bitcast i32 %157 to float +< %192 = bitcast i32 %157 to float +< %193 = fmul float %191, %192 +< %194 = fadd float %190, %193 +< %195 = call float @llvm.sqrt.f32.10(float %194) +< %196 = fneg float %169 +< %197 = fmul float %195, %196 +< %198 = bitcast i32 %152 to float +< %199 = fadd float %198, %197 +< %200 = fmul float %186, %199 +< %201 = fadd float %200, 0.000000e+00 +--- +> %188 = fmul float %186, %187 +> %189 = fadd float %188, 0.000000e+00 +> %190 = bitcast i32 %131 to float +> %191 = bitcast i32 %131 to float +> %192 = fmul float %190, %191 +> %193 = fadd float %189, %192 +> %194 = call float @llvm.sqrt.f32.10(float %193) +> %195 = fneg float %168 +> %196 = fmul float %194, %195 +> %197 = bitcast i32 %152 to float +> %198 = fadd float %197, %196 +> %199 = fmul float %185, %198 +> %200 = fadd float %199, 0.000000e+00 +> %201 = bitcast i32 %152 to float +704,716c709,721 +< %203 = bitcast i32 %152 to float +< %204 = fmul float %202, %203 +< %205 = fadd float %204, 0.000000e+00 +< %206 = bitcast i32 %157 to float +< %207 = bitcast i32 %157 to float +< %208 = fmul float %206, %207 +< %209 = fadd float %205, %208 +< %210 = call float @llvm.sqrt.f32.11(float %209) +< %211 = fneg float %169 +< %212 = fmul float %210, %211 +< %213 = fmul float %212, 0.000000e+00 +< %214 = bitcast i32 %157 to float +< %215 = fadd float %214, %213 +--- +> %203 = fmul float %201, %202 +> %204 = fadd float %203, 0.000000e+00 +> %205 = bitcast i32 %131 to float +> %206 = bitcast i32 %131 to float +> %207 = fmul float %205, %206 +> %208 = fadd float %204, %207 +> %209 = call float @llvm.sqrt.f32.11(float %208) +> %210 = fneg float %168 +> %211 = fmul float %209, %210 +> %212 = fmul float %211, 0.000000e+00 +> %213 = bitcast i32 %131 to float +> %214 = fadd float %213, %212 +> %215 = bitcast i32 %152 to float +718,736c723,741 +< %217 = bitcast i32 %152 to float +< %218 = fmul float %216, %217 +< %219 = fadd float %218, 0.000000e+00 +< %220 = bitcast i32 %157 to float +< %221 = bitcast i32 %157 to float +< %222 = fmul float %220, %221 +< %223 = fadd float %219, %222 +< %224 = call float @llvm.sqrt.f32.12(float %223) +< %225 = fneg float %169 +< %226 = fmul float %224, %225 +< %227 = fmul float %226, 0.000000e+00 +< %228 = bitcast i32 %157 to float +< %229 = fadd float %228, %227 +< %230 = fmul float %215, %229 +< %231 = fadd float %201, %230 +< %232 = call float @llvm.sqrt.f32.13(float %231) +< %233 = fadd float %232, 0.000000e+00 +< %234 = fdiv float %173, %233 +< %235 = fmul float %234, 
2.000000e+00 +--- +> %217 = fmul float %215, %216 +> %218 = fadd float %217, 0.000000e+00 +> %219 = bitcast i32 %131 to float +> %220 = bitcast i32 %131 to float +> %221 = fmul float %219, %220 +> %222 = fadd float %218, %221 +> %223 = call float @llvm.sqrt.f32.12(float %222) +> %224 = fneg float %168 +> %225 = fmul float %223, %224 +> %226 = fmul float %225, 0.000000e+00 +> %227 = bitcast i32 %131 to float +> %228 = fadd float %227, %226 +> %229 = fmul float %214, %228 +> %230 = fadd float %200, %229 +> %231 = call float @llvm.sqrt.f32.13(float %230) +> %232 = fadd float %231, 0.000000e+00 +> %233 = fdiv float %172, %232 +> %234 = fmul float %233, 2.000000e+00 +> %235 = bitcast i32 %152 to float +738,749c743,754 +< %237 = bitcast i32 %152 to float +< %238 = fmul float %236, %237 +< %239 = fadd float %238, 0.000000e+00 +< %240 = bitcast i32 %157 to float +< %241 = bitcast i32 %157 to float +< %242 = fmul float %240, %241 +< %243 = fadd float %239, %242 +< %244 = call float @llvm.sqrt.f32.14(float %243) +< %245 = fneg float %169 +< %246 = fmul float %244, %245 +< %247 = bitcast i32 %152 to float +< %248 = fadd float %247, %246 +--- +> %237 = fmul float %235, %236 +> %238 = fadd float %237, 0.000000e+00 +> %239 = bitcast i32 %131 to float +> %240 = bitcast i32 %131 to float +> %241 = fmul float %239, %240 +> %242 = fadd float %238, %241 +> %243 = call float @llvm.sqrt.f32.14(float %242) +> %244 = fneg float %168 +> %245 = fmul float %243, %244 +> %246 = bitcast i32 %152 to float +> %247 = fadd float %246, %245 +> %248 = bitcast i32 %152 to float +751,762c756,767 +< %250 = bitcast i32 %152 to float +< %251 = fmul float %249, %250 +< %252 = fadd float %251, 0.000000e+00 +< %253 = bitcast i32 %157 to float +< %254 = bitcast i32 %157 to float +< %255 = fmul float %253, %254 +< %256 = fadd float %252, %255 +< %257 = call float @llvm.sqrt.f32.15(float %256) +< %258 = fneg float %169 +< %259 = fmul float %257, %258 +< %260 = bitcast i32 %152 to float +< %261 = fadd float %260, %259 +--- +> %250 = fmul float %248, %249 +> %251 = fadd float %250, 0.000000e+00 +> %252 = bitcast i32 %131 to float +> %253 = bitcast i32 %131 to float +> %254 = fmul float %252, %253 +> %255 = fadd float %251, %254 +> %256 = call float @llvm.sqrt.f32.15(float %255) +> %257 = fneg float %168 +> %258 = fmul float %256, %257 +> %259 = bitcast i32 %152 to float +> %260 = fadd float %259, %258 +> %261 = bitcast i32 %152 to float +764,777c769,782 +< %263 = bitcast i32 %152 to float +< %264 = fmul float %262, %263 +< %265 = fadd float %264, 0.000000e+00 +< %266 = bitcast i32 %157 to float +< %267 = bitcast i32 %157 to float +< %268 = fmul float %266, %267 +< %269 = fadd float %265, %268 +< %270 = call float @llvm.sqrt.f32.16(float %269) +< %271 = fneg float %169 +< %272 = fmul float %270, %271 +< %273 = bitcast i32 %152 to float +< %274 = fadd float %273, %272 +< %275 = fmul float %261, %274 +< %276 = fadd float %275, 0.000000e+00 +--- +> %263 = fmul float %261, %262 +> %264 = fadd float %263, 0.000000e+00 +> %265 = bitcast i32 %131 to float +> %266 = bitcast i32 %131 to float +> %267 = fmul float %265, %266 +> %268 = fadd float %264, %267 +> %269 = call float @llvm.sqrt.f32.16(float %268) +> %270 = fneg float %168 +> %271 = fmul float %269, %270 +> %272 = bitcast i32 %152 to float +> %273 = fadd float %272, %271 +> %274 = fmul float %260, %273 +> %275 = fadd float %274, 0.000000e+00 +> %276 = bitcast i32 %152 to float +779,791c784,796 +< %278 = bitcast i32 %152 to float +< %279 = fmul float %277, %278 +< %280 = fadd float %279, 
0.000000e+00 +< %281 = bitcast i32 %157 to float +< %282 = bitcast i32 %157 to float +< %283 = fmul float %281, %282 +< %284 = fadd float %280, %283 +< %285 = call float @llvm.sqrt.f32.17(float %284) +< %286 = fneg float %169 +< %287 = fmul float %285, %286 +< %288 = fmul float %287, 0.000000e+00 +< %289 = bitcast i32 %157 to float +< %290 = fadd float %289, %288 +--- +> %278 = fmul float %276, %277 +> %279 = fadd float %278, 0.000000e+00 +> %280 = bitcast i32 %131 to float +> %281 = bitcast i32 %131 to float +> %282 = fmul float %280, %281 +> %283 = fadd float %279, %282 +> %284 = call float @llvm.sqrt.f32.17(float %283) +> %285 = fneg float %168 +> %286 = fmul float %284, %285 +> %287 = fmul float %286, 0.000000e+00 +> %288 = bitcast i32 %131 to float +> %289 = fadd float %288, %287 +> %290 = bitcast i32 %152 to float +793,812c798,817 +< %292 = bitcast i32 %152 to float +< %293 = fmul float %291, %292 +< %294 = fadd float %293, 0.000000e+00 +< %295 = bitcast i32 %157 to float +< %296 = bitcast i32 %157 to float +< %297 = fmul float %295, %296 +< %298 = fadd float %294, %297 +< %299 = call float @llvm.sqrt.f32.18(float %298) +< %300 = fneg float %169 +< %301 = fmul float %299, %300 +< %302 = fmul float %301, 0.000000e+00 +< %303 = bitcast i32 %157 to float +< %304 = fadd float %303, %302 +< %305 = fmul float %290, %304 +< %306 = fadd float %276, %305 +< %307 = call float @llvm.sqrt.f32.19(float %306) +< %308 = fadd float %307, 0.000000e+00 +< %309 = fdiv float %248, %308 +< %310 = fmul float %235, %309 +< %311 = insertelement <4 x float> %149, float %310, i32 1 +--- +> %292 = fmul float %290, %291 +> %293 = fadd float %292, 0.000000e+00 +> %294 = bitcast i32 %131 to float +> %295 = bitcast i32 %131 to float +> %296 = fmul float %294, %295 +> %297 = fadd float %293, %296 +> %298 = call float @llvm.sqrt.f32.18(float %297) +> %299 = fneg float %168 +> %300 = fmul float %298, %299 +> %301 = fmul float %300, 0.000000e+00 +> %302 = bitcast i32 %131 to float +> %303 = fadd float %302, %301 +> %304 = fmul float %289, %303 +> %305 = fadd float %275, %304 +> %306 = call float @llvm.sqrt.f32.19(float %305) +> %307 = fadd float %306, 0.000000e+00 +> %308 = fdiv float %247, %307 +> %309 = fmul float %234, %308 +> %310 = insertelement <4 x float> %149, float %309, i32 1 +> %311 = bitcast i32 %152 to float +814,825c819,830 +< %313 = bitcast i32 %152 to float +< %314 = fmul float %312, %313 +< %315 = fadd float %314, 0.000000e+00 +< %316 = bitcast i32 %157 to float +< %317 = bitcast i32 %157 to float +< %318 = fmul float %316, %317 +< %319 = fadd float %315, %318 +< %320 = call float @llvm.sqrt.f32.20(float %319) +< %321 = fneg float %169 +< %322 = fmul float %320, %321 +< %323 = bitcast i32 %152 to float +< %324 = fadd float %323, %322 +--- +> %313 = fmul float %311, %312 +> %314 = fadd float %313, 0.000000e+00 +> %315 = bitcast i32 %131 to float +> %316 = bitcast i32 %131 to float +> %317 = fmul float %315, %316 +> %318 = fadd float %314, %317 +> %319 = call float @llvm.sqrt.f32.20(float %318) +> %320 = fneg float %168 +> %321 = fmul float %319, %320 +> %322 = bitcast i32 %152 to float +> %323 = fadd float %322, %321 +> %324 = bitcast i32 %152 to float +827,838c832,843 +< %326 = bitcast i32 %152 to float +< %327 = fmul float %325, %326 +< %328 = fadd float %327, 0.000000e+00 +< %329 = bitcast i32 %157 to float +< %330 = bitcast i32 %157 to float +< %331 = fmul float %329, %330 +< %332 = fadd float %328, %331 +< %333 = call float @llvm.sqrt.f32.21(float %332) +< %334 = fneg float %169 +< %335 = fmul 
float %333, %334 +< %336 = bitcast i32 %152 to float +< %337 = fadd float %336, %335 +--- +> %326 = fmul float %324, %325 +> %327 = fadd float %326, 0.000000e+00 +> %328 = bitcast i32 %131 to float +> %329 = bitcast i32 %131 to float +> %330 = fmul float %328, %329 +> %331 = fadd float %327, %330 +> %332 = call float @llvm.sqrt.f32.21(float %331) +> %333 = fneg float %168 +> %334 = fmul float %332, %333 +> %335 = bitcast i32 %152 to float +> %336 = fadd float %335, %334 +> %337 = bitcast i32 %152 to float +840,853c845,858 +< %339 = bitcast i32 %152 to float +< %340 = fmul float %338, %339 +< %341 = fadd float %340, 0.000000e+00 +< %342 = bitcast i32 %157 to float +< %343 = bitcast i32 %157 to float +< %344 = fmul float %342, %343 +< %345 = fadd float %341, %344 +< %346 = call float @llvm.sqrt.f32.22(float %345) +< %347 = fneg float %169 +< %348 = fmul float %346, %347 +< %349 = bitcast i32 %152 to float +< %350 = fadd float %349, %348 +< %351 = fmul float %337, %350 +< %352 = fadd float %351, 0.000000e+00 +--- +> %339 = fmul float %337, %338 +> %340 = fadd float %339, 0.000000e+00 +> %341 = bitcast i32 %131 to float +> %342 = bitcast i32 %131 to float +> %343 = fmul float %341, %342 +> %344 = fadd float %340, %343 +> %345 = call float @llvm.sqrt.f32.22(float %344) +> %346 = fneg float %168 +> %347 = fmul float %345, %346 +> %348 = bitcast i32 %152 to float +> %349 = fadd float %348, %347 +> %350 = fmul float %336, %349 +> %351 = fadd float %350, 0.000000e+00 +> %352 = bitcast i32 %152 to float +855,867c860,872 +< %354 = bitcast i32 %152 to float +< %355 = fmul float %353, %354 +< %356 = fadd float %355, 0.000000e+00 +< %357 = bitcast i32 %157 to float +< %358 = bitcast i32 %157 to float +< %359 = fmul float %357, %358 +< %360 = fadd float %356, %359 +< %361 = call float @llvm.sqrt.f32.23(float %360) +< %362 = fneg float %169 +< %363 = fmul float %361, %362 +< %364 = fmul float %363, 0.000000e+00 +< %365 = bitcast i32 %157 to float +< %366 = fadd float %365, %364 +--- +> %354 = fmul float %352, %353 +> %355 = fadd float %354, 0.000000e+00 +> %356 = bitcast i32 %131 to float +> %357 = bitcast i32 %131 to float +> %358 = fmul float %356, %357 +> %359 = fadd float %355, %358 +> %360 = call float @llvm.sqrt.f32.23(float %359) +> %361 = fneg float %168 +> %362 = fmul float %360, %361 +> %363 = fmul float %362, 0.000000e+00 +> %364 = bitcast i32 %131 to float +> %365 = fadd float %364, %363 +> %366 = bitcast i32 %152 to float +869,887c874,892 +< %368 = bitcast i32 %152 to float +< %369 = fmul float %367, %368 +< %370 = fadd float %369, 0.000000e+00 +< %371 = bitcast i32 %157 to float +< %372 = bitcast i32 %157 to float +< %373 = fmul float %371, %372 +< %374 = fadd float %370, %373 +< %375 = call float @llvm.sqrt.f32.24(float %374) +< %376 = fneg float %169 +< %377 = fmul float %375, %376 +< %378 = fmul float %377, 0.000000e+00 +< %379 = bitcast i32 %157 to float +< %380 = fadd float %379, %378 +< %381 = fmul float %366, %380 +< %382 = fadd float %352, %381 +< %383 = call float @llvm.sqrt.f32.25(float %382) +< %384 = fadd float %383, 0.000000e+00 +< %385 = fdiv float %324, %384 +< %386 = fmul float %385, 2.000000e+00 +--- +> %368 = fmul float %366, %367 +> %369 = fadd float %368, 0.000000e+00 +> %370 = bitcast i32 %131 to float +> %371 = bitcast i32 %131 to float +> %372 = fmul float %370, %371 +> %373 = fadd float %369, %372 +> %374 = call float @llvm.sqrt.f32.24(float %373) +> %375 = fneg float %168 +> %376 = fmul float %374, %375 +> %377 = fmul float %376, 0.000000e+00 +> %378 = bitcast i32 
%131 to float +> %379 = fadd float %378, %377 +> %380 = fmul float %365, %379 +> %381 = fadd float %351, %380 +> %382 = call float @llvm.sqrt.f32.25(float %381) +> %383 = fadd float %382, 0.000000e+00 +> %384 = fdiv float %323, %383 +> %385 = fmul float %384, 2.000000e+00 +> %386 = bitcast i32 %152 to float +889,901c894,906 +< %388 = bitcast i32 %152 to float +< %389 = fmul float %387, %388 +< %390 = fadd float %389, 0.000000e+00 +< %391 = bitcast i32 %157 to float +< %392 = bitcast i32 %157 to float +< %393 = fmul float %391, %392 +< %394 = fadd float %390, %393 +< %395 = call float @llvm.sqrt.f32.26(float %394) +< %396 = fneg float %169 +< %397 = fmul float %395, %396 +< %398 = fmul float %397, 0.000000e+00 +< %399 = bitcast i32 %157 to float +< %400 = fadd float %399, %398 +--- +> %388 = fmul float %386, %387 +> %389 = fadd float %388, 0.000000e+00 +> %390 = bitcast i32 %131 to float +> %391 = bitcast i32 %131 to float +> %392 = fmul float %390, %391 +> %393 = fadd float %389, %392 +> %394 = call float @llvm.sqrt.f32.26(float %393) +> %395 = fneg float %168 +> %396 = fmul float %394, %395 +> %397 = fmul float %396, 0.000000e+00 +> %398 = bitcast i32 %131 to float +> %399 = fadd float %398, %397 +> %400 = bitcast i32 %152 to float +903,914c908,919 +< %402 = bitcast i32 %152 to float +< %403 = fmul float %401, %402 +< %404 = fadd float %403, 0.000000e+00 +< %405 = bitcast i32 %157 to float +< %406 = bitcast i32 %157 to float +< %407 = fmul float %405, %406 +< %408 = fadd float %404, %407 +< %409 = call float @llvm.sqrt.f32.27(float %408) +< %410 = fneg float %169 +< %411 = fmul float %409, %410 +< %412 = bitcast i32 %152 to float +< %413 = fadd float %412, %411 +--- +> %402 = fmul float %400, %401 +> %403 = fadd float %402, 0.000000e+00 +> %404 = bitcast i32 %131 to float +> %405 = bitcast i32 %131 to float +> %406 = fmul float %404, %405 +> %407 = fadd float %403, %406 +> %408 = call float @llvm.sqrt.f32.27(float %407) +> %409 = fneg float %168 +> %410 = fmul float %408, %409 +> %411 = bitcast i32 %152 to float +> %412 = fadd float %411, %410 +> %413 = bitcast i32 %152 to float +916,929c921,934 +< %415 = bitcast i32 %152 to float +< %416 = fmul float %414, %415 +< %417 = fadd float %416, 0.000000e+00 +< %418 = bitcast i32 %157 to float +< %419 = bitcast i32 %157 to float +< %420 = fmul float %418, %419 +< %421 = fadd float %417, %420 +< %422 = call float @llvm.sqrt.f32.28(float %421) +< %423 = fneg float %169 +< %424 = fmul float %422, %423 +< %425 = bitcast i32 %152 to float +< %426 = fadd float %425, %424 +< %427 = fmul float %413, %426 +< %428 = fadd float %427, 0.000000e+00 +--- +> %415 = fmul float %413, %414 +> %416 = fadd float %415, 0.000000e+00 +> %417 = bitcast i32 %131 to float +> %418 = bitcast i32 %131 to float +> %419 = fmul float %417, %418 +> %420 = fadd float %416, %419 +> %421 = call float @llvm.sqrt.f32.28(float %420) +> %422 = fneg float %168 +> %423 = fmul float %421, %422 +> %424 = bitcast i32 %152 to float +> %425 = fadd float %424, %423 +> %426 = fmul float %412, %425 +> %427 = fadd float %426, 0.000000e+00 +> %428 = bitcast i32 %152 to float +931,943c936,948 +< %430 = bitcast i32 %152 to float +< %431 = fmul float %429, %430 +< %432 = fadd float %431, 0.000000e+00 +< %433 = bitcast i32 %157 to float +< %434 = bitcast i32 %157 to float +< %435 = fmul float %433, %434 +< %436 = fadd float %432, %435 +< %437 = call float @llvm.sqrt.f32.29(float %436) +< %438 = fneg float %169 +< %439 = fmul float %437, %438 +< %440 = fmul float %439, 0.000000e+00 +< %441 = bitcast 
i32 %157 to float +< %442 = fadd float %441, %440 +--- +> %430 = fmul float %428, %429 +> %431 = fadd float %430, 0.000000e+00 +> %432 = bitcast i32 %131 to float +> %433 = bitcast i32 %131 to float +> %434 = fmul float %432, %433 +> %435 = fadd float %431, %434 +> %436 = call float @llvm.sqrt.f32.29(float %435) +> %437 = fneg float %168 +> %438 = fmul float %436, %437 +> %439 = fmul float %438, 0.000000e+00 +> %440 = bitcast i32 %131 to float +> %441 = fadd float %440, %439 +> %442 = bitcast i32 %152 to float +945,964c950,969 +< %444 = bitcast i32 %152 to float +< %445 = fmul float %443, %444 +< %446 = fadd float %445, 0.000000e+00 +< %447 = bitcast i32 %157 to float +< %448 = bitcast i32 %157 to float +< %449 = fmul float %447, %448 +< %450 = fadd float %446, %449 +< %451 = call float @llvm.sqrt.f32.30(float %450) +< %452 = fneg float %169 +< %453 = fmul float %451, %452 +< %454 = fmul float %453, 0.000000e+00 +< %455 = bitcast i32 %157 to float +< %456 = fadd float %455, %454 +< %457 = fmul float %442, %456 +< %458 = fadd float %428, %457 +< %459 = call float @llvm.sqrt.f32.31(float %458) +< %460 = fadd float %459, 0.000000e+00 +< %461 = fdiv float %400, %460 +< %462 = fmul float %386, %461 +< %463 = insertelement <4 x float> %311, float %462, i32 2 +--- +> %444 = fmul float %442, %443 +> %445 = fadd float %444, 0.000000e+00 +> %446 = bitcast i32 %131 to float +> %447 = bitcast i32 %131 to float +> %448 = fmul float %446, %447 +> %449 = fadd float %445, %448 +> %450 = call float @llvm.sqrt.f32.30(float %449) +> %451 = fneg float %168 +> %452 = fmul float %450, %451 +> %453 = fmul float %452, 0.000000e+00 +> %454 = bitcast i32 %131 to float +> %455 = fadd float %454, %453 +> %456 = fmul float %441, %455 +> %457 = fadd float %427, %456 +> %458 = call float @llvm.sqrt.f32.31(float %457) +> %459 = fadd float %458, 0.000000e+00 +> %460 = fdiv float %399, %459 +> %461 = fmul float %385, %460 +> %462 = insertelement <4 x float> %310, float %461, i32 2 +> %463 = bitcast i32 %152 to float +966,978c971,983 +< %465 = bitcast i32 %152 to float +< %466 = fmul float %464, %465 +< %467 = fadd float %466, 0.000000e+00 +< %468 = bitcast i32 %157 to float +< %469 = bitcast i32 %157 to float +< %470 = fmul float %468, %469 +< %471 = fadd float %467, %470 +< %472 = call float @llvm.sqrt.f32.32(float %471) +< %473 = fneg float %169 +< %474 = fmul float %472, %473 +< %475 = fmul float %474, 0.000000e+00 +< %476 = bitcast i32 %157 to float +< %477 = fadd float %476, %475 +--- +> %465 = fmul float %463, %464 +> %466 = fadd float %465, 0.000000e+00 +> %467 = bitcast i32 %131 to float +> %468 = bitcast i32 %131 to float +> %469 = fmul float %467, %468 +> %470 = fadd float %466, %469 +> %471 = call float @llvm.sqrt.f32.32(float %470) +> %472 = fneg float %168 +> %473 = fmul float %471, %472 +> %474 = fmul float %473, 0.000000e+00 +> %475 = bitcast i32 %131 to float +> %476 = fadd float %475, %474 +> %477 = bitcast i32 %152 to float +980,991c985,996 +< %479 = bitcast i32 %152 to float +< %480 = fmul float %478, %479 +< %481 = fadd float %480, 0.000000e+00 +< %482 = bitcast i32 %157 to float +< %483 = bitcast i32 %157 to float +< %484 = fmul float %482, %483 +< %485 = fadd float %481, %484 +< %486 = call float @llvm.sqrt.f32.33(float %485) +< %487 = fneg float %169 +< %488 = fmul float %486, %487 +< %489 = bitcast i32 %152 to float +< %490 = fadd float %489, %488 +--- +> %479 = fmul float %477, %478 +> %480 = fadd float %479, 0.000000e+00 +> %481 = bitcast i32 %131 to float +> %482 = bitcast i32 %131 to float +> 
%483 = fmul float %481, %482 +> %484 = fadd float %480, %483 +> %485 = call float @llvm.sqrt.f32.33(float %484) +> %486 = fneg float %168 +> %487 = fmul float %485, %486 +> %488 = bitcast i32 %152 to float +> %489 = fadd float %488, %487 +> %490 = bitcast i32 %152 to float +993,1006c998,1011 +< %492 = bitcast i32 %152 to float +< %493 = fmul float %491, %492 +< %494 = fadd float %493, 0.000000e+00 +< %495 = bitcast i32 %157 to float +< %496 = bitcast i32 %157 to float +< %497 = fmul float %495, %496 +< %498 = fadd float %494, %497 +< %499 = call float @llvm.sqrt.f32.34(float %498) +< %500 = fneg float %169 +< %501 = fmul float %499, %500 +< %502 = bitcast i32 %152 to float +< %503 = fadd float %502, %501 +< %504 = fmul float %490, %503 +< %505 = fadd float %504, 0.000000e+00 +--- +> %492 = fmul float %490, %491 +> %493 = fadd float %492, 0.000000e+00 +> %494 = bitcast i32 %131 to float +> %495 = bitcast i32 %131 to float +> %496 = fmul float %494, %495 +> %497 = fadd float %493, %496 +> %498 = call float @llvm.sqrt.f32.34(float %497) +> %499 = fneg float %168 +> %500 = fmul float %498, %499 +> %501 = bitcast i32 %152 to float +> %502 = fadd float %501, %500 +> %503 = fmul float %489, %502 +> %504 = fadd float %503, 0.000000e+00 +> %505 = bitcast i32 %152 to float +1008,1020c1013,1025 +< %507 = bitcast i32 %152 to float +< %508 = fmul float %506, %507 +< %509 = fadd float %508, 0.000000e+00 +< %510 = bitcast i32 %157 to float +< %511 = bitcast i32 %157 to float +< %512 = fmul float %510, %511 +< %513 = fadd float %509, %512 +< %514 = call float @llvm.sqrt.f32.35(float %513) +< %515 = fneg float %169 +< %516 = fmul float %514, %515 +< %517 = fmul float %516, 0.000000e+00 +< %518 = bitcast i32 %157 to float +< %519 = fadd float %518, %517 +--- +> %507 = fmul float %505, %506 +> %508 = fadd float %507, 0.000000e+00 +> %509 = bitcast i32 %131 to float +> %510 = bitcast i32 %131 to float +> %511 = fmul float %509, %510 +> %512 = fadd float %508, %511 +> %513 = call float @llvm.sqrt.f32.35(float %512) +> %514 = fneg float %168 +> %515 = fmul float %513, %514 +> %516 = fmul float %515, 0.000000e+00 +> %517 = bitcast i32 %131 to float +> %518 = fadd float %517, %516 +> %519 = bitcast i32 %152 to float +1022,1040c1027,1045 +< %521 = bitcast i32 %152 to float +< %522 = fmul float %520, %521 +< %523 = fadd float %522, 0.000000e+00 +< %524 = bitcast i32 %157 to float +< %525 = bitcast i32 %157 to float +< %526 = fmul float %524, %525 +< %527 = fadd float %523, %526 +< %528 = call float @llvm.sqrt.f32.36(float %527) +< %529 = fneg float %169 +< %530 = fmul float %528, %529 +< %531 = fmul float %530, 0.000000e+00 +< %532 = bitcast i32 %157 to float +< %533 = fadd float %532, %531 +< %534 = fmul float %519, %533 +< %535 = fadd float %505, %534 +< %536 = call float @llvm.sqrt.f32.37(float %535) +< %537 = fadd float %536, 0.000000e+00 +< %538 = fdiv float %477, %537 +< %539 = fmul float %538, 2.000000e+00 +--- +> %521 = fmul float %519, %520 +> %522 = fadd float %521, 0.000000e+00 +> %523 = bitcast i32 %131 to float +> %524 = bitcast i32 %131 to float +> %525 = fmul float %523, %524 +> %526 = fadd float %522, %525 +> %527 = call float @llvm.sqrt.f32.36(float %526) +> %528 = fneg float %168 +> %529 = fmul float %527, %528 +> %530 = fmul float %529, 0.000000e+00 +> %531 = bitcast i32 %131 to float +> %532 = fadd float %531, %530 +> %533 = fmul float %518, %532 +> %534 = fadd float %504, %533 +> %535 = call float @llvm.sqrt.f32.37(float %534) +> %536 = fadd float %535, 0.000000e+00 +> %537 = fdiv float %476, 
%536 +> %538 = fmul float %537, 2.000000e+00 +> %539 = bitcast i32 %152 to float +1042,1053c1047,1058 +< %541 = bitcast i32 %152 to float +< %542 = fmul float %540, %541 +< %543 = fadd float %542, 0.000000e+00 +< %544 = bitcast i32 %157 to float +< %545 = bitcast i32 %157 to float +< %546 = fmul float %544, %545 +< %547 = fadd float %543, %546 +< %548 = call float @llvm.sqrt.f32.38(float %547) +< %549 = fneg float %169 +< %550 = fmul float %548, %549 +< %551 = bitcast i32 %152 to float +< %552 = fadd float %551, %550 +--- +> %541 = fmul float %539, %540 +> %542 = fadd float %541, 0.000000e+00 +> %543 = bitcast i32 %131 to float +> %544 = bitcast i32 %131 to float +> %545 = fmul float %543, %544 +> %546 = fadd float %542, %545 +> %547 = call float @llvm.sqrt.f32.38(float %546) +> %548 = fneg float %168 +> %549 = fmul float %547, %548 +> %550 = bitcast i32 %152 to float +> %551 = fadd float %550, %549 +> %552 = bitcast i32 %152 to float +1055,1066c1060,1071 +< %554 = bitcast i32 %152 to float +< %555 = fmul float %553, %554 +< %556 = fadd float %555, 0.000000e+00 +< %557 = bitcast i32 %157 to float +< %558 = bitcast i32 %157 to float +< %559 = fmul float %557, %558 +< %560 = fadd float %556, %559 +< %561 = call float @llvm.sqrt.f32.39(float %560) +< %562 = fneg float %169 +< %563 = fmul float %561, %562 +< %564 = bitcast i32 %152 to float +< %565 = fadd float %564, %563 +--- +> %554 = fmul float %552, %553 +> %555 = fadd float %554, 0.000000e+00 +> %556 = bitcast i32 %131 to float +> %557 = bitcast i32 %131 to float +> %558 = fmul float %556, %557 +> %559 = fadd float %555, %558 +> %560 = call float @llvm.sqrt.f32.39(float %559) +> %561 = fneg float %168 +> %562 = fmul float %560, %561 +> %563 = bitcast i32 %152 to float +> %564 = fadd float %563, %562 +> %565 = bitcast i32 %152 to float +1068,1081c1073,1086 +< %567 = bitcast i32 %152 to float +< %568 = fmul float %566, %567 +< %569 = fadd float %568, 0.000000e+00 +< %570 = bitcast i32 %157 to float +< %571 = bitcast i32 %157 to float +< %572 = fmul float %570, %571 +< %573 = fadd float %569, %572 +< %574 = call float @llvm.sqrt.f32.40(float %573) +< %575 = fneg float %169 +< %576 = fmul float %574, %575 +< %577 = bitcast i32 %152 to float +< %578 = fadd float %577, %576 +< %579 = fmul float %565, %578 +< %580 = fadd float %579, 0.000000e+00 +--- +> %567 = fmul float %565, %566 +> %568 = fadd float %567, 0.000000e+00 +> %569 = bitcast i32 %131 to float +> %570 = bitcast i32 %131 to float +> %571 = fmul float %569, %570 +> %572 = fadd float %568, %571 +> %573 = call float @llvm.sqrt.f32.40(float %572) +> %574 = fneg float %168 +> %575 = fmul float %573, %574 +> %576 = bitcast i32 %152 to float +> %577 = fadd float %576, %575 +> %578 = fmul float %564, %577 +> %579 = fadd float %578, 0.000000e+00 +> %580 = bitcast i32 %152 to float +1083,1095c1088,1100 +< %582 = bitcast i32 %152 to float +< %583 = fmul float %581, %582 +< %584 = fadd float %583, 0.000000e+00 +< %585 = bitcast i32 %157 to float +< %586 = bitcast i32 %157 to float +< %587 = fmul float %585, %586 +< %588 = fadd float %584, %587 +< %589 = call float @llvm.sqrt.f32.41(float %588) +< %590 = fneg float %169 +< %591 = fmul float %589, %590 +< %592 = fmul float %591, 0.000000e+00 +< %593 = bitcast i32 %157 to float +< %594 = fadd float %593, %592 +--- +> %582 = fmul float %580, %581 +> %583 = fadd float %582, 0.000000e+00 +> %584 = bitcast i32 %131 to float +> %585 = bitcast i32 %131 to float +> %586 = fmul float %584, %585 +> %587 = fadd float %583, %586 +> %588 = call float 
@llvm.sqrt.f32.41(float %587) +> %589 = fneg float %168 +> %590 = fmul float %588, %589 +> %591 = fmul float %590, 0.000000e+00 +> %592 = bitcast i32 %131 to float +> %593 = fadd float %592, %591 +> %594 = bitcast i32 %152 to float +1097,1117c1102,1122 +< %596 = bitcast i32 %152 to float +< %597 = fmul float %595, %596 +< %598 = fadd float %597, 0.000000e+00 +< %599 = bitcast i32 %157 to float +< %600 = bitcast i32 %157 to float +< %601 = fmul float %599, %600 +< %602 = fadd float %598, %601 +< %603 = call float @llvm.sqrt.f32.42(float %602) +< %604 = fneg float %169 +< %605 = fmul float %603, %604 +< %606 = fmul float %605, 0.000000e+00 +< %607 = bitcast i32 %157 to float +< %608 = fadd float %607, %606 +< %609 = fmul float %594, %608 +< %610 = fadd float %580, %609 +< %611 = call float @llvm.sqrt.f32.43(float %610) +< %612 = fadd float %611, 0.000000e+00 +< %613 = fdiv float %552, %612 +< %614 = fmul float %539, %613 +< %615 = insertelement <4 x float> %463, float %614, i32 3 +< %616 = fsub <4 x float> , %615 +--- +> %596 = fmul float %594, %595 +> %597 = fadd float %596, 0.000000e+00 +> %598 = bitcast i32 %131 to float +> %599 = bitcast i32 %131 to float +> %600 = fmul float %598, %599 +> %601 = fadd float %597, %600 +> %602 = call float @llvm.sqrt.f32.42(float %601) +> %603 = fneg float %168 +> %604 = fmul float %602, %603 +> %605 = fmul float %604, 0.000000e+00 +> %606 = bitcast i32 %131 to float +> %607 = fadd float %606, %605 +> %608 = fmul float %593, %607 +> %609 = fadd float %579, %608 +> %610 = call float @llvm.sqrt.f32.43(float %609) +> %611 = fadd float %610, 0.000000e+00 +> %612 = fdiv float %551, %611 +> %613 = fmul float %538, %612 +> %614 = insertelement <4 x float> %462, float %613, i32 3 +> %615 = fsub <4 x float> , %614 +> %616 = bitcast i32 %152 to float +1119,1131c1124,1136 +< %618 = bitcast i32 %152 to float +< %619 = fmul float %617, %618 +< %620 = fadd float %619, 0.000000e+00 +< %621 = bitcast i32 %157 to float +< %622 = bitcast i32 %157 to float +< %623 = fmul float %621, %622 +< %624 = fadd float %620, %623 +< %625 = call float @llvm.sqrt.f32.44(float %624) +< %626 = fneg float %169 +< %627 = fmul float %625, %626 +< %628 = fmul float %627, 0.000000e+00 +< %629 = bitcast i32 %157 to float +< %630 = fadd float %629, %628 +--- +> %618 = fmul float %616, %617 +> %619 = fadd float %618, 0.000000e+00 +> %620 = bitcast i32 %131 to float +> %621 = bitcast i32 %131 to float +> %622 = fmul float %620, %621 +> %623 = fadd float %619, %622 +> %624 = call float @llvm.sqrt.f32.44(float %623) +> %625 = fneg float %168 +> %626 = fmul float %624, %625 +> %627 = fmul float %626, 0.000000e+00 +> %628 = bitcast i32 %131 to float +> %629 = fadd float %628, %627 +> %630 = bitcast i32 %152 to float +1133,1144c1138,1149 +< %632 = bitcast i32 %152 to float +< %633 = fmul float %631, %632 +< %634 = fadd float %633, 0.000000e+00 +< %635 = bitcast i32 %157 to float +< %636 = bitcast i32 %157 to float +< %637 = fmul float %635, %636 +< %638 = fadd float %634, %637 +< %639 = call float @llvm.sqrt.f32.45(float %638) +< %640 = fneg float %169 +< %641 = fmul float %639, %640 +< %642 = bitcast i32 %152 to float +< %643 = fadd float %642, %641 +--- +> %632 = fmul float %630, %631 +> %633 = fadd float %632, 0.000000e+00 +> %634 = bitcast i32 %131 to float +> %635 = bitcast i32 %131 to float +> %636 = fmul float %634, %635 +> %637 = fadd float %633, %636 +> %638 = call float @llvm.sqrt.f32.45(float %637) +> %639 = fneg float %168 +> %640 = fmul float %638, %639 +> %641 = bitcast i32 %152 to float 
+> %642 = fadd float %641, %640 +> %643 = bitcast i32 %152 to float +1146,1159c1151,1164 +< %645 = bitcast i32 %152 to float +< %646 = fmul float %644, %645 +< %647 = fadd float %646, 0.000000e+00 +< %648 = bitcast i32 %157 to float +< %649 = bitcast i32 %157 to float +< %650 = fmul float %648, %649 +< %651 = fadd float %647, %650 +< %652 = call float @llvm.sqrt.f32.46(float %651) +< %653 = fneg float %169 +< %654 = fmul float %652, %653 +< %655 = bitcast i32 %152 to float +< %656 = fadd float %655, %654 +< %657 = fmul float %643, %656 +< %658 = fadd float %657, 0.000000e+00 +--- +> %645 = fmul float %643, %644 +> %646 = fadd float %645, 0.000000e+00 +> %647 = bitcast i32 %131 to float +> %648 = bitcast i32 %131 to float +> %649 = fmul float %647, %648 +> %650 = fadd float %646, %649 +> %651 = call float @llvm.sqrt.f32.46(float %650) +> %652 = fneg float %168 +> %653 = fmul float %651, %652 +> %654 = bitcast i32 %152 to float +> %655 = fadd float %654, %653 +> %656 = fmul float %642, %655 +> %657 = fadd float %656, 0.000000e+00 +> %658 = bitcast i32 %152 to float +1161,1173c1166,1178 +< %660 = bitcast i32 %152 to float +< %661 = fmul float %659, %660 +< %662 = fadd float %661, 0.000000e+00 +< %663 = bitcast i32 %157 to float +< %664 = bitcast i32 %157 to float +< %665 = fmul float %663, %664 +< %666 = fadd float %662, %665 +< %667 = call float @llvm.sqrt.f32.47(float %666) +< %668 = fneg float %169 +< %669 = fmul float %667, %668 +< %670 = fmul float %669, 0.000000e+00 +< %671 = bitcast i32 %157 to float +< %672 = fadd float %671, %670 +--- +> %660 = fmul float %658, %659 +> %661 = fadd float %660, 0.000000e+00 +> %662 = bitcast i32 %131 to float +> %663 = bitcast i32 %131 to float +> %664 = fmul float %662, %663 +> %665 = fadd float %661, %664 +> %666 = call float @llvm.sqrt.f32.47(float %665) +> %667 = fneg float %168 +> %668 = fmul float %666, %667 +> %669 = fmul float %668, 0.000000e+00 +> %670 = bitcast i32 %131 to float +> %671 = fadd float %670, %669 +> %672 = bitcast i32 %152 to float +1175,1193c1180,1198 +< %674 = bitcast i32 %152 to float +< %675 = fmul float %673, %674 +< %676 = fadd float %675, 0.000000e+00 +< %677 = bitcast i32 %157 to float +< %678 = bitcast i32 %157 to float +< %679 = fmul float %677, %678 +< %680 = fadd float %676, %679 +< %681 = call float @llvm.sqrt.f32.48(float %680) +< %682 = fneg float %169 +< %683 = fmul float %681, %682 +< %684 = fmul float %683, 0.000000e+00 +< %685 = bitcast i32 %157 to float +< %686 = fadd float %685, %684 +< %687 = fmul float %672, %686 +< %688 = fadd float %658, %687 +< %689 = call float @llvm.sqrt.f32.49(float %688) +< %690 = fadd float %689, 0.000000e+00 +< %691 = fdiv float %630, %690 +< %692 = fmul float %691, 2.000000e+00 +--- +> %674 = fmul float %672, %673 +> %675 = fadd float %674, 0.000000e+00 +> %676 = bitcast i32 %131 to float +> %677 = bitcast i32 %131 to float +> %678 = fmul float %676, %677 +> %679 = fadd float %675, %678 +> %680 = call float @llvm.sqrt.f32.48(float %679) +> %681 = fneg float %168 +> %682 = fmul float %680, %681 +> %683 = fmul float %682, 0.000000e+00 +> %684 = bitcast i32 %131 to float +> %685 = fadd float %684, %683 +> %686 = fmul float %671, %685 +> %687 = fadd float %657, %686 +> %688 = call float @llvm.sqrt.f32.49(float %687) +> %689 = fadd float %688, 0.000000e+00 +> %690 = fdiv float %629, %689 +> %691 = fmul float %690, 2.000000e+00 +> %692 = bitcast i32 %152 to float +1195,1207c1200,1212 +< %694 = bitcast i32 %152 to float +< %695 = fmul float %693, %694 +< %696 = fadd float %695, 
0.000000e+00 +< %697 = bitcast i32 %157 to float +< %698 = bitcast i32 %157 to float +< %699 = fmul float %697, %698 +< %700 = fadd float %696, %699 +< %701 = call float @llvm.sqrt.f32.50(float %700) +< %702 = fneg float %169 +< %703 = fmul float %701, %702 +< %704 = fmul float %703, 0.000000e+00 +< %705 = bitcast i32 %157 to float +< %706 = fadd float %705, %704 +--- +> %694 = fmul float %692, %693 +> %695 = fadd float %694, 0.000000e+00 +> %696 = bitcast i32 %131 to float +> %697 = bitcast i32 %131 to float +> %698 = fmul float %696, %697 +> %699 = fadd float %695, %698 +> %700 = call float @llvm.sqrt.f32.50(float %699) +> %701 = fneg float %168 +> %702 = fmul float %700, %701 +> %703 = fmul float %702, 0.000000e+00 +> %704 = bitcast i32 %131 to float +> %705 = fadd float %704, %703 +> %706 = bitcast i32 %152 to float +1209,1220c1214,1225 +< %708 = bitcast i32 %152 to float +< %709 = fmul float %707, %708 +< %710 = fadd float %709, 0.000000e+00 +< %711 = bitcast i32 %157 to float +< %712 = bitcast i32 %157 to float +< %713 = fmul float %711, %712 +< %714 = fadd float %710, %713 +< %715 = call float @llvm.sqrt.f32.51(float %714) +< %716 = fneg float %169 +< %717 = fmul float %715, %716 +< %718 = bitcast i32 %152 to float +< %719 = fadd float %718, %717 +--- +> %708 = fmul float %706, %707 +> %709 = fadd float %708, 0.000000e+00 +> %710 = bitcast i32 %131 to float +> %711 = bitcast i32 %131 to float +> %712 = fmul float %710, %711 +> %713 = fadd float %709, %712 +> %714 = call float @llvm.sqrt.f32.51(float %713) +> %715 = fneg float %168 +> %716 = fmul float %714, %715 +> %717 = bitcast i32 %152 to float +> %718 = fadd float %717, %716 +> %719 = bitcast i32 %152 to float +1222,1235c1227,1240 +< %721 = bitcast i32 %152 to float +< %722 = fmul float %720, %721 +< %723 = fadd float %722, 0.000000e+00 +< %724 = bitcast i32 %157 to float +< %725 = bitcast i32 %157 to float +< %726 = fmul float %724, %725 +< %727 = fadd float %723, %726 +< %728 = call float @llvm.sqrt.f32.52(float %727) +< %729 = fneg float %169 +< %730 = fmul float %728, %729 +< %731 = bitcast i32 %152 to float +< %732 = fadd float %731, %730 +< %733 = fmul float %719, %732 +< %734 = fadd float %733, 0.000000e+00 +--- +> %721 = fmul float %719, %720 +> %722 = fadd float %721, 0.000000e+00 +> %723 = bitcast i32 %131 to float +> %724 = bitcast i32 %131 to float +> %725 = fmul float %723, %724 +> %726 = fadd float %722, %725 +> %727 = call float @llvm.sqrt.f32.52(float %726) +> %728 = fneg float %168 +> %729 = fmul float %727, %728 +> %730 = bitcast i32 %152 to float +> %731 = fadd float %730, %729 +> %732 = fmul float %718, %731 +> %733 = fadd float %732, 0.000000e+00 +> %734 = bitcast i32 %152 to float +1237,1249c1242,1254 +< %736 = bitcast i32 %152 to float +< %737 = fmul float %735, %736 +< %738 = fadd float %737, 0.000000e+00 +< %739 = bitcast i32 %157 to float +< %740 = bitcast i32 %157 to float +< %741 = fmul float %739, %740 +< %742 = fadd float %738, %741 +< %743 = call float @llvm.sqrt.f32.53(float %742) +< %744 = fneg float %169 +< %745 = fmul float %743, %744 +< %746 = fmul float %745, 0.000000e+00 +< %747 = bitcast i32 %157 to float +< %748 = fadd float %747, %746 +--- +> %736 = fmul float %734, %735 +> %737 = fadd float %736, 0.000000e+00 +> %738 = bitcast i32 %131 to float +> %739 = bitcast i32 %131 to float +> %740 = fmul float %738, %739 +> %741 = fadd float %737, %740 +> %742 = call float @llvm.sqrt.f32.53(float %741) +> %743 = fneg float %168 +> %744 = fmul float %742, %743 +> %745 = fmul float %744, 
0.000000e+00 +> %746 = bitcast i32 %131 to float +> %747 = fadd float %746, %745 +> %748 = bitcast i32 %152 to float +1251,1309c1256,1314 +< %750 = bitcast i32 %152 to float +< %751 = fmul float %749, %750 +< %752 = fadd float %751, 0.000000e+00 +< %753 = bitcast i32 %157 to float +< %754 = bitcast i32 %157 to float +< %755 = fmul float %753, %754 +< %756 = fadd float %752, %755 +< %757 = call float @llvm.sqrt.f32.54(float %756) +< %758 = fneg float %169 +< %759 = fmul float %757, %758 +< %760 = fmul float %759, 0.000000e+00 +< %761 = bitcast i32 %157 to float +< %762 = fadd float %761, %760 +< %763 = fmul float %748, %762 +< %764 = fadd float %734, %763 +< %765 = call float @llvm.sqrt.f32.55(float %764) +< %766 = fadd float %765, 0.000000e+00 +< %767 = fdiv float %706, %766 +< %768 = fmul float %692, %767 +< %769 = fsub float 1.000000e+00, %768 +< %770 = insertelement <4 x float> zeroinitializer, float %769, i32 0 +< %771 = insertelement <4 x float> %770, float 0.000000e+00, i32 1 +< %772 = insertelement <4 x float> %771, float 0.000000e+00, i32 2 +< %773 = insertelement <4 x float> %772, float 0.000000e+00, i32 3 +< %774 = shufflevector <4 x float> %616, <4 x float> %773, <8 x i32> +< %775 = extractelement <8 x float> %774, i32 0 +< %776 = bitcast i32* %23 to float* +< %777 = getelementptr float, float* %2, i32 0 +< %778 = getelementptr inbounds float, float* %777, i64 3 +< %779 = bitcast float* %778 to i32* +< %780 = bitcast i32* %779 to float* +< store float %775, float* %780, align 4 +< %781 = extractelement <8 x float> %774, i32 1 +< %782 = bitcast i32* %60 to float* +< %783 = alloca [4 x float], align 16 +< %784 = bitcast [4 x float]* %783 to i32* +< %785 = bitcast i32* %784 to float* +< store float %781, float* %785, align 4 +< %786 = extractelement <8 x float> %774, i32 2 +< %787 = bitcast i32* %63 to float* +< %788 = getelementptr inbounds [4 x float], [4 x float]* %783, i64 0, i64 1 +< %789 = bitcast float* %788 to i32* +< %790 = bitcast i32* %789 to float* +< store float %786, float* %790, align 4 +< %791 = extractelement <8 x float> %774, i32 3 +< %792 = bitcast i32* %66 to float* +< %793 = getelementptr inbounds [4 x float], [4 x float]* %783, i64 0, i64 2 +< %794 = bitcast float* %793 to i32* +< %795 = bitcast i32* %794 to float* +< store float %791, float* %795, align 4 +< %796 = extractelement <8 x float> %774, i32 4 +< %797 = bitcast i32* %69 to float* +< %798 = getelementptr inbounds [4 x float], [4 x float]* %783, i64 0, i64 3 +< %799 = bitcast float* %798 to i32* +< %800 = bitcast i32* %799 to float* +< store float %796, float* %800, align 4 +< %801 = bitcast float* %1 to i8* +< %802 = bitcast [4 x float]* %783 to i8* +< call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %801, i8* nonnull align 16 dereferenceable(16) %802, i64 16, i1 false) +--- +> %750 = fmul float %748, %749 +> %751 = fadd float %750, 0.000000e+00 +> %752 = bitcast i32 %131 to float +> %753 = bitcast i32 %131 to float +> %754 = fmul float %752, %753 +> %755 = fadd float %751, %754 +> %756 = call float @llvm.sqrt.f32.54(float %755) +> %757 = fneg float %168 +> %758 = fmul float %756, %757 +> %759 = fmul float %758, 0.000000e+00 +> %760 = bitcast i32 %131 to float +> %761 = fadd float %760, %759 +> %762 = fmul float %747, %761 +> %763 = fadd float %733, %762 +> %764 = call float @llvm.sqrt.f32.55(float %763) +> %765 = fadd float %764, 0.000000e+00 +> %766 = fdiv float %705, %765 +> %767 = fmul float %691, %766 +> %768 = fsub float 1.000000e+00, %767 +> %769 = insertelement <4 
x float> zeroinitializer, float %768, i32 0 +> %770 = insertelement <4 x float> %769, float 0.000000e+00, i32 1 +> %771 = insertelement <4 x float> %770, float 0.000000e+00, i32 2 +> %772 = insertelement <4 x float> %771, float 0.000000e+00, i32 3 +> %773 = shufflevector <4 x float> %615, <4 x float> %772, <8 x i32> +> %774 = extractelement <8 x float> %773, i32 0 +> %775 = bitcast i32* %23 to float* +> %776 = getelementptr float, float* %2, i32 0 +> %777 = getelementptr inbounds float, float* %776, i64 3 +> %778 = bitcast float* %777 to i32* +> %779 = bitcast i32* %778 to float* +> store float %774, float* %779, align 4 +> %780 = extractelement <8 x float> %773, i32 1 +> %781 = bitcast i32* %60 to float* +> %782 = alloca [4 x float], align 16 +> %783 = bitcast [4 x float]* %782 to i32* +> %784 = bitcast i32* %783 to float* +> store float %780, float* %784, align 4 +> %785 = extractelement <8 x float> %773, i32 2 +> %786 = bitcast i32* %63 to float* +> %787 = getelementptr inbounds [4 x float], [4 x float]* %782, i64 0, i64 1 +> %788 = bitcast float* %787 to i32* +> %789 = bitcast i32* %788 to float* +> store float %785, float* %789, align 4 +> %790 = extractelement <8 x float> %773, i32 3 +> %791 = bitcast i32* %66 to float* +> %792 = getelementptr inbounds [4 x float], [4 x float]* %782, i64 0, i64 2 +> %793 = bitcast float* %792 to i32* +> %794 = bitcast i32* %793 to float* +> store float %790, float* %794, align 4 +> %795 = extractelement <8 x float> %773, i32 4 +> %796 = bitcast i32* %69 to float* +> %797 = getelementptr inbounds [4 x float], [4 x float]* %782, i64 0, i64 3 +> %798 = bitcast float* %797 to i32* +> %799 = bitcast i32* %798 to float* +> store float %795, float* %799, align 4 +> %800 = bitcast float* %1 to i8* +> %801 = alloca [4 x float], align 16 +> %802 = bitcast [4 x float]* %801 to i8* +> call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %800, i8* nonnull align 16 dereferenceable(16) %802, i64 16, i1 false) +1311,3267c1316,3271 +< %803 = getelementptr float, float* %0, i32 0 +< %804 = bitcast float* %803 to i32* +< %805 = load i32, i32* %804, align 4 +< %806 = bitcast i32 %805 to float +< %807 = bitcast i32 %805 to float +< %808 = fmul float %806, %807 +< %809 = fadd float %808, 0.000000e+00 +< %810 = bitcast i32 %157 to float +< %811 = bitcast i32 %157 to float +< %812 = fmul float %810, %811 +< %813 = fadd float %809, %812 +< %814 = call float @llvm.sqrt.f32.56(float %813) +< %815 = sitofp i32 %168 to float +< %816 = fneg float %815 +< %817 = fmul float %814, %816 +< %818 = bitcast i32 %805 to float +< %819 = fadd float %818, %817 +< %820 = bitcast i32 %805 to float +< %821 = bitcast i32 %805 to float +< %822 = fmul float %820, %821 +< %823 = fadd float %822, 0.000000e+00 +< %824 = bitcast i32 %157 to float +< %825 = bitcast i32 %157 to float +< %826 = fmul float %824, %825 +< %827 = fadd float %823, %826 +< %828 = call float @llvm.sqrt.f32.57(float %827) +< %829 = fneg float %815 +< %830 = fmul float %828, %829 +< %831 = bitcast i32 %805 to float +< %832 = fadd float %831, %830 +< %833 = bitcast i32 %805 to float +< %834 = bitcast i32 %805 to float +< %835 = fmul float %833, %834 +< %836 = fadd float %835, 0.000000e+00 +< %837 = bitcast i32 %157 to float +< %838 = bitcast i32 %157 to float +< %839 = fmul float %837, %838 +< %840 = fadd float %836, %839 +< %841 = call float @llvm.sqrt.f32.58(float %840) +< %842 = fneg float %815 +< %843 = fmul float %841, %842 +< %844 = bitcast i32 %805 to float +< %845 = fadd float %844, %843 +< %846 = 
fmul float %832, %845 +< %847 = fadd float %846, 0.000000e+00 +< %848 = bitcast i32 %805 to float +< %849 = bitcast i32 %805 to float +< %850 = fmul float %848, %849 +< %851 = fadd float %850, 0.000000e+00 +< %852 = bitcast i32 %157 to float +< %853 = bitcast i32 %157 to float +< %854 = fmul float %852, %853 +< %855 = fadd float %851, %854 +< %856 = call float @llvm.sqrt.f32.59(float %855) +< %857 = fneg float %815 +< %858 = fmul float %856, %857 +< %859 = fmul float %858, 0.000000e+00 +< %860 = bitcast i32 %157 to float +< %861 = fadd float %860, %859 +< %862 = bitcast i32 %805 to float +< %863 = bitcast i32 %805 to float +< %864 = fmul float %862, %863 +< %865 = fadd float %864, 0.000000e+00 +< %866 = bitcast i32 %157 to float +< %867 = bitcast i32 %157 to float +< %868 = fmul float %866, %867 +< %869 = fadd float %865, %868 +< %870 = call float @llvm.sqrt.f32.60(float %869) +< %871 = fneg float %815 +< %872 = fmul float %870, %871 +< %873 = fmul float %872, 0.000000e+00 +< %874 = bitcast i32 %157 to float +< %875 = fadd float %874, %873 +< %876 = fmul float %861, %875 +< %877 = fadd float %847, %876 +< %878 = call float @llvm.sqrt.f32.61(float %877) +< %879 = fadd float %878, 0.000000e+00 +< %880 = fdiv float %819, %879 +< %881 = fmul float %880, 2.000000e+00 +< %882 = bitcast i32 %805 to float +< %883 = bitcast i32 %805 to float +< %884 = fmul float %882, %883 +< %885 = fadd float %884, 0.000000e+00 +< %886 = bitcast i32 %157 to float +< %887 = bitcast i32 %157 to float +< %888 = fmul float %886, %887 +< %889 = fadd float %885, %888 +< %890 = call float @llvm.sqrt.f32.62(float %889) +< %891 = fneg float %815 +< %892 = fmul float %890, %891 +< %893 = bitcast i32 %805 to float +< %894 = fadd float %893, %892 +< %895 = bitcast i32 %805 to float +< %896 = bitcast i32 %805 to float +< %897 = fmul float %895, %896 +< %898 = fadd float %897, 0.000000e+00 +< %899 = bitcast i32 %157 to float +< %900 = bitcast i32 %157 to float +< %901 = fmul float %899, %900 +< %902 = fadd float %898, %901 +< %903 = call float @llvm.sqrt.f32.63(float %902) +< %904 = fneg float %815 +< %905 = fmul float %903, %904 +< %906 = bitcast i32 %805 to float +< %907 = fadd float %906, %905 +< %908 = bitcast i32 %805 to float +< %909 = bitcast i32 %805 to float +< %910 = fmul float %908, %909 +< %911 = fadd float %910, 0.000000e+00 +< %912 = bitcast i32 %157 to float +< %913 = bitcast i32 %157 to float +< %914 = fmul float %912, %913 +< %915 = fadd float %911, %914 +< %916 = call float @llvm.sqrt.f32.64(float %915) +< %917 = fneg float %815 +< %918 = fmul float %916, %917 +< %919 = bitcast i32 %805 to float +< %920 = fadd float %919, %918 +< %921 = fmul float %907, %920 +< %922 = fadd float %921, 0.000000e+00 +< %923 = bitcast i32 %805 to float +< %924 = bitcast i32 %805 to float +< %925 = fmul float %923, %924 +< %926 = fadd float %925, 0.000000e+00 +< %927 = bitcast i32 %157 to float +< %928 = bitcast i32 %157 to float +< %929 = fmul float %927, %928 +< %930 = fadd float %926, %929 +< %931 = call float @llvm.sqrt.f32.65(float %930) +< %932 = fneg float %815 +< %933 = fmul float %931, %932 +< %934 = fmul float %933, 0.000000e+00 +< %935 = bitcast i32 %157 to float +< %936 = fadd float %935, %934 +< %937 = bitcast i32 %805 to float +< %938 = bitcast i32 %805 to float +< %939 = fmul float %937, %938 +< %940 = fadd float %939, 0.000000e+00 +< %941 = bitcast i32 %157 to float +< %942 = bitcast i32 %157 to float +< %943 = fmul float %941, %942 +< %944 = fadd float %940, %943 +< %945 = call float @llvm.sqrt.f32.66(float %944) 
+< %946 = fneg float %815 +< %947 = fmul float %945, %946 +< %948 = fmul float %947, 0.000000e+00 +< %949 = bitcast i32 %157 to float +< %950 = fadd float %949, %948 +< %951 = fmul float %936, %950 +< %952 = fadd float %922, %951 +< %953 = call float @llvm.sqrt.f32.67(float %952) +< %954 = fadd float %953, 0.000000e+00 +< %955 = fdiv float %894, %954 +< %956 = fmul float %881, %955 +< %957 = fsub float 1.000000e+00, %956 +< %958 = insertelement <4 x float> zeroinitializer, float %957, i32 0 +< %959 = insertelement <4 x float> %958, float 0.000000e+00, i32 1 +< %960 = insertelement <4 x float> %959, float 0.000000e+00, i32 2 +< %961 = insertelement <4 x float> %960, float 0.000000e+00, i32 3 +< %962 = getelementptr float, float* %0, i32 0 +< %963 = load float, float* %962, align 4 +< %964 = insertelement <4 x float> zeroinitializer, float %963, i32 0 +< %965 = insertelement <4 x float> %964, float 0.000000e+00, i32 1 +< %966 = insertelement <4 x float> %965, float 0.000000e+00, i32 2 +< %967 = insertelement <4 x float> %966, float 0.000000e+00, i32 3 +< %968 = call <4 x float> @llvm.fma.f32.68(<4 x float> %961, <4 x float> %967, <4 x float> zeroinitializer) +< %969 = extractelement <4 x float> %968, i32 0 +< store float %969, float* %2, align 4 +< %970 = load i32, i32* %804, align 4 +< %971 = bitcast i32 %970 to float +< %972 = bitcast i32 %970 to float +< %973 = fmul float %971, %972 +< %974 = fadd float %973, 0.000000e+00 +< %975 = bitcast i32 %157 to float +< %976 = bitcast i32 %157 to float +< %977 = fmul float %975, %976 +< %978 = fadd float %974, %977 +< %979 = call float @llvm.sqrt.f32.69(float %978) +< %980 = fneg float %815 +< %981 = fmul float %979, %980 +< %982 = bitcast i32 %970 to float +< %983 = fadd float %982, %981 +< %984 = bitcast i32 %970 to float +< %985 = bitcast i32 %970 to float +< %986 = fmul float %984, %985 +< %987 = fadd float %986, 0.000000e+00 +< %988 = bitcast i32 %157 to float +< %989 = bitcast i32 %157 to float +< %990 = fmul float %988, %989 +< %991 = fadd float %987, %990 +< %992 = call float @llvm.sqrt.f32.70(float %991) +< %993 = fneg float %815 +< %994 = fmul float %992, %993 +< %995 = bitcast i32 %970 to float +< %996 = fadd float %995, %994 +< %997 = bitcast i32 %970 to float +< %998 = bitcast i32 %970 to float +< %999 = fmul float %997, %998 +< %1000 = fadd float %999, 0.000000e+00 +< %1001 = bitcast i32 %157 to float +< %1002 = bitcast i32 %157 to float +< %1003 = fmul float %1001, %1002 +< %1004 = fadd float %1000, %1003 +< %1005 = call float @llvm.sqrt.f32.71(float %1004) +< %1006 = fneg float %815 +< %1007 = fmul float %1005, %1006 +< %1008 = bitcast i32 %970 to float +< %1009 = fadd float %1008, %1007 +< %1010 = fmul float %996, %1009 +< %1011 = fadd float %1010, 0.000000e+00 +< %1012 = bitcast i32 %970 to float +< %1013 = bitcast i32 %970 to float +< %1014 = fmul float %1012, %1013 +< %1015 = fadd float %1014, 0.000000e+00 +< %1016 = bitcast i32 %157 to float +< %1017 = bitcast i32 %157 to float +< %1018 = fmul float %1016, %1017 +< %1019 = fadd float %1015, %1018 +< %1020 = call float @llvm.sqrt.f32.72(float %1019) +< %1021 = fneg float %815 +< %1022 = fmul float %1020, %1021 +< %1023 = fmul float %1022, 0.000000e+00 +< %1024 = bitcast i32 %157 to float +< %1025 = fadd float %1024, %1023 +< %1026 = bitcast i32 %970 to float +< %1027 = bitcast i32 %970 to float +< %1028 = fmul float %1026, %1027 +< %1029 = fadd float %1028, 0.000000e+00 +< %1030 = bitcast i32 %157 to float +< %1031 = bitcast i32 %157 to float +< %1032 = fmul float %1030, %1031 
+< %1033 = fadd float %1029, %1032 +< %1034 = call float @llvm.sqrt.f32.73(float %1033) +< %1035 = fneg float %815 +< %1036 = fmul float %1034, %1035 +< %1037 = fmul float %1036, 0.000000e+00 +< %1038 = bitcast i32 %157 to float +< %1039 = fadd float %1038, %1037 +< %1040 = fmul float %1025, %1039 +< %1041 = fadd float %1011, %1040 +< %1042 = call float @llvm.sqrt.f32.74(float %1041) +< %1043 = fadd float %1042, 0.000000e+00 +< %1044 = fdiv float %983, %1043 +< %1045 = fmul float %1044, 2.000000e+00 +< %1046 = bitcast i32 %970 to float +< %1047 = bitcast i32 %970 to float +< %1048 = fmul float %1046, %1047 +< %1049 = fadd float %1048, 0.000000e+00 +< %1050 = bitcast i32 %157 to float +< %1051 = bitcast i32 %157 to float +< %1052 = fmul float %1050, %1051 +< %1053 = fadd float %1049, %1052 +< %1054 = call float @llvm.sqrt.f32.75(float %1053) +< %1055 = fneg float %815 +< %1056 = fmul float %1054, %1055 +< %1057 = bitcast i32 %970 to float +< %1058 = fadd float %1057, %1056 +< %1059 = bitcast i32 %970 to float +< %1060 = bitcast i32 %970 to float +< %1061 = fmul float %1059, %1060 +< %1062 = fadd float %1061, 0.000000e+00 +< %1063 = bitcast i32 %157 to float +< %1064 = bitcast i32 %157 to float +< %1065 = fmul float %1063, %1064 +< %1066 = fadd float %1062, %1065 +< %1067 = call float @llvm.sqrt.f32.76(float %1066) +< %1068 = fneg float %815 +< %1069 = fmul float %1067, %1068 +< %1070 = bitcast i32 %970 to float +< %1071 = fadd float %1070, %1069 +< %1072 = bitcast i32 %970 to float +< %1073 = bitcast i32 %970 to float +< %1074 = fmul float %1072, %1073 +< %1075 = fadd float %1074, 0.000000e+00 +< %1076 = bitcast i32 %157 to float +< %1077 = bitcast i32 %157 to float +< %1078 = fmul float %1076, %1077 +< %1079 = fadd float %1075, %1078 +< %1080 = call float @llvm.sqrt.f32.77(float %1079) +< %1081 = fneg float %815 +< %1082 = fmul float %1080, %1081 +< %1083 = bitcast i32 %970 to float +< %1084 = fadd float %1083, %1082 +< %1085 = fmul float %1071, %1084 +< %1086 = fadd float %1085, 0.000000e+00 +< %1087 = bitcast i32 %970 to float +< %1088 = bitcast i32 %970 to float +< %1089 = fmul float %1087, %1088 +< %1090 = fadd float %1089, 0.000000e+00 +< %1091 = bitcast i32 %157 to float +< %1092 = bitcast i32 %157 to float +< %1093 = fmul float %1091, %1092 +< %1094 = fadd float %1090, %1093 +< %1095 = call float @llvm.sqrt.f32.78(float %1094) +< %1096 = fneg float %815 +< %1097 = fmul float %1095, %1096 +< %1098 = fmul float %1097, 0.000000e+00 +< %1099 = bitcast i32 %157 to float +< %1100 = fadd float %1099, %1098 +< %1101 = bitcast i32 %970 to float +< %1102 = bitcast i32 %970 to float +< %1103 = fmul float %1101, %1102 +< %1104 = fadd float %1103, 0.000000e+00 +< %1105 = bitcast i32 %157 to float +< %1106 = bitcast i32 %157 to float +< %1107 = fmul float %1105, %1106 +< %1108 = fadd float %1104, %1107 +< %1109 = call float @llvm.sqrt.f32.79(float %1108) +< %1110 = fneg float %815 +< %1111 = fmul float %1109, %1110 +< %1112 = fmul float %1111, 0.000000e+00 +< %1113 = bitcast i32 %157 to float +< %1114 = fadd float %1113, %1112 +< %1115 = fmul float %1100, %1114 +< %1116 = fadd float %1086, %1115 +< %1117 = call float @llvm.sqrt.f32.80(float %1116) +< %1118 = fadd float %1117, 0.000000e+00 +< %1119 = fdiv float %1058, %1118 +< %1120 = fmul float %1045, %1119 +< %1121 = fsub float 1.000000e+00, %1120 +< %1122 = fmul float %1121, %963 +< %1123 = fadd float %1122, 0.000000e+00 +< %1124 = bitcast i32 %970 to float +< %1125 = bitcast i32 %970 to float +< %1126 = fmul float %1124, %1125 +< %1127 = fadd 
float %1126, 0.000000e+00 +< %1128 = bitcast i32 %157 to float +< %1129 = bitcast i32 %157 to float +< %1130 = fmul float %1128, %1129 +< %1131 = fadd float %1127, %1130 +< %1132 = call float @llvm.sqrt.f32.81(float %1131) +< %1133 = fneg float %815 +< %1134 = fmul float %1132, %1133 +< %1135 = bitcast i32 %970 to float +< %1136 = fadd float %1135, %1134 +< %1137 = bitcast i32 %970 to float +< %1138 = bitcast i32 %970 to float +< %1139 = fmul float %1137, %1138 +< %1140 = fadd float %1139, 0.000000e+00 +< %1141 = bitcast i32 %157 to float +< %1142 = bitcast i32 %157 to float +< %1143 = fmul float %1141, %1142 +< %1144 = fadd float %1140, %1143 +< %1145 = call float @llvm.sqrt.f32.82(float %1144) +< %1146 = fneg float %815 +< %1147 = fmul float %1145, %1146 +< %1148 = bitcast i32 %970 to float +< %1149 = fadd float %1148, %1147 +< %1150 = bitcast i32 %970 to float +< %1151 = bitcast i32 %970 to float +< %1152 = fmul float %1150, %1151 +< %1153 = fadd float %1152, 0.000000e+00 +< %1154 = bitcast i32 %157 to float +< %1155 = bitcast i32 %157 to float +< %1156 = fmul float %1154, %1155 +< %1157 = fadd float %1153, %1156 +< %1158 = call float @llvm.sqrt.f32.83(float %1157) +< %1159 = fneg float %815 +< %1160 = fmul float %1158, %1159 +< %1161 = bitcast i32 %970 to float +< %1162 = fadd float %1161, %1160 +< %1163 = fmul float %1149, %1162 +< %1164 = fadd float %1163, 0.000000e+00 +< %1165 = bitcast i32 %970 to float +< %1166 = bitcast i32 %970 to float +< %1167 = fmul float %1165, %1166 +< %1168 = fadd float %1167, 0.000000e+00 +< %1169 = bitcast i32 %157 to float +< %1170 = bitcast i32 %157 to float +< %1171 = fmul float %1169, %1170 +< %1172 = fadd float %1168, %1171 +< %1173 = call float @llvm.sqrt.f32.84(float %1172) +< %1174 = fneg float %815 +< %1175 = fmul float %1173, %1174 +< %1176 = fmul float %1175, 0.000000e+00 +< %1177 = bitcast i32 %157 to float +< %1178 = fadd float %1177, %1176 +< %1179 = bitcast i32 %970 to float +< %1180 = bitcast i32 %970 to float +< %1181 = fmul float %1179, %1180 +< %1182 = fadd float %1181, 0.000000e+00 +< %1183 = bitcast i32 %157 to float +< %1184 = bitcast i32 %157 to float +< %1185 = fmul float %1183, %1184 +< %1186 = fadd float %1182, %1185 +< %1187 = call float @llvm.sqrt.f32.85(float %1186) +< %1188 = fneg float %815 +< %1189 = fmul float %1187, %1188 +< %1190 = fmul float %1189, 0.000000e+00 +< %1191 = bitcast i32 %157 to float +< %1192 = fadd float %1191, %1190 +< %1193 = fmul float %1178, %1192 +< %1194 = fadd float %1164, %1193 +< %1195 = call float @llvm.sqrt.f32.86(float %1194) +< %1196 = fadd float %1195, 0.000000e+00 +< %1197 = fdiv float %1136, %1196 +< %1198 = fmul float %1197, 2.000000e+00 +< %1199 = bitcast i32 %970 to float +< %1200 = bitcast i32 %970 to float +< %1201 = fmul float %1199, %1200 +< %1202 = fadd float %1201, 0.000000e+00 +< %1203 = bitcast i32 %157 to float +< %1204 = bitcast i32 %157 to float +< %1205 = fmul float %1203, %1204 +< %1206 = fadd float %1202, %1205 +< %1207 = call float @llvm.sqrt.f32.87(float %1206) +< %1208 = fneg float %815 +< %1209 = fmul float %1207, %1208 +< %1210 = fmul float %1209, 0.000000e+00 +< %1211 = bitcast i32 %157 to float +< %1212 = fadd float %1211, %1210 +< %1213 = bitcast i32 %970 to float +< %1214 = bitcast i32 %970 to float +< %1215 = fmul float %1213, %1214 +< %1216 = fadd float %1215, 0.000000e+00 +< %1217 = bitcast i32 %157 to float +< %1218 = bitcast i32 %157 to float +< %1219 = fmul float %1217, %1218 +< %1220 = fadd float %1216, %1219 +< %1221 = call float @llvm.sqrt.f32.88(float 
%1220) +< %1222 = fneg float %815 +< %1223 = fmul float %1221, %1222 +< %1224 = bitcast i32 %970 to float +< %1225 = fadd float %1224, %1223 +< %1226 = bitcast i32 %970 to float +< %1227 = bitcast i32 %970 to float +< %1228 = fmul float %1226, %1227 +< %1229 = fadd float %1228, 0.000000e+00 +< %1230 = bitcast i32 %157 to float +< %1231 = bitcast i32 %157 to float +< %1232 = fmul float %1230, %1231 +< %1233 = fadd float %1229, %1232 +< %1234 = call float @llvm.sqrt.f32.89(float %1233) +< %1235 = fneg float %815 +< %1236 = fmul float %1234, %1235 +< %1237 = bitcast i32 %970 to float +< %1238 = fadd float %1237, %1236 +< %1239 = fmul float %1225, %1238 +< %1240 = fadd float %1239, 0.000000e+00 +< %1241 = bitcast i32 %970 to float +< %1242 = bitcast i32 %970 to float +< %1243 = fmul float %1241, %1242 +< %1244 = fadd float %1243, 0.000000e+00 +< %1245 = bitcast i32 %157 to float +< %1246 = bitcast i32 %157 to float +< %1247 = fmul float %1245, %1246 +< %1248 = fadd float %1244, %1247 +< %1249 = call float @llvm.sqrt.f32.90(float %1248) +< %1250 = fneg float %815 +< %1251 = fmul float %1249, %1250 +< %1252 = fmul float %1251, 0.000000e+00 +< %1253 = bitcast i32 %157 to float +< %1254 = fadd float %1253, %1252 +< %1255 = bitcast i32 %970 to float +< %1256 = bitcast i32 %970 to float +< %1257 = fmul float %1255, %1256 +< %1258 = fadd float %1257, 0.000000e+00 +< %1259 = bitcast i32 %157 to float +< %1260 = bitcast i32 %157 to float +< %1261 = fmul float %1259, %1260 +< %1262 = fadd float %1258, %1261 +< %1263 = call float @llvm.sqrt.f32.91(float %1262) +< %1264 = fneg float %815 +< %1265 = fmul float %1263, %1264 +< %1266 = fmul float %1265, 0.000000e+00 +< %1267 = bitcast i32 %157 to float +< %1268 = fadd float %1267, %1266 +< %1269 = fmul float %1254, %1268 +< %1270 = fadd float %1240, %1269 +< %1271 = call float @llvm.sqrt.f32.92(float %1270) +< %1272 = fadd float %1271, 0.000000e+00 +< %1273 = fdiv float %1212, %1272 +< %1274 = fmul float %1198, %1273 +< %1275 = fneg float %1274 +< %1276 = load float, float* %129, align 4 +< %1277 = fmul float %1275, %1276 +< %1278 = fadd float %1123, %1277 +< %1279 = insertelement <4 x float> zeroinitializer, float %1278, i32 0 +< %1280 = insertelement <4 x float> %1279, float 0.000000e+00, i32 1 +< %1281 = insertelement <4 x float> %1280, float 0.000000e+00, i32 2 +< %1282 = insertelement <4 x float> %1281, float 0.000000e+00, i32 3 +< %1283 = extractelement <4 x float> %1282, i32 0 +< store float %1283, float* %2, align 4 +< %1284 = extractelement <4 x float> %1282, i32 1 +< %1285 = getelementptr float, float* %2, i32 0 +< %1286 = getelementptr inbounds float, float* %1285, i64 1 +< store float %1284, float* %1286, align 4 +< %1287 = bitcast i32 %970 to float +< %1288 = bitcast i32 %970 to float +< %1289 = fmul float %1287, %1288 +< %1290 = fadd float %1289, 0.000000e+00 +< %1291 = bitcast i32 %157 to float +< %1292 = bitcast i32 %157 to float +< %1293 = fmul float %1291, %1292 +< %1294 = fadd float %1290, %1293 +< %1295 = call float @llvm.sqrt.f32.93(float %1294) +< %1296 = fneg float %815 +< %1297 = fmul float %1295, %1296 +< %1298 = bitcast i32 %970 to float +< %1299 = fadd float %1298, %1297 +< %1300 = bitcast i32 %970 to float +< %1301 = bitcast i32 %970 to float +< %1302 = fmul float %1300, %1301 +< %1303 = fadd float %1302, 0.000000e+00 +< %1304 = bitcast i32 %157 to float +< %1305 = bitcast i32 %157 to float +< %1306 = fmul float %1304, %1305 +< %1307 = fadd float %1303, %1306 +< %1308 = call float @llvm.sqrt.f32.94(float %1307) +< %1309 = fneg 
float %815 +< %1310 = fmul float %1308, %1309 +< %1311 = bitcast i32 %970 to float +< %1312 = fadd float %1311, %1310 +< %1313 = bitcast i32 %970 to float +< %1314 = bitcast i32 %970 to float +< %1315 = fmul float %1313, %1314 +< %1316 = fadd float %1315, 0.000000e+00 +< %1317 = bitcast i32 %157 to float +< %1318 = bitcast i32 %157 to float +< %1319 = fmul float %1317, %1318 +< %1320 = fadd float %1316, %1319 +< %1321 = call float @llvm.sqrt.f32.95(float %1320) +< %1322 = fneg float %815 +< %1323 = fmul float %1321, %1322 +< %1324 = bitcast i32 %970 to float +< %1325 = fadd float %1324, %1323 +< %1326 = fmul float %1312, %1325 +< %1327 = fadd float %1326, 0.000000e+00 +< %1328 = bitcast i32 %970 to float +< %1329 = bitcast i32 %970 to float +< %1330 = fmul float %1328, %1329 +< %1331 = fadd float %1330, 0.000000e+00 +< %1332 = bitcast i32 %157 to float +< %1333 = bitcast i32 %157 to float +< %1334 = fmul float %1332, %1333 +< %1335 = fadd float %1331, %1334 +< %1336 = call float @llvm.sqrt.f32.96(float %1335) +< %1337 = fneg float %815 +< %1338 = fmul float %1336, %1337 +< %1339 = fmul float %1338, 0.000000e+00 +< %1340 = bitcast i32 %157 to float +< %1341 = fadd float %1340, %1339 +< %1342 = bitcast i32 %970 to float +< %1343 = bitcast i32 %970 to float +< %1344 = fmul float %1342, %1343 +< %1345 = fadd float %1344, 0.000000e+00 +< %1346 = bitcast i32 %157 to float +< %1347 = bitcast i32 %157 to float +< %1348 = fmul float %1346, %1347 +< %1349 = fadd float %1345, %1348 +< %1350 = call float @llvm.sqrt.f32.97(float %1349) +< %1351 = fneg float %815 +< %1352 = fmul float %1350, %1351 +< %1353 = fmul float %1352, 0.000000e+00 +< %1354 = bitcast i32 %157 to float +< %1355 = fadd float %1354, %1353 +< %1356 = fmul float %1341, %1355 +< %1357 = fadd float %1327, %1356 +< %1358 = call float @llvm.sqrt.f32.98(float %1357) +< %1359 = fadd float %1358, 0.000000e+00 +< %1360 = fdiv float %1299, %1359 +< %1361 = fmul float %1360, 2.000000e+00 +< %1362 = bitcast i32 %970 to float +< %1363 = bitcast i32 %970 to float +< %1364 = fmul float %1362, %1363 +< %1365 = fadd float %1364, 0.000000e+00 +< %1366 = bitcast i32 %157 to float +< %1367 = bitcast i32 %157 to float +< %1368 = fmul float %1366, %1367 +< %1369 = fadd float %1365, %1368 +< %1370 = call float @llvm.sqrt.f32.99(float %1369) +< %1371 = fneg float %815 +< %1372 = fmul float %1370, %1371 +< %1373 = bitcast i32 %970 to float +< %1374 = fadd float %1373, %1372 +< %1375 = bitcast i32 %970 to float +< %1376 = bitcast i32 %970 to float +< %1377 = fmul float %1375, %1376 +< %1378 = fadd float %1377, 0.000000e+00 +< %1379 = bitcast i32 %157 to float +< %1380 = bitcast i32 %157 to float +< %1381 = fmul float %1379, %1380 +< %1382 = fadd float %1378, %1381 +< %1383 = call float @llvm.sqrt.f32.100(float %1382) +< %1384 = fneg float %815 +< %1385 = fmul float %1383, %1384 +< %1386 = bitcast i32 %970 to float +< %1387 = fadd float %1386, %1385 +< %1388 = bitcast i32 %970 to float +< %1389 = bitcast i32 %970 to float +< %1390 = fmul float %1388, %1389 +< %1391 = fadd float %1390, 0.000000e+00 +< %1392 = bitcast i32 %157 to float +< %1393 = bitcast i32 %157 to float +< %1394 = fmul float %1392, %1393 +< %1395 = fadd float %1391, %1394 +< %1396 = call float @llvm.sqrt.f32.101(float %1395) +< %1397 = fneg float %815 +< %1398 = fmul float %1396, %1397 +< %1399 = bitcast i32 %970 to float +< %1400 = fadd float %1399, %1398 +< %1401 = fmul float %1387, %1400 +< %1402 = fadd float %1401, 0.000000e+00 +< %1403 = bitcast i32 %970 to float +< %1404 = bitcast i32 
%970 to float +< %1405 = fmul float %1403, %1404 +< %1406 = fadd float %1405, 0.000000e+00 +< %1407 = bitcast i32 %157 to float +< %1408 = bitcast i32 %157 to float +< %1409 = fmul float %1407, %1408 +< %1410 = fadd float %1406, %1409 +< %1411 = call float @llvm.sqrt.f32.102(float %1410) +< %1412 = fneg float %815 +< %1413 = fmul float %1411, %1412 +< %1414 = fmul float %1413, 0.000000e+00 +< %1415 = bitcast i32 %157 to float +< %1416 = fadd float %1415, %1414 +< %1417 = bitcast i32 %970 to float +< %1418 = bitcast i32 %970 to float +< %1419 = fmul float %1417, %1418 +< %1420 = fadd float %1419, 0.000000e+00 +< %1421 = bitcast i32 %157 to float +< %1422 = bitcast i32 %157 to float +< %1423 = fmul float %1421, %1422 +< %1424 = fadd float %1420, %1423 +< %1425 = call float @llvm.sqrt.f32.103(float %1424) +< %1426 = fneg float %815 +< %1427 = fmul float %1425, %1426 +< %1428 = fmul float %1427, 0.000000e+00 +< %1429 = bitcast i32 %157 to float +< %1430 = fadd float %1429, %1428 +< %1431 = fmul float %1416, %1430 +< %1432 = fadd float %1402, %1431 +< %1433 = call float @llvm.sqrt.f32.104(float %1432) +< %1434 = fadd float %1433, 0.000000e+00 +< %1435 = fdiv float %1374, %1434 +< %1436 = fmul float %1361, %1435 +< %1437 = fsub float 1.000000e+00, %1436 +< %1438 = insertelement <4 x float> zeroinitializer, float %1437, i32 0 +< %1439 = insertelement <4 x float> %1438, float 0.000000e+00, i32 1 +< %1440 = insertelement <4 x float> %1439, float 0.000000e+00, i32 2 +< %1441 = insertelement <4 x float> %1440, float 0.000000e+00, i32 3 +< %1442 = getelementptr float, float* %0, i32 0 +< %1443 = getelementptr inbounds float, float* %1442, i64 1 +< %1444 = load float, float* %1443, align 4 +< %1445 = insertelement <4 x float> zeroinitializer, float %1444, i32 0 +< %1446 = insertelement <4 x float> %1445, float 0.000000e+00, i32 1 +< %1447 = insertelement <4 x float> %1446, float 0.000000e+00, i32 2 +< %1448 = insertelement <4 x float> %1447, float 0.000000e+00, i32 3 +< %1449 = call <4 x float> @llvm.fma.f32.105(<4 x float> %1441, <4 x float> %1448, <4 x float> zeroinitializer) +< %1450 = extractelement <4 x float> %1449, i32 0 +< store float %1450, float* %1286, align 4 +< %1451 = bitcast i32 %970 to float +< %1452 = bitcast i32 %970 to float +< %1453 = fmul float %1451, %1452 +< %1454 = fadd float %1453, 0.000000e+00 +< %1455 = bitcast i32 %157 to float +< %1456 = bitcast i32 %157 to float +< %1457 = fmul float %1455, %1456 +< %1458 = fadd float %1454, %1457 +< %1459 = call float @llvm.sqrt.f32.106(float %1458) +< %1460 = fneg float %815 +< %1461 = fmul float %1459, %1460 +< %1462 = bitcast i32 %970 to float +< %1463 = fadd float %1462, %1461 +< %1464 = bitcast i32 %970 to float +< %1465 = bitcast i32 %970 to float +< %1466 = fmul float %1464, %1465 +< %1467 = fadd float %1466, 0.000000e+00 +< %1468 = bitcast i32 %157 to float +< %1469 = bitcast i32 %157 to float +< %1470 = fmul float %1468, %1469 +< %1471 = fadd float %1467, %1470 +< %1472 = call float @llvm.sqrt.f32.107(float %1471) +< %1473 = fneg float %815 +< %1474 = fmul float %1472, %1473 +< %1475 = bitcast i32 %970 to float +< %1476 = fadd float %1475, %1474 +< %1477 = bitcast i32 %970 to float +< %1478 = bitcast i32 %970 to float +< %1479 = fmul float %1477, %1478 +< %1480 = fadd float %1479, 0.000000e+00 +< %1481 = bitcast i32 %157 to float +< %1482 = bitcast i32 %157 to float +< %1483 = fmul float %1481, %1482 +< %1484 = fadd float %1480, %1483 +< %1485 = call float @llvm.sqrt.f32.108(float %1484) +< %1486 = fneg float %815 +< %1487 = fmul 
float %1485, %1486 +< %1488 = bitcast i32 %970 to float +< %1489 = fadd float %1488, %1487 +< %1490 = fmul float %1476, %1489 +< %1491 = fadd float %1490, 0.000000e+00 +< %1492 = bitcast i32 %970 to float +< %1493 = bitcast i32 %970 to float +< %1494 = fmul float %1492, %1493 +< %1495 = fadd float %1494, 0.000000e+00 +< %1496 = bitcast i32 %157 to float +< %1497 = bitcast i32 %157 to float +< %1498 = fmul float %1496, %1497 +< %1499 = fadd float %1495, %1498 +< %1500 = call float @llvm.sqrt.f32.109(float %1499) +< %1501 = fneg float %815 +< %1502 = fmul float %1500, %1501 +< %1503 = fmul float %1502, 0.000000e+00 +< %1504 = bitcast i32 %157 to float +< %1505 = fadd float %1504, %1503 +< %1506 = bitcast i32 %970 to float +< %1507 = bitcast i32 %970 to float +< %1508 = fmul float %1506, %1507 +< %1509 = fadd float %1508, 0.000000e+00 +< %1510 = bitcast i32 %157 to float +< %1511 = bitcast i32 %157 to float +< %1512 = fmul float %1510, %1511 +< %1513 = fadd float %1509, %1512 +< %1514 = call float @llvm.sqrt.f32.110(float %1513) +< %1515 = fneg float %815 +< %1516 = fmul float %1514, %1515 +< %1517 = fmul float %1516, 0.000000e+00 +< %1518 = bitcast i32 %157 to float +< %1519 = fadd float %1518, %1517 +< %1520 = fmul float %1505, %1519 +< %1521 = fadd float %1491, %1520 +< %1522 = call float @llvm.sqrt.f32.111(float %1521) +< %1523 = fadd float %1522, 0.000000e+00 +< %1524 = fdiv float %1463, %1523 +< %1525 = fmul float %1524, 2.000000e+00 +< %1526 = bitcast i32 %970 to float +< %1527 = bitcast i32 %970 to float +< %1528 = fmul float %1526, %1527 +< %1529 = fadd float %1528, 0.000000e+00 +< %1530 = bitcast i32 %157 to float +< %1531 = bitcast i32 %157 to float +< %1532 = fmul float %1530, %1531 +< %1533 = fadd float %1529, %1532 +< %1534 = call float @llvm.sqrt.f32.112(float %1533) +< %1535 = fneg float %815 +< %1536 = fmul float %1534, %1535 +< %1537 = bitcast i32 %970 to float +< %1538 = fadd float %1537, %1536 +< %1539 = bitcast i32 %970 to float +< %1540 = bitcast i32 %970 to float +< %1541 = fmul float %1539, %1540 +< %1542 = fadd float %1541, 0.000000e+00 +< %1543 = bitcast i32 %157 to float +< %1544 = bitcast i32 %157 to float +< %1545 = fmul float %1543, %1544 +< %1546 = fadd float %1542, %1545 +< %1547 = call float @llvm.sqrt.f32.113(float %1546) +< %1548 = fneg float %815 +< %1549 = fmul float %1547, %1548 +< %1550 = bitcast i32 %970 to float +< %1551 = fadd float %1550, %1549 +< %1552 = bitcast i32 %970 to float +< %1553 = bitcast i32 %970 to float +< %1554 = fmul float %1552, %1553 +< %1555 = fadd float %1554, 0.000000e+00 +< %1556 = bitcast i32 %157 to float +< %1557 = bitcast i32 %157 to float +< %1558 = fmul float %1556, %1557 +< %1559 = fadd float %1555, %1558 +< %1560 = call float @llvm.sqrt.f32.114(float %1559) +< %1561 = fneg float %815 +< %1562 = fmul float %1560, %1561 +< %1563 = bitcast i32 %970 to float +< %1564 = fadd float %1563, %1562 +< %1565 = fmul float %1551, %1564 +< %1566 = fadd float %1565, 0.000000e+00 +< %1567 = bitcast i32 %970 to float +< %1568 = bitcast i32 %970 to float +< %1569 = fmul float %1567, %1568 +< %1570 = fadd float %1569, 0.000000e+00 +< %1571 = bitcast i32 %157 to float +< %1572 = bitcast i32 %157 to float +< %1573 = fmul float %1571, %1572 +< %1574 = fadd float %1570, %1573 +< %1575 = call float @llvm.sqrt.f32.115(float %1574) +< %1576 = fneg float %815 +< %1577 = fmul float %1575, %1576 +< %1578 = fmul float %1577, 0.000000e+00 +< %1579 = bitcast i32 %157 to float +< %1580 = fadd float %1579, %1578 +< %1581 = bitcast i32 %970 to float +< 
%1582 = bitcast i32 %970 to float +< %1583 = fmul float %1581, %1582 +< %1584 = fadd float %1583, 0.000000e+00 +< %1585 = bitcast i32 %157 to float +< %1586 = bitcast i32 %157 to float +< %1587 = fmul float %1585, %1586 +< %1588 = fadd float %1584, %1587 +< %1589 = call float @llvm.sqrt.f32.116(float %1588) +< %1590 = fneg float %815 +< %1591 = fmul float %1589, %1590 +< %1592 = fmul float %1591, 0.000000e+00 +< %1593 = bitcast i32 %157 to float +< %1594 = fadd float %1593, %1592 +< %1595 = fmul float %1580, %1594 +< %1596 = fadd float %1566, %1595 +< %1597 = call float @llvm.sqrt.f32.117(float %1596) +< %1598 = fadd float %1597, 0.000000e+00 +< %1599 = fdiv float %1538, %1598 +< %1600 = fmul float %1525, %1599 +< %1601 = fsub float 1.000000e+00, %1600 +< %1602 = fmul float %1601, %1444 +< %1603 = fadd float %1602, 0.000000e+00 +< %1604 = bitcast i32 %970 to float +< %1605 = bitcast i32 %970 to float +< %1606 = fmul float %1604, %1605 +< %1607 = fadd float %1606, 0.000000e+00 +< %1608 = bitcast i32 %157 to float +< %1609 = bitcast i32 %157 to float +< %1610 = fmul float %1608, %1609 +< %1611 = fadd float %1607, %1610 +< %1612 = call float @llvm.sqrt.f32.118(float %1611) +< %1613 = fneg float %815 +< %1614 = fmul float %1612, %1613 +< %1615 = bitcast i32 %970 to float +< %1616 = fadd float %1615, %1614 +< %1617 = bitcast i32 %970 to float +< %1618 = bitcast i32 %970 to float +< %1619 = fmul float %1617, %1618 +< %1620 = fadd float %1619, 0.000000e+00 +< %1621 = bitcast i32 %157 to float +< %1622 = bitcast i32 %157 to float +< %1623 = fmul float %1621, %1622 +< %1624 = fadd float %1620, %1623 +< %1625 = call float @llvm.sqrt.f32.119(float %1624) +< %1626 = fneg float %815 +< %1627 = fmul float %1625, %1626 +< %1628 = bitcast i32 %970 to float +< %1629 = fadd float %1628, %1627 +< %1630 = bitcast i32 %970 to float +< %1631 = bitcast i32 %970 to float +< %1632 = fmul float %1630, %1631 +< %1633 = fadd float %1632, 0.000000e+00 +< %1634 = bitcast i32 %157 to float +< %1635 = bitcast i32 %157 to float +< %1636 = fmul float %1634, %1635 +< %1637 = fadd float %1633, %1636 +< %1638 = call float @llvm.sqrt.f32.120(float %1637) +< %1639 = fneg float %815 +< %1640 = fmul float %1638, %1639 +< %1641 = bitcast i32 %970 to float +< %1642 = fadd float %1641, %1640 +< %1643 = fmul float %1629, %1642 +< %1644 = fadd float %1643, 0.000000e+00 +< %1645 = bitcast i32 %970 to float +< %1646 = bitcast i32 %970 to float +< %1647 = fmul float %1645, %1646 +< %1648 = fadd float %1647, 0.000000e+00 +< %1649 = bitcast i32 %157 to float +< %1650 = bitcast i32 %157 to float +< %1651 = fmul float %1649, %1650 +< %1652 = fadd float %1648, %1651 +< %1653 = call float @llvm.sqrt.f32.121(float %1652) +< %1654 = fneg float %815 +< %1655 = fmul float %1653, %1654 +< %1656 = fmul float %1655, 0.000000e+00 +< %1657 = bitcast i32 %157 to float +< %1658 = fadd float %1657, %1656 +< %1659 = bitcast i32 %970 to float +< %1660 = bitcast i32 %970 to float +< %1661 = fmul float %1659, %1660 +< %1662 = fadd float %1661, 0.000000e+00 +< %1663 = bitcast i32 %157 to float +< %1664 = bitcast i32 %157 to float +< %1665 = fmul float %1663, %1664 +< %1666 = fadd float %1662, %1665 +< %1667 = call float @llvm.sqrt.f32.122(float %1666) +< %1668 = fneg float %815 +< %1669 = fmul float %1667, %1668 +< %1670 = fmul float %1669, 0.000000e+00 +< %1671 = bitcast i32 %157 to float +< %1672 = fadd float %1671, %1670 +< %1673 = fmul float %1658, %1672 +< %1674 = fadd float %1644, %1673 +< %1675 = call float @llvm.sqrt.f32.123(float %1674) +< %1676 = 
fadd float %1675, 0.000000e+00 +< %1677 = fdiv float %1616, %1676 +< %1678 = fmul float %1677, 2.000000e+00 +< %1679 = bitcast i32 %970 to float +< %1680 = bitcast i32 %970 to float +< %1681 = fmul float %1679, %1680 +< %1682 = fadd float %1681, 0.000000e+00 +< %1683 = bitcast i32 %157 to float +< %1684 = bitcast i32 %157 to float +< %1685 = fmul float %1683, %1684 +< %1686 = fadd float %1682, %1685 +< %1687 = call float @llvm.sqrt.f32.124(float %1686) +< %1688 = fneg float %815 +< %1689 = fmul float %1687, %1688 +< %1690 = fmul float %1689, 0.000000e+00 +< %1691 = bitcast i32 %157 to float +< %1692 = fadd float %1691, %1690 +< %1693 = bitcast i32 %970 to float +< %1694 = bitcast i32 %970 to float +< %1695 = fmul float %1693, %1694 +< %1696 = fadd float %1695, 0.000000e+00 +< %1697 = bitcast i32 %157 to float +< %1698 = bitcast i32 %157 to float +< %1699 = fmul float %1697, %1698 +< %1700 = fadd float %1696, %1699 +< %1701 = call float @llvm.sqrt.f32.125(float %1700) +< %1702 = fneg float %815 +< %1703 = fmul float %1701, %1702 +< %1704 = bitcast i32 %970 to float +< %1705 = fadd float %1704, %1703 +< %1706 = bitcast i32 %970 to float +< %1707 = bitcast i32 %970 to float +< %1708 = fmul float %1706, %1707 +< %1709 = fadd float %1708, 0.000000e+00 +< %1710 = bitcast i32 %157 to float +< %1711 = bitcast i32 %157 to float +< %1712 = fmul float %1710, %1711 +< %1713 = fadd float %1709, %1712 +< %1714 = call float @llvm.sqrt.f32.126(float %1713) +< %1715 = fneg float %815 +< %1716 = fmul float %1714, %1715 +< %1717 = bitcast i32 %970 to float +< %1718 = fadd float %1717, %1716 +< %1719 = fmul float %1705, %1718 +< %1720 = fadd float %1719, 0.000000e+00 +< %1721 = bitcast i32 %970 to float +< %1722 = bitcast i32 %970 to float +< %1723 = fmul float %1721, %1722 +< %1724 = fadd float %1723, 0.000000e+00 +< %1725 = bitcast i32 %157 to float +< %1726 = bitcast i32 %157 to float +< %1727 = fmul float %1725, %1726 +< %1728 = fadd float %1724, %1727 +< %1729 = call float @llvm.sqrt.f32.127(float %1728) +< %1730 = fneg float %815 +< %1731 = fmul float %1729, %1730 +< %1732 = fmul float %1731, 0.000000e+00 +< %1733 = bitcast i32 %157 to float +< %1734 = fadd float %1733, %1732 +< %1735 = bitcast i32 %970 to float +< %1736 = bitcast i32 %970 to float +< %1737 = fmul float %1735, %1736 +< %1738 = fadd float %1737, 0.000000e+00 +< %1739 = bitcast i32 %157 to float +< %1740 = bitcast i32 %157 to float +< %1741 = fmul float %1739, %1740 +< %1742 = fadd float %1738, %1741 +< %1743 = call float @llvm.sqrt.f32.128(float %1742) +< %1744 = fneg float %815 +< %1745 = fmul float %1743, %1744 +< %1746 = fmul float %1745, 0.000000e+00 +< %1747 = bitcast i32 %157 to float +< %1748 = fadd float %1747, %1746 +< %1749 = fmul float %1734, %1748 +< %1750 = fadd float %1720, %1749 +< %1751 = call float @llvm.sqrt.f32.129(float %1750) +< %1752 = fadd float %1751, 0.000000e+00 +< %1753 = fdiv float %1692, %1752 +< %1754 = fmul float %1678, %1753 +< %1755 = fneg float %1754 +< %1756 = getelementptr float, float* %0, i32 0 +< %1757 = getelementptr inbounds float, float* %1756, i64 3 +< %1758 = load float, float* %1757, align 4 +< %1759 = fmul float %1755, %1758 +< %1760 = fadd float %1603, %1759 +< %1761 = insertelement <4 x float> zeroinitializer, float %1760, i32 0 +< %1762 = insertelement <4 x float> %1761, float 0.000000e+00, i32 1 +< %1763 = insertelement <4 x float> %1762, float 0.000000e+00, i32 2 +< %1764 = insertelement <4 x float> %1763, float 0.000000e+00, i32 3 +< %1765 = extractelement <4 x float> %1764, i32 0 +< 
store float %1765, float* %1286, align 4 +< %1766 = extractelement <4 x float> %1764, i32 1 +< store float %1766, float* %140, align 4 +< %1767 = bitcast i32 %970 to float +< %1768 = bitcast i32 %970 to float +< %1769 = fmul float %1767, %1768 +< %1770 = fadd float %1769, 0.000000e+00 +< %1771 = bitcast i32 %157 to float +< %1772 = bitcast i32 %157 to float +< %1773 = fmul float %1771, %1772 +< %1774 = fadd float %1770, %1773 +< %1775 = call float @llvm.sqrt.f32.130(float %1774) +< %1776 = fneg float %815 +< %1777 = fmul float %1775, %1776 +< %1778 = fmul float %1777, 0.000000e+00 +< %1779 = bitcast i32 %157 to float +< %1780 = fadd float %1779, %1778 +< %1781 = bitcast i32 %970 to float +< %1782 = bitcast i32 %970 to float +< %1783 = fmul float %1781, %1782 +< %1784 = fadd float %1783, 0.000000e+00 +< %1785 = bitcast i32 %157 to float +< %1786 = bitcast i32 %157 to float +< %1787 = fmul float %1785, %1786 +< %1788 = fadd float %1784, %1787 +< %1789 = call float @llvm.sqrt.f32.131(float %1788) +< %1790 = fneg float %815 +< %1791 = fmul float %1789, %1790 +< %1792 = bitcast i32 %970 to float +< %1793 = fadd float %1792, %1791 +< %1794 = bitcast i32 %970 to float +< %1795 = bitcast i32 %970 to float +< %1796 = fmul float %1794, %1795 +< %1797 = fadd float %1796, 0.000000e+00 +< %1798 = bitcast i32 %157 to float +< %1799 = bitcast i32 %157 to float +< %1800 = fmul float %1798, %1799 +< %1801 = fadd float %1797, %1800 +< %1802 = call float @llvm.sqrt.f32.132(float %1801) +< %1803 = fneg float %815 +< %1804 = fmul float %1802, %1803 +< %1805 = bitcast i32 %970 to float +< %1806 = fadd float %1805, %1804 +< %1807 = fmul float %1793, %1806 +< %1808 = fadd float %1807, 0.000000e+00 +< %1809 = bitcast i32 %970 to float +< %1810 = bitcast i32 %970 to float +< %1811 = fmul float %1809, %1810 +< %1812 = fadd float %1811, 0.000000e+00 +< %1813 = bitcast i32 %157 to float +< %1814 = bitcast i32 %157 to float +< %1815 = fmul float %1813, %1814 +< %1816 = fadd float %1812, %1815 +< %1817 = call float @llvm.sqrt.f32.133(float %1816) +< %1818 = fneg float %815 +< %1819 = fmul float %1817, %1818 +< %1820 = fmul float %1819, 0.000000e+00 +< %1821 = bitcast i32 %157 to float +< %1822 = fadd float %1821, %1820 +< %1823 = bitcast i32 %970 to float +< %1824 = bitcast i32 %970 to float +< %1825 = fmul float %1823, %1824 +< %1826 = fadd float %1825, 0.000000e+00 +< %1827 = bitcast i32 %157 to float +< %1828 = bitcast i32 %157 to float +< %1829 = fmul float %1827, %1828 +< %1830 = fadd float %1826, %1829 +< %1831 = call float @llvm.sqrt.f32.134(float %1830) +< %1832 = fneg float %815 +< %1833 = fmul float %1831, %1832 +< %1834 = fmul float %1833, 0.000000e+00 +< %1835 = bitcast i32 %157 to float +< %1836 = fadd float %1835, %1834 +< %1837 = fmul float %1822, %1836 +< %1838 = fadd float %1808, %1837 +< %1839 = call float @llvm.sqrt.f32.135(float %1838) +< %1840 = fadd float %1839, 0.000000e+00 +< %1841 = fdiv float %1780, %1840 +< %1842 = fmul float %1841, 2.000000e+00 +< %1843 = bitcast i32 %970 to float +< %1844 = bitcast i32 %970 to float +< %1845 = fmul float %1843, %1844 +< %1846 = fadd float %1845, 0.000000e+00 +< %1847 = bitcast i32 %157 to float +< %1848 = bitcast i32 %157 to float +< %1849 = fmul float %1847, %1848 +< %1850 = fadd float %1846, %1849 +< %1851 = call float @llvm.sqrt.f32.136(float %1850) +< %1852 = fneg float %815 +< %1853 = fmul float %1851, %1852 +< %1854 = bitcast i32 %970 to float +< %1855 = fadd float %1854, %1853 +< %1856 = bitcast i32 %970 to float +< %1857 = bitcast i32 %970 to float 
+< %1858 = fmul float %1856, %1857 +< %1859 = fadd float %1858, 0.000000e+00 +< %1860 = bitcast i32 %157 to float +< %1861 = bitcast i32 %157 to float +< %1862 = fmul float %1860, %1861 +< %1863 = fadd float %1859, %1862 +< %1864 = call float @llvm.sqrt.f32.137(float %1863) +< %1865 = fneg float %815 +< %1866 = fmul float %1864, %1865 +< %1867 = bitcast i32 %970 to float +< %1868 = fadd float %1867, %1866 +< %1869 = bitcast i32 %970 to float +< %1870 = bitcast i32 %970 to float +< %1871 = fmul float %1869, %1870 +< %1872 = fadd float %1871, 0.000000e+00 +< %1873 = bitcast i32 %157 to float +< %1874 = bitcast i32 %157 to float +< %1875 = fmul float %1873, %1874 +< %1876 = fadd float %1872, %1875 +< %1877 = call float @llvm.sqrt.f32.138(float %1876) +< %1878 = fneg float %815 +< %1879 = fmul float %1877, %1878 +< %1880 = bitcast i32 %970 to float +< %1881 = fadd float %1880, %1879 +< %1882 = fmul float %1868, %1881 +< %1883 = fadd float %1882, 0.000000e+00 +< %1884 = bitcast i32 %970 to float +< %1885 = bitcast i32 %970 to float +< %1886 = fmul float %1884, %1885 +< %1887 = fadd float %1886, 0.000000e+00 +< %1888 = bitcast i32 %157 to float +< %1889 = bitcast i32 %157 to float +< %1890 = fmul float %1888, %1889 +< %1891 = fadd float %1887, %1890 +< %1892 = call float @llvm.sqrt.f32.139(float %1891) +< %1893 = fneg float %815 +< %1894 = fmul float %1892, %1893 +< %1895 = fmul float %1894, 0.000000e+00 +< %1896 = bitcast i32 %157 to float +< %1897 = fadd float %1896, %1895 +< %1898 = bitcast i32 %970 to float +< %1899 = bitcast i32 %970 to float +< %1900 = fmul float %1898, %1899 +< %1901 = fadd float %1900, 0.000000e+00 +< %1902 = bitcast i32 %157 to float +< %1903 = bitcast i32 %157 to float +< %1904 = fmul float %1902, %1903 +< %1905 = fadd float %1901, %1904 +< %1906 = call float @llvm.sqrt.f32.140(float %1905) +< %1907 = fneg float %815 +< %1908 = fmul float %1906, %1907 +< %1909 = fmul float %1908, 0.000000e+00 +< %1910 = bitcast i32 %157 to float +< %1911 = fadd float %1910, %1909 +< %1912 = fmul float %1897, %1911 +< %1913 = fadd float %1883, %1912 +< %1914 = call float @llvm.sqrt.f32.141(float %1913) +< %1915 = fadd float %1914, 0.000000e+00 +< %1916 = fdiv float %1855, %1915 +< %1917 = fmul float %1842, %1916 +< %1918 = fneg float %1917 +< %1919 = insertelement <4 x float> zeroinitializer, float %1918, i32 0 +< %1920 = insertelement <4 x float> %1919, float 0.000000e+00, i32 1 +< %1921 = insertelement <4 x float> %1920, float 0.000000e+00, i32 2 +< %1922 = insertelement <4 x float> %1921, float 0.000000e+00, i32 3 +< %1923 = getelementptr float, float* %0, i32 0 +< %1924 = load float, float* %1923, align 4 +< %1925 = insertelement <4 x float> zeroinitializer, float %1924, i32 0 +< %1926 = insertelement <4 x float> %1925, float 0.000000e+00, i32 1 +< %1927 = insertelement <4 x float> %1926, float 0.000000e+00, i32 2 +< %1928 = insertelement <4 x float> %1927, float 0.000000e+00, i32 3 +< %1929 = call <4 x float> @llvm.fma.f32.142(<4 x float> %1922, <4 x float> %1928, <4 x float> zeroinitializer) +< %1930 = extractelement <4 x float> %1929, i32 0 +< store float %1930, float* %140, align 4 +< %1931 = bitcast i32 %970 to float +< %1932 = bitcast i32 %970 to float +< %1933 = fmul float %1931, %1932 +< %1934 = fadd float %1933, 0.000000e+00 +< %1935 = bitcast i32 %157 to float +< %1936 = bitcast i32 %157 to float +< %1937 = fmul float %1935, %1936 +< %1938 = fadd float %1934, %1937 +< %1939 = call float @llvm.sqrt.f32.143(float %1938) +< %1940 = fneg float %815 +< %1941 = fmul float %1939, 
%1940 +< %1942 = fmul float %1941, 0.000000e+00 +< %1943 = bitcast i32 %157 to float +< %1944 = fadd float %1943, %1942 +< %1945 = bitcast i32 %970 to float +< %1946 = bitcast i32 %970 to float +< %1947 = fmul float %1945, %1946 +< %1948 = fadd float %1947, 0.000000e+00 +< %1949 = bitcast i32 %157 to float +< %1950 = bitcast i32 %157 to float +< %1951 = fmul float %1949, %1950 +< %1952 = fadd float %1948, %1951 +< %1953 = call float @llvm.sqrt.f32.144(float %1952) +< %1954 = fneg float %815 +< %1955 = fmul float %1953, %1954 +< %1956 = bitcast i32 %970 to float +< %1957 = fadd float %1956, %1955 +< %1958 = bitcast i32 %970 to float +< %1959 = bitcast i32 %970 to float +< %1960 = fmul float %1958, %1959 +< %1961 = fadd float %1960, 0.000000e+00 +< %1962 = bitcast i32 %157 to float +< %1963 = bitcast i32 %157 to float +< %1964 = fmul float %1962, %1963 +< %1965 = fadd float %1961, %1964 +< %1966 = call float @llvm.sqrt.f32.145(float %1965) +< %1967 = fneg float %815 +< %1968 = fmul float %1966, %1967 +< %1969 = bitcast i32 %970 to float +< %1970 = fadd float %1969, %1968 +< %1971 = fmul float %1957, %1970 +< %1972 = fadd float %1971, 0.000000e+00 +< %1973 = bitcast i32 %970 to float +< %1974 = bitcast i32 %970 to float +< %1975 = fmul float %1973, %1974 +< %1976 = fadd float %1975, 0.000000e+00 +< %1977 = bitcast i32 %157 to float +< %1978 = bitcast i32 %157 to float +< %1979 = fmul float %1977, %1978 +< %1980 = fadd float %1976, %1979 +< %1981 = call float @llvm.sqrt.f32.146(float %1980) +< %1982 = fneg float %815 +< %1983 = fmul float %1981, %1982 +< %1984 = fmul float %1983, 0.000000e+00 +< %1985 = bitcast i32 %157 to float +< %1986 = fadd float %1985, %1984 +< %1987 = bitcast i32 %970 to float +< %1988 = bitcast i32 %970 to float +< %1989 = fmul float %1987, %1988 +< %1990 = fadd float %1989, 0.000000e+00 +< %1991 = bitcast i32 %157 to float +< %1992 = bitcast i32 %157 to float +< %1993 = fmul float %1991, %1992 +< %1994 = fadd float %1990, %1993 +< %1995 = call float @llvm.sqrt.f32.147(float %1994) +< %1996 = fneg float %815 +< %1997 = fmul float %1995, %1996 +< %1998 = fmul float %1997, 0.000000e+00 +< %1999 = bitcast i32 %157 to float +< %2000 = fadd float %1999, %1998 +< %2001 = fmul float %1986, %2000 +< %2002 = fadd float %1972, %2001 +< %2003 = call float @llvm.sqrt.f32.148(float %2002) +< %2004 = fadd float %2003, 0.000000e+00 +< %2005 = fdiv float %1944, %2004 +< %2006 = fmul float %2005, 2.000000e+00 +< %2007 = bitcast i32 %970 to float +< %2008 = bitcast i32 %970 to float +< %2009 = fmul float %2007, %2008 +< %2010 = fadd float %2009, 0.000000e+00 +< %2011 = bitcast i32 %157 to float +< %2012 = bitcast i32 %157 to float +< %2013 = fmul float %2011, %2012 +< %2014 = fadd float %2010, %2013 +< %2015 = call float @llvm.sqrt.f32.149(float %2014) +< %2016 = fneg float %815 +< %2017 = fmul float %2015, %2016 +< %2018 = bitcast i32 %970 to float +< %2019 = fadd float %2018, %2017 +< %2020 = bitcast i32 %970 to float +< %2021 = bitcast i32 %970 to float +< %2022 = fmul float %2020, %2021 +< %2023 = fadd float %2022, 0.000000e+00 +< %2024 = bitcast i32 %157 to float +< %2025 = bitcast i32 %157 to float +< %2026 = fmul float %2024, %2025 +< %2027 = fadd float %2023, %2026 +< %2028 = call float @llvm.sqrt.f32.150(float %2027) +< %2029 = fneg float %815 +< %2030 = fmul float %2028, %2029 +< %2031 = bitcast i32 %970 to float +< %2032 = fadd float %2031, %2030 +< %2033 = bitcast i32 %970 to float +< %2034 = bitcast i32 %970 to float +< %2035 = fmul float %2033, %2034 +< %2036 = fadd float 
%2035, 0.000000e+00 +< %2037 = bitcast i32 %157 to float +< %2038 = bitcast i32 %157 to float +< %2039 = fmul float %2037, %2038 +< %2040 = fadd float %2036, %2039 +< %2041 = call float @llvm.sqrt.f32.151(float %2040) +< %2042 = fneg float %815 +< %2043 = fmul float %2041, %2042 +< %2044 = bitcast i32 %970 to float +< %2045 = fadd float %2044, %2043 +< %2046 = fmul float %2032, %2045 +< %2047 = fadd float %2046, 0.000000e+00 +< %2048 = bitcast i32 %970 to float +< %2049 = bitcast i32 %970 to float +< %2050 = fmul float %2048, %2049 +< %2051 = fadd float %2050, 0.000000e+00 +< %2052 = bitcast i32 %157 to float +< %2053 = bitcast i32 %157 to float +< %2054 = fmul float %2052, %2053 +< %2055 = fadd float %2051, %2054 +< %2056 = call float @llvm.sqrt.f32.152(float %2055) +< %2057 = fneg float %815 +< %2058 = fmul float %2056, %2057 +< %2059 = fmul float %2058, 0.000000e+00 +< %2060 = bitcast i32 %157 to float +< %2061 = fadd float %2060, %2059 +< %2062 = bitcast i32 %970 to float +< %2063 = bitcast i32 %970 to float +< %2064 = fmul float %2062, %2063 +< %2065 = fadd float %2064, 0.000000e+00 +< %2066 = bitcast i32 %157 to float +< %2067 = bitcast i32 %157 to float +< %2068 = fmul float %2066, %2067 +< %2069 = fadd float %2065, %2068 +< %2070 = call float @llvm.sqrt.f32.153(float %2069) +< %2071 = fneg float %815 +< %2072 = fmul float %2070, %2071 +< %2073 = fmul float %2072, 0.000000e+00 +< %2074 = bitcast i32 %157 to float +< %2075 = fadd float %2074, %2073 +< %2076 = fmul float %2061, %2075 +< %2077 = fadd float %2047, %2076 +< %2078 = call float @llvm.sqrt.f32.154(float %2077) +< %2079 = fadd float %2078, 0.000000e+00 +< %2080 = fdiv float %2019, %2079 +< %2081 = fmul float %2006, %2080 +< %2082 = fneg float %2081 +< %2083 = fmul float %2082, %1924 +< %2084 = fadd float %2083, 0.000000e+00 +< %2085 = bitcast i32 %970 to float +< %2086 = bitcast i32 %970 to float +< %2087 = fmul float %2085, %2086 +< %2088 = fadd float %2087, 0.000000e+00 +< %2089 = bitcast i32 %157 to float +< %2090 = bitcast i32 %157 to float +< %2091 = fmul float %2089, %2090 +< %2092 = fadd float %2088, %2091 +< %2093 = call float @llvm.sqrt.f32.155(float %2092) +< %2094 = fneg float %815 +< %2095 = fmul float %2093, %2094 +< %2096 = fmul float %2095, 0.000000e+00 +< %2097 = bitcast i32 %157 to float +< %2098 = fadd float %2097, %2096 +< %2099 = bitcast i32 %970 to float +< %2100 = bitcast i32 %970 to float +< %2101 = fmul float %2099, %2100 +< %2102 = fadd float %2101, 0.000000e+00 +< %2103 = bitcast i32 %157 to float +< %2104 = bitcast i32 %157 to float +< %2105 = fmul float %2103, %2104 +< %2106 = fadd float %2102, %2105 +< %2107 = call float @llvm.sqrt.f32.156(float %2106) +< %2108 = fneg float %815 +< %2109 = fmul float %2107, %2108 +< %2110 = bitcast i32 %970 to float +< %2111 = fadd float %2110, %2109 +< %2112 = bitcast i32 %970 to float +< %2113 = bitcast i32 %970 to float +< %2114 = fmul float %2112, %2113 +< %2115 = fadd float %2114, 0.000000e+00 +< %2116 = bitcast i32 %157 to float +< %2117 = bitcast i32 %157 to float +< %2118 = fmul float %2116, %2117 +< %2119 = fadd float %2115, %2118 +< %2120 = call float @llvm.sqrt.f32.157(float %2119) +< %2121 = fneg float %815 +< %2122 = fmul float %2120, %2121 +< %2123 = bitcast i32 %970 to float +< %2124 = fadd float %2123, %2122 +< %2125 = fmul float %2111, %2124 +< %2126 = fadd float %2125, 0.000000e+00 +< %2127 = bitcast i32 %970 to float +< %2128 = bitcast i32 %970 to float +< %2129 = fmul float %2127, %2128 +< %2130 = fadd float %2129, 0.000000e+00 +< %2131 = 
bitcast i32 %157 to float +< %2132 = bitcast i32 %157 to float +< %2133 = fmul float %2131, %2132 +< %2134 = fadd float %2130, %2133 +< %2135 = call float @llvm.sqrt.f32.158(float %2134) +< %2136 = fneg float %815 +< %2137 = fmul float %2135, %2136 +< %2138 = fmul float %2137, 0.000000e+00 +< %2139 = bitcast i32 %157 to float +< %2140 = fadd float %2139, %2138 +< %2141 = bitcast i32 %970 to float +< %2142 = bitcast i32 %970 to float +< %2143 = fmul float %2141, %2142 +< %2144 = fadd float %2143, 0.000000e+00 +< %2145 = bitcast i32 %157 to float +< %2146 = bitcast i32 %157 to float +< %2147 = fmul float %2145, %2146 +< %2148 = fadd float %2144, %2147 +< %2149 = call float @llvm.sqrt.f32.159(float %2148) +< %2150 = fneg float %815 +< %2151 = fmul float %2149, %2150 +< %2152 = fmul float %2151, 0.000000e+00 +< %2153 = bitcast i32 %157 to float +< %2154 = fadd float %2153, %2152 +< %2155 = fmul float %2140, %2154 +< %2156 = fadd float %2126, %2155 +< %2157 = call float @llvm.sqrt.f32.160(float %2156) +< %2158 = fadd float %2157, 0.000000e+00 +< %2159 = fdiv float %2098, %2158 +< %2160 = fmul float %2159, 2.000000e+00 +< %2161 = bitcast i32 %970 to float +< %2162 = bitcast i32 %970 to float +< %2163 = fmul float %2161, %2162 +< %2164 = fadd float %2163, 0.000000e+00 +< %2165 = bitcast i32 %157 to float +< %2166 = bitcast i32 %157 to float +< %2167 = fmul float %2165, %2166 +< %2168 = fadd float %2164, %2167 +< %2169 = call float @llvm.sqrt.f32.161(float %2168) +< %2170 = fneg float %815 +< %2171 = fmul float %2169, %2170 +< %2172 = fmul float %2171, 0.000000e+00 +< %2173 = bitcast i32 %157 to float +< %2174 = fadd float %2173, %2172 +< %2175 = bitcast i32 %970 to float +< %2176 = bitcast i32 %970 to float +< %2177 = fmul float %2175, %2176 +< %2178 = fadd float %2177, 0.000000e+00 +< %2179 = bitcast i32 %157 to float +< %2180 = bitcast i32 %157 to float +< %2181 = fmul float %2179, %2180 +< %2182 = fadd float %2178, %2181 +< %2183 = call float @llvm.sqrt.f32.162(float %2182) +< %2184 = fneg float %815 +< %2185 = fmul float %2183, %2184 +< %2186 = bitcast i32 %970 to float +< %2187 = fadd float %2186, %2185 +< %2188 = bitcast i32 %970 to float +< %2189 = bitcast i32 %970 to float +< %2190 = fmul float %2188, %2189 +< %2191 = fadd float %2190, 0.000000e+00 +< %2192 = bitcast i32 %157 to float +< %2193 = bitcast i32 %157 to float +< %2194 = fmul float %2192, %2193 +< %2195 = fadd float %2191, %2194 +< %2196 = call float @llvm.sqrt.f32.163(float %2195) +< %2197 = fneg float %815 +< %2198 = fmul float %2196, %2197 +< %2199 = bitcast i32 %970 to float +< %2200 = fadd float %2199, %2198 +< %2201 = fmul float %2187, %2200 +< %2202 = fadd float %2201, 0.000000e+00 +< %2203 = bitcast i32 %970 to float +< %2204 = bitcast i32 %970 to float +< %2205 = fmul float %2203, %2204 +< %2206 = fadd float %2205, 0.000000e+00 +< %2207 = bitcast i32 %157 to float +< %2208 = bitcast i32 %157 to float +< %2209 = fmul float %2207, %2208 +< %2210 = fadd float %2206, %2209 +< %2211 = call float @llvm.sqrt.f32.164(float %2210) +< %2212 = fneg float %815 +< %2213 = fmul float %2211, %2212 +< %2214 = fmul float %2213, 0.000000e+00 +< %2215 = bitcast i32 %157 to float +< %2216 = fadd float %2215, %2214 +< %2217 = bitcast i32 %970 to float +< %2218 = bitcast i32 %970 to float +< %2219 = fmul float %2217, %2218 +< %2220 = fadd float %2219, 0.000000e+00 +< %2221 = bitcast i32 %157 to float +< %2222 = bitcast i32 %157 to float +< %2223 = fmul float %2221, %2222 +< %2224 = fadd float %2220, %2223 +< %2225 = call float 
@llvm.sqrt.f32.165(float %2224) +< %2226 = fneg float %815 +< %2227 = fmul float %2225, %2226 +< %2228 = fmul float %2227, 0.000000e+00 +< %2229 = bitcast i32 %157 to float +< %2230 = fadd float %2229, %2228 +< %2231 = fmul float %2216, %2230 +< %2232 = fadd float %2202, %2231 +< %2233 = call float @llvm.sqrt.f32.166(float %2232) +< %2234 = fadd float %2233, 0.000000e+00 +< %2235 = fdiv float %2174, %2234 +< %2236 = fmul float %2160, %2235 +< %2237 = fsub float 1.000000e+00, %2236 +< %2238 = load float, float* %129, align 4 +< %2239 = fmul float %2237, %2238 +< %2240 = fadd float %2084, %2239 +< %2241 = insertelement <4 x float> zeroinitializer, float %2240, i32 0 +< %2242 = insertelement <4 x float> %2241, float 0.000000e+00, i32 1 +< %2243 = insertelement <4 x float> %2242, float 0.000000e+00, i32 2 +< %2244 = insertelement <4 x float> %2243, float 0.000000e+00, i32 3 +< %2245 = extractelement <4 x float> %2244, i32 0 +< store float %2245, float* %140, align 4 +< %2246 = extractelement <4 x float> %2244, i32 1 +< %2247 = getelementptr float, float* %2, i32 0 +< %2248 = getelementptr inbounds float, float* %2247, i64 3 +< store float %2246, float* %2248, align 4 +< %2249 = bitcast i32 %970 to float +< %2250 = bitcast i32 %970 to float +< %2251 = fmul float %2249, %2250 +< %2252 = fadd float %2251, 0.000000e+00 +< %2253 = bitcast i32 %157 to float +< %2254 = bitcast i32 %157 to float +< %2255 = fmul float %2253, %2254 +< %2256 = fadd float %2252, %2255 +< %2257 = call float @llvm.sqrt.f32.167(float %2256) +< %2258 = fneg float %815 +< %2259 = fmul float %2257, %2258 +< %2260 = fmul float %2259, 0.000000e+00 +< %2261 = bitcast i32 %157 to float +< %2262 = fadd float %2261, %2260 +< %2263 = bitcast i32 %970 to float +< %2264 = bitcast i32 %970 to float +< %2265 = fmul float %2263, %2264 +< %2266 = fadd float %2265, 0.000000e+00 +< %2267 = bitcast i32 %157 to float +< %2268 = bitcast i32 %157 to float +< %2269 = fmul float %2267, %2268 +< %2270 = fadd float %2266, %2269 +< %2271 = call float @llvm.sqrt.f32.168(float %2270) +< %2272 = fneg float %815 +< %2273 = fmul float %2271, %2272 +< %2274 = bitcast i32 %970 to float +< %2275 = fadd float %2274, %2273 +< %2276 = bitcast i32 %970 to float +< %2277 = bitcast i32 %970 to float +< %2278 = fmul float %2276, %2277 +< %2279 = fadd float %2278, 0.000000e+00 +< %2280 = bitcast i32 %157 to float +< %2281 = bitcast i32 %157 to float +< %2282 = fmul float %2280, %2281 +< %2283 = fadd float %2279, %2282 +< %2284 = call float @llvm.sqrt.f32.169(float %2283) +< %2285 = fneg float %815 +< %2286 = fmul float %2284, %2285 +< %2287 = bitcast i32 %970 to float +< %2288 = fadd float %2287, %2286 +< %2289 = fmul float %2275, %2288 +< %2290 = fadd float %2289, 0.000000e+00 +< %2291 = bitcast i32 %970 to float +< %2292 = bitcast i32 %970 to float +< %2293 = fmul float %2291, %2292 +< %2294 = fadd float %2293, 0.000000e+00 +< %2295 = bitcast i32 %157 to float +< %2296 = bitcast i32 %157 to float +< %2297 = fmul float %2295, %2296 +< %2298 = fadd float %2294, %2297 +< %2299 = call float @llvm.sqrt.f32.170(float %2298) +< %2300 = fneg float %815 +< %2301 = fmul float %2299, %2300 +< %2302 = fmul float %2301, 0.000000e+00 +< %2303 = bitcast i32 %157 to float +< %2304 = fadd float %2303, %2302 +< %2305 = bitcast i32 %970 to float +< %2306 = bitcast i32 %970 to float +< %2307 = fmul float %2305, %2306 +< %2308 = fadd float %2307, 0.000000e+00 +< %2309 = bitcast i32 %157 to float +< %2310 = bitcast i32 %157 to float +< %2311 = fmul float %2309, %2310 +< %2312 = fadd 
float %2308, %2311 +< %2313 = call float @llvm.sqrt.f32.171(float %2312) +< %2314 = fneg float %815 +< %2315 = fmul float %2313, %2314 +< %2316 = fmul float %2315, 0.000000e+00 +< %2317 = bitcast i32 %157 to float +< %2318 = fadd float %2317, %2316 +< %2319 = fmul float %2304, %2318 +< %2320 = fadd float %2290, %2319 +< %2321 = call float @llvm.sqrt.f32.172(float %2320) +< %2322 = fadd float %2321, 0.000000e+00 +< %2323 = fdiv float %2262, %2322 +< %2324 = fmul float %2323, 2.000000e+00 +< %2325 = bitcast i32 %970 to float +< %2326 = bitcast i32 %970 to float +< %2327 = fmul float %2325, %2326 +< %2328 = fadd float %2327, 0.000000e+00 +< %2329 = bitcast i32 %157 to float +< %2330 = bitcast i32 %157 to float +< %2331 = fmul float %2329, %2330 +< %2332 = fadd float %2328, %2331 +< %2333 = call float @llvm.sqrt.f32.173(float %2332) +< %2334 = fneg float %815 +< %2335 = fmul float %2333, %2334 +< %2336 = bitcast i32 %970 to float +< %2337 = fadd float %2336, %2335 +< %2338 = bitcast i32 %970 to float +< %2339 = bitcast i32 %970 to float +< %2340 = fmul float %2338, %2339 +< %2341 = fadd float %2340, 0.000000e+00 +< %2342 = bitcast i32 %157 to float +< %2343 = bitcast i32 %157 to float +< %2344 = fmul float %2342, %2343 +< %2345 = fadd float %2341, %2344 +< %2346 = call float @llvm.sqrt.f32.174(float %2345) +< %2347 = fneg float %815 +< %2348 = fmul float %2346, %2347 +< %2349 = bitcast i32 %970 to float +< %2350 = fadd float %2349, %2348 +< %2351 = bitcast i32 %970 to float +< %2352 = bitcast i32 %970 to float +< %2353 = fmul float %2351, %2352 +< %2354 = fadd float %2353, 0.000000e+00 +< %2355 = bitcast i32 %157 to float +< %2356 = bitcast i32 %157 to float +< %2357 = fmul float %2355, %2356 +< %2358 = fadd float %2354, %2357 +< %2359 = call float @llvm.sqrt.f32.175(float %2358) +< %2360 = fneg float %815 +< %2361 = fmul float %2359, %2360 +< %2362 = bitcast i32 %970 to float +< %2363 = fadd float %2362, %2361 +< %2364 = fmul float %2350, %2363 +< %2365 = fadd float %2364, 0.000000e+00 +< %2366 = bitcast i32 %970 to float +< %2367 = bitcast i32 %970 to float +< %2368 = fmul float %2366, %2367 +< %2369 = fadd float %2368, 0.000000e+00 +< %2370 = bitcast i32 %157 to float +< %2371 = bitcast i32 %157 to float +< %2372 = fmul float %2370, %2371 +< %2373 = fadd float %2369, %2372 +< %2374 = call float @llvm.sqrt.f32.176(float %2373) +< %2375 = fneg float %815 +< %2376 = fmul float %2374, %2375 +< %2377 = fmul float %2376, 0.000000e+00 +< %2378 = bitcast i32 %157 to float +< %2379 = fadd float %2378, %2377 +< %2380 = bitcast i32 %970 to float +< %2381 = bitcast i32 %970 to float +< %2382 = fmul float %2380, %2381 +< %2383 = fadd float %2382, 0.000000e+00 +< %2384 = bitcast i32 %157 to float +< %2385 = bitcast i32 %157 to float +< %2386 = fmul float %2384, %2385 +< %2387 = fadd float %2383, %2386 +< %2388 = call float @llvm.sqrt.f32.177(float %2387) +< %2389 = fneg float %815 +< %2390 = fmul float %2388, %2389 +< %2391 = fmul float %2390, 0.000000e+00 +< %2392 = bitcast i32 %157 to float +< %2393 = fadd float %2392, %2391 +< %2394 = fmul float %2379, %2393 +< %2395 = fadd float %2365, %2394 +< %2396 = call float @llvm.sqrt.f32.178(float %2395) +< %2397 = fadd float %2396, 0.000000e+00 +< %2398 = fdiv float %2337, %2397 +< %2399 = fmul float %2324, %2398 +< %2400 = fneg float %2399 +< %2401 = insertelement <4 x float> zeroinitializer, float %2400, i32 0 +< %2402 = insertelement <4 x float> %2401, float 0.000000e+00, i32 1 +< %2403 = insertelement <4 x float> %2402, float 0.000000e+00, i32 2 +< %2404 
= insertelement <4 x float> %2403, float 0.000000e+00, i32 3 +< %2405 = load float, float* %1443, align 4 +< %2406 = insertelement <4 x float> zeroinitializer, float %2405, i32 0 +< %2407 = insertelement <4 x float> %2406, float 0.000000e+00, i32 1 +< %2408 = insertelement <4 x float> %2407, float 0.000000e+00, i32 2 +< %2409 = insertelement <4 x float> %2408, float 0.000000e+00, i32 3 +< %2410 = call <4 x float> @llvm.fma.f32.179(<4 x float> %2404, <4 x float> %2409, <4 x float> zeroinitializer) +< %2411 = extractelement <4 x float> %2410, i32 0 +< store float %2411, float* %2248, align 4 +< %2412 = bitcast i32 %970 to float +< %2413 = bitcast i32 %970 to float +< %2414 = fmul float %2412, %2413 +< %2415 = fadd float %2414, 0.000000e+00 +< %2416 = bitcast i32 %157 to float +< %2417 = bitcast i32 %157 to float +< %2418 = fmul float %2416, %2417 +< %2419 = fadd float %2415, %2418 +< %2420 = call float @llvm.sqrt.f32.180(float %2419) +< %2421 = fneg float %815 +< %2422 = fmul float %2420, %2421 +< %2423 = fmul float %2422, 0.000000e+00 +< %2424 = bitcast i32 %157 to float +< %2425 = fadd float %2424, %2423 +< %2426 = bitcast i32 %970 to float +< %2427 = bitcast i32 %970 to float +< %2428 = fmul float %2426, %2427 +< %2429 = fadd float %2428, 0.000000e+00 +< %2430 = bitcast i32 %157 to float +< %2431 = bitcast i32 %157 to float +< %2432 = fmul float %2430, %2431 +< %2433 = fadd float %2429, %2432 +< %2434 = call float @llvm.sqrt.f32.181(float %2433) +< %2435 = fneg float %815 +< %2436 = fmul float %2434, %2435 +< %2437 = bitcast i32 %970 to float +< %2438 = fadd float %2437, %2436 +< %2439 = bitcast i32 %970 to float +< %2440 = bitcast i32 %970 to float +< %2441 = fmul float %2439, %2440 +< %2442 = fadd float %2441, 0.000000e+00 +< %2443 = bitcast i32 %157 to float +< %2444 = bitcast i32 %157 to float +< %2445 = fmul float %2443, %2444 +< %2446 = fadd float %2442, %2445 +< %2447 = call float @llvm.sqrt.f32.182(float %2446) +< %2448 = fneg float %815 +< %2449 = fmul float %2447, %2448 +< %2450 = bitcast i32 %970 to float +< %2451 = fadd float %2450, %2449 +< %2452 = fmul float %2438, %2451 +< %2453 = fadd float %2452, 0.000000e+00 +< %2454 = bitcast i32 %970 to float +< %2455 = bitcast i32 %970 to float +< %2456 = fmul float %2454, %2455 +< %2457 = fadd float %2456, 0.000000e+00 +< %2458 = bitcast i32 %157 to float +< %2459 = bitcast i32 %157 to float +< %2460 = fmul float %2458, %2459 +< %2461 = fadd float %2457, %2460 +< %2462 = call float @llvm.sqrt.f32.183(float %2461) +< %2463 = fneg float %815 +< %2464 = fmul float %2462, %2463 +< %2465 = fmul float %2464, 0.000000e+00 +< %2466 = bitcast i32 %157 to float +< %2467 = fadd float %2466, %2465 +< %2468 = bitcast i32 %970 to float +< %2469 = bitcast i32 %970 to float +< %2470 = fmul float %2468, %2469 +< %2471 = fadd float %2470, 0.000000e+00 +< %2472 = bitcast i32 %157 to float +< %2473 = bitcast i32 %157 to float +< %2474 = fmul float %2472, %2473 +< %2475 = fadd float %2471, %2474 +< %2476 = call float @llvm.sqrt.f32.184(float %2475) +< %2477 = fneg float %815 +< %2478 = fmul float %2476, %2477 +< %2479 = fmul float %2478, 0.000000e+00 +< %2480 = bitcast i32 %157 to float +< %2481 = fadd float %2480, %2479 +< %2482 = fmul float %2467, %2481 +< %2483 = fadd float %2453, %2482 +< %2484 = call float @llvm.sqrt.f32.185(float %2483) +< %2485 = fadd float %2484, 0.000000e+00 +< %2486 = fdiv float %2425, %2485 +< %2487 = fmul float %2486, 2.000000e+00 +< %2488 = bitcast i32 %970 to float +< %2489 = bitcast i32 %970 to float +< %2490 = fmul float 
%2488, %2489 +< %2491 = fadd float %2490, 0.000000e+00 +< %2492 = bitcast i32 %157 to float +< %2493 = bitcast i32 %157 to float +< %2494 = fmul float %2492, %2493 +< %2495 = fadd float %2491, %2494 +< %2496 = call float @llvm.sqrt.f32.186(float %2495) +< %2497 = fneg float %815 +< %2498 = fmul float %2496, %2497 +< %2499 = bitcast i32 %970 to float +< %2500 = fadd float %2499, %2498 +< %2501 = bitcast i32 %970 to float +< %2502 = bitcast i32 %970 to float +< %2503 = fmul float %2501, %2502 +< %2504 = fadd float %2503, 0.000000e+00 +< %2505 = bitcast i32 %157 to float +< %2506 = bitcast i32 %157 to float +< %2507 = fmul float %2505, %2506 +< %2508 = fadd float %2504, %2507 +< %2509 = call float @llvm.sqrt.f32.187(float %2508) +< %2510 = fneg float %815 +< %2511 = fmul float %2509, %2510 +< %2512 = bitcast i32 %970 to float +< %2513 = fadd float %2512, %2511 +< %2514 = bitcast i32 %970 to float +< %2515 = bitcast i32 %970 to float +< %2516 = fmul float %2514, %2515 +< %2517 = fadd float %2516, 0.000000e+00 +< %2518 = bitcast i32 %157 to float +< %2519 = bitcast i32 %157 to float +< %2520 = fmul float %2518, %2519 +< %2521 = fadd float %2517, %2520 +< %2522 = call float @llvm.sqrt.f32.188(float %2521) +< %2523 = fneg float %815 +< %2524 = fmul float %2522, %2523 +< %2525 = bitcast i32 %970 to float +< %2526 = fadd float %2525, %2524 +< %2527 = fmul float %2513, %2526 +< %2528 = fadd float %2527, 0.000000e+00 +< %2529 = bitcast i32 %970 to float +< %2530 = bitcast i32 %970 to float +< %2531 = fmul float %2529, %2530 +< %2532 = fadd float %2531, 0.000000e+00 +< %2533 = bitcast i32 %157 to float +< %2534 = bitcast i32 %157 to float +< %2535 = fmul float %2533, %2534 +< %2536 = fadd float %2532, %2535 +< %2537 = call float @llvm.sqrt.f32.189(float %2536) +< %2538 = fneg float %815 +< %2539 = fmul float %2537, %2538 +< %2540 = fmul float %2539, 0.000000e+00 +< %2541 = bitcast i32 %157 to float +< %2542 = fadd float %2541, %2540 +< %2543 = bitcast i32 %970 to float +< %2544 = bitcast i32 %970 to float +< %2545 = fmul float %2543, %2544 +< %2546 = fadd float %2545, 0.000000e+00 +< %2547 = bitcast i32 %157 to float +< %2548 = bitcast i32 %157 to float +< %2549 = fmul float %2547, %2548 +< %2550 = fadd float %2546, %2549 +< %2551 = call float @llvm.sqrt.f32.190(float %2550) +< %2552 = fneg float %815 +< %2553 = fmul float %2551, %2552 +< %2554 = fmul float %2553, 0.000000e+00 +< %2555 = bitcast i32 %157 to float +< %2556 = fadd float %2555, %2554 +< %2557 = fmul float %2542, %2556 +< %2558 = fadd float %2528, %2557 +< %2559 = call float @llvm.sqrt.f32.191(float %2558) +< %2560 = fadd float %2559, 0.000000e+00 +< %2561 = fdiv float %2500, %2560 +< %2562 = fmul float %2487, %2561 +< %2563 = fneg float %2562 +< %2564 = fmul float %2563, %2405 +< %2565 = fadd float %2564, 0.000000e+00 +< %2566 = bitcast i32 %970 to float +< %2567 = bitcast i32 %970 to float +< %2568 = fmul float %2566, %2567 +< %2569 = fadd float %2568, 0.000000e+00 +< %2570 = bitcast i32 %157 to float +< %2571 = bitcast i32 %157 to float +< %2572 = fmul float %2570, %2571 +< %2573 = fadd float %2569, %2572 +< %2574 = call float @llvm.sqrt.f32.192(float %2573) +< %2575 = fneg float %815 +< %2576 = fmul float %2574, %2575 +< %2577 = fmul float %2576, 0.000000e+00 +< %2578 = bitcast i32 %157 to float +< %2579 = fadd float %2578, %2577 +< %2580 = bitcast i32 %970 to float +< %2581 = bitcast i32 %970 to float +< %2582 = fmul float %2580, %2581 +< %2583 = fadd float %2582, 0.000000e+00 +< %2584 = bitcast i32 %157 to float +< %2585 = bitcast 
i32 %157 to float +< %2586 = fmul float %2584, %2585 +< %2587 = fadd float %2583, %2586 +< %2588 = call float @llvm.sqrt.f32.193(float %2587) +< %2589 = fneg float %815 +< %2590 = fmul float %2588, %2589 +< %2591 = bitcast i32 %970 to float +< %2592 = fadd float %2591, %2590 +< %2593 = bitcast i32 %970 to float +< %2594 = bitcast i32 %970 to float +< %2595 = fmul float %2593, %2594 +< %2596 = fadd float %2595, 0.000000e+00 +< %2597 = bitcast i32 %157 to float +< %2598 = bitcast i32 %157 to float +< %2599 = fmul float %2597, %2598 +< %2600 = fadd float %2596, %2599 +< %2601 = call float @llvm.sqrt.f32.194(float %2600) +< %2602 = fneg float %815 +< %2603 = fmul float %2601, %2602 +< %2604 = bitcast i32 %970 to float +< %2605 = fadd float %2604, %2603 +< %2606 = fmul float %2592, %2605 +< %2607 = fadd float %2606, 0.000000e+00 +< %2608 = bitcast i32 %970 to float +< %2609 = bitcast i32 %970 to float +< %2610 = fmul float %2608, %2609 +< %2611 = fadd float %2610, 0.000000e+00 +< %2612 = bitcast i32 %157 to float +< %2613 = bitcast i32 %157 to float +< %2614 = fmul float %2612, %2613 +< %2615 = fadd float %2611, %2614 +< %2616 = call float @llvm.sqrt.f32.195(float %2615) +< %2617 = fneg float %815 +< %2618 = fmul float %2616, %2617 +< %2619 = fmul float %2618, 0.000000e+00 +< %2620 = bitcast i32 %157 to float +< %2621 = fadd float %2620, %2619 +< %2622 = bitcast i32 %970 to float +< %2623 = bitcast i32 %970 to float +< %2624 = fmul float %2622, %2623 +< %2625 = fadd float %2624, 0.000000e+00 +< %2626 = bitcast i32 %157 to float +< %2627 = bitcast i32 %157 to float +< %2628 = fmul float %2626, %2627 +< %2629 = fadd float %2625, %2628 +< %2630 = call float @llvm.sqrt.f32.196(float %2629) +< %2631 = fneg float %815 +< %2632 = fmul float %2630, %2631 +< %2633 = fmul float %2632, 0.000000e+00 +< %2634 = bitcast i32 %157 to float +< %2635 = fadd float %2634, %2633 +< %2636 = fmul float %2621, %2635 +< %2637 = fadd float %2607, %2636 +< %2638 = call float @llvm.sqrt.f32.197(float %2637) +< %2639 = fadd float %2638, 0.000000e+00 +< %2640 = fdiv float %2579, %2639 +< %2641 = fmul float %2640, 2.000000e+00 +< %2642 = bitcast i32 %970 to float +< %2643 = bitcast i32 %970 to float +< %2644 = fmul float %2642, %2643 +< %2645 = fadd float %2644, 0.000000e+00 +< %2646 = bitcast i32 %157 to float +< %2647 = bitcast i32 %157 to float +< %2648 = fmul float %2646, %2647 +< %2649 = fadd float %2645, %2648 +< %2650 = call float @llvm.sqrt.f32.198(float %2649) +< %2651 = fneg float %815 +< %2652 = fmul float %2650, %2651 +< %2653 = fmul float %2652, 0.000000e+00 +< %2654 = bitcast i32 %157 to float +< %2655 = fadd float %2654, %2653 +< %2656 = bitcast i32 %970 to float +< %2657 = bitcast i32 %970 to float +< %2658 = fmul float %2656, %2657 +< %2659 = fadd float %2658, 0.000000e+00 +< %2660 = bitcast i32 %157 to float +< %2661 = bitcast i32 %157 to float +< %2662 = fmul float %2660, %2661 +< %2663 = fadd float %2659, %2662 +< %2664 = call float @llvm.sqrt.f32.199(float %2663) +< %2665 = fneg float %815 +< %2666 = fmul float %2664, %2665 +< %2667 = bitcast i32 %970 to float +< %2668 = fadd float %2667, %2666 +< %2669 = bitcast i32 %970 to float +< %2670 = bitcast i32 %970 to float +< %2671 = fmul float %2669, %2670 +< %2672 = fadd float %2671, 0.000000e+00 +< %2673 = bitcast i32 %157 to float +< %2674 = bitcast i32 %157 to float +< %2675 = fmul float %2673, %2674 +< %2676 = fadd float %2672, %2675 +< %2677 = call float @llvm.sqrt.f32.200(float %2676) +< %2678 = fneg float %815 +< %2679 = fmul float %2677, %2678 +< 
%2680 = bitcast i32 %970 to float +< %2681 = fadd float %2680, %2679 +< %2682 = fmul float %2668, %2681 +< %2683 = fadd float %2682, 0.000000e+00 +< %2684 = bitcast i32 %970 to float +< %2685 = bitcast i32 %970 to float +< %2686 = fmul float %2684, %2685 +< %2687 = fadd float %2686, 0.000000e+00 +< %2688 = bitcast i32 %157 to float +< %2689 = bitcast i32 %157 to float +< %2690 = fmul float %2688, %2689 +< %2691 = fadd float %2687, %2690 +< %2692 = call float @llvm.sqrt.f32.201(float %2691) +< %2693 = fneg float %815 +< %2694 = fmul float %2692, %2693 +< %2695 = fmul float %2694, 0.000000e+00 +< %2696 = bitcast i32 %157 to float +< %2697 = fadd float %2696, %2695 +< %2698 = bitcast i32 %970 to float +< %2699 = bitcast i32 %970 to float +< %2700 = fmul float %2698, %2699 +< %2701 = fadd float %2700, 0.000000e+00 +< %2702 = bitcast i32 %157 to float +< %2703 = bitcast i32 %157 to float +< %2704 = fmul float %2702, %2703 +< %2705 = fadd float %2701, %2704 +< %2706 = call float @llvm.sqrt.f32.202(float %2705) +< %2707 = fneg float %815 +< %2708 = fmul float %2706, %2707 +< %2709 = fmul float %2708, 0.000000e+00 +< %2710 = bitcast i32 %157 to float +< %2711 = fadd float %2710, %2709 +< %2712 = fmul float %2697, %2711 +< %2713 = fadd float %2683, %2712 +< %2714 = call float @llvm.sqrt.f32.203(float %2713) +< %2715 = fadd float %2714, 0.000000e+00 +< %2716 = fdiv float %2655, %2715 +< %2717 = fmul float %2641, %2716 +< %2718 = fsub float 1.000000e+00, %2717 +< %2719 = load float, float* %1757, align 4 +< %2720 = fmul float %2718, %2719 +< %2721 = fadd float %2565, %2720 +< %2722 = insertelement <4 x float> zeroinitializer, float %2721, i32 0 +< %2723 = insertelement <4 x float> %2722, float 0.000000e+00, i32 1 +< %2724 = insertelement <4 x float> %2723, float 0.000000e+00, i32 2 +< %2725 = insertelement <4 x float> %2724, float 0.000000e+00, i32 3 +< %2726 = extractelement <4 x float> %2725, i32 0 +< store float %2726, float* %2248, align 4 +< %2727 = getelementptr float, float* %1, i32 0 +< %2728 = getelementptr inbounds float, float* %2727, i64 2 +< %2729 = bitcast float* %2728 to i32* +< %2730 = load i32, i32* %2729, align 4 +< %2731 = bitcast i32 %2730 to float +< %2732 = insertelement <4 x float> zeroinitializer, float %2731, i32 0 +< %2733 = getelementptr float, float* %1, i32 0 +< %2734 = getelementptr inbounds float, float* %2733, i64 1 +< %2735 = bitcast float* %2734 to i32* +< %2736 = load i32, i32* %2735, align 4 +< %2737 = bitcast i32 %2736 to float +< %2738 = insertelement <4 x float> %2732, float %2737, i32 1 +< %2739 = insertelement <4 x float> %2738, float 0.000000e+00, i32 2 +< %2740 = insertelement <4 x float> %2739, float 0.000000e+00, i32 3 +< %2741 = extractelement <4 x float> %2740, i32 0 +< %2742 = bitcast i32* %95 to float* +< %2743 = bitcast i32* %2735 to float* +< store float %2741, float* %2743, align 4 +< %2744 = extractelement <4 x float> %2740, i32 1 +< %2745 = bitcast i32* %98 to float* +< %2746 = bitcast i32* %2729 to float* +< store float %2744, float* %2746, align 4 +--- +> %803 = bitcast i32 %152 to float +> %804 = bitcast i32 %152 to float +> %805 = fmul float %803, %804 +> %806 = fadd float %805, 0.000000e+00 +> %807 = load i32, i32* %130, align 4 +> %808 = bitcast i32 %807 to float +> %809 = bitcast i32 %807 to float +> %810 = fmul float %808, %809 +> %811 = fadd float %806, %810 +> %812 = call float @llvm.sqrt.f32.56(float %811) +> %813 = sitofp i32 %167 to float +> %814 = fneg float %813 +> %815 = fmul float %812, %814 +> %816 = bitcast i32 %152 to float +> 
%817 = fadd float %816, %815 +> %818 = bitcast i32 %152 to float +> %819 = bitcast i32 %152 to float +> %820 = fmul float %818, %819 +> %821 = fadd float %820, 0.000000e+00 +> %822 = bitcast i32 %807 to float +> %823 = bitcast i32 %807 to float +> %824 = fmul float %822, %823 +> %825 = fadd float %821, %824 +> %826 = call float @llvm.sqrt.f32.57(float %825) +> %827 = fneg float %813 +> %828 = fmul float %826, %827 +> %829 = bitcast i32 %152 to float +> %830 = fadd float %829, %828 +> %831 = bitcast i32 %152 to float +> %832 = bitcast i32 %152 to float +> %833 = fmul float %831, %832 +> %834 = fadd float %833, 0.000000e+00 +> %835 = bitcast i32 %807 to float +> %836 = bitcast i32 %807 to float +> %837 = fmul float %835, %836 +> %838 = fadd float %834, %837 +> %839 = call float @llvm.sqrt.f32.58(float %838) +> %840 = fneg float %813 +> %841 = fmul float %839, %840 +> %842 = bitcast i32 %152 to float +> %843 = fadd float %842, %841 +> %844 = fmul float %830, %843 +> %845 = fadd float %844, 0.000000e+00 +> %846 = bitcast i32 %152 to float +> %847 = bitcast i32 %152 to float +> %848 = fmul float %846, %847 +> %849 = fadd float %848, 0.000000e+00 +> %850 = bitcast i32 %807 to float +> %851 = bitcast i32 %807 to float +> %852 = fmul float %850, %851 +> %853 = fadd float %849, %852 +> %854 = call float @llvm.sqrt.f32.59(float %853) +> %855 = fneg float %813 +> %856 = fmul float %854, %855 +> %857 = fmul float %856, 0.000000e+00 +> %858 = bitcast i32 %807 to float +> %859 = fadd float %858, %857 +> %860 = bitcast i32 %152 to float +> %861 = bitcast i32 %152 to float +> %862 = fmul float %860, %861 +> %863 = fadd float %862, 0.000000e+00 +> %864 = bitcast i32 %807 to float +> %865 = bitcast i32 %807 to float +> %866 = fmul float %864, %865 +> %867 = fadd float %863, %866 +> %868 = call float @llvm.sqrt.f32.60(float %867) +> %869 = fneg float %813 +> %870 = fmul float %868, %869 +> %871 = fmul float %870, 0.000000e+00 +> %872 = bitcast i32 %807 to float +> %873 = fadd float %872, %871 +> %874 = fmul float %859, %873 +> %875 = fadd float %845, %874 +> %876 = call float @llvm.sqrt.f32.61(float %875) +> %877 = fadd float %876, 0.000000e+00 +> %878 = fdiv float %817, %877 +> %879 = fmul float %878, 2.000000e+00 +> %880 = bitcast i32 %152 to float +> %881 = bitcast i32 %152 to float +> %882 = fmul float %880, %881 +> %883 = fadd float %882, 0.000000e+00 +> %884 = bitcast i32 %807 to float +> %885 = bitcast i32 %807 to float +> %886 = fmul float %884, %885 +> %887 = fadd float %883, %886 +> %888 = call float @llvm.sqrt.f32.62(float %887) +> %889 = fneg float %813 +> %890 = fmul float %888, %889 +> %891 = bitcast i32 %152 to float +> %892 = fadd float %891, %890 +> %893 = bitcast i32 %152 to float +> %894 = bitcast i32 %152 to float +> %895 = fmul float %893, %894 +> %896 = fadd float %895, 0.000000e+00 +> %897 = bitcast i32 %807 to float +> %898 = bitcast i32 %807 to float +> %899 = fmul float %897, %898 +> %900 = fadd float %896, %899 +> %901 = call float @llvm.sqrt.f32.63(float %900) +> %902 = fneg float %813 +> %903 = fmul float %901, %902 +> %904 = bitcast i32 %152 to float +> %905 = fadd float %904, %903 +> %906 = bitcast i32 %152 to float +> %907 = bitcast i32 %152 to float +> %908 = fmul float %906, %907 +> %909 = fadd float %908, 0.000000e+00 +> %910 = bitcast i32 %807 to float +> %911 = bitcast i32 %807 to float +> %912 = fmul float %910, %911 +> %913 = fadd float %909, %912 +> %914 = call float @llvm.sqrt.f32.64(float %913) +> %915 = fneg float %813 +> %916 = fmul float %914, %915 +> %917 = 
bitcast i32 %152 to float +> %918 = fadd float %917, %916 +> %919 = fmul float %905, %918 +> %920 = fadd float %919, 0.000000e+00 +> %921 = bitcast i32 %152 to float +> %922 = bitcast i32 %152 to float +> %923 = fmul float %921, %922 +> %924 = fadd float %923, 0.000000e+00 +> %925 = bitcast i32 %807 to float +> %926 = bitcast i32 %807 to float +> %927 = fmul float %925, %926 +> %928 = fadd float %924, %927 +> %929 = call float @llvm.sqrt.f32.65(float %928) +> %930 = fneg float %813 +> %931 = fmul float %929, %930 +> %932 = fmul float %931, 0.000000e+00 +> %933 = bitcast i32 %807 to float +> %934 = fadd float %933, %932 +> %935 = bitcast i32 %152 to float +> %936 = bitcast i32 %152 to float +> %937 = fmul float %935, %936 +> %938 = fadd float %937, 0.000000e+00 +> %939 = bitcast i32 %807 to float +> %940 = bitcast i32 %807 to float +> %941 = fmul float %939, %940 +> %942 = fadd float %938, %941 +> %943 = call float @llvm.sqrt.f32.66(float %942) +> %944 = fneg float %813 +> %945 = fmul float %943, %944 +> %946 = fmul float %945, 0.000000e+00 +> %947 = bitcast i32 %807 to float +> %948 = fadd float %947, %946 +> %949 = fmul float %934, %948 +> %950 = fadd float %920, %949 +> %951 = call float @llvm.sqrt.f32.67(float %950) +> %952 = fadd float %951, 0.000000e+00 +> %953 = fdiv float %892, %952 +> %954 = fmul float %879, %953 +> %955 = fsub float 1.000000e+00, %954 +> %956 = insertelement <4 x float> zeroinitializer, float %955, i32 0 +> %957 = insertelement <4 x float> %956, float 0.000000e+00, i32 1 +> %958 = insertelement <4 x float> %957, float 0.000000e+00, i32 2 +> %959 = insertelement <4 x float> %958, float 0.000000e+00, i32 3 +> %960 = getelementptr float, float* %0, i32 0 +> %961 = load float, float* %960, align 4 +> %962 = insertelement <4 x float> zeroinitializer, float %961, i32 0 +> %963 = insertelement <4 x float> %962, float 0.000000e+00, i32 1 +> %964 = insertelement <4 x float> %963, float 0.000000e+00, i32 2 +> %965 = insertelement <4 x float> %964, float 0.000000e+00, i32 3 +> %966 = call <4 x float> @llvm.fma.f32.68(<4 x float> %959, <4 x float> %965, <4 x float> zeroinitializer) +> %967 = extractelement <4 x float> %966, i32 0 +> store float %967, float* %2, align 4 +> %968 = bitcast i32 %152 to float +> %969 = bitcast i32 %152 to float +> %970 = fmul float %968, %969 +> %971 = fadd float %970, 0.000000e+00 +> %972 = bitcast i32 %807 to float +> %973 = bitcast i32 %807 to float +> %974 = fmul float %972, %973 +> %975 = fadd float %971, %974 +> %976 = call float @llvm.sqrt.f32.69(float %975) +> %977 = fneg float %813 +> %978 = fmul float %976, %977 +> %979 = bitcast i32 %152 to float +> %980 = fadd float %979, %978 +> %981 = bitcast i32 %152 to float +> %982 = bitcast i32 %152 to float +> %983 = fmul float %981, %982 +> %984 = fadd float %983, 0.000000e+00 +> %985 = bitcast i32 %807 to float +> %986 = bitcast i32 %807 to float +> %987 = fmul float %985, %986 +> %988 = fadd float %984, %987 +> %989 = call float @llvm.sqrt.f32.70(float %988) +> %990 = fneg float %813 +> %991 = fmul float %989, %990 +> %992 = bitcast i32 %152 to float +> %993 = fadd float %992, %991 +> %994 = bitcast i32 %152 to float +> %995 = bitcast i32 %152 to float +> %996 = fmul float %994, %995 +> %997 = fadd float %996, 0.000000e+00 +> %998 = bitcast i32 %807 to float +> %999 = bitcast i32 %807 to float +> %1000 = fmul float %998, %999 +> %1001 = fadd float %997, %1000 +> %1002 = call float @llvm.sqrt.f32.71(float %1001) +> %1003 = fneg float %813 +> %1004 = fmul float %1002, %1003 +> %1005 = bitcast 
i32 %152 to float +> %1006 = fadd float %1005, %1004 +> %1007 = fmul float %993, %1006 +> %1008 = fadd float %1007, 0.000000e+00 +> %1009 = bitcast i32 %152 to float +> %1010 = bitcast i32 %152 to float +> %1011 = fmul float %1009, %1010 +> %1012 = fadd float %1011, 0.000000e+00 +> %1013 = bitcast i32 %807 to float +> %1014 = bitcast i32 %807 to float +> %1015 = fmul float %1013, %1014 +> %1016 = fadd float %1012, %1015 +> %1017 = call float @llvm.sqrt.f32.72(float %1016) +> %1018 = fneg float %813 +> %1019 = fmul float %1017, %1018 +> %1020 = fmul float %1019, 0.000000e+00 +> %1021 = bitcast i32 %807 to float +> %1022 = fadd float %1021, %1020 +> %1023 = bitcast i32 %152 to float +> %1024 = bitcast i32 %152 to float +> %1025 = fmul float %1023, %1024 +> %1026 = fadd float %1025, 0.000000e+00 +> %1027 = bitcast i32 %807 to float +> %1028 = bitcast i32 %807 to float +> %1029 = fmul float %1027, %1028 +> %1030 = fadd float %1026, %1029 +> %1031 = call float @llvm.sqrt.f32.73(float %1030) +> %1032 = fneg float %813 +> %1033 = fmul float %1031, %1032 +> %1034 = fmul float %1033, 0.000000e+00 +> %1035 = bitcast i32 %807 to float +> %1036 = fadd float %1035, %1034 +> %1037 = fmul float %1022, %1036 +> %1038 = fadd float %1008, %1037 +> %1039 = call float @llvm.sqrt.f32.74(float %1038) +> %1040 = fadd float %1039, 0.000000e+00 +> %1041 = fdiv float %980, %1040 +> %1042 = fmul float %1041, 2.000000e+00 +> %1043 = bitcast i32 %152 to float +> %1044 = bitcast i32 %152 to float +> %1045 = fmul float %1043, %1044 +> %1046 = fadd float %1045, 0.000000e+00 +> %1047 = bitcast i32 %807 to float +> %1048 = bitcast i32 %807 to float +> %1049 = fmul float %1047, %1048 +> %1050 = fadd float %1046, %1049 +> %1051 = call float @llvm.sqrt.f32.75(float %1050) +> %1052 = fneg float %813 +> %1053 = fmul float %1051, %1052 +> %1054 = bitcast i32 %152 to float +> %1055 = fadd float %1054, %1053 +> %1056 = bitcast i32 %152 to float +> %1057 = bitcast i32 %152 to float +> %1058 = fmul float %1056, %1057 +> %1059 = fadd float %1058, 0.000000e+00 +> %1060 = bitcast i32 %807 to float +> %1061 = bitcast i32 %807 to float +> %1062 = fmul float %1060, %1061 +> %1063 = fadd float %1059, %1062 +> %1064 = call float @llvm.sqrt.f32.76(float %1063) +> %1065 = fneg float %813 +> %1066 = fmul float %1064, %1065 +> %1067 = bitcast i32 %152 to float +> %1068 = fadd float %1067, %1066 +> %1069 = bitcast i32 %152 to float +> %1070 = bitcast i32 %152 to float +> %1071 = fmul float %1069, %1070 +> %1072 = fadd float %1071, 0.000000e+00 +> %1073 = bitcast i32 %807 to float +> %1074 = bitcast i32 %807 to float +> %1075 = fmul float %1073, %1074 +> %1076 = fadd float %1072, %1075 +> %1077 = call float @llvm.sqrt.f32.77(float %1076) +> %1078 = fneg float %813 +> %1079 = fmul float %1077, %1078 +> %1080 = bitcast i32 %152 to float +> %1081 = fadd float %1080, %1079 +> %1082 = fmul float %1068, %1081 +> %1083 = fadd float %1082, 0.000000e+00 +> %1084 = bitcast i32 %152 to float +> %1085 = bitcast i32 %152 to float +> %1086 = fmul float %1084, %1085 +> %1087 = fadd float %1086, 0.000000e+00 +> %1088 = bitcast i32 %807 to float +> %1089 = bitcast i32 %807 to float +> %1090 = fmul float %1088, %1089 +> %1091 = fadd float %1087, %1090 +> %1092 = call float @llvm.sqrt.f32.78(float %1091) +> %1093 = fneg float %813 +> %1094 = fmul float %1092, %1093 +> %1095 = fmul float %1094, 0.000000e+00 +> %1096 = bitcast i32 %807 to float +> %1097 = fadd float %1096, %1095 +> %1098 = bitcast i32 %152 to float +> %1099 = bitcast i32 %152 to float +> %1100 = fmul 
float %1098, %1099 +> %1101 = fadd float %1100, 0.000000e+00 +> %1102 = bitcast i32 %807 to float +> %1103 = bitcast i32 %807 to float +> %1104 = fmul float %1102, %1103 +> %1105 = fadd float %1101, %1104 +> %1106 = call float @llvm.sqrt.f32.79(float %1105) +> %1107 = fneg float %813 +> %1108 = fmul float %1106, %1107 +> %1109 = fmul float %1108, 0.000000e+00 +> %1110 = bitcast i32 %807 to float +> %1111 = fadd float %1110, %1109 +> %1112 = fmul float %1097, %1111 +> %1113 = fadd float %1083, %1112 +> %1114 = call float @llvm.sqrt.f32.80(float %1113) +> %1115 = fadd float %1114, 0.000000e+00 +> %1116 = fdiv float %1055, %1115 +> %1117 = fmul float %1042, %1116 +> %1118 = fsub float 1.000000e+00, %1117 +> %1119 = fmul float %1118, %961 +> %1120 = fadd float %1119, 0.000000e+00 +> %1121 = bitcast i32 %152 to float +> %1122 = bitcast i32 %152 to float +> %1123 = fmul float %1121, %1122 +> %1124 = fadd float %1123, 0.000000e+00 +> %1125 = bitcast i32 %807 to float +> %1126 = bitcast i32 %807 to float +> %1127 = fmul float %1125, %1126 +> %1128 = fadd float %1124, %1127 +> %1129 = call float @llvm.sqrt.f32.81(float %1128) +> %1130 = fneg float %813 +> %1131 = fmul float %1129, %1130 +> %1132 = bitcast i32 %152 to float +> %1133 = fadd float %1132, %1131 +> %1134 = bitcast i32 %152 to float +> %1135 = bitcast i32 %152 to float +> %1136 = fmul float %1134, %1135 +> %1137 = fadd float %1136, 0.000000e+00 +> %1138 = bitcast i32 %807 to float +> %1139 = bitcast i32 %807 to float +> %1140 = fmul float %1138, %1139 +> %1141 = fadd float %1137, %1140 +> %1142 = call float @llvm.sqrt.f32.82(float %1141) +> %1143 = fneg float %813 +> %1144 = fmul float %1142, %1143 +> %1145 = bitcast i32 %152 to float +> %1146 = fadd float %1145, %1144 +> %1147 = bitcast i32 %152 to float +> %1148 = bitcast i32 %152 to float +> %1149 = fmul float %1147, %1148 +> %1150 = fadd float %1149, 0.000000e+00 +> %1151 = bitcast i32 %807 to float +> %1152 = bitcast i32 %807 to float +> %1153 = fmul float %1151, %1152 +> %1154 = fadd float %1150, %1153 +> %1155 = call float @llvm.sqrt.f32.83(float %1154) +> %1156 = fneg float %813 +> %1157 = fmul float %1155, %1156 +> %1158 = bitcast i32 %152 to float +> %1159 = fadd float %1158, %1157 +> %1160 = fmul float %1146, %1159 +> %1161 = fadd float %1160, 0.000000e+00 +> %1162 = bitcast i32 %152 to float +> %1163 = bitcast i32 %152 to float +> %1164 = fmul float %1162, %1163 +> %1165 = fadd float %1164, 0.000000e+00 +> %1166 = bitcast i32 %807 to float +> %1167 = bitcast i32 %807 to float +> %1168 = fmul float %1166, %1167 +> %1169 = fadd float %1165, %1168 +> %1170 = call float @llvm.sqrt.f32.84(float %1169) +> %1171 = fneg float %813 +> %1172 = fmul float %1170, %1171 +> %1173 = fmul float %1172, 0.000000e+00 +> %1174 = bitcast i32 %807 to float +> %1175 = fadd float %1174, %1173 +> %1176 = bitcast i32 %152 to float +> %1177 = bitcast i32 %152 to float +> %1178 = fmul float %1176, %1177 +> %1179 = fadd float %1178, 0.000000e+00 +> %1180 = bitcast i32 %807 to float +> %1181 = bitcast i32 %807 to float +> %1182 = fmul float %1180, %1181 +> %1183 = fadd float %1179, %1182 +> %1184 = call float @llvm.sqrt.f32.85(float %1183) +> %1185 = fneg float %813 +> %1186 = fmul float %1184, %1185 +> %1187 = fmul float %1186, 0.000000e+00 +> %1188 = bitcast i32 %807 to float +> %1189 = fadd float %1188, %1187 +> %1190 = fmul float %1175, %1189 +> %1191 = fadd float %1161, %1190 +> %1192 = call float @llvm.sqrt.f32.86(float %1191) +> %1193 = fadd float %1192, 0.000000e+00 +> %1194 = fdiv float %1133, 
%1193 +> %1195 = fmul float %1194, 2.000000e+00 +> %1196 = bitcast i32 %152 to float +> %1197 = bitcast i32 %152 to float +> %1198 = fmul float %1196, %1197 +> %1199 = fadd float %1198, 0.000000e+00 +> %1200 = bitcast i32 %807 to float +> %1201 = bitcast i32 %807 to float +> %1202 = fmul float %1200, %1201 +> %1203 = fadd float %1199, %1202 +> %1204 = call float @llvm.sqrt.f32.87(float %1203) +> %1205 = fneg float %813 +> %1206 = fmul float %1204, %1205 +> %1207 = fmul float %1206, 0.000000e+00 +> %1208 = bitcast i32 %807 to float +> %1209 = fadd float %1208, %1207 +> %1210 = bitcast i32 %152 to float +> %1211 = bitcast i32 %152 to float +> %1212 = fmul float %1210, %1211 +> %1213 = fadd float %1212, 0.000000e+00 +> %1214 = bitcast i32 %807 to float +> %1215 = bitcast i32 %807 to float +> %1216 = fmul float %1214, %1215 +> %1217 = fadd float %1213, %1216 +> %1218 = call float @llvm.sqrt.f32.88(float %1217) +> %1219 = fneg float %813 +> %1220 = fmul float %1218, %1219 +> %1221 = bitcast i32 %152 to float +> %1222 = fadd float %1221, %1220 +> %1223 = bitcast i32 %152 to float +> %1224 = bitcast i32 %152 to float +> %1225 = fmul float %1223, %1224 +> %1226 = fadd float %1225, 0.000000e+00 +> %1227 = bitcast i32 %807 to float +> %1228 = bitcast i32 %807 to float +> %1229 = fmul float %1227, %1228 +> %1230 = fadd float %1226, %1229 +> %1231 = call float @llvm.sqrt.f32.89(float %1230) +> %1232 = fneg float %813 +> %1233 = fmul float %1231, %1232 +> %1234 = bitcast i32 %152 to float +> %1235 = fadd float %1234, %1233 +> %1236 = fmul float %1222, %1235 +> %1237 = fadd float %1236, 0.000000e+00 +> %1238 = bitcast i32 %152 to float +> %1239 = bitcast i32 %152 to float +> %1240 = fmul float %1238, %1239 +> %1241 = fadd float %1240, 0.000000e+00 +> %1242 = bitcast i32 %807 to float +> %1243 = bitcast i32 %807 to float +> %1244 = fmul float %1242, %1243 +> %1245 = fadd float %1241, %1244 +> %1246 = call float @llvm.sqrt.f32.90(float %1245) +> %1247 = fneg float %813 +> %1248 = fmul float %1246, %1247 +> %1249 = fmul float %1248, 0.000000e+00 +> %1250 = bitcast i32 %807 to float +> %1251 = fadd float %1250, %1249 +> %1252 = bitcast i32 %152 to float +> %1253 = bitcast i32 %152 to float +> %1254 = fmul float %1252, %1253 +> %1255 = fadd float %1254, 0.000000e+00 +> %1256 = bitcast i32 %807 to float +> %1257 = bitcast i32 %807 to float +> %1258 = fmul float %1256, %1257 +> %1259 = fadd float %1255, %1258 +> %1260 = call float @llvm.sqrt.f32.91(float %1259) +> %1261 = fneg float %813 +> %1262 = fmul float %1260, %1261 +> %1263 = fmul float %1262, 0.000000e+00 +> %1264 = bitcast i32 %807 to float +> %1265 = fadd float %1264, %1263 +> %1266 = fmul float %1251, %1265 +> %1267 = fadd float %1237, %1266 +> %1268 = call float @llvm.sqrt.f32.92(float %1267) +> %1269 = fadd float %1268, 0.000000e+00 +> %1270 = fdiv float %1209, %1269 +> %1271 = fmul float %1195, %1270 +> %1272 = fneg float %1271 +> %1273 = getelementptr float, float* %0, i32 0 +> %1274 = getelementptr inbounds float, float* %1273, i64 2 +> %1275 = load float, float* %1274, align 4 +> %1276 = fmul float %1272, %1275 +> %1277 = fadd float %1120, %1276 +> %1278 = insertelement <4 x float> zeroinitializer, float %1277, i32 0 +> %1279 = insertelement <4 x float> %1278, float 0.000000e+00, i32 1 +> %1280 = insertelement <4 x float> %1279, float 0.000000e+00, i32 2 +> %1281 = insertelement <4 x float> %1280, float 0.000000e+00, i32 3 +> %1282 = extractelement <4 x float> %1281, i32 0 +> store float %1282, float* %2, align 4 +> %1283 = extractelement <4 x 
float> %1281, i32 1 +> %1284 = getelementptr float, float* %2, i32 0 +> %1285 = getelementptr inbounds float, float* %1284, i64 1 +> store float %1283, float* %1285, align 4 +> %1286 = bitcast i32 %152 to float +> %1287 = bitcast i32 %152 to float +> %1288 = fmul float %1286, %1287 +> %1289 = fadd float %1288, 0.000000e+00 +> %1290 = bitcast i32 %807 to float +> %1291 = bitcast i32 %807 to float +> %1292 = fmul float %1290, %1291 +> %1293 = fadd float %1289, %1292 +> %1294 = call float @llvm.sqrt.f32.93(float %1293) +> %1295 = fneg float %813 +> %1296 = fmul float %1294, %1295 +> %1297 = bitcast i32 %152 to float +> %1298 = fadd float %1297, %1296 +> %1299 = bitcast i32 %152 to float +> %1300 = bitcast i32 %152 to float +> %1301 = fmul float %1299, %1300 +> %1302 = fadd float %1301, 0.000000e+00 +> %1303 = bitcast i32 %807 to float +> %1304 = bitcast i32 %807 to float +> %1305 = fmul float %1303, %1304 +> %1306 = fadd float %1302, %1305 +> %1307 = call float @llvm.sqrt.f32.94(float %1306) +> %1308 = fneg float %813 +> %1309 = fmul float %1307, %1308 +> %1310 = bitcast i32 %152 to float +> %1311 = fadd float %1310, %1309 +> %1312 = bitcast i32 %152 to float +> %1313 = bitcast i32 %152 to float +> %1314 = fmul float %1312, %1313 +> %1315 = fadd float %1314, 0.000000e+00 +> %1316 = bitcast i32 %807 to float +> %1317 = bitcast i32 %807 to float +> %1318 = fmul float %1316, %1317 +> %1319 = fadd float %1315, %1318 +> %1320 = call float @llvm.sqrt.f32.95(float %1319) +> %1321 = fneg float %813 +> %1322 = fmul float %1320, %1321 +> %1323 = bitcast i32 %152 to float +> %1324 = fadd float %1323, %1322 +> %1325 = fmul float %1311, %1324 +> %1326 = fadd float %1325, 0.000000e+00 +> %1327 = bitcast i32 %152 to float +> %1328 = bitcast i32 %152 to float +> %1329 = fmul float %1327, %1328 +> %1330 = fadd float %1329, 0.000000e+00 +> %1331 = bitcast i32 %807 to float +> %1332 = bitcast i32 %807 to float +> %1333 = fmul float %1331, %1332 +> %1334 = fadd float %1330, %1333 +> %1335 = call float @llvm.sqrt.f32.96(float %1334) +> %1336 = fneg float %813 +> %1337 = fmul float %1335, %1336 +> %1338 = fmul float %1337, 0.000000e+00 +> %1339 = bitcast i32 %807 to float +> %1340 = fadd float %1339, %1338 +> %1341 = bitcast i32 %152 to float +> %1342 = bitcast i32 %152 to float +> %1343 = fmul float %1341, %1342 +> %1344 = fadd float %1343, 0.000000e+00 +> %1345 = bitcast i32 %807 to float +> %1346 = bitcast i32 %807 to float +> %1347 = fmul float %1345, %1346 +> %1348 = fadd float %1344, %1347 +> %1349 = call float @llvm.sqrt.f32.97(float %1348) +> %1350 = fneg float %813 +> %1351 = fmul float %1349, %1350 +> %1352 = fmul float %1351, 0.000000e+00 +> %1353 = bitcast i32 %807 to float +> %1354 = fadd float %1353, %1352 +> %1355 = fmul float %1340, %1354 +> %1356 = fadd float %1326, %1355 +> %1357 = call float @llvm.sqrt.f32.98(float %1356) +> %1358 = fadd float %1357, 0.000000e+00 +> %1359 = fdiv float %1298, %1358 +> %1360 = fmul float %1359, 2.000000e+00 +> %1361 = bitcast i32 %152 to float +> %1362 = bitcast i32 %152 to float +> %1363 = fmul float %1361, %1362 +> %1364 = fadd float %1363, 0.000000e+00 +> %1365 = bitcast i32 %807 to float +> %1366 = bitcast i32 %807 to float +> %1367 = fmul float %1365, %1366 +> %1368 = fadd float %1364, %1367 +> %1369 = call float @llvm.sqrt.f32.99(float %1368) +> %1370 = fneg float %813 +> %1371 = fmul float %1369, %1370 +> %1372 = bitcast i32 %152 to float +> %1373 = fadd float %1372, %1371 +> %1374 = bitcast i32 %152 to float +> %1375 = bitcast i32 %152 to float +> %1376 = 
fmul float %1374, %1375 +> %1377 = fadd float %1376, 0.000000e+00 +> %1378 = bitcast i32 %807 to float +> %1379 = bitcast i32 %807 to float +> %1380 = fmul float %1378, %1379 +> %1381 = fadd float %1377, %1380 +> %1382 = call float @llvm.sqrt.f32.100(float %1381) +> %1383 = fneg float %813 +> %1384 = fmul float %1382, %1383 +> %1385 = bitcast i32 %152 to float +> %1386 = fadd float %1385, %1384 +> %1387 = bitcast i32 %152 to float +> %1388 = bitcast i32 %152 to float +> %1389 = fmul float %1387, %1388 +> %1390 = fadd float %1389, 0.000000e+00 +> %1391 = bitcast i32 %807 to float +> %1392 = bitcast i32 %807 to float +> %1393 = fmul float %1391, %1392 +> %1394 = fadd float %1390, %1393 +> %1395 = call float @llvm.sqrt.f32.101(float %1394) +> %1396 = fneg float %813 +> %1397 = fmul float %1395, %1396 +> %1398 = bitcast i32 %152 to float +> %1399 = fadd float %1398, %1397 +> %1400 = fmul float %1386, %1399 +> %1401 = fadd float %1400, 0.000000e+00 +> %1402 = bitcast i32 %152 to float +> %1403 = bitcast i32 %152 to float +> %1404 = fmul float %1402, %1403 +> %1405 = fadd float %1404, 0.000000e+00 +> %1406 = bitcast i32 %807 to float +> %1407 = bitcast i32 %807 to float +> %1408 = fmul float %1406, %1407 +> %1409 = fadd float %1405, %1408 +> %1410 = call float @llvm.sqrt.f32.102(float %1409) +> %1411 = fneg float %813 +> %1412 = fmul float %1410, %1411 +> %1413 = fmul float %1412, 0.000000e+00 +> %1414 = bitcast i32 %807 to float +> %1415 = fadd float %1414, %1413 +> %1416 = bitcast i32 %152 to float +> %1417 = bitcast i32 %152 to float +> %1418 = fmul float %1416, %1417 +> %1419 = fadd float %1418, 0.000000e+00 +> %1420 = bitcast i32 %807 to float +> %1421 = bitcast i32 %807 to float +> %1422 = fmul float %1420, %1421 +> %1423 = fadd float %1419, %1422 +> %1424 = call float @llvm.sqrt.f32.103(float %1423) +> %1425 = fneg float %813 +> %1426 = fmul float %1424, %1425 +> %1427 = fmul float %1426, 0.000000e+00 +> %1428 = bitcast i32 %807 to float +> %1429 = fadd float %1428, %1427 +> %1430 = fmul float %1415, %1429 +> %1431 = fadd float %1401, %1430 +> %1432 = call float @llvm.sqrt.f32.104(float %1431) +> %1433 = fadd float %1432, 0.000000e+00 +> %1434 = fdiv float %1373, %1433 +> %1435 = fmul float %1360, %1434 +> %1436 = fsub float 1.000000e+00, %1435 +> %1437 = insertelement <4 x float> zeroinitializer, float %1436, i32 0 +> %1438 = insertelement <4 x float> %1437, float 0.000000e+00, i32 1 +> %1439 = insertelement <4 x float> %1438, float 0.000000e+00, i32 2 +> %1440 = insertelement <4 x float> %1439, float 0.000000e+00, i32 3 +> %1441 = getelementptr float, float* %0, i32 0 +> %1442 = getelementptr inbounds float, float* %1441, i64 1 +> %1443 = load float, float* %1442, align 4 +> %1444 = insertelement <4 x float> zeroinitializer, float %1443, i32 0 +> %1445 = insertelement <4 x float> %1444, float 0.000000e+00, i32 1 +> %1446 = insertelement <4 x float> %1445, float 0.000000e+00, i32 2 +> %1447 = insertelement <4 x float> %1446, float 0.000000e+00, i32 3 +> %1448 = call <4 x float> @llvm.fma.f32.105(<4 x float> %1440, <4 x float> %1447, <4 x float> zeroinitializer) +> %1449 = extractelement <4 x float> %1448, i32 0 +> store float %1449, float* %1285, align 4 +> %1450 = bitcast i32 %152 to float +> %1451 = bitcast i32 %152 to float +> %1452 = fmul float %1450, %1451 +> %1453 = fadd float %1452, 0.000000e+00 +> %1454 = bitcast i32 %807 to float +> %1455 = bitcast i32 %807 to float +> %1456 = fmul float %1454, %1455 +> %1457 = fadd float %1453, %1456 +> %1458 = call float 
@llvm.sqrt.f32.106(float %1457) +> %1459 = fneg float %813 +> %1460 = fmul float %1458, %1459 +> %1461 = bitcast i32 %152 to float +> %1462 = fadd float %1461, %1460 +> %1463 = bitcast i32 %152 to float +> %1464 = bitcast i32 %152 to float +> %1465 = fmul float %1463, %1464 +> %1466 = fadd float %1465, 0.000000e+00 +> %1467 = bitcast i32 %807 to float +> %1468 = bitcast i32 %807 to float +> %1469 = fmul float %1467, %1468 +> %1470 = fadd float %1466, %1469 +> %1471 = call float @llvm.sqrt.f32.107(float %1470) +> %1472 = fneg float %813 +> %1473 = fmul float %1471, %1472 +> %1474 = bitcast i32 %152 to float +> %1475 = fadd float %1474, %1473 +> %1476 = bitcast i32 %152 to float +> %1477 = bitcast i32 %152 to float +> %1478 = fmul float %1476, %1477 +> %1479 = fadd float %1478, 0.000000e+00 +> %1480 = bitcast i32 %807 to float +> %1481 = bitcast i32 %807 to float +> %1482 = fmul float %1480, %1481 +> %1483 = fadd float %1479, %1482 +> %1484 = call float @llvm.sqrt.f32.108(float %1483) +> %1485 = fneg float %813 +> %1486 = fmul float %1484, %1485 +> %1487 = bitcast i32 %152 to float +> %1488 = fadd float %1487, %1486 +> %1489 = fmul float %1475, %1488 +> %1490 = fadd float %1489, 0.000000e+00 +> %1491 = bitcast i32 %152 to float +> %1492 = bitcast i32 %152 to float +> %1493 = fmul float %1491, %1492 +> %1494 = fadd float %1493, 0.000000e+00 +> %1495 = bitcast i32 %807 to float +> %1496 = bitcast i32 %807 to float +> %1497 = fmul float %1495, %1496 +> %1498 = fadd float %1494, %1497 +> %1499 = call float @llvm.sqrt.f32.109(float %1498) +> %1500 = fneg float %813 +> %1501 = fmul float %1499, %1500 +> %1502 = fmul float %1501, 0.000000e+00 +> %1503 = bitcast i32 %807 to float +> %1504 = fadd float %1503, %1502 +> %1505 = bitcast i32 %152 to float +> %1506 = bitcast i32 %152 to float +> %1507 = fmul float %1505, %1506 +> %1508 = fadd float %1507, 0.000000e+00 +> %1509 = bitcast i32 %807 to float +> %1510 = bitcast i32 %807 to float +> %1511 = fmul float %1509, %1510 +> %1512 = fadd float %1508, %1511 +> %1513 = call float @llvm.sqrt.f32.110(float %1512) +> %1514 = fneg float %813 +> %1515 = fmul float %1513, %1514 +> %1516 = fmul float %1515, 0.000000e+00 +> %1517 = bitcast i32 %807 to float +> %1518 = fadd float %1517, %1516 +> %1519 = fmul float %1504, %1518 +> %1520 = fadd float %1490, %1519 +> %1521 = call float @llvm.sqrt.f32.111(float %1520) +> %1522 = fadd float %1521, 0.000000e+00 +> %1523 = fdiv float %1462, %1522 +> %1524 = fmul float %1523, 2.000000e+00 +> %1525 = bitcast i32 %152 to float +> %1526 = bitcast i32 %152 to float +> %1527 = fmul float %1525, %1526 +> %1528 = fadd float %1527, 0.000000e+00 +> %1529 = bitcast i32 %807 to float +> %1530 = bitcast i32 %807 to float +> %1531 = fmul float %1529, %1530 +> %1532 = fadd float %1528, %1531 +> %1533 = call float @llvm.sqrt.f32.112(float %1532) +> %1534 = fneg float %813 +> %1535 = fmul float %1533, %1534 +> %1536 = bitcast i32 %152 to float +> %1537 = fadd float %1536, %1535 +> %1538 = bitcast i32 %152 to float +> %1539 = bitcast i32 %152 to float +> %1540 = fmul float %1538, %1539 +> %1541 = fadd float %1540, 0.000000e+00 +> %1542 = bitcast i32 %807 to float +> %1543 = bitcast i32 %807 to float +> %1544 = fmul float %1542, %1543 +> %1545 = fadd float %1541, %1544 +> %1546 = call float @llvm.sqrt.f32.113(float %1545) +> %1547 = fneg float %813 +> %1548 = fmul float %1546, %1547 +> %1549 = bitcast i32 %152 to float +> %1550 = fadd float %1549, %1548 +> %1551 = bitcast i32 %152 to float +> %1552 = bitcast i32 %152 to float +> %1553 = 
fmul float %1551, %1552 +> %1554 = fadd float %1553, 0.000000e+00 +> %1555 = bitcast i32 %807 to float +> %1556 = bitcast i32 %807 to float +> %1557 = fmul float %1555, %1556 +> %1558 = fadd float %1554, %1557 +> %1559 = call float @llvm.sqrt.f32.114(float %1558) +> %1560 = fneg float %813 +> %1561 = fmul float %1559, %1560 +> %1562 = bitcast i32 %152 to float +> %1563 = fadd float %1562, %1561 +> %1564 = fmul float %1550, %1563 +> %1565 = fadd float %1564, 0.000000e+00 +> %1566 = bitcast i32 %152 to float +> %1567 = bitcast i32 %152 to float +> %1568 = fmul float %1566, %1567 +> %1569 = fadd float %1568, 0.000000e+00 +> %1570 = bitcast i32 %807 to float +> %1571 = bitcast i32 %807 to float +> %1572 = fmul float %1570, %1571 +> %1573 = fadd float %1569, %1572 +> %1574 = call float @llvm.sqrt.f32.115(float %1573) +> %1575 = fneg float %813 +> %1576 = fmul float %1574, %1575 +> %1577 = fmul float %1576, 0.000000e+00 +> %1578 = bitcast i32 %807 to float +> %1579 = fadd float %1578, %1577 +> %1580 = bitcast i32 %152 to float +> %1581 = bitcast i32 %152 to float +> %1582 = fmul float %1580, %1581 +> %1583 = fadd float %1582, 0.000000e+00 +> %1584 = bitcast i32 %807 to float +> %1585 = bitcast i32 %807 to float +> %1586 = fmul float %1584, %1585 +> %1587 = fadd float %1583, %1586 +> %1588 = call float @llvm.sqrt.f32.116(float %1587) +> %1589 = fneg float %813 +> %1590 = fmul float %1588, %1589 +> %1591 = fmul float %1590, 0.000000e+00 +> %1592 = bitcast i32 %807 to float +> %1593 = fadd float %1592, %1591 +> %1594 = fmul float %1579, %1593 +> %1595 = fadd float %1565, %1594 +> %1596 = call float @llvm.sqrt.f32.117(float %1595) +> %1597 = fadd float %1596, 0.000000e+00 +> %1598 = fdiv float %1537, %1597 +> %1599 = fmul float %1524, %1598 +> %1600 = fsub float 1.000000e+00, %1599 +> %1601 = fmul float %1600, %1443 +> %1602 = fadd float %1601, 0.000000e+00 +> %1603 = bitcast i32 %152 to float +> %1604 = bitcast i32 %152 to float +> %1605 = fmul float %1603, %1604 +> %1606 = fadd float %1605, 0.000000e+00 +> %1607 = bitcast i32 %807 to float +> %1608 = bitcast i32 %807 to float +> %1609 = fmul float %1607, %1608 +> %1610 = fadd float %1606, %1609 +> %1611 = call float @llvm.sqrt.f32.118(float %1610) +> %1612 = fneg float %813 +> %1613 = fmul float %1611, %1612 +> %1614 = bitcast i32 %152 to float +> %1615 = fadd float %1614, %1613 +> %1616 = bitcast i32 %152 to float +> %1617 = bitcast i32 %152 to float +> %1618 = fmul float %1616, %1617 +> %1619 = fadd float %1618, 0.000000e+00 +> %1620 = bitcast i32 %807 to float +> %1621 = bitcast i32 %807 to float +> %1622 = fmul float %1620, %1621 +> %1623 = fadd float %1619, %1622 +> %1624 = call float @llvm.sqrt.f32.119(float %1623) +> %1625 = fneg float %813 +> %1626 = fmul float %1624, %1625 +> %1627 = bitcast i32 %152 to float +> %1628 = fadd float %1627, %1626 +> %1629 = bitcast i32 %152 to float +> %1630 = bitcast i32 %152 to float +> %1631 = fmul float %1629, %1630 +> %1632 = fadd float %1631, 0.000000e+00 +> %1633 = bitcast i32 %807 to float +> %1634 = bitcast i32 %807 to float +> %1635 = fmul float %1633, %1634 +> %1636 = fadd float %1632, %1635 +> %1637 = call float @llvm.sqrt.f32.120(float %1636) +> %1638 = fneg float %813 +> %1639 = fmul float %1637, %1638 +> %1640 = bitcast i32 %152 to float +> %1641 = fadd float %1640, %1639 +> %1642 = fmul float %1628, %1641 +> %1643 = fadd float %1642, 0.000000e+00 +> %1644 = bitcast i32 %152 to float +> %1645 = bitcast i32 %152 to float +> %1646 = fmul float %1644, %1645 +> %1647 = fadd float %1646, 
0.000000e+00 +> %1648 = bitcast i32 %807 to float +> %1649 = bitcast i32 %807 to float +> %1650 = fmul float %1648, %1649 +> %1651 = fadd float %1647, %1650 +> %1652 = call float @llvm.sqrt.f32.121(float %1651) +> %1653 = fneg float %813 +> %1654 = fmul float %1652, %1653 +> %1655 = fmul float %1654, 0.000000e+00 +> %1656 = bitcast i32 %807 to float +> %1657 = fadd float %1656, %1655 +> %1658 = bitcast i32 %152 to float +> %1659 = bitcast i32 %152 to float +> %1660 = fmul float %1658, %1659 +> %1661 = fadd float %1660, 0.000000e+00 +> %1662 = bitcast i32 %807 to float +> %1663 = bitcast i32 %807 to float +> %1664 = fmul float %1662, %1663 +> %1665 = fadd float %1661, %1664 +> %1666 = call float @llvm.sqrt.f32.122(float %1665) +> %1667 = fneg float %813 +> %1668 = fmul float %1666, %1667 +> %1669 = fmul float %1668, 0.000000e+00 +> %1670 = bitcast i32 %807 to float +> %1671 = fadd float %1670, %1669 +> %1672 = fmul float %1657, %1671 +> %1673 = fadd float %1643, %1672 +> %1674 = call float @llvm.sqrt.f32.123(float %1673) +> %1675 = fadd float %1674, 0.000000e+00 +> %1676 = fdiv float %1615, %1675 +> %1677 = fmul float %1676, 2.000000e+00 +> %1678 = bitcast i32 %152 to float +> %1679 = bitcast i32 %152 to float +> %1680 = fmul float %1678, %1679 +> %1681 = fadd float %1680, 0.000000e+00 +> %1682 = bitcast i32 %807 to float +> %1683 = bitcast i32 %807 to float +> %1684 = fmul float %1682, %1683 +> %1685 = fadd float %1681, %1684 +> %1686 = call float @llvm.sqrt.f32.124(float %1685) +> %1687 = fneg float %813 +> %1688 = fmul float %1686, %1687 +> %1689 = fmul float %1688, 0.000000e+00 +> %1690 = bitcast i32 %807 to float +> %1691 = fadd float %1690, %1689 +> %1692 = bitcast i32 %152 to float +> %1693 = bitcast i32 %152 to float +> %1694 = fmul float %1692, %1693 +> %1695 = fadd float %1694, 0.000000e+00 +> %1696 = bitcast i32 %807 to float +> %1697 = bitcast i32 %807 to float +> %1698 = fmul float %1696, %1697 +> %1699 = fadd float %1695, %1698 +> %1700 = call float @llvm.sqrt.f32.125(float %1699) +> %1701 = fneg float %813 +> %1702 = fmul float %1700, %1701 +> %1703 = bitcast i32 %152 to float +> %1704 = fadd float %1703, %1702 +> %1705 = bitcast i32 %152 to float +> %1706 = bitcast i32 %152 to float +> %1707 = fmul float %1705, %1706 +> %1708 = fadd float %1707, 0.000000e+00 +> %1709 = bitcast i32 %807 to float +> %1710 = bitcast i32 %807 to float +> %1711 = fmul float %1709, %1710 +> %1712 = fadd float %1708, %1711 +> %1713 = call float @llvm.sqrt.f32.126(float %1712) +> %1714 = fneg float %813 +> %1715 = fmul float %1713, %1714 +> %1716 = bitcast i32 %152 to float +> %1717 = fadd float %1716, %1715 +> %1718 = fmul float %1704, %1717 +> %1719 = fadd float %1718, 0.000000e+00 +> %1720 = bitcast i32 %152 to float +> %1721 = bitcast i32 %152 to float +> %1722 = fmul float %1720, %1721 +> %1723 = fadd float %1722, 0.000000e+00 +> %1724 = bitcast i32 %807 to float +> %1725 = bitcast i32 %807 to float +> %1726 = fmul float %1724, %1725 +> %1727 = fadd float %1723, %1726 +> %1728 = call float @llvm.sqrt.f32.127(float %1727) +> %1729 = fneg float %813 +> %1730 = fmul float %1728, %1729 +> %1731 = fmul float %1730, 0.000000e+00 +> %1732 = bitcast i32 %807 to float +> %1733 = fadd float %1732, %1731 +> %1734 = bitcast i32 %152 to float +> %1735 = bitcast i32 %152 to float +> %1736 = fmul float %1734, %1735 +> %1737 = fadd float %1736, 0.000000e+00 +> %1738 = bitcast i32 %807 to float +> %1739 = bitcast i32 %807 to float +> %1740 = fmul float %1738, %1739 +> %1741 = fadd float %1737, %1740 +> %1742 = 
call float @llvm.sqrt.f32.128(float %1741) +> %1743 = fneg float %813 +> %1744 = fmul float %1742, %1743 +> %1745 = fmul float %1744, 0.000000e+00 +> %1746 = bitcast i32 %807 to float +> %1747 = fadd float %1746, %1745 +> %1748 = fmul float %1733, %1747 +> %1749 = fadd float %1719, %1748 +> %1750 = call float @llvm.sqrt.f32.129(float %1749) +> %1751 = fadd float %1750, 0.000000e+00 +> %1752 = fdiv float %1691, %1751 +> %1753 = fmul float %1677, %1752 +> %1754 = fneg float %1753 +> %1755 = load float, float* %144, align 4 +> %1756 = fmul float %1754, %1755 +> %1757 = fadd float %1602, %1756 +> %1758 = insertelement <4 x float> zeroinitializer, float %1757, i32 0 +> %1759 = insertelement <4 x float> %1758, float 0.000000e+00, i32 1 +> %1760 = insertelement <4 x float> %1759, float 0.000000e+00, i32 2 +> %1761 = insertelement <4 x float> %1760, float 0.000000e+00, i32 3 +> %1762 = extractelement <4 x float> %1761, i32 0 +> store float %1762, float* %1285, align 4 +> %1763 = extractelement <4 x float> %1761, i32 1 +> %1764 = getelementptr float, float* %2, i32 0 +> %1765 = getelementptr inbounds float, float* %1764, i64 2 +> store float %1763, float* %1765, align 4 +> %1766 = bitcast i32 %152 to float +> %1767 = bitcast i32 %152 to float +> %1768 = fmul float %1766, %1767 +> %1769 = fadd float %1768, 0.000000e+00 +> %1770 = bitcast i32 %807 to float +> %1771 = bitcast i32 %807 to float +> %1772 = fmul float %1770, %1771 +> %1773 = fadd float %1769, %1772 +> %1774 = call float @llvm.sqrt.f32.130(float %1773) +> %1775 = fneg float %813 +> %1776 = fmul float %1774, %1775 +> %1777 = fmul float %1776, 0.000000e+00 +> %1778 = bitcast i32 %807 to float +> %1779 = fadd float %1778, %1777 +> %1780 = bitcast i32 %152 to float +> %1781 = bitcast i32 %152 to float +> %1782 = fmul float %1780, %1781 +> %1783 = fadd float %1782, 0.000000e+00 +> %1784 = bitcast i32 %807 to float +> %1785 = bitcast i32 %807 to float +> %1786 = fmul float %1784, %1785 +> %1787 = fadd float %1783, %1786 +> %1788 = call float @llvm.sqrt.f32.131(float %1787) +> %1789 = fneg float %813 +> %1790 = fmul float %1788, %1789 +> %1791 = bitcast i32 %152 to float +> %1792 = fadd float %1791, %1790 +> %1793 = bitcast i32 %152 to float +> %1794 = bitcast i32 %152 to float +> %1795 = fmul float %1793, %1794 +> %1796 = fadd float %1795, 0.000000e+00 +> %1797 = bitcast i32 %807 to float +> %1798 = bitcast i32 %807 to float +> %1799 = fmul float %1797, %1798 +> %1800 = fadd float %1796, %1799 +> %1801 = call float @llvm.sqrt.f32.132(float %1800) +> %1802 = fneg float %813 +> %1803 = fmul float %1801, %1802 +> %1804 = bitcast i32 %152 to float +> %1805 = fadd float %1804, %1803 +> %1806 = fmul float %1792, %1805 +> %1807 = fadd float %1806, 0.000000e+00 +> %1808 = bitcast i32 %152 to float +> %1809 = bitcast i32 %152 to float +> %1810 = fmul float %1808, %1809 +> %1811 = fadd float %1810, 0.000000e+00 +> %1812 = bitcast i32 %807 to float +> %1813 = bitcast i32 %807 to float +> %1814 = fmul float %1812, %1813 +> %1815 = fadd float %1811, %1814 +> %1816 = call float @llvm.sqrt.f32.133(float %1815) +> %1817 = fneg float %813 +> %1818 = fmul float %1816, %1817 +> %1819 = fmul float %1818, 0.000000e+00 +> %1820 = bitcast i32 %807 to float +> %1821 = fadd float %1820, %1819 +> %1822 = bitcast i32 %152 to float +> %1823 = bitcast i32 %152 to float +> %1824 = fmul float %1822, %1823 +> %1825 = fadd float %1824, 0.000000e+00 +> %1826 = bitcast i32 %807 to float +> %1827 = bitcast i32 %807 to float +> %1828 = fmul float %1826, %1827 +> %1829 = fadd float 
%1825, %1828 +> %1830 = call float @llvm.sqrt.f32.134(float %1829) +> %1831 = fneg float %813 +> %1832 = fmul float %1830, %1831 +> %1833 = fmul float %1832, 0.000000e+00 +> %1834 = bitcast i32 %807 to float +> %1835 = fadd float %1834, %1833 +> %1836 = fmul float %1821, %1835 +> %1837 = fadd float %1807, %1836 +> %1838 = call float @llvm.sqrt.f32.135(float %1837) +> %1839 = fadd float %1838, 0.000000e+00 +> %1840 = fdiv float %1779, %1839 +> %1841 = fmul float %1840, 2.000000e+00 +> %1842 = bitcast i32 %152 to float +> %1843 = bitcast i32 %152 to float +> %1844 = fmul float %1842, %1843 +> %1845 = fadd float %1844, 0.000000e+00 +> %1846 = bitcast i32 %807 to float +> %1847 = bitcast i32 %807 to float +> %1848 = fmul float %1846, %1847 +> %1849 = fadd float %1845, %1848 +> %1850 = call float @llvm.sqrt.f32.136(float %1849) +> %1851 = fneg float %813 +> %1852 = fmul float %1850, %1851 +> %1853 = bitcast i32 %152 to float +> %1854 = fadd float %1853, %1852 +> %1855 = bitcast i32 %152 to float +> %1856 = bitcast i32 %152 to float +> %1857 = fmul float %1855, %1856 +> %1858 = fadd float %1857, 0.000000e+00 +> %1859 = bitcast i32 %807 to float +> %1860 = bitcast i32 %807 to float +> %1861 = fmul float %1859, %1860 +> %1862 = fadd float %1858, %1861 +> %1863 = call float @llvm.sqrt.f32.137(float %1862) +> %1864 = fneg float %813 +> %1865 = fmul float %1863, %1864 +> %1866 = bitcast i32 %152 to float +> %1867 = fadd float %1866, %1865 +> %1868 = bitcast i32 %152 to float +> %1869 = bitcast i32 %152 to float +> %1870 = fmul float %1868, %1869 +> %1871 = fadd float %1870, 0.000000e+00 +> %1872 = bitcast i32 %807 to float +> %1873 = bitcast i32 %807 to float +> %1874 = fmul float %1872, %1873 +> %1875 = fadd float %1871, %1874 +> %1876 = call float @llvm.sqrt.f32.138(float %1875) +> %1877 = fneg float %813 +> %1878 = fmul float %1876, %1877 +> %1879 = bitcast i32 %152 to float +> %1880 = fadd float %1879, %1878 +> %1881 = fmul float %1867, %1880 +> %1882 = fadd float %1881, 0.000000e+00 +> %1883 = bitcast i32 %152 to float +> %1884 = bitcast i32 %152 to float +> %1885 = fmul float %1883, %1884 +> %1886 = fadd float %1885, 0.000000e+00 +> %1887 = bitcast i32 %807 to float +> %1888 = bitcast i32 %807 to float +> %1889 = fmul float %1887, %1888 +> %1890 = fadd float %1886, %1889 +> %1891 = call float @llvm.sqrt.f32.139(float %1890) +> %1892 = fneg float %813 +> %1893 = fmul float %1891, %1892 +> %1894 = fmul float %1893, 0.000000e+00 +> %1895 = bitcast i32 %807 to float +> %1896 = fadd float %1895, %1894 +> %1897 = bitcast i32 %152 to float +> %1898 = bitcast i32 %152 to float +> %1899 = fmul float %1897, %1898 +> %1900 = fadd float %1899, 0.000000e+00 +> %1901 = bitcast i32 %807 to float +> %1902 = bitcast i32 %807 to float +> %1903 = fmul float %1901, %1902 +> %1904 = fadd float %1900, %1903 +> %1905 = call float @llvm.sqrt.f32.140(float %1904) +> %1906 = fneg float %813 +> %1907 = fmul float %1905, %1906 +> %1908 = fmul float %1907, 0.000000e+00 +> %1909 = bitcast i32 %807 to float +> %1910 = fadd float %1909, %1908 +> %1911 = fmul float %1896, %1910 +> %1912 = fadd float %1882, %1911 +> %1913 = call float @llvm.sqrt.f32.141(float %1912) +> %1914 = fadd float %1913, 0.000000e+00 +> %1915 = fdiv float %1854, %1914 +> %1916 = fmul float %1841, %1915 +> %1917 = fneg float %1916 +> %1918 = insertelement <4 x float> zeroinitializer, float %1917, i32 0 +> %1919 = insertelement <4 x float> %1918, float 0.000000e+00, i32 1 +> %1920 = insertelement <4 x float> %1919, float 0.000000e+00, i32 2 +> %1921 = 
insertelement <4 x float> %1920, float 0.000000e+00, i32 3 +> %1922 = getelementptr float, float* %0, i32 0 +> %1923 = load float, float* %1922, align 4 +> %1924 = insertelement <4 x float> zeroinitializer, float %1923, i32 0 +> %1925 = insertelement <4 x float> %1924, float 0.000000e+00, i32 1 +> %1926 = insertelement <4 x float> %1925, float 0.000000e+00, i32 2 +> %1927 = insertelement <4 x float> %1926, float 0.000000e+00, i32 3 +> %1928 = call <4 x float> @llvm.fma.f32.142(<4 x float> %1921, <4 x float> %1927, <4 x float> zeroinitializer) +> %1929 = extractelement <4 x float> %1928, i32 0 +> store float %1929, float* %1765, align 4 +> %1930 = bitcast i32 %152 to float +> %1931 = bitcast i32 %152 to float +> %1932 = fmul float %1930, %1931 +> %1933 = fadd float %1932, 0.000000e+00 +> %1934 = bitcast i32 %807 to float +> %1935 = bitcast i32 %807 to float +> %1936 = fmul float %1934, %1935 +> %1937 = fadd float %1933, %1936 +> %1938 = call float @llvm.sqrt.f32.143(float %1937) +> %1939 = fneg float %813 +> %1940 = fmul float %1938, %1939 +> %1941 = fmul float %1940, 0.000000e+00 +> %1942 = bitcast i32 %807 to float +> %1943 = fadd float %1942, %1941 +> %1944 = bitcast i32 %152 to float +> %1945 = bitcast i32 %152 to float +> %1946 = fmul float %1944, %1945 +> %1947 = fadd float %1946, 0.000000e+00 +> %1948 = bitcast i32 %807 to float +> %1949 = bitcast i32 %807 to float +> %1950 = fmul float %1948, %1949 +> %1951 = fadd float %1947, %1950 +> %1952 = call float @llvm.sqrt.f32.144(float %1951) +> %1953 = fneg float %813 +> %1954 = fmul float %1952, %1953 +> %1955 = bitcast i32 %152 to float +> %1956 = fadd float %1955, %1954 +> %1957 = bitcast i32 %152 to float +> %1958 = bitcast i32 %152 to float +> %1959 = fmul float %1957, %1958 +> %1960 = fadd float %1959, 0.000000e+00 +> %1961 = bitcast i32 %807 to float +> %1962 = bitcast i32 %807 to float +> %1963 = fmul float %1961, %1962 +> %1964 = fadd float %1960, %1963 +> %1965 = call float @llvm.sqrt.f32.145(float %1964) +> %1966 = fneg float %813 +> %1967 = fmul float %1965, %1966 +> %1968 = bitcast i32 %152 to float +> %1969 = fadd float %1968, %1967 +> %1970 = fmul float %1956, %1969 +> %1971 = fadd float %1970, 0.000000e+00 +> %1972 = bitcast i32 %152 to float +> %1973 = bitcast i32 %152 to float +> %1974 = fmul float %1972, %1973 +> %1975 = fadd float %1974, 0.000000e+00 +> %1976 = bitcast i32 %807 to float +> %1977 = bitcast i32 %807 to float +> %1978 = fmul float %1976, %1977 +> %1979 = fadd float %1975, %1978 +> %1980 = call float @llvm.sqrt.f32.146(float %1979) +> %1981 = fneg float %813 +> %1982 = fmul float %1980, %1981 +> %1983 = fmul float %1982, 0.000000e+00 +> %1984 = bitcast i32 %807 to float +> %1985 = fadd float %1984, %1983 +> %1986 = bitcast i32 %152 to float +> %1987 = bitcast i32 %152 to float +> %1988 = fmul float %1986, %1987 +> %1989 = fadd float %1988, 0.000000e+00 +> %1990 = bitcast i32 %807 to float +> %1991 = bitcast i32 %807 to float +> %1992 = fmul float %1990, %1991 +> %1993 = fadd float %1989, %1992 +> %1994 = call float @llvm.sqrt.f32.147(float %1993) +> %1995 = fneg float %813 +> %1996 = fmul float %1994, %1995 +> %1997 = fmul float %1996, 0.000000e+00 +> %1998 = bitcast i32 %807 to float +> %1999 = fadd float %1998, %1997 +> %2000 = fmul float %1985, %1999 +> %2001 = fadd float %1971, %2000 +> %2002 = call float @llvm.sqrt.f32.148(float %2001) +> %2003 = fadd float %2002, 0.000000e+00 +> %2004 = fdiv float %1943, %2003 +> %2005 = fmul float %2004, 2.000000e+00 +> %2006 = bitcast i32 %152 to float +> %2007 = 
bitcast i32 %152 to float +> %2008 = fmul float %2006, %2007 +> %2009 = fadd float %2008, 0.000000e+00 +> %2010 = bitcast i32 %807 to float +> %2011 = bitcast i32 %807 to float +> %2012 = fmul float %2010, %2011 +> %2013 = fadd float %2009, %2012 +> %2014 = call float @llvm.sqrt.f32.149(float %2013) +> %2015 = fneg float %813 +> %2016 = fmul float %2014, %2015 +> %2017 = bitcast i32 %152 to float +> %2018 = fadd float %2017, %2016 +> %2019 = bitcast i32 %152 to float +> %2020 = bitcast i32 %152 to float +> %2021 = fmul float %2019, %2020 +> %2022 = fadd float %2021, 0.000000e+00 +> %2023 = bitcast i32 %807 to float +> %2024 = bitcast i32 %807 to float +> %2025 = fmul float %2023, %2024 +> %2026 = fadd float %2022, %2025 +> %2027 = call float @llvm.sqrt.f32.150(float %2026) +> %2028 = fneg float %813 +> %2029 = fmul float %2027, %2028 +> %2030 = bitcast i32 %152 to float +> %2031 = fadd float %2030, %2029 +> %2032 = bitcast i32 %152 to float +> %2033 = bitcast i32 %152 to float +> %2034 = fmul float %2032, %2033 +> %2035 = fadd float %2034, 0.000000e+00 +> %2036 = bitcast i32 %807 to float +> %2037 = bitcast i32 %807 to float +> %2038 = fmul float %2036, %2037 +> %2039 = fadd float %2035, %2038 +> %2040 = call float @llvm.sqrt.f32.151(float %2039) +> %2041 = fneg float %813 +> %2042 = fmul float %2040, %2041 +> %2043 = bitcast i32 %152 to float +> %2044 = fadd float %2043, %2042 +> %2045 = fmul float %2031, %2044 +> %2046 = fadd float %2045, 0.000000e+00 +> %2047 = bitcast i32 %152 to float +> %2048 = bitcast i32 %152 to float +> %2049 = fmul float %2047, %2048 +> %2050 = fadd float %2049, 0.000000e+00 +> %2051 = bitcast i32 %807 to float +> %2052 = bitcast i32 %807 to float +> %2053 = fmul float %2051, %2052 +> %2054 = fadd float %2050, %2053 +> %2055 = call float @llvm.sqrt.f32.152(float %2054) +> %2056 = fneg float %813 +> %2057 = fmul float %2055, %2056 +> %2058 = fmul float %2057, 0.000000e+00 +> %2059 = bitcast i32 %807 to float +> %2060 = fadd float %2059, %2058 +> %2061 = bitcast i32 %152 to float +> %2062 = bitcast i32 %152 to float +> %2063 = fmul float %2061, %2062 +> %2064 = fadd float %2063, 0.000000e+00 +> %2065 = bitcast i32 %807 to float +> %2066 = bitcast i32 %807 to float +> %2067 = fmul float %2065, %2066 +> %2068 = fadd float %2064, %2067 +> %2069 = call float @llvm.sqrt.f32.153(float %2068) +> %2070 = fneg float %813 +> %2071 = fmul float %2069, %2070 +> %2072 = fmul float %2071, 0.000000e+00 +> %2073 = bitcast i32 %807 to float +> %2074 = fadd float %2073, %2072 +> %2075 = fmul float %2060, %2074 +> %2076 = fadd float %2046, %2075 +> %2077 = call float @llvm.sqrt.f32.154(float %2076) +> %2078 = fadd float %2077, 0.000000e+00 +> %2079 = fdiv float %2018, %2078 +> %2080 = fmul float %2005, %2079 +> %2081 = fneg float %2080 +> %2082 = fmul float %2081, %1923 +> %2083 = fadd float %2082, 0.000000e+00 +> %2084 = bitcast i32 %152 to float +> %2085 = bitcast i32 %152 to float +> %2086 = fmul float %2084, %2085 +> %2087 = fadd float %2086, 0.000000e+00 +> %2088 = bitcast i32 %807 to float +> %2089 = bitcast i32 %807 to float +> %2090 = fmul float %2088, %2089 +> %2091 = fadd float %2087, %2090 +> %2092 = call float @llvm.sqrt.f32.155(float %2091) +> %2093 = fneg float %813 +> %2094 = fmul float %2092, %2093 +> %2095 = fmul float %2094, 0.000000e+00 +> %2096 = bitcast i32 %807 to float +> %2097 = fadd float %2096, %2095 +> %2098 = bitcast i32 %152 to float +> %2099 = bitcast i32 %152 to float +> %2100 = fmul float %2098, %2099 +> %2101 = fadd float %2100, 0.000000e+00 +> %2102 
= bitcast i32 %807 to float +> %2103 = bitcast i32 %807 to float +> %2104 = fmul float %2102, %2103 +> %2105 = fadd float %2101, %2104 +> %2106 = call float @llvm.sqrt.f32.156(float %2105) +> %2107 = fneg float %813 +> %2108 = fmul float %2106, %2107 +> %2109 = bitcast i32 %152 to float +> %2110 = fadd float %2109, %2108 +> %2111 = bitcast i32 %152 to float +> %2112 = bitcast i32 %152 to float +> %2113 = fmul float %2111, %2112 +> %2114 = fadd float %2113, 0.000000e+00 +> %2115 = bitcast i32 %807 to float +> %2116 = bitcast i32 %807 to float +> %2117 = fmul float %2115, %2116 +> %2118 = fadd float %2114, %2117 +> %2119 = call float @llvm.sqrt.f32.157(float %2118) +> %2120 = fneg float %813 +> %2121 = fmul float %2119, %2120 +> %2122 = bitcast i32 %152 to float +> %2123 = fadd float %2122, %2121 +> %2124 = fmul float %2110, %2123 +> %2125 = fadd float %2124, 0.000000e+00 +> %2126 = bitcast i32 %152 to float +> %2127 = bitcast i32 %152 to float +> %2128 = fmul float %2126, %2127 +> %2129 = fadd float %2128, 0.000000e+00 +> %2130 = bitcast i32 %807 to float +> %2131 = bitcast i32 %807 to float +> %2132 = fmul float %2130, %2131 +> %2133 = fadd float %2129, %2132 +> %2134 = call float @llvm.sqrt.f32.158(float %2133) +> %2135 = fneg float %813 +> %2136 = fmul float %2134, %2135 +> %2137 = fmul float %2136, 0.000000e+00 +> %2138 = bitcast i32 %807 to float +> %2139 = fadd float %2138, %2137 +> %2140 = bitcast i32 %152 to float +> %2141 = bitcast i32 %152 to float +> %2142 = fmul float %2140, %2141 +> %2143 = fadd float %2142, 0.000000e+00 +> %2144 = bitcast i32 %807 to float +> %2145 = bitcast i32 %807 to float +> %2146 = fmul float %2144, %2145 +> %2147 = fadd float %2143, %2146 +> %2148 = call float @llvm.sqrt.f32.159(float %2147) +> %2149 = fneg float %813 +> %2150 = fmul float %2148, %2149 +> %2151 = fmul float %2150, 0.000000e+00 +> %2152 = bitcast i32 %807 to float +> %2153 = fadd float %2152, %2151 +> %2154 = fmul float %2139, %2153 +> %2155 = fadd float %2125, %2154 +> %2156 = call float @llvm.sqrt.f32.160(float %2155) +> %2157 = fadd float %2156, 0.000000e+00 +> %2158 = fdiv float %2097, %2157 +> %2159 = fmul float %2158, 2.000000e+00 +> %2160 = bitcast i32 %152 to float +> %2161 = bitcast i32 %152 to float +> %2162 = fmul float %2160, %2161 +> %2163 = fadd float %2162, 0.000000e+00 +> %2164 = bitcast i32 %807 to float +> %2165 = bitcast i32 %807 to float +> %2166 = fmul float %2164, %2165 +> %2167 = fadd float %2163, %2166 +> %2168 = call float @llvm.sqrt.f32.161(float %2167) +> %2169 = fneg float %813 +> %2170 = fmul float %2168, %2169 +> %2171 = fmul float %2170, 0.000000e+00 +> %2172 = bitcast i32 %807 to float +> %2173 = fadd float %2172, %2171 +> %2174 = bitcast i32 %152 to float +> %2175 = bitcast i32 %152 to float +> %2176 = fmul float %2174, %2175 +> %2177 = fadd float %2176, 0.000000e+00 +> %2178 = bitcast i32 %807 to float +> %2179 = bitcast i32 %807 to float +> %2180 = fmul float %2178, %2179 +> %2181 = fadd float %2177, %2180 +> %2182 = call float @llvm.sqrt.f32.162(float %2181) +> %2183 = fneg float %813 +> %2184 = fmul float %2182, %2183 +> %2185 = bitcast i32 %152 to float +> %2186 = fadd float %2185, %2184 +> %2187 = bitcast i32 %152 to float +> %2188 = bitcast i32 %152 to float +> %2189 = fmul float %2187, %2188 +> %2190 = fadd float %2189, 0.000000e+00 +> %2191 = bitcast i32 %807 to float +> %2192 = bitcast i32 %807 to float +> %2193 = fmul float %2191, %2192 +> %2194 = fadd float %2190, %2193 +> %2195 = call float @llvm.sqrt.f32.163(float %2194) +> %2196 = fneg float 
%813 +> %2197 = fmul float %2195, %2196 +> %2198 = bitcast i32 %152 to float +> %2199 = fadd float %2198, %2197 +> %2200 = fmul float %2186, %2199 +> %2201 = fadd float %2200, 0.000000e+00 +> %2202 = bitcast i32 %152 to float +> %2203 = bitcast i32 %152 to float +> %2204 = fmul float %2202, %2203 +> %2205 = fadd float %2204, 0.000000e+00 +> %2206 = bitcast i32 %807 to float +> %2207 = bitcast i32 %807 to float +> %2208 = fmul float %2206, %2207 +> %2209 = fadd float %2205, %2208 +> %2210 = call float @llvm.sqrt.f32.164(float %2209) +> %2211 = fneg float %813 +> %2212 = fmul float %2210, %2211 +> %2213 = fmul float %2212, 0.000000e+00 +> %2214 = bitcast i32 %807 to float +> %2215 = fadd float %2214, %2213 +> %2216 = bitcast i32 %152 to float +> %2217 = bitcast i32 %152 to float +> %2218 = fmul float %2216, %2217 +> %2219 = fadd float %2218, 0.000000e+00 +> %2220 = bitcast i32 %807 to float +> %2221 = bitcast i32 %807 to float +> %2222 = fmul float %2220, %2221 +> %2223 = fadd float %2219, %2222 +> %2224 = call float @llvm.sqrt.f32.165(float %2223) +> %2225 = fneg float %813 +> %2226 = fmul float %2224, %2225 +> %2227 = fmul float %2226, 0.000000e+00 +> %2228 = bitcast i32 %807 to float +> %2229 = fadd float %2228, %2227 +> %2230 = fmul float %2215, %2229 +> %2231 = fadd float %2201, %2230 +> %2232 = call float @llvm.sqrt.f32.166(float %2231) +> %2233 = fadd float %2232, 0.000000e+00 +> %2234 = fdiv float %2173, %2233 +> %2235 = fmul float %2159, %2234 +> %2236 = fsub float 1.000000e+00, %2235 +> %2237 = load float, float* %1274, align 4 +> %2238 = fmul float %2236, %2237 +> %2239 = fadd float %2083, %2238 +> %2240 = insertelement <4 x float> zeroinitializer, float %2239, i32 0 +> %2241 = insertelement <4 x float> %2240, float 0.000000e+00, i32 1 +> %2242 = insertelement <4 x float> %2241, float 0.000000e+00, i32 2 +> %2243 = insertelement <4 x float> %2242, float 0.000000e+00, i32 3 +> %2244 = extractelement <4 x float> %2243, i32 0 +> store float %2244, float* %1765, align 4 +> %2245 = extractelement <4 x float> %2243, i32 1 +> %2246 = getelementptr float, float* %2, i32 0 +> %2247 = getelementptr inbounds float, float* %2246, i64 3 +> store float %2245, float* %2247, align 4 +> %2248 = bitcast i32 %152 to float +> %2249 = bitcast i32 %152 to float +> %2250 = fmul float %2248, %2249 +> %2251 = fadd float %2250, 0.000000e+00 +> %2252 = bitcast i32 %807 to float +> %2253 = bitcast i32 %807 to float +> %2254 = fmul float %2252, %2253 +> %2255 = fadd float %2251, %2254 +> %2256 = call float @llvm.sqrt.f32.167(float %2255) +> %2257 = fneg float %813 +> %2258 = fmul float %2256, %2257 +> %2259 = fmul float %2258, 0.000000e+00 +> %2260 = bitcast i32 %807 to float +> %2261 = fadd float %2260, %2259 +> %2262 = bitcast i32 %152 to float +> %2263 = bitcast i32 %152 to float +> %2264 = fmul float %2262, %2263 +> %2265 = fadd float %2264, 0.000000e+00 +> %2266 = bitcast i32 %807 to float +> %2267 = bitcast i32 %807 to float +> %2268 = fmul float %2266, %2267 +> %2269 = fadd float %2265, %2268 +> %2270 = call float @llvm.sqrt.f32.168(float %2269) +> %2271 = fneg float %813 +> %2272 = fmul float %2270, %2271 +> %2273 = bitcast i32 %152 to float +> %2274 = fadd float %2273, %2272 +> %2275 = bitcast i32 %152 to float +> %2276 = bitcast i32 %152 to float +> %2277 = fmul float %2275, %2276 +> %2278 = fadd float %2277, 0.000000e+00 +> %2279 = bitcast i32 %807 to float +> %2280 = bitcast i32 %807 to float +> %2281 = fmul float %2279, %2280 +> %2282 = fadd float %2278, %2281 +> %2283 = call float 
@llvm.sqrt.f32.169(float %2282) +> %2284 = fneg float %813 +> %2285 = fmul float %2283, %2284 +> %2286 = bitcast i32 %152 to float +> %2287 = fadd float %2286, %2285 +> %2288 = fmul float %2274, %2287 +> %2289 = fadd float %2288, 0.000000e+00 +> %2290 = bitcast i32 %152 to float +> %2291 = bitcast i32 %152 to float +> %2292 = fmul float %2290, %2291 +> %2293 = fadd float %2292, 0.000000e+00 +> %2294 = bitcast i32 %807 to float +> %2295 = bitcast i32 %807 to float +> %2296 = fmul float %2294, %2295 +> %2297 = fadd float %2293, %2296 +> %2298 = call float @llvm.sqrt.f32.170(float %2297) +> %2299 = fneg float %813 +> %2300 = fmul float %2298, %2299 +> %2301 = fmul float %2300, 0.000000e+00 +> %2302 = bitcast i32 %807 to float +> %2303 = fadd float %2302, %2301 +> %2304 = bitcast i32 %152 to float +> %2305 = bitcast i32 %152 to float +> %2306 = fmul float %2304, %2305 +> %2307 = fadd float %2306, 0.000000e+00 +> %2308 = bitcast i32 %807 to float +> %2309 = bitcast i32 %807 to float +> %2310 = fmul float %2308, %2309 +> %2311 = fadd float %2307, %2310 +> %2312 = call float @llvm.sqrt.f32.171(float %2311) +> %2313 = fneg float %813 +> %2314 = fmul float %2312, %2313 +> %2315 = fmul float %2314, 0.000000e+00 +> %2316 = bitcast i32 %807 to float +> %2317 = fadd float %2316, %2315 +> %2318 = fmul float %2303, %2317 +> %2319 = fadd float %2289, %2318 +> %2320 = call float @llvm.sqrt.f32.172(float %2319) +> %2321 = fadd float %2320, 0.000000e+00 +> %2322 = fdiv float %2261, %2321 +> %2323 = fmul float %2322, 2.000000e+00 +> %2324 = bitcast i32 %152 to float +> %2325 = bitcast i32 %152 to float +> %2326 = fmul float %2324, %2325 +> %2327 = fadd float %2326, 0.000000e+00 +> %2328 = bitcast i32 %807 to float +> %2329 = bitcast i32 %807 to float +> %2330 = fmul float %2328, %2329 +> %2331 = fadd float %2327, %2330 +> %2332 = call float @llvm.sqrt.f32.173(float %2331) +> %2333 = fneg float %813 +> %2334 = fmul float %2332, %2333 +> %2335 = bitcast i32 %152 to float +> %2336 = fadd float %2335, %2334 +> %2337 = bitcast i32 %152 to float +> %2338 = bitcast i32 %152 to float +> %2339 = fmul float %2337, %2338 +> %2340 = fadd float %2339, 0.000000e+00 +> %2341 = bitcast i32 %807 to float +> %2342 = bitcast i32 %807 to float +> %2343 = fmul float %2341, %2342 +> %2344 = fadd float %2340, %2343 +> %2345 = call float @llvm.sqrt.f32.174(float %2344) +> %2346 = fneg float %813 +> %2347 = fmul float %2345, %2346 +> %2348 = bitcast i32 %152 to float +> %2349 = fadd float %2348, %2347 +> %2350 = bitcast i32 %152 to float +> %2351 = bitcast i32 %152 to float +> %2352 = fmul float %2350, %2351 +> %2353 = fadd float %2352, 0.000000e+00 +> %2354 = bitcast i32 %807 to float +> %2355 = bitcast i32 %807 to float +> %2356 = fmul float %2354, %2355 +> %2357 = fadd float %2353, %2356 +> %2358 = call float @llvm.sqrt.f32.175(float %2357) +> %2359 = fneg float %813 +> %2360 = fmul float %2358, %2359 +> %2361 = bitcast i32 %152 to float +> %2362 = fadd float %2361, %2360 +> %2363 = fmul float %2349, %2362 +> %2364 = fadd float %2363, 0.000000e+00 +> %2365 = bitcast i32 %152 to float +> %2366 = bitcast i32 %152 to float +> %2367 = fmul float %2365, %2366 +> %2368 = fadd float %2367, 0.000000e+00 +> %2369 = bitcast i32 %807 to float +> %2370 = bitcast i32 %807 to float +> %2371 = fmul float %2369, %2370 +> %2372 = fadd float %2368, %2371 +> %2373 = call float @llvm.sqrt.f32.176(float %2372) +> %2374 = fneg float %813 +> %2375 = fmul float %2373, %2374 +> %2376 = fmul float %2375, 0.000000e+00 +> %2377 = bitcast i32 %807 to float +> 
%2378 = fadd float %2377, %2376 +> %2379 = bitcast i32 %152 to float +> %2380 = bitcast i32 %152 to float +> %2381 = fmul float %2379, %2380 +> %2382 = fadd float %2381, 0.000000e+00 +> %2383 = bitcast i32 %807 to float +> %2384 = bitcast i32 %807 to float +> %2385 = fmul float %2383, %2384 +> %2386 = fadd float %2382, %2385 +> %2387 = call float @llvm.sqrt.f32.177(float %2386) +> %2388 = fneg float %813 +> %2389 = fmul float %2387, %2388 +> %2390 = fmul float %2389, 0.000000e+00 +> %2391 = bitcast i32 %807 to float +> %2392 = fadd float %2391, %2390 +> %2393 = fmul float %2378, %2392 +> %2394 = fadd float %2364, %2393 +> %2395 = call float @llvm.sqrt.f32.178(float %2394) +> %2396 = fadd float %2395, 0.000000e+00 +> %2397 = fdiv float %2336, %2396 +> %2398 = fmul float %2323, %2397 +> %2399 = fneg float %2398 +> %2400 = insertelement <4 x float> zeroinitializer, float %2399, i32 0 +> %2401 = insertelement <4 x float> %2400, float 0.000000e+00, i32 1 +> %2402 = insertelement <4 x float> %2401, float 0.000000e+00, i32 2 +> %2403 = insertelement <4 x float> %2402, float 0.000000e+00, i32 3 +> %2404 = load float, float* %1442, align 4 +> %2405 = insertelement <4 x float> zeroinitializer, float %2404, i32 0 +> %2406 = insertelement <4 x float> %2405, float 0.000000e+00, i32 1 +> %2407 = insertelement <4 x float> %2406, float 0.000000e+00, i32 2 +> %2408 = insertelement <4 x float> %2407, float 0.000000e+00, i32 3 +> %2409 = call <4 x float> @llvm.fma.f32.179(<4 x float> %2403, <4 x float> %2408, <4 x float> zeroinitializer) +> %2410 = extractelement <4 x float> %2409, i32 0 +> store float %2410, float* %2247, align 4 +> %2411 = bitcast i32 %152 to float +> %2412 = bitcast i32 %152 to float +> %2413 = fmul float %2411, %2412 +> %2414 = fadd float %2413, 0.000000e+00 +> %2415 = bitcast i32 %807 to float +> %2416 = bitcast i32 %807 to float +> %2417 = fmul float %2415, %2416 +> %2418 = fadd float %2414, %2417 +> %2419 = call float @llvm.sqrt.f32.180(float %2418) +> %2420 = fneg float %813 +> %2421 = fmul float %2419, %2420 +> %2422 = fmul float %2421, 0.000000e+00 +> %2423 = bitcast i32 %807 to float +> %2424 = fadd float %2423, %2422 +> %2425 = bitcast i32 %152 to float +> %2426 = bitcast i32 %152 to float +> %2427 = fmul float %2425, %2426 +> %2428 = fadd float %2427, 0.000000e+00 +> %2429 = bitcast i32 %807 to float +> %2430 = bitcast i32 %807 to float +> %2431 = fmul float %2429, %2430 +> %2432 = fadd float %2428, %2431 +> %2433 = call float @llvm.sqrt.f32.181(float %2432) +> %2434 = fneg float %813 +> %2435 = fmul float %2433, %2434 +> %2436 = bitcast i32 %152 to float +> %2437 = fadd float %2436, %2435 +> %2438 = bitcast i32 %152 to float +> %2439 = bitcast i32 %152 to float +> %2440 = fmul float %2438, %2439 +> %2441 = fadd float %2440, 0.000000e+00 +> %2442 = bitcast i32 %807 to float +> %2443 = bitcast i32 %807 to float +> %2444 = fmul float %2442, %2443 +> %2445 = fadd float %2441, %2444 +> %2446 = call float @llvm.sqrt.f32.182(float %2445) +> %2447 = fneg float %813 +> %2448 = fmul float %2446, %2447 +> %2449 = bitcast i32 %152 to float +> %2450 = fadd float %2449, %2448 +> %2451 = fmul float %2437, %2450 +> %2452 = fadd float %2451, 0.000000e+00 +> %2453 = bitcast i32 %152 to float +> %2454 = bitcast i32 %152 to float +> %2455 = fmul float %2453, %2454 +> %2456 = fadd float %2455, 0.000000e+00 +> %2457 = bitcast i32 %807 to float +> %2458 = bitcast i32 %807 to float +> %2459 = fmul float %2457, %2458 +> %2460 = fadd float %2456, %2459 +> %2461 = call float @llvm.sqrt.f32.183(float 
%2460) +> %2462 = fneg float %813 +> %2463 = fmul float %2461, %2462 +> %2464 = fmul float %2463, 0.000000e+00 +> %2465 = bitcast i32 %807 to float +> %2466 = fadd float %2465, %2464 +> %2467 = bitcast i32 %152 to float +> %2468 = bitcast i32 %152 to float +> %2469 = fmul float %2467, %2468 +> %2470 = fadd float %2469, 0.000000e+00 +> %2471 = bitcast i32 %807 to float +> %2472 = bitcast i32 %807 to float +> %2473 = fmul float %2471, %2472 +> %2474 = fadd float %2470, %2473 +> %2475 = call float @llvm.sqrt.f32.184(float %2474) +> %2476 = fneg float %813 +> %2477 = fmul float %2475, %2476 +> %2478 = fmul float %2477, 0.000000e+00 +> %2479 = bitcast i32 %807 to float +> %2480 = fadd float %2479, %2478 +> %2481 = fmul float %2466, %2480 +> %2482 = fadd float %2452, %2481 +> %2483 = call float @llvm.sqrt.f32.185(float %2482) +> %2484 = fadd float %2483, 0.000000e+00 +> %2485 = fdiv float %2424, %2484 +> %2486 = fmul float %2485, 2.000000e+00 +> %2487 = bitcast i32 %152 to float +> %2488 = bitcast i32 %152 to float +> %2489 = fmul float %2487, %2488 +> %2490 = fadd float %2489, 0.000000e+00 +> %2491 = bitcast i32 %807 to float +> %2492 = bitcast i32 %807 to float +> %2493 = fmul float %2491, %2492 +> %2494 = fadd float %2490, %2493 +> %2495 = call float @llvm.sqrt.f32.186(float %2494) +> %2496 = fneg float %813 +> %2497 = fmul float %2495, %2496 +> %2498 = bitcast i32 %152 to float +> %2499 = fadd float %2498, %2497 +> %2500 = bitcast i32 %152 to float +> %2501 = bitcast i32 %152 to float +> %2502 = fmul float %2500, %2501 +> %2503 = fadd float %2502, 0.000000e+00 +> %2504 = bitcast i32 %807 to float +> %2505 = bitcast i32 %807 to float +> %2506 = fmul float %2504, %2505 +> %2507 = fadd float %2503, %2506 +> %2508 = call float @llvm.sqrt.f32.187(float %2507) +> %2509 = fneg float %813 +> %2510 = fmul float %2508, %2509 +> %2511 = bitcast i32 %152 to float +> %2512 = fadd float %2511, %2510 +> %2513 = bitcast i32 %152 to float +> %2514 = bitcast i32 %152 to float +> %2515 = fmul float %2513, %2514 +> %2516 = fadd float %2515, 0.000000e+00 +> %2517 = bitcast i32 %807 to float +> %2518 = bitcast i32 %807 to float +> %2519 = fmul float %2517, %2518 +> %2520 = fadd float %2516, %2519 +> %2521 = call float @llvm.sqrt.f32.188(float %2520) +> %2522 = fneg float %813 +> %2523 = fmul float %2521, %2522 +> %2524 = bitcast i32 %152 to float +> %2525 = fadd float %2524, %2523 +> %2526 = fmul float %2512, %2525 +> %2527 = fadd float %2526, 0.000000e+00 +> %2528 = bitcast i32 %152 to float +> %2529 = bitcast i32 %152 to float +> %2530 = fmul float %2528, %2529 +> %2531 = fadd float %2530, 0.000000e+00 +> %2532 = bitcast i32 %807 to float +> %2533 = bitcast i32 %807 to float +> %2534 = fmul float %2532, %2533 +> %2535 = fadd float %2531, %2534 +> %2536 = call float @llvm.sqrt.f32.189(float %2535) +> %2537 = fneg float %813 +> %2538 = fmul float %2536, %2537 +> %2539 = fmul float %2538, 0.000000e+00 +> %2540 = bitcast i32 %807 to float +> %2541 = fadd float %2540, %2539 +> %2542 = bitcast i32 %152 to float +> %2543 = bitcast i32 %152 to float +> %2544 = fmul float %2542, %2543 +> %2545 = fadd float %2544, 0.000000e+00 +> %2546 = bitcast i32 %807 to float +> %2547 = bitcast i32 %807 to float +> %2548 = fmul float %2546, %2547 +> %2549 = fadd float %2545, %2548 +> %2550 = call float @llvm.sqrt.f32.190(float %2549) +> %2551 = fneg float %813 +> %2552 = fmul float %2550, %2551 +> %2553 = fmul float %2552, 0.000000e+00 +> %2554 = bitcast i32 %807 to float +> %2555 = fadd float %2554, %2553 +> %2556 = fmul float 
%2541, %2555 +> %2557 = fadd float %2527, %2556 +> %2558 = call float @llvm.sqrt.f32.191(float %2557) +> %2559 = fadd float %2558, 0.000000e+00 +> %2560 = fdiv float %2499, %2559 +> %2561 = fmul float %2486, %2560 +> %2562 = fneg float %2561 +> %2563 = fmul float %2562, %2404 +> %2564 = fadd float %2563, 0.000000e+00 +> %2565 = bitcast i32 %152 to float +> %2566 = bitcast i32 %152 to float +> %2567 = fmul float %2565, %2566 +> %2568 = fadd float %2567, 0.000000e+00 +> %2569 = bitcast i32 %807 to float +> %2570 = bitcast i32 %807 to float +> %2571 = fmul float %2569, %2570 +> %2572 = fadd float %2568, %2571 +> %2573 = call float @llvm.sqrt.f32.192(float %2572) +> %2574 = fneg float %813 +> %2575 = fmul float %2573, %2574 +> %2576 = fmul float %2575, 0.000000e+00 +> %2577 = bitcast i32 %807 to float +> %2578 = fadd float %2577, %2576 +> %2579 = bitcast i32 %152 to float +> %2580 = bitcast i32 %152 to float +> %2581 = fmul float %2579, %2580 +> %2582 = fadd float %2581, 0.000000e+00 +> %2583 = bitcast i32 %807 to float +> %2584 = bitcast i32 %807 to float +> %2585 = fmul float %2583, %2584 +> %2586 = fadd float %2582, %2585 +> %2587 = call float @llvm.sqrt.f32.193(float %2586) +> %2588 = fneg float %813 +> %2589 = fmul float %2587, %2588 +> %2590 = bitcast i32 %152 to float +> %2591 = fadd float %2590, %2589 +> %2592 = bitcast i32 %152 to float +> %2593 = bitcast i32 %152 to float +> %2594 = fmul float %2592, %2593 +> %2595 = fadd float %2594, 0.000000e+00 +> %2596 = bitcast i32 %807 to float +> %2597 = bitcast i32 %807 to float +> %2598 = fmul float %2596, %2597 +> %2599 = fadd float %2595, %2598 +> %2600 = call float @llvm.sqrt.f32.194(float %2599) +> %2601 = fneg float %813 +> %2602 = fmul float %2600, %2601 +> %2603 = bitcast i32 %152 to float +> %2604 = fadd float %2603, %2602 +> %2605 = fmul float %2591, %2604 +> %2606 = fadd float %2605, 0.000000e+00 +> %2607 = bitcast i32 %152 to float +> %2608 = bitcast i32 %152 to float +> %2609 = fmul float %2607, %2608 +> %2610 = fadd float %2609, 0.000000e+00 +> %2611 = bitcast i32 %807 to float +> %2612 = bitcast i32 %807 to float +> %2613 = fmul float %2611, %2612 +> %2614 = fadd float %2610, %2613 +> %2615 = call float @llvm.sqrt.f32.195(float %2614) +> %2616 = fneg float %813 +> %2617 = fmul float %2615, %2616 +> %2618 = fmul float %2617, 0.000000e+00 +> %2619 = bitcast i32 %807 to float +> %2620 = fadd float %2619, %2618 +> %2621 = bitcast i32 %152 to float +> %2622 = bitcast i32 %152 to float +> %2623 = fmul float %2621, %2622 +> %2624 = fadd float %2623, 0.000000e+00 +> %2625 = bitcast i32 %807 to float +> %2626 = bitcast i32 %807 to float +> %2627 = fmul float %2625, %2626 +> %2628 = fadd float %2624, %2627 +> %2629 = call float @llvm.sqrt.f32.196(float %2628) +> %2630 = fneg float %813 +> %2631 = fmul float %2629, %2630 +> %2632 = fmul float %2631, 0.000000e+00 +> %2633 = bitcast i32 %807 to float +> %2634 = fadd float %2633, %2632 +> %2635 = fmul float %2620, %2634 +> %2636 = fadd float %2606, %2635 +> %2637 = call float @llvm.sqrt.f32.197(float %2636) +> %2638 = fadd float %2637, 0.000000e+00 +> %2639 = fdiv float %2578, %2638 +> %2640 = fmul float %2639, 2.000000e+00 +> %2641 = bitcast i32 %152 to float +> %2642 = bitcast i32 %152 to float +> %2643 = fmul float %2641, %2642 +> %2644 = fadd float %2643, 0.000000e+00 +> %2645 = bitcast i32 %807 to float +> %2646 = bitcast i32 %807 to float +> %2647 = fmul float %2645, %2646 +> %2648 = fadd float %2644, %2647 +> %2649 = call float @llvm.sqrt.f32.198(float %2648) +> %2650 = fneg float 
%813 +> %2651 = fmul float %2649, %2650 +> %2652 = fmul float %2651, 0.000000e+00 +> %2653 = bitcast i32 %807 to float +> %2654 = fadd float %2653, %2652 +> %2655 = bitcast i32 %152 to float +> %2656 = bitcast i32 %152 to float +> %2657 = fmul float %2655, %2656 +> %2658 = fadd float %2657, 0.000000e+00 +> %2659 = bitcast i32 %807 to float +> %2660 = bitcast i32 %807 to float +> %2661 = fmul float %2659, %2660 +> %2662 = fadd float %2658, %2661 +> %2663 = call float @llvm.sqrt.f32.199(float %2662) +> %2664 = fneg float %813 +> %2665 = fmul float %2663, %2664 +> %2666 = bitcast i32 %152 to float +> %2667 = fadd float %2666, %2665 +> %2668 = bitcast i32 %152 to float +> %2669 = bitcast i32 %152 to float +> %2670 = fmul float %2668, %2669 +> %2671 = fadd float %2670, 0.000000e+00 +> %2672 = bitcast i32 %807 to float +> %2673 = bitcast i32 %807 to float +> %2674 = fmul float %2672, %2673 +> %2675 = fadd float %2671, %2674 +> %2676 = call float @llvm.sqrt.f32.200(float %2675) +> %2677 = fneg float %813 +> %2678 = fmul float %2676, %2677 +> %2679 = bitcast i32 %152 to float +> %2680 = fadd float %2679, %2678 +> %2681 = fmul float %2667, %2680 +> %2682 = fadd float %2681, 0.000000e+00 +> %2683 = bitcast i32 %152 to float +> %2684 = bitcast i32 %152 to float +> %2685 = fmul float %2683, %2684 +> %2686 = fadd float %2685, 0.000000e+00 +> %2687 = bitcast i32 %807 to float +> %2688 = bitcast i32 %807 to float +> %2689 = fmul float %2687, %2688 +> %2690 = fadd float %2686, %2689 +> %2691 = call float @llvm.sqrt.f32.201(float %2690) +> %2692 = fneg float %813 +> %2693 = fmul float %2691, %2692 +> %2694 = fmul float %2693, 0.000000e+00 +> %2695 = bitcast i32 %807 to float +> %2696 = fadd float %2695, %2694 +> %2697 = bitcast i32 %152 to float +> %2698 = bitcast i32 %152 to float +> %2699 = fmul float %2697, %2698 +> %2700 = fadd float %2699, 0.000000e+00 +> %2701 = bitcast i32 %807 to float +> %2702 = bitcast i32 %807 to float +> %2703 = fmul float %2701, %2702 +> %2704 = fadd float %2700, %2703 +> %2705 = call float @llvm.sqrt.f32.202(float %2704) +> %2706 = fneg float %813 +> %2707 = fmul float %2705, %2706 +> %2708 = fmul float %2707, 0.000000e+00 +> %2709 = bitcast i32 %807 to float +> %2710 = fadd float %2709, %2708 +> %2711 = fmul float %2696, %2710 +> %2712 = fadd float %2682, %2711 +> %2713 = call float @llvm.sqrt.f32.203(float %2712) +> %2714 = fadd float %2713, 0.000000e+00 +> %2715 = fdiv float %2654, %2714 +> %2716 = fmul float %2640, %2715 +> %2717 = fsub float 1.000000e+00, %2716 +> %2718 = load float, float* %144, align 4 +> %2719 = fmul float %2717, %2718 +> %2720 = fadd float %2564, %2719 +> %2721 = insertelement <4 x float> zeroinitializer, float %2720, i32 0 +> %2722 = insertelement <4 x float> %2721, float 0.000000e+00, i32 1 +> %2723 = insertelement <4 x float> %2722, float 0.000000e+00, i32 2 +> %2724 = insertelement <4 x float> %2723, float 0.000000e+00, i32 3 +> %2725 = extractelement <4 x float> %2724, i32 0 +> store float %2725, float* %2247, align 4 +> %2726 = getelementptr float, float* %1, i32 0 +> %2727 = getelementptr inbounds float, float* %2726, i64 2 +> %2728 = bitcast float* %2727 to i32* +> %2729 = load i32, i32* %2728, align 4 +> %2730 = bitcast i32 %2729 to float +> %2731 = insertelement <4 x float> zeroinitializer, float %2730, i32 0 +> %2732 = getelementptr float, float* %1, i32 0 +> %2733 = getelementptr inbounds float, float* %2732, i64 1 +> %2734 = bitcast float* %2733 to i32* +> %2735 = load i32, i32* %2734, align 4 +> %2736 = bitcast i32 %2735 to float +> 
%2737 = insertelement <4 x float> %2731, float %2736, i32 1 +> %2738 = insertelement <4 x float> %2737, float 0.000000e+00, i32 2 +> %2739 = insertelement <4 x float> %2738, float 0.000000e+00, i32 3 +> %2740 = extractelement <4 x float> %2739, i32 0 +> %2741 = bitcast i32* %95 to float* +> %2742 = bitcast i32* %2734 to float* +> store float %2740, float* %2742, align 4 +> %2743 = extractelement <4 x float> %2739, i32 1 +> %2744 = bitcast i32* %98 to float* +> %2745 = bitcast i32* %2728 to float* +> store float %2743, float* %2745, align 4 diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/diff-final.txt b/src/dios-egraphs/Diospyros/flaky-outputs/diff-final.txt new file mode 100644 index 00000000..6f27b438 --- /dev/null +++ b/src/dios-egraphs/Diospyros/flaky-outputs/diff-final.txt @@ -0,0 +1 @@ +Binary files build/final and flaky-outputs/flaky-final differ diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/diff-opt.txt b/src/dios-egraphs/Diospyros/flaky-outputs/diff-opt.txt new file mode 100644 index 00000000..e69de29b diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/flaky-aa.ll b/src/dios-egraphs/Diospyros/flaky-outputs/flaky-aa.ll new file mode 100644 index 00000000..13a36f92 --- /dev/null +++ b/src/dios-egraphs/Diospyros/flaky-outputs/flaky-aa.ll @@ -0,0 +1,828 @@ +; ModuleID = 'build/opt.ll' +source_filename = "fail-tests/qr-decomp-local-arrays.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@.str = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 +@.str.1 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 +@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 +@.str.2 = private unnamed_addr constant [36 x i8] c"fail-tests/qr-decomp-local-arrays.c\00", align 1 +@.str.3 = private unnamed_addr constant [34 x i8] c"fabs(expectedQ[i] - Q[i]) < DELTA\00", align 1 +@.str.4 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 +@.str.5 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 +@.str.6 = private unnamed_addr constant [34 x i8] c"fabs(expectedR[i] - R[i]) < DELTA\00", align 1 + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @sgn(float %0) #0 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_sgn(float %0) #1 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @naive_norm(float* %0, i32 %1) #0 { + %3 = icmp sgt i32 %1, 0 + %smax = select i1 %3, i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 3 + br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ 
%indvars.iv.next.3, %6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 + %indvars.iv.next = or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32(float) #2 + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_naive_norm(float* %0, i32 %1) #1 { + %3 = icmp sgt i32 %1, 0 + %smax = select i1 %3, i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 3 + br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 
+ %indvars.iv.next = or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_transpose(float* %0) #0 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast float* %4 to i32* + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_transpose(float* %0) #1 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast float* %4 to i32* + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { +.preheader: + store float 0.000000e+00, float* %2, align 4 + %3 = load float, float* %0, align 4 + %4 = load float, float* %1, align 4 + %5 = fmul float %3, %4 + %6 = fadd float %5, 0.000000e+00 + store float %6, float* %2, align 4 + %7 = getelementptr inbounds float, float* %0, i64 1 + %8 = 
load float, float* %7, align 4 + %9 = getelementptr inbounds float, float* %1, i64 2 + %10 = load float, float* %9, align 4 + %11 = fmul float %8, %10 + %12 = fadd float %6, %11 + store float %12, float* %2, align 4 + %13 = getelementptr inbounds float, float* %2, i64 1 + store float 0.000000e+00, float* %13, align 4 + %14 = load float, float* %0, align 4 + %15 = getelementptr inbounds float, float* %1, i64 1 + %16 = load float, float* %15, align 4 + %17 = fmul float %14, %16 + %18 = fadd float %17, 0.000000e+00 + store float %18, float* %13, align 4 + %19 = load float, float* %7, align 4 + %20 = getelementptr inbounds float, float* %1, i64 3 + %21 = load float, float* %20, align 4 + %22 = fmul float %19, %21 + %23 = fadd float %18, %22 + store float %23, float* %13, align 4 + %24 = getelementptr inbounds float, float* %0, i64 2 + %25 = getelementptr inbounds float, float* %2, i64 2 + store float 0.000000e+00, float* %25, align 4 + %26 = load float, float* %24, align 4 + %27 = load float, float* %1, align 4 + %28 = fmul float %26, %27 + %29 = fadd float %28, 0.000000e+00 + store float %29, float* %25, align 4 + %30 = getelementptr inbounds float, float* %0, i64 3 + %31 = load float, float* %30, align 4 + %32 = load float, float* %9, align 4 + %33 = fmul float %31, %32 + %34 = fadd float %29, %33 + store float %34, float* %25, align 4 + %35 = getelementptr inbounds float, float* %2, i64 3 + store float 0.000000e+00, float* %35, align 4 + %36 = load float, float* %24, align 4 + %37 = load float, float* %15, align 4 + %38 = fmul float %36, %37 + %39 = fadd float %38, 0.000000e+00 + store float %39, float* %35, align 4 + %40 = load float, float* %30, align 4 + %41 = load float, float* %20, align 4 + %42 = fmul float %40, %41 + %43 = fadd float %39, %42 + store float %43, float* %35, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { +.preheader: + store float 0.000000e+00, float* %2, align 4 + %3 = load float, float* %0, align 4 + %4 = load float, float* %1, align 4 + %5 = fmul float %3, %4 + %6 = fadd float %5, 0.000000e+00 + store float %6, float* %2, align 4 + %7 = getelementptr inbounds float, float* %0, i64 1 + %8 = load float, float* %7, align 4 + %9 = getelementptr inbounds float, float* %1, i64 2 + %10 = load float, float* %9, align 4 + %11 = fmul float %8, %10 + %12 = fadd float %6, %11 + store float %12, float* %2, align 4 + %13 = getelementptr inbounds float, float* %2, i64 1 + store float 0.000000e+00, float* %13, align 4 + %14 = load float, float* %0, align 4 + %15 = getelementptr inbounds float, float* %1, i64 1 + %16 = load float, float* %15, align 4 + %17 = fmul float %14, %16 + %18 = fadd float %17, 0.000000e+00 + store float %18, float* %13, align 4 + %19 = load float, float* %7, align 4 + %20 = getelementptr inbounds float, float* %1, i64 3 + %21 = load float, float* %20, align 4 + %22 = fmul float %19, %21 + %23 = fadd float %18, %22 + store float %23, float* %13, align 4 + %24 = getelementptr inbounds float, float* %0, i64 2 + %25 = getelementptr inbounds float, float* %2, i64 2 + store float 0.000000e+00, float* %25, align 4 + %26 = load float, float* %24, align 4 + %27 = load float, float* %1, align 4 + %28 = fmul float %26, %27 + %29 = fadd float %28, 0.000000e+00 + store float %29, float* %25, align 4 + %30 = getelementptr inbounds float, float* %0, i64 3 + %31 = load float, float* %30, align 4 + %32 = load float, float* %9, align 4 + %33 = fmul float %31, %32 + 
%34 = fadd float %29, %33 + store float %34, float* %25, align 4 + %35 = getelementptr inbounds float, float* %2, i64 3 + store float 0.000000e+00, float* %35, align 4 + %36 = load float, float* %24, align 4 + %37 = load float, float* %15, align 4 + %38 = fmul float %36, %37 + %39 = fadd float %38, 0.000000e+00 + store float %39, float* %35, align 4 + %40 = load float, float* %30, align 4 + %41 = load float, float* %20, align 4 + %42 = fmul float %40, %41 + %43 = fadd float %39, %42 + store float %43, float* %35, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader49: + %3 = bitcast float* %1 to i8* + %4 = alloca [4 x float], align 16 + %5 = bitcast [4 x float]* %4 to i8* + %6 = bitcast float* %0 to i32* + %7 = load i32, i32* %6, align 4 + %8 = bitcast float* %2 to i32* + store i32 %7, i32* %8, align 4 + %9 = getelementptr inbounds float, float* %0, i64 1 + %10 = bitcast float* %9 to i32* + %11 = load i32, i32* %10, align 4 + %12 = getelementptr inbounds float, float* %2, i64 1 + %13 = bitcast float* %12 to i32* + store i32 %11, i32* %13, align 4 + %14 = getelementptr inbounds float, float* %0, i64 2 + %15 = bitcast float* %14 to i32* + %16 = load i32, i32* %15, align 4 + %17 = getelementptr inbounds float, float* %2, i64 2 + %18 = bitcast float* %17 to i32* + store i32 %16, i32* %18, align 4 + %19 = getelementptr inbounds float, float* %0, i64 3 + %20 = bitcast float* %19 to i32* + %21 = load i32, i32* %20, align 4 + %22 = getelementptr inbounds float, float* %2, i64 3 + %23 = bitcast float* %22 to i32* + store i32 %21, i32* %23, align 4 + %24 = bitcast i32 %7 to float + %25 = fcmp ogt float %24, 0.000000e+00 + %26 = zext i1 %25 to i32 + %27 = fcmp olt float %24, 0.000000e+00 + %.neg = sext i1 %27 to i32 + %28 = add nsw i32 %.neg, %26 + %29 = sitofp i32 %28 to float + %30 = fmul float %24, %24 + %31 = fadd float %30, 0.000000e+00 + %32 = bitcast i32 %16 to float + %33 = fmul float %32, %32 + %34 = fadd float %31, %33 + %35 = call float @llvm.sqrt.f32(float %34) #9 + %36 = fneg float %29 + %37 = fmul float %35, %36 + %38 = fadd float %24, %37 + %39 = fmul float %37, 0.000000e+00 + %40 = fadd float %32, %39 + %41 = fmul float %38, %38 + %42 = fadd float %41, 0.000000e+00 + %43 = fmul float %40, %40 + %44 = fadd float %42, %43 + %45 = call float @llvm.sqrt.f32(float %44) #9 + %46 = fadd float %45, 0x3EE4F8B580000000 + %47 = fdiv float %38, %46 + %48 = fdiv float %40, %46 + %49 = fmul float %47, 2.000000e+00 + %50 = fmul float %49, %47 + %51 = fsub float 1.000000e+00, %50 + %52 = fmul float %49, %48 + %53 = fsub float 0.000000e+00, %52 + %54 = fmul float %48, 2.000000e+00 + %55 = fmul float %54, %47 + %56 = fsub float 0.000000e+00, %55 + %57 = fmul float %54, %48 + %58 = fsub float 1.000000e+00, %57 + %59 = bitcast float %51 to i32 + %60 = bitcast [4 x float]* %4 to i32* + store i32 %59, i32* %60, align 16 + %61 = bitcast float %53 to i32 + %62 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 + %63 = bitcast float* %62 to i32* + store i32 %61, i32* %63, align 4 + %64 = bitcast float %56 to i32 + %65 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 + %66 = bitcast float* %65 to i32* + store i32 %64, i32* %66, align 8 + %67 = bitcast float %58 to i32 + %68 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 + %69 = bitcast float* %68 to i32* + store i32 %67, i32* %69, align 4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 
dereferenceable(16) %3, i8* nonnull align 16 dereferenceable(16) %5, i64 16, i1 false) + store float 0.000000e+00, float* %2, align 4 + %70 = load float, float* %0, align 4 + %71 = fmul float %51, %70 + %72 = fadd float %71, 0.000000e+00 + store float %72, float* %2, align 4 + %73 = load float, float* %14, align 4 + %74 = fmul float %53, %73 + %75 = fadd float %72, %74 + store float %75, float* %2, align 4 + store float 0.000000e+00, float* %12, align 4 + %76 = load float, float* %9, align 4 + %77 = fmul float %51, %76 + %78 = fadd float %77, 0.000000e+00 + store float %78, float* %12, align 4 + %79 = load float, float* %19, align 4 + %80 = fmul float %53, %79 + %81 = fadd float %78, %80 + store float %81, float* %12, align 4 + store float 0.000000e+00, float* %17, align 4 + %82 = load float, float* %0, align 4 + %83 = fmul float %56, %82 + %84 = fadd float %83, 0.000000e+00 + store float %84, float* %17, align 4 + %85 = load float, float* %14, align 4 + %86 = fmul float %58, %85 + %87 = fadd float %84, %86 + store float %87, float* %17, align 4 + store float 0.000000e+00, float* %22, align 4 + %88 = load float, float* %9, align 4 + %89 = fmul float %56, %88 + %90 = fadd float %89, 0.000000e+00 + store float %90, float* %22, align 4 + %91 = load float, float* %19, align 4 + %92 = fmul float %58, %91 + %93 = fadd float %90, %92 + store float %93, float* %22, align 4 + %94 = getelementptr inbounds float, float* %1, i64 1 + %95 = bitcast float* %94 to i32* + %96 = load i32, i32* %95, align 4 + %97 = getelementptr inbounds float, float* %1, i64 2 + %98 = bitcast float* %97 to i32* + %99 = load i32, i32* %98, align 4 + store i32 %99, i32* %95, align 4 + store i32 %96, i32* %98, align 4 + ret void +} + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader13: + %3 = bitcast float* %2 to i8* + %4 = bitcast float* %0 to i8* + %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) + %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #9 + %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 + %8 = bitcast i8* %7 to float* + store float 1.000000e+00, float* %8, align 4 + %9 = getelementptr inbounds i8, i8* %7, i64 8 + %10 = getelementptr inbounds i8, i8* %7, i64 12 + %11 = bitcast i8* %10 to float* + store float 1.000000e+00, float* %11, align 4 + %12 = bitcast float* %1 to i8* + %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) + %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 + %15 = bitcast i8* %14 to float* + %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 + %17 = bitcast i8* %16 to float* + %18 = bitcast float* %2 to i32* + %19 = load i32, i32* %18, align 4 + %20 = bitcast i8* %14 to i32* + store i32 %19, i32* %20, align 4 + %21 = bitcast i8* %7 to i32* + %22 = load i32, i32* %21, align 4 + %23 = bitcast i8* %16 to i32* + store i32 %22, i32* %23, align 4 + %24 = getelementptr inbounds float, float* %2, i64 2 + %25 = bitcast float* %24 to i32* + %26 = load i32, i32* %25, align 4 + %27 = getelementptr inbounds i8, i8* %14, i64 4 + %28 = bitcast i8* %27 to i32* + store i32 %26, i32* %28, align 4 + %29 = bitcast i8* %9 to i32* + %30 = load i32, i32* %29, align 4 + %31 = getelementptr inbounds i8, i8* %16, i64 4 + %32 = bitcast i8* %31 to i32* + store i32 
%30, i32* %32, align 4 + %33 = load float, float* %15, align 4 + %34 = call float @no_opt_sgn(float %33) + %35 = fneg float %34 + %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) + %37 = fmul float %36, %35 + %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 + %39 = bitcast i8* %38 to float* + %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 + %41 = load float, float* %15, align 4 + %42 = load float, float* %17, align 4 + %43 = fmul float %37, %42 + %44 = fadd float %41, %43 + store float %44, float* %39, align 4 + %45 = bitcast i8* %27 to float* + %46 = load float, float* %45, align 4 + %47 = bitcast i8* %31 to float* + %48 = load float, float* %47, align 4 + %49 = fmul float %37, %48 + %50 = fadd float %46, %49 + %51 = getelementptr inbounds i8, i8* %38, i64 4 + %52 = bitcast i8* %51 to float* + store float %50, float* %52, align 4 + %53 = bitcast i8* %40 to float* + %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) + %55 = fadd float %54, 0x3EE4F8B580000000 + %56 = load float, float* %39, align 4 + %57 = fdiv float %56, %55 + store float %57, float* %53, align 4 + %58 = load float, float* %52, align 4 + %59 = fdiv float %58, %55 + %60 = getelementptr inbounds i8, i8* %40, i64 4 + %61 = bitcast i8* %60 to float* + store float %59, float* %61, align 4 + %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 + %63 = bitcast i8* %62 to float* + %64 = load float, float* %53, align 4 + %65 = fmul float %64, 2.000000e+00 + %66 = fmul float %65, %64 + %67 = fsub float 1.000000e+00, %66 + store float %67, float* %63, align 4 + %68 = load float, float* %53, align 4 + %69 = fmul float %68, 2.000000e+00 + %70 = load float, float* %61, align 4 + %71 = fmul float %69, %70 + %72 = fsub float 0.000000e+00, %71 + %73 = getelementptr inbounds i8, i8* %62, i64 4 + %74 = bitcast i8* %73 to float* + store float %72, float* %74, align 4 + %75 = load float, float* %61, align 4 + %76 = fmul float %75, 2.000000e+00 + %77 = load float, float* %53, align 4 + %78 = fmul float %76, %77 + %79 = fsub float 0.000000e+00, %78 + %80 = getelementptr inbounds i8, i8* %62, i64 8 + %81 = bitcast i8* %80 to float* + store float %79, float* %81, align 4 + %82 = load float, float* %61, align 4 + %83 = fmul float %82, 2.000000e+00 + %84 = fmul float %83, %82 + %85 = fsub float 1.000000e+00, %84 + %86 = getelementptr inbounds i8, i8* %62, i64 12 + %87 = bitcast i8* %86 to float* + store float %85, float* %87, align 4 + %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 + %89 = bitcast i8* %88 to float* + %90 = bitcast i8* %62 to i32* + %91 = load i32, i32* %90, align 4 + %92 = bitcast i8* %88 to i32* + store i32 %91, i32* %92, align 4 + %93 = bitcast i8* %73 to i32* + %94 = load i32, i32* %93, align 4 + %95 = getelementptr inbounds i8, i8* %88, i64 4 + %96 = bitcast i8* %95 to i32* + store i32 %94, i32* %96, align 4 + %97 = bitcast i8* %80 to i32* + %98 = load i32, i32* %97, align 4 + %99 = getelementptr inbounds i8, i8* %88, i64 8 + %100 = bitcast i8* %99 to i32* + store i32 %98, i32* %100, align 4 + %101 = bitcast i8* %86 to i32* + %102 = load i32, i32* %101, align 4 + %103 = getelementptr inbounds i8, i8* %88, i64 12 + %104 = bitcast i8* %103 to i32* + store i32 %102, i32* %104, align 4 + %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #9 + call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) + call void @free(i8* %14) + call void @free(i8* %16) + call void @free(i8* %38) + call void 
@free(i8* %40) + call void @free(i8* %62) + call void @free(i8* %88) + call void @no_opt_naive_fixed_transpose(float* %1) + ret void +} + +; Function Attrs: nounwind +declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 + +; Function Attrs: nounwind readnone speculatable willreturn +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 + +; Function Attrs: allocsize(0,1) +declare i8* @calloc(i64, i64) #5 + +declare void @free(i8*) #6 + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #1 { +.preheader6: + %0 = alloca i64, align 8 + %1 = alloca [4 x float], align 16 + %2 = alloca [4 x float], align 16 + %3 = alloca [4 x float], align 16 + %4 = alloca [4 x float], align 16 + %5 = alloca [4 x float], align 16 + %6 = call i64 @time(i64* null) #9 + store i64 %6, i64* %0, align 8 + %7 = call i64 @time(i64* nonnull %0) #9 + %8 = trunc i64 %7 to i32 + call void @srand(i32 %8) #9 + %9 = call i32 @rand() #9 + %10 = sitofp i32 %9 to float + %11 = fdiv float %10, 0x41747AE140000000 + %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 + store float %11, float* %12, align 16 + %13 = call i32 @rand() #9 + %14 = sitofp i32 %13 to float + %15 = fdiv float %14, 0x41747AE140000000 + %16 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 + store float %15, float* %16, align 4 + %17 = call i32 @rand() #9 + %18 = sitofp i32 %17 to float + %19 = fdiv float %18, 0x41747AE140000000 + %20 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 + store float %19, float* %20, align 8 + %21 = call i32 @rand() #9 + %22 = sitofp i32 %21 to float + %23 = fdiv float %22, 0x41747AE140000000 + %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 + store float %23, float* %24, align 4 + %25 = bitcast [4 x float]* %2 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %25, i8 0, i64 16, i1 false) + %26 = bitcast [4 x float]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %26, i8 0, i64 16, i1 false) + %27 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 + %28 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 + call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %27, float* nonnull %28) + %29 = bitcast [4 x float]* %4 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %29, i8 0, i64 16, i1 false) + %30 = bitcast [4 x float]* %5 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %30, i8 0, i64 16, i1 false) + %31 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 + %32 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 + call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %31, float* nonnull %32) + %33 = load float, float* %27, align 16 + %34 = fpext float %33 to double + %35 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %34) #9 + %36 = load float, float* %31, align 16 + %37 = fpext float %36 to double + %38 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %37) #9 + %39 = load float, float* %31, align 16 + %40 = load float, float* %27, align 16 + %41 = fsub float %39, %40 + %42 = call float @llvm.fabs.f32(float %41) + %43 = fcmp uge float %42, 0x3FB99999A0000000 + br i1 %43, label %58, label %44 + +44: ; preds = %.preheader6 + %45 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 + %46 = load float, float* %45, align 4 + %47 = fpext float %46 to double + %48 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %47) #9 + %49 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 + %50 = load float, float* %49, align 4 + %51 = fpext float %50 to double + %52 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %51) #9 + %53 = load float, float* %31, align 16 + %54 = load float, float* %27, align 16 + %55 = fsub float %53, %54 + %56 = call float @llvm.fabs.f32(float %55) + %57 = fcmp uge float %56, 0x3FB99999A0000000 + br i1 %57, label %58, label %.preheader6.1 + +58: ; preds = %115, %.preheader6.1, %44, %.preheader6 + call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 300, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.3, i64 0, i64 0)) #11 + unreachable + +59: ; preds = %.preheader5 + %60 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 + %61 = load float, float* %60, align 4 + %62 = fpext float %61 to double + %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %62) #9 + %64 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 + %65 = load float, float* %64, align 4 + %66 = fpext float %65 to double + %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %66) #9 + %68 = load float, float* %32, align 16 + %69 = load float, float* %28, align 16 + %70 = fsub float %68, %69 + %71 = call float @llvm.fabs.f32(float %70) + %72 = fcmp uge float %71, 0x3FB99999A0000000 + br i1 %72, label %73, label %.preheader.1 + +73: ; preds = %.preheader5, %87, %.preheader.1, %59 + call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 307, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.6, i64 0, i64 0)) #11 + unreachable + +.preheader.1: ; preds = %59 + %74 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 + %75 = load float, float* %74, align 8 + %76 = fpext float %75 to double + %77 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %76) #9 + %78 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 + %79 = load float, float* %78, align 8 + %80 = fpext float %79 to double + %81 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %80) #9 + %82 = load float, float* %64, align 4 + %83 = load float, float* %60, align 4 + %84 = fsub float %82, %83 + %85 = call float @llvm.fabs.f32(float %84) + %86 = fcmp uge float %85, 0x3FB99999A0000000 + br i1 %86, label %73, label %87 + +87: ; preds = %.preheader.1 + %88 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 + %89 = load float, float* %88, align 4 + %90 = fpext float %89 to double + %91 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %90) #9 + %92 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 + %93 = load float, float* %92, align 4 + %94 = fpext float %93 to double + %95 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %94) #9 + %96 = load float, float* %64, align 4 + %97 = load float, float* %60, align 4 + %98 = fsub float %96, %97 + %99 = call float @llvm.fabs.f32(float %98) + %100 = fcmp uge float %99, 0x3FB99999A0000000 + br i1 %100, label %73, label %101 + +101: ; preds = %87 + ret i32 0 + +.preheader6.1: ; preds = %44 + %102 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 + %103 = load float, float* %102, align 8 + %104 = fpext float %103 to double + %105 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %104) #9 + %106 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 + %107 = load float, float* %106, align 8 + %108 = fpext float %107 to double + %109 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %108) #9 + %110 = load float, float* %49, align 4 + %111 = load float, float* %45, align 4 + %112 = fsub float %110, %111 + %113 = call float @llvm.fabs.f32(float %112) + %114 = fcmp uge float %113, 0x3FB99999A0000000 + br i1 %114, label %58, label %115 + +115: ; preds = %.preheader6.1 + %116 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 + %117 = load float, float* %116, align 4 + %118 = fpext float %117 to double + %119 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %118) #9 + %120 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 + %121 = load float, float* %120, align 4 + %122 = fpext float %121 to double + %123 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %122) #9 + %124 = load float, float* %49, align 4 + %125 = load float, float* %45, align 4 + %126 = fsub float %124, %125 + %127 = call float @llvm.fabs.f32(float %126) + %128 = fcmp uge float %127, 0x3FB99999A0000000 + br i1 %128, label %58, label %.preheader5 + +.preheader5: ; preds = %115 + %129 = load float, float* %28, align 16 + %130 = fpext float %129 to double + %131 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %130) #9 + %132 = load float, float* %32, align 16 + %133 = fpext float %132 to double + %134 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %133) #9 + %135 = load float, float* %32, align 16 + %136 = load float, float* %28, align 16 + %137 = fsub float %135, %136 + %138 = call float @llvm.fabs.f32(float %137) + %139 = fcmp uge float %138, 0x3FB99999A0000000 + br i1 %139, label %73, label %59 +} + +declare i64 @time(i64*) #6 + +declare void @srand(i32) #6 + +declare i32 @rand() #6 + +declare i32 @printf(i8*, ...) #6 + +; Function Attrs: nounwind readnone speculatable willreturn +declare double @llvm.fabs.f64(double) #2 + +; Function Attrs: noreturn +declare void @__assert_rtn(i8*, i8*, i32, i8*) #7 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #8 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.fabs.f32(float) #2 + +attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone speculatable willreturn } +attributes #3 = { argmemonly nounwind willreturn writeonly } +attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } 
+attributes #7 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #8 = { argmemonly nounwind willreturn } +attributes #9 = { nounwind } +attributes #10 = { nounwind allocsize(0,1) } +attributes #11 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} +!3 = distinct !{!3, !4} +!4 = !{!"llvm.loop.unroll.disable"} +!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/flaky-clang.ll b/src/dios-egraphs/Diospyros/flaky-outputs/flaky-clang.ll new file mode 100644 index 00000000..44cb581c --- /dev/null +++ b/src/dios-egraphs/Diospyros/flaky-outputs/flaky-clang.ll @@ -0,0 +1,2356 @@ +; ModuleID = 'fail-tests/qr-decomp-local-arrays.c' +source_filename = "fail-tests/qr-decomp-local-arrays.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@.str = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 +@.str.1 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 +@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 +@.str.2 = private unnamed_addr constant [36 x i8] c"fail-tests/qr-decomp-local-arrays.c\00", align 1 +@.str.3 = private unnamed_addr constant [34 x i8] c"fabs(expectedQ[i] - Q[i]) < DELTA\00", align 1 +@.str.4 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 +@.str.5 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 +@.str.6 = private unnamed_addr constant [34 x i8] c"fabs(expectedR[i] - R[i]) < DELTA\00", align 1 + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @sgn(float %0) #0 { + %2 = alloca float, align 4 + store float %0, float* %2, align 4 + %3 = load float, float* %2, align 4 + %4 = fcmp ogt float %3, 0.000000e+00 + %5 = zext i1 %4 to i32 + %6 = load float, float* %2, align 4 + %7 = fcmp olt float %6, 0.000000e+00 + %8 = zext i1 %7 to i32 + %9 = sub nsw i32 %5, %8 + %10 = sitofp i32 %9 to float + ret float %10 +} + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_sgn(float %0) #1 { + %2 = alloca float, align 4 + store float %0, float* %2, align 4 + %3 = load float, float* %2, align 4 + %4 = fcmp ogt float %3, 0.000000e+00 + %5 = zext i1 %4 to i32 + %6 = load float, float* %2, align 4 + %7 = fcmp olt float %6, 0.000000e+00 + %8 = zext i1 %7 to i32 + %9 = sub nsw i32 %5, %8 + %10 = sitofp i32 %9 to float + ret float %10 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @naive_norm(float* %0, i32 %1) #0 { + %3 = alloca float*, align 8 + %4 = alloca i32, align 4 + %5 = alloca float, align 4 + %6 = alloca i32, align 4 + store float* %0, float** %3, align 8 + store i32 %1, i32* %4, align 4 + store float 0.000000e+00, float* %5, align 4 + store i32 0, i32* %6, align 4 + br label %7 + +7: ; preds = %25, %2 + %8 = load i32, i32* %6, align 4 + %9 = load i32, i32* %4, align 4 + %10 = icmp slt i32 %8, %9 + br i1 %10, label %11, label %28 + +11: ; preds = %7 + 
%12 = load float*, float** %3, align 8 + %13 = load i32, i32* %6, align 4 + %14 = sext i32 %13 to i64 + %15 = getelementptr inbounds float, float* %12, i64 %14 + %16 = load float, float* %15, align 4 + %17 = load float*, float** %3, align 8 + %18 = load i32, i32* %6, align 4 + %19 = sext i32 %18 to i64 + %20 = getelementptr inbounds float, float* %17, i64 %19 + %21 = load float, float* %20, align 4 + %22 = fmul float %16, %21 + %23 = load float, float* %5, align 4 + %24 = fadd float %23, %22 + store float %24, float* %5, align 4 + br label %25 + +25: ; preds = %11 + %26 = load i32, i32* %6, align 4 + %27 = add nsw i32 %26, 1 + store i32 %27, i32* %6, align 4 + br label %7 + +28: ; preds = %7 + %29 = load float, float* %5, align 4 + %30 = call float @llvm.sqrt.f32(float %29) + ret float %30 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32(float) #2 + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_naive_norm(float* %0, i32 %1) #1 { + %3 = alloca float*, align 8 + %4 = alloca i32, align 4 + %5 = alloca float, align 4 + %6 = alloca i32, align 4 + store float* %0, float** %3, align 8 + store i32 %1, i32* %4, align 4 + store float 0.000000e+00, float* %5, align 4 + store i32 0, i32* %6, align 4 + br label %7 + +7: ; preds = %25, %2 + %8 = load i32, i32* %6, align 4 + %9 = load i32, i32* %4, align 4 + %10 = icmp slt i32 %8, %9 + br i1 %10, label %11, label %28 + +11: ; preds = %7 + %12 = load float*, float** %3, align 8 + %13 = load i32, i32* %6, align 4 + %14 = sext i32 %13 to i64 + %15 = getelementptr inbounds float, float* %12, i64 %14 + %16 = load float, float* %15, align 4 + %17 = load float*, float** %3, align 8 + %18 = load i32, i32* %6, align 4 + %19 = sext i32 %18 to i64 + %20 = getelementptr inbounds float, float* %17, i64 %19 + %21 = load float, float* %20, align 4 + %22 = fmul float %16, %21 + %23 = load float, float* %5, align 4 + %24 = fadd float %23, %22 + store float %24, float* %5, align 4 + br label %25 + +25: ; preds = %11 + %26 = load i32, i32* %6, align 4 + %27 = add nsw i32 %26, 1 + store i32 %27, i32* %6, align 4 + br label %7 + +28: ; preds = %7 + %29 = load float, float* %5, align 4 + %30 = call float @llvm.sqrt.f32(float %29) + ret float %30 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_transpose(float* %0) #0 { + %2 = alloca float*, align 8 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca float, align 4 + store float* %0, float** %2, align 8 + store i32 0, i32* %3, align 4 + br label %6 + +6: ; preds = %51, %1 + %7 = load i32, i32* %3, align 4 + %8 = icmp slt i32 %7, 2 + br i1 %8, label %9, label %54 + +9: ; preds = %6 + %10 = load i32, i32* %3, align 4 + %11 = add nsw i32 %10, 1 + store i32 %11, i32* %4, align 4 + br label %12 + +12: ; preds = %47, %9 + %13 = load i32, i32* %4, align 4 + %14 = icmp slt i32 %13, 2 + br i1 %14, label %15, label %50 + +15: ; preds = %12 + %16 = load float*, float** %2, align 8 + %17 = load i32, i32* %3, align 4 + %18 = mul nsw i32 %17, 2 + %19 = load i32, i32* %4, align 4 + %20 = add nsw i32 %18, %19 + %21 = sext i32 %20 to i64 + %22 = getelementptr inbounds float, float* %16, i64 %21 + %23 = load float, float* %22, align 4 + store float %23, float* %5, align 4 + %24 = load float*, float** %2, align 8 + %25 = load i32, i32* %4, align 4 + %26 = mul nsw i32 %25, 2 + %27 = load i32, i32* %3, align 4 + %28 = add nsw i32 %26, %27 + %29 = sext i32 %28 to i64 + %30 = getelementptr inbounds float, float* %24, i64 %29 + 
%31 = load float, float* %30, align 4 + %32 = load float*, float** %2, align 8 + %33 = load i32, i32* %3, align 4 + %34 = mul nsw i32 %33, 2 + %35 = load i32, i32* %4, align 4 + %36 = add nsw i32 %34, %35 + %37 = sext i32 %36 to i64 + %38 = getelementptr inbounds float, float* %32, i64 %37 + store float %31, float* %38, align 4 + %39 = load float, float* %5, align 4 + %40 = load float*, float** %2, align 8 + %41 = load i32, i32* %4, align 4 + %42 = mul nsw i32 %41, 2 + %43 = load i32, i32* %3, align 4 + %44 = add nsw i32 %42, %43 + %45 = sext i32 %44 to i64 + %46 = getelementptr inbounds float, float* %40, i64 %45 + store float %39, float* %46, align 4 + br label %47 + +47: ; preds = %15 + %48 = load i32, i32* %4, align 4 + %49 = add nsw i32 %48, 1 + store i32 %49, i32* %4, align 4 + br label %12 + +50: ; preds = %12 + br label %51 + +51: ; preds = %50 + %52 = load i32, i32* %3, align 4 + %53 = add nsw i32 %52, 1 + store i32 %53, i32* %3, align 4 + br label %6 + +54: ; preds = %6 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_transpose(float* %0) #1 { + %2 = alloca float*, align 8 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca float, align 4 + store float* %0, float** %2, align 8 + store i32 0, i32* %3, align 4 + br label %6 + +6: ; preds = %51, %1 + %7 = load i32, i32* %3, align 4 + %8 = icmp slt i32 %7, 2 + br i1 %8, label %9, label %54 + +9: ; preds = %6 + %10 = load i32, i32* %3, align 4 + %11 = add nsw i32 %10, 1 + store i32 %11, i32* %4, align 4 + br label %12 + +12: ; preds = %47, %9 + %13 = load i32, i32* %4, align 4 + %14 = icmp slt i32 %13, 2 + br i1 %14, label %15, label %50 + +15: ; preds = %12 + %16 = load float*, float** %2, align 8 + %17 = load i32, i32* %3, align 4 + %18 = mul nsw i32 %17, 2 + %19 = load i32, i32* %4, align 4 + %20 = add nsw i32 %18, %19 + %21 = sext i32 %20 to i64 + %22 = getelementptr inbounds float, float* %16, i64 %21 + %23 = load float, float* %22, align 4 + store float %23, float* %5, align 4 + %24 = load float*, float** %2, align 8 + %25 = load i32, i32* %4, align 4 + %26 = mul nsw i32 %25, 2 + %27 = load i32, i32* %3, align 4 + %28 = add nsw i32 %26, %27 + %29 = sext i32 %28 to i64 + %30 = getelementptr inbounds float, float* %24, i64 %29 + %31 = load float, float* %30, align 4 + %32 = load float*, float** %2, align 8 + %33 = load i32, i32* %3, align 4 + %34 = mul nsw i32 %33, 2 + %35 = load i32, i32* %4, align 4 + %36 = add nsw i32 %34, %35 + %37 = sext i32 %36 to i64 + %38 = getelementptr inbounds float, float* %32, i64 %37 + store float %31, float* %38, align 4 + %39 = load float, float* %5, align 4 + %40 = load float*, float** %2, align 8 + %41 = load i32, i32* %4, align 4 + %42 = mul nsw i32 %41, 2 + %43 = load i32, i32* %3, align 4 + %44 = add nsw i32 %42, %43 + %45 = sext i32 %44 to i64 + %46 = getelementptr inbounds float, float* %40, i64 %45 + store float %39, float* %46, align 4 + br label %47 + +47: ; preds = %15 + %48 = load i32, i32* %4, align 4 + %49 = add nsw i32 %48, 1 + store i32 %49, i32* %4, align 4 + br label %12 + +50: ; preds = %12 + br label %51 + +51: ; preds = %50 + %52 = load i32, i32* %3, align 4 + %53 = add nsw i32 %52, 1 + store i32 %53, i32* %3, align 4 + br label %6 + +54: ; preds = %6 + ret void +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { + %4 = alloca float*, align 8 + %5 = alloca float*, align 8 + %6 = alloca float*, align 8 + %7 = alloca i32, align 4 + 
%8 = alloca i32, align 4 + %9 = alloca i32, align 4 + store float* %0, float** %4, align 8 + store float* %1, float** %5, align 8 + store float* %2, float** %6, align 8 + store i32 0, i32* %7, align 4 + br label %10 + +10: ; preds = %63, %3 + %11 = load i32, i32* %7, align 4 + %12 = icmp slt i32 %11, 2 + br i1 %12, label %13, label %66 + +13: ; preds = %10 + store i32 0, i32* %8, align 4 + br label %14 + +14: ; preds = %59, %13 + %15 = load i32, i32* %8, align 4 + %16 = icmp slt i32 %15, 2 + br i1 %16, label %17, label %62 + +17: ; preds = %14 + %18 = load float*, float** %6, align 8 + %19 = load i32, i32* %7, align 4 + %20 = mul nsw i32 2, %19 + %21 = load i32, i32* %8, align 4 + %22 = add nsw i32 %20, %21 + %23 = sext i32 %22 to i64 + %24 = getelementptr inbounds float, float* %18, i64 %23 + store float 0.000000e+00, float* %24, align 4 + store i32 0, i32* %9, align 4 + br label %25 + +25: ; preds = %55, %17 + %26 = load i32, i32* %9, align 4 + %27 = icmp slt i32 %26, 2 + br i1 %27, label %28, label %58 + +28: ; preds = %25 + %29 = load float*, float** %4, align 8 + %30 = load i32, i32* %7, align 4 + %31 = mul nsw i32 2, %30 + %32 = load i32, i32* %9, align 4 + %33 = add nsw i32 %31, %32 + %34 = sext i32 %33 to i64 + %35 = getelementptr inbounds float, float* %29, i64 %34 + %36 = load float, float* %35, align 4 + %37 = load float*, float** %5, align 8 + %38 = load i32, i32* %9, align 4 + %39 = mul nsw i32 2, %38 + %40 = load i32, i32* %8, align 4 + %41 = add nsw i32 %39, %40 + %42 = sext i32 %41 to i64 + %43 = getelementptr inbounds float, float* %37, i64 %42 + %44 = load float, float* %43, align 4 + %45 = fmul float %36, %44 + %46 = load float*, float** %6, align 8 + %47 = load i32, i32* %7, align 4 + %48 = mul nsw i32 2, %47 + %49 = load i32, i32* %8, align 4 + %50 = add nsw i32 %48, %49 + %51 = sext i32 %50 to i64 + %52 = getelementptr inbounds float, float* %46, i64 %51 + %53 = load float, float* %52, align 4 + %54 = fadd float %53, %45 + store float %54, float* %52, align 4 + br label %55 + +55: ; preds = %28 + %56 = load i32, i32* %9, align 4 + %57 = add nsw i32 %56, 1 + store i32 %57, i32* %9, align 4 + br label %25 + +58: ; preds = %25 + br label %59 + +59: ; preds = %58 + %60 = load i32, i32* %8, align 4 + %61 = add nsw i32 %60, 1 + store i32 %61, i32* %8, align 4 + br label %14 + +62: ; preds = %14 + br label %63 + +63: ; preds = %62 + %64 = load i32, i32* %7, align 4 + %65 = add nsw i32 %64, 1 + store i32 %65, i32* %7, align 4 + br label %10 + +66: ; preds = %10 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { + %4 = alloca float*, align 8 + %5 = alloca float*, align 8 + %6 = alloca float*, align 8 + %7 = alloca i32, align 4 + %8 = alloca i32, align 4 + %9 = alloca i32, align 4 + store float* %0, float** %4, align 8 + store float* %1, float** %5, align 8 + store float* %2, float** %6, align 8 + store i32 0, i32* %7, align 4 + br label %10 + +10: ; preds = %63, %3 + %11 = load i32, i32* %7, align 4 + %12 = icmp slt i32 %11, 2 + br i1 %12, label %13, label %66 + +13: ; preds = %10 + store i32 0, i32* %8, align 4 + br label %14 + +14: ; preds = %59, %13 + %15 = load i32, i32* %8, align 4 + %16 = icmp slt i32 %15, 2 + br i1 %16, label %17, label %62 + +17: ; preds = %14 + %18 = load float*, float** %6, align 8 + %19 = load i32, i32* %7, align 4 + %20 = mul nsw i32 2, %19 + %21 = load i32, i32* %8, align 4 + %22 = add nsw i32 %20, %21 + %23 = sext i32 %22 to i64 + %24 = 
getelementptr inbounds float, float* %18, i64 %23 + store float 0.000000e+00, float* %24, align 4 + store i32 0, i32* %9, align 4 + br label %25 + +25: ; preds = %55, %17 + %26 = load i32, i32* %9, align 4 + %27 = icmp slt i32 %26, 2 + br i1 %27, label %28, label %58 + +28: ; preds = %25 + %29 = load float*, float** %4, align 8 + %30 = load i32, i32* %7, align 4 + %31 = mul nsw i32 2, %30 + %32 = load i32, i32* %9, align 4 + %33 = add nsw i32 %31, %32 + %34 = sext i32 %33 to i64 + %35 = getelementptr inbounds float, float* %29, i64 %34 + %36 = load float, float* %35, align 4 + %37 = load float*, float** %5, align 8 + %38 = load i32, i32* %9, align 4 + %39 = mul nsw i32 2, %38 + %40 = load i32, i32* %8, align 4 + %41 = add nsw i32 %39, %40 + %42 = sext i32 %41 to i64 + %43 = getelementptr inbounds float, float* %37, i64 %42 + %44 = load float, float* %43, align 4 + %45 = fmul float %36, %44 + %46 = load float*, float** %6, align 8 + %47 = load i32, i32* %7, align 4 + %48 = mul nsw i32 2, %47 + %49 = load i32, i32* %8, align 4 + %50 = add nsw i32 %48, %49 + %51 = sext i32 %50 to i64 + %52 = getelementptr inbounds float, float* %46, i64 %51 + %53 = load float, float* %52, align 4 + %54 = fadd float %53, %45 + store float %54, float* %52, align 4 + br label %55 + +55: ; preds = %28 + %56 = load i32, i32* %9, align 4 + %57 = add nsw i32 %56, 1 + store i32 %57, i32* %9, align 4 + br label %25 + +58: ; preds = %25 + br label %59 + +59: ; preds = %58 + %60 = load i32, i32* %8, align 4 + %61 = add nsw i32 %60, 1 + store i32 %61, i32* %8, align 4 + br label %14 + +62: ; preds = %14 + br label %63 + +63: ; preds = %62 + %64 = load i32, i32* %7, align 4 + %65 = add nsw i32 %64, 1 + store i32 %65, i32* %7, align 4 + br label %10 + +66: ; preds = %10 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { + %4 = alloca float*, align 8 + %5 = alloca i32, align 4 + %6 = alloca float, align 4 + %7 = alloca i32, align 4 + %8 = alloca float*, align 8 + %9 = alloca i32, align 4 + %10 = alloca float, align 4 + %11 = alloca i32, align 4 + %12 = alloca float*, align 8 + %13 = alloca float*, align 8 + %14 = alloca float*, align 8 + %15 = alloca i32, align 4 + %16 = alloca i32, align 4 + %17 = alloca i32, align 4 + %18 = alloca float*, align 8 + %19 = alloca float*, align 8 + %20 = alloca float*, align 8 + %21 = alloca i32, align 4 + %22 = alloca i32, align 4 + %23 = alloca i32, align 4 + %24 = alloca float*, align 8 + %25 = alloca float*, align 8 + %26 = alloca float*, align 8 + %27 = alloca i32, align 4 + %28 = alloca i32, align 4 + %29 = alloca i32, align 4 + %30 = alloca float*, align 8 + %31 = alloca i32, align 4 + %32 = alloca i32, align 4 + %33 = alloca float, align 4 + %34 = alloca float, align 4 + %35 = alloca float*, align 8 + %36 = alloca float*, align 8 + %37 = alloca float*, align 8 + %38 = alloca i32, align 4 + %39 = alloca [4 x float], align 16 + %40 = alloca i32, align 4 + %41 = alloca i32, align 4 + %42 = alloca i32, align 4 + %43 = alloca i32, align 4 + %44 = alloca [2 x float], align 4 + %45 = alloca [2 x float], align 4 + %46 = alloca i32, align 4 + %47 = alloca i32, align 4 + %48 = alloca i32, align 4 + %49 = alloca float, align 4 + %50 = alloca [2 x float], align 4 + %51 = alloca [2 x float], align 4 + %52 = alloca i32, align 4 + %53 = alloca i32, align 4 + %54 = alloca float, align 4 + %55 = alloca i32, align 4 + %56 = alloca [4 x float], align 16 + %57 = alloca i32, align 4 + %58 = alloca i32, align 4 + %59 = 
alloca i32, align 4 + %60 = alloca float, align 4 + %61 = alloca [4 x float], align 16 + %62 = alloca i32, align 4 + %63 = alloca i32, align 4 + %64 = alloca i32, align 4 + %65 = alloca float, align 4 + %66 = alloca i32, align 4 + %67 = alloca [4 x float], align 16 + %68 = alloca i32, align 4 + %69 = alloca i32, align 4 + %70 = alloca i32, align 4 + store float* %0, float** %35, align 8 + store float* %1, float** %36, align 8 + store float* %2, float** %37, align 8 + store i32 0, i32* %38, align 4 + br label %71 + +71: ; preds = %84, %3 + %72 = load i32, i32* %38, align 4 + %73 = icmp slt i32 %72, 4 + br i1 %73, label %74, label %87 + +74: ; preds = %71 + %75 = load float*, float** %35, align 8 + %76 = load i32, i32* %38, align 4 + %77 = sext i32 %76 to i64 + %78 = getelementptr inbounds float, float* %75, i64 %77 + %79 = load float, float* %78, align 4 + %80 = load float*, float** %37, align 8 + %81 = load i32, i32* %38, align 4 + %82 = sext i32 %81 to i64 + %83 = getelementptr inbounds float, float* %80, i64 %82 + store float %79, float* %83, align 4 + br label %84 + +84: ; preds = %74 + %85 = load i32, i32* %38, align 4 + %86 = add nsw i32 %85, 1 + store i32 %86, i32* %38, align 4 + br label %71 + +87: ; preds = %71 + %88 = bitcast [4 x float]* %39 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %88, i8 0, i64 16, i1 false) + store i32 0, i32* %40, align 4 + br label %89 + +89: ; preds = %112, %87 + %90 = load i32, i32* %40, align 4 + %91 = icmp slt i32 %90, 2 + br i1 %91, label %92, label %115 + +92: ; preds = %89 + store i32 0, i32* %41, align 4 + br label %93 + +93: ; preds = %108, %92 + %94 = load i32, i32* %41, align 4 + %95 = icmp slt i32 %94, 2 + br i1 %95, label %96, label %111 + +96: ; preds = %93 + %97 = load i32, i32* %40, align 4 + %98 = load i32, i32* %41, align 4 + %99 = icmp eq i32 %97, %98 + %100 = zext i1 %99 to i32 + %101 = sitofp i32 %100 to float + %102 = load i32, i32* %40, align 4 + %103 = mul nsw i32 %102, 2 + %104 = load i32, i32* %41, align 4 + %105 = add nsw i32 %103, %104 + %106 = sext i32 %105 to i64 + %107 = getelementptr inbounds [4 x float], [4 x float]* %39, i64 0, i64 %106 + store float %101, float* %107, align 4 + br label %108 + +108: ; preds = %96 + %109 = load i32, i32* %41, align 4 + %110 = add nsw i32 %109, 1 + store i32 %110, i32* %41, align 4 + br label %93 + +111: ; preds = %93 + br label %112 + +112: ; preds = %111 + %113 = load i32, i32* %40, align 4 + %114 = add nsw i32 %113, 1 + store i32 %114, i32* %40, align 4 + br label %89 + +115: ; preds = %89 + store i32 0, i32* %42, align 4 + br label %116 + +116: ; preds = %643, %115 + %117 = load i32, i32* %42, align 4 + %118 = icmp slt i32 %117, 1 + br i1 %118, label %119, label %646 + +119: ; preds = %116 + %120 = load i32, i32* %42, align 4 + %121 = sub nsw i32 2, %120 + store i32 %121, i32* %43, align 4 + %122 = bitcast [2 x float]* %44 to i8* + call void @llvm.memset.p0i8.i64(i8* align 4 %122, i8 0, i64 8, i1 false) + %123 = bitcast [2 x float]* %45 to i8* + call void @llvm.memset.p0i8.i64(i8* align 4 %123, i8 0, i64 8, i1 false) + store i32 0, i32* %46, align 4 + br label %124 + +124: ; preds = %134, %119 + %125 = load i32, i32* %46, align 4 + %126 = icmp slt i32 %125, 2 + br i1 %126, label %127, label %137 + +127: ; preds = %124 + %128 = load i32, i32* %46, align 4 + %129 = sext i32 %128 to i64 + %130 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 %129 + store float 0.000000e+00, float* %130, align 4 + %131 = load i32, i32* %46, align 4 + %132 = sext i32 %131 to i64 
+ %133 = getelementptr inbounds [2 x float], [2 x float]* %45, i64 0, i64 %132 + store float 0.000000e+00, float* %133, align 4 + br label %134 + +134: ; preds = %127 + %135 = load i32, i32* %46, align 4 + %136 = add nsw i32 %135, 1 + store i32 %136, i32* %46, align 4 + br label %124 + +137: ; preds = %124 + store i32 0, i32* %47, align 4 + br label %138 + +138: ; preds = %167, %137 + %139 = load i32, i32* %47, align 4 + %140 = load i32, i32* %43, align 4 + %141 = icmp slt i32 %139, %140 + br i1 %141, label %142, label %170 + +142: ; preds = %138 + %143 = load i32, i32* %42, align 4 + %144 = load i32, i32* %47, align 4 + %145 = add nsw i32 %143, %144 + store i32 %145, i32* %48, align 4 + %146 = load float*, float** %37, align 8 + %147 = load i32, i32* %48, align 4 + %148 = mul nsw i32 %147, 2 + %149 = load i32, i32* %42, align 4 + %150 = add nsw i32 %148, %149 + %151 = sext i32 %150 to i64 + %152 = getelementptr inbounds float, float* %146, i64 %151 + %153 = load float, float* %152, align 4 + %154 = load i32, i32* %47, align 4 + %155 = sext i32 %154 to i64 + %156 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 %155 + store float %153, float* %156, align 4 + %157 = load i32, i32* %48, align 4 + %158 = mul nsw i32 %157, 2 + %159 = load i32, i32* %42, align 4 + %160 = add nsw i32 %158, %159 + %161 = sext i32 %160 to i64 + %162 = getelementptr inbounds [4 x float], [4 x float]* %39, i64 0, i64 %161 + %163 = load float, float* %162, align 4 + %164 = load i32, i32* %47, align 4 + %165 = sext i32 %164 to i64 + %166 = getelementptr inbounds [2 x float], [2 x float]* %45, i64 0, i64 %165 + store float %163, float* %166, align 4 + br label %167 + +167: ; preds = %142 + %168 = load i32, i32* %47, align 4 + %169 = add nsw i32 %168, 1 + store i32 %169, i32* %47, align 4 + br label %138 + +170: ; preds = %138 + %171 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 0 + %172 = load float, float* %171, align 4 + store float %172, float* %34, align 4 + %173 = load float, float* %34, align 4 + %174 = fcmp ogt float %173, 0.000000e+00 + %175 = zext i1 %174 to i32 + %176 = load float, float* %34, align 4 + %177 = fcmp olt float %176, 0.000000e+00 + %178 = zext i1 %177 to i32 + %179 = sub nsw i32 %175, %178 + %180 = sitofp i32 %179 to float + %181 = fneg float %180 + %182 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 0 + %183 = load i32, i32* %43, align 4 + store float* %182, float** %4, align 8 + store i32 %183, i32* %5, align 4 + store float 0.000000e+00, float* %6, align 4 + store i32 0, i32* %7, align 4 + br label %184 + +184: ; preds = %188, %170 + %185 = load i32, i32* %7, align 4 + %186 = load i32, i32* %5, align 4 + %187 = icmp slt i32 %185, %186 + br i1 %187, label %188, label %204 + +188: ; preds = %184 + %189 = load float*, float** %4, align 8 + %190 = load i32, i32* %7, align 4 + %191 = sext i32 %190 to i64 + %192 = getelementptr inbounds float, float* %189, i64 %191 + %193 = load float, float* %192, align 4 + %194 = load float*, float** %4, align 8 + %195 = load i32, i32* %7, align 4 + %196 = sext i32 %195 to i64 + %197 = getelementptr inbounds float, float* %194, i64 %196 + %198 = load float, float* %197, align 4 + %199 = fmul float %193, %198 + %200 = load float, float* %6, align 4 + %201 = fadd float %200, %199 + store float %201, float* %6, align 4 + %202 = load i32, i32* %7, align 4 + %203 = add nsw i32 %202, 1 + store i32 %203, i32* %7, align 4 + br label %184 + +204: ; preds = %184 + %205 = load float, float* %6, align 4 + %206 = call 
float @llvm.sqrt.f32(float %205) #8 + %207 = fmul float %181, %206 + store float %207, float* %49, align 4 + %208 = bitcast [2 x float]* %50 to i8* + call void @llvm.memset.p0i8.i64(i8* align 4 %208, i8 0, i64 8, i1 false) + %209 = bitcast [2 x float]* %51 to i8* + call void @llvm.memset.p0i8.i64(i8* align 4 %209, i8 0, i64 8, i1 false) + store i32 0, i32* %52, align 4 + br label %210 + +210: ; preds = %220, %204 + %211 = load i32, i32* %52, align 4 + %212 = icmp slt i32 %211, 2 + br i1 %212, label %213, label %223 + +213: ; preds = %210 + %214 = load i32, i32* %52, align 4 + %215 = sext i32 %214 to i64 + %216 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 %215 + store float 0.000000e+00, float* %216, align 4 + %217 = load i32, i32* %52, align 4 + %218 = sext i32 %217 to i64 + %219 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %218 + store float 0.000000e+00, float* %219, align 4 + br label %220 + +220: ; preds = %213 + %221 = load i32, i32* %52, align 4 + %222 = add nsw i32 %221, 1 + store i32 %222, i32* %52, align 4 + br label %210 + +223: ; preds = %210 + store i32 0, i32* %53, align 4 + br label %224 + +224: ; preds = %243, %223 + %225 = load i32, i32* %53, align 4 + %226 = load i32, i32* %43, align 4 + %227 = icmp slt i32 %225, %226 + br i1 %227, label %228, label %246 + +228: ; preds = %224 + %229 = load i32, i32* %53, align 4 + %230 = sext i32 %229 to i64 + %231 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 %230 + %232 = load float, float* %231, align 4 + %233 = load float, float* %49, align 4 + %234 = load i32, i32* %53, align 4 + %235 = sext i32 %234 to i64 + %236 = getelementptr inbounds [2 x float], [2 x float]* %45, i64 0, i64 %235 + %237 = load float, float* %236, align 4 + %238 = fmul float %233, %237 + %239 = fadd float %232, %238 + %240 = load i32, i32* %53, align 4 + %241 = sext i32 %240 to i64 + %242 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 %241 + store float %239, float* %242, align 4 + br label %243 + +243: ; preds = %228 + %244 = load i32, i32* %53, align 4 + %245 = add nsw i32 %244, 1 + store i32 %245, i32* %53, align 4 + br label %224 + +246: ; preds = %224 + %247 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 0 + %248 = load i32, i32* %43, align 4 + store float* %247, float** %8, align 8 + store i32 %248, i32* %9, align 4 + store float 0.000000e+00, float* %10, align 4 + store i32 0, i32* %11, align 4 + br label %249 + +249: ; preds = %253, %246 + %250 = load i32, i32* %11, align 4 + %251 = load i32, i32* %9, align 4 + %252 = icmp slt i32 %250, %251 + br i1 %252, label %253, label %269 + +253: ; preds = %249 + %254 = load float*, float** %8, align 8 + %255 = load i32, i32* %11, align 4 + %256 = sext i32 %255 to i64 + %257 = getelementptr inbounds float, float* %254, i64 %256 + %258 = load float, float* %257, align 4 + %259 = load float*, float** %8, align 8 + %260 = load i32, i32* %11, align 4 + %261 = sext i32 %260 to i64 + %262 = getelementptr inbounds float, float* %259, i64 %261 + %263 = load float, float* %262, align 4 + %264 = fmul float %258, %263 + %265 = load float, float* %10, align 4 + %266 = fadd float %265, %264 + store float %266, float* %10, align 4 + %267 = load i32, i32* %11, align 4 + %268 = add nsw i32 %267, 1 + store i32 %268, i32* %11, align 4 + br label %249 + +269: ; preds = %249 + %270 = load float, float* %10, align 4 + %271 = call float @llvm.sqrt.f32(float %270) #8 + store float %271, float* %54, align 4 + store i32 0, i32* %55, 
align 4 + br label %272 + +272: ; preds = %287, %269 + %273 = load i32, i32* %55, align 4 + %274 = load i32, i32* %43, align 4 + %275 = icmp slt i32 %273, %274 + br i1 %275, label %276, label %290 + +276: ; preds = %272 + %277 = load i32, i32* %55, align 4 + %278 = sext i32 %277 to i64 + %279 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 %278 + %280 = load float, float* %279, align 4 + %281 = load float, float* %54, align 4 + %282 = fadd float %281, 0x3EE4F8B580000000 + %283 = fdiv float %280, %282 + %284 = load i32, i32* %55, align 4 + %285 = sext i32 %284 to i64 + %286 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %285 + store float %283, float* %286, align 4 + br label %287 + +287: ; preds = %276 + %288 = load i32, i32* %55, align 4 + %289 = add nsw i32 %288, 1 + store i32 %289, i32* %55, align 4 + br label %272 + +290: ; preds = %272 + %291 = bitcast [4 x float]* %56 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %291, i8 0, i64 16, i1 false) + store i32 0, i32* %57, align 4 + br label %292 + +292: ; preds = %299, %290 + %293 = load i32, i32* %57, align 4 + %294 = icmp slt i32 %293, 4 + br i1 %294, label %295, label %302 + +295: ; preds = %292 + %296 = load i32, i32* %57, align 4 + %297 = sext i32 %296 to i64 + %298 = getelementptr inbounds [4 x float], [4 x float]* %56, i64 0, i64 %297 + store float 0.000000e+00, float* %298, align 4 + br label %299 + +299: ; preds = %295 + %300 = load i32, i32* %57, align 4 + %301 = add nsw i32 %300, 1 + store i32 %301, i32* %57, align 4 + br label %292 + +302: ; preds = %292 + store i32 0, i32* %58, align 4 + br label %303 + +303: ; preds = %341, %302 + %304 = load i32, i32* %58, align 4 + %305 = load i32, i32* %43, align 4 + %306 = icmp slt i32 %304, %305 + br i1 %306, label %307, label %344 + +307: ; preds = %303 + store i32 0, i32* %59, align 4 + br label %308 + +308: ; preds = %337, %307 + %309 = load i32, i32* %59, align 4 + %310 = load i32, i32* %43, align 4 + %311 = icmp slt i32 %309, %310 + br i1 %311, label %312, label %340 + +312: ; preds = %308 + %313 = load i32, i32* %58, align 4 + %314 = load i32, i32* %59, align 4 + %315 = icmp eq i32 %313, %314 + %316 = zext i1 %315 to i64 + %317 = select i1 %315, float 1.000000e+00, float 0.000000e+00 + %318 = load i32, i32* %58, align 4 + %319 = sext i32 %318 to i64 + %320 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %319 + %321 = load float, float* %320, align 4 + %322 = fmul float 2.000000e+00, %321 + %323 = load i32, i32* %59, align 4 + %324 = sext i32 %323 to i64 + %325 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %324 + %326 = load float, float* %325, align 4 + %327 = fmul float %322, %326 + %328 = fsub float %317, %327 + store float %328, float* %60, align 4 + %329 = load float, float* %60, align 4 + %330 = load i32, i32* %58, align 4 + %331 = load i32, i32* %43, align 4 + %332 = mul nsw i32 %330, %331 + %333 = load i32, i32* %59, align 4 + %334 = add nsw i32 %332, %333 + %335 = sext i32 %334 to i64 + %336 = getelementptr inbounds [4 x float], [4 x float]* %56, i64 0, i64 %335 + store float %329, float* %336, align 4 + br label %337 + +337: ; preds = %312 + %338 = load i32, i32* %59, align 4 + %339 = add nsw i32 %338, 1 + store i32 %339, i32* %59, align 4 + br label %308 + +340: ; preds = %308 + br label %341 + +341: ; preds = %340 + %342 = load i32, i32* %58, align 4 + %343 = add nsw i32 %342, 1 + store i32 %343, i32* %58, align 4 + br label %303 + +344: ; preds = %303 + %345 = bitcast [4 x float]* 
%61 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %345, i8 0, i64 16, i1 false) + store i32 0, i32* %62, align 4 + br label %346 + +346: ; preds = %353, %344 + %347 = load i32, i32* %62, align 4 + %348 = icmp slt i32 %347, 4 + br i1 %348, label %349, label %356 + +349: ; preds = %346 + %350 = load i32, i32* %62, align 4 + %351 = sext i32 %350 to i64 + %352 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 %351 + store float 0.000000e+00, float* %352, align 4 + br label %353 + +353: ; preds = %349 + %354 = load i32, i32* %62, align 4 + %355 = add nsw i32 %354, 1 + store i32 %355, i32* %62, align 4 + br label %346 + +356: ; preds = %346 + store i32 0, i32* %63, align 4 + br label %357 + +357: ; preds = %403, %356 + %358 = load i32, i32* %63, align 4 + %359 = icmp slt i32 %358, 2 + br i1 %359, label %360, label %406 + +360: ; preds = %357 + store i32 0, i32* %64, align 4 + br label %361 + +361: ; preds = %399, %360 + %362 = load i32, i32* %64, align 4 + %363 = icmp slt i32 %362, 2 + br i1 %363, label %364, label %402 + +364: ; preds = %361 + %365 = load i32, i32* %63, align 4 + %366 = load i32, i32* %42, align 4 + %367 = icmp slt i32 %365, %366 + br i1 %367, label %372, label %368 + +368: ; preds = %364 + %369 = load i32, i32* %64, align 4 + %370 = load i32, i32* %42, align 4 + %371 = icmp slt i32 %369, %370 + br i1 %371, label %372, label %378 + +372: ; preds = %368, %364 + %373 = load i32, i32* %63, align 4 + %374 = load i32, i32* %64, align 4 + %375 = icmp eq i32 %373, %374 + %376 = zext i1 %375 to i64 + %377 = select i1 %375, float 1.000000e+00, float 0.000000e+00 + store float %377, float* %65, align 4 + br label %391 + +378: ; preds = %368 + %379 = load i32, i32* %63, align 4 + %380 = load i32, i32* %42, align 4 + %381 = sub nsw i32 %379, %380 + %382 = load i32, i32* %43, align 4 + %383 = mul nsw i32 %381, %382 + %384 = load i32, i32* %64, align 4 + %385 = load i32, i32* %42, align 4 + %386 = sub nsw i32 %384, %385 + %387 = add nsw i32 %383, %386 + %388 = sext i32 %387 to i64 + %389 = getelementptr inbounds [4 x float], [4 x float]* %56, i64 0, i64 %388 + %390 = load float, float* %389, align 4 + store float %390, float* %65, align 4 + br label %391 + +391: ; preds = %378, %372 + %392 = load float, float* %65, align 4 + %393 = load i32, i32* %63, align 4 + %394 = mul nsw i32 %393, 2 + %395 = load i32, i32* %64, align 4 + %396 = add nsw i32 %394, %395 + %397 = sext i32 %396 to i64 + %398 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 %397 + store float %392, float* %398, align 4 + br label %399 + +399: ; preds = %391 + %400 = load i32, i32* %64, align 4 + %401 = add nsw i32 %400, 1 + store i32 %401, i32* %64, align 4 + br label %361 + +402: ; preds = %361 + br label %403 + +403: ; preds = %402 + %404 = load i32, i32* %63, align 4 + %405 = add nsw i32 %404, 1 + store i32 %405, i32* %63, align 4 + br label %357 + +406: ; preds = %357 + %407 = load i32, i32* %42, align 4 + %408 = icmp eq i32 %407, 0 + br i1 %408, label %409, label %483 + +409: ; preds = %406 + store i32 0, i32* %66, align 4 + br label %410 + +410: ; preds = %422, %409 + %411 = load i32, i32* %66, align 4 + %412 = icmp slt i32 %411, 4 + br i1 %412, label %413, label %425 + +413: ; preds = %410 + %414 = load i32, i32* %66, align 4 + %415 = sext i32 %414 to i64 + %416 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 %415 + %417 = load float, float* %416, align 4 + %418 = load float*, float** %36, align 8 + %419 = load i32, i32* %66, align 4 + %420 = sext i32 %419 
to i64 + %421 = getelementptr inbounds float, float* %418, i64 %420 + store float %417, float* %421, align 4 + br label %422 + +422: ; preds = %413 + %423 = load i32, i32* %66, align 4 + %424 = add nsw i32 %423, 1 + store i32 %424, i32* %66, align 4 + br label %410 + +425: ; preds = %410 + %426 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 0 + %427 = load float*, float** %35, align 8 + %428 = load float*, float** %37, align 8 + store float* %426, float** %12, align 8 + store float* %427, float** %13, align 8 + store float* %428, float** %14, align 8 + store i32 0, i32* %15, align 4 + br label %429 + +429: ; preds = %479, %425 + %430 = load i32, i32* %15, align 4 + %431 = icmp slt i32 %430, 2 + br i1 %431, label %432, label %482 + +432: ; preds = %429 + store i32 0, i32* %16, align 4 + br label %433 + +433: ; preds = %476, %432 + %434 = load i32, i32* %16, align 4 + %435 = icmp slt i32 %434, 2 + br i1 %435, label %436, label %479 + +436: ; preds = %433 + %437 = load float*, float** %14, align 8 + %438 = load i32, i32* %15, align 4 + %439 = mul nsw i32 2, %438 + %440 = load i32, i32* %16, align 4 + %441 = add nsw i32 %439, %440 + %442 = sext i32 %441 to i64 + %443 = getelementptr inbounds float, float* %437, i64 %442 + store float 0.000000e+00, float* %443, align 4 + store i32 0, i32* %17, align 4 + br label %444 + +444: ; preds = %447, %436 + %445 = load i32, i32* %17, align 4 + %446 = icmp slt i32 %445, 2 + br i1 %446, label %447, label %476 + +447: ; preds = %444 + %448 = load float*, float** %12, align 8 + %449 = load i32, i32* %15, align 4 + %450 = mul nsw i32 2, %449 + %451 = load i32, i32* %17, align 4 + %452 = add nsw i32 %450, %451 + %453 = sext i32 %452 to i64 + %454 = getelementptr inbounds float, float* %448, i64 %453 + %455 = load float, float* %454, align 4 + %456 = load float*, float** %13, align 8 + %457 = load i32, i32* %17, align 4 + %458 = mul nsw i32 2, %457 + %459 = load i32, i32* %16, align 4 + %460 = add nsw i32 %458, %459 + %461 = sext i32 %460 to i64 + %462 = getelementptr inbounds float, float* %456, i64 %461 + %463 = load float, float* %462, align 4 + %464 = fmul float %455, %463 + %465 = load float*, float** %14, align 8 + %466 = load i32, i32* %15, align 4 + %467 = mul nsw i32 2, %466 + %468 = load i32, i32* %16, align 4 + %469 = add nsw i32 %467, %468 + %470 = sext i32 %469 to i64 + %471 = getelementptr inbounds float, float* %465, i64 %470 + %472 = load float, float* %471, align 4 + %473 = fadd float %472, %464 + store float %473, float* %471, align 4 + %474 = load i32, i32* %17, align 4 + %475 = add nsw i32 %474, 1 + store i32 %475, i32* %17, align 4 + br label %444 + +476: ; preds = %444 + %477 = load i32, i32* %16, align 4 + %478 = add nsw i32 %477, 1 + store i32 %478, i32* %16, align 4 + br label %433 + +479: ; preds = %433 + %480 = load i32, i32* %15, align 4 + %481 = add nsw i32 %480, 1 + store i32 %481, i32* %15, align 4 + br label %429 + +482: ; preds = %429 + br label %642 + +483: ; preds = %406 + %484 = bitcast [4 x float]* %67 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %484, i8 0, i64 16, i1 false) + store i32 0, i32* %68, align 4 + br label %485 + +485: ; preds = %492, %483 + %486 = load i32, i32* %68, align 4 + %487 = icmp slt i32 %486, 4 + br i1 %487, label %488, label %495 + +488: ; preds = %485 + %489 = load i32, i32* %68, align 4 + %490 = sext i32 %489 to i64 + %491 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 %490 + store float 0.000000e+00, float* %491, align 4 + br label %492 + +492: ; 
preds = %488 + %493 = load i32, i32* %68, align 4 + %494 = add nsw i32 %493, 1 + store i32 %494, i32* %68, align 4 + br label %485 + +495: ; preds = %485 + %496 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 0 + %497 = load float*, float** %36, align 8 + %498 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 0 + store float* %496, float** %18, align 8 + store float* %497, float** %19, align 8 + store float* %498, float** %20, align 8 + store i32 0, i32* %21, align 4 + br label %499 + +499: ; preds = %549, %495 + %500 = load i32, i32* %21, align 4 + %501 = icmp slt i32 %500, 2 + br i1 %501, label %502, label %552 + +502: ; preds = %499 + store i32 0, i32* %22, align 4 + br label %503 + +503: ; preds = %546, %502 + %504 = load i32, i32* %22, align 4 + %505 = icmp slt i32 %504, 2 + br i1 %505, label %506, label %549 + +506: ; preds = %503 + %507 = load float*, float** %20, align 8 + %508 = load i32, i32* %21, align 4 + %509 = mul nsw i32 2, %508 + %510 = load i32, i32* %22, align 4 + %511 = add nsw i32 %509, %510 + %512 = sext i32 %511 to i64 + %513 = getelementptr inbounds float, float* %507, i64 %512 + store float 0.000000e+00, float* %513, align 4 + store i32 0, i32* %23, align 4 + br label %514 + +514: ; preds = %517, %506 + %515 = load i32, i32* %23, align 4 + %516 = icmp slt i32 %515, 2 + br i1 %516, label %517, label %546 + +517: ; preds = %514 + %518 = load float*, float** %18, align 8 + %519 = load i32, i32* %21, align 4 + %520 = mul nsw i32 2, %519 + %521 = load i32, i32* %23, align 4 + %522 = add nsw i32 %520, %521 + %523 = sext i32 %522 to i64 + %524 = getelementptr inbounds float, float* %518, i64 %523 + %525 = load float, float* %524, align 4 + %526 = load float*, float** %19, align 8 + %527 = load i32, i32* %23, align 4 + %528 = mul nsw i32 2, %527 + %529 = load i32, i32* %22, align 4 + %530 = add nsw i32 %528, %529 + %531 = sext i32 %530 to i64 + %532 = getelementptr inbounds float, float* %526, i64 %531 + %533 = load float, float* %532, align 4 + %534 = fmul float %525, %533 + %535 = load float*, float** %20, align 8 + %536 = load i32, i32* %21, align 4 + %537 = mul nsw i32 2, %536 + %538 = load i32, i32* %22, align 4 + %539 = add nsw i32 %537, %538 + %540 = sext i32 %539 to i64 + %541 = getelementptr inbounds float, float* %535, i64 %540 + %542 = load float, float* %541, align 4 + %543 = fadd float %542, %534 + store float %543, float* %541, align 4 + %544 = load i32, i32* %23, align 4 + %545 = add nsw i32 %544, 1 + store i32 %545, i32* %23, align 4 + br label %514 + +546: ; preds = %514 + %547 = load i32, i32* %22, align 4 + %548 = add nsw i32 %547, 1 + store i32 %548, i32* %22, align 4 + br label %503 + +549: ; preds = %503 + %550 = load i32, i32* %21, align 4 + %551 = add nsw i32 %550, 1 + store i32 %551, i32* %21, align 4 + br label %499 + +552: ; preds = %499 + store i32 0, i32* %69, align 4 + br label %553 + +553: ; preds = %565, %552 + %554 = load i32, i32* %69, align 4 + %555 = icmp slt i32 %554, 4 + br i1 %555, label %556, label %568 + +556: ; preds = %553 + %557 = load i32, i32* %69, align 4 + %558 = sext i32 %557 to i64 + %559 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 %558 + %560 = load float, float* %559, align 4 + %561 = load float*, float** %36, align 8 + %562 = load i32, i32* %69, align 4 + %563 = sext i32 %562 to i64 + %564 = getelementptr inbounds float, float* %561, i64 %563 + store float %560, float* %564, align 4 + br label %565 + +565: ; preds = %556 + %566 = load i32, i32* %69, align 4 + %567 
= add nsw i32 %566, 1 + store i32 %567, i32* %69, align 4 + br label %553 + +568: ; preds = %553 + %569 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 0 + %570 = load float*, float** %37, align 8 + %571 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 0 + store float* %569, float** %24, align 8 + store float* %570, float** %25, align 8 + store float* %571, float** %26, align 8 + store i32 0, i32* %27, align 4 + br label %572 + +572: ; preds = %622, %568 + %573 = load i32, i32* %27, align 4 + %574 = icmp slt i32 %573, 2 + br i1 %574, label %575, label %625 + +575: ; preds = %572 + store i32 0, i32* %28, align 4 + br label %576 + +576: ; preds = %619, %575 + %577 = load i32, i32* %28, align 4 + %578 = icmp slt i32 %577, 2 + br i1 %578, label %579, label %622 + +579: ; preds = %576 + %580 = load float*, float** %26, align 8 + %581 = load i32, i32* %27, align 4 + %582 = mul nsw i32 2, %581 + %583 = load i32, i32* %28, align 4 + %584 = add nsw i32 %582, %583 + %585 = sext i32 %584 to i64 + %586 = getelementptr inbounds float, float* %580, i64 %585 + store float 0.000000e+00, float* %586, align 4 + store i32 0, i32* %29, align 4 + br label %587 + +587: ; preds = %590, %579 + %588 = load i32, i32* %29, align 4 + %589 = icmp slt i32 %588, 2 + br i1 %589, label %590, label %619 + +590: ; preds = %587 + %591 = load float*, float** %24, align 8 + %592 = load i32, i32* %27, align 4 + %593 = mul nsw i32 2, %592 + %594 = load i32, i32* %29, align 4 + %595 = add nsw i32 %593, %594 + %596 = sext i32 %595 to i64 + %597 = getelementptr inbounds float, float* %591, i64 %596 + %598 = load float, float* %597, align 4 + %599 = load float*, float** %25, align 8 + %600 = load i32, i32* %29, align 4 + %601 = mul nsw i32 2, %600 + %602 = load i32, i32* %28, align 4 + %603 = add nsw i32 %601, %602 + %604 = sext i32 %603 to i64 + %605 = getelementptr inbounds float, float* %599, i64 %604 + %606 = load float, float* %605, align 4 + %607 = fmul float %598, %606 + %608 = load float*, float** %26, align 8 + %609 = load i32, i32* %27, align 4 + %610 = mul nsw i32 2, %609 + %611 = load i32, i32* %28, align 4 + %612 = add nsw i32 %610, %611 + %613 = sext i32 %612 to i64 + %614 = getelementptr inbounds float, float* %608, i64 %613 + %615 = load float, float* %614, align 4 + %616 = fadd float %615, %607 + store float %616, float* %614, align 4 + %617 = load i32, i32* %29, align 4 + %618 = add nsw i32 %617, 1 + store i32 %618, i32* %29, align 4 + br label %587 + +619: ; preds = %587 + %620 = load i32, i32* %28, align 4 + %621 = add nsw i32 %620, 1 + store i32 %621, i32* %28, align 4 + br label %576 + +622: ; preds = %576 + %623 = load i32, i32* %27, align 4 + %624 = add nsw i32 %623, 1 + store i32 %624, i32* %27, align 4 + br label %572 + +625: ; preds = %572 + store i32 0, i32* %70, align 4 + br label %626 + +626: ; preds = %638, %625 + %627 = load i32, i32* %70, align 4 + %628 = icmp slt i32 %627, 4 + br i1 %628, label %629, label %641 + +629: ; preds = %626 + %630 = load i32, i32* %70, align 4 + %631 = sext i32 %630 to i64 + %632 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 %631 + %633 = load float, float* %632, align 4 + %634 = load float*, float** %37, align 8 + %635 = load i32, i32* %70, align 4 + %636 = sext i32 %635 to i64 + %637 = getelementptr inbounds float, float* %634, i64 %636 + store float %633, float* %637, align 4 + br label %638 + +638: ; preds = %629 + %639 = load i32, i32* %70, align 4 + %640 = add nsw i32 %639, 1 + store i32 %640, i32* %70, align 
4 + br label %626 + +641: ; preds = %626 + br label %642 + +642: ; preds = %641, %482 + br label %643 + +643: ; preds = %642 + %644 = load i32, i32* %42, align 4 + %645 = add nsw i32 %644, 1 + store i32 %645, i32* %42, align 4 + br label %116 + +646: ; preds = %116 + %647 = load float*, float** %36, align 8 + store float* %647, float** %30, align 8 + store i32 0, i32* %31, align 4 + br label %648 + +648: ; preds = %691, %646 + %649 = load i32, i32* %31, align 4 + %650 = icmp slt i32 %649, 2 + br i1 %650, label %651, label %694 + +651: ; preds = %648 + %652 = load i32, i32* %31, align 4 + %653 = add nsw i32 %652, 1 + store i32 %653, i32* %32, align 4 + br label %654 + +654: ; preds = %657, %651 + %655 = load i32, i32* %32, align 4 + %656 = icmp slt i32 %655, 2 + br i1 %656, label %657, label %691 + +657: ; preds = %654 + %658 = load float*, float** %30, align 8 + %659 = load i32, i32* %31, align 4 + %660 = mul nsw i32 %659, 2 + %661 = load i32, i32* %32, align 4 + %662 = add nsw i32 %660, %661 + %663 = sext i32 %662 to i64 + %664 = getelementptr inbounds float, float* %658, i64 %663 + %665 = load float, float* %664, align 4 + store float %665, float* %33, align 4 + %666 = load float*, float** %30, align 8 + %667 = load i32, i32* %32, align 4 + %668 = mul nsw i32 %667, 2 + %669 = load i32, i32* %31, align 4 + %670 = add nsw i32 %668, %669 + %671 = sext i32 %670 to i64 + %672 = getelementptr inbounds float, float* %666, i64 %671 + %673 = load float, float* %672, align 4 + %674 = load float*, float** %30, align 8 + %675 = load i32, i32* %31, align 4 + %676 = mul nsw i32 %675, 2 + %677 = load i32, i32* %32, align 4 + %678 = add nsw i32 %676, %677 + %679 = sext i32 %678 to i64 + %680 = getelementptr inbounds float, float* %674, i64 %679 + store float %673, float* %680, align 4 + %681 = load float, float* %33, align 4 + %682 = load float*, float** %30, align 8 + %683 = load i32, i32* %32, align 4 + %684 = mul nsw i32 %683, 2 + %685 = load i32, i32* %31, align 4 + %686 = add nsw i32 %684, %685 + %687 = sext i32 %686 to i64 + %688 = getelementptr inbounds float, float* %682, i64 %687 + store float %681, float* %688, align 4 + %689 = load i32, i32* %32, align 4 + %690 = add nsw i32 %689, 1 + store i32 %690, i32* %32, align 4 + br label %654 + +691: ; preds = %654 + %692 = load i32, i32* %31, align 4 + %693 = add nsw i32 %692, 1 + store i32 %693, i32* %31, align 4 + br label %648 + +694: ; preds = %648 + ret void +} + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { + %4 = alloca float*, align 8 + %5 = alloca float*, align 8 + %6 = alloca float*, align 8 + %7 = alloca float*, align 8 + %8 = alloca i32, align 4 + %9 = alloca i32, align 4 + %10 = alloca i32, align 4 + %11 = alloca i32, align 4 + %12 = alloca float*, align 8 + %13 = alloca float*, align 8 + %14 = alloca i32, align 4 + %15 = alloca i32, align 4 + %16 = alloca float, align 4 + %17 = alloca float*, align 8 + %18 = alloca float*, align 8 + %19 = alloca i32, align 4 + %20 = alloca float, align 4 + %21 = alloca i32, align 4 + %22 = alloca float*, align 8 + %23 = alloca i32, align 4 + %24 = alloca i32, align 4 + %25 = alloca float, align 4 + %26 = alloca float*, align 8 + %27 = alloca i32, align 4 + %28 = alloca i32, align 4 + %29 = alloca float, align 4 + %30 = alloca float*, align 8 + store float* %0, float** %4, align 
8 + store float* %1, float** %5, align 8 + store float* %2, float** %6, align 8 + %31 = load float*, float** %6, align 8 + %32 = bitcast float* %31 to i8* + %33 = load float*, float** %4, align 8 + %34 = bitcast float* %33 to i8* + %35 = load float*, float** %6, align 8 + %36 = bitcast float* %35 to i8* + %37 = call i64 @llvm.objectsize.i64.p0i8(i8* %36, i1 false, i1 true, i1 false) + %38 = call i8* @__memcpy_chk(i8* %32, i8* %34, i64 16, i64 %37) #8 + %39 = call i8* @calloc(i64 4, i64 4) #9 + %40 = bitcast i8* %39 to float* + store float* %40, float** %7, align 8 + store i32 0, i32* %8, align 4 + br label %41 + +41: ; preds = %65, %3 + %42 = load i32, i32* %8, align 4 + %43 = icmp slt i32 %42, 2 + br i1 %43, label %44, label %68 + +44: ; preds = %41 + store i32 0, i32* %9, align 4 + br label %45 + +45: ; preds = %61, %44 + %46 = load i32, i32* %9, align 4 + %47 = icmp slt i32 %46, 2 + br i1 %47, label %48, label %64 + +48: ; preds = %45 + %49 = load i32, i32* %8, align 4 + %50 = load i32, i32* %9, align 4 + %51 = icmp eq i32 %49, %50 + %52 = zext i1 %51 to i32 + %53 = sitofp i32 %52 to float + %54 = load float*, float** %7, align 8 + %55 = load i32, i32* %8, align 4 + %56 = mul nsw i32 %55, 2 + %57 = load i32, i32* %9, align 4 + %58 = add nsw i32 %56, %57 + %59 = sext i32 %58 to i64 + %60 = getelementptr inbounds float, float* %54, i64 %59 + store float %53, float* %60, align 4 + br label %61 + +61: ; preds = %48 + %62 = load i32, i32* %9, align 4 + %63 = add nsw i32 %62, 1 + store i32 %63, i32* %9, align 4 + br label %45 + +64: ; preds = %45 + br label %65 + +65: ; preds = %64 + %66 = load i32, i32* %8, align 4 + %67 = add nsw i32 %66, 1 + store i32 %67, i32* %8, align 4 + br label %41 + +68: ; preds = %41 + store i32 0, i32* %10, align 4 + br label %69 + +69: ; preds = %343, %68 + %70 = load i32, i32* %10, align 4 + %71 = icmp slt i32 %70, 1 + br i1 %71, label %72, label %346 + +72: ; preds = %69 + %73 = load i32, i32* %10, align 4 + %74 = sub nsw i32 2, %73 + store i32 %74, i32* %11, align 4 + %75 = load i32, i32* %11, align 4 + %76 = sext i32 %75 to i64 + %77 = call i8* @calloc(i64 4, i64 %76) #9 + %78 = bitcast i8* %77 to float* + store float* %78, float** %12, align 8 + %79 = load i32, i32* %11, align 4 + %80 = sext i32 %79 to i64 + %81 = call i8* @calloc(i64 4, i64 %80) #9 + %82 = bitcast i8* %81 to float* + store float* %82, float** %13, align 8 + store i32 0, i32* %14, align 4 + br label %83 + +83: ; preds = %115, %72 + %84 = load i32, i32* %14, align 4 + %85 = load i32, i32* %11, align 4 + %86 = icmp slt i32 %84, %85 + br i1 %86, label %87, label %118 + +87: ; preds = %83 + %88 = load i32, i32* %10, align 4 + %89 = load i32, i32* %14, align 4 + %90 = add nsw i32 %88, %89 + store i32 %90, i32* %15, align 4 + %91 = load float*, float** %6, align 8 + %92 = load i32, i32* %15, align 4 + %93 = mul nsw i32 %92, 2 + %94 = load i32, i32* %10, align 4 + %95 = add nsw i32 %93, %94 + %96 = sext i32 %95 to i64 + %97 = getelementptr inbounds float, float* %91, i64 %96 + %98 = load float, float* %97, align 4 + %99 = load float*, float** %12, align 8 + %100 = load i32, i32* %14, align 4 + %101 = sext i32 %100 to i64 + %102 = getelementptr inbounds float, float* %99, i64 %101 + store float %98, float* %102, align 4 + %103 = load float*, float** %7, align 8 + %104 = load i32, i32* %15, align 4 + %105 = mul nsw i32 %104, 2 + %106 = load i32, i32* %10, align 4 + %107 = add nsw i32 %105, %106 + %108 = sext i32 %107 to i64 + %109 = getelementptr inbounds float, float* %103, i64 %108 + %110 = load 
float, float* %109, align 4 + %111 = load float*, float** %13, align 8 + %112 = load i32, i32* %14, align 4 + %113 = sext i32 %112 to i64 + %114 = getelementptr inbounds float, float* %111, i64 %113 + store float %110, float* %114, align 4 + br label %115 + +115: ; preds = %87 + %116 = load i32, i32* %14, align 4 + %117 = add nsw i32 %116, 1 + store i32 %117, i32* %14, align 4 + br label %83 + +118: ; preds = %83 + %119 = load float*, float** %12, align 8 + %120 = getelementptr inbounds float, float* %119, i64 0 + %121 = load float, float* %120, align 4 + %122 = call float @no_opt_sgn(float %121) + %123 = fneg float %122 + %124 = load float*, float** %12, align 8 + %125 = load i32, i32* %11, align 4 + %126 = call float @no_opt_naive_norm(float* %124, i32 %125) + %127 = fmul float %123, %126 + store float %127, float* %16, align 4 + %128 = load i32, i32* %11, align 4 + %129 = sext i32 %128 to i64 + %130 = call i8* @calloc(i64 4, i64 %129) #9 + %131 = bitcast i8* %130 to float* + store float* %131, float** %17, align 8 + %132 = load i32, i32* %11, align 4 + %133 = sext i32 %132 to i64 + %134 = call i8* @calloc(i64 4, i64 %133) #9 + %135 = bitcast i8* %134 to float* + store float* %135, float** %18, align 8 + store i32 0, i32* %19, align 4 + br label %136 + +136: ; preds = %158, %118 + %137 = load i32, i32* %19, align 4 + %138 = load i32, i32* %11, align 4 + %139 = icmp slt i32 %137, %138 + br i1 %139, label %140, label %161 + +140: ; preds = %136 + %141 = load float*, float** %12, align 8 + %142 = load i32, i32* %19, align 4 + %143 = sext i32 %142 to i64 + %144 = getelementptr inbounds float, float* %141, i64 %143 + %145 = load float, float* %144, align 4 + %146 = load float, float* %16, align 4 + %147 = load float*, float** %13, align 8 + %148 = load i32, i32* %19, align 4 + %149 = sext i32 %148 to i64 + %150 = getelementptr inbounds float, float* %147, i64 %149 + %151 = load float, float* %150, align 4 + %152 = fmul float %146, %151 + %153 = fadd float %145, %152 + %154 = load float*, float** %17, align 8 + %155 = load i32, i32* %19, align 4 + %156 = sext i32 %155 to i64 + %157 = getelementptr inbounds float, float* %154, i64 %156 + store float %153, float* %157, align 4 + br label %158 + +158: ; preds = %140 + %159 = load i32, i32* %19, align 4 + %160 = add nsw i32 %159, 1 + store i32 %160, i32* %19, align 4 + br label %136 + +161: ; preds = %136 + %162 = load float*, float** %17, align 8 + %163 = load i32, i32* %11, align 4 + %164 = call float @no_opt_naive_norm(float* %162, i32 %163) + store float %164, float* %20, align 4 + store i32 0, i32* %21, align 4 + br label %165 + +165: ; preds = %182, %161 + %166 = load i32, i32* %21, align 4 + %167 = load i32, i32* %11, align 4 + %168 = icmp slt i32 %166, %167 + br i1 %168, label %169, label %185 + +169: ; preds = %165 + %170 = load float*, float** %17, align 8 + %171 = load i32, i32* %21, align 4 + %172 = sext i32 %171 to i64 + %173 = getelementptr inbounds float, float* %170, i64 %172 + %174 = load float, float* %173, align 4 + %175 = load float, float* %20, align 4 + %176 = fadd float %175, 0x3EE4F8B580000000 + %177 = fdiv float %174, %176 + %178 = load float*, float** %18, align 8 + %179 = load i32, i32* %21, align 4 + %180 = sext i32 %179 to i64 + %181 = getelementptr inbounds float, float* %178, i64 %180 + store float %177, float* %181, align 4 + br label %182 + +182: ; preds = %169 + %183 = load i32, i32* %21, align 4 + %184 = add nsw i32 %183, 1 + store i32 %184, i32* %21, align 4 + br label %165 + +185: ; preds = %165 + %186 = load 
i32, i32* %11, align 4 + %187 = load i32, i32* %11, align 4 + %188 = mul nsw i32 %186, %187 + %189 = sext i32 %188 to i64 + %190 = call i8* @calloc(i64 4, i64 %189) #9 + %191 = bitcast i8* %190 to float* + store float* %191, float** %22, align 8 + store i32 0, i32* %23, align 4 + br label %192 + +192: ; preds = %233, %185 + %193 = load i32, i32* %23, align 4 + %194 = load i32, i32* %11, align 4 + %195 = icmp slt i32 %193, %194 + br i1 %195, label %196, label %236 + +196: ; preds = %192 + store i32 0, i32* %24, align 4 + br label %197 + +197: ; preds = %229, %196 + %198 = load i32, i32* %24, align 4 + %199 = load i32, i32* %11, align 4 + %200 = icmp slt i32 %198, %199 + br i1 %200, label %201, label %232 + +201: ; preds = %197 + %202 = load i32, i32* %23, align 4 + %203 = load i32, i32* %24, align 4 + %204 = icmp eq i32 %202, %203 + %205 = zext i1 %204 to i64 + %206 = select i1 %204, float 1.000000e+00, float 0.000000e+00 + %207 = load float*, float** %18, align 8 + %208 = load i32, i32* %23, align 4 + %209 = sext i32 %208 to i64 + %210 = getelementptr inbounds float, float* %207, i64 %209 + %211 = load float, float* %210, align 4 + %212 = fmul float 2.000000e+00, %211 + %213 = load float*, float** %18, align 8 + %214 = load i32, i32* %24, align 4 + %215 = sext i32 %214 to i64 + %216 = getelementptr inbounds float, float* %213, i64 %215 + %217 = load float, float* %216, align 4 + %218 = fmul float %212, %217 + %219 = fsub float %206, %218 + store float %219, float* %25, align 4 + %220 = load float, float* %25, align 4 + %221 = load float*, float** %22, align 8 + %222 = load i32, i32* %23, align 4 + %223 = load i32, i32* %11, align 4 + %224 = mul nsw i32 %222, %223 + %225 = load i32, i32* %24, align 4 + %226 = add nsw i32 %224, %225 + %227 = sext i32 %226 to i64 + %228 = getelementptr inbounds float, float* %221, i64 %227 + store float %220, float* %228, align 4 + br label %229 + +229: ; preds = %201 + %230 = load i32, i32* %24, align 4 + %231 = add nsw i32 %230, 1 + store i32 %231, i32* %24, align 4 + br label %197 + +232: ; preds = %197 + br label %233 + +233: ; preds = %232 + %234 = load i32, i32* %23, align 4 + %235 = add nsw i32 %234, 1 + store i32 %235, i32* %23, align 4 + br label %192 + +236: ; preds = %192 + %237 = call i8* @calloc(i64 4, i64 4) #9 + %238 = bitcast i8* %237 to float* + store float* %238, float** %26, align 8 + store i32 0, i32* %27, align 4 + br label %239 + +239: ; preds = %287, %236 + %240 = load i32, i32* %27, align 4 + %241 = icmp slt i32 %240, 2 + br i1 %241, label %242, label %290 + +242: ; preds = %239 + store i32 0, i32* %28, align 4 + br label %243 + +243: ; preds = %283, %242 + %244 = load i32, i32* %28, align 4 + %245 = icmp slt i32 %244, 2 + br i1 %245, label %246, label %286 + +246: ; preds = %243 + %247 = load i32, i32* %27, align 4 + %248 = load i32, i32* %10, align 4 + %249 = icmp slt i32 %247, %248 + br i1 %249, label %254, label %250 + +250: ; preds = %246 + %251 = load i32, i32* %28, align 4 + %252 = load i32, i32* %10, align 4 + %253 = icmp slt i32 %251, %252 + br i1 %253, label %254, label %260 + +254: ; preds = %250, %246 + %255 = load i32, i32* %27, align 4 + %256 = load i32, i32* %28, align 4 + %257 = icmp eq i32 %255, %256 + %258 = zext i1 %257 to i64 + %259 = select i1 %257, float 1.000000e+00, float 0.000000e+00 + store float %259, float* %29, align 4 + br label %274 + +260: ; preds = %250 + %261 = load float*, float** %22, align 8 + %262 = load i32, i32* %27, align 4 + %263 = load i32, i32* %10, align 4 + %264 = sub nsw i32 %262, %263 + 
%265 = load i32, i32* %11, align 4 + %266 = mul nsw i32 %264, %265 + %267 = load i32, i32* %28, align 4 + %268 = load i32, i32* %10, align 4 + %269 = sub nsw i32 %267, %268 + %270 = add nsw i32 %266, %269 + %271 = sext i32 %270 to i64 + %272 = getelementptr inbounds float, float* %261, i64 %271 + %273 = load float, float* %272, align 4 + store float %273, float* %29, align 4 + br label %274 + +274: ; preds = %260, %254 + %275 = load float, float* %29, align 4 + %276 = load float*, float** %26, align 8 + %277 = load i32, i32* %27, align 4 + %278 = mul nsw i32 %277, 2 + %279 = load i32, i32* %28, align 4 + %280 = add nsw i32 %278, %279 + %281 = sext i32 %280 to i64 + %282 = getelementptr inbounds float, float* %276, i64 %281 + store float %275, float* %282, align 4 + br label %283 + +283: ; preds = %274 + %284 = load i32, i32* %28, align 4 + %285 = add nsw i32 %284, 1 + store i32 %285, i32* %28, align 4 + br label %243 + +286: ; preds = %243 + br label %287 + +287: ; preds = %286 + %288 = load i32, i32* %27, align 4 + %289 = add nsw i32 %288, 1 + store i32 %289, i32* %27, align 4 + br label %239 + +290: ; preds = %239 + %291 = load i32, i32* %10, align 4 + %292 = icmp eq i32 %291, 0 + br i1 %292, label %293, label %305 + +293: ; preds = %290 + %294 = load float*, float** %5, align 8 + %295 = bitcast float* %294 to i8* + %296 = load float*, float** %26, align 8 + %297 = bitcast float* %296 to i8* + %298 = load float*, float** %5, align 8 + %299 = bitcast float* %298 to i8* + %300 = call i64 @llvm.objectsize.i64.p0i8(i8* %299, i1 false, i1 true, i1 false) + %301 = call i8* @__memcpy_chk(i8* %295, i8* %297, i64 16, i64 %300) #8 + %302 = load float*, float** %26, align 8 + %303 = load float*, float** %4, align 8 + %304 = load float*, float** %6, align 8 + call void @no_opt_naive_fixed_matrix_multiply(float* %302, float* %303, float* %304) + br label %330 + +305: ; preds = %290 + %306 = call i8* @calloc(i64 4, i64 4) #9 + %307 = bitcast i8* %306 to float* + store float* %307, float** %30, align 8 + %308 = load float*, float** %26, align 8 + %309 = load float*, float** %5, align 8 + %310 = load float*, float** %30, align 8 + call void @no_opt_naive_fixed_matrix_multiply(float* %308, float* %309, float* %310) + %311 = load float*, float** %5, align 8 + %312 = bitcast float* %311 to i8* + %313 = load float*, float** %30, align 8 + %314 = bitcast float* %313 to i8* + %315 = load float*, float** %5, align 8 + %316 = bitcast float* %315 to i8* + %317 = call i64 @llvm.objectsize.i64.p0i8(i8* %316, i1 false, i1 true, i1 false) + %318 = call i8* @__memcpy_chk(i8* %312, i8* %314, i64 16, i64 %317) #8 + %319 = load float*, float** %26, align 8 + %320 = load float*, float** %6, align 8 + %321 = load float*, float** %30, align 8 + call void @no_opt_naive_fixed_matrix_multiply(float* %319, float* %320, float* %321) + %322 = load float*, float** %6, align 8 + %323 = bitcast float* %322 to i8* + %324 = load float*, float** %30, align 8 + %325 = bitcast float* %324 to i8* + %326 = load float*, float** %6, align 8 + %327 = bitcast float* %326 to i8* + %328 = call i64 @llvm.objectsize.i64.p0i8(i8* %327, i1 false, i1 true, i1 false) + %329 = call i8* @__memcpy_chk(i8* %323, i8* %325, i64 16, i64 %328) #8 + br label %330 + +330: ; preds = %305, %293 + %331 = load float*, float** %12, align 8 + %332 = bitcast float* %331 to i8* + call void @free(i8* %332) + %333 = load float*, float** %13, align 8 + %334 = bitcast float* %333 to i8* + call void @free(i8* %334) + %335 = load float*, float** %17, align 8 + %336 = 
bitcast float* %335 to i8* + call void @free(i8* %336) + %337 = load float*, float** %18, align 8 + %338 = bitcast float* %337 to i8* + call void @free(i8* %338) + %339 = load float*, float** %22, align 8 + %340 = bitcast float* %339 to i8* + call void @free(i8* %340) + %341 = load float*, float** %26, align 8 + %342 = bitcast float* %341 to i8* + call void @free(i8* %342) + br label %343 + +343: ; preds = %330 + %344 = load i32, i32* %10, align 4 + %345 = add nsw i32 %344, 1 + store i32 %345, i32* %10, align 4 + br label %69 + +346: ; preds = %69 + %347 = load float*, float** %5, align 8 + call void @no_opt_naive_fixed_transpose(float* %347) + ret void +} + +; Function Attrs: nounwind +declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 + +; Function Attrs: nounwind readnone speculatable willreturn +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 + +; Function Attrs: allocsize(0,1) +declare i8* @calloc(i64, i64) #5 + +declare void @free(i8*) #6 + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #1 { + %1 = alloca i32, align 4 + %2 = alloca i64, align 8 + %3 = alloca [4 x float], align 16 + %4 = alloca i32, align 4 + %5 = alloca [4 x float], align 16 + %6 = alloca [4 x float], align 16 + %7 = alloca [4 x float], align 16 + %8 = alloca [4 x float], align 16 + %9 = alloca i32, align 4 + %10 = alloca i32, align 4 + %11 = alloca i32, align 4 + %12 = alloca i32, align 4 + store i32 0, i32* %1, align 4 + %13 = call i64 @time(i64* null) + store i64 %13, i64* %2, align 8 + %14 = call i64 @time(i64* %2) + %15 = trunc i64 %14 to i32 + call void @srand(i32 %15) + %16 = bitcast [4 x float]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %16, i8 0, i64 16, i1 false) + store i32 0, i32* %4, align 4 + br label %17 + +17: ; preds = %27, %0 + %18 = load i32, i32* %4, align 4 + %19 = icmp slt i32 %18, 4 + br i1 %19, label %20, label %30 + +20: ; preds = %17 + %21 = call i32 @rand() + %22 = sitofp i32 %21 to float + %23 = fdiv float %22, 0x41747AE140000000 + %24 = load i32, i32* %4, align 4 + %25 = sext i32 %24 to i64 + %26 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 %25 + store float %23, float* %26, align 4 + br label %27 + +27: ; preds = %20 + %28 = load i32, i32* %4, align 4 + %29 = add nsw i32 %28, 1 + store i32 %29, i32* %4, align 4 + br label %17 + +30: ; preds = %17 + %31 = bitcast [4 x float]* %5 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %31, i8 0, i64 16, i1 false) + %32 = bitcast [4 x float]* %6 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %32, i8 0, i64 16, i1 false) + %33 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 + %34 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 + %35 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 + call void @naive_fixed_qr_decomp(float* %33, float* %34, float* %35) + %36 = bitcast [4 x float]* %7 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %36, i8 0, i64 16, i1 false) + %37 = bitcast [4 x float]* %8 to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %37, i8 0, i64 16, i1 false) + %38 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 + %39 = getelementptr inbounds [4 x float], [4 x float]* %7, i64 0, i64 0 + %40 = getelementptr inbounds [4 x float], [4 x float]* %8, i64 0, i64 0 + call void @no_opt_naive_fixed_qr_decomp(float* %38, float* %39, float* %40) + store i32 0, i32* %9, align 4 + br label %41 + +41: ; preds = %91, %30 + %42 = load i32, i32* %9, align 4 + %43 = icmp slt 
i32 %42, 2 + br i1 %43, label %44, label %94 + +44: ; preds = %41 + store i32 0, i32* %10, align 4 + br label %45 + +45: ; preds = %87, %44 + %46 = load i32, i32* %10, align 4 + %47 = icmp slt i32 %46, 2 + br i1 %47, label %48, label %90 + +48: ; preds = %45 + %49 = load i32, i32* %9, align 4 + %50 = mul nsw i32 %49, 2 + %51 = load i32, i32* %10, align 4 + %52 = add nsw i32 %50, %51 + %53 = sext i32 %52 to i64 + %54 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 %53 + %55 = load float, float* %54, align 4 + %56 = fpext float %55 to double + %57 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %56) + %58 = load i32, i32* %9, align 4 + %59 = mul nsw i32 %58, 2 + %60 = load i32, i32* %10, align 4 + %61 = add nsw i32 %59, %60 + %62 = sext i32 %61 to i64 + %63 = getelementptr inbounds [4 x float], [4 x float]* %7, i64 0, i64 %62 + %64 = load float, float* %63, align 4 + %65 = fpext float %64 to double + %66 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %65) + %67 = load i32, i32* %9, align 4 + %68 = sext i32 %67 to i64 + %69 = getelementptr inbounds [4 x float], [4 x float]* %7, i64 0, i64 %68 + %70 = load float, float* %69, align 4 + %71 = load i32, i32* %9, align 4 + %72 = sext i32 %71 to i64 + %73 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 %72 + %74 = load float, float* %73, align 4 + %75 = fsub float %70, %74 + %76 = fpext float %75 to double + %77 = call double @llvm.fabs.f64(double %76) + %78 = fcmp olt double %77, 0x3FB99999A0000000 + %79 = xor i1 %78, true + %80 = zext i1 %79 to i32 + %81 = sext i32 %80 to i64 + %82 = icmp ne i64 %81, 0 + br i1 %82, label %83, label %85 + +83: ; preds = %48 + call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 300, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.3, i64 0, i64 0)) #10 + unreachable + +84: ; No predecessors! + br label %86 + +85: ; preds = %48 + br label %86 + +86: ; preds = %85, %84 + br label %87 + +87: ; preds = %86 + %88 = load i32, i32* %10, align 4 + %89 = add nsw i32 %88, 1 + store i32 %89, i32* %10, align 4 + br label %45 + +90: ; preds = %45 + br label %91 + +91: ; preds = %90 + %92 = load i32, i32* %9, align 4 + %93 = add nsw i32 %92, 1 + store i32 %93, i32* %9, align 4 + br label %41 + +94: ; preds = %41 + store i32 0, i32* %11, align 4 + br label %95 + +95: ; preds = %145, %94 + %96 = load i32, i32* %11, align 4 + %97 = icmp slt i32 %96, 2 + br i1 %97, label %98, label %148 + +98: ; preds = %95 + store i32 0, i32* %12, align 4 + br label %99 + +99: ; preds = %141, %98 + %100 = load i32, i32* %12, align 4 + %101 = icmp slt i32 %100, 2 + br i1 %101, label %102, label %144 + +102: ; preds = %99 + %103 = load i32, i32* %11, align 4 + %104 = mul nsw i32 %103, 2 + %105 = load i32, i32* %12, align 4 + %106 = add nsw i32 %104, %105 + %107 = sext i32 %106 to i64 + %108 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 %107 + %109 = load float, float* %108, align 4 + %110 = fpext float %109 to double + %111 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %110) + %112 = load i32, i32* %11, align 4 + %113 = mul nsw i32 %112, 2 + %114 = load i32, i32* %12, align 4 + %115 = add nsw i32 %113, %114 + %116 = sext i32 %115 to i64 + %117 = getelementptr inbounds [4 x float], [4 x float]* %8, i64 0, i64 %116 + %118 = load float, float* %117, align 4 + %119 = fpext float %118 to double + %120 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %119) + %121 = load i32, i32* %11, align 4 + %122 = sext i32 %121 to i64 + %123 = getelementptr inbounds [4 x float], [4 x float]* %8, i64 0, i64 %122 + %124 = load float, float* %123, align 4 + %125 = load i32, i32* %11, align 4 + %126 = sext i32 %125 to i64 + %127 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 %126 + %128 = load float, float* %127, align 4 + %129 = fsub float %124, %128 + %130 = fpext float %129 to double + %131 = call double @llvm.fabs.f64(double %130) + %132 = fcmp olt double %131, 0x3FB99999A0000000 + %133 = xor i1 %132, true + %134 = zext i1 %133 to i32 + %135 = sext i32 %134 to i64 + %136 = icmp ne i64 %135, 0 + br i1 %136, label %137, label %139 + +137: ; preds = %102 + call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 307, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.6, i64 0, i64 0)) #10 + unreachable + +138: ; No predecessors! + br label %140 + +139: ; preds = %102 + br label %140 + +140: ; preds = %139, %138 + br label %141 + +141: ; preds = %140 + %142 = load i32, i32* %12, align 4 + %143 = add nsw i32 %142, 1 + store i32 %143, i32* %12, align 4 + br label %99 + +144: ; preds = %99 + br label %145 + +145: ; preds = %144 + %146 = load i32, i32* %11, align 4 + %147 = add nsw i32 %146, 1 + store i32 %147, i32* %11, align 4 + br label %95 + +148: ; preds = %95 + %149 = load i32, i32* %1, align 4 + ret i32 %149 +} + +declare i64 @time(i64*) #6 + +declare void @srand(i32) #6 + +declare i32 @rand() #6 + +declare i32 @printf(i8*, ...) 
#6 + +; Function Attrs: nounwind readnone speculatable willreturn +declare double @llvm.fabs.f64(double) #2 + +; Function Attrs: noreturn +declare void @__assert_rtn(i8*, i8*, i32, i8*) #7 + +attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone speculatable willreturn } +attributes #3 = { argmemonly nounwind willreturn writeonly } +attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #7 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #8 = { nounwind } +attributes #9 = { allocsize(0,1) } +attributes #10 = { noreturn } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} diff --git 
a/src/dios-egraphs/Diospyros/flaky-outputs/flaky-dce.ll b/src/dios-egraphs/Diospyros/flaky-outputs/flaky-dce.ll new file mode 100644 index 00000000..ef4a3d72 --- /dev/null +++ b/src/dios-egraphs/Diospyros/flaky-outputs/flaky-dce.ll @@ -0,0 +1,3482 @@ +; ModuleID = 'build/diospyros.ll' +source_filename = "fail-tests/qr-decomp-local-arrays.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@.str = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 +@.str.1 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 +@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 +@.str.2 = private unnamed_addr constant [36 x i8] c"fail-tests/qr-decomp-local-arrays.c\00", align 1 +@.str.3 = private unnamed_addr constant [34 x i8] c"fabs(expectedQ[i] - Q[i]) < DELTA\00", align 1 +@.str.4 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 +@.str.5 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 +@.str.6 = private unnamed_addr constant [34 x i8] c"fabs(expectedR[i] - R[i]) < DELTA\00", align 1 + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @sgn(float %0) #0 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_sgn(float %0) #1 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @naive_norm(float* %0, i32 %1) #0 { + %3 = icmp sgt i32 %1, 0 + %smax = select i1 %3, i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 3 + br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 + %indvars.iv.next = or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 
%niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32(float) #2 + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_naive_norm(float* %0, i32 %1) #1 { + %3 = icmp sgt i32 %1, 0 + %smax = select i1 %3, i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 3 + br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 + %indvars.iv.next = or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 
[ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_transpose(float* %0) #0 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast float* %4 to i32* + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_transpose(float* %0) #1 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast float* %4 to i32* + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { +.preheader: + store float 0.000000e+00, float* %2, align 4 + %3 = getelementptr float, float* %0, i32 0 + %4 = load float, float* %3, align 4 + %5 = insertelement <4 x float> zeroinitializer, float %4, i32 0 + %6 = insertelement <4 x float> %5, float 0.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 0.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 0.000000e+00, i32 3 + %9 = getelementptr float, float* %1, i32 0 + %10 = load float, float* %9, align 4 + %11 = insertelement <4 x float> zeroinitializer, float %10, i32 0 + %12 = insertelement <4 x float> %11, float 0.000000e+00, i32 1 + %13 = insertelement <4 x float> %12, float 0.000000e+00, i32 2 + %14 = insertelement <4 x float> %13, float 0.000000e+00, i32 3 + %15 = call <4 x float> @llvm.fma.v4f32(<4 x float> %8, <4 x float> %14, <4 x float> zeroinitializer) + %16 = extractelement <4 x float> %15, i32 0 + store float %16, float* %2, align 4 + %17 = insertelement <4 x float> zeroinitializer, float %4, i32 0 + %18 = insertelement <4 x float> %17, float 1.000000e+00, i32 1 + %19 = insertelement <4 x float> %18, float 1.000000e+00, i32 2 + %20 = insertelement <4 x float> %19, float 1.000000e+00, i32 3 + %21 = getelementptr float, float* %1, i32 0 + %22 = load float, float* %21, align 4 + %23 = insertelement <4 x float> zeroinitializer, float 
%22, i32 0 + %24 = insertelement <4 x float> %23, float 0.000000e+00, i32 1 + %25 = insertelement <4 x float> %24, float 0.000000e+00, i32 2 + %26 = insertelement <4 x float> %25, float 0.000000e+00, i32 3 + %27 = fmul <4 x float> %20, %26 + %28 = fadd <4 x float> %27, zeroinitializer + %29 = getelementptr float, float* %0, i32 0 + %30 = getelementptr inbounds float, float* %29, i64 1 + %31 = load float, float* %30, align 4 + %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 + %33 = insertelement <4 x float> %32, float 0.000000e+00, i32 1 + %34 = insertelement <4 x float> %33, float 0.000000e+00, i32 2 + %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 3 + %36 = getelementptr float, float* %1, i32 0 + %37 = getelementptr inbounds float, float* %36, i64 2 + %38 = load float, float* %37, align 4 + %39 = insertelement <4 x float> zeroinitializer, float %38, i32 0 + %40 = insertelement <4 x float> %39, float 0.000000e+00, i32 1 + %41 = insertelement <4 x float> %40, float 0.000000e+00, i32 2 + %42 = insertelement <4 x float> %41, float 0.000000e+00, i32 3 + %43 = call <4 x float> @llvm.fma.v4f32(<4 x float> %35, <4 x float> %42, <4 x float> %28) + %44 = extractelement <4 x float> %43, i32 0 + store float %44, float* %2, align 4 + %45 = extractelement <4 x float> %43, i32 1 + %46 = getelementptr float, float* %2, i32 0 + %47 = getelementptr inbounds float, float* %46, i64 1 + store float %45, float* %47, align 4 + %48 = getelementptr float, float* %0, i32 0 + %49 = load float, float* %48, align 4 + %50 = insertelement <4 x float> zeroinitializer, float %49, i32 0 + %51 = insertelement <4 x float> %50, float 0.000000e+00, i32 1 + %52 = insertelement <4 x float> %51, float 0.000000e+00, i32 2 + %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 3 + %54 = getelementptr float, float* %1, i32 0 + %55 = getelementptr inbounds float, float* %54, i64 1 + %56 = load float, float* %55, align 4 + %57 = insertelement <4 x float> zeroinitializer, float %56, i32 0 + %58 = insertelement <4 x float> %57, float 0.000000e+00, i32 1 + %59 = insertelement <4 x float> %58, float 0.000000e+00, i32 2 + %60 = insertelement <4 x float> %59, float 0.000000e+00, i32 3 + %61 = call <4 x float> @llvm.fma.v4f32(<4 x float> %53, <4 x float> %60, <4 x float> zeroinitializer) + %62 = extractelement <4 x float> %61, i32 0 + %63 = getelementptr float, float* %2, i32 0 + %64 = getelementptr inbounds float, float* %63, i64 1 + store float %62, float* %64, align 4 + %65 = insertelement <4 x float> zeroinitializer, float %49, i32 0 + %66 = insertelement <4 x float> %65, float 1.000000e+00, i32 1 + %67 = insertelement <4 x float> %66, float 1.000000e+00, i32 2 + %68 = insertelement <4 x float> %67, float 1.000000e+00, i32 3 + %69 = load float, float* %55, align 4 + %70 = insertelement <4 x float> zeroinitializer, float %69, i32 0 + %71 = insertelement <4 x float> %70, float 0.000000e+00, i32 1 + %72 = insertelement <4 x float> %71, float 0.000000e+00, i32 2 + %73 = insertelement <4 x float> %72, float 0.000000e+00, i32 3 + %74 = fmul <4 x float> %68, %73 + %75 = fadd <4 x float> %74, zeroinitializer + %76 = getelementptr float, float* %0, i32 0 + %77 = getelementptr inbounds float, float* %76, i64 1 + %78 = load float, float* %77, align 4 + %79 = insertelement <4 x float> zeroinitializer, float %78, i32 0 + %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 1 + %81 = insertelement <4 x float> %80, float 0.000000e+00, i32 2 + %82 = insertelement <4 x float> %81, float 0.000000e+00, i32 3 
+ %83 = getelementptr float, float* %1, i32 0 + %84 = getelementptr inbounds float, float* %83, i64 3 + %85 = load float, float* %84, align 4 + %86 = insertelement <4 x float> zeroinitializer, float %85, i32 0 + %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 1 + %88 = insertelement <4 x float> %87, float 0.000000e+00, i32 2 + %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3 + %90 = call <4 x float> @llvm.fma.v4f32(<4 x float> %82, <4 x float> %89, <4 x float> %75) + %91 = extractelement <4 x float> %90, i32 0 + %92 = getelementptr float, float* %2, i32 0 + %93 = getelementptr inbounds float, float* %92, i64 1 + store float %91, float* %93, align 4 + %94 = extractelement <4 x float> %90, i32 1 + %95 = getelementptr float, float* %2, i32 0 + %96 = getelementptr inbounds float, float* %95, i64 2 + store float %94, float* %96, align 4 + %97 = getelementptr float, float* %0, i32 0 + %98 = getelementptr inbounds float, float* %97, i64 2 + %99 = load float, float* %98, align 4 + %100 = insertelement <4 x float> zeroinitializer, float %99, i32 0 + %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 1 + %102 = insertelement <4 x float> %101, float 0.000000e+00, i32 2 + %103 = insertelement <4 x float> %102, float 0.000000e+00, i32 3 + %104 = getelementptr float, float* %1, i32 0 + %105 = load float, float* %104, align 4 + %106 = insertelement <4 x float> zeroinitializer, float %105, i32 0 + %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 1 + %108 = insertelement <4 x float> %107, float 0.000000e+00, i32 2 + %109 = insertelement <4 x float> %108, float 0.000000e+00, i32 3 + %110 = call <4 x float> @llvm.fma.v4f32(<4 x float> %103, <4 x float> %109, <4 x float> zeroinitializer) + %111 = extractelement <4 x float> %110, i32 0 + %112 = getelementptr float, float* %2, i32 0 + %113 = getelementptr inbounds float, float* %112, i64 2 + store float %111, float* %113, align 4 + %114 = insertelement <4 x float> zeroinitializer, float %99, i32 0 + %115 = insertelement <4 x float> %114, float 1.000000e+00, i32 1 + %116 = insertelement <4 x float> %115, float 1.000000e+00, i32 2 + %117 = insertelement <4 x float> %116, float 1.000000e+00, i32 3 + %118 = insertelement <4 x float> zeroinitializer, float %105, i32 0 + %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 1 + %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 2 + %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 3 + %122 = fmul <4 x float> %117, %121 + %123 = fadd <4 x float> %122, zeroinitializer + %124 = getelementptr float, float* %0, i32 0 + %125 = getelementptr inbounds float, float* %124, i64 3 + %126 = load float, float* %125, align 4 + %127 = insertelement <4 x float> zeroinitializer, float %126, i32 0 + %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 1 + %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 2 + %130 = insertelement <4 x float> %129, float 0.000000e+00, i32 3 + %131 = load float, float* %37, align 4 + %132 = insertelement <4 x float> zeroinitializer, float %131, i32 0 + %133 = insertelement <4 x float> %132, float 0.000000e+00, i32 1 + %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 2 + %135 = insertelement <4 x float> %134, float 0.000000e+00, i32 3 + %136 = call <4 x float> @llvm.fma.v4f32(<4 x float> %130, <4 x float> %135, <4 x float> %123) + %137 = extractelement <4 x float> %136, i32 0 + store float %137, float* %113, align 4 + %138 = extractelement <4 x float> %136, i32 1 + %139 = 
getelementptr float, float* %2, i32 0 + %140 = getelementptr inbounds float, float* %139, i64 3 + store float %138, float* %140, align 4 + %141 = load float, float* %98, align 4 + %142 = insertelement <4 x float> zeroinitializer, float %141, i32 0 + %143 = insertelement <4 x float> %142, float 0.000000e+00, i32 1 + %144 = insertelement <4 x float> %143, float 0.000000e+00, i32 2 + %145 = insertelement <4 x float> %144, float 0.000000e+00, i32 3 + %146 = load float, float* %55, align 4 + %147 = insertelement <4 x float> zeroinitializer, float %146, i32 0 + %148 = insertelement <4 x float> %147, float 0.000000e+00, i32 1 + %149 = insertelement <4 x float> %148, float 0.000000e+00, i32 2 + %150 = insertelement <4 x float> %149, float 0.000000e+00, i32 3 + %151 = call <4 x float> @llvm.fma.v4f32(<4 x float> %145, <4 x float> %150, <4 x float> zeroinitializer) + %152 = extractelement <4 x float> %151, i32 0 + store float %152, float* %140, align 4 + %153 = insertelement <4 x float> zeroinitializer, float %141, i32 0 + %154 = insertelement <4 x float> %153, float 1.000000e+00, i32 1 + %155 = insertelement <4 x float> %154, float 1.000000e+00, i32 2 + %156 = insertelement <4 x float> %155, float 1.000000e+00, i32 3 + %157 = insertelement <4 x float> zeroinitializer, float %146, i32 0 + %158 = insertelement <4 x float> %157, float 0.000000e+00, i32 1 + %159 = insertelement <4 x float> %158, float 0.000000e+00, i32 2 + %160 = insertelement <4 x float> %159, float 0.000000e+00, i32 3 + %161 = fmul <4 x float> %156, %160 + %162 = fadd <4 x float> %161, zeroinitializer + %163 = getelementptr float, float* %0, i32 0 + %164 = getelementptr inbounds float, float* %163, i64 3 + %165 = load float, float* %164, align 4 + %166 = insertelement <4 x float> zeroinitializer, float %165, i32 0 + %167 = insertelement <4 x float> %166, float 0.000000e+00, i32 1 + %168 = insertelement <4 x float> %167, float 0.000000e+00, i32 2 + %169 = insertelement <4 x float> %168, float 0.000000e+00, i32 3 + %170 = load float, float* %84, align 4 + %171 = insertelement <4 x float> zeroinitializer, float %170, i32 0 + %172 = insertelement <4 x float> %171, float 0.000000e+00, i32 1 + %173 = insertelement <4 x float> %172, float 0.000000e+00, i32 2 + %174 = insertelement <4 x float> %173, float 0.000000e+00, i32 3 + %175 = call <4 x float> @llvm.fma.v4f32(<4 x float> %169, <4 x float> %174, <4 x float> %162) + %176 = extractelement <4 x float> %175, i32 0 + store float %176, float* %140, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { +.preheader: + store float 0.000000e+00, float* %2, align 4 + %3 = load float, float* %0, align 4 + %4 = load float, float* %1, align 4 + %5 = fmul float %3, %4 + %6 = fadd float %5, 0.000000e+00 + store float %6, float* %2, align 4 + %7 = getelementptr inbounds float, float* %0, i64 1 + %8 = load float, float* %7, align 4 + %9 = getelementptr inbounds float, float* %1, i64 2 + %10 = load float, float* %9, align 4 + %11 = fmul float %8, %10 + %12 = fadd float %6, %11 + store float %12, float* %2, align 4 + %13 = getelementptr inbounds float, float* %2, i64 1 + store float 0.000000e+00, float* %13, align 4 + %14 = load float, float* %0, align 4 + %15 = getelementptr inbounds float, float* %1, i64 1 + %16 = load float, float* %15, align 4 + %17 = fmul float %14, %16 + %18 = fadd float %17, 0.000000e+00 + store float %18, float* %13, align 4 + %19 = load float, float* %7, align 4 + %20 = 
getelementptr inbounds float, float* %1, i64 3 + %21 = load float, float* %20, align 4 + %22 = fmul float %19, %21 + %23 = fadd float %18, %22 + store float %23, float* %13, align 4 + %24 = getelementptr inbounds float, float* %0, i64 2 + %25 = getelementptr inbounds float, float* %2, i64 2 + store float 0.000000e+00, float* %25, align 4 + %26 = load float, float* %24, align 4 + %27 = load float, float* %1, align 4 + %28 = fmul float %26, %27 + %29 = fadd float %28, 0.000000e+00 + store float %29, float* %25, align 4 + %30 = getelementptr inbounds float, float* %0, i64 3 + %31 = load float, float* %30, align 4 + %32 = load float, float* %9, align 4 + %33 = fmul float %31, %32 + %34 = fadd float %29, %33 + store float %34, float* %25, align 4 + %35 = getelementptr inbounds float, float* %2, i64 3 + store float 0.000000e+00, float* %35, align 4 + %36 = load float, float* %24, align 4 + %37 = load float, float* %15, align 4 + %38 = fmul float %36, %37 + %39 = fadd float %38, 0.000000e+00 + store float %39, float* %35, align 4 + %40 = load float, float* %30, align 4 + %41 = load float, float* %20, align 4 + %42 = fmul float %40, %41 + %43 = fadd float %39, %42 + store float %43, float* %35, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader49: + %3 = getelementptr float, float* %0, i32 0 + %4 = bitcast float* %3 to i32* + %5 = load i32, i32* %4, align 4 + %6 = bitcast i32 %5 to float + %7 = insertelement <4 x float> zeroinitializer, float %6, i32 0 + %8 = insertelement <4 x float> %7, float 0.000000e+00, i32 1 + %9 = insertelement <4 x float> %8, float 0.000000e+00, i32 2 + %10 = insertelement <4 x float> %9, float 0.000000e+00, i32 3 + %11 = extractelement <4 x float> %10, i32 0 + %12 = getelementptr float, float* %2, i32 0 + %13 = bitcast float* %12 to i32* + %14 = bitcast i32* %13 to float* + store float %11, float* %14, align 4 + %15 = getelementptr float, float* %0, i32 0 + %16 = getelementptr inbounds float, float* %15, i64 1 + %17 = bitcast float* %16 to i32* + %18 = load i32, i32* %17, align 4 + %19 = bitcast i32 %18 to float + %20 = insertelement <4 x float> zeroinitializer, float %19, i32 0 + %21 = insertelement <4 x float> %20, float 0.000000e+00, i32 1 + %22 = insertelement <4 x float> %21, float 0.000000e+00, i32 2 + %23 = insertelement <4 x float> %22, float 0.000000e+00, i32 3 + %24 = extractelement <4 x float> %23, i32 0 + %25 = getelementptr float, float* %2, i32 0 + %26 = getelementptr inbounds float, float* %25, i64 1 + %27 = bitcast float* %26 to i32* + %28 = bitcast i32* %27 to float* + store float %24, float* %28, align 4 + %29 = getelementptr float, float* %0, i32 0 + %30 = getelementptr inbounds float, float* %29, i64 2 + %31 = bitcast float* %30 to i32* + %32 = load i32, i32* %31, align 4 + %33 = bitcast i32 %32 to float + %34 = insertelement <4 x float> zeroinitializer, float %33, i32 0 + %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 1 + %36 = insertelement <4 x float> %35, float 0.000000e+00, i32 2 + %37 = insertelement <4 x float> %36, float 0.000000e+00, i32 3 + %38 = extractelement <4 x float> %37, i32 0 + %39 = getelementptr float, float* %2, i32 0 + %40 = getelementptr inbounds float, float* %39, i64 2 + %41 = bitcast float* %40 to i32* + %42 = bitcast i32* %41 to float* + store float %38, float* %42, align 4 + %43 = getelementptr float, float* %0, i32 0 + %44 = getelementptr inbounds float, float* %43, i64 3 + %45 = bitcast float* %44 to 
i32* + %46 = load i32, i32* %45, align 4 + %47 = bitcast i32 %46 to float + %48 = fneg float %47 + %49 = insertelement <4 x float> zeroinitializer, float %48, i32 0 + %50 = getelementptr float, float* %0, i32 0 + %51 = bitcast float* %50 to i32* + %52 = load i32, i32* %51, align 4 + %53 = bitcast i32 %52 to float + %54 = bitcast i32 %52 to float + %55 = fmul float %53, %54 + %56 = fadd float %55, 0.000000e+00 + %57 = bitcast i32 %32 to float + %58 = bitcast i32 %32 to float + %59 = fmul float %57, %58 + %60 = fadd float %56, %59 + %61 = call float @llvm.sqrt.f32(float %60) + %62 = bitcast i32 %52 to float + %63 = fcmp olt float %62, 0.000000e+00 + %64 = sext i1 %63 to i32 + %65 = fcmp ogt float %62, 0.000000e+00 + %66 = zext i1 %65 to i32 + %67 = add nsw i32 %64, %66 + %68 = sitofp i32 %67 to float + %69 = fneg float %68 + %70 = fmul float %61, %69 + %71 = bitcast i32 %52 to float + %72 = fadd float %71, %70 + %73 = bitcast i32 %52 to float + %74 = bitcast i32 %52 to float + %75 = fmul float %73, %74 + %76 = fadd float %75, 0.000000e+00 + %77 = bitcast i32 %32 to float + %78 = bitcast i32 %32 to float + %79 = fmul float %77, %78 + %80 = fadd float %76, %79 + %81 = call float @llvm.sqrt.f32(float %80) + %82 = fneg float %68 + %83 = fmul float %81, %82 + %84 = bitcast i32 %52 to float + %85 = fadd float %84, %83 + %86 = bitcast i32 %52 to float + %87 = bitcast i32 %52 to float + %88 = fmul float %86, %87 + %89 = fadd float %88, 0.000000e+00 + %90 = bitcast i32 %32 to float + %91 = bitcast i32 %32 to float + %92 = fmul float %90, %91 + %93 = fadd float %89, %92 + %94 = call float @llvm.sqrt.f32(float %93) + %95 = fneg float %68 + %96 = fmul float %94, %95 + %97 = bitcast i32 %52 to float + %98 = fadd float %97, %96 + %99 = fmul float %85, %98 + %100 = fadd float %99, 0.000000e+00 + %101 = bitcast i32 %52 to float + %102 = bitcast i32 %52 to float + %103 = fmul float %101, %102 + %104 = fadd float %103, 0.000000e+00 + %105 = bitcast i32 %32 to float + %106 = bitcast i32 %32 to float + %107 = fmul float %105, %106 + %108 = fadd float %104, %107 + %109 = call float @llvm.sqrt.f32(float %108) + %110 = fneg float %68 + %111 = fmul float %109, %110 + %112 = fmul float %111, 0.000000e+00 + %113 = bitcast i32 %32 to float + %114 = fadd float %113, %112 + %115 = bitcast i32 %52 to float + %116 = bitcast i32 %52 to float + %117 = fmul float %115, %116 + %118 = fadd float %117, 0.000000e+00 + %119 = bitcast i32 %32 to float + %120 = bitcast i32 %32 to float + %121 = fmul float %119, %120 + %122 = fadd float %118, %121 + %123 = call float @llvm.sqrt.f32(float %122) + %124 = fneg float %68 + %125 = fmul float %123, %124 + %126 = fmul float %125, 0.000000e+00 + %127 = bitcast i32 %32 to float + %128 = fadd float %127, %126 + %129 = fmul float %114, %128 + %130 = fadd float %100, %129 + %131 = call float @llvm.sqrt.f32(float %130) + %132 = fadd float %131, 0.000000e+00 + %133 = fdiv float %72, %132 + %134 = fmul float %133, 2.000000e+00 + %135 = bitcast i32 %52 to float + %136 = bitcast i32 %52 to float + %137 = fmul float %135, %136 + %138 = fadd float %137, 0.000000e+00 + %139 = bitcast i32 %32 to float + %140 = bitcast i32 %32 to float + %141 = fmul float %139, %140 + %142 = fadd float %138, %141 + %143 = call float @llvm.sqrt.f32(float %142) + %144 = fneg float %68 + %145 = fmul float %143, %144 + %146 = bitcast i32 %52 to float + %147 = fadd float %146, %145 + %148 = bitcast i32 %52 to float + %149 = bitcast i32 %52 to float + %150 = fmul float %148, %149 + %151 = fadd float %150, 0.000000e+00 + %152 = 
bitcast i32 %32 to float + %153 = bitcast i32 %32 to float + %154 = fmul float %152, %153 + %155 = fadd float %151, %154 + %156 = call float @llvm.sqrt.f32(float %155) + %157 = fneg float %68 + %158 = fmul float %156, %157 + %159 = bitcast i32 %52 to float + %160 = fadd float %159, %158 + %161 = bitcast i32 %52 to float + %162 = bitcast i32 %52 to float + %163 = fmul float %161, %162 + %164 = fadd float %163, 0.000000e+00 + %165 = bitcast i32 %32 to float + %166 = bitcast i32 %32 to float + %167 = fmul float %165, %166 + %168 = fadd float %164, %167 + %169 = call float @llvm.sqrt.f32(float %168) + %170 = fneg float %68 + %171 = fmul float %169, %170 + %172 = bitcast i32 %52 to float + %173 = fadd float %172, %171 + %174 = fmul float %160, %173 + %175 = fadd float %174, 0.000000e+00 + %176 = bitcast i32 %52 to float + %177 = bitcast i32 %52 to float + %178 = fmul float %176, %177 + %179 = fadd float %178, 0.000000e+00 + %180 = bitcast i32 %32 to float + %181 = bitcast i32 %32 to float + %182 = fmul float %180, %181 + %183 = fadd float %179, %182 + %184 = call float @llvm.sqrt.f32(float %183) + %185 = fneg float %68 + %186 = fmul float %184, %185 + %187 = fmul float %186, 0.000000e+00 + %188 = bitcast i32 %32 to float + %189 = fadd float %188, %187 + %190 = bitcast i32 %52 to float + %191 = bitcast i32 %52 to float + %192 = fmul float %190, %191 + %193 = fadd float %192, 0.000000e+00 + %194 = bitcast i32 %32 to float + %195 = bitcast i32 %32 to float + %196 = fmul float %194, %195 + %197 = fadd float %193, %196 + %198 = call float @llvm.sqrt.f32(float %197) + %199 = fneg float %68 + %200 = fmul float %198, %199 + %201 = fmul float %200, 0.000000e+00 + %202 = bitcast i32 %32 to float + %203 = fadd float %202, %201 + %204 = fmul float %189, %203 + %205 = fadd float %175, %204 + %206 = call float @llvm.sqrt.f32(float %205) + %207 = fadd float %206, 0.000000e+00 + %208 = fdiv float %147, %207 + %209 = fmul float %134, %208 + %210 = insertelement <4 x float> %49, float %209, i32 1 + %211 = bitcast i32 %52 to float + %212 = bitcast i32 %52 to float + %213 = fmul float %211, %212 + %214 = fadd float %213, 0.000000e+00 + %215 = bitcast i32 %32 to float + %216 = bitcast i32 %32 to float + %217 = fmul float %215, %216 + %218 = fadd float %214, %217 + %219 = call float @llvm.sqrt.f32(float %218) + %220 = fneg float %68 + %221 = fmul float %219, %220 + %222 = bitcast i32 %52 to float + %223 = fadd float %222, %221 + %224 = bitcast i32 %52 to float + %225 = bitcast i32 %52 to float + %226 = fmul float %224, %225 + %227 = fadd float %226, 0.000000e+00 + %228 = bitcast i32 %32 to float + %229 = bitcast i32 %32 to float + %230 = fmul float %228, %229 + %231 = fadd float %227, %230 + %232 = call float @llvm.sqrt.f32(float %231) + %233 = fneg float %68 + %234 = fmul float %232, %233 + %235 = bitcast i32 %52 to float + %236 = fadd float %235, %234 + %237 = bitcast i32 %52 to float + %238 = bitcast i32 %52 to float + %239 = fmul float %237, %238 + %240 = fadd float %239, 0.000000e+00 + %241 = bitcast i32 %32 to float + %242 = bitcast i32 %32 to float + %243 = fmul float %241, %242 + %244 = fadd float %240, %243 + %245 = call float @llvm.sqrt.f32(float %244) + %246 = fneg float %68 + %247 = fmul float %245, %246 + %248 = bitcast i32 %52 to float + %249 = fadd float %248, %247 + %250 = fmul float %236, %249 + %251 = fadd float %250, 0.000000e+00 + %252 = bitcast i32 %52 to float + %253 = bitcast i32 %52 to float + %254 = fmul float %252, %253 + %255 = fadd float %254, 0.000000e+00 + %256 = bitcast i32 %32 to 
float + %257 = bitcast i32 %32 to float + %258 = fmul float %256, %257 + %259 = fadd float %255, %258 + %260 = call float @llvm.sqrt.f32(float %259) + %261 = fneg float %68 + %262 = fmul float %260, %261 + %263 = fmul float %262, 0.000000e+00 + %264 = bitcast i32 %32 to float + %265 = fadd float %264, %263 + %266 = bitcast i32 %52 to float + %267 = bitcast i32 %52 to float + %268 = fmul float %266, %267 + %269 = fadd float %268, 0.000000e+00 + %270 = bitcast i32 %32 to float + %271 = bitcast i32 %32 to float + %272 = fmul float %270, %271 + %273 = fadd float %269, %272 + %274 = call float @llvm.sqrt.f32(float %273) + %275 = fneg float %68 + %276 = fmul float %274, %275 + %277 = fmul float %276, 0.000000e+00 + %278 = bitcast i32 %32 to float + %279 = fadd float %278, %277 + %280 = fmul float %265, %279 + %281 = fadd float %251, %280 + %282 = call float @llvm.sqrt.f32(float %281) + %283 = fadd float %282, 0.000000e+00 + %284 = fdiv float %223, %283 + %285 = fmul float %284, 2.000000e+00 + %286 = bitcast i32 %52 to float + %287 = bitcast i32 %52 to float + %288 = fmul float %286, %287 + %289 = fadd float %288, 0.000000e+00 + %290 = bitcast i32 %32 to float + %291 = bitcast i32 %32 to float + %292 = fmul float %290, %291 + %293 = fadd float %289, %292 + %294 = call float @llvm.sqrt.f32(float %293) + %295 = fneg float %68 + %296 = fmul float %294, %295 + %297 = fmul float %296, 0.000000e+00 + %298 = bitcast i32 %32 to float + %299 = fadd float %298, %297 + %300 = bitcast i32 %52 to float + %301 = bitcast i32 %52 to float + %302 = fmul float %300, %301 + %303 = fadd float %302, 0.000000e+00 + %304 = bitcast i32 %32 to float + %305 = bitcast i32 %32 to float + %306 = fmul float %304, %305 + %307 = fadd float %303, %306 + %308 = call float @llvm.sqrt.f32(float %307) + %309 = fneg float %68 + %310 = fmul float %308, %309 + %311 = bitcast i32 %52 to float + %312 = fadd float %311, %310 + %313 = bitcast i32 %52 to float + %314 = bitcast i32 %52 to float + %315 = fmul float %313, %314 + %316 = fadd float %315, 0.000000e+00 + %317 = bitcast i32 %32 to float + %318 = bitcast i32 %32 to float + %319 = fmul float %317, %318 + %320 = fadd float %316, %319 + %321 = call float @llvm.sqrt.f32(float %320) + %322 = fneg float %68 + %323 = fmul float %321, %322 + %324 = bitcast i32 %52 to float + %325 = fadd float %324, %323 + %326 = fmul float %312, %325 + %327 = fadd float %326, 0.000000e+00 + %328 = bitcast i32 %52 to float + %329 = bitcast i32 %52 to float + %330 = fmul float %328, %329 + %331 = fadd float %330, 0.000000e+00 + %332 = bitcast i32 %32 to float + %333 = bitcast i32 %32 to float + %334 = fmul float %332, %333 + %335 = fadd float %331, %334 + %336 = call float @llvm.sqrt.f32(float %335) + %337 = fneg float %68 + %338 = fmul float %336, %337 + %339 = fmul float %338, 0.000000e+00 + %340 = bitcast i32 %32 to float + %341 = fadd float %340, %339 + %342 = bitcast i32 %52 to float + %343 = bitcast i32 %52 to float + %344 = fmul float %342, %343 + %345 = fadd float %344, 0.000000e+00 + %346 = bitcast i32 %32 to float + %347 = bitcast i32 %32 to float + %348 = fmul float %346, %347 + %349 = fadd float %345, %348 + %350 = call float @llvm.sqrt.f32(float %349) + %351 = fneg float %68 + %352 = fmul float %350, %351 + %353 = fmul float %352, 0.000000e+00 + %354 = bitcast i32 %32 to float + %355 = fadd float %354, %353 + %356 = fmul float %341, %355 + %357 = fadd float %327, %356 + %358 = call float @llvm.sqrt.f32(float %357) + %359 = fadd float %358, 0.000000e+00 + %360 = fdiv float %299, %359 + %361 = fmul 
float %285, %360 + %362 = insertelement <4 x float> %210, float %361, i32 2 + %363 = bitcast i32 %52 to float + %364 = bitcast i32 %52 to float + %365 = fmul float %363, %364 + %366 = fadd float %365, 0.000000e+00 + %367 = bitcast i32 %32 to float + %368 = bitcast i32 %32 to float + %369 = fmul float %367, %368 + %370 = fadd float %366, %369 + %371 = call float @llvm.sqrt.f32(float %370) + %372 = fneg float %68 + %373 = fmul float %371, %372 + %374 = fmul float %373, 0.000000e+00 + %375 = bitcast i32 %32 to float + %376 = fadd float %375, %374 + %377 = bitcast i32 %52 to float + %378 = bitcast i32 %52 to float + %379 = fmul float %377, %378 + %380 = fadd float %379, 0.000000e+00 + %381 = bitcast i32 %32 to float + %382 = bitcast i32 %32 to float + %383 = fmul float %381, %382 + %384 = fadd float %380, %383 + %385 = call float @llvm.sqrt.f32(float %384) + %386 = fneg float %68 + %387 = fmul float %385, %386 + %388 = bitcast i32 %52 to float + %389 = fadd float %388, %387 + %390 = bitcast i32 %52 to float + %391 = bitcast i32 %52 to float + %392 = fmul float %390, %391 + %393 = fadd float %392, 0.000000e+00 + %394 = bitcast i32 %32 to float + %395 = bitcast i32 %32 to float + %396 = fmul float %394, %395 + %397 = fadd float %393, %396 + %398 = call float @llvm.sqrt.f32(float %397) + %399 = fneg float %68 + %400 = fmul float %398, %399 + %401 = bitcast i32 %52 to float + %402 = fadd float %401, %400 + %403 = fmul float %389, %402 + %404 = fadd float %403, 0.000000e+00 + %405 = bitcast i32 %52 to float + %406 = bitcast i32 %52 to float + %407 = fmul float %405, %406 + %408 = fadd float %407, 0.000000e+00 + %409 = bitcast i32 %32 to float + %410 = bitcast i32 %32 to float + %411 = fmul float %409, %410 + %412 = fadd float %408, %411 + %413 = call float @llvm.sqrt.f32(float %412) + %414 = fneg float %68 + %415 = fmul float %413, %414 + %416 = fmul float %415, 0.000000e+00 + %417 = bitcast i32 %32 to float + %418 = fadd float %417, %416 + %419 = bitcast i32 %52 to float + %420 = bitcast i32 %52 to float + %421 = fmul float %419, %420 + %422 = fadd float %421, 0.000000e+00 + %423 = bitcast i32 %32 to float + %424 = bitcast i32 %32 to float + %425 = fmul float %423, %424 + %426 = fadd float %422, %425 + %427 = call float @llvm.sqrt.f32(float %426) + %428 = fneg float %68 + %429 = fmul float %427, %428 + %430 = fmul float %429, 0.000000e+00 + %431 = bitcast i32 %32 to float + %432 = fadd float %431, %430 + %433 = fmul float %418, %432 + %434 = fadd float %404, %433 + %435 = call float @llvm.sqrt.f32(float %434) + %436 = fadd float %435, 0.000000e+00 + %437 = fdiv float %376, %436 + %438 = fmul float %437, 2.000000e+00 + %439 = bitcast i32 %52 to float + %440 = bitcast i32 %52 to float + %441 = fmul float %439, %440 + %442 = fadd float %441, 0.000000e+00 + %443 = bitcast i32 %32 to float + %444 = bitcast i32 %32 to float + %445 = fmul float %443, %444 + %446 = fadd float %442, %445 + %447 = call float @llvm.sqrt.f32(float %446) + %448 = fneg float %68 + %449 = fmul float %447, %448 + %450 = bitcast i32 %52 to float + %451 = fadd float %450, %449 + %452 = bitcast i32 %52 to float + %453 = bitcast i32 %52 to float + %454 = fmul float %452, %453 + %455 = fadd float %454, 0.000000e+00 + %456 = bitcast i32 %32 to float + %457 = bitcast i32 %32 to float + %458 = fmul float %456, %457 + %459 = fadd float %455, %458 + %460 = call float @llvm.sqrt.f32(float %459) + %461 = fneg float %68 + %462 = fmul float %460, %461 + %463 = bitcast i32 %52 to float + %464 = fadd float %463, %462 + %465 = bitcast i32 %52 to 
float + %466 = bitcast i32 %52 to float + %467 = fmul float %465, %466 + %468 = fadd float %467, 0.000000e+00 + %469 = bitcast i32 %32 to float + %470 = bitcast i32 %32 to float + %471 = fmul float %469, %470 + %472 = fadd float %468, %471 + %473 = call float @llvm.sqrt.f32(float %472) + %474 = fneg float %68 + %475 = fmul float %473, %474 + %476 = bitcast i32 %52 to float + %477 = fadd float %476, %475 + %478 = fmul float %464, %477 + %479 = fadd float %478, 0.000000e+00 + %480 = bitcast i32 %52 to float + %481 = bitcast i32 %52 to float + %482 = fmul float %480, %481 + %483 = fadd float %482, 0.000000e+00 + %484 = bitcast i32 %32 to float + %485 = bitcast i32 %32 to float + %486 = fmul float %484, %485 + %487 = fadd float %483, %486 + %488 = call float @llvm.sqrt.f32(float %487) + %489 = fneg float %68 + %490 = fmul float %488, %489 + %491 = fmul float %490, 0.000000e+00 + %492 = bitcast i32 %32 to float + %493 = fadd float %492, %491 + %494 = bitcast i32 %52 to float + %495 = bitcast i32 %52 to float + %496 = fmul float %494, %495 + %497 = fadd float %496, 0.000000e+00 + %498 = bitcast i32 %32 to float + %499 = bitcast i32 %32 to float + %500 = fmul float %498, %499 + %501 = fadd float %497, %500 + %502 = call float @llvm.sqrt.f32(float %501) + %503 = fneg float %68 + %504 = fmul float %502, %503 + %505 = fmul float %504, 0.000000e+00 + %506 = bitcast i32 %32 to float + %507 = fadd float %506, %505 + %508 = fmul float %493, %507 + %509 = fadd float %479, %508 + %510 = call float @llvm.sqrt.f32(float %509) + %511 = fadd float %510, 0.000000e+00 + %512 = fdiv float %451, %511 + %513 = fmul float %438, %512 + %514 = insertelement <4 x float> %362, float %513, i32 3 + %515 = fsub <4 x float> , %514 + %516 = bitcast i32 %52 to float + %517 = bitcast i32 %52 to float + %518 = fmul float %516, %517 + %519 = fadd float %518, 0.000000e+00 + %520 = bitcast i32 %32 to float + %521 = bitcast i32 %32 to float + %522 = fmul float %520, %521 + %523 = fadd float %519, %522 + %524 = call float @llvm.sqrt.f32(float %523) + %525 = fneg float %68 + %526 = fmul float %524, %525 + %527 = fmul float %526, 0.000000e+00 + %528 = bitcast i32 %32 to float + %529 = fadd float %528, %527 + %530 = bitcast i32 %52 to float + %531 = bitcast i32 %52 to float + %532 = fmul float %530, %531 + %533 = fadd float %532, 0.000000e+00 + %534 = bitcast i32 %32 to float + %535 = bitcast i32 %32 to float + %536 = fmul float %534, %535 + %537 = fadd float %533, %536 + %538 = call float @llvm.sqrt.f32(float %537) + %539 = fneg float %68 + %540 = fmul float %538, %539 + %541 = bitcast i32 %52 to float + %542 = fadd float %541, %540 + %543 = bitcast i32 %52 to float + %544 = bitcast i32 %52 to float + %545 = fmul float %543, %544 + %546 = fadd float %545, 0.000000e+00 + %547 = bitcast i32 %32 to float + %548 = bitcast i32 %32 to float + %549 = fmul float %547, %548 + %550 = fadd float %546, %549 + %551 = call float @llvm.sqrt.f32(float %550) + %552 = fneg float %68 + %553 = fmul float %551, %552 + %554 = bitcast i32 %52 to float + %555 = fadd float %554, %553 + %556 = fmul float %542, %555 + %557 = fadd float %556, 0.000000e+00 + %558 = bitcast i32 %52 to float + %559 = bitcast i32 %52 to float + %560 = fmul float %558, %559 + %561 = fadd float %560, 0.000000e+00 + %562 = bitcast i32 %32 to float + %563 = bitcast i32 %32 to float + %564 = fmul float %562, %563 + %565 = fadd float %561, %564 + %566 = call float @llvm.sqrt.f32(float %565) + %567 = fneg float %68 + %568 = fmul float %566, %567 + %569 = fmul float %568, 0.000000e+00 + 
%570 = bitcast i32 %32 to float + %571 = fadd float %570, %569 + %572 = bitcast i32 %52 to float + %573 = bitcast i32 %52 to float + %574 = fmul float %572, %573 + %575 = fadd float %574, 0.000000e+00 + %576 = bitcast i32 %32 to float + %577 = bitcast i32 %32 to float + %578 = fmul float %576, %577 + %579 = fadd float %575, %578 + %580 = call float @llvm.sqrt.f32(float %579) + %581 = fneg float %68 + %582 = fmul float %580, %581 + %583 = fmul float %582, 0.000000e+00 + %584 = bitcast i32 %32 to float + %585 = fadd float %584, %583 + %586 = fmul float %571, %585 + %587 = fadd float %557, %586 + %588 = call float @llvm.sqrt.f32(float %587) + %589 = fadd float %588, 0.000000e+00 + %590 = fdiv float %529, %589 + %591 = fmul float %590, 2.000000e+00 + %592 = bitcast i32 %52 to float + %593 = bitcast i32 %52 to float + %594 = fmul float %592, %593 + %595 = fadd float %594, 0.000000e+00 + %596 = bitcast i32 %32 to float + %597 = bitcast i32 %32 to float + %598 = fmul float %596, %597 + %599 = fadd float %595, %598 + %600 = call float @llvm.sqrt.f32(float %599) + %601 = fneg float %68 + %602 = fmul float %600, %601 + %603 = fmul float %602, 0.000000e+00 + %604 = bitcast i32 %32 to float + %605 = fadd float %604, %603 + %606 = bitcast i32 %52 to float + %607 = bitcast i32 %52 to float + %608 = fmul float %606, %607 + %609 = fadd float %608, 0.000000e+00 + %610 = bitcast i32 %32 to float + %611 = bitcast i32 %32 to float + %612 = fmul float %610, %611 + %613 = fadd float %609, %612 + %614 = call float @llvm.sqrt.f32(float %613) + %615 = fneg float %68 + %616 = fmul float %614, %615 + %617 = bitcast i32 %52 to float + %618 = fadd float %617, %616 + %619 = bitcast i32 %52 to float + %620 = bitcast i32 %52 to float + %621 = fmul float %619, %620 + %622 = fadd float %621, 0.000000e+00 + %623 = bitcast i32 %32 to float + %624 = bitcast i32 %32 to float + %625 = fmul float %623, %624 + %626 = fadd float %622, %625 + %627 = call float @llvm.sqrt.f32(float %626) + %628 = fneg float %68 + %629 = fmul float %627, %628 + %630 = bitcast i32 %52 to float + %631 = fadd float %630, %629 + %632 = fmul float %618, %631 + %633 = fadd float %632, 0.000000e+00 + %634 = bitcast i32 %52 to float + %635 = bitcast i32 %52 to float + %636 = fmul float %634, %635 + %637 = fadd float %636, 0.000000e+00 + %638 = bitcast i32 %32 to float + %639 = bitcast i32 %32 to float + %640 = fmul float %638, %639 + %641 = fadd float %637, %640 + %642 = call float @llvm.sqrt.f32(float %641) + %643 = fneg float %68 + %644 = fmul float %642, %643 + %645 = fmul float %644, 0.000000e+00 + %646 = bitcast i32 %32 to float + %647 = fadd float %646, %645 + %648 = bitcast i32 %52 to float + %649 = bitcast i32 %52 to float + %650 = fmul float %648, %649 + %651 = fadd float %650, 0.000000e+00 + %652 = bitcast i32 %32 to float + %653 = bitcast i32 %32 to float + %654 = fmul float %652, %653 + %655 = fadd float %651, %654 + %656 = call float @llvm.sqrt.f32(float %655) + %657 = fneg float %68 + %658 = fmul float %656, %657 + %659 = fmul float %658, 0.000000e+00 + %660 = bitcast i32 %32 to float + %661 = fadd float %660, %659 + %662 = fmul float %647, %661 + %663 = fadd float %633, %662 + %664 = call float @llvm.sqrt.f32(float %663) + %665 = fadd float %664, 0.000000e+00 + %666 = fdiv float %605, %665 + %667 = fmul float %591, %666 + %668 = fsub float 1.000000e+00, %667 + %669 = insertelement <4 x float> zeroinitializer, float %668, i32 0 + %670 = insertelement <4 x float> %669, float 0.000000e+00, i32 1 + %671 = insertelement <4 x float> %670, float 
0.000000e+00, i32 2 + %672 = insertelement <4 x float> %671, float 0.000000e+00, i32 3 + %673 = shufflevector <4 x float> %515, <4 x float> %672, <8 x i32> + %674 = extractelement <8 x float> %673, i32 0 + %675 = getelementptr float, float* %2, i32 0 + %676 = getelementptr inbounds float, float* %675, i64 3 + %677 = bitcast float* %676 to i32* + %678 = bitcast i32* %677 to float* + store float %674, float* %678, align 4 + %679 = bitcast float* %1 to i8* + %680 = alloca [4 x float], align 16 + %681 = bitcast [4 x float]* %680 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %679, i8* nonnull align 16 dereferenceable(16) %681, i64 16, i1 false) + store float 0.000000e+00, float* %2, align 4 + %682 = bitcast i32 %52 to float + %683 = bitcast i32 %52 to float + %684 = fmul float %682, %683 + %685 = fadd float %684, 0.000000e+00 + %686 = load i32, i32* %31, align 4 + %687 = bitcast i32 %686 to float + %688 = bitcast i32 %686 to float + %689 = fmul float %687, %688 + %690 = fadd float %685, %689 + %691 = call float @llvm.sqrt.f32(float %690) + %692 = sitofp i32 %67 to float + %693 = fneg float %692 + %694 = fmul float %691, %693 + %695 = bitcast i32 %52 to float + %696 = fadd float %695, %694 + %697 = bitcast i32 %52 to float + %698 = bitcast i32 %52 to float + %699 = fmul float %697, %698 + %700 = fadd float %699, 0.000000e+00 + %701 = bitcast i32 %686 to float + %702 = bitcast i32 %686 to float + %703 = fmul float %701, %702 + %704 = fadd float %700, %703 + %705 = call float @llvm.sqrt.f32(float %704) + %706 = fneg float %692 + %707 = fmul float %705, %706 + %708 = bitcast i32 %52 to float + %709 = fadd float %708, %707 + %710 = bitcast i32 %52 to float + %711 = bitcast i32 %52 to float + %712 = fmul float %710, %711 + %713 = fadd float %712, 0.000000e+00 + %714 = bitcast i32 %686 to float + %715 = bitcast i32 %686 to float + %716 = fmul float %714, %715 + %717 = fadd float %713, %716 + %718 = call float @llvm.sqrt.f32(float %717) + %719 = fneg float %692 + %720 = fmul float %718, %719 + %721 = bitcast i32 %52 to float + %722 = fadd float %721, %720 + %723 = fmul float %709, %722 + %724 = fadd float %723, 0.000000e+00 + %725 = bitcast i32 %52 to float + %726 = bitcast i32 %52 to float + %727 = fmul float %725, %726 + %728 = fadd float %727, 0.000000e+00 + %729 = bitcast i32 %686 to float + %730 = bitcast i32 %686 to float + %731 = fmul float %729, %730 + %732 = fadd float %728, %731 + %733 = call float @llvm.sqrt.f32(float %732) + %734 = fneg float %692 + %735 = fmul float %733, %734 + %736 = fmul float %735, 0.000000e+00 + %737 = bitcast i32 %686 to float + %738 = fadd float %737, %736 + %739 = bitcast i32 %52 to float + %740 = bitcast i32 %52 to float + %741 = fmul float %739, %740 + %742 = fadd float %741, 0.000000e+00 + %743 = bitcast i32 %686 to float + %744 = bitcast i32 %686 to float + %745 = fmul float %743, %744 + %746 = fadd float %742, %745 + %747 = call float @llvm.sqrt.f32(float %746) + %748 = fneg float %692 + %749 = fmul float %747, %748 + %750 = fmul float %749, 0.000000e+00 + %751 = bitcast i32 %686 to float + %752 = fadd float %751, %750 + %753 = fmul float %738, %752 + %754 = fadd float %724, %753 + %755 = call float @llvm.sqrt.f32(float %754) + %756 = fadd float %755, 0.000000e+00 + %757 = fdiv float %696, %756 + %758 = fmul float %757, 2.000000e+00 + %759 = bitcast i32 %52 to float + %760 = bitcast i32 %52 to float + %761 = fmul float %759, %760 + %762 = fadd float %761, 0.000000e+00 + %763 = bitcast i32 %686 to float + %764 = bitcast 
i32 %686 to float + %765 = fmul float %763, %764 + %766 = fadd float %762, %765 + %767 = call float @llvm.sqrt.f32(float %766) + %768 = fneg float %692 + %769 = fmul float %767, %768 + %770 = bitcast i32 %52 to float + %771 = fadd float %770, %769 + %772 = bitcast i32 %52 to float + %773 = bitcast i32 %52 to float + %774 = fmul float %772, %773 + %775 = fadd float %774, 0.000000e+00 + %776 = bitcast i32 %686 to float + %777 = bitcast i32 %686 to float + %778 = fmul float %776, %777 + %779 = fadd float %775, %778 + %780 = call float @llvm.sqrt.f32(float %779) + %781 = fneg float %692 + %782 = fmul float %780, %781 + %783 = bitcast i32 %52 to float + %784 = fadd float %783, %782 + %785 = bitcast i32 %52 to float + %786 = bitcast i32 %52 to float + %787 = fmul float %785, %786 + %788 = fadd float %787, 0.000000e+00 + %789 = bitcast i32 %686 to float + %790 = bitcast i32 %686 to float + %791 = fmul float %789, %790 + %792 = fadd float %788, %791 + %793 = call float @llvm.sqrt.f32(float %792) + %794 = fneg float %692 + %795 = fmul float %793, %794 + %796 = bitcast i32 %52 to float + %797 = fadd float %796, %795 + %798 = fmul float %784, %797 + %799 = fadd float %798, 0.000000e+00 + %800 = bitcast i32 %52 to float + %801 = bitcast i32 %52 to float + %802 = fmul float %800, %801 + %803 = fadd float %802, 0.000000e+00 + %804 = bitcast i32 %686 to float + %805 = bitcast i32 %686 to float + %806 = fmul float %804, %805 + %807 = fadd float %803, %806 + %808 = call float @llvm.sqrt.f32(float %807) + %809 = fneg float %692 + %810 = fmul float %808, %809 + %811 = fmul float %810, 0.000000e+00 + %812 = bitcast i32 %686 to float + %813 = fadd float %812, %811 + %814 = bitcast i32 %52 to float + %815 = bitcast i32 %52 to float + %816 = fmul float %814, %815 + %817 = fadd float %816, 0.000000e+00 + %818 = bitcast i32 %686 to float + %819 = bitcast i32 %686 to float + %820 = fmul float %818, %819 + %821 = fadd float %817, %820 + %822 = call float @llvm.sqrt.f32(float %821) + %823 = fneg float %692 + %824 = fmul float %822, %823 + %825 = fmul float %824, 0.000000e+00 + %826 = bitcast i32 %686 to float + %827 = fadd float %826, %825 + %828 = fmul float %813, %827 + %829 = fadd float %799, %828 + %830 = call float @llvm.sqrt.f32(float %829) + %831 = fadd float %830, 0.000000e+00 + %832 = fdiv float %771, %831 + %833 = fmul float %758, %832 + %834 = fsub float 1.000000e+00, %833 + %835 = insertelement <4 x float> zeroinitializer, float %834, i32 0 + %836 = insertelement <4 x float> %835, float 0.000000e+00, i32 1 + %837 = insertelement <4 x float> %836, float 0.000000e+00, i32 2 + %838 = insertelement <4 x float> %837, float 0.000000e+00, i32 3 + %839 = getelementptr float, float* %0, i32 0 + %840 = load float, float* %839, align 4 + %841 = insertelement <4 x float> zeroinitializer, float %840, i32 0 + %842 = insertelement <4 x float> %841, float 0.000000e+00, i32 1 + %843 = insertelement <4 x float> %842, float 0.000000e+00, i32 2 + %844 = insertelement <4 x float> %843, float 0.000000e+00, i32 3 + %845 = call <4 x float> @llvm.fma.v4f32(<4 x float> %838, <4 x float> %844, <4 x float> zeroinitializer) + %846 = extractelement <4 x float> %845, i32 0 + store float %846, float* %2, align 4 + %847 = bitcast i32 %52 to float + %848 = bitcast i32 %52 to float + %849 = fmul float %847, %848 + %850 = fadd float %849, 0.000000e+00 + %851 = bitcast i32 %686 to float + %852 = bitcast i32 %686 to float + %853 = fmul float %851, %852 + %854 = fadd float %850, %853 + %855 = call float @llvm.sqrt.f32(float %854) + %856 = fneg 
float %692 + %857 = fmul float %855, %856 + %858 = bitcast i32 %52 to float + %859 = fadd float %858, %857 + %860 = bitcast i32 %52 to float + %861 = bitcast i32 %52 to float + %862 = fmul float %860, %861 + %863 = fadd float %862, 0.000000e+00 + %864 = bitcast i32 %686 to float + %865 = bitcast i32 %686 to float + %866 = fmul float %864, %865 + %867 = fadd float %863, %866 + %868 = call float @llvm.sqrt.f32(float %867) + %869 = fneg float %692 + %870 = fmul float %868, %869 + %871 = bitcast i32 %52 to float + %872 = fadd float %871, %870 + %873 = bitcast i32 %52 to float + %874 = bitcast i32 %52 to float + %875 = fmul float %873, %874 + %876 = fadd float %875, 0.000000e+00 + %877 = bitcast i32 %686 to float + %878 = bitcast i32 %686 to float + %879 = fmul float %877, %878 + %880 = fadd float %876, %879 + %881 = call float @llvm.sqrt.f32(float %880) + %882 = fneg float %692 + %883 = fmul float %881, %882 + %884 = bitcast i32 %52 to float + %885 = fadd float %884, %883 + %886 = fmul float %872, %885 + %887 = fadd float %886, 0.000000e+00 + %888 = bitcast i32 %52 to float + %889 = bitcast i32 %52 to float + %890 = fmul float %888, %889 + %891 = fadd float %890, 0.000000e+00 + %892 = bitcast i32 %686 to float + %893 = bitcast i32 %686 to float + %894 = fmul float %892, %893 + %895 = fadd float %891, %894 + %896 = call float @llvm.sqrt.f32(float %895) + %897 = fneg float %692 + %898 = fmul float %896, %897 + %899 = fmul float %898, 0.000000e+00 + %900 = bitcast i32 %686 to float + %901 = fadd float %900, %899 + %902 = bitcast i32 %52 to float + %903 = bitcast i32 %52 to float + %904 = fmul float %902, %903 + %905 = fadd float %904, 0.000000e+00 + %906 = bitcast i32 %686 to float + %907 = bitcast i32 %686 to float + %908 = fmul float %906, %907 + %909 = fadd float %905, %908 + %910 = call float @llvm.sqrt.f32(float %909) + %911 = fneg float %692 + %912 = fmul float %910, %911 + %913 = fmul float %912, 0.000000e+00 + %914 = bitcast i32 %686 to float + %915 = fadd float %914, %913 + %916 = fmul float %901, %915 + %917 = fadd float %887, %916 + %918 = call float @llvm.sqrt.f32(float %917) + %919 = fadd float %918, 0.000000e+00 + %920 = fdiv float %859, %919 + %921 = fmul float %920, 2.000000e+00 + %922 = bitcast i32 %52 to float + %923 = bitcast i32 %52 to float + %924 = fmul float %922, %923 + %925 = fadd float %924, 0.000000e+00 + %926 = bitcast i32 %686 to float + %927 = bitcast i32 %686 to float + %928 = fmul float %926, %927 + %929 = fadd float %925, %928 + %930 = call float @llvm.sqrt.f32(float %929) + %931 = fneg float %692 + %932 = fmul float %930, %931 + %933 = bitcast i32 %52 to float + %934 = fadd float %933, %932 + %935 = bitcast i32 %52 to float + %936 = bitcast i32 %52 to float + %937 = fmul float %935, %936 + %938 = fadd float %937, 0.000000e+00 + %939 = bitcast i32 %686 to float + %940 = bitcast i32 %686 to float + %941 = fmul float %939, %940 + %942 = fadd float %938, %941 + %943 = call float @llvm.sqrt.f32(float %942) + %944 = fneg float %692 + %945 = fmul float %943, %944 + %946 = bitcast i32 %52 to float + %947 = fadd float %946, %945 + %948 = bitcast i32 %52 to float + %949 = bitcast i32 %52 to float + %950 = fmul float %948, %949 + %951 = fadd float %950, 0.000000e+00 + %952 = bitcast i32 %686 to float + %953 = bitcast i32 %686 to float + %954 = fmul float %952, %953 + %955 = fadd float %951, %954 + %956 = call float @llvm.sqrt.f32(float %955) + %957 = fneg float %692 + %958 = fmul float %956, %957 + %959 = bitcast i32 %52 to float + %960 = fadd float %959, %958 + %961 = fmul 
float %947, %960 + %962 = fadd float %961, 0.000000e+00 + %963 = bitcast i32 %52 to float + %964 = bitcast i32 %52 to float + %965 = fmul float %963, %964 + %966 = fadd float %965, 0.000000e+00 + %967 = bitcast i32 %686 to float + %968 = bitcast i32 %686 to float + %969 = fmul float %967, %968 + %970 = fadd float %966, %969 + %971 = call float @llvm.sqrt.f32(float %970) + %972 = fneg float %692 + %973 = fmul float %971, %972 + %974 = fmul float %973, 0.000000e+00 + %975 = bitcast i32 %686 to float + %976 = fadd float %975, %974 + %977 = bitcast i32 %52 to float + %978 = bitcast i32 %52 to float + %979 = fmul float %977, %978 + %980 = fadd float %979, 0.000000e+00 + %981 = bitcast i32 %686 to float + %982 = bitcast i32 %686 to float + %983 = fmul float %981, %982 + %984 = fadd float %980, %983 + %985 = call float @llvm.sqrt.f32(float %984) + %986 = fneg float %692 + %987 = fmul float %985, %986 + %988 = fmul float %987, 0.000000e+00 + %989 = bitcast i32 %686 to float + %990 = fadd float %989, %988 + %991 = fmul float %976, %990 + %992 = fadd float %962, %991 + %993 = call float @llvm.sqrt.f32(float %992) + %994 = fadd float %993, 0.000000e+00 + %995 = fdiv float %934, %994 + %996 = fmul float %921, %995 + %997 = fsub float 1.000000e+00, %996 + %998 = fmul float %997, %840 + %999 = fadd float %998, 0.000000e+00 + %1000 = bitcast i32 %52 to float + %1001 = bitcast i32 %52 to float + %1002 = fmul float %1000, %1001 + %1003 = fadd float %1002, 0.000000e+00 + %1004 = bitcast i32 %686 to float + %1005 = bitcast i32 %686 to float + %1006 = fmul float %1004, %1005 + %1007 = fadd float %1003, %1006 + %1008 = call float @llvm.sqrt.f32(float %1007) + %1009 = fneg float %692 + %1010 = fmul float %1008, %1009 + %1011 = bitcast i32 %52 to float + %1012 = fadd float %1011, %1010 + %1013 = bitcast i32 %52 to float + %1014 = bitcast i32 %52 to float + %1015 = fmul float %1013, %1014 + %1016 = fadd float %1015, 0.000000e+00 + %1017 = bitcast i32 %686 to float + %1018 = bitcast i32 %686 to float + %1019 = fmul float %1017, %1018 + %1020 = fadd float %1016, %1019 + %1021 = call float @llvm.sqrt.f32(float %1020) + %1022 = fneg float %692 + %1023 = fmul float %1021, %1022 + %1024 = bitcast i32 %52 to float + %1025 = fadd float %1024, %1023 + %1026 = bitcast i32 %52 to float + %1027 = bitcast i32 %52 to float + %1028 = fmul float %1026, %1027 + %1029 = fadd float %1028, 0.000000e+00 + %1030 = bitcast i32 %686 to float + %1031 = bitcast i32 %686 to float + %1032 = fmul float %1030, %1031 + %1033 = fadd float %1029, %1032 + %1034 = call float @llvm.sqrt.f32(float %1033) + %1035 = fneg float %692 + %1036 = fmul float %1034, %1035 + %1037 = bitcast i32 %52 to float + %1038 = fadd float %1037, %1036 + %1039 = fmul float %1025, %1038 + %1040 = fadd float %1039, 0.000000e+00 + %1041 = bitcast i32 %52 to float + %1042 = bitcast i32 %52 to float + %1043 = fmul float %1041, %1042 + %1044 = fadd float %1043, 0.000000e+00 + %1045 = bitcast i32 %686 to float + %1046 = bitcast i32 %686 to float + %1047 = fmul float %1045, %1046 + %1048 = fadd float %1044, %1047 + %1049 = call float @llvm.sqrt.f32(float %1048) + %1050 = fneg float %692 + %1051 = fmul float %1049, %1050 + %1052 = fmul float %1051, 0.000000e+00 + %1053 = bitcast i32 %686 to float + %1054 = fadd float %1053, %1052 + %1055 = bitcast i32 %52 to float + %1056 = bitcast i32 %52 to float + %1057 = fmul float %1055, %1056 + %1058 = fadd float %1057, 0.000000e+00 + %1059 = bitcast i32 %686 to float + %1060 = bitcast i32 %686 to float + %1061 = fmul float %1059, %1060 + 
%1062 = fadd float %1058, %1061 + %1063 = call float @llvm.sqrt.f32(float %1062) + %1064 = fneg float %692 + %1065 = fmul float %1063, %1064 + %1066 = fmul float %1065, 0.000000e+00 + %1067 = bitcast i32 %686 to float + %1068 = fadd float %1067, %1066 + %1069 = fmul float %1054, %1068 + %1070 = fadd float %1040, %1069 + %1071 = call float @llvm.sqrt.f32(float %1070) + %1072 = fadd float %1071, 0.000000e+00 + %1073 = fdiv float %1012, %1072 + %1074 = fmul float %1073, 2.000000e+00 + %1075 = bitcast i32 %52 to float + %1076 = bitcast i32 %52 to float + %1077 = fmul float %1075, %1076 + %1078 = fadd float %1077, 0.000000e+00 + %1079 = bitcast i32 %686 to float + %1080 = bitcast i32 %686 to float + %1081 = fmul float %1079, %1080 + %1082 = fadd float %1078, %1081 + %1083 = call float @llvm.sqrt.f32(float %1082) + %1084 = fneg float %692 + %1085 = fmul float %1083, %1084 + %1086 = fmul float %1085, 0.000000e+00 + %1087 = bitcast i32 %686 to float + %1088 = fadd float %1087, %1086 + %1089 = bitcast i32 %52 to float + %1090 = bitcast i32 %52 to float + %1091 = fmul float %1089, %1090 + %1092 = fadd float %1091, 0.000000e+00 + %1093 = bitcast i32 %686 to float + %1094 = bitcast i32 %686 to float + %1095 = fmul float %1093, %1094 + %1096 = fadd float %1092, %1095 + %1097 = call float @llvm.sqrt.f32(float %1096) + %1098 = fneg float %692 + %1099 = fmul float %1097, %1098 + %1100 = bitcast i32 %52 to float + %1101 = fadd float %1100, %1099 + %1102 = bitcast i32 %52 to float + %1103 = bitcast i32 %52 to float + %1104 = fmul float %1102, %1103 + %1105 = fadd float %1104, 0.000000e+00 + %1106 = bitcast i32 %686 to float + %1107 = bitcast i32 %686 to float + %1108 = fmul float %1106, %1107 + %1109 = fadd float %1105, %1108 + %1110 = call float @llvm.sqrt.f32(float %1109) + %1111 = fneg float %692 + %1112 = fmul float %1110, %1111 + %1113 = bitcast i32 %52 to float + %1114 = fadd float %1113, %1112 + %1115 = fmul float %1101, %1114 + %1116 = fadd float %1115, 0.000000e+00 + %1117 = bitcast i32 %52 to float + %1118 = bitcast i32 %52 to float + %1119 = fmul float %1117, %1118 + %1120 = fadd float %1119, 0.000000e+00 + %1121 = bitcast i32 %686 to float + %1122 = bitcast i32 %686 to float + %1123 = fmul float %1121, %1122 + %1124 = fadd float %1120, %1123 + %1125 = call float @llvm.sqrt.f32(float %1124) + %1126 = fneg float %692 + %1127 = fmul float %1125, %1126 + %1128 = fmul float %1127, 0.000000e+00 + %1129 = bitcast i32 %686 to float + %1130 = fadd float %1129, %1128 + %1131 = bitcast i32 %52 to float + %1132 = bitcast i32 %52 to float + %1133 = fmul float %1131, %1132 + %1134 = fadd float %1133, 0.000000e+00 + %1135 = bitcast i32 %686 to float + %1136 = bitcast i32 %686 to float + %1137 = fmul float %1135, %1136 + %1138 = fadd float %1134, %1137 + %1139 = call float @llvm.sqrt.f32(float %1138) + %1140 = fneg float %692 + %1141 = fmul float %1139, %1140 + %1142 = fmul float %1141, 0.000000e+00 + %1143 = bitcast i32 %686 to float + %1144 = fadd float %1143, %1142 + %1145 = fmul float %1130, %1144 + %1146 = fadd float %1116, %1145 + %1147 = call float @llvm.sqrt.f32(float %1146) + %1148 = fadd float %1147, 0.000000e+00 + %1149 = fdiv float %1088, %1148 + %1150 = fmul float %1074, %1149 + %1151 = fneg float %1150 + %1152 = getelementptr float, float* %0, i32 0 + %1153 = getelementptr inbounds float, float* %1152, i64 2 + %1154 = load float, float* %1153, align 4 + %1155 = fmul float %1151, %1154 + %1156 = fadd float %999, %1155 + %1157 = insertelement <4 x float> zeroinitializer, float %1156, i32 0 + %1158 = 
insertelement <4 x float> %1157, float 0.000000e+00, i32 1 + %1159 = insertelement <4 x float> %1158, float 0.000000e+00, i32 2 + %1160 = insertelement <4 x float> %1159, float 0.000000e+00, i32 3 + %1161 = extractelement <4 x float> %1160, i32 0 + store float %1161, float* %2, align 4 + %1162 = extractelement <4 x float> %1160, i32 1 + %1163 = getelementptr float, float* %2, i32 0 + %1164 = getelementptr inbounds float, float* %1163, i64 1 + store float %1162, float* %1164, align 4 + %1165 = bitcast i32 %52 to float + %1166 = bitcast i32 %52 to float + %1167 = fmul float %1165, %1166 + %1168 = fadd float %1167, 0.000000e+00 + %1169 = bitcast i32 %686 to float + %1170 = bitcast i32 %686 to float + %1171 = fmul float %1169, %1170 + %1172 = fadd float %1168, %1171 + %1173 = call float @llvm.sqrt.f32(float %1172) + %1174 = fneg float %692 + %1175 = fmul float %1173, %1174 + %1176 = bitcast i32 %52 to float + %1177 = fadd float %1176, %1175 + %1178 = bitcast i32 %52 to float + %1179 = bitcast i32 %52 to float + %1180 = fmul float %1178, %1179 + %1181 = fadd float %1180, 0.000000e+00 + %1182 = bitcast i32 %686 to float + %1183 = bitcast i32 %686 to float + %1184 = fmul float %1182, %1183 + %1185 = fadd float %1181, %1184 + %1186 = call float @llvm.sqrt.f32(float %1185) + %1187 = fneg float %692 + %1188 = fmul float %1186, %1187 + %1189 = bitcast i32 %52 to float + %1190 = fadd float %1189, %1188 + %1191 = bitcast i32 %52 to float + %1192 = bitcast i32 %52 to float + %1193 = fmul float %1191, %1192 + %1194 = fadd float %1193, 0.000000e+00 + %1195 = bitcast i32 %686 to float + %1196 = bitcast i32 %686 to float + %1197 = fmul float %1195, %1196 + %1198 = fadd float %1194, %1197 + %1199 = call float @llvm.sqrt.f32(float %1198) + %1200 = fneg float %692 + %1201 = fmul float %1199, %1200 + %1202 = bitcast i32 %52 to float + %1203 = fadd float %1202, %1201 + %1204 = fmul float %1190, %1203 + %1205 = fadd float %1204, 0.000000e+00 + %1206 = bitcast i32 %52 to float + %1207 = bitcast i32 %52 to float + %1208 = fmul float %1206, %1207 + %1209 = fadd float %1208, 0.000000e+00 + %1210 = bitcast i32 %686 to float + %1211 = bitcast i32 %686 to float + %1212 = fmul float %1210, %1211 + %1213 = fadd float %1209, %1212 + %1214 = call float @llvm.sqrt.f32(float %1213) + %1215 = fneg float %692 + %1216 = fmul float %1214, %1215 + %1217 = fmul float %1216, 0.000000e+00 + %1218 = bitcast i32 %686 to float + %1219 = fadd float %1218, %1217 + %1220 = bitcast i32 %52 to float + %1221 = bitcast i32 %52 to float + %1222 = fmul float %1220, %1221 + %1223 = fadd float %1222, 0.000000e+00 + %1224 = bitcast i32 %686 to float + %1225 = bitcast i32 %686 to float + %1226 = fmul float %1224, %1225 + %1227 = fadd float %1223, %1226 + %1228 = call float @llvm.sqrt.f32(float %1227) + %1229 = fneg float %692 + %1230 = fmul float %1228, %1229 + %1231 = fmul float %1230, 0.000000e+00 + %1232 = bitcast i32 %686 to float + %1233 = fadd float %1232, %1231 + %1234 = fmul float %1219, %1233 + %1235 = fadd float %1205, %1234 + %1236 = call float @llvm.sqrt.f32(float %1235) + %1237 = fadd float %1236, 0.000000e+00 + %1238 = fdiv float %1177, %1237 + %1239 = fmul float %1238, 2.000000e+00 + %1240 = bitcast i32 %52 to float + %1241 = bitcast i32 %52 to float + %1242 = fmul float %1240, %1241 + %1243 = fadd float %1242, 0.000000e+00 + %1244 = bitcast i32 %686 to float + %1245 = bitcast i32 %686 to float + %1246 = fmul float %1244, %1245 + %1247 = fadd float %1243, %1246 + %1248 = call float @llvm.sqrt.f32(float %1247) + %1249 = fneg float %692 
+ %1250 = fmul float %1248, %1249 + %1251 = bitcast i32 %52 to float + %1252 = fadd float %1251, %1250 + %1253 = bitcast i32 %52 to float + %1254 = bitcast i32 %52 to float + %1255 = fmul float %1253, %1254 + %1256 = fadd float %1255, 0.000000e+00 + %1257 = bitcast i32 %686 to float + %1258 = bitcast i32 %686 to float + %1259 = fmul float %1257, %1258 + %1260 = fadd float %1256, %1259 + %1261 = call float @llvm.sqrt.f32(float %1260) + %1262 = fneg float %692 + %1263 = fmul float %1261, %1262 + %1264 = bitcast i32 %52 to float + %1265 = fadd float %1264, %1263 + %1266 = bitcast i32 %52 to float + %1267 = bitcast i32 %52 to float + %1268 = fmul float %1266, %1267 + %1269 = fadd float %1268, 0.000000e+00 + %1270 = bitcast i32 %686 to float + %1271 = bitcast i32 %686 to float + %1272 = fmul float %1270, %1271 + %1273 = fadd float %1269, %1272 + %1274 = call float @llvm.sqrt.f32(float %1273) + %1275 = fneg float %692 + %1276 = fmul float %1274, %1275 + %1277 = bitcast i32 %52 to float + %1278 = fadd float %1277, %1276 + %1279 = fmul float %1265, %1278 + %1280 = fadd float %1279, 0.000000e+00 + %1281 = bitcast i32 %52 to float + %1282 = bitcast i32 %52 to float + %1283 = fmul float %1281, %1282 + %1284 = fadd float %1283, 0.000000e+00 + %1285 = bitcast i32 %686 to float + %1286 = bitcast i32 %686 to float + %1287 = fmul float %1285, %1286 + %1288 = fadd float %1284, %1287 + %1289 = call float @llvm.sqrt.f32(float %1288) + %1290 = fneg float %692 + %1291 = fmul float %1289, %1290 + %1292 = fmul float %1291, 0.000000e+00 + %1293 = bitcast i32 %686 to float + %1294 = fadd float %1293, %1292 + %1295 = bitcast i32 %52 to float + %1296 = bitcast i32 %52 to float + %1297 = fmul float %1295, %1296 + %1298 = fadd float %1297, 0.000000e+00 + %1299 = bitcast i32 %686 to float + %1300 = bitcast i32 %686 to float + %1301 = fmul float %1299, %1300 + %1302 = fadd float %1298, %1301 + %1303 = call float @llvm.sqrt.f32(float %1302) + %1304 = fneg float %692 + %1305 = fmul float %1303, %1304 + %1306 = fmul float %1305, 0.000000e+00 + %1307 = bitcast i32 %686 to float + %1308 = fadd float %1307, %1306 + %1309 = fmul float %1294, %1308 + %1310 = fadd float %1280, %1309 + %1311 = call float @llvm.sqrt.f32(float %1310) + %1312 = fadd float %1311, 0.000000e+00 + %1313 = fdiv float %1252, %1312 + %1314 = fmul float %1239, %1313 + %1315 = fsub float 1.000000e+00, %1314 + %1316 = insertelement <4 x float> zeroinitializer, float %1315, i32 0 + %1317 = insertelement <4 x float> %1316, float 0.000000e+00, i32 1 + %1318 = insertelement <4 x float> %1317, float 0.000000e+00, i32 2 + %1319 = insertelement <4 x float> %1318, float 0.000000e+00, i32 3 + %1320 = getelementptr float, float* %0, i32 0 + %1321 = getelementptr inbounds float, float* %1320, i64 1 + %1322 = load float, float* %1321, align 4 + %1323 = insertelement <4 x float> zeroinitializer, float %1322, i32 0 + %1324 = insertelement <4 x float> %1323, float 0.000000e+00, i32 1 + %1325 = insertelement <4 x float> %1324, float 0.000000e+00, i32 2 + %1326 = insertelement <4 x float> %1325, float 0.000000e+00, i32 3 + %1327 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1319, <4 x float> %1326, <4 x float> zeroinitializer) + %1328 = extractelement <4 x float> %1327, i32 0 + store float %1328, float* %1164, align 4 + %1329 = bitcast i32 %52 to float + %1330 = bitcast i32 %52 to float + %1331 = fmul float %1329, %1330 + %1332 = fadd float %1331, 0.000000e+00 + %1333 = bitcast i32 %686 to float + %1334 = bitcast i32 %686 to float + %1335 = fmul float %1333, %1334 + %1336 = 
fadd float %1332, %1335 + %1337 = call float @llvm.sqrt.f32(float %1336) + %1338 = fneg float %692 + %1339 = fmul float %1337, %1338 + %1340 = bitcast i32 %52 to float + %1341 = fadd float %1340, %1339 + %1342 = bitcast i32 %52 to float + %1343 = bitcast i32 %52 to float + %1344 = fmul float %1342, %1343 + %1345 = fadd float %1344, 0.000000e+00 + %1346 = bitcast i32 %686 to float + %1347 = bitcast i32 %686 to float + %1348 = fmul float %1346, %1347 + %1349 = fadd float %1345, %1348 + %1350 = call float @llvm.sqrt.f32(float %1349) + %1351 = fneg float %692 + %1352 = fmul float %1350, %1351 + %1353 = bitcast i32 %52 to float + %1354 = fadd float %1353, %1352 + %1355 = bitcast i32 %52 to float + %1356 = bitcast i32 %52 to float + %1357 = fmul float %1355, %1356 + %1358 = fadd float %1357, 0.000000e+00 + %1359 = bitcast i32 %686 to float + %1360 = bitcast i32 %686 to float + %1361 = fmul float %1359, %1360 + %1362 = fadd float %1358, %1361 + %1363 = call float @llvm.sqrt.f32(float %1362) + %1364 = fneg float %692 + %1365 = fmul float %1363, %1364 + %1366 = bitcast i32 %52 to float + %1367 = fadd float %1366, %1365 + %1368 = fmul float %1354, %1367 + %1369 = fadd float %1368, 0.000000e+00 + %1370 = bitcast i32 %52 to float + %1371 = bitcast i32 %52 to float + %1372 = fmul float %1370, %1371 + %1373 = fadd float %1372, 0.000000e+00 + %1374 = bitcast i32 %686 to float + %1375 = bitcast i32 %686 to float + %1376 = fmul float %1374, %1375 + %1377 = fadd float %1373, %1376 + %1378 = call float @llvm.sqrt.f32(float %1377) + %1379 = fneg float %692 + %1380 = fmul float %1378, %1379 + %1381 = fmul float %1380, 0.000000e+00 + %1382 = bitcast i32 %686 to float + %1383 = fadd float %1382, %1381 + %1384 = bitcast i32 %52 to float + %1385 = bitcast i32 %52 to float + %1386 = fmul float %1384, %1385 + %1387 = fadd float %1386, 0.000000e+00 + %1388 = bitcast i32 %686 to float + %1389 = bitcast i32 %686 to float + %1390 = fmul float %1388, %1389 + %1391 = fadd float %1387, %1390 + %1392 = call float @llvm.sqrt.f32(float %1391) + %1393 = fneg float %692 + %1394 = fmul float %1392, %1393 + %1395 = fmul float %1394, 0.000000e+00 + %1396 = bitcast i32 %686 to float + %1397 = fadd float %1396, %1395 + %1398 = fmul float %1383, %1397 + %1399 = fadd float %1369, %1398 + %1400 = call float @llvm.sqrt.f32(float %1399) + %1401 = fadd float %1400, 0.000000e+00 + %1402 = fdiv float %1341, %1401 + %1403 = fmul float %1402, 2.000000e+00 + %1404 = bitcast i32 %52 to float + %1405 = bitcast i32 %52 to float + %1406 = fmul float %1404, %1405 + %1407 = fadd float %1406, 0.000000e+00 + %1408 = bitcast i32 %686 to float + %1409 = bitcast i32 %686 to float + %1410 = fmul float %1408, %1409 + %1411 = fadd float %1407, %1410 + %1412 = call float @llvm.sqrt.f32(float %1411) + %1413 = fneg float %692 + %1414 = fmul float %1412, %1413 + %1415 = bitcast i32 %52 to float + %1416 = fadd float %1415, %1414 + %1417 = bitcast i32 %52 to float + %1418 = bitcast i32 %52 to float + %1419 = fmul float %1417, %1418 + %1420 = fadd float %1419, 0.000000e+00 + %1421 = bitcast i32 %686 to float + %1422 = bitcast i32 %686 to float + %1423 = fmul float %1421, %1422 + %1424 = fadd float %1420, %1423 + %1425 = call float @llvm.sqrt.f32(float %1424) + %1426 = fneg float %692 + %1427 = fmul float %1425, %1426 + %1428 = bitcast i32 %52 to float + %1429 = fadd float %1428, %1427 + %1430 = bitcast i32 %52 to float + %1431 = bitcast i32 %52 to float + %1432 = fmul float %1430, %1431 + %1433 = fadd float %1432, 0.000000e+00 + %1434 = bitcast i32 %686 to float + 
%1435 = bitcast i32 %686 to float + %1436 = fmul float %1434, %1435 + %1437 = fadd float %1433, %1436 + %1438 = call float @llvm.sqrt.f32(float %1437) + %1439 = fneg float %692 + %1440 = fmul float %1438, %1439 + %1441 = bitcast i32 %52 to float + %1442 = fadd float %1441, %1440 + %1443 = fmul float %1429, %1442 + %1444 = fadd float %1443, 0.000000e+00 + %1445 = bitcast i32 %52 to float + %1446 = bitcast i32 %52 to float + %1447 = fmul float %1445, %1446 + %1448 = fadd float %1447, 0.000000e+00 + %1449 = bitcast i32 %686 to float + %1450 = bitcast i32 %686 to float + %1451 = fmul float %1449, %1450 + %1452 = fadd float %1448, %1451 + %1453 = call float @llvm.sqrt.f32(float %1452) + %1454 = fneg float %692 + %1455 = fmul float %1453, %1454 + %1456 = fmul float %1455, 0.000000e+00 + %1457 = bitcast i32 %686 to float + %1458 = fadd float %1457, %1456 + %1459 = bitcast i32 %52 to float + %1460 = bitcast i32 %52 to float + %1461 = fmul float %1459, %1460 + %1462 = fadd float %1461, 0.000000e+00 + %1463 = bitcast i32 %686 to float + %1464 = bitcast i32 %686 to float + %1465 = fmul float %1463, %1464 + %1466 = fadd float %1462, %1465 + %1467 = call float @llvm.sqrt.f32(float %1466) + %1468 = fneg float %692 + %1469 = fmul float %1467, %1468 + %1470 = fmul float %1469, 0.000000e+00 + %1471 = bitcast i32 %686 to float + %1472 = fadd float %1471, %1470 + %1473 = fmul float %1458, %1472 + %1474 = fadd float %1444, %1473 + %1475 = call float @llvm.sqrt.f32(float %1474) + %1476 = fadd float %1475, 0.000000e+00 + %1477 = fdiv float %1416, %1476 + %1478 = fmul float %1403, %1477 + %1479 = fsub float 1.000000e+00, %1478 + %1480 = fmul float %1479, %1322 + %1481 = fadd float %1480, 0.000000e+00 + %1482 = bitcast i32 %52 to float + %1483 = bitcast i32 %52 to float + %1484 = fmul float %1482, %1483 + %1485 = fadd float %1484, 0.000000e+00 + %1486 = bitcast i32 %686 to float + %1487 = bitcast i32 %686 to float + %1488 = fmul float %1486, %1487 + %1489 = fadd float %1485, %1488 + %1490 = call float @llvm.sqrt.f32(float %1489) + %1491 = fneg float %692 + %1492 = fmul float %1490, %1491 + %1493 = bitcast i32 %52 to float + %1494 = fadd float %1493, %1492 + %1495 = bitcast i32 %52 to float + %1496 = bitcast i32 %52 to float + %1497 = fmul float %1495, %1496 + %1498 = fadd float %1497, 0.000000e+00 + %1499 = bitcast i32 %686 to float + %1500 = bitcast i32 %686 to float + %1501 = fmul float %1499, %1500 + %1502 = fadd float %1498, %1501 + %1503 = call float @llvm.sqrt.f32(float %1502) + %1504 = fneg float %692 + %1505 = fmul float %1503, %1504 + %1506 = bitcast i32 %52 to float + %1507 = fadd float %1506, %1505 + %1508 = bitcast i32 %52 to float + %1509 = bitcast i32 %52 to float + %1510 = fmul float %1508, %1509 + %1511 = fadd float %1510, 0.000000e+00 + %1512 = bitcast i32 %686 to float + %1513 = bitcast i32 %686 to float + %1514 = fmul float %1512, %1513 + %1515 = fadd float %1511, %1514 + %1516 = call float @llvm.sqrt.f32(float %1515) + %1517 = fneg float %692 + %1518 = fmul float %1516, %1517 + %1519 = bitcast i32 %52 to float + %1520 = fadd float %1519, %1518 + %1521 = fmul float %1507, %1520 + %1522 = fadd float %1521, 0.000000e+00 + %1523 = bitcast i32 %52 to float + %1524 = bitcast i32 %52 to float + %1525 = fmul float %1523, %1524 + %1526 = fadd float %1525, 0.000000e+00 + %1527 = bitcast i32 %686 to float + %1528 = bitcast i32 %686 to float + %1529 = fmul float %1527, %1528 + %1530 = fadd float %1526, %1529 + %1531 = call float @llvm.sqrt.f32(float %1530) + %1532 = fneg float %692 + %1533 = fmul float 
%1531, %1532 + %1534 = fmul float %1533, 0.000000e+00 + %1535 = bitcast i32 %686 to float + %1536 = fadd float %1535, %1534 + %1537 = bitcast i32 %52 to float + %1538 = bitcast i32 %52 to float + %1539 = fmul float %1537, %1538 + %1540 = fadd float %1539, 0.000000e+00 + %1541 = bitcast i32 %686 to float + %1542 = bitcast i32 %686 to float + %1543 = fmul float %1541, %1542 + %1544 = fadd float %1540, %1543 + %1545 = call float @llvm.sqrt.f32(float %1544) + %1546 = fneg float %692 + %1547 = fmul float %1545, %1546 + %1548 = fmul float %1547, 0.000000e+00 + %1549 = bitcast i32 %686 to float + %1550 = fadd float %1549, %1548 + %1551 = fmul float %1536, %1550 + %1552 = fadd float %1522, %1551 + %1553 = call float @llvm.sqrt.f32(float %1552) + %1554 = fadd float %1553, 0.000000e+00 + %1555 = fdiv float %1494, %1554 + %1556 = fmul float %1555, 2.000000e+00 + %1557 = bitcast i32 %52 to float + %1558 = bitcast i32 %52 to float + %1559 = fmul float %1557, %1558 + %1560 = fadd float %1559, 0.000000e+00 + %1561 = bitcast i32 %686 to float + %1562 = bitcast i32 %686 to float + %1563 = fmul float %1561, %1562 + %1564 = fadd float %1560, %1563 + %1565 = call float @llvm.sqrt.f32(float %1564) + %1566 = fneg float %692 + %1567 = fmul float %1565, %1566 + %1568 = fmul float %1567, 0.000000e+00 + %1569 = bitcast i32 %686 to float + %1570 = fadd float %1569, %1568 + %1571 = bitcast i32 %52 to float + %1572 = bitcast i32 %52 to float + %1573 = fmul float %1571, %1572 + %1574 = fadd float %1573, 0.000000e+00 + %1575 = bitcast i32 %686 to float + %1576 = bitcast i32 %686 to float + %1577 = fmul float %1575, %1576 + %1578 = fadd float %1574, %1577 + %1579 = call float @llvm.sqrt.f32(float %1578) + %1580 = fneg float %692 + %1581 = fmul float %1579, %1580 + %1582 = bitcast i32 %52 to float + %1583 = fadd float %1582, %1581 + %1584 = bitcast i32 %52 to float + %1585 = bitcast i32 %52 to float + %1586 = fmul float %1584, %1585 + %1587 = fadd float %1586, 0.000000e+00 + %1588 = bitcast i32 %686 to float + %1589 = bitcast i32 %686 to float + %1590 = fmul float %1588, %1589 + %1591 = fadd float %1587, %1590 + %1592 = call float @llvm.sqrt.f32(float %1591) + %1593 = fneg float %692 + %1594 = fmul float %1592, %1593 + %1595 = bitcast i32 %52 to float + %1596 = fadd float %1595, %1594 + %1597 = fmul float %1583, %1596 + %1598 = fadd float %1597, 0.000000e+00 + %1599 = bitcast i32 %52 to float + %1600 = bitcast i32 %52 to float + %1601 = fmul float %1599, %1600 + %1602 = fadd float %1601, 0.000000e+00 + %1603 = bitcast i32 %686 to float + %1604 = bitcast i32 %686 to float + %1605 = fmul float %1603, %1604 + %1606 = fadd float %1602, %1605 + %1607 = call float @llvm.sqrt.f32(float %1606) + %1608 = fneg float %692 + %1609 = fmul float %1607, %1608 + %1610 = fmul float %1609, 0.000000e+00 + %1611 = bitcast i32 %686 to float + %1612 = fadd float %1611, %1610 + %1613 = bitcast i32 %52 to float + %1614 = bitcast i32 %52 to float + %1615 = fmul float %1613, %1614 + %1616 = fadd float %1615, 0.000000e+00 + %1617 = bitcast i32 %686 to float + %1618 = bitcast i32 %686 to float + %1619 = fmul float %1617, %1618 + %1620 = fadd float %1616, %1619 + %1621 = call float @llvm.sqrt.f32(float %1620) + %1622 = fneg float %692 + %1623 = fmul float %1621, %1622 + %1624 = fmul float %1623, 0.000000e+00 + %1625 = bitcast i32 %686 to float + %1626 = fadd float %1625, %1624 + %1627 = fmul float %1612, %1626 + %1628 = fadd float %1598, %1627 + %1629 = call float @llvm.sqrt.f32(float %1628) + %1630 = fadd float %1629, 0.000000e+00 + %1631 = fdiv 
float %1570, %1630 + %1632 = fmul float %1556, %1631 + %1633 = fneg float %1632 + %1634 = load float, float* %44, align 4 + %1635 = fmul float %1633, %1634 + %1636 = fadd float %1481, %1635 + %1637 = insertelement <4 x float> zeroinitializer, float %1636, i32 0 + %1638 = insertelement <4 x float> %1637, float 0.000000e+00, i32 1 + %1639 = insertelement <4 x float> %1638, float 0.000000e+00, i32 2 + %1640 = insertelement <4 x float> %1639, float 0.000000e+00, i32 3 + %1641 = extractelement <4 x float> %1640, i32 0 + store float %1641, float* %1164, align 4 + %1642 = extractelement <4 x float> %1640, i32 1 + %1643 = getelementptr float, float* %2, i32 0 + %1644 = getelementptr inbounds float, float* %1643, i64 2 + store float %1642, float* %1644, align 4 + %1645 = bitcast i32 %52 to float + %1646 = bitcast i32 %52 to float + %1647 = fmul float %1645, %1646 + %1648 = fadd float %1647, 0.000000e+00 + %1649 = bitcast i32 %686 to float + %1650 = bitcast i32 %686 to float + %1651 = fmul float %1649, %1650 + %1652 = fadd float %1648, %1651 + %1653 = call float @llvm.sqrt.f32(float %1652) + %1654 = fneg float %692 + %1655 = fmul float %1653, %1654 + %1656 = fmul float %1655, 0.000000e+00 + %1657 = bitcast i32 %686 to float + %1658 = fadd float %1657, %1656 + %1659 = bitcast i32 %52 to float + %1660 = bitcast i32 %52 to float + %1661 = fmul float %1659, %1660 + %1662 = fadd float %1661, 0.000000e+00 + %1663 = bitcast i32 %686 to float + %1664 = bitcast i32 %686 to float + %1665 = fmul float %1663, %1664 + %1666 = fadd float %1662, %1665 + %1667 = call float @llvm.sqrt.f32(float %1666) + %1668 = fneg float %692 + %1669 = fmul float %1667, %1668 + %1670 = bitcast i32 %52 to float + %1671 = fadd float %1670, %1669 + %1672 = bitcast i32 %52 to float + %1673 = bitcast i32 %52 to float + %1674 = fmul float %1672, %1673 + %1675 = fadd float %1674, 0.000000e+00 + %1676 = bitcast i32 %686 to float + %1677 = bitcast i32 %686 to float + %1678 = fmul float %1676, %1677 + %1679 = fadd float %1675, %1678 + %1680 = call float @llvm.sqrt.f32(float %1679) + %1681 = fneg float %692 + %1682 = fmul float %1680, %1681 + %1683 = bitcast i32 %52 to float + %1684 = fadd float %1683, %1682 + %1685 = fmul float %1671, %1684 + %1686 = fadd float %1685, 0.000000e+00 + %1687 = bitcast i32 %52 to float + %1688 = bitcast i32 %52 to float + %1689 = fmul float %1687, %1688 + %1690 = fadd float %1689, 0.000000e+00 + %1691 = bitcast i32 %686 to float + %1692 = bitcast i32 %686 to float + %1693 = fmul float %1691, %1692 + %1694 = fadd float %1690, %1693 + %1695 = call float @llvm.sqrt.f32(float %1694) + %1696 = fneg float %692 + %1697 = fmul float %1695, %1696 + %1698 = fmul float %1697, 0.000000e+00 + %1699 = bitcast i32 %686 to float + %1700 = fadd float %1699, %1698 + %1701 = bitcast i32 %52 to float + %1702 = bitcast i32 %52 to float + %1703 = fmul float %1701, %1702 + %1704 = fadd float %1703, 0.000000e+00 + %1705 = bitcast i32 %686 to float + %1706 = bitcast i32 %686 to float + %1707 = fmul float %1705, %1706 + %1708 = fadd float %1704, %1707 + %1709 = call float @llvm.sqrt.f32(float %1708) + %1710 = fneg float %692 + %1711 = fmul float %1709, %1710 + %1712 = fmul float %1711, 0.000000e+00 + %1713 = bitcast i32 %686 to float + %1714 = fadd float %1713, %1712 + %1715 = fmul float %1700, %1714 + %1716 = fadd float %1686, %1715 + %1717 = call float @llvm.sqrt.f32(float %1716) + %1718 = fadd float %1717, 0.000000e+00 + %1719 = fdiv float %1658, %1718 + %1720 = fmul float %1719, 2.000000e+00 + %1721 = bitcast i32 %52 to float + %1722 
= bitcast i32 %52 to float + %1723 = fmul float %1721, %1722 + %1724 = fadd float %1723, 0.000000e+00 + %1725 = bitcast i32 %686 to float + %1726 = bitcast i32 %686 to float + %1727 = fmul float %1725, %1726 + %1728 = fadd float %1724, %1727 + %1729 = call float @llvm.sqrt.f32(float %1728) + %1730 = fneg float %692 + %1731 = fmul float %1729, %1730 + %1732 = bitcast i32 %52 to float + %1733 = fadd float %1732, %1731 + %1734 = bitcast i32 %52 to float + %1735 = bitcast i32 %52 to float + %1736 = fmul float %1734, %1735 + %1737 = fadd float %1736, 0.000000e+00 + %1738 = bitcast i32 %686 to float + %1739 = bitcast i32 %686 to float + %1740 = fmul float %1738, %1739 + %1741 = fadd float %1737, %1740 + %1742 = call float @llvm.sqrt.f32(float %1741) + %1743 = fneg float %692 + %1744 = fmul float %1742, %1743 + %1745 = bitcast i32 %52 to float + %1746 = fadd float %1745, %1744 + %1747 = bitcast i32 %52 to float + %1748 = bitcast i32 %52 to float + %1749 = fmul float %1747, %1748 + %1750 = fadd float %1749, 0.000000e+00 + %1751 = bitcast i32 %686 to float + %1752 = bitcast i32 %686 to float + %1753 = fmul float %1751, %1752 + %1754 = fadd float %1750, %1753 + %1755 = call float @llvm.sqrt.f32(float %1754) + %1756 = fneg float %692 + %1757 = fmul float %1755, %1756 + %1758 = bitcast i32 %52 to float + %1759 = fadd float %1758, %1757 + %1760 = fmul float %1746, %1759 + %1761 = fadd float %1760, 0.000000e+00 + %1762 = bitcast i32 %52 to float + %1763 = bitcast i32 %52 to float + %1764 = fmul float %1762, %1763 + %1765 = fadd float %1764, 0.000000e+00 + %1766 = bitcast i32 %686 to float + %1767 = bitcast i32 %686 to float + %1768 = fmul float %1766, %1767 + %1769 = fadd float %1765, %1768 + %1770 = call float @llvm.sqrt.f32(float %1769) + %1771 = fneg float %692 + %1772 = fmul float %1770, %1771 + %1773 = fmul float %1772, 0.000000e+00 + %1774 = bitcast i32 %686 to float + %1775 = fadd float %1774, %1773 + %1776 = bitcast i32 %52 to float + %1777 = bitcast i32 %52 to float + %1778 = fmul float %1776, %1777 + %1779 = fadd float %1778, 0.000000e+00 + %1780 = bitcast i32 %686 to float + %1781 = bitcast i32 %686 to float + %1782 = fmul float %1780, %1781 + %1783 = fadd float %1779, %1782 + %1784 = call float @llvm.sqrt.f32(float %1783) + %1785 = fneg float %692 + %1786 = fmul float %1784, %1785 + %1787 = fmul float %1786, 0.000000e+00 + %1788 = bitcast i32 %686 to float + %1789 = fadd float %1788, %1787 + %1790 = fmul float %1775, %1789 + %1791 = fadd float %1761, %1790 + %1792 = call float @llvm.sqrt.f32(float %1791) + %1793 = fadd float %1792, 0.000000e+00 + %1794 = fdiv float %1733, %1793 + %1795 = fmul float %1720, %1794 + %1796 = fneg float %1795 + %1797 = insertelement <4 x float> zeroinitializer, float %1796, i32 0 + %1798 = insertelement <4 x float> %1797, float 0.000000e+00, i32 1 + %1799 = insertelement <4 x float> %1798, float 0.000000e+00, i32 2 + %1800 = insertelement <4 x float> %1799, float 0.000000e+00, i32 3 + %1801 = getelementptr float, float* %0, i32 0 + %1802 = load float, float* %1801, align 4 + %1803 = insertelement <4 x float> zeroinitializer, float %1802, i32 0 + %1804 = insertelement <4 x float> %1803, float 0.000000e+00, i32 1 + %1805 = insertelement <4 x float> %1804, float 0.000000e+00, i32 2 + %1806 = insertelement <4 x float> %1805, float 0.000000e+00, i32 3 + %1807 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1800, <4 x float> %1806, <4 x float> zeroinitializer) + %1808 = extractelement <4 x float> %1807, i32 0 + store float %1808, float* %1644, align 4 + %1809 = bitcast 
i32 %52 to float + %1810 = bitcast i32 %52 to float + %1811 = fmul float %1809, %1810 + %1812 = fadd float %1811, 0.000000e+00 + %1813 = bitcast i32 %686 to float + %1814 = bitcast i32 %686 to float + %1815 = fmul float %1813, %1814 + %1816 = fadd float %1812, %1815 + %1817 = call float @llvm.sqrt.f32(float %1816) + %1818 = fneg float %692 + %1819 = fmul float %1817, %1818 + %1820 = fmul float %1819, 0.000000e+00 + %1821 = bitcast i32 %686 to float + %1822 = fadd float %1821, %1820 + %1823 = bitcast i32 %52 to float + %1824 = bitcast i32 %52 to float + %1825 = fmul float %1823, %1824 + %1826 = fadd float %1825, 0.000000e+00 + %1827 = bitcast i32 %686 to float + %1828 = bitcast i32 %686 to float + %1829 = fmul float %1827, %1828 + %1830 = fadd float %1826, %1829 + %1831 = call float @llvm.sqrt.f32(float %1830) + %1832 = fneg float %692 + %1833 = fmul float %1831, %1832 + %1834 = bitcast i32 %52 to float + %1835 = fadd float %1834, %1833 + %1836 = bitcast i32 %52 to float + %1837 = bitcast i32 %52 to float + %1838 = fmul float %1836, %1837 + %1839 = fadd float %1838, 0.000000e+00 + %1840 = bitcast i32 %686 to float + %1841 = bitcast i32 %686 to float + %1842 = fmul float %1840, %1841 + %1843 = fadd float %1839, %1842 + %1844 = call float @llvm.sqrt.f32(float %1843) + %1845 = fneg float %692 + %1846 = fmul float %1844, %1845 + %1847 = bitcast i32 %52 to float + %1848 = fadd float %1847, %1846 + %1849 = fmul float %1835, %1848 + %1850 = fadd float %1849, 0.000000e+00 + %1851 = bitcast i32 %52 to float + %1852 = bitcast i32 %52 to float + %1853 = fmul float %1851, %1852 + %1854 = fadd float %1853, 0.000000e+00 + %1855 = bitcast i32 %686 to float + %1856 = bitcast i32 %686 to float + %1857 = fmul float %1855, %1856 + %1858 = fadd float %1854, %1857 + %1859 = call float @llvm.sqrt.f32(float %1858) + %1860 = fneg float %692 + %1861 = fmul float %1859, %1860 + %1862 = fmul float %1861, 0.000000e+00 + %1863 = bitcast i32 %686 to float + %1864 = fadd float %1863, %1862 + %1865 = bitcast i32 %52 to float + %1866 = bitcast i32 %52 to float + %1867 = fmul float %1865, %1866 + %1868 = fadd float %1867, 0.000000e+00 + %1869 = bitcast i32 %686 to float + %1870 = bitcast i32 %686 to float + %1871 = fmul float %1869, %1870 + %1872 = fadd float %1868, %1871 + %1873 = call float @llvm.sqrt.f32(float %1872) + %1874 = fneg float %692 + %1875 = fmul float %1873, %1874 + %1876 = fmul float %1875, 0.000000e+00 + %1877 = bitcast i32 %686 to float + %1878 = fadd float %1877, %1876 + %1879 = fmul float %1864, %1878 + %1880 = fadd float %1850, %1879 + %1881 = call float @llvm.sqrt.f32(float %1880) + %1882 = fadd float %1881, 0.000000e+00 + %1883 = fdiv float %1822, %1882 + %1884 = fmul float %1883, 2.000000e+00 + %1885 = bitcast i32 %52 to float + %1886 = bitcast i32 %52 to float + %1887 = fmul float %1885, %1886 + %1888 = fadd float %1887, 0.000000e+00 + %1889 = bitcast i32 %686 to float + %1890 = bitcast i32 %686 to float + %1891 = fmul float %1889, %1890 + %1892 = fadd float %1888, %1891 + %1893 = call float @llvm.sqrt.f32(float %1892) + %1894 = fneg float %692 + %1895 = fmul float %1893, %1894 + %1896 = bitcast i32 %52 to float + %1897 = fadd float %1896, %1895 + %1898 = bitcast i32 %52 to float + %1899 = bitcast i32 %52 to float + %1900 = fmul float %1898, %1899 + %1901 = fadd float %1900, 0.000000e+00 + %1902 = bitcast i32 %686 to float + %1903 = bitcast i32 %686 to float + %1904 = fmul float %1902, %1903 + %1905 = fadd float %1901, %1904 + %1906 = call float @llvm.sqrt.f32(float %1905) + %1907 = fneg float %692 
+ %1908 = fmul float %1906, %1907 + %1909 = bitcast i32 %52 to float + %1910 = fadd float %1909, %1908 + %1911 = bitcast i32 %52 to float + %1912 = bitcast i32 %52 to float + %1913 = fmul float %1911, %1912 + %1914 = fadd float %1913, 0.000000e+00 + %1915 = bitcast i32 %686 to float + %1916 = bitcast i32 %686 to float + %1917 = fmul float %1915, %1916 + %1918 = fadd float %1914, %1917 + %1919 = call float @llvm.sqrt.f32(float %1918) + %1920 = fneg float %692 + %1921 = fmul float %1919, %1920 + %1922 = bitcast i32 %52 to float + %1923 = fadd float %1922, %1921 + %1924 = fmul float %1910, %1923 + %1925 = fadd float %1924, 0.000000e+00 + %1926 = bitcast i32 %52 to float + %1927 = bitcast i32 %52 to float + %1928 = fmul float %1926, %1927 + %1929 = fadd float %1928, 0.000000e+00 + %1930 = bitcast i32 %686 to float + %1931 = bitcast i32 %686 to float + %1932 = fmul float %1930, %1931 + %1933 = fadd float %1929, %1932 + %1934 = call float @llvm.sqrt.f32(float %1933) + %1935 = fneg float %692 + %1936 = fmul float %1934, %1935 + %1937 = fmul float %1936, 0.000000e+00 + %1938 = bitcast i32 %686 to float + %1939 = fadd float %1938, %1937 + %1940 = bitcast i32 %52 to float + %1941 = bitcast i32 %52 to float + %1942 = fmul float %1940, %1941 + %1943 = fadd float %1942, 0.000000e+00 + %1944 = bitcast i32 %686 to float + %1945 = bitcast i32 %686 to float + %1946 = fmul float %1944, %1945 + %1947 = fadd float %1943, %1946 + %1948 = call float @llvm.sqrt.f32(float %1947) + %1949 = fneg float %692 + %1950 = fmul float %1948, %1949 + %1951 = fmul float %1950, 0.000000e+00 + %1952 = bitcast i32 %686 to float + %1953 = fadd float %1952, %1951 + %1954 = fmul float %1939, %1953 + %1955 = fadd float %1925, %1954 + %1956 = call float @llvm.sqrt.f32(float %1955) + %1957 = fadd float %1956, 0.000000e+00 + %1958 = fdiv float %1897, %1957 + %1959 = fmul float %1884, %1958 + %1960 = fneg float %1959 + %1961 = fmul float %1960, %1802 + %1962 = fadd float %1961, 0.000000e+00 + %1963 = bitcast i32 %52 to float + %1964 = bitcast i32 %52 to float + %1965 = fmul float %1963, %1964 + %1966 = fadd float %1965, 0.000000e+00 + %1967 = bitcast i32 %686 to float + %1968 = bitcast i32 %686 to float + %1969 = fmul float %1967, %1968 + %1970 = fadd float %1966, %1969 + %1971 = call float @llvm.sqrt.f32(float %1970) + %1972 = fneg float %692 + %1973 = fmul float %1971, %1972 + %1974 = fmul float %1973, 0.000000e+00 + %1975 = bitcast i32 %686 to float + %1976 = fadd float %1975, %1974 + %1977 = bitcast i32 %52 to float + %1978 = bitcast i32 %52 to float + %1979 = fmul float %1977, %1978 + %1980 = fadd float %1979, 0.000000e+00 + %1981 = bitcast i32 %686 to float + %1982 = bitcast i32 %686 to float + %1983 = fmul float %1981, %1982 + %1984 = fadd float %1980, %1983 + %1985 = call float @llvm.sqrt.f32(float %1984) + %1986 = fneg float %692 + %1987 = fmul float %1985, %1986 + %1988 = bitcast i32 %52 to float + %1989 = fadd float %1988, %1987 + %1990 = bitcast i32 %52 to float + %1991 = bitcast i32 %52 to float + %1992 = fmul float %1990, %1991 + %1993 = fadd float %1992, 0.000000e+00 + %1994 = bitcast i32 %686 to float + %1995 = bitcast i32 %686 to float + %1996 = fmul float %1994, %1995 + %1997 = fadd float %1993, %1996 + %1998 = call float @llvm.sqrt.f32(float %1997) + %1999 = fneg float %692 + %2000 = fmul float %1998, %1999 + %2001 = bitcast i32 %52 to float + %2002 = fadd float %2001, %2000 + %2003 = fmul float %1989, %2002 + %2004 = fadd float %2003, 0.000000e+00 + %2005 = bitcast i32 %52 to float + %2006 = bitcast i32 %52 to float 
+ %2007 = fmul float %2005, %2006 + %2008 = fadd float %2007, 0.000000e+00 + %2009 = bitcast i32 %686 to float + %2010 = bitcast i32 %686 to float + %2011 = fmul float %2009, %2010 + %2012 = fadd float %2008, %2011 + %2013 = call float @llvm.sqrt.f32(float %2012) + %2014 = fneg float %692 + %2015 = fmul float %2013, %2014 + %2016 = fmul float %2015, 0.000000e+00 + %2017 = bitcast i32 %686 to float + %2018 = fadd float %2017, %2016 + %2019 = bitcast i32 %52 to float + %2020 = bitcast i32 %52 to float + %2021 = fmul float %2019, %2020 + %2022 = fadd float %2021, 0.000000e+00 + %2023 = bitcast i32 %686 to float + %2024 = bitcast i32 %686 to float + %2025 = fmul float %2023, %2024 + %2026 = fadd float %2022, %2025 + %2027 = call float @llvm.sqrt.f32(float %2026) + %2028 = fneg float %692 + %2029 = fmul float %2027, %2028 + %2030 = fmul float %2029, 0.000000e+00 + %2031 = bitcast i32 %686 to float + %2032 = fadd float %2031, %2030 + %2033 = fmul float %2018, %2032 + %2034 = fadd float %2004, %2033 + %2035 = call float @llvm.sqrt.f32(float %2034) + %2036 = fadd float %2035, 0.000000e+00 + %2037 = fdiv float %1976, %2036 + %2038 = fmul float %2037, 2.000000e+00 + %2039 = bitcast i32 %52 to float + %2040 = bitcast i32 %52 to float + %2041 = fmul float %2039, %2040 + %2042 = fadd float %2041, 0.000000e+00 + %2043 = bitcast i32 %686 to float + %2044 = bitcast i32 %686 to float + %2045 = fmul float %2043, %2044 + %2046 = fadd float %2042, %2045 + %2047 = call float @llvm.sqrt.f32(float %2046) + %2048 = fneg float %692 + %2049 = fmul float %2047, %2048 + %2050 = fmul float %2049, 0.000000e+00 + %2051 = bitcast i32 %686 to float + %2052 = fadd float %2051, %2050 + %2053 = bitcast i32 %52 to float + %2054 = bitcast i32 %52 to float + %2055 = fmul float %2053, %2054 + %2056 = fadd float %2055, 0.000000e+00 + %2057 = bitcast i32 %686 to float + %2058 = bitcast i32 %686 to float + %2059 = fmul float %2057, %2058 + %2060 = fadd float %2056, %2059 + %2061 = call float @llvm.sqrt.f32(float %2060) + %2062 = fneg float %692 + %2063 = fmul float %2061, %2062 + %2064 = bitcast i32 %52 to float + %2065 = fadd float %2064, %2063 + %2066 = bitcast i32 %52 to float + %2067 = bitcast i32 %52 to float + %2068 = fmul float %2066, %2067 + %2069 = fadd float %2068, 0.000000e+00 + %2070 = bitcast i32 %686 to float + %2071 = bitcast i32 %686 to float + %2072 = fmul float %2070, %2071 + %2073 = fadd float %2069, %2072 + %2074 = call float @llvm.sqrt.f32(float %2073) + %2075 = fneg float %692 + %2076 = fmul float %2074, %2075 + %2077 = bitcast i32 %52 to float + %2078 = fadd float %2077, %2076 + %2079 = fmul float %2065, %2078 + %2080 = fadd float %2079, 0.000000e+00 + %2081 = bitcast i32 %52 to float + %2082 = bitcast i32 %52 to float + %2083 = fmul float %2081, %2082 + %2084 = fadd float %2083, 0.000000e+00 + %2085 = bitcast i32 %686 to float + %2086 = bitcast i32 %686 to float + %2087 = fmul float %2085, %2086 + %2088 = fadd float %2084, %2087 + %2089 = call float @llvm.sqrt.f32(float %2088) + %2090 = fneg float %692 + %2091 = fmul float %2089, %2090 + %2092 = fmul float %2091, 0.000000e+00 + %2093 = bitcast i32 %686 to float + %2094 = fadd float %2093, %2092 + %2095 = bitcast i32 %52 to float + %2096 = bitcast i32 %52 to float + %2097 = fmul float %2095, %2096 + %2098 = fadd float %2097, 0.000000e+00 + %2099 = bitcast i32 %686 to float + %2100 = bitcast i32 %686 to float + %2101 = fmul float %2099, %2100 + %2102 = fadd float %2098, %2101 + %2103 = call float @llvm.sqrt.f32(float %2102) + %2104 = fneg float %692 + %2105 = 
fmul float %2103, %2104 + %2106 = fmul float %2105, 0.000000e+00 + %2107 = bitcast i32 %686 to float + %2108 = fadd float %2107, %2106 + %2109 = fmul float %2094, %2108 + %2110 = fadd float %2080, %2109 + %2111 = call float @llvm.sqrt.f32(float %2110) + %2112 = fadd float %2111, 0.000000e+00 + %2113 = fdiv float %2052, %2112 + %2114 = fmul float %2038, %2113 + %2115 = fsub float 1.000000e+00, %2114 + %2116 = load float, float* %1153, align 4 + %2117 = fmul float %2115, %2116 + %2118 = fadd float %1962, %2117 + %2119 = insertelement <4 x float> zeroinitializer, float %2118, i32 0 + %2120 = insertelement <4 x float> %2119, float 0.000000e+00, i32 1 + %2121 = insertelement <4 x float> %2120, float 0.000000e+00, i32 2 + %2122 = insertelement <4 x float> %2121, float 0.000000e+00, i32 3 + %2123 = extractelement <4 x float> %2122, i32 0 + store float %2123, float* %1644, align 4 + %2124 = extractelement <4 x float> %2122, i32 1 + %2125 = getelementptr float, float* %2, i32 0 + %2126 = getelementptr inbounds float, float* %2125, i64 3 + store float %2124, float* %2126, align 4 + %2127 = bitcast i32 %52 to float + %2128 = bitcast i32 %52 to float + %2129 = fmul float %2127, %2128 + %2130 = fadd float %2129, 0.000000e+00 + %2131 = bitcast i32 %686 to float + %2132 = bitcast i32 %686 to float + %2133 = fmul float %2131, %2132 + %2134 = fadd float %2130, %2133 + %2135 = call float @llvm.sqrt.f32(float %2134) + %2136 = fneg float %692 + %2137 = fmul float %2135, %2136 + %2138 = fmul float %2137, 0.000000e+00 + %2139 = bitcast i32 %686 to float + %2140 = fadd float %2139, %2138 + %2141 = bitcast i32 %52 to float + %2142 = bitcast i32 %52 to float + %2143 = fmul float %2141, %2142 + %2144 = fadd float %2143, 0.000000e+00 + %2145 = bitcast i32 %686 to float + %2146 = bitcast i32 %686 to float + %2147 = fmul float %2145, %2146 + %2148 = fadd float %2144, %2147 + %2149 = call float @llvm.sqrt.f32(float %2148) + %2150 = fneg float %692 + %2151 = fmul float %2149, %2150 + %2152 = bitcast i32 %52 to float + %2153 = fadd float %2152, %2151 + %2154 = bitcast i32 %52 to float + %2155 = bitcast i32 %52 to float + %2156 = fmul float %2154, %2155 + %2157 = fadd float %2156, 0.000000e+00 + %2158 = bitcast i32 %686 to float + %2159 = bitcast i32 %686 to float + %2160 = fmul float %2158, %2159 + %2161 = fadd float %2157, %2160 + %2162 = call float @llvm.sqrt.f32(float %2161) + %2163 = fneg float %692 + %2164 = fmul float %2162, %2163 + %2165 = bitcast i32 %52 to float + %2166 = fadd float %2165, %2164 + %2167 = fmul float %2153, %2166 + %2168 = fadd float %2167, 0.000000e+00 + %2169 = bitcast i32 %52 to float + %2170 = bitcast i32 %52 to float + %2171 = fmul float %2169, %2170 + %2172 = fadd float %2171, 0.000000e+00 + %2173 = bitcast i32 %686 to float + %2174 = bitcast i32 %686 to float + %2175 = fmul float %2173, %2174 + %2176 = fadd float %2172, %2175 + %2177 = call float @llvm.sqrt.f32(float %2176) + %2178 = fneg float %692 + %2179 = fmul float %2177, %2178 + %2180 = fmul float %2179, 0.000000e+00 + %2181 = bitcast i32 %686 to float + %2182 = fadd float %2181, %2180 + %2183 = bitcast i32 %52 to float + %2184 = bitcast i32 %52 to float + %2185 = fmul float %2183, %2184 + %2186 = fadd float %2185, 0.000000e+00 + %2187 = bitcast i32 %686 to float + %2188 = bitcast i32 %686 to float + %2189 = fmul float %2187, %2188 + %2190 = fadd float %2186, %2189 + %2191 = call float @llvm.sqrt.f32(float %2190) + %2192 = fneg float %692 + %2193 = fmul float %2191, %2192 + %2194 = fmul float %2193, 0.000000e+00 + %2195 = bitcast i32 
%686 to float + %2196 = fadd float %2195, %2194 + %2197 = fmul float %2182, %2196 + %2198 = fadd float %2168, %2197 + %2199 = call float @llvm.sqrt.f32(float %2198) + %2200 = fadd float %2199, 0.000000e+00 + %2201 = fdiv float %2140, %2200 + %2202 = fmul float %2201, 2.000000e+00 + %2203 = bitcast i32 %52 to float + %2204 = bitcast i32 %52 to float + %2205 = fmul float %2203, %2204 + %2206 = fadd float %2205, 0.000000e+00 + %2207 = bitcast i32 %686 to float + %2208 = bitcast i32 %686 to float + %2209 = fmul float %2207, %2208 + %2210 = fadd float %2206, %2209 + %2211 = call float @llvm.sqrt.f32(float %2210) + %2212 = fneg float %692 + %2213 = fmul float %2211, %2212 + %2214 = bitcast i32 %52 to float + %2215 = fadd float %2214, %2213 + %2216 = bitcast i32 %52 to float + %2217 = bitcast i32 %52 to float + %2218 = fmul float %2216, %2217 + %2219 = fadd float %2218, 0.000000e+00 + %2220 = bitcast i32 %686 to float + %2221 = bitcast i32 %686 to float + %2222 = fmul float %2220, %2221 + %2223 = fadd float %2219, %2222 + %2224 = call float @llvm.sqrt.f32(float %2223) + %2225 = fneg float %692 + %2226 = fmul float %2224, %2225 + %2227 = bitcast i32 %52 to float + %2228 = fadd float %2227, %2226 + %2229 = bitcast i32 %52 to float + %2230 = bitcast i32 %52 to float + %2231 = fmul float %2229, %2230 + %2232 = fadd float %2231, 0.000000e+00 + %2233 = bitcast i32 %686 to float + %2234 = bitcast i32 %686 to float + %2235 = fmul float %2233, %2234 + %2236 = fadd float %2232, %2235 + %2237 = call float @llvm.sqrt.f32(float %2236) + %2238 = fneg float %692 + %2239 = fmul float %2237, %2238 + %2240 = bitcast i32 %52 to float + %2241 = fadd float %2240, %2239 + %2242 = fmul float %2228, %2241 + %2243 = fadd float %2242, 0.000000e+00 + %2244 = bitcast i32 %52 to float + %2245 = bitcast i32 %52 to float + %2246 = fmul float %2244, %2245 + %2247 = fadd float %2246, 0.000000e+00 + %2248 = bitcast i32 %686 to float + %2249 = bitcast i32 %686 to float + %2250 = fmul float %2248, %2249 + %2251 = fadd float %2247, %2250 + %2252 = call float @llvm.sqrt.f32(float %2251) + %2253 = fneg float %692 + %2254 = fmul float %2252, %2253 + %2255 = fmul float %2254, 0.000000e+00 + %2256 = bitcast i32 %686 to float + %2257 = fadd float %2256, %2255 + %2258 = bitcast i32 %52 to float + %2259 = bitcast i32 %52 to float + %2260 = fmul float %2258, %2259 + %2261 = fadd float %2260, 0.000000e+00 + %2262 = bitcast i32 %686 to float + %2263 = bitcast i32 %686 to float + %2264 = fmul float %2262, %2263 + %2265 = fadd float %2261, %2264 + %2266 = call float @llvm.sqrt.f32(float %2265) + %2267 = fneg float %692 + %2268 = fmul float %2266, %2267 + %2269 = fmul float %2268, 0.000000e+00 + %2270 = bitcast i32 %686 to float + %2271 = fadd float %2270, %2269 + %2272 = fmul float %2257, %2271 + %2273 = fadd float %2243, %2272 + %2274 = call float @llvm.sqrt.f32(float %2273) + %2275 = fadd float %2274, 0.000000e+00 + %2276 = fdiv float %2215, %2275 + %2277 = fmul float %2202, %2276 + %2278 = fneg float %2277 + %2279 = insertelement <4 x float> zeroinitializer, float %2278, i32 0 + %2280 = insertelement <4 x float> %2279, float 0.000000e+00, i32 1 + %2281 = insertelement <4 x float> %2280, float 0.000000e+00, i32 2 + %2282 = insertelement <4 x float> %2281, float 0.000000e+00, i32 3 + %2283 = load float, float* %1321, align 4 + %2284 = insertelement <4 x float> zeroinitializer, float %2283, i32 0 + %2285 = insertelement <4 x float> %2284, float 0.000000e+00, i32 1 + %2286 = insertelement <4 x float> %2285, float 0.000000e+00, i32 2 + %2287 = 
insertelement <4 x float> %2286, float 0.000000e+00, i32 3 + %2288 = call <4 x float> @llvm.fma.v4f32(<4 x float> %2282, <4 x float> %2287, <4 x float> zeroinitializer) + %2289 = extractelement <4 x float> %2288, i32 0 + store float %2289, float* %2126, align 4 + %2290 = bitcast i32 %52 to float + %2291 = bitcast i32 %52 to float + %2292 = fmul float %2290, %2291 + %2293 = fadd float %2292, 0.000000e+00 + %2294 = bitcast i32 %686 to float + %2295 = bitcast i32 %686 to float + %2296 = fmul float %2294, %2295 + %2297 = fadd float %2293, %2296 + %2298 = call float @llvm.sqrt.f32(float %2297) + %2299 = fneg float %692 + %2300 = fmul float %2298, %2299 + %2301 = fmul float %2300, 0.000000e+00 + %2302 = bitcast i32 %686 to float + %2303 = fadd float %2302, %2301 + %2304 = bitcast i32 %52 to float + %2305 = bitcast i32 %52 to float + %2306 = fmul float %2304, %2305 + %2307 = fadd float %2306, 0.000000e+00 + %2308 = bitcast i32 %686 to float + %2309 = bitcast i32 %686 to float + %2310 = fmul float %2308, %2309 + %2311 = fadd float %2307, %2310 + %2312 = call float @llvm.sqrt.f32(float %2311) + %2313 = fneg float %692 + %2314 = fmul float %2312, %2313 + %2315 = bitcast i32 %52 to float + %2316 = fadd float %2315, %2314 + %2317 = bitcast i32 %52 to float + %2318 = bitcast i32 %52 to float + %2319 = fmul float %2317, %2318 + %2320 = fadd float %2319, 0.000000e+00 + %2321 = bitcast i32 %686 to float + %2322 = bitcast i32 %686 to float + %2323 = fmul float %2321, %2322 + %2324 = fadd float %2320, %2323 + %2325 = call float @llvm.sqrt.f32(float %2324) + %2326 = fneg float %692 + %2327 = fmul float %2325, %2326 + %2328 = bitcast i32 %52 to float + %2329 = fadd float %2328, %2327 + %2330 = fmul float %2316, %2329 + %2331 = fadd float %2330, 0.000000e+00 + %2332 = bitcast i32 %52 to float + %2333 = bitcast i32 %52 to float + %2334 = fmul float %2332, %2333 + %2335 = fadd float %2334, 0.000000e+00 + %2336 = bitcast i32 %686 to float + %2337 = bitcast i32 %686 to float + %2338 = fmul float %2336, %2337 + %2339 = fadd float %2335, %2338 + %2340 = call float @llvm.sqrt.f32(float %2339) + %2341 = fneg float %692 + %2342 = fmul float %2340, %2341 + %2343 = fmul float %2342, 0.000000e+00 + %2344 = bitcast i32 %686 to float + %2345 = fadd float %2344, %2343 + %2346 = bitcast i32 %52 to float + %2347 = bitcast i32 %52 to float + %2348 = fmul float %2346, %2347 + %2349 = fadd float %2348, 0.000000e+00 + %2350 = bitcast i32 %686 to float + %2351 = bitcast i32 %686 to float + %2352 = fmul float %2350, %2351 + %2353 = fadd float %2349, %2352 + %2354 = call float @llvm.sqrt.f32(float %2353) + %2355 = fneg float %692 + %2356 = fmul float %2354, %2355 + %2357 = fmul float %2356, 0.000000e+00 + %2358 = bitcast i32 %686 to float + %2359 = fadd float %2358, %2357 + %2360 = fmul float %2345, %2359 + %2361 = fadd float %2331, %2360 + %2362 = call float @llvm.sqrt.f32(float %2361) + %2363 = fadd float %2362, 0.000000e+00 + %2364 = fdiv float %2303, %2363 + %2365 = fmul float %2364, 2.000000e+00 + %2366 = bitcast i32 %52 to float + %2367 = bitcast i32 %52 to float + %2368 = fmul float %2366, %2367 + %2369 = fadd float %2368, 0.000000e+00 + %2370 = bitcast i32 %686 to float + %2371 = bitcast i32 %686 to float + %2372 = fmul float %2370, %2371 + %2373 = fadd float %2369, %2372 + %2374 = call float @llvm.sqrt.f32(float %2373) + %2375 = fneg float %692 + %2376 = fmul float %2374, %2375 + %2377 = bitcast i32 %52 to float + %2378 = fadd float %2377, %2376 + %2379 = bitcast i32 %52 to float + %2380 = bitcast i32 %52 to float + %2381 = 
fmul float %2379, %2380 + %2382 = fadd float %2381, 0.000000e+00 + %2383 = bitcast i32 %686 to float + %2384 = bitcast i32 %686 to float + %2385 = fmul float %2383, %2384 + %2386 = fadd float %2382, %2385 + %2387 = call float @llvm.sqrt.f32(float %2386) + %2388 = fneg float %692 + %2389 = fmul float %2387, %2388 + %2390 = bitcast i32 %52 to float + %2391 = fadd float %2390, %2389 + %2392 = bitcast i32 %52 to float + %2393 = bitcast i32 %52 to float + %2394 = fmul float %2392, %2393 + %2395 = fadd float %2394, 0.000000e+00 + %2396 = bitcast i32 %686 to float + %2397 = bitcast i32 %686 to float + %2398 = fmul float %2396, %2397 + %2399 = fadd float %2395, %2398 + %2400 = call float @llvm.sqrt.f32(float %2399) + %2401 = fneg float %692 + %2402 = fmul float %2400, %2401 + %2403 = bitcast i32 %52 to float + %2404 = fadd float %2403, %2402 + %2405 = fmul float %2391, %2404 + %2406 = fadd float %2405, 0.000000e+00 + %2407 = bitcast i32 %52 to float + %2408 = bitcast i32 %52 to float + %2409 = fmul float %2407, %2408 + %2410 = fadd float %2409, 0.000000e+00 + %2411 = bitcast i32 %686 to float + %2412 = bitcast i32 %686 to float + %2413 = fmul float %2411, %2412 + %2414 = fadd float %2410, %2413 + %2415 = call float @llvm.sqrt.f32(float %2414) + %2416 = fneg float %692 + %2417 = fmul float %2415, %2416 + %2418 = fmul float %2417, 0.000000e+00 + %2419 = bitcast i32 %686 to float + %2420 = fadd float %2419, %2418 + %2421 = bitcast i32 %52 to float + %2422 = bitcast i32 %52 to float + %2423 = fmul float %2421, %2422 + %2424 = fadd float %2423, 0.000000e+00 + %2425 = bitcast i32 %686 to float + %2426 = bitcast i32 %686 to float + %2427 = fmul float %2425, %2426 + %2428 = fadd float %2424, %2427 + %2429 = call float @llvm.sqrt.f32(float %2428) + %2430 = fneg float %692 + %2431 = fmul float %2429, %2430 + %2432 = fmul float %2431, 0.000000e+00 + %2433 = bitcast i32 %686 to float + %2434 = fadd float %2433, %2432 + %2435 = fmul float %2420, %2434 + %2436 = fadd float %2406, %2435 + %2437 = call float @llvm.sqrt.f32(float %2436) + %2438 = fadd float %2437, 0.000000e+00 + %2439 = fdiv float %2378, %2438 + %2440 = fmul float %2365, %2439 + %2441 = fneg float %2440 + %2442 = fmul float %2441, %2283 + %2443 = fadd float %2442, 0.000000e+00 + %2444 = bitcast i32 %52 to float + %2445 = bitcast i32 %52 to float + %2446 = fmul float %2444, %2445 + %2447 = fadd float %2446, 0.000000e+00 + %2448 = bitcast i32 %686 to float + %2449 = bitcast i32 %686 to float + %2450 = fmul float %2448, %2449 + %2451 = fadd float %2447, %2450 + %2452 = call float @llvm.sqrt.f32(float %2451) + %2453 = fneg float %692 + %2454 = fmul float %2452, %2453 + %2455 = fmul float %2454, 0.000000e+00 + %2456 = bitcast i32 %686 to float + %2457 = fadd float %2456, %2455 + %2458 = bitcast i32 %52 to float + %2459 = bitcast i32 %52 to float + %2460 = fmul float %2458, %2459 + %2461 = fadd float %2460, 0.000000e+00 + %2462 = bitcast i32 %686 to float + %2463 = bitcast i32 %686 to float + %2464 = fmul float %2462, %2463 + %2465 = fadd float %2461, %2464 + %2466 = call float @llvm.sqrt.f32(float %2465) + %2467 = fneg float %692 + %2468 = fmul float %2466, %2467 + %2469 = bitcast i32 %52 to float + %2470 = fadd float %2469, %2468 + %2471 = bitcast i32 %52 to float + %2472 = bitcast i32 %52 to float + %2473 = fmul float %2471, %2472 + %2474 = fadd float %2473, 0.000000e+00 + %2475 = bitcast i32 %686 to float + %2476 = bitcast i32 %686 to float + %2477 = fmul float %2475, %2476 + %2478 = fadd float %2474, %2477 + %2479 = call float @llvm.sqrt.f32(float 
%2478) + %2480 = fneg float %692 + %2481 = fmul float %2479, %2480 + %2482 = bitcast i32 %52 to float + %2483 = fadd float %2482, %2481 + %2484 = fmul float %2470, %2483 + %2485 = fadd float %2484, 0.000000e+00 + %2486 = bitcast i32 %52 to float + %2487 = bitcast i32 %52 to float + %2488 = fmul float %2486, %2487 + %2489 = fadd float %2488, 0.000000e+00 + %2490 = bitcast i32 %686 to float + %2491 = bitcast i32 %686 to float + %2492 = fmul float %2490, %2491 + %2493 = fadd float %2489, %2492 + %2494 = call float @llvm.sqrt.f32(float %2493) + %2495 = fneg float %692 + %2496 = fmul float %2494, %2495 + %2497 = fmul float %2496, 0.000000e+00 + %2498 = bitcast i32 %686 to float + %2499 = fadd float %2498, %2497 + %2500 = bitcast i32 %52 to float + %2501 = bitcast i32 %52 to float + %2502 = fmul float %2500, %2501 + %2503 = fadd float %2502, 0.000000e+00 + %2504 = bitcast i32 %686 to float + %2505 = bitcast i32 %686 to float + %2506 = fmul float %2504, %2505 + %2507 = fadd float %2503, %2506 + %2508 = call float @llvm.sqrt.f32(float %2507) + %2509 = fneg float %692 + %2510 = fmul float %2508, %2509 + %2511 = fmul float %2510, 0.000000e+00 + %2512 = bitcast i32 %686 to float + %2513 = fadd float %2512, %2511 + %2514 = fmul float %2499, %2513 + %2515 = fadd float %2485, %2514 + %2516 = call float @llvm.sqrt.f32(float %2515) + %2517 = fadd float %2516, 0.000000e+00 + %2518 = fdiv float %2457, %2517 + %2519 = fmul float %2518, 2.000000e+00 + %2520 = bitcast i32 %52 to float + %2521 = bitcast i32 %52 to float + %2522 = fmul float %2520, %2521 + %2523 = fadd float %2522, 0.000000e+00 + %2524 = bitcast i32 %686 to float + %2525 = bitcast i32 %686 to float + %2526 = fmul float %2524, %2525 + %2527 = fadd float %2523, %2526 + %2528 = call float @llvm.sqrt.f32(float %2527) + %2529 = fneg float %692 + %2530 = fmul float %2528, %2529 + %2531 = fmul float %2530, 0.000000e+00 + %2532 = bitcast i32 %686 to float + %2533 = fadd float %2532, %2531 + %2534 = bitcast i32 %52 to float + %2535 = bitcast i32 %52 to float + %2536 = fmul float %2534, %2535 + %2537 = fadd float %2536, 0.000000e+00 + %2538 = bitcast i32 %686 to float + %2539 = bitcast i32 %686 to float + %2540 = fmul float %2538, %2539 + %2541 = fadd float %2537, %2540 + %2542 = call float @llvm.sqrt.f32(float %2541) + %2543 = fneg float %692 + %2544 = fmul float %2542, %2543 + %2545 = bitcast i32 %52 to float + %2546 = fadd float %2545, %2544 + %2547 = bitcast i32 %52 to float + %2548 = bitcast i32 %52 to float + %2549 = fmul float %2547, %2548 + %2550 = fadd float %2549, 0.000000e+00 + %2551 = bitcast i32 %686 to float + %2552 = bitcast i32 %686 to float + %2553 = fmul float %2551, %2552 + %2554 = fadd float %2550, %2553 + %2555 = call float @llvm.sqrt.f32(float %2554) + %2556 = fneg float %692 + %2557 = fmul float %2555, %2556 + %2558 = bitcast i32 %52 to float + %2559 = fadd float %2558, %2557 + %2560 = fmul float %2546, %2559 + %2561 = fadd float %2560, 0.000000e+00 + %2562 = bitcast i32 %52 to float + %2563 = bitcast i32 %52 to float + %2564 = fmul float %2562, %2563 + %2565 = fadd float %2564, 0.000000e+00 + %2566 = bitcast i32 %686 to float + %2567 = bitcast i32 %686 to float + %2568 = fmul float %2566, %2567 + %2569 = fadd float %2565, %2568 + %2570 = call float @llvm.sqrt.f32(float %2569) + %2571 = fneg float %692 + %2572 = fmul float %2570, %2571 + %2573 = fmul float %2572, 0.000000e+00 + %2574 = bitcast i32 %686 to float + %2575 = fadd float %2574, %2573 + %2576 = bitcast i32 %52 to float + %2577 = bitcast i32 %52 to float + %2578 = fmul 
float %2576, %2577 + %2579 = fadd float %2578, 0.000000e+00 + %2580 = bitcast i32 %686 to float + %2581 = bitcast i32 %686 to float + %2582 = fmul float %2580, %2581 + %2583 = fadd float %2579, %2582 + %2584 = call float @llvm.sqrt.f32(float %2583) + %2585 = fneg float %692 + %2586 = fmul float %2584, %2585 + %2587 = fmul float %2586, 0.000000e+00 + %2588 = bitcast i32 %686 to float + %2589 = fadd float %2588, %2587 + %2590 = fmul float %2575, %2589 + %2591 = fadd float %2561, %2590 + %2592 = call float @llvm.sqrt.f32(float %2591) + %2593 = fadd float %2592, 0.000000e+00 + %2594 = fdiv float %2533, %2593 + %2595 = fmul float %2519, %2594 + %2596 = fsub float 1.000000e+00, %2595 + %2597 = load float, float* %44, align 4 + %2598 = fmul float %2596, %2597 + %2599 = fadd float %2443, %2598 + %2600 = insertelement <4 x float> zeroinitializer, float %2599, i32 0 + %2601 = insertelement <4 x float> %2600, float 0.000000e+00, i32 1 + %2602 = insertelement <4 x float> %2601, float 0.000000e+00, i32 2 + %2603 = insertelement <4 x float> %2602, float 0.000000e+00, i32 3 + %2604 = extractelement <4 x float> %2603, i32 0 + store float %2604, float* %2126, align 4 + %2605 = getelementptr float, float* %1, i32 0 + %2606 = getelementptr inbounds float, float* %2605, i64 2 + %2607 = bitcast float* %2606 to i32* + %2608 = load i32, i32* %2607, align 4 + %2609 = bitcast i32 %2608 to float + %2610 = insertelement <4 x float> zeroinitializer, float %2609, i32 0 + %2611 = getelementptr float, float* %1, i32 0 + %2612 = getelementptr inbounds float, float* %2611, i64 1 + %2613 = bitcast float* %2612 to i32* + %2614 = load i32, i32* %2613, align 4 + %2615 = bitcast i32 %2614 to float + %2616 = insertelement <4 x float> %2610, float %2615, i32 1 + %2617 = insertelement <4 x float> %2616, float 0.000000e+00, i32 2 + %2618 = insertelement <4 x float> %2617, float 0.000000e+00, i32 3 + %2619 = extractelement <4 x float> %2618, i32 0 + %2620 = bitcast i32* %2613 to float* + store float %2619, float* %2620, align 4 + %2621 = extractelement <4 x float> %2618, i32 1 + %2622 = bitcast i32* %2607 to float* + store float %2621, float* %2622, align 4 + ret void +} + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader13: + %3 = bitcast float* %2 to i8* + %4 = bitcast float* %0 to i8* + %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) + %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #9 + %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 + %8 = bitcast i8* %7 to float* + store float 1.000000e+00, float* %8, align 4 + %9 = getelementptr inbounds i8, i8* %7, i64 8 + %10 = getelementptr inbounds i8, i8* %7, i64 12 + %11 = bitcast i8* %10 to float* + store float 1.000000e+00, float* %11, align 4 + %12 = bitcast float* %1 to i8* + %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) + %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 + %15 = bitcast i8* %14 to float* + %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 + %17 = bitcast i8* %16 to float* + %18 = bitcast float* %2 to i32* + %19 = load i32, i32* %18, align 4 + %20 = bitcast i8* %14 to i32* + store i32 %19, i32* %20, align 4 + %21 = bitcast i8* %7 to i32* + %22 = load i32, i32* %21, align 4 + %23 = bitcast i8* %16 to i32* 
+ store i32 %22, i32* %23, align 4 + %24 = getelementptr inbounds float, float* %2, i64 2 + %25 = bitcast float* %24 to i32* + %26 = load i32, i32* %25, align 4 + %27 = getelementptr inbounds i8, i8* %14, i64 4 + %28 = bitcast i8* %27 to i32* + store i32 %26, i32* %28, align 4 + %29 = bitcast i8* %9 to i32* + %30 = load i32, i32* %29, align 4 + %31 = getelementptr inbounds i8, i8* %16, i64 4 + %32 = bitcast i8* %31 to i32* + store i32 %30, i32* %32, align 4 + %33 = load float, float* %15, align 4 + %34 = call float @no_opt_sgn(float %33) + %35 = fneg float %34 + %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) + %37 = fmul float %36, %35 + %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 + %39 = bitcast i8* %38 to float* + %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 + %41 = load float, float* %15, align 4 + %42 = load float, float* %17, align 4 + %43 = fmul float %37, %42 + %44 = fadd float %41, %43 + store float %44, float* %39, align 4 + %45 = bitcast i8* %27 to float* + %46 = load float, float* %45, align 4 + %47 = bitcast i8* %31 to float* + %48 = load float, float* %47, align 4 + %49 = fmul float %37, %48 + %50 = fadd float %46, %49 + %51 = getelementptr inbounds i8, i8* %38, i64 4 + %52 = bitcast i8* %51 to float* + store float %50, float* %52, align 4 + %53 = bitcast i8* %40 to float* + %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) + %55 = fadd float %54, 0x3EE4F8B580000000 + %56 = load float, float* %39, align 4 + %57 = fdiv float %56, %55 + store float %57, float* %53, align 4 + %58 = load float, float* %52, align 4 + %59 = fdiv float %58, %55 + %60 = getelementptr inbounds i8, i8* %40, i64 4 + %61 = bitcast i8* %60 to float* + store float %59, float* %61, align 4 + %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 + %63 = bitcast i8* %62 to float* + %64 = load float, float* %53, align 4 + %65 = fmul float %64, 2.000000e+00 + %66 = fmul float %65, %64 + %67 = fsub float 1.000000e+00, %66 + store float %67, float* %63, align 4 + %68 = load float, float* %53, align 4 + %69 = fmul float %68, 2.000000e+00 + %70 = load float, float* %61, align 4 + %71 = fmul float %69, %70 + %72 = fsub float 0.000000e+00, %71 + %73 = getelementptr inbounds i8, i8* %62, i64 4 + %74 = bitcast i8* %73 to float* + store float %72, float* %74, align 4 + %75 = load float, float* %61, align 4 + %76 = fmul float %75, 2.000000e+00 + %77 = load float, float* %53, align 4 + %78 = fmul float %76, %77 + %79 = fsub float 0.000000e+00, %78 + %80 = getelementptr inbounds i8, i8* %62, i64 8 + %81 = bitcast i8* %80 to float* + store float %79, float* %81, align 4 + %82 = load float, float* %61, align 4 + %83 = fmul float %82, 2.000000e+00 + %84 = fmul float %83, %82 + %85 = fsub float 1.000000e+00, %84 + %86 = getelementptr inbounds i8, i8* %62, i64 12 + %87 = bitcast i8* %86 to float* + store float %85, float* %87, align 4 + %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 + %89 = bitcast i8* %88 to float* + %90 = bitcast i8* %62 to i32* + %91 = load i32, i32* %90, align 4 + %92 = bitcast i8* %88 to i32* + store i32 %91, i32* %92, align 4 + %93 = bitcast i8* %73 to i32* + %94 = load i32, i32* %93, align 4 + %95 = getelementptr inbounds i8, i8* %88, i64 4 + %96 = bitcast i8* %95 to i32* + store i32 %94, i32* %96, align 4 + %97 = bitcast i8* %80 to i32* + %98 = load i32, i32* %97, align 4 + %99 = getelementptr inbounds i8, i8* %88, i64 8 + %100 = bitcast i8* %99 to i32* + store i32 %98, i32* %100, align 4 + 
%101 = bitcast i8* %86 to i32* + %102 = load i32, i32* %101, align 4 + %103 = getelementptr inbounds i8, i8* %88, i64 12 + %104 = bitcast i8* %103 to i32* + store i32 %102, i32* %104, align 4 + %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #9 + call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) + call void @free(i8* %14) + call void @free(i8* %16) + call void @free(i8* %38) + call void @free(i8* %40) + call void @free(i8* %62) + call void @free(i8* %88) + call void @no_opt_naive_fixed_transpose(float* %1) + ret void +} + +; Function Attrs: nounwind +declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 + +; Function Attrs: nounwind readnone speculatable willreturn +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 + +; Function Attrs: allocsize(0,1) +declare i8* @calloc(i64, i64) #5 + +declare void @free(i8*) #6 + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #1 { +.preheader6: + %0 = alloca i64, align 8 + %1 = alloca [4 x float], align 16 + %2 = alloca [4 x float], align 16 + %3 = alloca [4 x float], align 16 + %4 = alloca [4 x float], align 16 + %5 = alloca [4 x float], align 16 + %6 = call i64 @time(i64* null) #9 + store i64 %6, i64* %0, align 8 + %7 = call i64 @time(i64* nonnull %0) #9 + %8 = trunc i64 %7 to i32 + call void @srand(i32 %8) #9 + %9 = call i32 @rand() #9 + %10 = sitofp i32 %9 to float + %11 = fdiv float %10, 0x41747AE140000000 + %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 + store float %11, float* %12, align 16 + %13 = call i32 @rand() #9 + %14 = sitofp i32 %13 to float + %15 = fdiv float %14, 0x41747AE140000000 + %16 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 + store float %15, float* %16, align 4 + %17 = call i32 @rand() #9 + %18 = sitofp i32 %17 to float + %19 = fdiv float %18, 0x41747AE140000000 + %20 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 + store float %19, float* %20, align 8 + %21 = call i32 @rand() #9 + %22 = sitofp i32 %21 to float + %23 = fdiv float %22, 0x41747AE140000000 + %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 + store float %23, float* %24, align 4 + %25 = bitcast [4 x float]* %2 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %25, i8 0, i64 16, i1 false) + %26 = bitcast [4 x float]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %26, i8 0, i64 16, i1 false) + %27 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 + %28 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 + call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %27, float* nonnull %28) + %29 = bitcast [4 x float]* %4 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %29, i8 0, i64 16, i1 false) + %30 = bitcast [4 x float]* %5 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %30, i8 0, i64 16, i1 false) + %31 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 + %32 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 + call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %31, float* nonnull %32) + %33 = load float, float* %27, align 16 + %34 = fpext float %33 to double + %35 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %34) #9 + %36 = load float, float* %31, align 16 + %37 = fpext float %36 to double + %38 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %37) #9 + %39 = load float, float* %31, align 16 + %40 = load float, float* %27, align 16 + %41 = fsub float %39, %40 + %42 = call float @llvm.fabs.f32(float %41) + %43 = fcmp uge float %42, 0x3FB99999A0000000 + br i1 %43, label %58, label %44 + +44: ; preds = %.preheader6 + %45 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 + %46 = load float, float* %45, align 4 + %47 = fpext float %46 to double + %48 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %47) #9 + %49 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 + %50 = load float, float* %49, align 4 + %51 = fpext float %50 to double + %52 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %51) #9 + %53 = load float, float* %31, align 16 + %54 = load float, float* %27, align 16 + %55 = fsub float %53, %54 + %56 = call float @llvm.fabs.f32(float %55) + %57 = fcmp uge float %56, 0x3FB99999A0000000 + br i1 %57, label %58, label %.preheader6.1 + +58: ; preds = %115, %.preheader6.1, %44, %.preheader6 + call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 300, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.3, i64 0, i64 0)) #11 + unreachable + +59: ; preds = %.preheader5 + %60 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 + %61 = load float, float* %60, align 4 + %62 = fpext float %61 to double + %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %62) #9 + %64 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 + %65 = load float, float* %64, align 4 + %66 = fpext float %65 to double + %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %66) #9 + %68 = load float, float* %32, align 16 + %69 = load float, float* %28, align 16 + %70 = fsub float %68, %69 + %71 = call float @llvm.fabs.f32(float %70) + %72 = fcmp uge float %71, 0x3FB99999A0000000 + br i1 %72, label %73, label %.preheader.1 + +73: ; preds = %.preheader5, %87, %.preheader.1, %59 + call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 307, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.6, i64 0, i64 0)) #11 + unreachable + +.preheader.1: ; preds = %59 + %74 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 + %75 = load float, float* %74, align 8 + %76 = fpext float %75 to double + %77 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %76) #9 + %78 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 + %79 = load float, float* %78, align 8 + %80 = fpext float %79 to double + %81 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %80) #9 + %82 = load float, float* %64, align 4 + %83 = load float, float* %60, align 4 + %84 = fsub float %82, %83 + %85 = call float @llvm.fabs.f32(float %84) + %86 = fcmp uge float %85, 0x3FB99999A0000000 + br i1 %86, label %73, label %87 + +87: ; preds = %.preheader.1 + %88 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 + %89 = load float, float* %88, align 4 + %90 = fpext float %89 to double + %91 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %90) #9 + %92 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 + %93 = load float, float* %92, align 4 + %94 = fpext float %93 to double + %95 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %94) #9 + %96 = load float, float* %64, align 4 + %97 = load float, float* %60, align 4 + %98 = fsub float %96, %97 + %99 = call float @llvm.fabs.f32(float %98) + %100 = fcmp uge float %99, 0x3FB99999A0000000 + br i1 %100, label %73, label %101 + +101: ; preds = %87 + ret i32 0 + +.preheader6.1: ; preds = %44 + %102 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 + %103 = load float, float* %102, align 8 + %104 = fpext float %103 to double + %105 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %104) #9 + %106 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 + %107 = load float, float* %106, align 8 + %108 = fpext float %107 to double + %109 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %108) #9 + %110 = load float, float* %49, align 4 + %111 = load float, float* %45, align 4 + %112 = fsub float %110, %111 + %113 = call float @llvm.fabs.f32(float %112) + %114 = fcmp uge float %113, 0x3FB99999A0000000 + br i1 %114, label %58, label %115 + +115: ; preds = %.preheader6.1 + %116 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 + %117 = load float, float* %116, align 4 + %118 = fpext float %117 to double + %119 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %118) #9 + %120 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 + %121 = load float, float* %120, align 4 + %122 = fpext float %121 to double + %123 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %122) #9 + %124 = load float, float* %49, align 4 + %125 = load float, float* %45, align 4 + %126 = fsub float %124, %125 + %127 = call float @llvm.fabs.f32(float %126) + %128 = fcmp uge float %127, 0x3FB99999A0000000 + br i1 %128, label %58, label %.preheader5 + +.preheader5: ; preds = %115 + %129 = load float, float* %28, align 16 + %130 = fpext float %129 to double + %131 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %130) #9 + %132 = load float, float* %32, align 16 + %133 = fpext float %132 to double + %134 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %133) #9 + %135 = load float, float* %32, align 16 + %136 = load float, float* %28, align 16 + %137 = fsub float %135, %136 + %138 = call float @llvm.fabs.f32(float %137) + %139 = fcmp uge float %138, 0x3FB99999A0000000 + br i1 %139, label %73, label %59 +} + +declare i64 @time(i64*) #6 + +declare void @srand(i32) #6 + +declare i32 @rand() #6 + +declare i32 @printf(i8*, ...) #6 + +; Function Attrs: nounwind readnone speculatable willreturn +declare double @llvm.fabs.f64(double) #2 + +; Function Attrs: noreturn +declare void @__assert_rtn(i8*, i8*, i32, i8*) #7 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #8 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.fabs.f32(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #2 + +attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone speculatable willreturn } +attributes #3 = { argmemonly nounwind willreturn writeonly } +attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" 
"target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #7 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #8 = { argmemonly nounwind willreturn } +attributes #9 = { nounwind } +attributes #10 = { nounwind allocsize(0,1) } +attributes #11 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} +!3 = distinct !{!3, !4} +!4 = !{!"llvm.loop.unroll.disable"} +!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/flaky-diospyros.ll b/src/dios-egraphs/Diospyros/flaky-outputs/flaky-diospyros.ll new file mode 100644 index 00000000..00e98758 --- /dev/null +++ b/src/dios-egraphs/Diospyros/flaky-outputs/flaky-diospyros.ll @@ -0,0 +1,4260 @@ +; ModuleID = 'build/aa.ll' +source_filename = "fail-tests/qr-decomp-local-arrays.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@.str = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 +@.str.1 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 +@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 +@.str.2 = private unnamed_addr constant [36 x i8] c"fail-tests/qr-decomp-local-arrays.c\00", align 1 +@.str.3 = private unnamed_addr constant [34 x i8] c"fabs(expectedQ[i] - Q[i]) < DELTA\00", align 1 +@.str.4 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 +@.str.5 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 +@.str.6 = private unnamed_addr constant [34 x i8] c"fabs(expectedR[i] - R[i]) < DELTA\00", align 1 + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @sgn(float %0) #0 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_sgn(float %0) #1 { + %2 = fcmp ogt float %0, 0.000000e+00 + %3 = zext i1 %2 to i32 + %4 = fcmp olt float %0, 0.000000e+00 + %.neg = sext i1 %4 to i32 + %5 = add nsw i32 %.neg, %3 + %6 = sitofp i32 %5 to float + ret float %6 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @naive_norm(float* %0, i32 %1) #0 { + %3 = icmp sgt i32 %1, 0 + %smax = select i1 %3, i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 3 + br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, 
%6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 + %indvars.iv.next = or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32(float) #2 + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_naive_norm(float* %0, i32 %1) #1 { + %3 = icmp sgt i32 %1, 0 + %smax = select i1 %3, i32 %1, i32 0 + %wide.trip.count = zext i32 %smax to i64 + br i1 %3, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %2 + %4 = add nsw i64 %wide.trip.count, -1 + %xtraiter = and i64 %wide.trip.count, 3 + %5 = icmp ult i64 %4, 3 + br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count, 2147483644 + br label %6 + +6: ; preds = %6, %.lr.ph.new + %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] + %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 + %8 = load float, float* %7, align 4 + %9 = fmul float %8, %8 + %10 = fadd float %.013, %9 + %indvars.iv.next = 
or i64 %indvars.iv2, 1 + %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next + %12 = load float, float* %11, align 4 + %13 = fmul float %12, %12 + %14 = fadd float %10, %13 + %indvars.iv.next.1 = or i64 %indvars.iv2, 2 + %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 + %16 = load float, float* %15, align 4 + %17 = fmul float %16, %16 + %18 = fadd float %14, %17 + %indvars.iv.next.2 = or i64 %indvars.iv2, 3 + %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 + %20 = load float, float* %19, align 4 + %21 = fmul float %20, %20 + %22 = fadd float %18, %21 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 + %niter.nsub.3 = add i64 %niter, -4 + %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 + br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 + +._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] + %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] + %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] + %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil + %24 = load float, float* %23, align 4 + %25 = fmul float %24, %24 + %26 = fadd float %.013.epil, %25 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 + %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] + %27 = call float @llvm.sqrt.f32(float %.01.lcssa) + ret float %27 +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_transpose(float* %0) #0 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast float* %4 to i32* + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_transpose(float* %0) #1 { +.lr.ph: + %1 = getelementptr inbounds float, float* %0, i64 1 + %2 = bitcast float* %1 to i32* + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds float, float* %0, i64 2 + %5 = bitcast float* %4 to i32* + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %2, align 4 + store i32 %3, i32* %5, align 4 + ret void +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { +.preheader: + %3 = load float, float* %0, align 4 + %4 = load float, float* %1, align 4 + %5 = fmul float %3, %4 + %6 = fadd float %5, 0.000000e+00 + %7 = getelementptr inbounds float, float* %0, i64 1 + %8 = load float, float* %7, align 4 + %9 = getelementptr inbounds float, float* %1, i64 2 + %10 = load float, 
float* %9, align 4 + %11 = fmul float %8, %10 + %12 = fadd float %6, %11 + %13 = getelementptr inbounds float, float* %2, i64 1 + %14 = load float, float* %0, align 4 + %15 = getelementptr inbounds float, float* %1, i64 1 + %16 = load float, float* %15, align 4 + %17 = fmul float %14, %16 + %18 = fadd float %17, 0.000000e+00 + %19 = load float, float* %7, align 4 + %20 = getelementptr inbounds float, float* %1, i64 3 + %21 = load float, float* %20, align 4 + %22 = fmul float %19, %21 + %23 = fadd float %18, %22 + %24 = getelementptr inbounds float, float* %0, i64 2 + %25 = getelementptr inbounds float, float* %2, i64 2 + %26 = load float, float* %24, align 4 + %27 = load float, float* %1, align 4 + %28 = fmul float %26, %27 + %29 = fadd float %28, 0.000000e+00 + %30 = getelementptr inbounds float, float* %0, i64 3 + %31 = load float, float* %30, align 4 + %32 = load float, float* %9, align 4 + %33 = fmul float %31, %32 + %34 = fadd float %29, %33 + %35 = getelementptr inbounds float, float* %2, i64 3 + %36 = load float, float* %24, align 4 + %37 = load float, float* %15, align 4 + %38 = fmul float %36, %37 + %39 = fadd float %38, 0.000000e+00 + %40 = load float, float* %30, align 4 + %41 = load float, float* %20, align 4 + %42 = fmul float %40, %41 + %43 = fadd float %39, %42 + store float 0.000000e+00, float* %2, align 4 + %44 = getelementptr float, float* %0, i32 0 + %45 = load float, float* %44, align 4 + %46 = insertelement <4 x float> zeroinitializer, float %45, i32 0 + %47 = insertelement <4 x float> %46, float 0.000000e+00, i32 1 + %48 = insertelement <4 x float> %47, float 0.000000e+00, i32 2 + %49 = insertelement <4 x float> %48, float 0.000000e+00, i32 3 + %50 = getelementptr float, float* %1, i32 0 + %51 = load float, float* %50, align 4 + %52 = insertelement <4 x float> zeroinitializer, float %51, i32 0 + %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 1 + %54 = insertelement <4 x float> %53, float 0.000000e+00, i32 2 + %55 = insertelement <4 x float> %54, float 0.000000e+00, i32 3 + %56 = call <4 x float> @llvm.fma.f32(<4 x float> %49, <4 x float> %55, <4 x float> zeroinitializer) + %57 = extractelement <4 x float> %56, i32 0 + store float %57, float* %2, align 4 + %58 = insertelement <4 x float> zeroinitializer, float %45, i32 0 + %59 = insertelement <4 x float> %58, float 1.000000e+00, i32 1 + %60 = insertelement <4 x float> %59, float 1.000000e+00, i32 2 + %61 = insertelement <4 x float> %60, float 1.000000e+00, i32 3 + %62 = getelementptr float, float* %1, i32 0 + %63 = load float, float* %62, align 4 + %64 = insertelement <4 x float> zeroinitializer, float %63, i32 0 + %65 = insertelement <4 x float> %64, float 0.000000e+00, i32 1 + %66 = insertelement <4 x float> %65, float 0.000000e+00, i32 2 + %67 = insertelement <4 x float> %66, float 0.000000e+00, i32 3 + %68 = fmul <4 x float> %61, %67 + %69 = fadd <4 x float> %68, zeroinitializer + %70 = getelementptr float, float* %0, i32 0 + %71 = getelementptr inbounds float, float* %70, i64 1 + %72 = load float, float* %71, align 4 + %73 = insertelement <4 x float> zeroinitializer, float %72, i32 0 + %74 = insertelement <4 x float> %73, float 0.000000e+00, i32 1 + %75 = insertelement <4 x float> %74, float 0.000000e+00, i32 2 + %76 = insertelement <4 x float> %75, float 0.000000e+00, i32 3 + %77 = getelementptr float, float* %1, i32 0 + %78 = getelementptr inbounds float, float* %77, i64 2 + %79 = load float, float* %78, align 4 + %80 = insertelement <4 x float> zeroinitializer, float %79, i32 0 + %81 = 
insertelement <4 x float> %80, float 0.000000e+00, i32 1 + %82 = insertelement <4 x float> %81, float 0.000000e+00, i32 2 + %83 = insertelement <4 x float> %82, float 0.000000e+00, i32 3 + %84 = call <4 x float> @llvm.fma.f32.1(<4 x float> %76, <4 x float> %83, <4 x float> %69) + %85 = extractelement <4 x float> %84, i32 0 + store float %85, float* %2, align 4 + %86 = extractelement <4 x float> %84, i32 1 + %87 = getelementptr float, float* %2, i32 0 + %88 = getelementptr inbounds float, float* %87, i64 1 + store float %86, float* %88, align 4 + %89 = getelementptr float, float* %0, i32 0 + %90 = load float, float* %89, align 4 + %91 = insertelement <4 x float> zeroinitializer, float %90, i32 0 + %92 = insertelement <4 x float> %91, float 0.000000e+00, i32 1 + %93 = insertelement <4 x float> %92, float 0.000000e+00, i32 2 + %94 = insertelement <4 x float> %93, float 0.000000e+00, i32 3 + %95 = getelementptr float, float* %1, i32 0 + %96 = getelementptr inbounds float, float* %95, i64 1 + %97 = load float, float* %96, align 4 + %98 = insertelement <4 x float> zeroinitializer, float %97, i32 0 + %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 1 + %100 = insertelement <4 x float> %99, float 0.000000e+00, i32 2 + %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 3 + %102 = call <4 x float> @llvm.fma.f32.2(<4 x float> %94, <4 x float> %101, <4 x float> zeroinitializer) + %103 = extractelement <4 x float> %102, i32 0 + %104 = getelementptr float, float* %2, i32 0 + %105 = getelementptr inbounds float, float* %104, i64 1 + store float %103, float* %105, align 4 + %106 = insertelement <4 x float> zeroinitializer, float %90, i32 0 + %107 = insertelement <4 x float> %106, float 1.000000e+00, i32 1 + %108 = insertelement <4 x float> %107, float 1.000000e+00, i32 2 + %109 = insertelement <4 x float> %108, float 1.000000e+00, i32 3 + %110 = load float, float* %96, align 4 + %111 = insertelement <4 x float> zeroinitializer, float %110, i32 0 + %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 1 + %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 2 + %114 = insertelement <4 x float> %113, float 0.000000e+00, i32 3 + %115 = fmul <4 x float> %109, %114 + %116 = fadd <4 x float> %115, zeroinitializer + %117 = getelementptr float, float* %0, i32 0 + %118 = getelementptr inbounds float, float* %117, i64 1 + %119 = load float, float* %118, align 4 + %120 = insertelement <4 x float> zeroinitializer, float %119, i32 0 + %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 1 + %122 = insertelement <4 x float> %121, float 0.000000e+00, i32 2 + %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3 + %124 = getelementptr float, float* %1, i32 0 + %125 = getelementptr inbounds float, float* %124, i64 3 + %126 = load float, float* %125, align 4 + %127 = insertelement <4 x float> zeroinitializer, float %126, i32 0 + %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 1 + %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 2 + %130 = insertelement <4 x float> %129, float 0.000000e+00, i32 3 + %131 = call <4 x float> @llvm.fma.f32.3(<4 x float> %123, <4 x float> %130, <4 x float> %116) + %132 = extractelement <4 x float> %131, i32 0 + %133 = getelementptr float, float* %2, i32 0 + %134 = getelementptr inbounds float, float* %133, i64 1 + store float %132, float* %134, align 4 + %135 = extractelement <4 x float> %131, i32 1 + %136 = getelementptr float, float* %2, i32 0 + %137 = getelementptr inbounds float, float* %136, i64 2 + 
store float %135, float* %137, align 4 + %138 = getelementptr float, float* %0, i32 0 + %139 = getelementptr inbounds float, float* %138, i64 2 + %140 = load float, float* %139, align 4 + %141 = insertelement <4 x float> zeroinitializer, float %140, i32 0 + %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 1 + %143 = insertelement <4 x float> %142, float 0.000000e+00, i32 2 + %144 = insertelement <4 x float> %143, float 0.000000e+00, i32 3 + %145 = getelementptr float, float* %1, i32 0 + %146 = load float, float* %145, align 4 + %147 = insertelement <4 x float> zeroinitializer, float %146, i32 0 + %148 = insertelement <4 x float> %147, float 0.000000e+00, i32 1 + %149 = insertelement <4 x float> %148, float 0.000000e+00, i32 2 + %150 = insertelement <4 x float> %149, float 0.000000e+00, i32 3 + %151 = call <4 x float> @llvm.fma.f32.4(<4 x float> %144, <4 x float> %150, <4 x float> zeroinitializer) + %152 = extractelement <4 x float> %151, i32 0 + %153 = getelementptr float, float* %2, i32 0 + %154 = getelementptr inbounds float, float* %153, i64 2 + store float %152, float* %154, align 4 + %155 = insertelement <4 x float> zeroinitializer, float %140, i32 0 + %156 = insertelement <4 x float> %155, float 1.000000e+00, i32 1 + %157 = insertelement <4 x float> %156, float 1.000000e+00, i32 2 + %158 = insertelement <4 x float> %157, float 1.000000e+00, i32 3 + %159 = insertelement <4 x float> zeroinitializer, float %146, i32 0 + %160 = insertelement <4 x float> %159, float 0.000000e+00, i32 1 + %161 = insertelement <4 x float> %160, float 0.000000e+00, i32 2 + %162 = insertelement <4 x float> %161, float 0.000000e+00, i32 3 + %163 = fmul <4 x float> %158, %162 + %164 = fadd <4 x float> %163, zeroinitializer + %165 = getelementptr float, float* %0, i32 0 + %166 = getelementptr inbounds float, float* %165, i64 3 + %167 = load float, float* %166, align 4 + %168 = insertelement <4 x float> zeroinitializer, float %167, i32 0 + %169 = insertelement <4 x float> %168, float 0.000000e+00, i32 1 + %170 = insertelement <4 x float> %169, float 0.000000e+00, i32 2 + %171 = insertelement <4 x float> %170, float 0.000000e+00, i32 3 + %172 = load float, float* %78, align 4 + %173 = insertelement <4 x float> zeroinitializer, float %172, i32 0 + %174 = insertelement <4 x float> %173, float 0.000000e+00, i32 1 + %175 = insertelement <4 x float> %174, float 0.000000e+00, i32 2 + %176 = insertelement <4 x float> %175, float 0.000000e+00, i32 3 + %177 = call <4 x float> @llvm.fma.f32.5(<4 x float> %171, <4 x float> %176, <4 x float> %164) + %178 = extractelement <4 x float> %177, i32 0 + store float %178, float* %154, align 4 + %179 = extractelement <4 x float> %177, i32 1 + %180 = getelementptr float, float* %2, i32 0 + %181 = getelementptr inbounds float, float* %180, i64 3 + store float %179, float* %181, align 4 + %182 = load float, float* %139, align 4 + %183 = insertelement <4 x float> zeroinitializer, float %182, i32 0 + %184 = insertelement <4 x float> %183, float 0.000000e+00, i32 1 + %185 = insertelement <4 x float> %184, float 0.000000e+00, i32 2 + %186 = insertelement <4 x float> %185, float 0.000000e+00, i32 3 + %187 = load float, float* %96, align 4 + %188 = insertelement <4 x float> zeroinitializer, float %187, i32 0 + %189 = insertelement <4 x float> %188, float 0.000000e+00, i32 1 + %190 = insertelement <4 x float> %189, float 0.000000e+00, i32 2 + %191 = insertelement <4 x float> %190, float 0.000000e+00, i32 3 + %192 = call <4 x float> @llvm.fma.f32.6(<4 x float> %186, <4 x float> 
%191, <4 x float> zeroinitializer) + %193 = extractelement <4 x float> %192, i32 0 + store float %193, float* %181, align 4 + %194 = insertelement <4 x float> zeroinitializer, float %182, i32 0 + %195 = insertelement <4 x float> %194, float 1.000000e+00, i32 1 + %196 = insertelement <4 x float> %195, float 1.000000e+00, i32 2 + %197 = insertelement <4 x float> %196, float 1.000000e+00, i32 3 + %198 = insertelement <4 x float> zeroinitializer, float %187, i32 0 + %199 = insertelement <4 x float> %198, float 0.000000e+00, i32 1 + %200 = insertelement <4 x float> %199, float 0.000000e+00, i32 2 + %201 = insertelement <4 x float> %200, float 0.000000e+00, i32 3 + %202 = fmul <4 x float> %197, %201 + %203 = fadd <4 x float> %202, zeroinitializer + %204 = getelementptr float, float* %0, i32 0 + %205 = getelementptr inbounds float, float* %204, i64 3 + %206 = load float, float* %205, align 4 + %207 = insertelement <4 x float> zeroinitializer, float %206, i32 0 + %208 = insertelement <4 x float> %207, float 0.000000e+00, i32 1 + %209 = insertelement <4 x float> %208, float 0.000000e+00, i32 2 + %210 = insertelement <4 x float> %209, float 0.000000e+00, i32 3 + %211 = load float, float* %125, align 4 + %212 = insertelement <4 x float> zeroinitializer, float %211, i32 0 + %213 = insertelement <4 x float> %212, float 0.000000e+00, i32 1 + %214 = insertelement <4 x float> %213, float 0.000000e+00, i32 2 + %215 = insertelement <4 x float> %214, float 0.000000e+00, i32 3 + %216 = call <4 x float> @llvm.fma.f32.7(<4 x float> %210, <4 x float> %215, <4 x float> %203) + %217 = extractelement <4 x float> %216, i32 0 + store float %217, float* %181, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { +.preheader: + store float 0.000000e+00, float* %2, align 4 + %3 = load float, float* %0, align 4 + %4 = load float, float* %1, align 4 + %5 = fmul float %3, %4 + %6 = fadd float %5, 0.000000e+00 + store float %6, float* %2, align 4 + %7 = getelementptr inbounds float, float* %0, i64 1 + %8 = load float, float* %7, align 4 + %9 = getelementptr inbounds float, float* %1, i64 2 + %10 = load float, float* %9, align 4 + %11 = fmul float %8, %10 + %12 = fadd float %6, %11 + store float %12, float* %2, align 4 + %13 = getelementptr inbounds float, float* %2, i64 1 + store float 0.000000e+00, float* %13, align 4 + %14 = load float, float* %0, align 4 + %15 = getelementptr inbounds float, float* %1, i64 1 + %16 = load float, float* %15, align 4 + %17 = fmul float %14, %16 + %18 = fadd float %17, 0.000000e+00 + store float %18, float* %13, align 4 + %19 = load float, float* %7, align 4 + %20 = getelementptr inbounds float, float* %1, i64 3 + %21 = load float, float* %20, align 4 + %22 = fmul float %19, %21 + %23 = fadd float %18, %22 + store float %23, float* %13, align 4 + %24 = getelementptr inbounds float, float* %0, i64 2 + %25 = getelementptr inbounds float, float* %2, i64 2 + store float 0.000000e+00, float* %25, align 4 + %26 = load float, float* %24, align 4 + %27 = load float, float* %1, align 4 + %28 = fmul float %26, %27 + %29 = fadd float %28, 0.000000e+00 + store float %29, float* %25, align 4 + %30 = getelementptr inbounds float, float* %0, i64 3 + %31 = load float, float* %30, align 4 + %32 = load float, float* %9, align 4 + %33 = fmul float %31, %32 + %34 = fadd float %29, %33 + store float %34, float* %25, align 4 + %35 = getelementptr inbounds float, float* %2, i64 3 + store float 0.000000e+00, 
float* %35, align 4 + %36 = load float, float* %24, align 4 + %37 = load float, float* %15, align 4 + %38 = fmul float %36, %37 + %39 = fadd float %38, 0.000000e+00 + store float %39, float* %35, align 4 + %40 = load float, float* %30, align 4 + %41 = load float, float* %20, align 4 + %42 = fmul float %40, %41 + %43 = fadd float %39, %42 + store float %43, float* %35, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader49: + %3 = bitcast float* %1 to i8* + %4 = alloca [4 x float], align 16 + %5 = bitcast [4 x float]* %4 to i8* + %6 = bitcast float* %0 to i32* + %7 = load i32, i32* %6, align 4 + %8 = bitcast float* %2 to i32* + %9 = getelementptr inbounds float, float* %0, i64 1 + %10 = bitcast float* %9 to i32* + %11 = load i32, i32* %10, align 4 + %12 = getelementptr inbounds float, float* %2, i64 1 + %13 = bitcast float* %12 to i32* + %14 = getelementptr inbounds float, float* %0, i64 2 + %15 = bitcast float* %14 to i32* + %16 = load i32, i32* %15, align 4 + %17 = getelementptr inbounds float, float* %2, i64 2 + %18 = bitcast float* %17 to i32* + %19 = getelementptr inbounds float, float* %0, i64 3 + %20 = bitcast float* %19 to i32* + %21 = load i32, i32* %20, align 4 + %22 = getelementptr inbounds float, float* %2, i64 3 + %23 = bitcast float* %22 to i32* + %24 = bitcast i32 %7 to float + %25 = fcmp ogt float %24, 0.000000e+00 + %26 = zext i1 %25 to i32 + %27 = fcmp olt float %24, 0.000000e+00 + %.neg = sext i1 %27 to i32 + %28 = add nsw i32 %.neg, %26 + %29 = sitofp i32 %28 to float + %30 = fmul float %24, %24 + %31 = fadd float %30, 0.000000e+00 + %32 = bitcast i32 %16 to float + %33 = fmul float %32, %32 + %34 = fadd float %31, %33 + %35 = call float @llvm.sqrt.f32(float %34) #9 + %36 = fneg float %29 + %37 = fmul float %35, %36 + %38 = fadd float %24, %37 + %39 = fmul float %37, 0.000000e+00 + %40 = fadd float %32, %39 + %41 = fmul float %38, %38 + %42 = fadd float %41, 0.000000e+00 + %43 = fmul float %40, %40 + %44 = fadd float %42, %43 + %45 = call float @llvm.sqrt.f32(float %44) #9 + %46 = fadd float %45, 0x3EE4F8B580000000 + %47 = fdiv float %38, %46 + %48 = fdiv float %40, %46 + %49 = fmul float %47, 2.000000e+00 + %50 = fmul float %49, %47 + %51 = fsub float 1.000000e+00, %50 + %52 = fmul float %49, %48 + %53 = fsub float 0.000000e+00, %52 + %54 = fmul float %48, 2.000000e+00 + %55 = fmul float %54, %47 + %56 = fsub float 0.000000e+00, %55 + %57 = fmul float %54, %48 + %58 = fsub float 1.000000e+00, %57 + %59 = bitcast float %51 to i32 + %60 = bitcast [4 x float]* %4 to i32* + %61 = bitcast float %53 to i32 + %62 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 + %63 = bitcast float* %62 to i32* + %64 = bitcast float %56 to i32 + %65 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 + %66 = bitcast float* %65 to i32* + %67 = bitcast float %58 to i32 + %68 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 + %69 = bitcast float* %68 to i32* + %70 = load float, float* %0, align 4 + %71 = fmul float %51, %70 + %72 = fadd float %71, 0.000000e+00 + %73 = load float, float* %14, align 4 + %74 = fmul float %53, %73 + %75 = fadd float %72, %74 + %76 = load float, float* %9, align 4 + %77 = fmul float %51, %76 + %78 = fadd float %77, 0.000000e+00 + %79 = load float, float* %19, align 4 + %80 = fmul float %53, %79 + %81 = fadd float %78, %80 + %82 = load float, float* %0, align 4 + %83 = fmul float %56, %82 + %84 = fadd float 
%83, 0.000000e+00 + %85 = load float, float* %14, align 4 + %86 = fmul float %58, %85 + %87 = fadd float %84, %86 + %88 = load float, float* %9, align 4 + %89 = fmul float %56, %88 + %90 = fadd float %89, 0.000000e+00 + %91 = load float, float* %19, align 4 + %92 = fmul float %58, %91 + %93 = fadd float %90, %92 + %94 = getelementptr inbounds float, float* %1, i64 1 + %95 = bitcast float* %94 to i32* + %96 = load i32, i32* %95, align 4 + %97 = getelementptr inbounds float, float* %1, i64 2 + %98 = bitcast float* %97 to i32* + %99 = load i32, i32* %98, align 4 + %100 = getelementptr float, float* %0, i32 0 + %101 = bitcast float* %100 to i32* + %102 = load i32, i32* %101, align 4 + %103 = bitcast i32 %102 to float + %104 = insertelement <4 x float> zeroinitializer, float %103, i32 0 + %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 1 + %106 = insertelement <4 x float> %105, float 0.000000e+00, i32 2 + %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 3 + %108 = extractelement <4 x float> %107, i32 0 + %109 = bitcast i32* %8 to float* + %110 = getelementptr float, float* %2, i32 0 + %111 = bitcast float* %110 to i32* + %112 = bitcast i32* %111 to float* + store float %108, float* %112, align 4 + %113 = getelementptr float, float* %0, i32 0 + %114 = getelementptr inbounds float, float* %113, i64 1 + %115 = bitcast float* %114 to i32* + %116 = load i32, i32* %115, align 4 + %117 = bitcast i32 %116 to float + %118 = insertelement <4 x float> zeroinitializer, float %117, i32 0 + %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 1 + %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 2 + %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 3 + %122 = extractelement <4 x float> %121, i32 0 + %123 = bitcast i32* %13 to float* + %124 = getelementptr float, float* %2, i32 0 + %125 = getelementptr inbounds float, float* %124, i64 1 + %126 = bitcast float* %125 to i32* + %127 = bitcast i32* %126 to float* + store float %122, float* %127, align 4 + %128 = getelementptr float, float* %0, i32 0 + %129 = getelementptr inbounds float, float* %128, i64 2 + %130 = bitcast float* %129 to i32* + %131 = load i32, i32* %130, align 4 + %132 = bitcast i32 %131 to float + %133 = insertelement <4 x float> zeroinitializer, float %132, i32 0 + %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 1 + %135 = insertelement <4 x float> %134, float 0.000000e+00, i32 2 + %136 = insertelement <4 x float> %135, float 0.000000e+00, i32 3 + %137 = extractelement <4 x float> %136, i32 0 + %138 = bitcast i32* %18 to float* + %139 = getelementptr float, float* %2, i32 0 + %140 = getelementptr inbounds float, float* %139, i64 2 + %141 = bitcast float* %140 to i32* + %142 = bitcast i32* %141 to float* + store float %137, float* %142, align 4 + %143 = getelementptr float, float* %0, i32 0 + %144 = getelementptr inbounds float, float* %143, i64 3 + %145 = bitcast float* %144 to i32* + %146 = load i32, i32* %145, align 4 + %147 = bitcast i32 %146 to float + %148 = fneg float %147 + %149 = insertelement <4 x float> zeroinitializer, float %148, i32 0 + %150 = getelementptr float, float* %0, i32 0 + %151 = bitcast float* %150 to i32* + %152 = load i32, i32* %151, align 4 + %153 = bitcast i32 %152 to float + %154 = bitcast i32 %152 to float + %155 = fmul float %153, %154 + %156 = fadd float %155, 0.000000e+00 + %157 = bitcast i32 %131 to float + %158 = bitcast i32 %131 to float + %159 = fmul float %157, %158 + %160 = fadd float %156, %159 + %161 = call float 
@llvm.sqrt.f32.8(float %160) + %162 = bitcast i32 %152 to float + %163 = fcmp olt float %162, 0.000000e+00 + %164 = sext i1 %163 to i32 + %165 = fcmp ogt float %162, 0.000000e+00 + %166 = zext i1 %165 to i32 + %167 = add nsw i32 %164, %166 + %168 = sitofp i32 %167 to float + %169 = fneg float %168 + %170 = fmul float %161, %169 + %171 = bitcast i32 %152 to float + %172 = fadd float %171, %170 + %173 = bitcast i32 %152 to float + %174 = bitcast i32 %152 to float + %175 = fmul float %173, %174 + %176 = fadd float %175, 0.000000e+00 + %177 = bitcast i32 %131 to float + %178 = bitcast i32 %131 to float + %179 = fmul float %177, %178 + %180 = fadd float %176, %179 + %181 = call float @llvm.sqrt.f32.9(float %180) + %182 = fneg float %168 + %183 = fmul float %181, %182 + %184 = bitcast i32 %152 to float + %185 = fadd float %184, %183 + %186 = bitcast i32 %152 to float + %187 = bitcast i32 %152 to float + %188 = fmul float %186, %187 + %189 = fadd float %188, 0.000000e+00 + %190 = bitcast i32 %131 to float + %191 = bitcast i32 %131 to float + %192 = fmul float %190, %191 + %193 = fadd float %189, %192 + %194 = call float @llvm.sqrt.f32.10(float %193) + %195 = fneg float %168 + %196 = fmul float %194, %195 + %197 = bitcast i32 %152 to float + %198 = fadd float %197, %196 + %199 = fmul float %185, %198 + %200 = fadd float %199, 0.000000e+00 + %201 = bitcast i32 %152 to float + %202 = bitcast i32 %152 to float + %203 = fmul float %201, %202 + %204 = fadd float %203, 0.000000e+00 + %205 = bitcast i32 %131 to float + %206 = bitcast i32 %131 to float + %207 = fmul float %205, %206 + %208 = fadd float %204, %207 + %209 = call float @llvm.sqrt.f32.11(float %208) + %210 = fneg float %168 + %211 = fmul float %209, %210 + %212 = fmul float %211, 0.000000e+00 + %213 = bitcast i32 %131 to float + %214 = fadd float %213, %212 + %215 = bitcast i32 %152 to float + %216 = bitcast i32 %152 to float + %217 = fmul float %215, %216 + %218 = fadd float %217, 0.000000e+00 + %219 = bitcast i32 %131 to float + %220 = bitcast i32 %131 to float + %221 = fmul float %219, %220 + %222 = fadd float %218, %221 + %223 = call float @llvm.sqrt.f32.12(float %222) + %224 = fneg float %168 + %225 = fmul float %223, %224 + %226 = fmul float %225, 0.000000e+00 + %227 = bitcast i32 %131 to float + %228 = fadd float %227, %226 + %229 = fmul float %214, %228 + %230 = fadd float %200, %229 + %231 = call float @llvm.sqrt.f32.13(float %230) + %232 = fadd float %231, 0.000000e+00 + %233 = fdiv float %172, %232 + %234 = fmul float %233, 2.000000e+00 + %235 = bitcast i32 %152 to float + %236 = bitcast i32 %152 to float + %237 = fmul float %235, %236 + %238 = fadd float %237, 0.000000e+00 + %239 = bitcast i32 %131 to float + %240 = bitcast i32 %131 to float + %241 = fmul float %239, %240 + %242 = fadd float %238, %241 + %243 = call float @llvm.sqrt.f32.14(float %242) + %244 = fneg float %168 + %245 = fmul float %243, %244 + %246 = bitcast i32 %152 to float + %247 = fadd float %246, %245 + %248 = bitcast i32 %152 to float + %249 = bitcast i32 %152 to float + %250 = fmul float %248, %249 + %251 = fadd float %250, 0.000000e+00 + %252 = bitcast i32 %131 to float + %253 = bitcast i32 %131 to float + %254 = fmul float %252, %253 + %255 = fadd float %251, %254 + %256 = call float @llvm.sqrt.f32.15(float %255) + %257 = fneg float %168 + %258 = fmul float %256, %257 + %259 = bitcast i32 %152 to float + %260 = fadd float %259, %258 + %261 = bitcast i32 %152 to float + %262 = bitcast i32 %152 to float + %263 = fmul float %261, %262 + %264 = fadd float %263, 
0.000000e+00 + %265 = bitcast i32 %131 to float + %266 = bitcast i32 %131 to float + %267 = fmul float %265, %266 + %268 = fadd float %264, %267 + %269 = call float @llvm.sqrt.f32.16(float %268) + %270 = fneg float %168 + %271 = fmul float %269, %270 + %272 = bitcast i32 %152 to float + %273 = fadd float %272, %271 + %274 = fmul float %260, %273 + %275 = fadd float %274, 0.000000e+00 + %276 = bitcast i32 %152 to float + %277 = bitcast i32 %152 to float + %278 = fmul float %276, %277 + %279 = fadd float %278, 0.000000e+00 + %280 = bitcast i32 %131 to float + %281 = bitcast i32 %131 to float + %282 = fmul float %280, %281 + %283 = fadd float %279, %282 + %284 = call float @llvm.sqrt.f32.17(float %283) + %285 = fneg float %168 + %286 = fmul float %284, %285 + %287 = fmul float %286, 0.000000e+00 + %288 = bitcast i32 %131 to float + %289 = fadd float %288, %287 + %290 = bitcast i32 %152 to float + %291 = bitcast i32 %152 to float + %292 = fmul float %290, %291 + %293 = fadd float %292, 0.000000e+00 + %294 = bitcast i32 %131 to float + %295 = bitcast i32 %131 to float + %296 = fmul float %294, %295 + %297 = fadd float %293, %296 + %298 = call float @llvm.sqrt.f32.18(float %297) + %299 = fneg float %168 + %300 = fmul float %298, %299 + %301 = fmul float %300, 0.000000e+00 + %302 = bitcast i32 %131 to float + %303 = fadd float %302, %301 + %304 = fmul float %289, %303 + %305 = fadd float %275, %304 + %306 = call float @llvm.sqrt.f32.19(float %305) + %307 = fadd float %306, 0.000000e+00 + %308 = fdiv float %247, %307 + %309 = fmul float %234, %308 + %310 = insertelement <4 x float> %149, float %309, i32 1 + %311 = bitcast i32 %152 to float + %312 = bitcast i32 %152 to float + %313 = fmul float %311, %312 + %314 = fadd float %313, 0.000000e+00 + %315 = bitcast i32 %131 to float + %316 = bitcast i32 %131 to float + %317 = fmul float %315, %316 + %318 = fadd float %314, %317 + %319 = call float @llvm.sqrt.f32.20(float %318) + %320 = fneg float %168 + %321 = fmul float %319, %320 + %322 = bitcast i32 %152 to float + %323 = fadd float %322, %321 + %324 = bitcast i32 %152 to float + %325 = bitcast i32 %152 to float + %326 = fmul float %324, %325 + %327 = fadd float %326, 0.000000e+00 + %328 = bitcast i32 %131 to float + %329 = bitcast i32 %131 to float + %330 = fmul float %328, %329 + %331 = fadd float %327, %330 + %332 = call float @llvm.sqrt.f32.21(float %331) + %333 = fneg float %168 + %334 = fmul float %332, %333 + %335 = bitcast i32 %152 to float + %336 = fadd float %335, %334 + %337 = bitcast i32 %152 to float + %338 = bitcast i32 %152 to float + %339 = fmul float %337, %338 + %340 = fadd float %339, 0.000000e+00 + %341 = bitcast i32 %131 to float + %342 = bitcast i32 %131 to float + %343 = fmul float %341, %342 + %344 = fadd float %340, %343 + %345 = call float @llvm.sqrt.f32.22(float %344) + %346 = fneg float %168 + %347 = fmul float %345, %346 + %348 = bitcast i32 %152 to float + %349 = fadd float %348, %347 + %350 = fmul float %336, %349 + %351 = fadd float %350, 0.000000e+00 + %352 = bitcast i32 %152 to float + %353 = bitcast i32 %152 to float + %354 = fmul float %352, %353 + %355 = fadd float %354, 0.000000e+00 + %356 = bitcast i32 %131 to float + %357 = bitcast i32 %131 to float + %358 = fmul float %356, %357 + %359 = fadd float %355, %358 + %360 = call float @llvm.sqrt.f32.23(float %359) + %361 = fneg float %168 + %362 = fmul float %360, %361 + %363 = fmul float %362, 0.000000e+00 + %364 = bitcast i32 %131 to float + %365 = fadd float %364, %363 + %366 = bitcast i32 %152 to float + %367 = 
bitcast i32 %152 to float + %368 = fmul float %366, %367 + %369 = fadd float %368, 0.000000e+00 + %370 = bitcast i32 %131 to float + %371 = bitcast i32 %131 to float + %372 = fmul float %370, %371 + %373 = fadd float %369, %372 + %374 = call float @llvm.sqrt.f32.24(float %373) + %375 = fneg float %168 + %376 = fmul float %374, %375 + %377 = fmul float %376, 0.000000e+00 + %378 = bitcast i32 %131 to float + %379 = fadd float %378, %377 + %380 = fmul float %365, %379 + %381 = fadd float %351, %380 + %382 = call float @llvm.sqrt.f32.25(float %381) + %383 = fadd float %382, 0.000000e+00 + %384 = fdiv float %323, %383 + %385 = fmul float %384, 2.000000e+00 + %386 = bitcast i32 %152 to float + %387 = bitcast i32 %152 to float + %388 = fmul float %386, %387 + %389 = fadd float %388, 0.000000e+00 + %390 = bitcast i32 %131 to float + %391 = bitcast i32 %131 to float + %392 = fmul float %390, %391 + %393 = fadd float %389, %392 + %394 = call float @llvm.sqrt.f32.26(float %393) + %395 = fneg float %168 + %396 = fmul float %394, %395 + %397 = fmul float %396, 0.000000e+00 + %398 = bitcast i32 %131 to float + %399 = fadd float %398, %397 + %400 = bitcast i32 %152 to float + %401 = bitcast i32 %152 to float + %402 = fmul float %400, %401 + %403 = fadd float %402, 0.000000e+00 + %404 = bitcast i32 %131 to float + %405 = bitcast i32 %131 to float + %406 = fmul float %404, %405 + %407 = fadd float %403, %406 + %408 = call float @llvm.sqrt.f32.27(float %407) + %409 = fneg float %168 + %410 = fmul float %408, %409 + %411 = bitcast i32 %152 to float + %412 = fadd float %411, %410 + %413 = bitcast i32 %152 to float + %414 = bitcast i32 %152 to float + %415 = fmul float %413, %414 + %416 = fadd float %415, 0.000000e+00 + %417 = bitcast i32 %131 to float + %418 = bitcast i32 %131 to float + %419 = fmul float %417, %418 + %420 = fadd float %416, %419 + %421 = call float @llvm.sqrt.f32.28(float %420) + %422 = fneg float %168 + %423 = fmul float %421, %422 + %424 = bitcast i32 %152 to float + %425 = fadd float %424, %423 + %426 = fmul float %412, %425 + %427 = fadd float %426, 0.000000e+00 + %428 = bitcast i32 %152 to float + %429 = bitcast i32 %152 to float + %430 = fmul float %428, %429 + %431 = fadd float %430, 0.000000e+00 + %432 = bitcast i32 %131 to float + %433 = bitcast i32 %131 to float + %434 = fmul float %432, %433 + %435 = fadd float %431, %434 + %436 = call float @llvm.sqrt.f32.29(float %435) + %437 = fneg float %168 + %438 = fmul float %436, %437 + %439 = fmul float %438, 0.000000e+00 + %440 = bitcast i32 %131 to float + %441 = fadd float %440, %439 + %442 = bitcast i32 %152 to float + %443 = bitcast i32 %152 to float + %444 = fmul float %442, %443 + %445 = fadd float %444, 0.000000e+00 + %446 = bitcast i32 %131 to float + %447 = bitcast i32 %131 to float + %448 = fmul float %446, %447 + %449 = fadd float %445, %448 + %450 = call float @llvm.sqrt.f32.30(float %449) + %451 = fneg float %168 + %452 = fmul float %450, %451 + %453 = fmul float %452, 0.000000e+00 + %454 = bitcast i32 %131 to float + %455 = fadd float %454, %453 + %456 = fmul float %441, %455 + %457 = fadd float %427, %456 + %458 = call float @llvm.sqrt.f32.31(float %457) + %459 = fadd float %458, 0.000000e+00 + %460 = fdiv float %399, %459 + %461 = fmul float %385, %460 + %462 = insertelement <4 x float> %310, float %461, i32 2 + %463 = bitcast i32 %152 to float + %464 = bitcast i32 %152 to float + %465 = fmul float %463, %464 + %466 = fadd float %465, 0.000000e+00 + %467 = bitcast i32 %131 to float + %468 = bitcast i32 %131 to float + 
%469 = fmul float %467, %468 + %470 = fadd float %466, %469 + %471 = call float @llvm.sqrt.f32.32(float %470) + %472 = fneg float %168 + %473 = fmul float %471, %472 + %474 = fmul float %473, 0.000000e+00 + %475 = bitcast i32 %131 to float + %476 = fadd float %475, %474 + %477 = bitcast i32 %152 to float + %478 = bitcast i32 %152 to float + %479 = fmul float %477, %478 + %480 = fadd float %479, 0.000000e+00 + %481 = bitcast i32 %131 to float + %482 = bitcast i32 %131 to float + %483 = fmul float %481, %482 + %484 = fadd float %480, %483 + %485 = call float @llvm.sqrt.f32.33(float %484) + %486 = fneg float %168 + %487 = fmul float %485, %486 + %488 = bitcast i32 %152 to float + %489 = fadd float %488, %487 + %490 = bitcast i32 %152 to float + %491 = bitcast i32 %152 to float + %492 = fmul float %490, %491 + %493 = fadd float %492, 0.000000e+00 + %494 = bitcast i32 %131 to float + %495 = bitcast i32 %131 to float + %496 = fmul float %494, %495 + %497 = fadd float %493, %496 + %498 = call float @llvm.sqrt.f32.34(float %497) + %499 = fneg float %168 + %500 = fmul float %498, %499 + %501 = bitcast i32 %152 to float + %502 = fadd float %501, %500 + %503 = fmul float %489, %502 + %504 = fadd float %503, 0.000000e+00 + %505 = bitcast i32 %152 to float + %506 = bitcast i32 %152 to float + %507 = fmul float %505, %506 + %508 = fadd float %507, 0.000000e+00 + %509 = bitcast i32 %131 to float + %510 = bitcast i32 %131 to float + %511 = fmul float %509, %510 + %512 = fadd float %508, %511 + %513 = call float @llvm.sqrt.f32.35(float %512) + %514 = fneg float %168 + %515 = fmul float %513, %514 + %516 = fmul float %515, 0.000000e+00 + %517 = bitcast i32 %131 to float + %518 = fadd float %517, %516 + %519 = bitcast i32 %152 to float + %520 = bitcast i32 %152 to float + %521 = fmul float %519, %520 + %522 = fadd float %521, 0.000000e+00 + %523 = bitcast i32 %131 to float + %524 = bitcast i32 %131 to float + %525 = fmul float %523, %524 + %526 = fadd float %522, %525 + %527 = call float @llvm.sqrt.f32.36(float %526) + %528 = fneg float %168 + %529 = fmul float %527, %528 + %530 = fmul float %529, 0.000000e+00 + %531 = bitcast i32 %131 to float + %532 = fadd float %531, %530 + %533 = fmul float %518, %532 + %534 = fadd float %504, %533 + %535 = call float @llvm.sqrt.f32.37(float %534) + %536 = fadd float %535, 0.000000e+00 + %537 = fdiv float %476, %536 + %538 = fmul float %537, 2.000000e+00 + %539 = bitcast i32 %152 to float + %540 = bitcast i32 %152 to float + %541 = fmul float %539, %540 + %542 = fadd float %541, 0.000000e+00 + %543 = bitcast i32 %131 to float + %544 = bitcast i32 %131 to float + %545 = fmul float %543, %544 + %546 = fadd float %542, %545 + %547 = call float @llvm.sqrt.f32.38(float %546) + %548 = fneg float %168 + %549 = fmul float %547, %548 + %550 = bitcast i32 %152 to float + %551 = fadd float %550, %549 + %552 = bitcast i32 %152 to float + %553 = bitcast i32 %152 to float + %554 = fmul float %552, %553 + %555 = fadd float %554, 0.000000e+00 + %556 = bitcast i32 %131 to float + %557 = bitcast i32 %131 to float + %558 = fmul float %556, %557 + %559 = fadd float %555, %558 + %560 = call float @llvm.sqrt.f32.39(float %559) + %561 = fneg float %168 + %562 = fmul float %560, %561 + %563 = bitcast i32 %152 to float + %564 = fadd float %563, %562 + %565 = bitcast i32 %152 to float + %566 = bitcast i32 %152 to float + %567 = fmul float %565, %566 + %568 = fadd float %567, 0.000000e+00 + %569 = bitcast i32 %131 to float + %570 = bitcast i32 %131 to float + %571 = fmul float %569, %570 + %572 = 
fadd float %568, %571 + %573 = call float @llvm.sqrt.f32.40(float %572) + %574 = fneg float %168 + %575 = fmul float %573, %574 + %576 = bitcast i32 %152 to float + %577 = fadd float %576, %575 + %578 = fmul float %564, %577 + %579 = fadd float %578, 0.000000e+00 + %580 = bitcast i32 %152 to float + %581 = bitcast i32 %152 to float + %582 = fmul float %580, %581 + %583 = fadd float %582, 0.000000e+00 + %584 = bitcast i32 %131 to float + %585 = bitcast i32 %131 to float + %586 = fmul float %584, %585 + %587 = fadd float %583, %586 + %588 = call float @llvm.sqrt.f32.41(float %587) + %589 = fneg float %168 + %590 = fmul float %588, %589 + %591 = fmul float %590, 0.000000e+00 + %592 = bitcast i32 %131 to float + %593 = fadd float %592, %591 + %594 = bitcast i32 %152 to float + %595 = bitcast i32 %152 to float + %596 = fmul float %594, %595 + %597 = fadd float %596, 0.000000e+00 + %598 = bitcast i32 %131 to float + %599 = bitcast i32 %131 to float + %600 = fmul float %598, %599 + %601 = fadd float %597, %600 + %602 = call float @llvm.sqrt.f32.42(float %601) + %603 = fneg float %168 + %604 = fmul float %602, %603 + %605 = fmul float %604, 0.000000e+00 + %606 = bitcast i32 %131 to float + %607 = fadd float %606, %605 + %608 = fmul float %593, %607 + %609 = fadd float %579, %608 + %610 = call float @llvm.sqrt.f32.43(float %609) + %611 = fadd float %610, 0.000000e+00 + %612 = fdiv float %551, %611 + %613 = fmul float %538, %612 + %614 = insertelement <4 x float> %462, float %613, i32 3 + %615 = fsub <4 x float> , %614 + %616 = bitcast i32 %152 to float + %617 = bitcast i32 %152 to float + %618 = fmul float %616, %617 + %619 = fadd float %618, 0.000000e+00 + %620 = bitcast i32 %131 to float + %621 = bitcast i32 %131 to float + %622 = fmul float %620, %621 + %623 = fadd float %619, %622 + %624 = call float @llvm.sqrt.f32.44(float %623) + %625 = fneg float %168 + %626 = fmul float %624, %625 + %627 = fmul float %626, 0.000000e+00 + %628 = bitcast i32 %131 to float + %629 = fadd float %628, %627 + %630 = bitcast i32 %152 to float + %631 = bitcast i32 %152 to float + %632 = fmul float %630, %631 + %633 = fadd float %632, 0.000000e+00 + %634 = bitcast i32 %131 to float + %635 = bitcast i32 %131 to float + %636 = fmul float %634, %635 + %637 = fadd float %633, %636 + %638 = call float @llvm.sqrt.f32.45(float %637) + %639 = fneg float %168 + %640 = fmul float %638, %639 + %641 = bitcast i32 %152 to float + %642 = fadd float %641, %640 + %643 = bitcast i32 %152 to float + %644 = bitcast i32 %152 to float + %645 = fmul float %643, %644 + %646 = fadd float %645, 0.000000e+00 + %647 = bitcast i32 %131 to float + %648 = bitcast i32 %131 to float + %649 = fmul float %647, %648 + %650 = fadd float %646, %649 + %651 = call float @llvm.sqrt.f32.46(float %650) + %652 = fneg float %168 + %653 = fmul float %651, %652 + %654 = bitcast i32 %152 to float + %655 = fadd float %654, %653 + %656 = fmul float %642, %655 + %657 = fadd float %656, 0.000000e+00 + %658 = bitcast i32 %152 to float + %659 = bitcast i32 %152 to float + %660 = fmul float %658, %659 + %661 = fadd float %660, 0.000000e+00 + %662 = bitcast i32 %131 to float + %663 = bitcast i32 %131 to float + %664 = fmul float %662, %663 + %665 = fadd float %661, %664 + %666 = call float @llvm.sqrt.f32.47(float %665) + %667 = fneg float %168 + %668 = fmul float %666, %667 + %669 = fmul float %668, 0.000000e+00 + %670 = bitcast i32 %131 to float + %671 = fadd float %670, %669 + %672 = bitcast i32 %152 to float + %673 = bitcast i32 %152 to float + %674 = fmul float %672, 
%673 + %675 = fadd float %674, 0.000000e+00 + %676 = bitcast i32 %131 to float + %677 = bitcast i32 %131 to float + %678 = fmul float %676, %677 + %679 = fadd float %675, %678 + %680 = call float @llvm.sqrt.f32.48(float %679) + %681 = fneg float %168 + %682 = fmul float %680, %681 + %683 = fmul float %682, 0.000000e+00 + %684 = bitcast i32 %131 to float + %685 = fadd float %684, %683 + %686 = fmul float %671, %685 + %687 = fadd float %657, %686 + %688 = call float @llvm.sqrt.f32.49(float %687) + %689 = fadd float %688, 0.000000e+00 + %690 = fdiv float %629, %689 + %691 = fmul float %690, 2.000000e+00 + %692 = bitcast i32 %152 to float + %693 = bitcast i32 %152 to float + %694 = fmul float %692, %693 + %695 = fadd float %694, 0.000000e+00 + %696 = bitcast i32 %131 to float + %697 = bitcast i32 %131 to float + %698 = fmul float %696, %697 + %699 = fadd float %695, %698 + %700 = call float @llvm.sqrt.f32.50(float %699) + %701 = fneg float %168 + %702 = fmul float %700, %701 + %703 = fmul float %702, 0.000000e+00 + %704 = bitcast i32 %131 to float + %705 = fadd float %704, %703 + %706 = bitcast i32 %152 to float + %707 = bitcast i32 %152 to float + %708 = fmul float %706, %707 + %709 = fadd float %708, 0.000000e+00 + %710 = bitcast i32 %131 to float + %711 = bitcast i32 %131 to float + %712 = fmul float %710, %711 + %713 = fadd float %709, %712 + %714 = call float @llvm.sqrt.f32.51(float %713) + %715 = fneg float %168 + %716 = fmul float %714, %715 + %717 = bitcast i32 %152 to float + %718 = fadd float %717, %716 + %719 = bitcast i32 %152 to float + %720 = bitcast i32 %152 to float + %721 = fmul float %719, %720 + %722 = fadd float %721, 0.000000e+00 + %723 = bitcast i32 %131 to float + %724 = bitcast i32 %131 to float + %725 = fmul float %723, %724 + %726 = fadd float %722, %725 + %727 = call float @llvm.sqrt.f32.52(float %726) + %728 = fneg float %168 + %729 = fmul float %727, %728 + %730 = bitcast i32 %152 to float + %731 = fadd float %730, %729 + %732 = fmul float %718, %731 + %733 = fadd float %732, 0.000000e+00 + %734 = bitcast i32 %152 to float + %735 = bitcast i32 %152 to float + %736 = fmul float %734, %735 + %737 = fadd float %736, 0.000000e+00 + %738 = bitcast i32 %131 to float + %739 = bitcast i32 %131 to float + %740 = fmul float %738, %739 + %741 = fadd float %737, %740 + %742 = call float @llvm.sqrt.f32.53(float %741) + %743 = fneg float %168 + %744 = fmul float %742, %743 + %745 = fmul float %744, 0.000000e+00 + %746 = bitcast i32 %131 to float + %747 = fadd float %746, %745 + %748 = bitcast i32 %152 to float + %749 = bitcast i32 %152 to float + %750 = fmul float %748, %749 + %751 = fadd float %750, 0.000000e+00 + %752 = bitcast i32 %131 to float + %753 = bitcast i32 %131 to float + %754 = fmul float %752, %753 + %755 = fadd float %751, %754 + %756 = call float @llvm.sqrt.f32.54(float %755) + %757 = fneg float %168 + %758 = fmul float %756, %757 + %759 = fmul float %758, 0.000000e+00 + %760 = bitcast i32 %131 to float + %761 = fadd float %760, %759 + %762 = fmul float %747, %761 + %763 = fadd float %733, %762 + %764 = call float @llvm.sqrt.f32.55(float %763) + %765 = fadd float %764, 0.000000e+00 + %766 = fdiv float %705, %765 + %767 = fmul float %691, %766 + %768 = fsub float 1.000000e+00, %767 + %769 = insertelement <4 x float> zeroinitializer, float %768, i32 0 + %770 = insertelement <4 x float> %769, float 0.000000e+00, i32 1 + %771 = insertelement <4 x float> %770, float 0.000000e+00, i32 2 + %772 = insertelement <4 x float> %771, float 0.000000e+00, i32 3 + %773 = 
shufflevector <4 x float> %615, <4 x float> %772, <8 x i32> + %774 = extractelement <8 x float> %773, i32 0 + %775 = bitcast i32* %23 to float* + %776 = getelementptr float, float* %2, i32 0 + %777 = getelementptr inbounds float, float* %776, i64 3 + %778 = bitcast float* %777 to i32* + %779 = bitcast i32* %778 to float* + store float %774, float* %779, align 4 + %780 = extractelement <8 x float> %773, i32 1 + %781 = bitcast i32* %60 to float* + %782 = alloca [4 x float], align 16 + %783 = bitcast [4 x float]* %782 to i32* + %784 = bitcast i32* %783 to float* + store float %780, float* %784, align 4 + %785 = extractelement <8 x float> %773, i32 2 + %786 = bitcast i32* %63 to float* + %787 = getelementptr inbounds [4 x float], [4 x float]* %782, i64 0, i64 1 + %788 = bitcast float* %787 to i32* + %789 = bitcast i32* %788 to float* + store float %785, float* %789, align 4 + %790 = extractelement <8 x float> %773, i32 3 + %791 = bitcast i32* %66 to float* + %792 = getelementptr inbounds [4 x float], [4 x float]* %782, i64 0, i64 2 + %793 = bitcast float* %792 to i32* + %794 = bitcast i32* %793 to float* + store float %790, float* %794, align 4 + %795 = extractelement <8 x float> %773, i32 4 + %796 = bitcast i32* %69 to float* + %797 = getelementptr inbounds [4 x float], [4 x float]* %782, i64 0, i64 3 + %798 = bitcast float* %797 to i32* + %799 = bitcast i32* %798 to float* + store float %795, float* %799, align 4 + %800 = bitcast float* %1 to i8* + %801 = alloca [4 x float], align 16 + %802 = bitcast [4 x float]* %801 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %800, i8* nonnull align 16 dereferenceable(16) %802, i64 16, i1 false) + store float 0.000000e+00, float* %2, align 4 + %803 = bitcast i32 %152 to float + %804 = bitcast i32 %152 to float + %805 = fmul float %803, %804 + %806 = fadd float %805, 0.000000e+00 + %807 = load i32, i32* %130, align 4 + %808 = bitcast i32 %807 to float + %809 = bitcast i32 %807 to float + %810 = fmul float %808, %809 + %811 = fadd float %806, %810 + %812 = call float @llvm.sqrt.f32.56(float %811) + %813 = sitofp i32 %167 to float + %814 = fneg float %813 + %815 = fmul float %812, %814 + %816 = bitcast i32 %152 to float + %817 = fadd float %816, %815 + %818 = bitcast i32 %152 to float + %819 = bitcast i32 %152 to float + %820 = fmul float %818, %819 + %821 = fadd float %820, 0.000000e+00 + %822 = bitcast i32 %807 to float + %823 = bitcast i32 %807 to float + %824 = fmul float %822, %823 + %825 = fadd float %821, %824 + %826 = call float @llvm.sqrt.f32.57(float %825) + %827 = fneg float %813 + %828 = fmul float %826, %827 + %829 = bitcast i32 %152 to float + %830 = fadd float %829, %828 + %831 = bitcast i32 %152 to float + %832 = bitcast i32 %152 to float + %833 = fmul float %831, %832 + %834 = fadd float %833, 0.000000e+00 + %835 = bitcast i32 %807 to float + %836 = bitcast i32 %807 to float + %837 = fmul float %835, %836 + %838 = fadd float %834, %837 + %839 = call float @llvm.sqrt.f32.58(float %838) + %840 = fneg float %813 + %841 = fmul float %839, %840 + %842 = bitcast i32 %152 to float + %843 = fadd float %842, %841 + %844 = fmul float %830, %843 + %845 = fadd float %844, 0.000000e+00 + %846 = bitcast i32 %152 to float + %847 = bitcast i32 %152 to float + %848 = fmul float %846, %847 + %849 = fadd float %848, 0.000000e+00 + %850 = bitcast i32 %807 to float + %851 = bitcast i32 %807 to float + %852 = fmul float %850, %851 + %853 = fadd float %849, %852 + %854 = call float @llvm.sqrt.f32.59(float %853) + %855 = 
fneg float %813 + %856 = fmul float %854, %855 + %857 = fmul float %856, 0.000000e+00 + %858 = bitcast i32 %807 to float + %859 = fadd float %858, %857 + %860 = bitcast i32 %152 to float + %861 = bitcast i32 %152 to float + %862 = fmul float %860, %861 + %863 = fadd float %862, 0.000000e+00 + %864 = bitcast i32 %807 to float + %865 = bitcast i32 %807 to float + %866 = fmul float %864, %865 + %867 = fadd float %863, %866 + %868 = call float @llvm.sqrt.f32.60(float %867) + %869 = fneg float %813 + %870 = fmul float %868, %869 + %871 = fmul float %870, 0.000000e+00 + %872 = bitcast i32 %807 to float + %873 = fadd float %872, %871 + %874 = fmul float %859, %873 + %875 = fadd float %845, %874 + %876 = call float @llvm.sqrt.f32.61(float %875) + %877 = fadd float %876, 0.000000e+00 + %878 = fdiv float %817, %877 + %879 = fmul float %878, 2.000000e+00 + %880 = bitcast i32 %152 to float + %881 = bitcast i32 %152 to float + %882 = fmul float %880, %881 + %883 = fadd float %882, 0.000000e+00 + %884 = bitcast i32 %807 to float + %885 = bitcast i32 %807 to float + %886 = fmul float %884, %885 + %887 = fadd float %883, %886 + %888 = call float @llvm.sqrt.f32.62(float %887) + %889 = fneg float %813 + %890 = fmul float %888, %889 + %891 = bitcast i32 %152 to float + %892 = fadd float %891, %890 + %893 = bitcast i32 %152 to float + %894 = bitcast i32 %152 to float + %895 = fmul float %893, %894 + %896 = fadd float %895, 0.000000e+00 + %897 = bitcast i32 %807 to float + %898 = bitcast i32 %807 to float + %899 = fmul float %897, %898 + %900 = fadd float %896, %899 + %901 = call float @llvm.sqrt.f32.63(float %900) + %902 = fneg float %813 + %903 = fmul float %901, %902 + %904 = bitcast i32 %152 to float + %905 = fadd float %904, %903 + %906 = bitcast i32 %152 to float + %907 = bitcast i32 %152 to float + %908 = fmul float %906, %907 + %909 = fadd float %908, 0.000000e+00 + %910 = bitcast i32 %807 to float + %911 = bitcast i32 %807 to float + %912 = fmul float %910, %911 + %913 = fadd float %909, %912 + %914 = call float @llvm.sqrt.f32.64(float %913) + %915 = fneg float %813 + %916 = fmul float %914, %915 + %917 = bitcast i32 %152 to float + %918 = fadd float %917, %916 + %919 = fmul float %905, %918 + %920 = fadd float %919, 0.000000e+00 + %921 = bitcast i32 %152 to float + %922 = bitcast i32 %152 to float + %923 = fmul float %921, %922 + %924 = fadd float %923, 0.000000e+00 + %925 = bitcast i32 %807 to float + %926 = bitcast i32 %807 to float + %927 = fmul float %925, %926 + %928 = fadd float %924, %927 + %929 = call float @llvm.sqrt.f32.65(float %928) + %930 = fneg float %813 + %931 = fmul float %929, %930 + %932 = fmul float %931, 0.000000e+00 + %933 = bitcast i32 %807 to float + %934 = fadd float %933, %932 + %935 = bitcast i32 %152 to float + %936 = bitcast i32 %152 to float + %937 = fmul float %935, %936 + %938 = fadd float %937, 0.000000e+00 + %939 = bitcast i32 %807 to float + %940 = bitcast i32 %807 to float + %941 = fmul float %939, %940 + %942 = fadd float %938, %941 + %943 = call float @llvm.sqrt.f32.66(float %942) + %944 = fneg float %813 + %945 = fmul float %943, %944 + %946 = fmul float %945, 0.000000e+00 + %947 = bitcast i32 %807 to float + %948 = fadd float %947, %946 + %949 = fmul float %934, %948 + %950 = fadd float %920, %949 + %951 = call float @llvm.sqrt.f32.67(float %950) + %952 = fadd float %951, 0.000000e+00 + %953 = fdiv float %892, %952 + %954 = fmul float %879, %953 + %955 = fsub float 1.000000e+00, %954 + %956 = insertelement <4 x float> zeroinitializer, float %955, i32 0 + %957 = 
insertelement <4 x float> %956, float 0.000000e+00, i32 1 + %958 = insertelement <4 x float> %957, float 0.000000e+00, i32 2 + %959 = insertelement <4 x float> %958, float 0.000000e+00, i32 3 + %960 = getelementptr float, float* %0, i32 0 + %961 = load float, float* %960, align 4 + %962 = insertelement <4 x float> zeroinitializer, float %961, i32 0 + %963 = insertelement <4 x float> %962, float 0.000000e+00, i32 1 + %964 = insertelement <4 x float> %963, float 0.000000e+00, i32 2 + %965 = insertelement <4 x float> %964, float 0.000000e+00, i32 3 + %966 = call <4 x float> @llvm.fma.f32.68(<4 x float> %959, <4 x float> %965, <4 x float> zeroinitializer) + %967 = extractelement <4 x float> %966, i32 0 + store float %967, float* %2, align 4 + %968 = bitcast i32 %152 to float + %969 = bitcast i32 %152 to float + %970 = fmul float %968, %969 + %971 = fadd float %970, 0.000000e+00 + %972 = bitcast i32 %807 to float + %973 = bitcast i32 %807 to float + %974 = fmul float %972, %973 + %975 = fadd float %971, %974 + %976 = call float @llvm.sqrt.f32.69(float %975) + %977 = fneg float %813 + %978 = fmul float %976, %977 + %979 = bitcast i32 %152 to float + %980 = fadd float %979, %978 + %981 = bitcast i32 %152 to float + %982 = bitcast i32 %152 to float + %983 = fmul float %981, %982 + %984 = fadd float %983, 0.000000e+00 + %985 = bitcast i32 %807 to float + %986 = bitcast i32 %807 to float + %987 = fmul float %985, %986 + %988 = fadd float %984, %987 + %989 = call float @llvm.sqrt.f32.70(float %988) + %990 = fneg float %813 + %991 = fmul float %989, %990 + %992 = bitcast i32 %152 to float + %993 = fadd float %992, %991 + %994 = bitcast i32 %152 to float + %995 = bitcast i32 %152 to float + %996 = fmul float %994, %995 + %997 = fadd float %996, 0.000000e+00 + %998 = bitcast i32 %807 to float + %999 = bitcast i32 %807 to float + %1000 = fmul float %998, %999 + %1001 = fadd float %997, %1000 + %1002 = call float @llvm.sqrt.f32.71(float %1001) + %1003 = fneg float %813 + %1004 = fmul float %1002, %1003 + %1005 = bitcast i32 %152 to float + %1006 = fadd float %1005, %1004 + %1007 = fmul float %993, %1006 + %1008 = fadd float %1007, 0.000000e+00 + %1009 = bitcast i32 %152 to float + %1010 = bitcast i32 %152 to float + %1011 = fmul float %1009, %1010 + %1012 = fadd float %1011, 0.000000e+00 + %1013 = bitcast i32 %807 to float + %1014 = bitcast i32 %807 to float + %1015 = fmul float %1013, %1014 + %1016 = fadd float %1012, %1015 + %1017 = call float @llvm.sqrt.f32.72(float %1016) + %1018 = fneg float %813 + %1019 = fmul float %1017, %1018 + %1020 = fmul float %1019, 0.000000e+00 + %1021 = bitcast i32 %807 to float + %1022 = fadd float %1021, %1020 + %1023 = bitcast i32 %152 to float + %1024 = bitcast i32 %152 to float + %1025 = fmul float %1023, %1024 + %1026 = fadd float %1025, 0.000000e+00 + %1027 = bitcast i32 %807 to float + %1028 = bitcast i32 %807 to float + %1029 = fmul float %1027, %1028 + %1030 = fadd float %1026, %1029 + %1031 = call float @llvm.sqrt.f32.73(float %1030) + %1032 = fneg float %813 + %1033 = fmul float %1031, %1032 + %1034 = fmul float %1033, 0.000000e+00 + %1035 = bitcast i32 %807 to float + %1036 = fadd float %1035, %1034 + %1037 = fmul float %1022, %1036 + %1038 = fadd float %1008, %1037 + %1039 = call float @llvm.sqrt.f32.74(float %1038) + %1040 = fadd float %1039, 0.000000e+00 + %1041 = fdiv float %980, %1040 + %1042 = fmul float %1041, 2.000000e+00 + %1043 = bitcast i32 %152 to float + %1044 = bitcast i32 %152 to float + %1045 = fmul float %1043, %1044 + %1046 = fadd float %1045, 
0.000000e+00 + %1047 = bitcast i32 %807 to float + %1048 = bitcast i32 %807 to float + %1049 = fmul float %1047, %1048 + %1050 = fadd float %1046, %1049 + %1051 = call float @llvm.sqrt.f32.75(float %1050) + %1052 = fneg float %813 + %1053 = fmul float %1051, %1052 + %1054 = bitcast i32 %152 to float + %1055 = fadd float %1054, %1053 + %1056 = bitcast i32 %152 to float + %1057 = bitcast i32 %152 to float + %1058 = fmul float %1056, %1057 + %1059 = fadd float %1058, 0.000000e+00 + %1060 = bitcast i32 %807 to float + %1061 = bitcast i32 %807 to float + %1062 = fmul float %1060, %1061 + %1063 = fadd float %1059, %1062 + %1064 = call float @llvm.sqrt.f32.76(float %1063) + %1065 = fneg float %813 + %1066 = fmul float %1064, %1065 + %1067 = bitcast i32 %152 to float + %1068 = fadd float %1067, %1066 + %1069 = bitcast i32 %152 to float + %1070 = bitcast i32 %152 to float + %1071 = fmul float %1069, %1070 + %1072 = fadd float %1071, 0.000000e+00 + %1073 = bitcast i32 %807 to float + %1074 = bitcast i32 %807 to float + %1075 = fmul float %1073, %1074 + %1076 = fadd float %1072, %1075 + %1077 = call float @llvm.sqrt.f32.77(float %1076) + %1078 = fneg float %813 + %1079 = fmul float %1077, %1078 + %1080 = bitcast i32 %152 to float + %1081 = fadd float %1080, %1079 + %1082 = fmul float %1068, %1081 + %1083 = fadd float %1082, 0.000000e+00 + %1084 = bitcast i32 %152 to float + %1085 = bitcast i32 %152 to float + %1086 = fmul float %1084, %1085 + %1087 = fadd float %1086, 0.000000e+00 + %1088 = bitcast i32 %807 to float + %1089 = bitcast i32 %807 to float + %1090 = fmul float %1088, %1089 + %1091 = fadd float %1087, %1090 + %1092 = call float @llvm.sqrt.f32.78(float %1091) + %1093 = fneg float %813 + %1094 = fmul float %1092, %1093 + %1095 = fmul float %1094, 0.000000e+00 + %1096 = bitcast i32 %807 to float + %1097 = fadd float %1096, %1095 + %1098 = bitcast i32 %152 to float + %1099 = bitcast i32 %152 to float + %1100 = fmul float %1098, %1099 + %1101 = fadd float %1100, 0.000000e+00 + %1102 = bitcast i32 %807 to float + %1103 = bitcast i32 %807 to float + %1104 = fmul float %1102, %1103 + %1105 = fadd float %1101, %1104 + %1106 = call float @llvm.sqrt.f32.79(float %1105) + %1107 = fneg float %813 + %1108 = fmul float %1106, %1107 + %1109 = fmul float %1108, 0.000000e+00 + %1110 = bitcast i32 %807 to float + %1111 = fadd float %1110, %1109 + %1112 = fmul float %1097, %1111 + %1113 = fadd float %1083, %1112 + %1114 = call float @llvm.sqrt.f32.80(float %1113) + %1115 = fadd float %1114, 0.000000e+00 + %1116 = fdiv float %1055, %1115 + %1117 = fmul float %1042, %1116 + %1118 = fsub float 1.000000e+00, %1117 + %1119 = fmul float %1118, %961 + %1120 = fadd float %1119, 0.000000e+00 + %1121 = bitcast i32 %152 to float + %1122 = bitcast i32 %152 to float + %1123 = fmul float %1121, %1122 + %1124 = fadd float %1123, 0.000000e+00 + %1125 = bitcast i32 %807 to float + %1126 = bitcast i32 %807 to float + %1127 = fmul float %1125, %1126 + %1128 = fadd float %1124, %1127 + %1129 = call float @llvm.sqrt.f32.81(float %1128) + %1130 = fneg float %813 + %1131 = fmul float %1129, %1130 + %1132 = bitcast i32 %152 to float + %1133 = fadd float %1132, %1131 + %1134 = bitcast i32 %152 to float + %1135 = bitcast i32 %152 to float + %1136 = fmul float %1134, %1135 + %1137 = fadd float %1136, 0.000000e+00 + %1138 = bitcast i32 %807 to float + %1139 = bitcast i32 %807 to float + %1140 = fmul float %1138, %1139 + %1141 = fadd float %1137, %1140 + %1142 = call float @llvm.sqrt.f32.82(float %1141) + %1143 = fneg float %813 + %1144 
= fmul float %1142, %1143 + %1145 = bitcast i32 %152 to float + %1146 = fadd float %1145, %1144 + %1147 = bitcast i32 %152 to float + %1148 = bitcast i32 %152 to float + %1149 = fmul float %1147, %1148 + %1150 = fadd float %1149, 0.000000e+00 + %1151 = bitcast i32 %807 to float + %1152 = bitcast i32 %807 to float + %1153 = fmul float %1151, %1152 + %1154 = fadd float %1150, %1153 + %1155 = call float @llvm.sqrt.f32.83(float %1154) + %1156 = fneg float %813 + %1157 = fmul float %1155, %1156 + %1158 = bitcast i32 %152 to float + %1159 = fadd float %1158, %1157 + %1160 = fmul float %1146, %1159 + %1161 = fadd float %1160, 0.000000e+00 + %1162 = bitcast i32 %152 to float + %1163 = bitcast i32 %152 to float + %1164 = fmul float %1162, %1163 + %1165 = fadd float %1164, 0.000000e+00 + %1166 = bitcast i32 %807 to float + %1167 = bitcast i32 %807 to float + %1168 = fmul float %1166, %1167 + %1169 = fadd float %1165, %1168 + %1170 = call float @llvm.sqrt.f32.84(float %1169) + %1171 = fneg float %813 + %1172 = fmul float %1170, %1171 + %1173 = fmul float %1172, 0.000000e+00 + %1174 = bitcast i32 %807 to float + %1175 = fadd float %1174, %1173 + %1176 = bitcast i32 %152 to float + %1177 = bitcast i32 %152 to float + %1178 = fmul float %1176, %1177 + %1179 = fadd float %1178, 0.000000e+00 + %1180 = bitcast i32 %807 to float + %1181 = bitcast i32 %807 to float + %1182 = fmul float %1180, %1181 + %1183 = fadd float %1179, %1182 + %1184 = call float @llvm.sqrt.f32.85(float %1183) + %1185 = fneg float %813 + %1186 = fmul float %1184, %1185 + %1187 = fmul float %1186, 0.000000e+00 + %1188 = bitcast i32 %807 to float + %1189 = fadd float %1188, %1187 + %1190 = fmul float %1175, %1189 + %1191 = fadd float %1161, %1190 + %1192 = call float @llvm.sqrt.f32.86(float %1191) + %1193 = fadd float %1192, 0.000000e+00 + %1194 = fdiv float %1133, %1193 + %1195 = fmul float %1194, 2.000000e+00 + %1196 = bitcast i32 %152 to float + %1197 = bitcast i32 %152 to float + %1198 = fmul float %1196, %1197 + %1199 = fadd float %1198, 0.000000e+00 + %1200 = bitcast i32 %807 to float + %1201 = bitcast i32 %807 to float + %1202 = fmul float %1200, %1201 + %1203 = fadd float %1199, %1202 + %1204 = call float @llvm.sqrt.f32.87(float %1203) + %1205 = fneg float %813 + %1206 = fmul float %1204, %1205 + %1207 = fmul float %1206, 0.000000e+00 + %1208 = bitcast i32 %807 to float + %1209 = fadd float %1208, %1207 + %1210 = bitcast i32 %152 to float + %1211 = bitcast i32 %152 to float + %1212 = fmul float %1210, %1211 + %1213 = fadd float %1212, 0.000000e+00 + %1214 = bitcast i32 %807 to float + %1215 = bitcast i32 %807 to float + %1216 = fmul float %1214, %1215 + %1217 = fadd float %1213, %1216 + %1218 = call float @llvm.sqrt.f32.88(float %1217) + %1219 = fneg float %813 + %1220 = fmul float %1218, %1219 + %1221 = bitcast i32 %152 to float + %1222 = fadd float %1221, %1220 + %1223 = bitcast i32 %152 to float + %1224 = bitcast i32 %152 to float + %1225 = fmul float %1223, %1224 + %1226 = fadd float %1225, 0.000000e+00 + %1227 = bitcast i32 %807 to float + %1228 = bitcast i32 %807 to float + %1229 = fmul float %1227, %1228 + %1230 = fadd float %1226, %1229 + %1231 = call float @llvm.sqrt.f32.89(float %1230) + %1232 = fneg float %813 + %1233 = fmul float %1231, %1232 + %1234 = bitcast i32 %152 to float + %1235 = fadd float %1234, %1233 + %1236 = fmul float %1222, %1235 + %1237 = fadd float %1236, 0.000000e+00 + %1238 = bitcast i32 %152 to float + %1239 = bitcast i32 %152 to float + %1240 = fmul float %1238, %1239 + %1241 = fadd float %1240, 
0.000000e+00 + %1242 = bitcast i32 %807 to float + %1243 = bitcast i32 %807 to float + %1244 = fmul float %1242, %1243 + %1245 = fadd float %1241, %1244 + %1246 = call float @llvm.sqrt.f32.90(float %1245) + %1247 = fneg float %813 + %1248 = fmul float %1246, %1247 + %1249 = fmul float %1248, 0.000000e+00 + %1250 = bitcast i32 %807 to float + %1251 = fadd float %1250, %1249 + %1252 = bitcast i32 %152 to float + %1253 = bitcast i32 %152 to float + %1254 = fmul float %1252, %1253 + %1255 = fadd float %1254, 0.000000e+00 + %1256 = bitcast i32 %807 to float + %1257 = bitcast i32 %807 to float + %1258 = fmul float %1256, %1257 + %1259 = fadd float %1255, %1258 + %1260 = call float @llvm.sqrt.f32.91(float %1259) + %1261 = fneg float %813 + %1262 = fmul float %1260, %1261 + %1263 = fmul float %1262, 0.000000e+00 + %1264 = bitcast i32 %807 to float + %1265 = fadd float %1264, %1263 + %1266 = fmul float %1251, %1265 + %1267 = fadd float %1237, %1266 + %1268 = call float @llvm.sqrt.f32.92(float %1267) + %1269 = fadd float %1268, 0.000000e+00 + %1270 = fdiv float %1209, %1269 + %1271 = fmul float %1195, %1270 + %1272 = fneg float %1271 + %1273 = getelementptr float, float* %0, i32 0 + %1274 = getelementptr inbounds float, float* %1273, i64 2 + %1275 = load float, float* %1274, align 4 + %1276 = fmul float %1272, %1275 + %1277 = fadd float %1120, %1276 + %1278 = insertelement <4 x float> zeroinitializer, float %1277, i32 0 + %1279 = insertelement <4 x float> %1278, float 0.000000e+00, i32 1 + %1280 = insertelement <4 x float> %1279, float 0.000000e+00, i32 2 + %1281 = insertelement <4 x float> %1280, float 0.000000e+00, i32 3 + %1282 = extractelement <4 x float> %1281, i32 0 + store float %1282, float* %2, align 4 + %1283 = extractelement <4 x float> %1281, i32 1 + %1284 = getelementptr float, float* %2, i32 0 + %1285 = getelementptr inbounds float, float* %1284, i64 1 + store float %1283, float* %1285, align 4 + %1286 = bitcast i32 %152 to float + %1287 = bitcast i32 %152 to float + %1288 = fmul float %1286, %1287 + %1289 = fadd float %1288, 0.000000e+00 + %1290 = bitcast i32 %807 to float + %1291 = bitcast i32 %807 to float + %1292 = fmul float %1290, %1291 + %1293 = fadd float %1289, %1292 + %1294 = call float @llvm.sqrt.f32.93(float %1293) + %1295 = fneg float %813 + %1296 = fmul float %1294, %1295 + %1297 = bitcast i32 %152 to float + %1298 = fadd float %1297, %1296 + %1299 = bitcast i32 %152 to float + %1300 = bitcast i32 %152 to float + %1301 = fmul float %1299, %1300 + %1302 = fadd float %1301, 0.000000e+00 + %1303 = bitcast i32 %807 to float + %1304 = bitcast i32 %807 to float + %1305 = fmul float %1303, %1304 + %1306 = fadd float %1302, %1305 + %1307 = call float @llvm.sqrt.f32.94(float %1306) + %1308 = fneg float %813 + %1309 = fmul float %1307, %1308 + %1310 = bitcast i32 %152 to float + %1311 = fadd float %1310, %1309 + %1312 = bitcast i32 %152 to float + %1313 = bitcast i32 %152 to float + %1314 = fmul float %1312, %1313 + %1315 = fadd float %1314, 0.000000e+00 + %1316 = bitcast i32 %807 to float + %1317 = bitcast i32 %807 to float + %1318 = fmul float %1316, %1317 + %1319 = fadd float %1315, %1318 + %1320 = call float @llvm.sqrt.f32.95(float %1319) + %1321 = fneg float %813 + %1322 = fmul float %1320, %1321 + %1323 = bitcast i32 %152 to float + %1324 = fadd float %1323, %1322 + %1325 = fmul float %1311, %1324 + %1326 = fadd float %1325, 0.000000e+00 + %1327 = bitcast i32 %152 to float + %1328 = bitcast i32 %152 to float + %1329 = fmul float %1327, %1328 + %1330 = fadd float %1329, 
0.000000e+00 + %1331 = bitcast i32 %807 to float + %1332 = bitcast i32 %807 to float + %1333 = fmul float %1331, %1332 + %1334 = fadd float %1330, %1333 + %1335 = call float @llvm.sqrt.f32.96(float %1334) + %1336 = fneg float %813 + %1337 = fmul float %1335, %1336 + %1338 = fmul float %1337, 0.000000e+00 + %1339 = bitcast i32 %807 to float + %1340 = fadd float %1339, %1338 + %1341 = bitcast i32 %152 to float + %1342 = bitcast i32 %152 to float + %1343 = fmul float %1341, %1342 + %1344 = fadd float %1343, 0.000000e+00 + %1345 = bitcast i32 %807 to float + %1346 = bitcast i32 %807 to float + %1347 = fmul float %1345, %1346 + %1348 = fadd float %1344, %1347 + %1349 = call float @llvm.sqrt.f32.97(float %1348) + %1350 = fneg float %813 + %1351 = fmul float %1349, %1350 + %1352 = fmul float %1351, 0.000000e+00 + %1353 = bitcast i32 %807 to float + %1354 = fadd float %1353, %1352 + %1355 = fmul float %1340, %1354 + %1356 = fadd float %1326, %1355 + %1357 = call float @llvm.sqrt.f32.98(float %1356) + %1358 = fadd float %1357, 0.000000e+00 + %1359 = fdiv float %1298, %1358 + %1360 = fmul float %1359, 2.000000e+00 + %1361 = bitcast i32 %152 to float + %1362 = bitcast i32 %152 to float + %1363 = fmul float %1361, %1362 + %1364 = fadd float %1363, 0.000000e+00 + %1365 = bitcast i32 %807 to float + %1366 = bitcast i32 %807 to float + %1367 = fmul float %1365, %1366 + %1368 = fadd float %1364, %1367 + %1369 = call float @llvm.sqrt.f32.99(float %1368) + %1370 = fneg float %813 + %1371 = fmul float %1369, %1370 + %1372 = bitcast i32 %152 to float + %1373 = fadd float %1372, %1371 + %1374 = bitcast i32 %152 to float + %1375 = bitcast i32 %152 to float + %1376 = fmul float %1374, %1375 + %1377 = fadd float %1376, 0.000000e+00 + %1378 = bitcast i32 %807 to float + %1379 = bitcast i32 %807 to float + %1380 = fmul float %1378, %1379 + %1381 = fadd float %1377, %1380 + %1382 = call float @llvm.sqrt.f32.100(float %1381) + %1383 = fneg float %813 + %1384 = fmul float %1382, %1383 + %1385 = bitcast i32 %152 to float + %1386 = fadd float %1385, %1384 + %1387 = bitcast i32 %152 to float + %1388 = bitcast i32 %152 to float + %1389 = fmul float %1387, %1388 + %1390 = fadd float %1389, 0.000000e+00 + %1391 = bitcast i32 %807 to float + %1392 = bitcast i32 %807 to float + %1393 = fmul float %1391, %1392 + %1394 = fadd float %1390, %1393 + %1395 = call float @llvm.sqrt.f32.101(float %1394) + %1396 = fneg float %813 + %1397 = fmul float %1395, %1396 + %1398 = bitcast i32 %152 to float + %1399 = fadd float %1398, %1397 + %1400 = fmul float %1386, %1399 + %1401 = fadd float %1400, 0.000000e+00 + %1402 = bitcast i32 %152 to float + %1403 = bitcast i32 %152 to float + %1404 = fmul float %1402, %1403 + %1405 = fadd float %1404, 0.000000e+00 + %1406 = bitcast i32 %807 to float + %1407 = bitcast i32 %807 to float + %1408 = fmul float %1406, %1407 + %1409 = fadd float %1405, %1408 + %1410 = call float @llvm.sqrt.f32.102(float %1409) + %1411 = fneg float %813 + %1412 = fmul float %1410, %1411 + %1413 = fmul float %1412, 0.000000e+00 + %1414 = bitcast i32 %807 to float + %1415 = fadd float %1414, %1413 + %1416 = bitcast i32 %152 to float + %1417 = bitcast i32 %152 to float + %1418 = fmul float %1416, %1417 + %1419 = fadd float %1418, 0.000000e+00 + %1420 = bitcast i32 %807 to float + %1421 = bitcast i32 %807 to float + %1422 = fmul float %1420, %1421 + %1423 = fadd float %1419, %1422 + %1424 = call float @llvm.sqrt.f32.103(float %1423) + %1425 = fneg float %813 + %1426 = fmul float %1424, %1425 + %1427 = fmul float %1426, 
0.000000e+00 + %1428 = bitcast i32 %807 to float + %1429 = fadd float %1428, %1427 + %1430 = fmul float %1415, %1429 + %1431 = fadd float %1401, %1430 + %1432 = call float @llvm.sqrt.f32.104(float %1431) + %1433 = fadd float %1432, 0.000000e+00 + %1434 = fdiv float %1373, %1433 + %1435 = fmul float %1360, %1434 + %1436 = fsub float 1.000000e+00, %1435 + %1437 = insertelement <4 x float> zeroinitializer, float %1436, i32 0 + %1438 = insertelement <4 x float> %1437, float 0.000000e+00, i32 1 + %1439 = insertelement <4 x float> %1438, float 0.000000e+00, i32 2 + %1440 = insertelement <4 x float> %1439, float 0.000000e+00, i32 3 + %1441 = getelementptr float, float* %0, i32 0 + %1442 = getelementptr inbounds float, float* %1441, i64 1 + %1443 = load float, float* %1442, align 4 + %1444 = insertelement <4 x float> zeroinitializer, float %1443, i32 0 + %1445 = insertelement <4 x float> %1444, float 0.000000e+00, i32 1 + %1446 = insertelement <4 x float> %1445, float 0.000000e+00, i32 2 + %1447 = insertelement <4 x float> %1446, float 0.000000e+00, i32 3 + %1448 = call <4 x float> @llvm.fma.f32.105(<4 x float> %1440, <4 x float> %1447, <4 x float> zeroinitializer) + %1449 = extractelement <4 x float> %1448, i32 0 + store float %1449, float* %1285, align 4 + %1450 = bitcast i32 %152 to float + %1451 = bitcast i32 %152 to float + %1452 = fmul float %1450, %1451 + %1453 = fadd float %1452, 0.000000e+00 + %1454 = bitcast i32 %807 to float + %1455 = bitcast i32 %807 to float + %1456 = fmul float %1454, %1455 + %1457 = fadd float %1453, %1456 + %1458 = call float @llvm.sqrt.f32.106(float %1457) + %1459 = fneg float %813 + %1460 = fmul float %1458, %1459 + %1461 = bitcast i32 %152 to float + %1462 = fadd float %1461, %1460 + %1463 = bitcast i32 %152 to float + %1464 = bitcast i32 %152 to float + %1465 = fmul float %1463, %1464 + %1466 = fadd float %1465, 0.000000e+00 + %1467 = bitcast i32 %807 to float + %1468 = bitcast i32 %807 to float + %1469 = fmul float %1467, %1468 + %1470 = fadd float %1466, %1469 + %1471 = call float @llvm.sqrt.f32.107(float %1470) + %1472 = fneg float %813 + %1473 = fmul float %1471, %1472 + %1474 = bitcast i32 %152 to float + %1475 = fadd float %1474, %1473 + %1476 = bitcast i32 %152 to float + %1477 = bitcast i32 %152 to float + %1478 = fmul float %1476, %1477 + %1479 = fadd float %1478, 0.000000e+00 + %1480 = bitcast i32 %807 to float + %1481 = bitcast i32 %807 to float + %1482 = fmul float %1480, %1481 + %1483 = fadd float %1479, %1482 + %1484 = call float @llvm.sqrt.f32.108(float %1483) + %1485 = fneg float %813 + %1486 = fmul float %1484, %1485 + %1487 = bitcast i32 %152 to float + %1488 = fadd float %1487, %1486 + %1489 = fmul float %1475, %1488 + %1490 = fadd float %1489, 0.000000e+00 + %1491 = bitcast i32 %152 to float + %1492 = bitcast i32 %152 to float + %1493 = fmul float %1491, %1492 + %1494 = fadd float %1493, 0.000000e+00 + %1495 = bitcast i32 %807 to float + %1496 = bitcast i32 %807 to float + %1497 = fmul float %1495, %1496 + %1498 = fadd float %1494, %1497 + %1499 = call float @llvm.sqrt.f32.109(float %1498) + %1500 = fneg float %813 + %1501 = fmul float %1499, %1500 + %1502 = fmul float %1501, 0.000000e+00 + %1503 = bitcast i32 %807 to float + %1504 = fadd float %1503, %1502 + %1505 = bitcast i32 %152 to float + %1506 = bitcast i32 %152 to float + %1507 = fmul float %1505, %1506 + %1508 = fadd float %1507, 0.000000e+00 + %1509 = bitcast i32 %807 to float + %1510 = bitcast i32 %807 to float + %1511 = fmul float %1509, %1510 + %1512 = fadd float %1508, %1511 + 
%1513 = call float @llvm.sqrt.f32.110(float %1512) + %1514 = fneg float %813 + %1515 = fmul float %1513, %1514 + %1516 = fmul float %1515, 0.000000e+00 + %1517 = bitcast i32 %807 to float + %1518 = fadd float %1517, %1516 + %1519 = fmul float %1504, %1518 + %1520 = fadd float %1490, %1519 + %1521 = call float @llvm.sqrt.f32.111(float %1520) + %1522 = fadd float %1521, 0.000000e+00 + %1523 = fdiv float %1462, %1522 + %1524 = fmul float %1523, 2.000000e+00 + %1525 = bitcast i32 %152 to float + %1526 = bitcast i32 %152 to float + %1527 = fmul float %1525, %1526 + %1528 = fadd float %1527, 0.000000e+00 + %1529 = bitcast i32 %807 to float + %1530 = bitcast i32 %807 to float + %1531 = fmul float %1529, %1530 + %1532 = fadd float %1528, %1531 + %1533 = call float @llvm.sqrt.f32.112(float %1532) + %1534 = fneg float %813 + %1535 = fmul float %1533, %1534 + %1536 = bitcast i32 %152 to float + %1537 = fadd float %1536, %1535 + %1538 = bitcast i32 %152 to float + %1539 = bitcast i32 %152 to float + %1540 = fmul float %1538, %1539 + %1541 = fadd float %1540, 0.000000e+00 + %1542 = bitcast i32 %807 to float + %1543 = bitcast i32 %807 to float + %1544 = fmul float %1542, %1543 + %1545 = fadd float %1541, %1544 + %1546 = call float @llvm.sqrt.f32.113(float %1545) + %1547 = fneg float %813 + %1548 = fmul float %1546, %1547 + %1549 = bitcast i32 %152 to float + %1550 = fadd float %1549, %1548 + %1551 = bitcast i32 %152 to float + %1552 = bitcast i32 %152 to float + %1553 = fmul float %1551, %1552 + %1554 = fadd float %1553, 0.000000e+00 + %1555 = bitcast i32 %807 to float + %1556 = bitcast i32 %807 to float + %1557 = fmul float %1555, %1556 + %1558 = fadd float %1554, %1557 + %1559 = call float @llvm.sqrt.f32.114(float %1558) + %1560 = fneg float %813 + %1561 = fmul float %1559, %1560 + %1562 = bitcast i32 %152 to float + %1563 = fadd float %1562, %1561 + %1564 = fmul float %1550, %1563 + %1565 = fadd float %1564, 0.000000e+00 + %1566 = bitcast i32 %152 to float + %1567 = bitcast i32 %152 to float + %1568 = fmul float %1566, %1567 + %1569 = fadd float %1568, 0.000000e+00 + %1570 = bitcast i32 %807 to float + %1571 = bitcast i32 %807 to float + %1572 = fmul float %1570, %1571 + %1573 = fadd float %1569, %1572 + %1574 = call float @llvm.sqrt.f32.115(float %1573) + %1575 = fneg float %813 + %1576 = fmul float %1574, %1575 + %1577 = fmul float %1576, 0.000000e+00 + %1578 = bitcast i32 %807 to float + %1579 = fadd float %1578, %1577 + %1580 = bitcast i32 %152 to float + %1581 = bitcast i32 %152 to float + %1582 = fmul float %1580, %1581 + %1583 = fadd float %1582, 0.000000e+00 + %1584 = bitcast i32 %807 to float + %1585 = bitcast i32 %807 to float + %1586 = fmul float %1584, %1585 + %1587 = fadd float %1583, %1586 + %1588 = call float @llvm.sqrt.f32.116(float %1587) + %1589 = fneg float %813 + %1590 = fmul float %1588, %1589 + %1591 = fmul float %1590, 0.000000e+00 + %1592 = bitcast i32 %807 to float + %1593 = fadd float %1592, %1591 + %1594 = fmul float %1579, %1593 + %1595 = fadd float %1565, %1594 + %1596 = call float @llvm.sqrt.f32.117(float %1595) + %1597 = fadd float %1596, 0.000000e+00 + %1598 = fdiv float %1537, %1597 + %1599 = fmul float %1524, %1598 + %1600 = fsub float 1.000000e+00, %1599 + %1601 = fmul float %1600, %1443 + %1602 = fadd float %1601, 0.000000e+00 + %1603 = bitcast i32 %152 to float + %1604 = bitcast i32 %152 to float + %1605 = fmul float %1603, %1604 + %1606 = fadd float %1605, 0.000000e+00 + %1607 = bitcast i32 %807 to float + %1608 = bitcast i32 %807 to float + %1609 = fmul float 
%1607, %1608 + %1610 = fadd float %1606, %1609 + %1611 = call float @llvm.sqrt.f32.118(float %1610) + %1612 = fneg float %813 + %1613 = fmul float %1611, %1612 + %1614 = bitcast i32 %152 to float + %1615 = fadd float %1614, %1613 + %1616 = bitcast i32 %152 to float + %1617 = bitcast i32 %152 to float + %1618 = fmul float %1616, %1617 + %1619 = fadd float %1618, 0.000000e+00 + %1620 = bitcast i32 %807 to float + %1621 = bitcast i32 %807 to float + %1622 = fmul float %1620, %1621 + %1623 = fadd float %1619, %1622 + %1624 = call float @llvm.sqrt.f32.119(float %1623) + %1625 = fneg float %813 + %1626 = fmul float %1624, %1625 + %1627 = bitcast i32 %152 to float + %1628 = fadd float %1627, %1626 + %1629 = bitcast i32 %152 to float + %1630 = bitcast i32 %152 to float + %1631 = fmul float %1629, %1630 + %1632 = fadd float %1631, 0.000000e+00 + %1633 = bitcast i32 %807 to float + %1634 = bitcast i32 %807 to float + %1635 = fmul float %1633, %1634 + %1636 = fadd float %1632, %1635 + %1637 = call float @llvm.sqrt.f32.120(float %1636) + %1638 = fneg float %813 + %1639 = fmul float %1637, %1638 + %1640 = bitcast i32 %152 to float + %1641 = fadd float %1640, %1639 + %1642 = fmul float %1628, %1641 + %1643 = fadd float %1642, 0.000000e+00 + %1644 = bitcast i32 %152 to float + %1645 = bitcast i32 %152 to float + %1646 = fmul float %1644, %1645 + %1647 = fadd float %1646, 0.000000e+00 + %1648 = bitcast i32 %807 to float + %1649 = bitcast i32 %807 to float + %1650 = fmul float %1648, %1649 + %1651 = fadd float %1647, %1650 + %1652 = call float @llvm.sqrt.f32.121(float %1651) + %1653 = fneg float %813 + %1654 = fmul float %1652, %1653 + %1655 = fmul float %1654, 0.000000e+00 + %1656 = bitcast i32 %807 to float + %1657 = fadd float %1656, %1655 + %1658 = bitcast i32 %152 to float + %1659 = bitcast i32 %152 to float + %1660 = fmul float %1658, %1659 + %1661 = fadd float %1660, 0.000000e+00 + %1662 = bitcast i32 %807 to float + %1663 = bitcast i32 %807 to float + %1664 = fmul float %1662, %1663 + %1665 = fadd float %1661, %1664 + %1666 = call float @llvm.sqrt.f32.122(float %1665) + %1667 = fneg float %813 + %1668 = fmul float %1666, %1667 + %1669 = fmul float %1668, 0.000000e+00 + %1670 = bitcast i32 %807 to float + %1671 = fadd float %1670, %1669 + %1672 = fmul float %1657, %1671 + %1673 = fadd float %1643, %1672 + %1674 = call float @llvm.sqrt.f32.123(float %1673) + %1675 = fadd float %1674, 0.000000e+00 + %1676 = fdiv float %1615, %1675 + %1677 = fmul float %1676, 2.000000e+00 + %1678 = bitcast i32 %152 to float + %1679 = bitcast i32 %152 to float + %1680 = fmul float %1678, %1679 + %1681 = fadd float %1680, 0.000000e+00 + %1682 = bitcast i32 %807 to float + %1683 = bitcast i32 %807 to float + %1684 = fmul float %1682, %1683 + %1685 = fadd float %1681, %1684 + %1686 = call float @llvm.sqrt.f32.124(float %1685) + %1687 = fneg float %813 + %1688 = fmul float %1686, %1687 + %1689 = fmul float %1688, 0.000000e+00 + %1690 = bitcast i32 %807 to float + %1691 = fadd float %1690, %1689 + %1692 = bitcast i32 %152 to float + %1693 = bitcast i32 %152 to float + %1694 = fmul float %1692, %1693 + %1695 = fadd float %1694, 0.000000e+00 + %1696 = bitcast i32 %807 to float + %1697 = bitcast i32 %807 to float + %1698 = fmul float %1696, %1697 + %1699 = fadd float %1695, %1698 + %1700 = call float @llvm.sqrt.f32.125(float %1699) + %1701 = fneg float %813 + %1702 = fmul float %1700, %1701 + %1703 = bitcast i32 %152 to float + %1704 = fadd float %1703, %1702 + %1705 = bitcast i32 %152 to float + %1706 = bitcast i32 %152 to 
float + %1707 = fmul float %1705, %1706 + %1708 = fadd float %1707, 0.000000e+00 + %1709 = bitcast i32 %807 to float + %1710 = bitcast i32 %807 to float + %1711 = fmul float %1709, %1710 + %1712 = fadd float %1708, %1711 + %1713 = call float @llvm.sqrt.f32.126(float %1712) + %1714 = fneg float %813 + %1715 = fmul float %1713, %1714 + %1716 = bitcast i32 %152 to float + %1717 = fadd float %1716, %1715 + %1718 = fmul float %1704, %1717 + %1719 = fadd float %1718, 0.000000e+00 + %1720 = bitcast i32 %152 to float + %1721 = bitcast i32 %152 to float + %1722 = fmul float %1720, %1721 + %1723 = fadd float %1722, 0.000000e+00 + %1724 = bitcast i32 %807 to float + %1725 = bitcast i32 %807 to float + %1726 = fmul float %1724, %1725 + %1727 = fadd float %1723, %1726 + %1728 = call float @llvm.sqrt.f32.127(float %1727) + %1729 = fneg float %813 + %1730 = fmul float %1728, %1729 + %1731 = fmul float %1730, 0.000000e+00 + %1732 = bitcast i32 %807 to float + %1733 = fadd float %1732, %1731 + %1734 = bitcast i32 %152 to float + %1735 = bitcast i32 %152 to float + %1736 = fmul float %1734, %1735 + %1737 = fadd float %1736, 0.000000e+00 + %1738 = bitcast i32 %807 to float + %1739 = bitcast i32 %807 to float + %1740 = fmul float %1738, %1739 + %1741 = fadd float %1737, %1740 + %1742 = call float @llvm.sqrt.f32.128(float %1741) + %1743 = fneg float %813 + %1744 = fmul float %1742, %1743 + %1745 = fmul float %1744, 0.000000e+00 + %1746 = bitcast i32 %807 to float + %1747 = fadd float %1746, %1745 + %1748 = fmul float %1733, %1747 + %1749 = fadd float %1719, %1748 + %1750 = call float @llvm.sqrt.f32.129(float %1749) + %1751 = fadd float %1750, 0.000000e+00 + %1752 = fdiv float %1691, %1751 + %1753 = fmul float %1677, %1752 + %1754 = fneg float %1753 + %1755 = load float, float* %144, align 4 + %1756 = fmul float %1754, %1755 + %1757 = fadd float %1602, %1756 + %1758 = insertelement <4 x float> zeroinitializer, float %1757, i32 0 + %1759 = insertelement <4 x float> %1758, float 0.000000e+00, i32 1 + %1760 = insertelement <4 x float> %1759, float 0.000000e+00, i32 2 + %1761 = insertelement <4 x float> %1760, float 0.000000e+00, i32 3 + %1762 = extractelement <4 x float> %1761, i32 0 + store float %1762, float* %1285, align 4 + %1763 = extractelement <4 x float> %1761, i32 1 + %1764 = getelementptr float, float* %2, i32 0 + %1765 = getelementptr inbounds float, float* %1764, i64 2 + store float %1763, float* %1765, align 4 + %1766 = bitcast i32 %152 to float + %1767 = bitcast i32 %152 to float + %1768 = fmul float %1766, %1767 + %1769 = fadd float %1768, 0.000000e+00 + %1770 = bitcast i32 %807 to float + %1771 = bitcast i32 %807 to float + %1772 = fmul float %1770, %1771 + %1773 = fadd float %1769, %1772 + %1774 = call float @llvm.sqrt.f32.130(float %1773) + %1775 = fneg float %813 + %1776 = fmul float %1774, %1775 + %1777 = fmul float %1776, 0.000000e+00 + %1778 = bitcast i32 %807 to float + %1779 = fadd float %1778, %1777 + %1780 = bitcast i32 %152 to float + %1781 = bitcast i32 %152 to float + %1782 = fmul float %1780, %1781 + %1783 = fadd float %1782, 0.000000e+00 + %1784 = bitcast i32 %807 to float + %1785 = bitcast i32 %807 to float + %1786 = fmul float %1784, %1785 + %1787 = fadd float %1783, %1786 + %1788 = call float @llvm.sqrt.f32.131(float %1787) + %1789 = fneg float %813 + %1790 = fmul float %1788, %1789 + %1791 = bitcast i32 %152 to float + %1792 = fadd float %1791, %1790 + %1793 = bitcast i32 %152 to float + %1794 = bitcast i32 %152 to float + %1795 = fmul float %1793, %1794 + %1796 = fadd float 
%1795, 0.000000e+00 + %1797 = bitcast i32 %807 to float + %1798 = bitcast i32 %807 to float + %1799 = fmul float %1797, %1798 + %1800 = fadd float %1796, %1799 + %1801 = call float @llvm.sqrt.f32.132(float %1800) + %1802 = fneg float %813 + %1803 = fmul float %1801, %1802 + %1804 = bitcast i32 %152 to float + %1805 = fadd float %1804, %1803 + %1806 = fmul float %1792, %1805 + %1807 = fadd float %1806, 0.000000e+00 + %1808 = bitcast i32 %152 to float + %1809 = bitcast i32 %152 to float + %1810 = fmul float %1808, %1809 + %1811 = fadd float %1810, 0.000000e+00 + %1812 = bitcast i32 %807 to float + %1813 = bitcast i32 %807 to float + %1814 = fmul float %1812, %1813 + %1815 = fadd float %1811, %1814 + %1816 = call float @llvm.sqrt.f32.133(float %1815) + %1817 = fneg float %813 + %1818 = fmul float %1816, %1817 + %1819 = fmul float %1818, 0.000000e+00 + %1820 = bitcast i32 %807 to float + %1821 = fadd float %1820, %1819 + %1822 = bitcast i32 %152 to float + %1823 = bitcast i32 %152 to float + %1824 = fmul float %1822, %1823 + %1825 = fadd float %1824, 0.000000e+00 + %1826 = bitcast i32 %807 to float + %1827 = bitcast i32 %807 to float + %1828 = fmul float %1826, %1827 + %1829 = fadd float %1825, %1828 + %1830 = call float @llvm.sqrt.f32.134(float %1829) + %1831 = fneg float %813 + %1832 = fmul float %1830, %1831 + %1833 = fmul float %1832, 0.000000e+00 + %1834 = bitcast i32 %807 to float + %1835 = fadd float %1834, %1833 + %1836 = fmul float %1821, %1835 + %1837 = fadd float %1807, %1836 + %1838 = call float @llvm.sqrt.f32.135(float %1837) + %1839 = fadd float %1838, 0.000000e+00 + %1840 = fdiv float %1779, %1839 + %1841 = fmul float %1840, 2.000000e+00 + %1842 = bitcast i32 %152 to float + %1843 = bitcast i32 %152 to float + %1844 = fmul float %1842, %1843 + %1845 = fadd float %1844, 0.000000e+00 + %1846 = bitcast i32 %807 to float + %1847 = bitcast i32 %807 to float + %1848 = fmul float %1846, %1847 + %1849 = fadd float %1845, %1848 + %1850 = call float @llvm.sqrt.f32.136(float %1849) + %1851 = fneg float %813 + %1852 = fmul float %1850, %1851 + %1853 = bitcast i32 %152 to float + %1854 = fadd float %1853, %1852 + %1855 = bitcast i32 %152 to float + %1856 = bitcast i32 %152 to float + %1857 = fmul float %1855, %1856 + %1858 = fadd float %1857, 0.000000e+00 + %1859 = bitcast i32 %807 to float + %1860 = bitcast i32 %807 to float + %1861 = fmul float %1859, %1860 + %1862 = fadd float %1858, %1861 + %1863 = call float @llvm.sqrt.f32.137(float %1862) + %1864 = fneg float %813 + %1865 = fmul float %1863, %1864 + %1866 = bitcast i32 %152 to float + %1867 = fadd float %1866, %1865 + %1868 = bitcast i32 %152 to float + %1869 = bitcast i32 %152 to float + %1870 = fmul float %1868, %1869 + %1871 = fadd float %1870, 0.000000e+00 + %1872 = bitcast i32 %807 to float + %1873 = bitcast i32 %807 to float + %1874 = fmul float %1872, %1873 + %1875 = fadd float %1871, %1874 + %1876 = call float @llvm.sqrt.f32.138(float %1875) + %1877 = fneg float %813 + %1878 = fmul float %1876, %1877 + %1879 = bitcast i32 %152 to float + %1880 = fadd float %1879, %1878 + %1881 = fmul float %1867, %1880 + %1882 = fadd float %1881, 0.000000e+00 + %1883 = bitcast i32 %152 to float + %1884 = bitcast i32 %152 to float + %1885 = fmul float %1883, %1884 + %1886 = fadd float %1885, 0.000000e+00 + %1887 = bitcast i32 %807 to float + %1888 = bitcast i32 %807 to float + %1889 = fmul float %1887, %1888 + %1890 = fadd float %1886, %1889 + %1891 = call float @llvm.sqrt.f32.139(float %1890) + %1892 = fneg float %813 + %1893 = fmul float 
%1891, %1892 + %1894 = fmul float %1893, 0.000000e+00 + %1895 = bitcast i32 %807 to float + %1896 = fadd float %1895, %1894 + %1897 = bitcast i32 %152 to float + %1898 = bitcast i32 %152 to float + %1899 = fmul float %1897, %1898 + %1900 = fadd float %1899, 0.000000e+00 + %1901 = bitcast i32 %807 to float + %1902 = bitcast i32 %807 to float + %1903 = fmul float %1901, %1902 + %1904 = fadd float %1900, %1903 + %1905 = call float @llvm.sqrt.f32.140(float %1904) + %1906 = fneg float %813 + %1907 = fmul float %1905, %1906 + %1908 = fmul float %1907, 0.000000e+00 + %1909 = bitcast i32 %807 to float + %1910 = fadd float %1909, %1908 + %1911 = fmul float %1896, %1910 + %1912 = fadd float %1882, %1911 + %1913 = call float @llvm.sqrt.f32.141(float %1912) + %1914 = fadd float %1913, 0.000000e+00 + %1915 = fdiv float %1854, %1914 + %1916 = fmul float %1841, %1915 + %1917 = fneg float %1916 + %1918 = insertelement <4 x float> zeroinitializer, float %1917, i32 0 + %1919 = insertelement <4 x float> %1918, float 0.000000e+00, i32 1 + %1920 = insertelement <4 x float> %1919, float 0.000000e+00, i32 2 + %1921 = insertelement <4 x float> %1920, float 0.000000e+00, i32 3 + %1922 = getelementptr float, float* %0, i32 0 + %1923 = load float, float* %1922, align 4 + %1924 = insertelement <4 x float> zeroinitializer, float %1923, i32 0 + %1925 = insertelement <4 x float> %1924, float 0.000000e+00, i32 1 + %1926 = insertelement <4 x float> %1925, float 0.000000e+00, i32 2 + %1927 = insertelement <4 x float> %1926, float 0.000000e+00, i32 3 + %1928 = call <4 x float> @llvm.fma.f32.142(<4 x float> %1921, <4 x float> %1927, <4 x float> zeroinitializer) + %1929 = extractelement <4 x float> %1928, i32 0 + store float %1929, float* %1765, align 4 + %1930 = bitcast i32 %152 to float + %1931 = bitcast i32 %152 to float + %1932 = fmul float %1930, %1931 + %1933 = fadd float %1932, 0.000000e+00 + %1934 = bitcast i32 %807 to float + %1935 = bitcast i32 %807 to float + %1936 = fmul float %1934, %1935 + %1937 = fadd float %1933, %1936 + %1938 = call float @llvm.sqrt.f32.143(float %1937) + %1939 = fneg float %813 + %1940 = fmul float %1938, %1939 + %1941 = fmul float %1940, 0.000000e+00 + %1942 = bitcast i32 %807 to float + %1943 = fadd float %1942, %1941 + %1944 = bitcast i32 %152 to float + %1945 = bitcast i32 %152 to float + %1946 = fmul float %1944, %1945 + %1947 = fadd float %1946, 0.000000e+00 + %1948 = bitcast i32 %807 to float + %1949 = bitcast i32 %807 to float + %1950 = fmul float %1948, %1949 + %1951 = fadd float %1947, %1950 + %1952 = call float @llvm.sqrt.f32.144(float %1951) + %1953 = fneg float %813 + %1954 = fmul float %1952, %1953 + %1955 = bitcast i32 %152 to float + %1956 = fadd float %1955, %1954 + %1957 = bitcast i32 %152 to float + %1958 = bitcast i32 %152 to float + %1959 = fmul float %1957, %1958 + %1960 = fadd float %1959, 0.000000e+00 + %1961 = bitcast i32 %807 to float + %1962 = bitcast i32 %807 to float + %1963 = fmul float %1961, %1962 + %1964 = fadd float %1960, %1963 + %1965 = call float @llvm.sqrt.f32.145(float %1964) + %1966 = fneg float %813 + %1967 = fmul float %1965, %1966 + %1968 = bitcast i32 %152 to float + %1969 = fadd float %1968, %1967 + %1970 = fmul float %1956, %1969 + %1971 = fadd float %1970, 0.000000e+00 + %1972 = bitcast i32 %152 to float + %1973 = bitcast i32 %152 to float + %1974 = fmul float %1972, %1973 + %1975 = fadd float %1974, 0.000000e+00 + %1976 = bitcast i32 %807 to float + %1977 = bitcast i32 %807 to float + %1978 = fmul float %1976, %1977 + %1979 = fadd float %1975, 
%1978 + %1980 = call float @llvm.sqrt.f32.146(float %1979) + %1981 = fneg float %813 + %1982 = fmul float %1980, %1981 + %1983 = fmul float %1982, 0.000000e+00 + %1984 = bitcast i32 %807 to float + %1985 = fadd float %1984, %1983 + %1986 = bitcast i32 %152 to float + %1987 = bitcast i32 %152 to float + %1988 = fmul float %1986, %1987 + %1989 = fadd float %1988, 0.000000e+00 + %1990 = bitcast i32 %807 to float + %1991 = bitcast i32 %807 to float + %1992 = fmul float %1990, %1991 + %1993 = fadd float %1989, %1992 + %1994 = call float @llvm.sqrt.f32.147(float %1993) + %1995 = fneg float %813 + %1996 = fmul float %1994, %1995 + %1997 = fmul float %1996, 0.000000e+00 + %1998 = bitcast i32 %807 to float + %1999 = fadd float %1998, %1997 + %2000 = fmul float %1985, %1999 + %2001 = fadd float %1971, %2000 + %2002 = call float @llvm.sqrt.f32.148(float %2001) + %2003 = fadd float %2002, 0.000000e+00 + %2004 = fdiv float %1943, %2003 + %2005 = fmul float %2004, 2.000000e+00 + %2006 = bitcast i32 %152 to float + %2007 = bitcast i32 %152 to float + %2008 = fmul float %2006, %2007 + %2009 = fadd float %2008, 0.000000e+00 + %2010 = bitcast i32 %807 to float + %2011 = bitcast i32 %807 to float + %2012 = fmul float %2010, %2011 + %2013 = fadd float %2009, %2012 + %2014 = call float @llvm.sqrt.f32.149(float %2013) + %2015 = fneg float %813 + %2016 = fmul float %2014, %2015 + %2017 = bitcast i32 %152 to float + %2018 = fadd float %2017, %2016 + %2019 = bitcast i32 %152 to float + %2020 = bitcast i32 %152 to float + %2021 = fmul float %2019, %2020 + %2022 = fadd float %2021, 0.000000e+00 + %2023 = bitcast i32 %807 to float + %2024 = bitcast i32 %807 to float + %2025 = fmul float %2023, %2024 + %2026 = fadd float %2022, %2025 + %2027 = call float @llvm.sqrt.f32.150(float %2026) + %2028 = fneg float %813 + %2029 = fmul float %2027, %2028 + %2030 = bitcast i32 %152 to float + %2031 = fadd float %2030, %2029 + %2032 = bitcast i32 %152 to float + %2033 = bitcast i32 %152 to float + %2034 = fmul float %2032, %2033 + %2035 = fadd float %2034, 0.000000e+00 + %2036 = bitcast i32 %807 to float + %2037 = bitcast i32 %807 to float + %2038 = fmul float %2036, %2037 + %2039 = fadd float %2035, %2038 + %2040 = call float @llvm.sqrt.f32.151(float %2039) + %2041 = fneg float %813 + %2042 = fmul float %2040, %2041 + %2043 = bitcast i32 %152 to float + %2044 = fadd float %2043, %2042 + %2045 = fmul float %2031, %2044 + %2046 = fadd float %2045, 0.000000e+00 + %2047 = bitcast i32 %152 to float + %2048 = bitcast i32 %152 to float + %2049 = fmul float %2047, %2048 + %2050 = fadd float %2049, 0.000000e+00 + %2051 = bitcast i32 %807 to float + %2052 = bitcast i32 %807 to float + %2053 = fmul float %2051, %2052 + %2054 = fadd float %2050, %2053 + %2055 = call float @llvm.sqrt.f32.152(float %2054) + %2056 = fneg float %813 + %2057 = fmul float %2055, %2056 + %2058 = fmul float %2057, 0.000000e+00 + %2059 = bitcast i32 %807 to float + %2060 = fadd float %2059, %2058 + %2061 = bitcast i32 %152 to float + %2062 = bitcast i32 %152 to float + %2063 = fmul float %2061, %2062 + %2064 = fadd float %2063, 0.000000e+00 + %2065 = bitcast i32 %807 to float + %2066 = bitcast i32 %807 to float + %2067 = fmul float %2065, %2066 + %2068 = fadd float %2064, %2067 + %2069 = call float @llvm.sqrt.f32.153(float %2068) + %2070 = fneg float %813 + %2071 = fmul float %2069, %2070 + %2072 = fmul float %2071, 0.000000e+00 + %2073 = bitcast i32 %807 to float + %2074 = fadd float %2073, %2072 + %2075 = fmul float %2060, %2074 + %2076 = fadd float %2046, %2075 + 
%2077 = call float @llvm.sqrt.f32.154(float %2076) + %2078 = fadd float %2077, 0.000000e+00 + %2079 = fdiv float %2018, %2078 + %2080 = fmul float %2005, %2079 + %2081 = fneg float %2080 + %2082 = fmul float %2081, %1923 + %2083 = fadd float %2082, 0.000000e+00 + %2084 = bitcast i32 %152 to float + %2085 = bitcast i32 %152 to float + %2086 = fmul float %2084, %2085 + %2087 = fadd float %2086, 0.000000e+00 + %2088 = bitcast i32 %807 to float + %2089 = bitcast i32 %807 to float + %2090 = fmul float %2088, %2089 + %2091 = fadd float %2087, %2090 + %2092 = call float @llvm.sqrt.f32.155(float %2091) + %2093 = fneg float %813 + %2094 = fmul float %2092, %2093 + %2095 = fmul float %2094, 0.000000e+00 + %2096 = bitcast i32 %807 to float + %2097 = fadd float %2096, %2095 + %2098 = bitcast i32 %152 to float + %2099 = bitcast i32 %152 to float + %2100 = fmul float %2098, %2099 + %2101 = fadd float %2100, 0.000000e+00 + %2102 = bitcast i32 %807 to float + %2103 = bitcast i32 %807 to float + %2104 = fmul float %2102, %2103 + %2105 = fadd float %2101, %2104 + %2106 = call float @llvm.sqrt.f32.156(float %2105) + %2107 = fneg float %813 + %2108 = fmul float %2106, %2107 + %2109 = bitcast i32 %152 to float + %2110 = fadd float %2109, %2108 + %2111 = bitcast i32 %152 to float + %2112 = bitcast i32 %152 to float + %2113 = fmul float %2111, %2112 + %2114 = fadd float %2113, 0.000000e+00 + %2115 = bitcast i32 %807 to float + %2116 = bitcast i32 %807 to float + %2117 = fmul float %2115, %2116 + %2118 = fadd float %2114, %2117 + %2119 = call float @llvm.sqrt.f32.157(float %2118) + %2120 = fneg float %813 + %2121 = fmul float %2119, %2120 + %2122 = bitcast i32 %152 to float + %2123 = fadd float %2122, %2121 + %2124 = fmul float %2110, %2123 + %2125 = fadd float %2124, 0.000000e+00 + %2126 = bitcast i32 %152 to float + %2127 = bitcast i32 %152 to float + %2128 = fmul float %2126, %2127 + %2129 = fadd float %2128, 0.000000e+00 + %2130 = bitcast i32 %807 to float + %2131 = bitcast i32 %807 to float + %2132 = fmul float %2130, %2131 + %2133 = fadd float %2129, %2132 + %2134 = call float @llvm.sqrt.f32.158(float %2133) + %2135 = fneg float %813 + %2136 = fmul float %2134, %2135 + %2137 = fmul float %2136, 0.000000e+00 + %2138 = bitcast i32 %807 to float + %2139 = fadd float %2138, %2137 + %2140 = bitcast i32 %152 to float + %2141 = bitcast i32 %152 to float + %2142 = fmul float %2140, %2141 + %2143 = fadd float %2142, 0.000000e+00 + %2144 = bitcast i32 %807 to float + %2145 = bitcast i32 %807 to float + %2146 = fmul float %2144, %2145 + %2147 = fadd float %2143, %2146 + %2148 = call float @llvm.sqrt.f32.159(float %2147) + %2149 = fneg float %813 + %2150 = fmul float %2148, %2149 + %2151 = fmul float %2150, 0.000000e+00 + %2152 = bitcast i32 %807 to float + %2153 = fadd float %2152, %2151 + %2154 = fmul float %2139, %2153 + %2155 = fadd float %2125, %2154 + %2156 = call float @llvm.sqrt.f32.160(float %2155) + %2157 = fadd float %2156, 0.000000e+00 + %2158 = fdiv float %2097, %2157 + %2159 = fmul float %2158, 2.000000e+00 + %2160 = bitcast i32 %152 to float + %2161 = bitcast i32 %152 to float + %2162 = fmul float %2160, %2161 + %2163 = fadd float %2162, 0.000000e+00 + %2164 = bitcast i32 %807 to float + %2165 = bitcast i32 %807 to float + %2166 = fmul float %2164, %2165 + %2167 = fadd float %2163, %2166 + %2168 = call float @llvm.sqrt.f32.161(float %2167) + %2169 = fneg float %813 + %2170 = fmul float %2168, %2169 + %2171 = fmul float %2170, 0.000000e+00 + %2172 = bitcast i32 %807 to float + %2173 = fadd float %2172, 
%2171 + %2174 = bitcast i32 %152 to float + %2175 = bitcast i32 %152 to float + %2176 = fmul float %2174, %2175 + %2177 = fadd float %2176, 0.000000e+00 + %2178 = bitcast i32 %807 to float + %2179 = bitcast i32 %807 to float + %2180 = fmul float %2178, %2179 + %2181 = fadd float %2177, %2180 + %2182 = call float @llvm.sqrt.f32.162(float %2181) + %2183 = fneg float %813 + %2184 = fmul float %2182, %2183 + %2185 = bitcast i32 %152 to float + %2186 = fadd float %2185, %2184 + %2187 = bitcast i32 %152 to float + %2188 = bitcast i32 %152 to float + %2189 = fmul float %2187, %2188 + %2190 = fadd float %2189, 0.000000e+00 + %2191 = bitcast i32 %807 to float + %2192 = bitcast i32 %807 to float + %2193 = fmul float %2191, %2192 + %2194 = fadd float %2190, %2193 + %2195 = call float @llvm.sqrt.f32.163(float %2194) + %2196 = fneg float %813 + %2197 = fmul float %2195, %2196 + %2198 = bitcast i32 %152 to float + %2199 = fadd float %2198, %2197 + %2200 = fmul float %2186, %2199 + %2201 = fadd float %2200, 0.000000e+00 + %2202 = bitcast i32 %152 to float + %2203 = bitcast i32 %152 to float + %2204 = fmul float %2202, %2203 + %2205 = fadd float %2204, 0.000000e+00 + %2206 = bitcast i32 %807 to float + %2207 = bitcast i32 %807 to float + %2208 = fmul float %2206, %2207 + %2209 = fadd float %2205, %2208 + %2210 = call float @llvm.sqrt.f32.164(float %2209) + %2211 = fneg float %813 + %2212 = fmul float %2210, %2211 + %2213 = fmul float %2212, 0.000000e+00 + %2214 = bitcast i32 %807 to float + %2215 = fadd float %2214, %2213 + %2216 = bitcast i32 %152 to float + %2217 = bitcast i32 %152 to float + %2218 = fmul float %2216, %2217 + %2219 = fadd float %2218, 0.000000e+00 + %2220 = bitcast i32 %807 to float + %2221 = bitcast i32 %807 to float + %2222 = fmul float %2220, %2221 + %2223 = fadd float %2219, %2222 + %2224 = call float @llvm.sqrt.f32.165(float %2223) + %2225 = fneg float %813 + %2226 = fmul float %2224, %2225 + %2227 = fmul float %2226, 0.000000e+00 + %2228 = bitcast i32 %807 to float + %2229 = fadd float %2228, %2227 + %2230 = fmul float %2215, %2229 + %2231 = fadd float %2201, %2230 + %2232 = call float @llvm.sqrt.f32.166(float %2231) + %2233 = fadd float %2232, 0.000000e+00 + %2234 = fdiv float %2173, %2233 + %2235 = fmul float %2159, %2234 + %2236 = fsub float 1.000000e+00, %2235 + %2237 = load float, float* %1274, align 4 + %2238 = fmul float %2236, %2237 + %2239 = fadd float %2083, %2238 + %2240 = insertelement <4 x float> zeroinitializer, float %2239, i32 0 + %2241 = insertelement <4 x float> %2240, float 0.000000e+00, i32 1 + %2242 = insertelement <4 x float> %2241, float 0.000000e+00, i32 2 + %2243 = insertelement <4 x float> %2242, float 0.000000e+00, i32 3 + %2244 = extractelement <4 x float> %2243, i32 0 + store float %2244, float* %1765, align 4 + %2245 = extractelement <4 x float> %2243, i32 1 + %2246 = getelementptr float, float* %2, i32 0 + %2247 = getelementptr inbounds float, float* %2246, i64 3 + store float %2245, float* %2247, align 4 + %2248 = bitcast i32 %152 to float + %2249 = bitcast i32 %152 to float + %2250 = fmul float %2248, %2249 + %2251 = fadd float %2250, 0.000000e+00 + %2252 = bitcast i32 %807 to float + %2253 = bitcast i32 %807 to float + %2254 = fmul float %2252, %2253 + %2255 = fadd float %2251, %2254 + %2256 = call float @llvm.sqrt.f32.167(float %2255) + %2257 = fneg float %813 + %2258 = fmul float %2256, %2257 + %2259 = fmul float %2258, 0.000000e+00 + %2260 = bitcast i32 %807 to float + %2261 = fadd float %2260, %2259 + %2262 = bitcast i32 %152 to float + %2263 
= bitcast i32 %152 to float + %2264 = fmul float %2262, %2263 + %2265 = fadd float %2264, 0.000000e+00 + %2266 = bitcast i32 %807 to float + %2267 = bitcast i32 %807 to float + %2268 = fmul float %2266, %2267 + %2269 = fadd float %2265, %2268 + %2270 = call float @llvm.sqrt.f32.168(float %2269) + %2271 = fneg float %813 + %2272 = fmul float %2270, %2271 + %2273 = bitcast i32 %152 to float + %2274 = fadd float %2273, %2272 + %2275 = bitcast i32 %152 to float + %2276 = bitcast i32 %152 to float + %2277 = fmul float %2275, %2276 + %2278 = fadd float %2277, 0.000000e+00 + %2279 = bitcast i32 %807 to float + %2280 = bitcast i32 %807 to float + %2281 = fmul float %2279, %2280 + %2282 = fadd float %2278, %2281 + %2283 = call float @llvm.sqrt.f32.169(float %2282) + %2284 = fneg float %813 + %2285 = fmul float %2283, %2284 + %2286 = bitcast i32 %152 to float + %2287 = fadd float %2286, %2285 + %2288 = fmul float %2274, %2287 + %2289 = fadd float %2288, 0.000000e+00 + %2290 = bitcast i32 %152 to float + %2291 = bitcast i32 %152 to float + %2292 = fmul float %2290, %2291 + %2293 = fadd float %2292, 0.000000e+00 + %2294 = bitcast i32 %807 to float + %2295 = bitcast i32 %807 to float + %2296 = fmul float %2294, %2295 + %2297 = fadd float %2293, %2296 + %2298 = call float @llvm.sqrt.f32.170(float %2297) + %2299 = fneg float %813 + %2300 = fmul float %2298, %2299 + %2301 = fmul float %2300, 0.000000e+00 + %2302 = bitcast i32 %807 to float + %2303 = fadd float %2302, %2301 + %2304 = bitcast i32 %152 to float + %2305 = bitcast i32 %152 to float + %2306 = fmul float %2304, %2305 + %2307 = fadd float %2306, 0.000000e+00 + %2308 = bitcast i32 %807 to float + %2309 = bitcast i32 %807 to float + %2310 = fmul float %2308, %2309 + %2311 = fadd float %2307, %2310 + %2312 = call float @llvm.sqrt.f32.171(float %2311) + %2313 = fneg float %813 + %2314 = fmul float %2312, %2313 + %2315 = fmul float %2314, 0.000000e+00 + %2316 = bitcast i32 %807 to float + %2317 = fadd float %2316, %2315 + %2318 = fmul float %2303, %2317 + %2319 = fadd float %2289, %2318 + %2320 = call float @llvm.sqrt.f32.172(float %2319) + %2321 = fadd float %2320, 0.000000e+00 + %2322 = fdiv float %2261, %2321 + %2323 = fmul float %2322, 2.000000e+00 + %2324 = bitcast i32 %152 to float + %2325 = bitcast i32 %152 to float + %2326 = fmul float %2324, %2325 + %2327 = fadd float %2326, 0.000000e+00 + %2328 = bitcast i32 %807 to float + %2329 = bitcast i32 %807 to float + %2330 = fmul float %2328, %2329 + %2331 = fadd float %2327, %2330 + %2332 = call float @llvm.sqrt.f32.173(float %2331) + %2333 = fneg float %813 + %2334 = fmul float %2332, %2333 + %2335 = bitcast i32 %152 to float + %2336 = fadd float %2335, %2334 + %2337 = bitcast i32 %152 to float + %2338 = bitcast i32 %152 to float + %2339 = fmul float %2337, %2338 + %2340 = fadd float %2339, 0.000000e+00 + %2341 = bitcast i32 %807 to float + %2342 = bitcast i32 %807 to float + %2343 = fmul float %2341, %2342 + %2344 = fadd float %2340, %2343 + %2345 = call float @llvm.sqrt.f32.174(float %2344) + %2346 = fneg float %813 + %2347 = fmul float %2345, %2346 + %2348 = bitcast i32 %152 to float + %2349 = fadd float %2348, %2347 + %2350 = bitcast i32 %152 to float + %2351 = bitcast i32 %152 to float + %2352 = fmul float %2350, %2351 + %2353 = fadd float %2352, 0.000000e+00 + %2354 = bitcast i32 %807 to float + %2355 = bitcast i32 %807 to float + %2356 = fmul float %2354, %2355 + %2357 = fadd float %2353, %2356 + %2358 = call float @llvm.sqrt.f32.175(float %2357) + %2359 = fneg float %813 + %2360 = fmul 
float %2358, %2359 + %2361 = bitcast i32 %152 to float + %2362 = fadd float %2361, %2360 + %2363 = fmul float %2349, %2362 + %2364 = fadd float %2363, 0.000000e+00 + %2365 = bitcast i32 %152 to float + %2366 = bitcast i32 %152 to float + %2367 = fmul float %2365, %2366 + %2368 = fadd float %2367, 0.000000e+00 + %2369 = bitcast i32 %807 to float + %2370 = bitcast i32 %807 to float + %2371 = fmul float %2369, %2370 + %2372 = fadd float %2368, %2371 + %2373 = call float @llvm.sqrt.f32.176(float %2372) + %2374 = fneg float %813 + %2375 = fmul float %2373, %2374 + %2376 = fmul float %2375, 0.000000e+00 + %2377 = bitcast i32 %807 to float + %2378 = fadd float %2377, %2376 + %2379 = bitcast i32 %152 to float + %2380 = bitcast i32 %152 to float + %2381 = fmul float %2379, %2380 + %2382 = fadd float %2381, 0.000000e+00 + %2383 = bitcast i32 %807 to float + %2384 = bitcast i32 %807 to float + %2385 = fmul float %2383, %2384 + %2386 = fadd float %2382, %2385 + %2387 = call float @llvm.sqrt.f32.177(float %2386) + %2388 = fneg float %813 + %2389 = fmul float %2387, %2388 + %2390 = fmul float %2389, 0.000000e+00 + %2391 = bitcast i32 %807 to float + %2392 = fadd float %2391, %2390 + %2393 = fmul float %2378, %2392 + %2394 = fadd float %2364, %2393 + %2395 = call float @llvm.sqrt.f32.178(float %2394) + %2396 = fadd float %2395, 0.000000e+00 + %2397 = fdiv float %2336, %2396 + %2398 = fmul float %2323, %2397 + %2399 = fneg float %2398 + %2400 = insertelement <4 x float> zeroinitializer, float %2399, i32 0 + %2401 = insertelement <4 x float> %2400, float 0.000000e+00, i32 1 + %2402 = insertelement <4 x float> %2401, float 0.000000e+00, i32 2 + %2403 = insertelement <4 x float> %2402, float 0.000000e+00, i32 3 + %2404 = load float, float* %1442, align 4 + %2405 = insertelement <4 x float> zeroinitializer, float %2404, i32 0 + %2406 = insertelement <4 x float> %2405, float 0.000000e+00, i32 1 + %2407 = insertelement <4 x float> %2406, float 0.000000e+00, i32 2 + %2408 = insertelement <4 x float> %2407, float 0.000000e+00, i32 3 + %2409 = call <4 x float> @llvm.fma.f32.179(<4 x float> %2403, <4 x float> %2408, <4 x float> zeroinitializer) + %2410 = extractelement <4 x float> %2409, i32 0 + store float %2410, float* %2247, align 4 + %2411 = bitcast i32 %152 to float + %2412 = bitcast i32 %152 to float + %2413 = fmul float %2411, %2412 + %2414 = fadd float %2413, 0.000000e+00 + %2415 = bitcast i32 %807 to float + %2416 = bitcast i32 %807 to float + %2417 = fmul float %2415, %2416 + %2418 = fadd float %2414, %2417 + %2419 = call float @llvm.sqrt.f32.180(float %2418) + %2420 = fneg float %813 + %2421 = fmul float %2419, %2420 + %2422 = fmul float %2421, 0.000000e+00 + %2423 = bitcast i32 %807 to float + %2424 = fadd float %2423, %2422 + %2425 = bitcast i32 %152 to float + %2426 = bitcast i32 %152 to float + %2427 = fmul float %2425, %2426 + %2428 = fadd float %2427, 0.000000e+00 + %2429 = bitcast i32 %807 to float + %2430 = bitcast i32 %807 to float + %2431 = fmul float %2429, %2430 + %2432 = fadd float %2428, %2431 + %2433 = call float @llvm.sqrt.f32.181(float %2432) + %2434 = fneg float %813 + %2435 = fmul float %2433, %2434 + %2436 = bitcast i32 %152 to float + %2437 = fadd float %2436, %2435 + %2438 = bitcast i32 %152 to float + %2439 = bitcast i32 %152 to float + %2440 = fmul float %2438, %2439 + %2441 = fadd float %2440, 0.000000e+00 + %2442 = bitcast i32 %807 to float + %2443 = bitcast i32 %807 to float + %2444 = fmul float %2442, %2443 + %2445 = fadd float %2441, %2444 + %2446 = call float 
@llvm.sqrt.f32.182(float %2445) + %2447 = fneg float %813 + %2448 = fmul float %2446, %2447 + %2449 = bitcast i32 %152 to float + %2450 = fadd float %2449, %2448 + %2451 = fmul float %2437, %2450 + %2452 = fadd float %2451, 0.000000e+00 + %2453 = bitcast i32 %152 to float + %2454 = bitcast i32 %152 to float + %2455 = fmul float %2453, %2454 + %2456 = fadd float %2455, 0.000000e+00 + %2457 = bitcast i32 %807 to float + %2458 = bitcast i32 %807 to float + %2459 = fmul float %2457, %2458 + %2460 = fadd float %2456, %2459 + %2461 = call float @llvm.sqrt.f32.183(float %2460) + %2462 = fneg float %813 + %2463 = fmul float %2461, %2462 + %2464 = fmul float %2463, 0.000000e+00 + %2465 = bitcast i32 %807 to float + %2466 = fadd float %2465, %2464 + %2467 = bitcast i32 %152 to float + %2468 = bitcast i32 %152 to float + %2469 = fmul float %2467, %2468 + %2470 = fadd float %2469, 0.000000e+00 + %2471 = bitcast i32 %807 to float + %2472 = bitcast i32 %807 to float + %2473 = fmul float %2471, %2472 + %2474 = fadd float %2470, %2473 + %2475 = call float @llvm.sqrt.f32.184(float %2474) + %2476 = fneg float %813 + %2477 = fmul float %2475, %2476 + %2478 = fmul float %2477, 0.000000e+00 + %2479 = bitcast i32 %807 to float + %2480 = fadd float %2479, %2478 + %2481 = fmul float %2466, %2480 + %2482 = fadd float %2452, %2481 + %2483 = call float @llvm.sqrt.f32.185(float %2482) + %2484 = fadd float %2483, 0.000000e+00 + %2485 = fdiv float %2424, %2484 + %2486 = fmul float %2485, 2.000000e+00 + %2487 = bitcast i32 %152 to float + %2488 = bitcast i32 %152 to float + %2489 = fmul float %2487, %2488 + %2490 = fadd float %2489, 0.000000e+00 + %2491 = bitcast i32 %807 to float + %2492 = bitcast i32 %807 to float + %2493 = fmul float %2491, %2492 + %2494 = fadd float %2490, %2493 + %2495 = call float @llvm.sqrt.f32.186(float %2494) + %2496 = fneg float %813 + %2497 = fmul float %2495, %2496 + %2498 = bitcast i32 %152 to float + %2499 = fadd float %2498, %2497 + %2500 = bitcast i32 %152 to float + %2501 = bitcast i32 %152 to float + %2502 = fmul float %2500, %2501 + %2503 = fadd float %2502, 0.000000e+00 + %2504 = bitcast i32 %807 to float + %2505 = bitcast i32 %807 to float + %2506 = fmul float %2504, %2505 + %2507 = fadd float %2503, %2506 + %2508 = call float @llvm.sqrt.f32.187(float %2507) + %2509 = fneg float %813 + %2510 = fmul float %2508, %2509 + %2511 = bitcast i32 %152 to float + %2512 = fadd float %2511, %2510 + %2513 = bitcast i32 %152 to float + %2514 = bitcast i32 %152 to float + %2515 = fmul float %2513, %2514 + %2516 = fadd float %2515, 0.000000e+00 + %2517 = bitcast i32 %807 to float + %2518 = bitcast i32 %807 to float + %2519 = fmul float %2517, %2518 + %2520 = fadd float %2516, %2519 + %2521 = call float @llvm.sqrt.f32.188(float %2520) + %2522 = fneg float %813 + %2523 = fmul float %2521, %2522 + %2524 = bitcast i32 %152 to float + %2525 = fadd float %2524, %2523 + %2526 = fmul float %2512, %2525 + %2527 = fadd float %2526, 0.000000e+00 + %2528 = bitcast i32 %152 to float + %2529 = bitcast i32 %152 to float + %2530 = fmul float %2528, %2529 + %2531 = fadd float %2530, 0.000000e+00 + %2532 = bitcast i32 %807 to float + %2533 = bitcast i32 %807 to float + %2534 = fmul float %2532, %2533 + %2535 = fadd float %2531, %2534 + %2536 = call float @llvm.sqrt.f32.189(float %2535) + %2537 = fneg float %813 + %2538 = fmul float %2536, %2537 + %2539 = fmul float %2538, 0.000000e+00 + %2540 = bitcast i32 %807 to float + %2541 = fadd float %2540, %2539 + %2542 = bitcast i32 %152 to float + %2543 = bitcast i32 %152 
to float + %2544 = fmul float %2542, %2543 + %2545 = fadd float %2544, 0.000000e+00 + %2546 = bitcast i32 %807 to float + %2547 = bitcast i32 %807 to float + %2548 = fmul float %2546, %2547 + %2549 = fadd float %2545, %2548 + %2550 = call float @llvm.sqrt.f32.190(float %2549) + %2551 = fneg float %813 + %2552 = fmul float %2550, %2551 + %2553 = fmul float %2552, 0.000000e+00 + %2554 = bitcast i32 %807 to float + %2555 = fadd float %2554, %2553 + %2556 = fmul float %2541, %2555 + %2557 = fadd float %2527, %2556 + %2558 = call float @llvm.sqrt.f32.191(float %2557) + %2559 = fadd float %2558, 0.000000e+00 + %2560 = fdiv float %2499, %2559 + %2561 = fmul float %2486, %2560 + %2562 = fneg float %2561 + %2563 = fmul float %2562, %2404 + %2564 = fadd float %2563, 0.000000e+00 + %2565 = bitcast i32 %152 to float + %2566 = bitcast i32 %152 to float + %2567 = fmul float %2565, %2566 + %2568 = fadd float %2567, 0.000000e+00 + %2569 = bitcast i32 %807 to float + %2570 = bitcast i32 %807 to float + %2571 = fmul float %2569, %2570 + %2572 = fadd float %2568, %2571 + %2573 = call float @llvm.sqrt.f32.192(float %2572) + %2574 = fneg float %813 + %2575 = fmul float %2573, %2574 + %2576 = fmul float %2575, 0.000000e+00 + %2577 = bitcast i32 %807 to float + %2578 = fadd float %2577, %2576 + %2579 = bitcast i32 %152 to float + %2580 = bitcast i32 %152 to float + %2581 = fmul float %2579, %2580 + %2582 = fadd float %2581, 0.000000e+00 + %2583 = bitcast i32 %807 to float + %2584 = bitcast i32 %807 to float + %2585 = fmul float %2583, %2584 + %2586 = fadd float %2582, %2585 + %2587 = call float @llvm.sqrt.f32.193(float %2586) + %2588 = fneg float %813 + %2589 = fmul float %2587, %2588 + %2590 = bitcast i32 %152 to float + %2591 = fadd float %2590, %2589 + %2592 = bitcast i32 %152 to float + %2593 = bitcast i32 %152 to float + %2594 = fmul float %2592, %2593 + %2595 = fadd float %2594, 0.000000e+00 + %2596 = bitcast i32 %807 to float + %2597 = bitcast i32 %807 to float + %2598 = fmul float %2596, %2597 + %2599 = fadd float %2595, %2598 + %2600 = call float @llvm.sqrt.f32.194(float %2599) + %2601 = fneg float %813 + %2602 = fmul float %2600, %2601 + %2603 = bitcast i32 %152 to float + %2604 = fadd float %2603, %2602 + %2605 = fmul float %2591, %2604 + %2606 = fadd float %2605, 0.000000e+00 + %2607 = bitcast i32 %152 to float + %2608 = bitcast i32 %152 to float + %2609 = fmul float %2607, %2608 + %2610 = fadd float %2609, 0.000000e+00 + %2611 = bitcast i32 %807 to float + %2612 = bitcast i32 %807 to float + %2613 = fmul float %2611, %2612 + %2614 = fadd float %2610, %2613 + %2615 = call float @llvm.sqrt.f32.195(float %2614) + %2616 = fneg float %813 + %2617 = fmul float %2615, %2616 + %2618 = fmul float %2617, 0.000000e+00 + %2619 = bitcast i32 %807 to float + %2620 = fadd float %2619, %2618 + %2621 = bitcast i32 %152 to float + %2622 = bitcast i32 %152 to float + %2623 = fmul float %2621, %2622 + %2624 = fadd float %2623, 0.000000e+00 + %2625 = bitcast i32 %807 to float + %2626 = bitcast i32 %807 to float + %2627 = fmul float %2625, %2626 + %2628 = fadd float %2624, %2627 + %2629 = call float @llvm.sqrt.f32.196(float %2628) + %2630 = fneg float %813 + %2631 = fmul float %2629, %2630 + %2632 = fmul float %2631, 0.000000e+00 + %2633 = bitcast i32 %807 to float + %2634 = fadd float %2633, %2632 + %2635 = fmul float %2620, %2634 + %2636 = fadd float %2606, %2635 + %2637 = call float @llvm.sqrt.f32.197(float %2636) + %2638 = fadd float %2637, 0.000000e+00 + %2639 = fdiv float %2578, %2638 + %2640 = fmul float %2639, 
2.000000e+00 + %2641 = bitcast i32 %152 to float + %2642 = bitcast i32 %152 to float + %2643 = fmul float %2641, %2642 + %2644 = fadd float %2643, 0.000000e+00 + %2645 = bitcast i32 %807 to float + %2646 = bitcast i32 %807 to float + %2647 = fmul float %2645, %2646 + %2648 = fadd float %2644, %2647 + %2649 = call float @llvm.sqrt.f32.198(float %2648) + %2650 = fneg float %813 + %2651 = fmul float %2649, %2650 + %2652 = fmul float %2651, 0.000000e+00 + %2653 = bitcast i32 %807 to float + %2654 = fadd float %2653, %2652 + %2655 = bitcast i32 %152 to float + %2656 = bitcast i32 %152 to float + %2657 = fmul float %2655, %2656 + %2658 = fadd float %2657, 0.000000e+00 + %2659 = bitcast i32 %807 to float + %2660 = bitcast i32 %807 to float + %2661 = fmul float %2659, %2660 + %2662 = fadd float %2658, %2661 + %2663 = call float @llvm.sqrt.f32.199(float %2662) + %2664 = fneg float %813 + %2665 = fmul float %2663, %2664 + %2666 = bitcast i32 %152 to float + %2667 = fadd float %2666, %2665 + %2668 = bitcast i32 %152 to float + %2669 = bitcast i32 %152 to float + %2670 = fmul float %2668, %2669 + %2671 = fadd float %2670, 0.000000e+00 + %2672 = bitcast i32 %807 to float + %2673 = bitcast i32 %807 to float + %2674 = fmul float %2672, %2673 + %2675 = fadd float %2671, %2674 + %2676 = call float @llvm.sqrt.f32.200(float %2675) + %2677 = fneg float %813 + %2678 = fmul float %2676, %2677 + %2679 = bitcast i32 %152 to float + %2680 = fadd float %2679, %2678 + %2681 = fmul float %2667, %2680 + %2682 = fadd float %2681, 0.000000e+00 + %2683 = bitcast i32 %152 to float + %2684 = bitcast i32 %152 to float + %2685 = fmul float %2683, %2684 + %2686 = fadd float %2685, 0.000000e+00 + %2687 = bitcast i32 %807 to float + %2688 = bitcast i32 %807 to float + %2689 = fmul float %2687, %2688 + %2690 = fadd float %2686, %2689 + %2691 = call float @llvm.sqrt.f32.201(float %2690) + %2692 = fneg float %813 + %2693 = fmul float %2691, %2692 + %2694 = fmul float %2693, 0.000000e+00 + %2695 = bitcast i32 %807 to float + %2696 = fadd float %2695, %2694 + %2697 = bitcast i32 %152 to float + %2698 = bitcast i32 %152 to float + %2699 = fmul float %2697, %2698 + %2700 = fadd float %2699, 0.000000e+00 + %2701 = bitcast i32 %807 to float + %2702 = bitcast i32 %807 to float + %2703 = fmul float %2701, %2702 + %2704 = fadd float %2700, %2703 + %2705 = call float @llvm.sqrt.f32.202(float %2704) + %2706 = fneg float %813 + %2707 = fmul float %2705, %2706 + %2708 = fmul float %2707, 0.000000e+00 + %2709 = bitcast i32 %807 to float + %2710 = fadd float %2709, %2708 + %2711 = fmul float %2696, %2710 + %2712 = fadd float %2682, %2711 + %2713 = call float @llvm.sqrt.f32.203(float %2712) + %2714 = fadd float %2713, 0.000000e+00 + %2715 = fdiv float %2654, %2714 + %2716 = fmul float %2640, %2715 + %2717 = fsub float 1.000000e+00, %2716 + %2718 = load float, float* %144, align 4 + %2719 = fmul float %2717, %2718 + %2720 = fadd float %2564, %2719 + %2721 = insertelement <4 x float> zeroinitializer, float %2720, i32 0 + %2722 = insertelement <4 x float> %2721, float 0.000000e+00, i32 1 + %2723 = insertelement <4 x float> %2722, float 0.000000e+00, i32 2 + %2724 = insertelement <4 x float> %2723, float 0.000000e+00, i32 3 + %2725 = extractelement <4 x float> %2724, i32 0 + store float %2725, float* %2247, align 4 + %2726 = getelementptr float, float* %1, i32 0 + %2727 = getelementptr inbounds float, float* %2726, i64 2 + %2728 = bitcast float* %2727 to i32* + %2729 = load i32, i32* %2728, align 4 + %2730 = bitcast i32 %2729 to float + %2731 = 
insertelement <4 x float> zeroinitializer, float %2730, i32 0 + %2732 = getelementptr float, float* %1, i32 0 + %2733 = getelementptr inbounds float, float* %2732, i64 1 + %2734 = bitcast float* %2733 to i32* + %2735 = load i32, i32* %2734, align 4 + %2736 = bitcast i32 %2735 to float + %2737 = insertelement <4 x float> %2731, float %2736, i32 1 + %2738 = insertelement <4 x float> %2737, float 0.000000e+00, i32 2 + %2739 = insertelement <4 x float> %2738, float 0.000000e+00, i32 3 + %2740 = extractelement <4 x float> %2739, i32 0 + %2741 = bitcast i32* %95 to float* + %2742 = bitcast i32* %2734 to float* + store float %2740, float* %2742, align 4 + %2743 = extractelement <4 x float> %2739, i32 1 + %2744 = bitcast i32* %98 to float* + %2745 = bitcast i32* %2728 to float* + store float %2743, float* %2745, align 4 + ret void +} + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { +.preheader13: + %3 = bitcast float* %2 to i8* + %4 = bitcast float* %0 to i8* + %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) + %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #9 + %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 + %8 = bitcast i8* %7 to float* + store float 1.000000e+00, float* %8, align 4 + %9 = getelementptr inbounds i8, i8* %7, i64 8 + %10 = getelementptr inbounds i8, i8* %7, i64 12 + %11 = bitcast i8* %10 to float* + store float 1.000000e+00, float* %11, align 4 + %12 = bitcast float* %1 to i8* + %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) + %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 + %15 = bitcast i8* %14 to float* + %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 + %17 = bitcast i8* %16 to float* + %18 = bitcast float* %2 to i32* + %19 = load i32, i32* %18, align 4 + %20 = bitcast i8* %14 to i32* + store i32 %19, i32* %20, align 4 + %21 = bitcast i8* %7 to i32* + %22 = load i32, i32* %21, align 4 + %23 = bitcast i8* %16 to i32* + store i32 %22, i32* %23, align 4 + %24 = getelementptr inbounds float, float* %2, i64 2 + %25 = bitcast float* %24 to i32* + %26 = load i32, i32* %25, align 4 + %27 = getelementptr inbounds i8, i8* %14, i64 4 + %28 = bitcast i8* %27 to i32* + store i32 %26, i32* %28, align 4 + %29 = bitcast i8* %9 to i32* + %30 = load i32, i32* %29, align 4 + %31 = getelementptr inbounds i8, i8* %16, i64 4 + %32 = bitcast i8* %31 to i32* + store i32 %30, i32* %32, align 4 + %33 = load float, float* %15, align 4 + %34 = call float @no_opt_sgn(float %33) + %35 = fneg float %34 + %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) + %37 = fmul float %36, %35 + %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 + %39 = bitcast i8* %38 to float* + %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 + %41 = load float, float* %15, align 4 + %42 = load float, float* %17, align 4 + %43 = fmul float %37, %42 + %44 = fadd float %41, %43 + store float %44, float* %39, align 4 + %45 = bitcast i8* %27 to float* + %46 = load float, float* %45, align 4 + %47 = bitcast i8* %31 to float* + %48 = load float, float* %47, align 4 + %49 = fmul float %37, %48 + %50 = fadd float %46, %49 + %51 = getelementptr inbounds i8, i8* %38, i64 4 + %52 = bitcast i8* %51 to float* + store float %50, float* %52, 
align 4 + %53 = bitcast i8* %40 to float* + %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) + %55 = fadd float %54, 0x3EE4F8B580000000 + %56 = load float, float* %39, align 4 + %57 = fdiv float %56, %55 + store float %57, float* %53, align 4 + %58 = load float, float* %52, align 4 + %59 = fdiv float %58, %55 + %60 = getelementptr inbounds i8, i8* %40, i64 4 + %61 = bitcast i8* %60 to float* + store float %59, float* %61, align 4 + %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 + %63 = bitcast i8* %62 to float* + %64 = load float, float* %53, align 4 + %65 = fmul float %64, 2.000000e+00 + %66 = fmul float %65, %64 + %67 = fsub float 1.000000e+00, %66 + store float %67, float* %63, align 4 + %68 = load float, float* %53, align 4 + %69 = fmul float %68, 2.000000e+00 + %70 = load float, float* %61, align 4 + %71 = fmul float %69, %70 + %72 = fsub float 0.000000e+00, %71 + %73 = getelementptr inbounds i8, i8* %62, i64 4 + %74 = bitcast i8* %73 to float* + store float %72, float* %74, align 4 + %75 = load float, float* %61, align 4 + %76 = fmul float %75, 2.000000e+00 + %77 = load float, float* %53, align 4 + %78 = fmul float %76, %77 + %79 = fsub float 0.000000e+00, %78 + %80 = getelementptr inbounds i8, i8* %62, i64 8 + %81 = bitcast i8* %80 to float* + store float %79, float* %81, align 4 + %82 = load float, float* %61, align 4 + %83 = fmul float %82, 2.000000e+00 + %84 = fmul float %83, %82 + %85 = fsub float 1.000000e+00, %84 + %86 = getelementptr inbounds i8, i8* %62, i64 12 + %87 = bitcast i8* %86 to float* + store float %85, float* %87, align 4 + %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 + %89 = bitcast i8* %88 to float* + %90 = bitcast i8* %62 to i32* + %91 = load i32, i32* %90, align 4 + %92 = bitcast i8* %88 to i32* + store i32 %91, i32* %92, align 4 + %93 = bitcast i8* %73 to i32* + %94 = load i32, i32* %93, align 4 + %95 = getelementptr inbounds i8, i8* %88, i64 4 + %96 = bitcast i8* %95 to i32* + store i32 %94, i32* %96, align 4 + %97 = bitcast i8* %80 to i32* + %98 = load i32, i32* %97, align 4 + %99 = getelementptr inbounds i8, i8* %88, i64 8 + %100 = bitcast i8* %99 to i32* + store i32 %98, i32* %100, align 4 + %101 = bitcast i8* %86 to i32* + %102 = load i32, i32* %101, align 4 + %103 = getelementptr inbounds i8, i8* %88, i64 12 + %104 = bitcast i8* %103 to i32* + store i32 %102, i32* %104, align 4 + %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #9 + call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) + call void @free(i8* %14) + call void @free(i8* %16) + call void @free(i8* %38) + call void @free(i8* %40) + call void @free(i8* %62) + call void @free(i8* %88) + call void @no_opt_naive_fixed_transpose(float* %1) + ret void +} + +; Function Attrs: nounwind +declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 + +; Function Attrs: nounwind readnone speculatable willreturn +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 + +; Function Attrs: allocsize(0,1) +declare i8* @calloc(i64, i64) #5 + +declare void @free(i8*) #6 + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #1 { +.preheader6: + %0 = alloca i64, align 8 + %1 = alloca [4 x float], align 16 + %2 = alloca [4 x float], align 16 + %3 = alloca [4 x float], align 16 + %4 = alloca [4 x float], align 16 + %5 = alloca [4 x float], align 16 + %6 = call i64 @time(i64* null) #9 + store i64 %6, i64* %0, align 8 + %7 = call i64 @time(i64* nonnull %0) #9 + %8 = trunc i64 %7 to i32 
+ call void @srand(i32 %8) #9 + %9 = call i32 @rand() #9 + %10 = sitofp i32 %9 to float + %11 = fdiv float %10, 0x41747AE140000000 + %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 + store float %11, float* %12, align 16 + %13 = call i32 @rand() #9 + %14 = sitofp i32 %13 to float + %15 = fdiv float %14, 0x41747AE140000000 + %16 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 + store float %15, float* %16, align 4 + %17 = call i32 @rand() #9 + %18 = sitofp i32 %17 to float + %19 = fdiv float %18, 0x41747AE140000000 + %20 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 + store float %19, float* %20, align 8 + %21 = call i32 @rand() #9 + %22 = sitofp i32 %21 to float + %23 = fdiv float %22, 0x41747AE140000000 + %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 + store float %23, float* %24, align 4 + %25 = bitcast [4 x float]* %2 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %25, i8 0, i64 16, i1 false) + %26 = bitcast [4 x float]* %3 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %26, i8 0, i64 16, i1 false) + %27 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 + %28 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 + call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %27, float* nonnull %28) + %29 = bitcast [4 x float]* %4 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %29, i8 0, i64 16, i1 false) + %30 = bitcast [4 x float]* %5 to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %30, i8 0, i64 16, i1 false) + %31 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 + %32 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 + call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %31, float* nonnull %32) + %33 = load float, float* %27, align 16 + %34 = fpext float %33 to double + %35 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %34) #9 + %36 = load float, float* %31, align 16 + %37 = fpext float %36 to double + %38 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %37) #9 + %39 = load float, float* %31, align 16 + %40 = load float, float* %27, align 16 + %41 = fsub float %39, %40 + %42 = call float @llvm.fabs.f32(float %41) + %43 = fcmp uge float %42, 0x3FB99999A0000000 + br i1 %43, label %58, label %44 + +44: ; preds = %.preheader6 + %45 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 + %46 = load float, float* %45, align 4 + %47 = fpext float %46 to double + %48 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %47) #9 + %49 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 + %50 = load float, float* %49, align 4 + %51 = fpext float %50 to double + %52 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %51) #9 + %53 = load float, float* %31, align 16 + %54 = load float, float* %27, align 16 + %55 = fsub float %53, %54 + %56 = call float @llvm.fabs.f32(float %55) + %57 = fcmp uge float %56, 0x3FB99999A0000000 + br i1 %57, label %58, label %.preheader6.1 + +58: ; preds = %115, %.preheader6.1, %44, %.preheader6 + call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 300, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.3, i64 0, i64 0)) #11 + unreachable + +59: ; preds = %.preheader5 + %60 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 + %61 = load float, float* %60, align 4 + %62 = fpext float %61 to double + %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %62) #9 + %64 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 + %65 = load float, float* %64, align 4 + %66 = fpext float %65 to double + %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %66) #9 + %68 = load float, float* %32, align 16 + %69 = load float, float* %28, align 16 + %70 = fsub float %68, %69 + %71 = call float @llvm.fabs.f32(float %70) + %72 = fcmp uge float %71, 0x3FB99999A0000000 + br i1 %72, label %73, label %.preheader.1 + +73: ; preds = %.preheader5, %87, %.preheader.1, %59 + call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 307, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.6, i64 0, i64 0)) #11 + unreachable + +.preheader.1: ; preds = %59 + %74 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 + %75 = load float, float* %74, align 8 + %76 = fpext float %75 to double + %77 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %76) #9 + %78 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 + %79 = load float, float* %78, align 8 + %80 = fpext float %79 to double + %81 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %80) #9 + %82 = load float, float* %64, align 4 + %83 = load float, float* %60, align 4 + %84 = fsub float %82, %83 + %85 = call float @llvm.fabs.f32(float %84) + %86 = fcmp uge float %85, 0x3FB99999A0000000 + br i1 %86, label %73, label %87 + +87: ; preds = %.preheader.1 + %88 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 + %89 = load float, float* %88, align 4 + %90 = fpext float %89 to double + %91 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %90) #9 + %92 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 + %93 = load float, float* %92, align 4 + %94 = fpext float %93 to double + %95 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %94) #9 + %96 = load float, float* %64, align 4 + %97 = load float, float* %60, align 4 + %98 = fsub float %96, %97 + %99 = call float @llvm.fabs.f32(float %98) + %100 = fcmp uge float %99, 0x3FB99999A0000000 + br i1 %100, label %73, label %101 + +101: ; preds = %87 + ret i32 0 + +.preheader6.1: ; preds = %44 + %102 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 + %103 = load float, float* %102, align 8 + %104 = fpext float %103 to double + %105 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %104) #9 + %106 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 + %107 = load float, float* %106, align 8 + %108 = fpext float %107 to double + %109 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %108) #9 + %110 = load float, float* %49, align 4 + %111 = load float, float* %45, align 4 + %112 = fsub float %110, %111 + %113 = call float @llvm.fabs.f32(float %112) + %114 = fcmp uge float %113, 0x3FB99999A0000000 + br i1 %114, label %58, label %115 + +115: ; preds = %.preheader6.1 + %116 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 + %117 = load float, float* %116, align 4 + %118 = fpext float %117 to double + %119 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %118) #9 + %120 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 + %121 = load float, float* %120, align 4 + %122 = fpext float %121 to double + %123 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %122) #9 + %124 = load float, float* %49, align 4 + %125 = load float, float* %45, align 4 + %126 = fsub float %124, %125 + %127 = call float @llvm.fabs.f32(float %126) + %128 = fcmp uge float %127, 0x3FB99999A0000000 + br i1 %128, label %58, label %.preheader5 + +.preheader5: ; preds = %115 + %129 = load float, float* %28, align 16 + %130 = fpext float %129 to double + %131 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %130) #9 + %132 = load float, float* %32, align 16 + %133 = fpext float %132 to double + %134 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %133) #9 + %135 = load float, float* %32, align 16 + %136 = load float, float* %28, align 16 + %137 = fsub float %135, %136 + %138 = call float @llvm.fabs.f32(float %137) + %139 = fcmp uge float %138, 0x3FB99999A0000000 + br i1 %139, label %73, label %59 +} + +declare i64 @time(i64*) #6 + +declare void @srand(i32) #6 + +declare i32 @rand() #6 + +declare i32 @printf(i8*, ...) 
#6 + +; Function Attrs: nounwind readnone speculatable willreturn +declare double @llvm.fabs.f64(double) #2 + +; Function Attrs: noreturn +declare void @__assert_rtn(i8*, i8*, i32, i8*) #7 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #8 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.fabs.f32(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.1(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.2(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.3(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.4(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.5(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.6(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.7(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.8(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.9(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.10(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.11(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.12(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.13(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.14(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.15(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.16(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.17(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.18(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.19(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.20(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.21(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.22(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.23(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.24(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.25(float) #2 + +; Function Attrs: nounwind readnone 
speculatable willreturn +declare float @llvm.sqrt.f32.26(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.27(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.28(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.29(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.30(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.31(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.32(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.33(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.34(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.35(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.36(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.37(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.38(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.39(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.40(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.41(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.42(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.43(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.44(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.45(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.46(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.47(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.48(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.49(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.50(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.51(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.52(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.53(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.54(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.55(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.56(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.57(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.58(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float 
@llvm.sqrt.f32.59(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.60(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.61(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.62(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.63(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.64(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.65(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.66(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.67(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.68(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.69(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.70(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.71(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.72(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.73(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.74(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.75(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.76(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.77(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.78(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.79(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.80(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.81(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.82(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.83(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.84(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.85(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.86(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.87(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.88(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.89(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.90(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.91(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float 
@llvm.sqrt.f32.92(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.93(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.94(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.95(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.96(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.97(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.98(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.99(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.100(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.101(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.102(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.103(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.104(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.105(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.106(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.107(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.108(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.109(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.110(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.111(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.112(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.113(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.114(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.115(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.116(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.117(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.118(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.119(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.120(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.121(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.122(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.123(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.124(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare 
float @llvm.sqrt.f32.125(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.126(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.127(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.128(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.129(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.130(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.131(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.132(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.133(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.134(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.135(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.136(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.137(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.138(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.139(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.140(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.141(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.142(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.143(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.144(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.145(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.146(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.147(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.148(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.149(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.150(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.151(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.152(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.153(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.154(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.155(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.156(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.157(float) #2 + +; Function Attrs: nounwind readnone speculatable 
willreturn +declare float @llvm.sqrt.f32.158(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.159(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.160(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.161(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.162(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.163(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.164(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.165(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.166(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.167(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.168(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.169(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.170(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.171(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.172(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.173(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.174(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.175(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.176(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.177(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.178(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fma.f32.179(<4 x float>, <4 x float>, <4 x float>) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.180(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.181(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.182(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.183(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.184(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.185(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.186(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.187(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.188(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.189(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.190(float) #2 + +; Function Attrs: nounwind readnone 
speculatable willreturn +declare float @llvm.sqrt.f32.191(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.192(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.193(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.194(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.195(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.196(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.197(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.198(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.199(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.200(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.201(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.202(float) #2 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.sqrt.f32.203(float) #2 + +attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone speculatable willreturn } +attributes #3 = { argmemonly nounwind willreturn writeonly } +attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" 
"disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #7 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #8 = { argmemonly nounwind willreturn } +attributes #9 = { nounwind } +attributes #10 = { nounwind allocsize(0,1) } +attributes #11 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} +!3 = distinct !{!3, !4} +!4 = !{!"llvm.loop.unroll.disable"} +!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/flaky-final b/src/dios-egraphs/Diospyros/flaky-outputs/flaky-final new file mode 100755 index 0000000000000000000000000000000000000000..0555e7480c8c0b64524b5228ee2ab242a3b18ae6 GIT binary patch literal 13676 zcmeHOe{fY-oxgzu0}Y<{N`J80AJ1;FFKh{5?Mkb)FPH}x%!5FQB$#js;RP}#FY*1T zV8tdaujTrP#!-}+g)y_*8Fr_;lXaj*#&%yIC845?rBc>1YpN74Q&uxZYuYN=&*z+T zU*3DL-O)dGW}SB@_nhzdd%nNV_ndp*y*Dqu{a;t!V;Ds<4Wny@VHg7EmJ-8A8a_aV z@i#b4oR(EnxiYvZxPC3ExkphhdQFUa{4iiy8-jHka#^IjFPEmx8*oLQv-GfK#lt(} z)4cimw#-2=82;Kk#Y4P!nw>~jHfmY1cw%cTL(13pyIYmM<(k9AU4c@qZ`wcVvv!2r zI>S*g{>lfMqqu;?dT~c1_RM zXWXv*`<^QYJp0J;6$6%)h}eyXqcN)^jbu+ECX^F&=`pjQhsqF7G2!dx^?lkU2WTd^LRf_`mwC{=3iAl zwd~YCeKk<=osWKh-iJW*;e<;_7&ZeOgjFuDpRo|IheUn&;Xd{BvB^4UcNtES8FSIw z7qHBVrhS1w5ABM@!|fHzE8th_RxsiCAWrHZLw+b*u5sq8a1=67$Uq?jf6W=FHSO2M z##5qVP<%6eXOVbjL{yAO@Xd4LxzVB*Cd9(yjVD^f?R#Vz{u2JB>Bp}a;#-4k*PgV~ z_VG6di^RsXY44si?b`7P;SF9ENnY_TMX>oVP1}QJs&?G$K0bxROeFwb+3{4Hqr29H4YFoNi#>}9cMH7%rqec(Avi6d9X(mlvP z%o3SD*9}P6l<^R0Xd)l+ql_%Ie>B*W|6a;--A<#jW40QO>Dd%%0uJA|3gt;J0|=uW6@&8B~Z_` zBdDVAkQ2slQj{YxN|(&W{gm*B##_Yw9~kk7lOx3_RF<|^)W0D7&{FpTDwcJ=sTfUD z>4doH5`e^zC=d1v|4CTv6Mn3kpF(HjL7F+>T){0S)M62MY7c_3OZbliZajeDo-d$~ z;Un<<1f8FN+4V{IgI)0D=0Gt>A(}11IU6p}srAL8yl0CP$6n{D9dh1DM9e}pUoJ7m&Pzl^GfOc2;l z8f2D8MuvBYGL*erl4OJgo!{UtT}LRrq=<~N)Qz$ddG)L+)p1@AlwrzJ_oOwf4;clmD2^pheo=CbLSU4Kwn-sFf@G$eaY?m(6b_%4H}%mjgn z^XykboPa#!x zm;h3|%eex`wg!OYTTO~$ggcUvB=2%g=UOxwjm)`cBbcUDlH^^^FeK-+Qj%7Zbp;w_ zrwp$5a=*v=@8i`XRq_p42nMbQtjsZEr3$?OB;k z2RZP34wHHX$}#zMui%n|9jG2lr4EY>rB#0c;4jf*M(Tbm{AU4zqwW{rBp1k=6=&TA z;r}B>Cnu_~AML_#-5{>RKpI|z55e=5!aoRrlkcs4S@_RjQ^RFCA@34EYdj*#+p*Dj z6Kd~)q&N6=wxM3w6rKc8G);iWOcP&pCZf!mfp#1>G-#}@O z0Er^XKLJOi4a3wjjGamCys{b+(C7{B!#)V+&^{oId(pJ-ao(AoF&$u)DBIx3vaOA5 z!ZdrrdoYBIH4H&MP&^#q69efq6xOAMA8vcXaIpTo@V_Dai9Sr#uR*(?9$Z8q42hH=!N01*B8MP|er-0{3B_vH?;!|9%(wTcHoEhl2u{ySfWc?nQLLF1HH^A;$#sab zFpf;HfV?#*&_p`5h5@Z8wz+`3E3k(?@Q9rD2T{6{)%X2eBoVw}b`SNTYdqo9!nn9j z-#=kJ^>Wn4Me;3mlRJ#+8pn;`0G~DB(&?D?adUVOwx^tVko~^AYqVeFO{g6lbFKpo z9+W&Qdls5E!0l@M-^utKuhig^x4zEL@vO>h`iF2IqrncAI}R^dqSh;#P$ zGW)!btj{vc!G34$HT*fu+gIma<T3;@PKhz6$ViJn8++Q5J<0cD_p@0A{)I`sTp!|Ffo{vs?>5$ zPAU2_^r(Q{sHv03SVgN;`5>`-tL;bsuGqs<)i1UOT*xic+cY!98du$Tf=?V40X#dhQD$BJ0V4rl=@82Qy(gN=z 
z?PPoQV+sSH3VDPLOEcpV6VB*4Rz)xoXjb8zwMZEd~fBir41g6*Q367d2ph{)9dP!wF zK|m=5l(xOlhIso;^;4Htu?>35zIujCLh-1gCN_<87tP{P)s|M_Dml5zGA!7N9Zv^-yrm5Li-5)1EI$Nq0by$4+0E4{rp`Y`O)dq56pSx+n>w~ zDWD5o=7jL0+2=an+C;oF5x?KJpk?yYtr4T8skLoUJRFP17JoLn zXj{0sqrG!cTSs$K+oGmuv}sqYq8U6}WB#yKvhLHZjlM-bcICd0`Kp4|c%ixe+V*nE zIcnA`HSDI(Z|e8T>vi9t z`xf1I>i+Y(-=q64>;8c5|55i(>)zF0=nEMrWT23NLIw&MC}g0JfkFlf87O3+kby!5 z3K=M5;QuZI-aF=hn%2Ali=V3&)Ao0PDoZ>v73f2Q1nY*~XDAfMYJQ14z8JoeJ=Yu7&>}D7M;nSmB-F z=0rS%(Eos554e^`VGD zBQuTO8^ILmh_)N8Ev-Al+bm-b{(&N$p+2gqM?oFOSHCmka4pu^5ewsiVtW(5zu#%K zC)(nzoo%~}i$$M}TIz9J1^-{`FVqS0C7cwxbV(TXCz=TYB7-z0JM-Lv~_iH{h!;QJBc_v!I1*DJh6 z<32sUUE^sydm=yoqk{7NUWI?t#gQbAAJuq3%b(GBC*I6*{G!HtwZ7kK-2MNKKL7_G zS>;ipkPJRA4_}an^M4G~pOO#c;Vbg+>O6cy9^R0LZ_UG7^YD&5oG+yDafJM)pR(akt<1o$YI-;@72az6am!PV=)lJ#$?>Q;rDU!pmK)76?oo-T1s zmg~ccsI~O&+OOuxHgff7wyo^Z9FttLmTW95Yn#G0m$gCVY, next_idx: i32, @@ -1447,7 +1469,7 @@ unsafe fn const_to_egg( (enode_vec, next_idx + 1) } -unsafe fn load_arg_to_egg( +unsafe fn _load_arg_to_egg( expr: LLVMValueRef, mut enode_vec: Vec, next_idx: i32, @@ -1693,7 +1715,7 @@ unsafe fn ref_to_egg( llvm_arg_pairs: &LLVMPairMap, node_to_arg: &mut Vec, ) -> (Vec, i32) { - for (original_val, new_val) in llvm_arg_pairs.iter() { + for (original_val, _) in llvm_arg_pairs.iter() { if cmp_val_ref_address(&**original_val, &*expr) { // Here we create a new numbered variable node let var_idx = gen_node_idx(); @@ -1947,7 +1969,7 @@ unsafe fn translate_egg( let node_index = node_arg_pair.node_int; let string_node_index = node_index.to_string(); if string_node_index.parse::().unwrap() == *symbol { - for (original_val, new_val) in &mut *llvm_arg_pairs { + for (original_val, new_val) in (&mut *llvm_arg_pairs).iter() { // let original_llvm = llvm_pair.original_value; // let new_llvm = llvm_pair.new_value; if cmp_val_ref_address(&**original_val, &*llvm_node) { @@ -1982,7 +2004,7 @@ unsafe fn translate_egg( let load_value = if isa_load(*gep_value) { let mut matched = false; let mut matched_expr = *gep_value; - for (original_val, new_val) in &*llvm_arg_pairs { + for (original_val, new_val) in (&*llvm_arg_pairs).iter() { if cmp_val_ref_address(&**original_val, &**gep_value) { matched = true; matched_expr = *new_val; @@ -1996,10 +2018,12 @@ unsafe fn translate_egg( // let cloned_gep = LLVMInstructionClone(addr); let new_gep = llvm_recursive_add(builder, addr, context, llvm_arg_pairs); let new_load = LLVMBuildLoad(builder, new_gep, b"\0".as_ptr() as *const _); - let llvm_pair = LLVMPair { - original_value: *gep_value, - new_value: new_load, - }; + // let llvm_pair = LLVMPair { + // original_value: *gep_value, + // new_value: new_load, + // }; + assert!(isa_load(*gep_value) || isa_alloca(*gep_value)); + assert!(isa_load(new_load) || isa_alloca(new_load)); llvm_arg_pairs.insert(*gep_value, new_load); new_load } @@ -2034,7 +2058,7 @@ unsafe fn translate_egg( // includes isa_alloca case let mut matched = false; let mut matched_expr = *gep_value; - for (original_val, new_val) in &*llvm_arg_pairs { + for (original_val, new_val) in (&*llvm_arg_pairs).iter() { if cmp_val_ref_address(&**original_val, &**gep_value) { matched = true; matched_expr = *new_val; @@ -2045,19 +2069,21 @@ unsafe fn translate_egg( matched_expr } else { let new_load_value = LLVMBuildLoad(builder, *gep_value, b"\0".as_ptr() as *const _); - let llvm_pair = LLVMPair { - original_value: *gep_value, - new_value: new_load_value, - }; + // let llvm_pair = LLVMPair { + // original_value: *gep_value, + // new_value: new_load_value, + // }; + 
assert!(isa_load(*gep_value) || isa_alloca(*gep_value)); + assert!(isa_load(new_load_value) || isa_alloca(new_load_value)); llvm_arg_pairs.insert(*gep_value, new_load_value); new_load_value } }; - let llvm_pair = LLVMPair { - original_value: *gep_value, - new_value: load_value, - }; - llvm_arg_pairs.insert(*gep_value, load_value); + // let llvm_pair = LLVMPair { + // original_value: *gep_value, + // new_value: load_value, + // }; + // llvm_arg_pairs.insert(*gep_value, load_value); load_value } VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { From 93074a92f799c2fdadabba0fcbc4f6ed890a50de Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 17 Mar 2022 21:10:06 -0400 Subject: [PATCH 065/143] delete dead commented out code --- src/dios-egraphs/Diospyros/c-tests/turnt.toml | 1 + src/dios-egraphs/Diospyros/diospyros.cpp | 41 +- .../Diospyros/fail-tests/test2-local-array.c | 276 ++++++ .../Diospyros/fail-tests/test3-local-array.c | 314 ++++++ .../Diospyros/llvm-tests/turnt.toml | 1 + .../Diospyros/randomized-tests/turnt.toml | 1 + src/dios-egraphs/Diospyros/runt_FAIL.py | 4 + src/dios-egraphs/Diospyros/src/lib.rs | 923 ++---------------- src/dios-egraphs/Diospyros/test-runner.sh | 3 + 9 files changed, 686 insertions(+), 878 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/c-tests/turnt.toml create mode 100644 src/dios-egraphs/Diospyros/fail-tests/test2-local-array.c create mode 100644 src/dios-egraphs/Diospyros/fail-tests/test3-local-array.c create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/turnt.toml create mode 100644 src/dios-egraphs/Diospyros/randomized-tests/turnt.toml create mode 100644 src/dios-egraphs/Diospyros/test-runner.sh diff --git a/src/dios-egraphs/Diospyros/c-tests/turnt.toml b/src/dios-egraphs/Diospyros/c-tests/turnt.toml new file mode 100644 index 00000000..9a0ac4b9 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/turnt.toml @@ -0,0 +1 @@ +command = "bash ../test-runner.sh c-tests/{filename}" diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 884fc280..63f0ad67 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -592,8 +592,9 @@ Instruction *dfs_instructions(Instruction *current_instr, } } LLVMPair new_pair; - assert(isa(current_instr) || isa(current_instr)); - assert(isa(cloned_instr) || isa(cloned_instr)); + // assert(isa(current_instr) || + // isa(current_instr)); assert(isa(cloned_instr) + // || isa(cloned_instr)); new_pair.original_value = wrap(current_instr); new_pair.new_value = wrap(cloned_instr); translated_exprs.push_back(new_pair); @@ -613,26 +614,26 @@ Instruction *dfs_instructions(Instruction *current_instr, } } - if (isa(current_instr)) { - bool load_in_map = false; - for (LLVMPair pair : translated_exprs) { - Instruction *original_val = - dyn_cast(unwrap(pair.original_value)); - if (current_instr == original_val) { - load_in_map = true; - } - } - if (!load_in_map) { - LLVMPair new_pair; - assert(isa(current_instr) || - isa(current_instr)); - assert(isa(cloned_instr) || - isa(cloned_instr)); - new_pair.original_value = wrap(current_instr); - new_pair.new_value = wrap(cloned_instr); - translated_exprs.push_back(new_pair); + // if (isa(current_instr)) { + bool in_map = false; + for (LLVMPair pair : translated_exprs) { + Instruction *original_val = + dyn_cast(unwrap(pair.original_value)); + if (current_instr == original_val) { + in_map = true; } } + if (!in_map) { + LLVMPair new_pair; + // 
assert(isa(current_instr) || + // isa(current_instr)); + // assert(isa(cloned_instr) || + // isa(cloned_instr)); + new_pair.original_value = wrap(current_instr); + new_pair.new_value = wrap(cloned_instr); + translated_exprs.push_back(new_pair); + } + // } BasicBlock::InstListType &intermediate_instrs = B->getInstList(); intermediate_instrs.push_back(cloned_instr); return cloned_instr; diff --git a/src/dios-egraphs/Diospyros/fail-tests/test2-local-array.c b/src/dios-egraphs/Diospyros/fail-tests/test2-local-array.c new file mode 100644 index 00000000..4339b38b --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/test2-local-array.c @@ -0,0 +1,276 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 3 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + float I[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float e[SIZE] = {[0 ... 
SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + float u[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float v[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + float q_min[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float q_t[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + printf("%f\n", A[i]); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + // assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + // assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test3-local-array.c b/src/dios-egraphs/Diospyros/fail-tests/test3-local-array.c new file mode 100644 index 00000000..7adc9634 --- /dev/null +++ b/src/dios-egraphs/Diospyros/fail-tests/test3-local-array.c @@ -0,0 +1,314 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 3 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, 
sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + float I[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float e[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + float u[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float v[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + float q_min[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float q_t[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + // else { + // // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE + // * + // // SIZE); + // float res[SIZE * SIZE] = {[0 ... 
SIZE * SIZE - 1] = 0.0f}; + // for (int i = 0; i < SIZE * SIZE; i++) { + // res[i] = 0.0f; + // } + // naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + // for (int i = 0; i < SIZE * SIZE; i++) { + // Q[i] = res[i]; + // } + // naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + // for (int i = 0; i < SIZE * SIZE; i++) { + // R[i] = res[i]; + // } + // } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + // else { + // float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + // no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // memcpy(Q, res, sizeof(float) * SIZE * SIZE); + // no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // memcpy(R, res, sizeof(float) * SIZE * SIZE); + // } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + printf("%f\n", A[i]); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + // assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + // assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/turnt.toml b/src/dios-egraphs/Diospyros/llvm-tests/turnt.toml new file mode 100644 index 00000000..10455d53 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/turnt.toml @@ -0,0 +1 @@ +command = "bash ../test-runner.sh llvm-tests/{filename}" diff --git a/src/dios-egraphs/Diospyros/randomized-tests/turnt.toml b/src/dios-egraphs/Diospyros/randomized-tests/turnt.toml new file mode 100644 index 00000000..8f5b227e --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/turnt.toml @@ -0,0 +1 @@ +command = "bash ../test-runner.sh randomized-tests/{filename}" diff --git a/src/dios-egraphs/Diospyros/runt_FAIL.py b/src/dios-egraphs/Diospyros/runt_FAIL.py index 8d7bb4ae..15fdaef9 100644 --- a/src/dios-egraphs/Diospyros/runt_FAIL.py +++ b/src/dios-egraphs/Diospyros/runt_FAIL.py @@ -2,6 +2,10 @@ import subprocess import sys +# I'd like for this to be able to detect if there is an "error" in a file +# e.g. an assertion error +# I'd like to grep for an error strings, and then report if the test passed or failed. + @click.command() @click.argument('test_file', diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 11fd9699..02c84153 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -51,30 +51,10 @@ extern "C" { // LLVM Value Ref that LLVM Generated type GEPMap = BTreeMap<(Symbol, Symbol), LLVMValueRef>; type LLVMPairMap = HashMap; -// VarMap : Maps a symbol to a llvm value ref representing a variable -// type VarMap = BTreeMap; -// // BopMap : Maps a binary oeprator llvm value ref to an ID, indicating a -// // binary operator has been seen. Binary Operators be ordered in the order -// // they were generated in LLVM, which is earliest to latest in code. 
-// type BopMap = BTreeMap; -// // ValueVec : A vector of LLVM Value Refs for which we must do extract element -// // for after vectorization. -// type ValueVec = Vec; - -// const SQRT_OPERATOR: i32 = 3; -// const BINARY_OPERATOR: i32 = 2; -// static mut SYMBOL_IDX: i32 = 0; + static mut ARG_IDX: i32 = 0; static mut CALL_IDX: i32 = 0; static mut NODE_IDX: u32 = 0; -// static mut PHI_IDX: u32 = 0; - -// unsafe fn gen_symbol_name() -> String { -// SYMBOL_IDX += 1; -// let string = "SYMBOL".to_string(); -// let result = format!("{}{}", string, SYMBOL_IDX.to_string()); -// result -// } unsafe fn gen_node_idx() -> u32 { NODE_IDX += 1; @@ -95,13 +75,6 @@ unsafe fn gen_call_name() -> String { result } -// unsafe fn gen_phi_name() -> String { -// PHI_IDX += 1; -// let string = "PHI".to_string(); -// let result = format!("{}{}", string, PHI_IDX.to_string()); -// result -// } - // Reference Comparison: https://www.reddit.com/r/rust/comments/2r3wjk/is_there_way_to_compare_objects_by_address_in_rust/ // Compares whether addresses of LLVMValueRefs are the same. // Not the contents of the Value Refs @@ -124,270 +97,6 @@ unsafe fn choose_binop(bop: &LLVMValueRef, ids: [Id; 2]) -> VecLang { } } -// /// Convert the sqrt into a unique symbol, which maps to the sqet argument LLVMValueRef -// /// And then Make sqrt point to that unique symbol. -// /// On the other side, the symbol gets retranslted to the LLVMValueRef argument that came in -// /// and then the sqrt takes the square root of it. -// unsafe fn to_expr_sqrt( -// sqrt_ref: &LLVMValueRef, -// var_map: &mut VarMap, -// enode_vec: &mut Vec, -// ) -> () { -// let symbol = Symbol::from(gen_symbol_name()); -// enode_vec.push(VecLang::Symbol(symbol)); -// let symbol_idx = enode_vec.len() - 1; -// var_map.insert(symbol, *sqrt_ref); -// enode_vec.push(VecLang::Sqrt([Id::from(symbol_idx)])); -// } - -// /// Converts LLVMValueRef constant to a VecLang Num. -// unsafe fn to_expr_constant( -// operand: &LLVMValueRef, -// vec: &mut Vec, -// ids: &mut [egg::Id; 2], -// id_index: usize, -// ) -> () { -// let value = get_constant_float(*operand); -// vec.push(VecLang::Num(value as i32)); -// ids[id_index] = Id::from(vec.len() - 1); -// } - -// /// Converts LLVMValueRef GEP to a VecLang Symbol with variable name. -// unsafe fn to_expr_var( -// var_operand: &LLVMValueRef, -// enode_vec: &mut Vec, -// ids: &mut [egg::Id; 2], -// id_index: usize, -// var_map: &mut VarMap, -// ) -> () { -// let var_name = CStr::from_ptr(llvm_name(*var_operand)).to_str().unwrap(); -// let symbol = Symbol::from(var_name); -// enode_vec.push(VecLang::Symbol(symbol)); -// ids[id_index] = Id::from(enode_vec.len() - 1); -// (*var_map).insert(symbol, *var_operand); -// } - -// /// Converts LLVMValueRef GEP to a VecLang Get and VecLang Symbol for array -// /// and VecLang Symbol for offset. 
-// unsafe fn to_expr_gep( -// gep_operand: &LLVMValueRef, -// ids: &mut [egg::Id; 2], -// id_index: usize, -// enode_vec: &mut Vec, -// gep_map: &mut GEPMap, -// ) -> () { -// let array_var_name = CStr::from_ptr(llvm_name(*gep_operand)).to_str().unwrap(); -// enode_vec.push(VecLang::Symbol(Symbol::from(array_var_name))); -// let array_var_idx = enode_vec.len() - 1; -// // --- get offsets for multidimensional arrays ---- -// let num_gep_operands = LLVMGetNumOperands(*gep_operand); -// let mut indices = Vec::new(); -// for operand_idx in 1..num_gep_operands { -// let array_offset = llvm_index(*gep_operand, operand_idx); -// indices.push(array_offset); -// } -// let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); -// let offsets_symbol = Symbol::from(&offsets_string); -// enode_vec.push(VecLang::Symbol(offsets_symbol)); -// let array_offset_idx = enode_vec.len() - 1; - -// enode_vec.push(VecLang::Get([ -// Id::from(array_var_idx), -// Id::from(array_offset_idx), -// ])); - -// ids[id_index] = Id::from(enode_vec.len() - 1); - -// let array_name_symbol = Symbol::from(array_var_name); -// gep_map.insert((array_name_symbol, offsets_symbol), *gep_operand); -// } - -// /// Makes binary operators as "used", which means that no extract is needed -// /// for these binary operators. -// /// -// /// For example: -// /// x = (3 + z) + (2 + y) -// /// will record 3 + z, 2 + y as used in the final addition (3 + z) + (2 + y)/ -// /// Only 1 extraction is needed (to assign to x's location). Not 3. -// unsafe fn mark_used_bops( -// operand: &LLVMValueRef, -// ids: &mut [egg::Id; 2], -// id_index: usize, -// bop_map: &mut BopMap, -// used_bop_ids: &mut Vec, -// ) -> bool { -// let mut changed = false; -// for (&prev_used_bop, &mut prev_used_id) in bop_map { -// if dfs_llvm_value_ref(*operand, prev_used_bop) { -// ids[id_index] = prev_used_id; -// used_bop_ids.push(prev_used_id); -// changed |= true; -// } -// } -// return changed; -// } - -// /// Converts LLVMValueRef operand to corresponding VecLang node -// unsafe fn to_expr_operand( -// operand: &LLVMValueRef, -// bop_map: &mut BopMap, -// ids: &mut [egg::Id; 2], -// id_index: usize, -// used_bop_ids: &mut Vec, -// enode_vec: &mut Vec, -// gep_map: &mut GEPMap, -// var_map: &mut VarMap, -// ) -> () { -// let removed_bops = mark_used_bops(operand, ids, id_index, bop_map, used_bop_ids); -// if removed_bops { -// return (); -// } -// if bop_map.contains_key(&operand) { -// let used_id = *bop_map.get(&operand).expect("Expected key in map"); -// ids[id_index] = used_id; -// used_bop_ids.push(used_id); -// } else if isa_bop(*operand) { -// } else if isa_constant(*operand) { -// to_expr_constant(&operand, enode_vec, ids, id_index); -// } else if isa_load(*operand) { -// let inner_operand = LLVMGetOperand(*operand, 0); -// if isa_gep(inner_operand) { -// to_expr_gep(&inner_operand, ids, id_index, enode_vec, gep_map); -// } else { -// // assume load of some temporary/global variable -// to_expr_var(operand, enode_vec, ids, id_index, var_map); -// } -// } else { -// panic!("Cannot handle LLVM IR Operand.") -// } -// } - -// /// Pads a vector to be always the Vector Lane Width. 
-// fn pad_vector(binop_vec: &Vec, enode_vec: &mut Vec) -> () { -// let width = config::vector_width(); -// let mut length = binop_vec.len(); -// let mut vec_indices = Vec::new(); -// let mut idx = 0; -// while length > width { -// let mut width_vec = Vec::new(); -// for _ in 0..width { -// width_vec.push(binop_vec[idx]); -// idx += 1; -// length -= 1; -// } -// enode_vec.push(VecLang::Vec(width_vec.into_boxed_slice())); -// vec_indices.push(enode_vec.len() - 1); -// } -// // wrap up extras at end -// let diff = width - length; -// let mut extras = Vec::new(); -// for _ in 0..diff { -// enode_vec.push(VecLang::Num(0)); -// extras.push(enode_vec.len() - 1); -// } -// let mut final_vec = Vec::new(); -// let original_length = binop_vec.len(); -// for i in idx..original_length { -// final_vec.push(binop_vec[i]); -// } -// for id in extras.iter() { -// final_vec.push(Id::from(*id)); -// } -// enode_vec.push(VecLang::Vec(final_vec.into_boxed_slice())); -// vec_indices.push(enode_vec.len() - 1); -// // create concats -// let mut num_concats = vec_indices.len() - 1; -// let mut idx = 0; -// let mut prev_id = Id::from(vec_indices[idx]); -// idx += 1; -// while num_concats > 0 { -// let concat = VecLang::Concat([prev_id, Id::from(vec_indices[idx])]); -// enode_vec.push(concat); -// prev_id = Id::from(enode_vec.len() - 1); -// idx += 1; -// num_concats -= 1; -// } -// } - -// /// Converts LLVMValueRef to a corresponding VecLang expression, as well as a GEPMap, -// /// which maps each LLVM gep expression to a symbol representing the array name -// /// and a symbol representing the array offset, a var map, which maps a symbol to the -// /// LLVMValueRef representing the variable, and a ValueVec, which reprsents -// /// the values we generate extract instructions on. -// pub fn to_expr( -// bb_vec: &[LLVMValueRef], -// operand_types: &[i32], -// ) -> (RecExpr, GEPMap, VarMap, ValueVec) { -// let (mut enode_vec, mut bops_vec, mut ops_to_replace, mut used_bop_ids) = -// (Vec::new(), Vec::new(), Vec::new(), Vec::new()); -// let (mut gep_map, mut var_map, mut bop_map) = (BTreeMap::new(), BTreeMap::new(), BTreeMap::new()); -// let mut ids = [Id::from(0); 2]; -// for (i, bop) in bb_vec.iter().enumerate() { -// unsafe { -// if operand_types[i] == BINARY_OPERATOR { -// // to_expr on left and then right operands -// to_expr_operand( -// &LLVMGetOperand(*bop, 0), -// &mut bop_map, -// &mut ids, -// 0, -// &mut used_bop_ids, -// &mut enode_vec, -// &mut gep_map, -// &mut var_map, -// ); -// to_expr_operand( -// &LLVMGetOperand(*bop, 1), -// &mut bop_map, -// &mut ids, -// 1, -// &mut used_bop_ids, -// &mut enode_vec, -// &mut gep_map, -// &mut var_map, -// ); -// // lhs bop rhs -// enode_vec.push(choose_binop(bop, ids)); -// } else if operand_types[i] == SQRT_OPERATOR { -// // currently fails to generate correct code or optimize. 
-// // to_expr_sqrt(bop, &mut var_map, &mut enode_vec); -// } -// } -// // add in the binary/unary operator to the bops_vec list -// let id = Id::from(enode_vec.len() - 1); -// bops_vec.push(id); -// ops_to_replace.push((*bop, id)); -// bop_map.insert(*bop, id); -// // remove binops that are used as part of another binop -// for used_id in used_bop_ids.iter() { -// if bops_vec.contains(used_id) { -// let index = bops_vec -// .iter() -// .position(|&_id| _id == *used_id) -// .expect("Require used_id in vector"); -// bops_vec.remove(index); -// } -// } -// } -// // decompose bops_vec into width number of binops -// pad_vector(&bops_vec, &mut enode_vec); - -// // remove binary ops that were used, and thus not the ones we want to replace directly -// let mut final_ops_to_replace = Vec::new(); -// for (bop, id) in ops_to_replace.iter() { -// if !used_bop_ids.contains(id) { -// final_ops_to_replace.push(*bop); -// } -// } - -// return ( -// RecExpr::from(enode_vec), -// gep_map, -// var_map, -// final_ops_to_replace, -// ); -// } - /// Translates VecLang binop expression node to the corresponding LLVMValueRef unsafe fn translate_binop( enode: &VecLang, @@ -457,255 +166,6 @@ unsafe fn translate_get(get: &VecLang, enode_vec: &[VecLang]) -> (Symbol, Symbol } } -// /// translate converts a VecLang expression to the corresponding LLVMValueRef. -// unsafe fn translate( -// enode: &VecLang, -// vec: &[VecLang], -// gep_map: &GEPMap, -// var_map: &VarMap, -// builder: LLVMBuilderRef, -// module: LLVMModuleRef, -// ) -> LLVMValueRef { -// match enode { -// VecLang::Symbol(s) => *var_map.get(s).expect("Var map lookup error"), -// VecLang::Num(n) => LLVMConstReal(LLVMFloatTypeInContext(context), *n as f64), -// VecLang::Get(..) => { -// let (array_name, array_offsets) = translate_get(enode, vec); -// let gep_value = gep_map -// .get(&(array_name, array_offsets)) -// .expect("Symbol map lookup error"); -// LLVMBuildLoad(builder, *gep_value, b"\0".as_ptr() as *const _) -// } -// VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { -// let idvec = boxed_ids.to_vec(); -// let idvec_len = idvec.len(); -// let mut zeros = Vec::new(); -// for _ in 0..idvec_len { -// zeros.push(LLVMConstReal(LLVMFloatTypeInContext(context), 0 as f64)); -// } -// let zeros_ptr = zeros.as_mut_ptr(); -// let mut vector = LLVMConstVector(zeros_ptr, idvec.len() as u32); -// for (idx, &eggid) in idvec.iter().enumerate() { -// let elt = &vec[usize::from(eggid)]; -// let elt_val = translate(elt, vec, gep_map, var_map, builder, module); -// vector = LLVMBuildInsertElement( -// builder, -// vector, -// elt_val, -// LLVMConstInt(LLVMIntTypeInContext(context, 32), idx as u64, 0), -// b"\0".as_ptr() as *const _, -// ); -// } -// vector -// } -// VecLang::VecAdd([l, r]) -// | VecLang::VecMinus([l, r]) -// | VecLang::VecMul([l, r]) -// | VecLang::VecDiv([l, r]) -// | VecLang::Add([l, r]) -// | VecLang::Minus([l, r]) -// | VecLang::Mul([l, r]) -// | VecLang::Div([l, r]) -// | VecLang::Or([l, r]) -// | VecLang::And([l, r]) -// | VecLang::Lt([l, r]) => { -// let left = translate( -// &vec[usize::from(*l)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// let right = translate( -// &vec[usize::from(*r)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// translate_binop(enode, left, right, builder, b"\0".as_ptr() as *const _) -// } -// VecLang::Concat([v1, v2]) => { -// let trans_v1 = translate( -// &vec[usize::from(*v1)], -// vec, -// gep_map, -// var_map, -// builder, -// 
module, -// ); -// let trans_v2 = translate( -// &vec[usize::from(*v2)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// // manually concatenate 2 vectors by using a LLVM shuffle operation. -// let v1_type = LLVMTypeOf(trans_v1); -// let v1_size = LLVMGetVectorSize(v1_type); -// let v2_type = LLVMTypeOf(trans_v2); -// let v2_size = LLVMGetVectorSize(v2_type); -// let size = v1_size + v2_size; -// let mut indices = Vec::new(); -// for i in 0..size { -// indices.push(LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0)); -// } -// let mask = indices.as_mut_ptr(); -// let mask_vector = LLVMConstVector(mask, size); -// LLVMBuildShuffleVector( -// builder, -// trans_v1, -// trans_v2, -// mask_vector, -// b"\0".as_ptr() as *const _, -// ) -// } -// VecLang::VecMAC([acc, v1, v2]) => { -// let trans_acc = translate( -// &vec[usize::from(*acc)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// let trans_v1 = translate( -// &vec[usize::from(*v1)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// let trans_v2 = translate( -// &vec[usize::from(*v2)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// let vec_type = LLVMTypeOf(trans_acc); - -// let param_types = [vec_type, vec_type, vec_type].as_mut_ptr(); -// let fn_type = LLVMFunctionType(vec_type, param_types, 3, 0 as i32); -// let func = LLVMAddFunction(module, b"llvm.fma.f32\0".as_ptr() as *const _, fn_type); -// let args = [trans_v1, trans_v2, trans_acc].as_mut_ptr(); -// LLVMBuildCall(builder, func, args, 3, b"\0".as_ptr() as *const _) -// } -// // TODO: VecNeg, VecSqrt, VecSgn all have not been tested, need test cases. -// // TODO: LLVM actually supports many more vector intrinsics, including -// // vector sine/cosine instructions for floats. -// VecLang::VecNeg([v]) => { -// let neg_vector = translate( -// &vec[usize::from(*v)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// LLVMBuildFNeg(builder, neg_vector, b"\0".as_ptr() as *const _) -// } -// VecLang::VecSqrt([v]) => { -// let sqrt_vec = translate( -// &vec[usize::from(*v)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// let vec_type = LLVMTypeOf(sqrt_vec); -// let param_types = [vec_type].as_mut_ptr(); -// let fn_type = LLVMFunctionType(vec_type, param_types, 1, 0 as i32); -// let func = LLVMAddFunction(module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); -// let args = [sqrt_vec].as_mut_ptr(); -// LLVMBuildCall(builder, func, args, 1, b"\0".as_ptr() as *const _) -// } -// // compliant with c++ LibMath copysign function, which differs with sgn at x = 0. 
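The comment above, removed along with the rest of this dead code, notes that the VecSgn lowering relies on LLVM's copysign intrinsic, which matches C's copysignf and therefore returns plus or minus 1 at x = 0, where a mathematical sign function returns 0. A small standalone C check of that edge case; the sgn helper is written here only for the comparison:

#include <math.h>   /* may need -lm when linking */
#include <stdio.h>

/* Mathematical sign: -1, 0, or +1. */
static float sgn(float x) { return (float)((x > 0.0f) - (x < 0.0f)); }

int main(void) {
    float xs[3] = {-2.5f, 0.0f, 3.0f};
    for (int i = 0; i < 3; i++) {
        /* copysignf(1, x) is what a copysign-based lowering computes. */
        printf("x=%5.1f  copysignf(1,x)=%4.1f  sgn(x)=%4.1f\n",
               xs[i], copysignf(1.0f, xs[i]), sgn(xs[i]));
    }
    return 0;  /* at x = 0 the two differ: copysignf gives 1, sgn gives 0 */
}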
-// VecLang::VecSgn([v]) => { -// let sgn_vec = translate( -// &vec[usize::from(*v)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// let vec_type = LLVMTypeOf(sgn_vec); -// let vec_size = LLVMGetVectorSize(vec_type); -// let mut ones = Vec::new(); -// for _ in 0..vec_size { -// ones.push(LLVMConstReal(LLVMFloatTypeInContext(context), 1 as f64)); -// } -// let ones_ptr = ones.as_mut_ptr(); -// let ones_vector = LLVMConstVector(ones_ptr, vec_size); -// let param_types = [vec_type, vec_type].as_mut_ptr(); -// let fn_type = LLVMFunctionType(vec_type, param_types, 2, 0 as i32); -// let func = LLVMAddFunction(module, b"llvm.copysign.f32\0".as_ptr() as *const _, fn_type); -// let args = [ones_vector, sgn_vec].as_mut_ptr(); -// LLVMBuildCall(builder, func, args, 2, b"\0".as_ptr() as *const _) -// } -// VecLang::Sgn([n]) | VecLang::Sqrt([n]) | VecLang::Neg([n]) => { -// let number = translate( -// &vec[usize::from(*n)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// translate_unop(enode, number, builder, module, b"\0".as_ptr() as *const _) -// } -// VecLang::Ite(..) => panic!("Ite is not handled."), -// } -// } - -// /// Convert a Veclang `expr` to LLVM IR code in place, using an LLVM builder. -// unsafe fn to_llvm( -// module: LLVMModuleRef, -// expr: RecExpr, -// gep_map: &GEPMap, -// var_map: &VarMap, -// ops_to_replace: &ValueVec, -// builder: LLVMBuilderRef, -// ) -> () { -// let vec = expr.as_ref(); -// let last = vec -// .last() -// .expect("No match for last element of vector of Egg Terms."); - -// // create vectorized instructions. -// let vector = translate(last, vec, gep_map, var_map, builder, module); - -// // for each binary operation that has been vectorized AND requires replacement -// // we extract the correct index from the vector and -// // determine the store to that binary op, copy it and move it after the extraction -// for (i, op) in ops_to_replace.iter().enumerate() { -// let index = LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0); -// let extracted_value = -// LLVMBuildExtractElement(builder, vector, index, b"\0".as_ptr() as *const _); -// // figure out where the next store is located, after the binary operation to replace. -// let mut store_instr = *op; -// // assumes there is a store next: could segfault or loop forever if not. -// // WARNING: In particular, could infinitely loop under -02/-03 optimizations. -// while !isa_store(store_instr) { -// store_instr = LLVMGetNextInstruction(store_instr); -// } -// let cloned_store = LLVMInstructionClone(store_instr); -// LLVMSetOperand(cloned_store, 0, extracted_value); -// LLVMInsertIntoBuilder(builder, cloned_store); -// // erase stores -> this was affecting a load and then a store to the same -// // location in matrix multiply -// LLVMInstructionEraseFromParent(store_instr); -// } -// } - /// Main function to optimize: Takes in a basic block of instructions, /// optimizes it, and then translates it to LLVM IR code, in place. 
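In the hunks that follow, optimize keeps a cache, llvm_arg_pairs, mapping each original LLVMValueRef to the value already rebuilt for it; entries are looked up by address (cmp_val_ref_address) so an alloca or load that was cloned once is reused rather than regenerated, and this commit strips the older Vec-based bookkeeping and the load/alloca assertions around it. A rough C sketch of that find-or-insert pattern, with opaque pointers standing in for LLVMValueRefs and every name here hypothetical:

#include <stdio.h>

/* Hypothetical stand-in for the (original value -> rebuilt value) cache. */
typedef struct {
    const void *original;
    void *rebuilt;
} ValuePair;

#define MAX_PAIRS 64
static ValuePair cache[MAX_PAIRS];
static int cache_len = 0;

/* Return the cached clone for original, or remember fresh and return it. */
static void *find_or_insert(const void *original, void *fresh) {
    for (int i = 0; i < cache_len; i++)
        if (cache[i].original == original)  /* address comparison, like cmp_val_ref_address */
            return cache[i].rebuilt;
    if (cache_len < MAX_PAIRS)
        cache[cache_len++] = (ValuePair){original, fresh};
    return fresh;
}

int main(void) {
    int load_a = 1, clone_1 = 10, clone_2 = 20;
    void *first  = find_or_insert(&load_a, &clone_1);  /* inserts: returns &clone_1 */
    void *second = find_or_insert(&load_a, &clone_2);  /* cache hit: still &clone_1 */
    printf("%s\n", first == second ? "reused" : "recreated");
    return 0;
}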
@@ -743,22 +203,14 @@ pub fn optimize( // llvm to egg let llvm_instrs = from_raw_parts(bb, size); let past_llvm_instrs = from_raw_parts(past_instrs, past_size); - // let mut llvm_arg_pairs = Vec::new(); let mut llvm_arg_pairs = HashMap::new(); for instr_pair in past_llvm_instrs { let original_value = instr_pair.original_value; let new_value = instr_pair.new_value; - assert!(isa_load(original_value) || isa_alloca(original_value)); - assert!(isa_load(new_value) || isa_alloca(new_value)); + // assert!(isa_load(original_value) || isa_alloca(original_value)); + // assert!(isa_load(new_value) || isa_alloca(new_value)); llvm_arg_pairs.insert(original_value, new_value); } - // for instr_pair in past_llvm_instrs { - // let new_instr_pair = LLVMPair { - // original_value: instr_pair.original_value, - // new_value: instr_pair.new_value, - // }; - // llvm_arg_pairs.push(new_instr_pair); - // } let mut node_to_arg = Vec::new(); let (expr, gep_map, store_map, symbol_map) = llvm_to_egg(llvm_instrs, &mut llvm_arg_pairs, &mut node_to_arg); @@ -789,22 +241,14 @@ pub fn optimize( builder, ); - // let mut final_llvm_arg_pairs = Vec::new(); - // for pair in llvm_arg_pairs { - // final_llvm_arg_pairs.push(pair); - // } let mut final_llvm_arg_pairs = Vec::new(); for (unchanged_val, new_val) in llvm_arg_pairs.iter() { let pair = LLVMPair { original_value: *unchanged_val, new_value: *new_val, }; - assert!(isa_load(*unchanged_val) || isa_alloca(*unchanged_val)); - assert!(isa_load(*new_val) || isa_alloca(*new_val)); - // compare structurally, not pointer addresses - // _llvm_recursive_print(*unchanged_val); - // _llvm_recursive_print(*new_val); - // assert!(*unchanged_val == *new_val); + // assert!(isa_load(*unchanged_val) || isa_alloca(*unchanged_val)); + // assert!(isa_load(*new_val) || isa_alloca(*new_val)); final_llvm_arg_pairs.push(pair); } @@ -844,17 +288,8 @@ enum LLVMOpType { Sqrt32, Sqrt64, FPExt, - // Phi, } -// unsafe fn is_pow2(n: u32) -> bool { -// let mut pow = 1; -// while pow < n { -// pow *= 2; -// } -// return pow == n; -// } - unsafe fn get_pow2(n: u32) -> u32 { let mut pow = 1; while pow < n { @@ -936,92 +371,6 @@ unsafe fn _llvm_recursive_print(inst: LLVMValueRef) -> () { return; } -// unsafe fn llvm_recursive_add( -// builder: LLVMBuilderRef, -// inst: LLVMValueRef, -// context: LLVMContextRef, -// llvm_arg_pairs: &mut LLVMPairMap, -// ) -> LLVMValueRef { -// if isa_argument(inst) { -// let mut indices = Vec::new(); -// for i in 0..1 { -// indices.push(LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0)); -// } -// let indices_vector = indices.as_mut_ptr(); -// return LLVMBuildGEP(builder, inst, indices_vector, 1, b"\0".as_ptr() as *const _); -// // return inst; -// } else if isa_constant(inst) { -// return inst; -// } else if isa_phi(inst) { -// return inst; -// } else if isa_alloca(inst) { -// // We have this in the base case to stop reconstruction of allocas, -// // because allocas are like loads, and should not get reconstructioned -// // search the llvm_arg_pairs for allocas that were already created -// let mut matched = false; -// let mut ret_value = inst; -// for llvm_pair in &*llvm_arg_pairs { -// let original_llvm = llvm_pair.original_value; -// let new_llvm = llvm_pair.new_value; -// if cmp_val_ref_address(&original_llvm, &inst) { -// matched = true; -// ret_value = new_llvm; -// break; -// } -// } -// if matched { -// return ret_value; -// } else { -// // Don't clone Inst; we should only clone if recursive call, -// // which is handled previously -// return inst; -// } -// 
} -// // TODO: CALLs should not be rebuilt? -// // else if isa_call(inst) { -// // let cloned_inst = LLVMInstructionClone(inst); -// // LLVMInsertIntoBuilder(builder, cloned_inst); -// // return cloned_inst; -// // } -// let cloned_inst = LLVMInstructionClone(inst); -// let num_ops = LLVMGetNumOperands(inst); -// for i in 0..num_ops { -// let operand = LLVMGetOperand(inst, i as u32); -// // search the llvm_arg_pairs -// let mut matched = false; -// let mut ret_value = operand; -// for llvm_pair in &mut *llvm_arg_pairs { -// let original_llvm = llvm_pair.original_value; -// let new_llvm = llvm_pair.new_value; -// if !matched && cmp_val_ref_address(&original_llvm, &operand) { -// matched = true; -// ret_value = new_llvm; -// } -// } -// if matched { -// LLVMSetOperand(cloned_inst, i as u32, ret_value); -// } else { -// let new_inst = llvm_recursive_add(builder, operand, context, llvm_arg_pairs); -// LLVMSetOperand(cloned_inst, i as u32, new_inst); - -// let pair = LLVMPair { -// new_value: new_inst, -// original_value: operand, -// }; -// llvm_arg_pairs.push(pair); -// } -// } -// LLVMInsertIntoBuilder(builder, cloned_inst); - -// let pair = LLVMPair { -// new_value: cloned_inst, -// original_value: inst, -// }; -// llvm_arg_pairs.push(pair); - -// return cloned_inst; -// } - unsafe fn llvm_recursive_add( builder: LLVMBuilderRef, inst: LLVMValueRef, @@ -1030,18 +379,11 @@ unsafe fn llvm_recursive_add( ) -> LLVMValueRef { let cloned_inst = LLVMInstructionClone(inst); if isa_argument(inst) { - let mut indices = Vec::new(); - for i in 0..1 { - indices.push(LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0)); - } - let indices_vector = indices.as_mut_ptr(); - return LLVMBuildGEP(builder, inst, indices_vector, 1, b"\0".as_ptr() as *const _); + return inst; } let mut matched = false; let mut ret_value = inst; for (original_val, new_val) in (&*llvm_arg_pairs).iter() { - // let original_llvm = llvm_pair.original_value; - // let new_llvm = llvm_pair.new_value; if cmp_val_ref_address(&**original_val, &*inst) { matched = true; ret_value = *new_val; @@ -1073,12 +415,8 @@ unsafe fn llvm_recursive_add( if matched { return ret_value; } else { - // let pair = LLVMPair { - // new_value: cloned_inst, - // original_value: inst, - // }; - assert!(isa_load(inst) || isa_alloca(inst)); - assert!(isa_load(cloned_inst) || isa_alloca(cloned_inst)); + // assert!(isa_load(inst) || isa_alloca(inst)); + // assert!(isa_load(cloned_inst) || isa_alloca(cloned_inst)); llvm_arg_pairs.insert(inst, cloned_inst); LLVMInsertIntoBuilder(builder, cloned_inst); return cloned_inst; @@ -1092,23 +430,18 @@ unsafe fn llvm_recursive_add( } LLVMInsertIntoBuilder(builder, cloned_inst); - // let pair = LLVMPair { - // new_value: cloned_inst, - // original_value: inst, - // }; - if isa_load(inst) { - let mut load_in_map = false; - for (original_inst, _) in (&*llvm_arg_pairs).iter() { - if cmp_val_ref_address(&**original_inst, &*inst) { - load_in_map = true; - } - } - if !load_in_map { - assert!(isa_load(inst) || isa_alloca(inst)); - assert!(isa_load(cloned_inst) || isa_alloca(cloned_inst)); - llvm_arg_pairs.insert(inst, cloned_inst); + let mut in_map = false; + for (original_inst, _) in (&*llvm_arg_pairs).iter() { + if cmp_val_ref_address(&**original_inst, &*inst) { + in_map = true; } } + if !in_map { + // assert!(isa_load(inst) || isa_alloca(inst)); + // assert!(isa_load(cloned_inst) || isa_alloca(cloned_inst)); + llvm_arg_pairs.insert(inst, cloned_inst); + } + // } return cloned_inst; } @@ -1141,11 +474,7 @@ unsafe fn 
match_llvm_op(expr: &LLVMValueRef) -> LLVMOpType { return LLVMOpType::Sqrt64; } else if isa_fpext(*expr) { return LLVMOpType::FPExt; - } - // else if isa_phi(*expr) { - // return LLVMOpType::Phi; - // } - else { + } else { LLVMDumpValue(*expr); println!(); panic!("ref_to_egg: Unmatched case for LLVMValueRef {:?}", *expr); @@ -1212,7 +541,6 @@ unsafe fn bop_to_egg( llvm_arg_pairs, node_to_arg, ); - // let mut concat = [&v1[..], &v2[..]].concat(); // https://users.rust-lang.org/t/how-to-concatenate-two-vectors/8324/3 let ids = [ Id::from((next_idx1 - 1) as usize), Id::from((next_idx2 - 1) as usize), @@ -1260,7 +588,7 @@ unsafe fn gep_to_egg( _llvm_arg_pairs: &LLVMPairMap, _node_to_arg: &mut Vec, ) -> (Vec, i32) { - // assert!(isa_argument(expr) || isa_gep(expr) || isa_load(expr)); + // // assert!(isa_argument(expr) || isa_gep(expr) || isa_load(expr)); // let mut enode_vec = Vec::new(); let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); @@ -1384,44 +712,6 @@ unsafe fn load_to_egg( llvm_arg_pairs, node_to_arg, ); - // let addr = LLVMGetOperand(expr, 0); - // if isa_argument(addr) { - // return load_arg_to_egg( - // addr, - // enode_vec, - // next_idx, - // gep_map, - // store_map, - // id_map, - // symbol_map, - // llvm_arg_pairs, - // node_to_arg, - // ); - // } else if isa_gep(addr) { - // return gep_to_egg( - // expr, // we pass the entire instruction and not just the address - // enode_vec, - // next_idx, - // gep_map, - // store_map, - // id_map, - // symbol_map, - // llvm_arg_pairs, - // node_to_arg, - // ); - // } else { - // return address_to_egg( - // addr, - // enode_vec, - // next_idx, - // gep_map, - // store_map, - // id_map, - // symbol_map, - // llvm_arg_pairs, - // node_to_arg, - // ); - // } } unsafe fn store_to_egg( @@ -1480,7 +770,7 @@ unsafe fn _load_arg_to_egg( _llvm_arg_pairs: &LLVMPairMap, _node_to_arg: &mut Vec, ) -> (Vec, i32) { - assert!(isa_argument(expr) || isa_gep(expr)); + // assert!(isa_argument(expr) || isa_gep(expr)); let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); @@ -1549,7 +839,7 @@ unsafe fn fpext_to_egg( llvm_arg_pairs: &LLVMPairMap, node_to_arg: &mut Vec, ) -> (Vec, i32) { - assert!(isa_fpext(expr)); + // assert!(isa_fpext(expr)); let operand = LLVMGetOperand(expr, 0); return ref_to_egg( operand, @@ -1575,7 +865,7 @@ unsafe fn sqrt32_to_egg( llvm_arg_pairs: &LLVMPairMap, node_to_arg: &mut Vec, ) -> (Vec, i32) { - assert!(isa_sqrt32(expr)); + // assert!(isa_sqrt32(expr)); let operand = LLVMGetOperand(expr, 0); let (mut new_enode_vec, next_idx1) = ref_to_egg( operand, @@ -1604,7 +894,7 @@ unsafe fn sqrt64_to_egg( _llvm_arg_pairs: &LLVMPairMap, _node_to_arg: &mut Vec, ) -> (Vec, i32) { - assert!(isa_sqrt64(expr)); + // assert!(isa_sqrt64(expr)); panic!("Currently, we do not handle calls to sqrt.f64 without fpext and fptrunc before and after!. This is the only 'context sensitive' instance in the dispatch matching. ") } @@ -1619,7 +909,7 @@ unsafe fn fptrunc_to_egg( llvm_arg_pairs: &LLVMPairMap, node_to_arg: &mut Vec, ) -> (Vec, i32) { - assert!(isa_fptrunc(expr)); + // assert!(isa_fptrunc(expr)); let operand = LLVMGetOperand(expr, 0); if isa_sqrt64(operand) { return sqrt64_to_egg( @@ -1645,7 +935,6 @@ unsafe fn fptrunc_to_egg( llvm_arg_pairs, node_to_arg, ); - // panic!("TODO: Currently, only square roots for f64 are supported after fptrunc. 
"); } unsafe fn bitcast_to_egg( @@ -1659,7 +948,7 @@ unsafe fn bitcast_to_egg( llvm_arg_pairs: &LLVMPairMap, node_to_arg: &mut Vec, ) -> (Vec, i32) { - assert!(isa_bitcast(expr)); + // assert!(isa_bitcast(expr)); let operand = LLVMGetOperand(expr, 0); let result = ref_to_egg( operand, @@ -1675,35 +964,6 @@ unsafe fn bitcast_to_egg( return result; } -// unsafe fn phi_to_egg( -// expr: LLVMValueRef, -// mut enode_vec: Vec, -// next_idx: i32, -// gep_map: &mut GEPMap, -// _store_map: &mut StoreMap, -// _id_map: &mut IdMap, -// _symbol_map: &mut SymbolMap, -// _llvm_arg_pairs: LLVMPairMap, -// _node_to_arg: &mut Vec, -// ) -> (Vec, i32) { -// assert!(isa_phi(expr)); - -// let phi_name = gen_phi_name(); -// let symbol1 = Symbol::from(phi_name.clone()); -// enode_vec.push(VecLang::Symbol(symbol1)); -// let symbol2 = Symbol::from(phi_name); -// enode_vec.push(VecLang::Symbol(symbol2)); - -// let get_node = VecLang::Get([ -// Id::from((next_idx) as usize), -// Id::from((next_idx + 1) as usize), -// ]); -// (*gep_map).insert((symbol1, symbol2), expr); -// enode_vec.push(get_node); - -// return (enode_vec, next_idx + 3); -// } - unsafe fn ref_to_egg( expr: LLVMValueRef, mut enode_vec: Vec, @@ -1885,17 +1145,6 @@ unsafe fn ref_to_egg( llvm_arg_pairs, node_to_arg, ), - // LLVMOpType::Phi => phi_to_egg( - // expr, - // enode_vec, - // next_idx, - // gep_map, - // store_map, - // id_map, - // symbol_map, - // llvm_arg_pairs, - // node_to_arg, - // ), }; return (vec, next_idx); } @@ -1953,49 +1202,41 @@ unsafe fn translate_egg( module: LLVMModuleRef, ) -> LLVMValueRef { let instr = match enode { - VecLang::Symbol(symbol) => { - match symbol_map.get(enode) { - Some(llvm_instr) => llvm_recursive_add(builder, *llvm_instr, context, llvm_arg_pairs), - None => { - let mut matched = false; - let mut ret_value = LLVMBuildAdd( - builder, - LLVMConstReal(LLVMFloatTypeInContext(context), 0 as f64), - LLVMConstReal(LLVMFloatTypeInContext(context), 0 as f64), - b"nop\0".as_ptr() as *const _, - ); - for node_arg_pair in node_to_arg_pair { - let llvm_node = node_arg_pair.arg; - let node_index = node_arg_pair.node_int; - let string_node_index = node_index.to_string(); - if string_node_index.parse::().unwrap() == *symbol { - for (original_val, new_val) in (&mut *llvm_arg_pairs).iter() { - // let original_llvm = llvm_pair.original_value; - // let new_llvm = llvm_pair.new_value; - if cmp_val_ref_address(&**original_val, &*llvm_node) { - matched = true; - ret_value = *new_val; - break; - } + VecLang::Symbol(symbol) => match symbol_map.get(enode) { + Some(llvm_instr) => llvm_recursive_add(builder, *llvm_instr, context, llvm_arg_pairs), + None => { + let mut matched = false; + let mut ret_value = LLVMBuildAdd( + builder, + LLVMConstReal(LLVMFloatTypeInContext(context), 0 as f64), + LLVMConstReal(LLVMFloatTypeInContext(context), 0 as f64), + b"nop\0".as_ptr() as *const _, + ); + for node_arg_pair in node_to_arg_pair { + let llvm_node = node_arg_pair.arg; + let node_index = node_arg_pair.node_int; + let string_node_index = node_index.to_string(); + if string_node_index.parse::().unwrap() == *symbol { + for (original_val, new_val) in (&mut *llvm_arg_pairs).iter() { + if cmp_val_ref_address(&**original_val, &*llvm_node) { + matched = true; + ret_value = *new_val; + break; } } - if matched { - break; - } } if matched { - ret_value - } else { - panic!("No Match in Node Arg Pair List.") + break; } } + if matched { + ret_value + } else { + panic!("No Match in Node Arg Pair List.") + } } - // *symbol_map - // .get(enode) - // 
.expect("Symbol Should Exist in Symbol Map.") - } + }, VecLang::Num(n) => LLVMConstReal(LLVMFloatTypeInContext(context), *n as f64), - // VecLang::Num(n) => build_constant_float(*n as f64, context), VecLang::Get(..) => { let (array_name, array_offsets) = translate_get(enode, vec); let gep_value = gep_map @@ -2015,27 +1256,17 @@ unsafe fn translate_egg( matched_expr } else { let addr = LLVMGetOperand(*gep_value, 0); - // let cloned_gep = LLVMInstructionClone(addr); let new_gep = llvm_recursive_add(builder, addr, context, llvm_arg_pairs); let new_load = LLVMBuildLoad(builder, new_gep, b"\0".as_ptr() as *const _); - // let llvm_pair = LLVMPair { - // original_value: *gep_value, - // new_value: new_load, - // }; - assert!(isa_load(*gep_value) || isa_alloca(*gep_value)); - assert!(isa_load(new_load) || isa_alloca(new_load)); llvm_arg_pairs.insert(*gep_value, new_load); new_load } } else if isa_gep(*gep_value) { - // let cloned_gep = LLVMInstructionClone(*gep_value); let new_gep = llvm_recursive_add(builder, *gep_value, context, llvm_arg_pairs); LLVMBuildLoad(builder, new_gep, b"\0".as_ptr() as *const _) } else if isa_bitcast(*gep_value) { // TODO: DO NOT REGERATE CALLS. THESE SHOULD BE CACHED!!. e.g. a CALLOC - // let cloned_bitcast = LLVMInstructionClone(*gep_value); let mut new_bitcast = llvm_recursive_add(builder, *gep_value, context, llvm_arg_pairs); - // if bitcast was to i32, handle bitcast from float* to i32* if !isa_floatptr(new_bitcast) { let addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(new_bitcast)); let new_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(context), addr_space); @@ -2048,7 +1279,6 @@ unsafe fn translate_egg( } LLVMBuildLoad(builder, new_bitcast, b"\0".as_ptr() as *const _) } else if isa_sitofp(*gep_value) { - // let cloned_sitofp = LLVMInstructionClone(*gep_value); let new_sitofp = llvm_recursive_add(builder, *gep_value, context, llvm_arg_pairs); new_sitofp } else if isa_argument(*gep_value) { @@ -2069,21 +1299,22 @@ unsafe fn translate_egg( matched_expr } else { let new_load_value = LLVMBuildLoad(builder, *gep_value, b"\0".as_ptr() as *const _); - // let llvm_pair = LLVMPair { - // original_value: *gep_value, - // new_value: new_load_value, - // }; - assert!(isa_load(*gep_value) || isa_alloca(*gep_value)); - assert!(isa_load(new_load_value) || isa_alloca(new_load_value)); + // assert!(isa_load(*gep_value) || isa_alloca(*gep_value)); + // assert!(isa_load(new_load_value) || isa_alloca(new_load_value)); llvm_arg_pairs.insert(*gep_value, new_load_value); new_load_value } }; - // let llvm_pair = LLVMPair { - // original_value: *gep_value, - // new_value: load_value, - // }; - // llvm_arg_pairs.insert(*gep_value, load_value); + let mut matched = false; + for (original_val, _) in (&*llvm_arg_pairs).iter() { + if cmp_val_ref_address(&**original_val, &**gep_value) { + matched = true; + break; + } + } + if !matched { + llvm_arg_pairs.insert(*gep_value, load_value); + } load_value } VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { @@ -2117,12 +1348,6 @@ unsafe fn translate_egg( LLVMFloatTypeInContext(context), b"\0".as_ptr() as *const _, ); - // elt_val = LLVMBuildSIToFP( - // builder, - // elt_val, - // LLVMFloatTypeInContext(context), - // b"\0".as_ptr() as *const _, - // ); } vector = LLVMBuildInsertElement( builder, @@ -2425,12 +1650,6 @@ unsafe fn translate_egg( LLVMFloatTypeInContext(context), b"\0".as_ptr() as *const _, ); - // number = LLVMBuildSIToFP( - // builder, - // number, - // LLVMFloatTypeInContext(context), - // 
b"\0".as_ptr() as *const _, - // ) } translate_unop( enode, @@ -2518,14 +1737,6 @@ unsafe fn egg_to_llvm( } else { *addr }; - // if isa_floattype(extracted_value) && isa_intptr(*mut_addr) { - // extracted_value = LLVMBuildFPToSI( - // builder, - // extracted_value, - // LLVMIntTypeInContext(context, 32), - // b"\0".as_ptr() as *const _, - // ); - // } if isa_argument(mut_addr) { if LLVMTypeOf(extracted_value) != LLVMGetElementType(LLVMTypeOf(mut_addr)) { extracted_value = gen_type_cast( @@ -2536,10 +1747,8 @@ unsafe fn egg_to_llvm( builder, ); } - assert!(LLVMTypeOf(extracted_value) == LLVMGetElementType(LLVMTypeOf(mut_addr))); LLVMBuildStore(builder, extracted_value, mut_addr); } else { - // let cloned_addr = LLVMInstructionClone(mut_addr); let new_addr = llvm_recursive_add(builder, mut_addr, context, llvm_arg_pairs); if LLVMTypeOf(extracted_value) != LLVMGetElementType(LLVMTypeOf(mut_addr)) { extracted_value = gen_type_cast( @@ -2550,8 +1759,6 @@ unsafe fn egg_to_llvm( builder, ); } - // LLVMReplaceAllUsesWith(*addr, new_addr); - assert!(LLVMTypeOf(extracted_value) == LLVMGetElementType(LLVMTypeOf(new_addr))); LLVMBuildStore(builder, extracted_value, new_addr); } } diff --git a/src/dios-egraphs/Diospyros/test-runner.sh b/src/dios-egraphs/Diospyros/test-runner.sh new file mode 100644 index 00000000..b2a87c53 --- /dev/null +++ b/src/dios-egraphs/Diospyros/test-runner.sh @@ -0,0 +1,3 @@ +cd .. +make run-opt test=$1 +cd - \ No newline at end of file From 163542128723119d299e1ffc5fab218af8c5ec9d Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 17 Mar 2022 21:15:45 -0400 Subject: [PATCH 066/143] more dead code removal --- src/dios-egraphs/Diospyros/diospyros.cpp | 121 ----------------------- src/dios-egraphs/Diospyros/src/lib.rs | 1 - 2 files changed, 122 deletions(-) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 63f0ad67..ce2ea935 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -452,127 +452,6 @@ extern "C" LLVMValueRef build_constant_float(double n, LLVMContextRef context) { return wrap(ConstantFP::get(float_type, n)); } -/** - * DFS backwards from current instruction to see if any past insdtruction - * matches match_instr. - * - * Terminates when no more previous expressions, or reaches a - * cosntant/argument/alloca instruction in LLVM. - * - * Searches for consecutive load/store instructions to same addresses, - * which LLVM generates at -01 optimization. - */ -bool dfs_llvm_instrs(User *current_instr, User *match_instr) { - if (current_instr == NULL) { - return false; - } - if (current_instr == match_instr) { - return true; - } - if (isa(current_instr) || isa(current_instr) || - isa(current_instr)) { - return false; - } - bool result = false; - // special case for loads: check if prev is store and continue - // in fact this test will VERY LIKELY lead to errors on some well-crafted - // test cases if LLVM decides to load and store values to the same locations - // multiple times, this could mess up the final result badly, if the some of - // the previous values need to be stored back before revectorizing after. 
- if (auto load_instr = dyn_cast(current_instr)) { - if (auto prev_node = load_instr->getPrevNode()) { - if (auto store_instr = dyn_cast(prev_node)) { - Value *load_pointer_operand = load_instr->getPointerOperand(); - Value *store_pointer_operand = store_instr->getPointerOperand(); - if (load_pointer_operand == store_pointer_operand) { - Value *value_operand = store_instr->getValueOperand(); - - auto user_cast = dyn_cast(value_operand); - result |= dfs_llvm_instrs(user_cast, match_instr); - user_cast = dyn_cast(store_pointer_operand); - result |= dfs_llvm_instrs(user_cast, match_instr); - return result; - } - } - } - return false; - } - // remainder of instructions, besides stores - for (auto i = 0; i < current_instr->getNumOperands(); i++) { - Value *operand = current_instr->getOperand(i); - auto user_cast = dyn_cast(operand); - if (user_cast == NULL) { - throw std::invalid_argument("Could not convert Value * to User *"); - } - result |= dfs_llvm_instrs(user_cast, match_instr); - } - return result; -} - -/** - * Main method to call dfs llvm_value - */ -extern "C" bool dfs_llvm_value_ref(LLVMValueRef current_instr, - LLVMValueRef match_instr) { - auto current_user = dyn_cast(unwrap(current_instr)); - auto match_user = dyn_cast(unwrap(match_instr)); - if (current_user == NULL || match_user == NULL) { - throw std::invalid_argument("Could not convert Value * to User *"); - } - return dfs_llvm_instrs(current_user, match_user); -} - -// Instruction *dfs_instructions(Instruction *current_instr, -// std::vector &translated_exprs, -// BasicBlock *B) { -// for (LLVMPair pair : translated_exprs) { -// Instruction *original_val = -// dyn_cast(unwrap(pair.original_value)); -// Instruction *new_val = dyn_cast(unwrap(pair.new_value)); -// if (current_instr == original_val) { -// return new_val; -// } -// } - -// Instruction *cloned_instr = current_instr->clone(); - -// int num_operands = current_instr->getNumOperands(); -// if (num_operands == 0) { -// BasicBlock::InstListType &intermediate_instrs = B->getInstList(); -// intermediate_instrs.push_back(cloned_instr); - -// LLVMPair new_pair; -// new_pair.original_value = wrap(current_instr); -// new_pair.new_value = wrap(cloned_instr); -// translated_exprs.push_back(new_pair); - -// return cloned_instr; -// } - -// for (int i = 0; i < num_operands; i++) { -// Instruction *arg = -// dyn_cast(current_instr->getOperand(i)); if (arg != NULL) -// { -// Instruction *cloned_arg = -// dfs_instructions(arg, translated_exprs, B); -// cloned_instr->setOperand(i, cloned_arg); - -// LLVMPair new_pair; -// new_pair.original_value = wrap(arg); -// new_pair.new_value = wrap(cloned_arg); -// translated_exprs.push_back(new_pair); -// } -// } -// LLVMPair new_pair; -// new_pair.original_value = wrap(current_instr); -// new_pair.new_value = wrap(cloned_instr); -// translated_exprs.push_back(new_pair); - -// BasicBlock::InstListType &intermediate_instrs = B->getInstList(); -// intermediate_instrs.push_back(cloned_instr); -// return cloned_instr; -// } - Instruction *dfs_instructions(Instruction *current_instr, std::vector &translated_exprs, BasicBlock *B) { diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 02c84153..73fffc0d 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -40,7 +40,6 @@ extern "C" { fn isa_sqrt32(val: LLVMValueRef) -> bool; fn isa_sqrt64(val: LLVMValueRef) -> bool; fn get_constant_float(val: LLVMValueRef) -> f32; - fn _dfs_llvm_value_ref(val: LLVMValueRef, 
match_val: LLVMValueRef) -> bool; fn build_constant_float(n: f64, context: LLVMContextRef) -> LLVMValueRef; } From 08daedb73c4a3d2d41ede304a6416ce9937a1ffa Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 23 Mar 2022 11:38:31 -0400 Subject: [PATCH 067/143] set up testing infrastructure --- .../Diospyros/LoadStoreMovement.cpp | 4 +- src/dios-egraphs/Diospyros/c-tests/README.md | 1 + .../Diospyros/c-tests/identity_matrix.c | 48 ++++++++++ src/dios-egraphs/Diospyros/c-tests/triangle.c | 96 +++++++++++++++++++ .../Diospyros/llvm-output-tests/README.md | 2 + .../Diospyros/llvm-tests/README.md | 2 + .../Diospyros/llvm-tests/load-prior-chunk.ll | 67 +++++++++++++ .../Diospyros/randomized-tests/README.md | 1 + src/dios-egraphs/Diospyros/src/lib.rs | 31 +++--- 9 files changed, 235 insertions(+), 17 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/c-tests/README.md create mode 100644 src/dios-egraphs/Diospyros/c-tests/triangle.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/README.md create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/README.md create mode 100644 src/dios-egraphs/Diospyros/llvm-tests/load-prior-chunk.ll create mode 100644 src/dios-egraphs/Diospyros/randomized-tests/README.md diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index 804004a4..6735e435 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -260,8 +260,8 @@ struct LoadStoreMovementPass : public FunctionPass { (F.getName().size() > 7 && F.getName().substr(0, 7) == "no_opt_")) { return false; } - // rewrite_stores(F); - // rewrite_loads(F); + rewrite_stores(F); + rewrite_loads(F); return true; } diff --git a/src/dios-egraphs/Diospyros/c-tests/README.md b/src/dios-egraphs/Diospyros/c-tests/README.md new file mode 100644 index 00000000..c4328512 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/README.md @@ -0,0 +1 @@ +C tests contains tests of simple c programs that are run through the diospyros pass, and the outputs, which are printed out, are compared between optimization with and without diospyros. The "correct" result is taken to be optimization without diospyros. 
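For reference, the C tests added below all follow one template: a kernel the pass is allowed to rewrite, an identical twin whose name carries the no_opt_ prefix so the passes leave it alone (the LoadStoreMovement change above checks exactly that prefix), and a main that runs both, prints both results, and asserts they agree within DELTA. A stripped-down sketch of that template; the vector_add kernel is hypothetical and not one of the tests in this patch:

#include <assert.h>
#include <math.h>
#include <stdio.h>

#define SIZE 4
#define DELTA 0.1

void vector_add(float a[SIZE], float b[SIZE], float c[SIZE]) {
    for (int i = 0; i < SIZE; i++) c[i] = a[i] + b[i];
}

/* The no_opt_ prefix keeps the Diospyros passes from touching this copy. */
void no_opt_vector_add(float a[SIZE], float b[SIZE], float c[SIZE]) {
    for (int i = 0; i < SIZE; i++) c[i] = a[i] + b[i];
}

int main(void) {
    float a[SIZE] = {1, 2, 3, 4}, b[SIZE] = {5, 6, 7, 8};
    float c[SIZE] = {0}, c_expected[SIZE] = {0};
    vector_add(a, b, c);
    no_opt_vector_add(a, b, c_expected);
    for (int i = 0; i < SIZE; i++) {
        printf("Expected C: %f\n", c_expected[i]);
        printf("Actual C: %f\n", c[i]);
        assert(fabsf(c_expected[i] - c[i]) < DELTA);
    }
    return 0;
}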
\ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/identity_matrix.c b/src/dios-egraphs/Diospyros/c-tests/identity_matrix.c index 35f63b3e..9e5f062e 100644 --- a/src/dios-egraphs/Diospyros/c-tests/identity_matrix.c +++ b/src/dios-egraphs/Diospyros/c-tests/identity_matrix.c @@ -1,3 +1,12 @@ +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1 + void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { memcpy(R, A, sizeof(float) * SIZE * SIZE); @@ -8,3 +17,42 @@ void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { I[i * SIZE + j] = (i == j); } } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } +} + +int main(void) { + float A[SIZE * SIZE] = {0, 1, 2, 3}; + float Q[SIZE * SIZE] = {0, 1, 2, 3}; + float R[SIZE * SIZE] = {0, 1, 2, 3}; + float AExpected[SIZE * SIZE] = {0, 1, 2, 3}; + float QExpected[SIZE * SIZE] = {0, 1, 2, 3}; + float RExpected[SIZE * SIZE] = {0, 1, 2, 3}; + naive_fixed_qr_decomp(A, Q, R); + no_opt_naive_fixed_qr_decomp(AExpected, QExpected, RExpected); + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Expected Q: %f\n", QExpected[i]); + printf("Actual Q: %f\n", Q[i]); + assert(fabsf(QExpected[i] - Q[i]) < DELTA); + } + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Expected R: %f\n", RExpected[i]); + printf("Actual R: %f\n", R[i]); + assert(fabsf(RExpected[i] - R[i]) < DELTA); + } + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Expected A: %f\n", AExpected[i]); + printf("Actual A: %f\n", A[i]); + assert(fabsf(AExpected[i] - A[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/triangle.c b/src/dios-egraphs/Diospyros/c-tests/triangle.c new file mode 100644 index 00000000..344beb52 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/triangle.c @@ -0,0 +1,96 @@ +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1 + +// Triangle Access Pattern Test + +void lower_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < i; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + +void no_opt_lower_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < i; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + +void upper_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i; j < SIZE; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + +void no_opt_upper_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i; j < SIZE; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + +int main(void) { + float A1[SIZE * SIZE] = {0, 1, 2, 3}; + float B1[SIZE * SIZE] = {0, 1, 2, 3}; + float C1[SIZE * SIZE] = {0, 1, 2, 3}; + + float A1Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float B1Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float C1Expected[SIZE * SIZE] = {0, 1, 2, 3}; + + lower_triangle(A1, B1, C1); + no_opt_lower_triangle(A1Expected, 
B1Expected, C1Expected); + + for (int i = 0; i < SIZE * SIZE; i++) { + printf("A: %f\n", A1[i]); + printf("A Expected: %f\n", A1Expected[i]); + printf("B: %f\n", B1[i]); + printf("B Expected: %f\n", B1Expected[i]); + printf("C: %f\n", C1[i]); + printf("C Expected: %f\n", C1Expected[i]); + + assert(fabsf(A1[i] - A1Expected[i]) < DELTA); + assert(fabsf(B1[i] - B1Expected[i]) < DELTA); + assert(fabsf(C1[i] - C1Expected[i]) < DELTA); + } + + float A2[SIZE * SIZE] = {0, 1, 2, 3}; + float B2[SIZE * SIZE] = {0, 1, 2, 3}; + float C2[SIZE * SIZE] = {0, 1, 2, 3}; + + float A2Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float B2Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float C2Expected[SIZE * SIZE] = {0, 1, 2, 3}; + + upper_triangle(A2, B2, C2); + no_opt_upper_triangle(A2Expected, B2Expected, C2Expected); + + for (int i = 0; i < SIZE * SIZE; i++) { + printf("A: %f\n", A2[i]); + printf("A Expected: %f\n", A2Expected[i]); + printf("B: %f\n", B2[i]); + printf("B Expected: %f\n", B2Expected[i]); + printf("C: %f\n", C2[i]); + printf("C Expected: %f\n", C2Expected[i]); + + assert(fabsf(A2[i] - A2Expected[i]) < DELTA); + assert(fabsf(B2[i] - B2Expected[i]) < DELTA); + assert(fabsf(C2[i] - C2Expected[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/README.md b/src/dios-egraphs/Diospyros/llvm-output-tests/README.md new file mode 100644 index 00000000..83ff0cec --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/README.md @@ -0,0 +1,2 @@ +LLVM Output Tests are tests that check the LLVM output from dce.ll. +Essentially, the final LLVM IR output is checked for regressions in this test suite. \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/README.md b/src/dios-egraphs/Diospyros/llvm-tests/README.md new file mode 100644 index 00000000..d0499362 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/README.md @@ -0,0 +1,2 @@ +LLVM Tests contains hand crafted LLVM tests, that test transformation of llvm ir, +using the diospyros pass. 
\ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/load-prior-chunk.ll b/src/dios-egraphs/Diospyros/llvm-tests/load-prior-chunk.ll new file mode 100644 index 00000000..d8888bf8 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-tests/load-prior-chunk.ll @@ -0,0 +1,67 @@ +; ModuleID = 'build/opt.ll' +source_filename = "fail-tests/local-array-4.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@.str = private unnamed_addr constant [14 x i8] c"A Output: %f\0A\00", align 1 +@.memset_pattern = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 + +; Function Attrs: noinline nounwind ssp uwtable +define void @test(float* %0) #0 { +.preheader: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = store i32 0, i32* %a + %1 = alloca i64, align 8 + %tmpcast = bitcast i64* %1 to [2 x float]* + %2 = bitcast i64* %1 to i8* + %3 = bitcast i64* %1 to float* + store i64 0, i64* %1, align 8 + call void @memset_pattern16(i8* nonnull %2, i8* bitcast ([4 x float]* @.memset_pattern to i8*), i64 8) #4 + %4 = load float, float* %3, align 8 + %5 = fadd float %4, 0.000000e+00 + %6 = getelementptr inbounds [2 x float], [2 x float]* %tmpcast, i64 0, i64 1 + %7 = load float, float* %6, align 4 + %8 = fadd float %5, %7 + store float %8, float* %0, align 4 + ret void +} + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #1 + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #0 { + %1 = alloca i64, align 8 + %tmpcast = bitcast i64* %1 to [2 x float]* + %2 = bitcast i64* %1 to float* + store float 0.000000e+00, float* %2, align 8 + %3 = getelementptr inbounds [2 x float], [2 x float]* %tmpcast, i64 0, i64 1 + store float 1.000000e+00, float* %3, align 4 + call void @test(float* nonnull %2) + %4 = load float, float* %2, align 8 + %5 = fpext float %4 to double + %6 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %5) #4 + %7 = load float, float* %3, align 4 + %8 = fpext float %7 to double + %9 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %8) #4 + ret i32 0 +} + +declare i32 @printf(i8*, ...) 
#2 + +; Function Attrs: argmemonly nofree +declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) #3 + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn writeonly } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { argmemonly nofree } +attributes #4 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/randomized-tests/README.md b/src/dios-egraphs/Diospyros/randomized-tests/README.md new file mode 100644 index 00000000..740c814b --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/README.md @@ -0,0 +1 @@ +Randomized Tests contains tests of large programs with random float array inputs. The correct result is taken to be the optimization without diospyros, and the diospyros pass is compared against the correct result. 
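No randomized test file appears in this patch itself, so the following is only a hypothetical sketch of what the README describes: fill the input arrays with pseudo-random floats, run the kernel and its no_opt_ twin on the same data, and require the outputs to agree within a tolerance. Every name below is a placeholder:

#include <assert.h>
#include <math.h>
#include <stdlib.h>

#define SIZE 100
#define DELTA 0.1f

/* Placeholder kernels standing in for a large generated test. */
void kernel(float *in, float *out)        { for (int i = 0; i < SIZE; i++) out[i] = 2.0f * in[i]; }
void no_opt_kernel(float *in, float *out) { for (int i = 0; i < SIZE; i++) out[i] = 2.0f * in[i]; }

int main(void) {
    srand(1);  /* fixed seed keeps a failing case reproducible */
    float in[SIZE], out[SIZE], expected[SIZE];
    for (int i = 0; i < SIZE; i++)
        in[i] = (float)rand() / (float)RAND_MAX;  /* random values in [0, 1] */
    kernel(in, out);
    no_opt_kernel(in, expected);
    for (int i = 0; i < SIZE; i++)
        assert(fabsf(out[i] - expected[i]) < DELTA);
    return 0;
}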
\ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 73fffc0d..a98fcb71 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -435,12 +435,13 @@ unsafe fn llvm_recursive_add( in_map = true; } } - if !in_map { - // assert!(isa_load(inst) || isa_alloca(inst)); - // assert!(isa_load(cloned_inst) || isa_alloca(cloned_inst)); - llvm_arg_pairs.insert(inst, cloned_inst); + if isa_load(inst) { + if !in_map { + // assert!(isa_load(inst) || isa_alloca(inst)); + // assert!(isa_load(cloned_inst) || isa_alloca(cloned_inst)); + llvm_arg_pairs.insert(inst, cloned_inst); + } } - // } return cloned_inst; } @@ -1304,16 +1305,16 @@ unsafe fn translate_egg( new_load_value } }; - let mut matched = false; - for (original_val, _) in (&*llvm_arg_pairs).iter() { - if cmp_val_ref_address(&**original_val, &**gep_value) { - matched = true; - break; - } - } - if !matched { - llvm_arg_pairs.insert(*gep_value, load_value); - } + // let mut matched = false; + // for (original_val, _) in (&*llvm_arg_pairs).iter() { + // if cmp_val_ref_address(&**original_val, &**gep_value) { + // matched = true; + // break; + // } + // } + // if !matched { + // llvm_arg_pairs.insert(*gep_value, load_value); + // } load_value } VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { From 0c2122853f0ffa829fa7be374d745224f5df34f0 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 23 Mar 2022 15:03:43 -0400 Subject: [PATCH 068/143] no-opt tests turnt added, llvm output tests for c-tests added --- src/dios-egraphs/Diospyros/Makefile | 17 + src/dios-egraphs/Diospyros/c-tests/turnt.toml | 2 +- .../Diospyros/llvm-output-tests/2d-2d-conv.c | 58 ++++ .../Diospyros/llvm-output-tests/2d-conv.c | 58 ++++ .../2d-matrix-multiply-new.c | 36 +++ .../llvm-output-tests/2d-matrix-multiply.c | 31 ++ .../Diospyros/llvm-output-tests/2d.c | 28 ++ .../3-by-3-matrix-multiply.c | 43 +++ .../5-by-5-matrix-multiply.c | 59 ++++ .../Diospyros/llvm-output-tests/add.c | 27 ++ .../Diospyros/llvm-output-tests/add_mult.c | 27 ++ .../Diospyros/llvm-output-tests/break-w.c | 34 ++ .../Diospyros/llvm-output-tests/break.c | 32 ++ .../Diospyros/llvm-output-tests/calloc.c | 30 ++ .../Diospyros/llvm-output-tests/continue-w.c | 36 +++ .../Diospyros/llvm-output-tests/continue.c | 31 ++ .../Diospyros/llvm-output-tests/cube.c | 34 ++ .../Diospyros/llvm-output-tests/five_binops.c | 31 ++ .../llvm-output-tests/identity_matrix.c | 58 ++++ .../Diospyros/llvm-output-tests/if-else.c | 32 ++ .../llvm-output-tests/inline-float.c | 78 +++++ .../Diospyros/llvm-output-tests/inline-void.c | 54 ++++ .../Diospyros/llvm-output-tests/load_reuse.c | 81 +++++ .../Diospyros/llvm-output-tests/loop-inline.c | 60 ++++ .../Diospyros/llvm-output-tests/mac.c | 29 ++ .../Diospyros/llvm-output-tests/malloc.c | 30 ++ .../Diospyros/llvm-output-tests/mat_mul.c | 27 ++ .../llvm-output-tests/matrix-multiply.c | 36 +++ .../Diospyros/llvm-output-tests/mixed.c | 27 ++ .../Diospyros/llvm-output-tests/mult.c | 27 ++ .../llvm-output-tests/multi-mat-mul.c | 47 +++ .../llvm-output-tests/multiple_adds.c | 29 ++ .../llvm-output-tests/naive-norm-inline.c | 50 +++ .../Diospyros/llvm-output-tests/naive-norm.c | 43 +++ .../llvm-output-tests/nested-inline.c | 105 ++++++ .../llvm-output-tests/out_of_order.c | 27 ++ .../llvm-output-tests/point-product.c | 47 +++ .../Diospyros/llvm-output-tests/q-prod.c | 72 +++++ .../qr-decomp-modified-no-local-array.c | 184 +++++++++++ 
.../qr-decomp-no-local-array.c | 304 ++++++++++++++++++ .../llvm-output-tests/qr-decomp-test-1.c | 99 ++++++ .../llvm-output-tests/qr-decomp-test-2.c | 92 ++++++ .../llvm-output-tests/qr-decomp-test-3.c | 120 +++++++ .../qr-decomp-test-4-1-linear-array.c | 201 ++++++++++++ .../llvm-output-tests/qr-decomp-test-4-1.c | 210 ++++++++++++ .../llvm-output-tests/qr-decomp-test-4-2-2.c | 217 +++++++++++++ .../qr-decomp-test-4-2-linear-array.c | 201 ++++++++++++ .../llvm-output-tests/qr-decomp-test-4-2.c | 206 ++++++++++++ .../qr-decomp-test-4-3-linear-array.c | 201 ++++++++++++ .../llvm-output-tests/qr-decomp-test-4-3.c | 209 ++++++++++++ .../qr-decomp-test-4-4-linear-array.c | 137 ++++++++ .../qr-decomp-test-4-5-linear-array.c | 201 ++++++++++++ .../Diospyros/llvm-output-tests/return.c | 37 +++ .../Diospyros/llvm-output-tests/scalar.c | 27 ++ .../Diospyros/llvm-output-tests/sgn-inline.c | 42 +++ .../sgn-naive-norm-inline-1.c | 61 ++++ .../sgn-naive-norm-inline-2.c | 63 ++++ .../Diospyros/llvm-output-tests/sqrt.c | 33 ++ .../Diospyros/llvm-output-tests/stencil-2d.c | 78 +++++ .../Diospyros/llvm-output-tests/ternary.c | 35 ++ .../Diospyros/llvm-output-tests/transpose.c | 57 ++++ .../Diospyros/llvm-output-tests/triangle.c | 96 ++++++ .../Diospyros/llvm-output-tests/turnt.toml | 1 + .../Diospyros/llvm-output-tests/var.c | 29 ++ .../Diospyros/llvm-output-tests/width5.c | 30 ++ .../Diospyros/llvm-output-tests/width9.c | 42 +++ .../Diospyros/no-egg-c-tests/2d-2d-conv.c | 58 ++++ .../Diospyros/no-egg-c-tests/2d-conv.c | 58 ++++ .../no-egg-c-tests/2d-matrix-multiply-new.c | 36 +++ .../no-egg-c-tests/2d-matrix-multiply.c | 31 ++ .../Diospyros/no-egg-c-tests/2d.c | 28 ++ .../no-egg-c-tests/3-by-3-matrix-multiply.c | 43 +++ .../no-egg-c-tests/5-by-5-matrix-multiply.c | 59 ++++ .../Diospyros/no-egg-c-tests/README.md | 1 + .../Diospyros/no-egg-c-tests/add.c | 27 ++ .../Diospyros/no-egg-c-tests/add_mult.c | 27 ++ .../Diospyros/no-egg-c-tests/break-w.c | 34 ++ .../Diospyros/no-egg-c-tests/break.c | 32 ++ .../Diospyros/no-egg-c-tests/calloc.c | 30 ++ .../Diospyros/no-egg-c-tests/continue-w.c | 36 +++ .../Diospyros/no-egg-c-tests/continue.c | 31 ++ .../Diospyros/no-egg-c-tests/cube.c | 34 ++ .../Diospyros/no-egg-c-tests/five_binops.c | 31 ++ .../no-egg-c-tests/identity_matrix.c | 58 ++++ .../Diospyros/no-egg-c-tests/if-else.c | 32 ++ .../Diospyros/no-egg-c-tests/inline-float.c | 78 +++++ .../Diospyros/no-egg-c-tests/inline-void.c | 54 ++++ .../Diospyros/no-egg-c-tests/load_reuse.c | 81 +++++ .../Diospyros/no-egg-c-tests/loop-inline.c | 60 ++++ .../Diospyros/no-egg-c-tests/mac.c | 29 ++ .../Diospyros/no-egg-c-tests/malloc.c | 30 ++ .../Diospyros/no-egg-c-tests/mat_mul.c | 27 ++ .../no-egg-c-tests/matrix-multiply.c | 36 +++ .../Diospyros/no-egg-c-tests/mixed.c | 27 ++ .../Diospyros/no-egg-c-tests/mult.c | 27 ++ .../Diospyros/no-egg-c-tests/multi-mat-mul.c | 47 +++ .../Diospyros/no-egg-c-tests/multiple_adds.c | 29 ++ .../no-egg-c-tests/naive-norm-inline.c | 50 +++ .../Diospyros/no-egg-c-tests/naive-norm.c | 43 +++ .../Diospyros/no-egg-c-tests/nested-inline.c | 105 ++++++ .../Diospyros/no-egg-c-tests/out_of_order.c | 27 ++ .../Diospyros/no-egg-c-tests/point-product.c | 47 +++ .../Diospyros/no-egg-c-tests/q-prod.c | 72 +++++ .../qr-decomp-modified-no-local-array.c | 184 +++++++++++ .../no-egg-c-tests/qr-decomp-no-local-array.c | 304 ++++++++++++++++++ .../no-egg-c-tests/qr-decomp-test-1.c | 99 ++++++ .../no-egg-c-tests/qr-decomp-test-2.c | 92 ++++++ .../no-egg-c-tests/qr-decomp-test-3.c | 120 +++++++ 
.../qr-decomp-test-4-1-linear-array.c | 201 ++++++++++++ .../no-egg-c-tests/qr-decomp-test-4-1.c | 210 ++++++++++++ .../no-egg-c-tests/qr-decomp-test-4-2-2.c | 217 +++++++++++++ .../qr-decomp-test-4-2-linear-array.c | 201 ++++++++++++ .../no-egg-c-tests/qr-decomp-test-4-2.c | 206 ++++++++++++ .../qr-decomp-test-4-3-linear-array.c | 201 ++++++++++++ .../no-egg-c-tests/qr-decomp-test-4-3.c | 209 ++++++++++++ .../qr-decomp-test-4-4-linear-array.c | 137 ++++++++ .../qr-decomp-test-4-5-linear-array.c | 201 ++++++++++++ .../Diospyros/no-egg-c-tests/return.c | 37 +++ .../Diospyros/no-egg-c-tests/scalar.c | 27 ++ .../Diospyros/no-egg-c-tests/sgn-inline.c | 42 +++ .../no-egg-c-tests/sgn-naive-norm-inline-1.c | 61 ++++ .../no-egg-c-tests/sgn-naive-norm-inline-2.c | 63 ++++ .../Diospyros/no-egg-c-tests/sqrt.c | 33 ++ .../Diospyros/no-egg-c-tests/stencil-2d.c | 78 +++++ .../Diospyros/no-egg-c-tests/ternary.c | 35 ++ .../Diospyros/no-egg-c-tests/transpose.c | 57 ++++ .../Diospyros/no-egg-c-tests/triangle.c | 96 ++++++ .../Diospyros/no-egg-c-tests/turnt.toml | 1 + .../Diospyros/no-egg-c-tests/var.c | 29 ++ .../Diospyros/no-egg-c-tests/width5.c | 30 ++ .../Diospyros/no-egg-c-tests/width9.c | 42 +++ src/dios-egraphs/Diospyros/test-runner.sh | 15 +- 132 files changed, 9567 insertions(+), 2 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/2d-2d-conv.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/2d-conv.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/2d-matrix-multiply-new.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/2d-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/2d.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/3-by-3-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/5-by-5-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/add.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/add_mult.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/break-w.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/break.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/calloc.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/continue-w.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/continue.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/cube.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/five_binops.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/identity_matrix.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/if-else.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/inline-float.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/inline-void.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/load_reuse.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/loop-inline.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/mac.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/malloc.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/mat_mul.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/mixed.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/mult.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/multi-mat-mul.c create mode 100644 
src/dios-egraphs/Diospyros/llvm-output-tests/multiple_adds.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/naive-norm-inline.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/naive-norm.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/nested-inline.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/out_of_order.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/point-product.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/q-prod.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-modified-no-local-array.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-no-local-array.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-1.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-2.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-3.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-1-linear-array.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-1.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-2-2.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-2-linear-array.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-2.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-3-linear-array.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-3.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-4-linear-array.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-5-linear-array.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/return.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/scalar.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/sgn-inline.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/sgn-naive-norm-inline-1.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/sgn-naive-norm-inline-2.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/sqrt.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/stencil-2d.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/ternary.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/transpose.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/triangle.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/turnt.toml create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/var.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/width5.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/width9.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/2d-2d-conv.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/2d-conv.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/2d-matrix-multiply-new.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/2d-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/2d.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/3-by-3-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/5-by-5-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/README.md create mode 100644 
src/dios-egraphs/Diospyros/no-egg-c-tests/add.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/add_mult.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/break-w.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/break.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/calloc.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/continue-w.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/continue.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/cube.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/five_binops.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/identity_matrix.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/if-else.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/inline-float.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/inline-void.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/load_reuse.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/loop-inline.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/mac.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/malloc.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/mat_mul.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/mixed.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/mult.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/multi-mat-mul.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/multiple_adds.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/naive-norm-inline.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/naive-norm.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/nested-inline.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/out_of_order.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/point-product.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/q-prod.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-modified-no-local-array.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-no-local-array.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-1.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-2.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-3.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-1-linear-array.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-1.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-2-2.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-2-linear-array.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-2.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-3-linear-array.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-3.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-4-linear-array.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-5-linear-array.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/return.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/scalar.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/sgn-inline.c create 
mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/sgn-naive-norm-inline-1.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/sgn-naive-norm-inline-2.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/sqrt.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/stencil-2d.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/ternary.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/transpose.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/triangle.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/turnt.toml create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/var.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/width5.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/width9.c
diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile
index e780eca4..448faecb 100644
--- a/src/dios-egraphs/Diospyros/Makefile
+++ b/src/dios-egraphs/Diospyros/Makefile
@@ -77,6 +77,23 @@ no-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp
 	$(CLANG) build/dce.ll -o build/final
 	build/final
 
+run-opt-stdout: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp
+	$(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test)
+	opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll
+	opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll
+	$(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=false build/aa.ll -o build/diospyros.ll
+	opt -S --adce --dse build/diospyros.ll -o build/dce.ll
+	cat build/dce.ll
+
+no-opt-stdout: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp
+	$(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test)
+	opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll
+	opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll
+	$(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt=false -mllvm -print=false build/aa.ll -o build/diospyros.ll
+	rm build/dce.ll
+	opt -S --adce --dse build/diospyros.ll -o build/dce.ll
+	cat build/dce.ll
+
 run-diospyros: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp
 	$(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt=false -mllvm -print=false $(test) -o build/diospyros.ll
diff --git a/src/dios-egraphs/Diospyros/c-tests/turnt.toml b/src/dios-egraphs/Diospyros/c-tests/turnt.toml
index 9a0ac4b9..4c3e1bc9 100644
--- a/src/dios-egraphs/Diospyros/c-tests/turnt.toml
+++ b/src/dios-egraphs/Diospyros/c-tests/turnt.toml
@@ -1 +1 @@
-command = "bash ../test-runner.sh c-tests/{filename}"
+command = "bash ../test-runner.sh run-opt c-tests/{filename}"
diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/2d-2d-conv.c b/src/dios-egraphs/Diospyros/llvm-output-tests/2d-2d-conv.c
new file mode 100644
index 00000000..4653f9c1
--- /dev/null
+++ b/src/dios-egraphs/Diospyros/llvm-output-tests/2d-2d-conv.c
@@ -0,0 +1,58 @@
+#include <assert.h> +#include <stdio.h> + +#define I_ROWS 2 +#define I_COLS 2 +#define F_ROWS 2 +#define
F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS][I_COLS], float f_in[F_ROWS][F_COLS], + float mat_out[O_ROWS][O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = + mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + mat_out[outRow][outCol] += v; + // mat_out[outRow][outCol] += + // mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + } + } + } + } + } +} + +int main(void) { + float mat_in[I_ROWS][I_COLS] = {{1, 2}, {3, 4}}; + float f_in[F_ROWS][F_COLS] = {{1, 1}, {1, 1}}; + float mat_out[O_ROWS][O_COLS] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; + convolution(mat_in, f_in, mat_out); + float expected[O_ROWS][O_COLS] = {{1, 3, 2}, {4, 10, 6}, {3, 7, 4}}; + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + printf("output: %f\n", mat_out[i][j]); + assert(mat_out[i][j] == expected[i][j]); + } + } + // output: 1.000000 + // output: 3.000000 + // output: 2.000000 + // output: 4.000000 + // output: 10.000000 + // output: 6.000000 + // output: 3.000000 + // output: 7.000000 + // output: 4.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/2d-conv.c b/src/dios-egraphs/Diospyros/llvm-output-tests/2d-conv.c new file mode 100644 index 00000000..46d4d15c --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/2d-conv.c @@ -0,0 +1,58 @@ +#include +#include + +#define I_ROWS 2 +#define I_COLS 2 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + + // mat_out[outRow * O_COLS + outCol] += + // mat_in[iRow * I_COLS + iCol] * + // f_in[fRowTrans * F_COLS + fColTrans]; + } + } + } + } + } +} + +int main(void) { + float mat_in[I_ROWS * I_COLS] = {1, 2, 3, 4}; + float f_in[F_ROWS * F_COLS] = {1, 1, 1, 1}; + float mat_out[O_ROWS * O_COLS] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; + float expected[O_ROWS * O_COLS] = {1, 3, 2, 4, 10, 6, 3, 7, 4}; + convolution(mat_in, f_in, mat_out); + for (int i = 0; i < O_ROWS * O_COLS; i++) { + printf("output: %f\n", mat_out[i]); + assert(mat_out[i] == expected[i]); + } + // output: 1.000000 + // output: 3.000000 + // output: 2.000000 + // output: 4.000000 + // output: 10.000000 + // output: 6.000000 + // output: 3.000000 + // output: 7.000000 + // output: 4.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/2d-matrix-multiply-new.c b/src/dios-egraphs/Diospyros/llvm-output-tests/2d-matrix-multiply-new.c 
new file mode 100644 index 00000000..bff538bc --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/2d-matrix-multiply-new.c @@ -0,0 +1,36 @@ +#include +#include + +#define A_ROWS 2 +#define A_COLS 2 +#define B_COLS 2 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + float a_in[A_ROWS][A_COLS] = {{1, 2}, {3, 4}}; + float b_in[A_COLS][B_COLS] = {{1, 2}, {3, 4}}; + float c_out[A_ROWS][B_COLS] = {{0, 0}, {0, 0}}; + matrix_multiply(a_in, b_in, c_out); + printf("first: %f\n", c_out[0][0]); + printf("second: %f\n", c_out[0][1]); + printf("third: %f\n", c_out[1][0]); + printf("fourth: %f\n", c_out[1][1]); + assert(c_out[0][0] == 7); + assert(c_out[0][1] == 10); + assert(c_out[1][0] == 15); + assert(c_out[1][1] == 22); + // expected (7, 10, 15, 22) + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/2d-matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-output-tests/2d-matrix-multiply.c new file mode 100644 index 00000000..371b7967 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/2d-matrix-multiply.c @@ -0,0 +1,31 @@ +#include + +#define A_ROWS 2 +#define A_COLS 2 +#define B_COLS 2 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + float a_in[A_ROWS][A_COLS] = {{1, 2}, {3, 4}}; + float b_in[A_COLS][B_COLS] = {{1, 2}, {3, 4}}; + float c_out[A_ROWS][B_COLS] = {{0, 0}, {0, 0}}; + matrix_multiply(a_in, b_in, c_out); + printf("first: %f\n", c_out[0][0]); + printf("second: %f\n", c_out[0][1]); + printf("third: %f\n", c_out[1][0]); + printf("fourth: %f\n", c_out[1][1]); + // expected (7, 10, 15, 22) + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/2d.c b/src/dios-egraphs/Diospyros/llvm-output-tests/2d.c new file mode 100644 index 00000000..a794d75a --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/2d.c @@ -0,0 +1,28 @@ +#include +#include +#define SIZE 4 + +void sum(float a_in[SIZE][SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0][3] + b_in[0]; + c_out[1] = a_in[1][3] + b_in[1]; + c_out[2] = a_in[2][3] + b_in[2]; + c_out[3] = a_in[3][3] + a_in[0][1]; +} + +int main(int argc, char **argv) { + float a_in[SIZE][SIZE] = { + {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 9); + assert(c_out[1] == 14); + assert(c_out[2] == 19); + assert(c_out[3] == 18); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + // expected: 9, 14, 19, 18 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/3-by-3-matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-output-tests/3-by-3-matrix-multiply.c new file mode 100644 index 00000000..1fde8829 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/3-by-3-matrix-multiply.c @@ -0,0 +1,43 @@ +#include 
+#include + +#define A_ROWS 3 +#define A_COLS 3 +#define B_COLS 3 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + float a_in[A_ROWS][A_COLS] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + float b_in[A_COLS][B_COLS] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + float c_out[A_ROWS][B_COLS] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + matrix_multiply(a_in, b_in, c_out); + float expected[A_ROWS][B_COLS] = {{30, 36, 42}, {66, 81, 96}, {102, 126, 150}}; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + assert(expected[i][j] == c_out[i][j]); + } + } + // output: 30.000000 + // output: 36.000000 + // output: 42.000000 + // output: 66.000000 + // output: 81.000000 + // output: 96.000000 + // output: 102.000000 + // output: 126.000000 + // output: 150.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/5-by-5-matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-output-tests/5-by-5-matrix-multiply.c new file mode 100644 index 00000000..bab5bad9 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/5-by-5-matrix-multiply.c @@ -0,0 +1,59 @@ +#include +#include + +#define A_ROWS 5 +#define A_COLS 5 +#define B_COLS 5 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + float a_in[A_ROWS][A_COLS] = {{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}}; + float b_in[A_COLS][B_COLS] = {{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}}; + float c_out[A_ROWS][B_COLS] = {{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}}; + matrix_multiply(a_in, b_in, c_out); + float expected[A_ROWS][B_COLS] = {{45, 60, 75, 90, 105}, {120, 160, 200, 240, 280}, {45, 60, 75, 90, 105}, {120, 160, 200, 240, 280},{45, 60, 75, 90, 105}}; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + assert(expected[i][j] == c_out[i][j]); + } + } + // output: 45.000000 + // output: 60.000000 + // output: 75.000000 + // output: 90.000000 + // output: 105.000000 + // output: 120.000000 + // output: 160.000000 + // output: 200.000000 + // output: 240.000000 + // output: 280.000000 + // output: 45.000000 + // output: 60.000000 + // output: 75.000000 + // output: 90.000000 + // output: 105.000000 + // output: 120.000000 + // output: 160.000000 + // output: 200.000000 + // output: 240.000000 + // output: 280.000000 + // output: 45.000000 + // output: 60.000000 + // output: 75.000000 + // output: 90.000000 + // output: 105.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/add.c b/src/dios-egraphs/Diospyros/llvm-output-tests/add.c new file mode 100644 index 00000000..ebbcdf35 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/add.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) 
{ + c_out[0] = a_in[0] + b_in[0]; + c_out[1] = a_in[1] + b_in[1]; + c_out[2] = a_in[2] + b_in[2]; + c_out[3] = a_in[3] + b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 6); + assert(c_out[1] == 8); + assert(c_out[2] == 10); + assert(c_out[3] == 12); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + // expected: 6, 8, 10, 12 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/add_mult.c b/src/dios-egraphs/Diospyros/llvm-output-tests/add_mult.c new file mode 100644 index 00000000..732ba987 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/add_mult.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void add_mult(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] + b_in[0]; + c_out[1] = a_in[1] * b_in[1]; + c_out[2] = a_in[2] + b_in[2]; + c_out[3] = a_in[3] * b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {2, 3, 4, 5}; + float c_out[SIZE]; + add_mult(a_in, b_in, c_out); + assert(c_out[0] == 3); + assert(c_out[1] == 6); + assert(c_out[2] == 7); + assert(c_out[3] == 20); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + // expected:3, 6, 7, 20 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/break-w.c b/src/dios-egraphs/Diospyros/llvm-output-tests/break-w.c new file mode 100644 index 00000000..8afaed68 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/break-w.c @@ -0,0 +1,34 @@ +#include +#include +#define SIZE 8 + +void break_w_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { + int i = SIZE - 1; + while (i >= 0) { + if (i < SIZE / 2) break; + b_out[i] = a_in[i] * scalar_in; + i -= 1; + } + b_out[0] = scalar_in; +} + +int main(void) { + float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; + float scalar_in = 10; + float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + break_w_test(a_in, scalar_in, b_out); + float expected[SIZE] = {10, 0, 0, 0, 50, 40, 30, 20}; + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); + } + // 10.000000 + // 0.000000 + // 0.000000 + // 0.000000 + // 50.000000 + // 40.000000 + // 30.000000 + // 20.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/break.c b/src/dios-egraphs/Diospyros/llvm-output-tests/break.c new file mode 100644 index 00000000..1790765f --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/break.c @@ -0,0 +1,32 @@ +#include +#include +#define SIZE 8 + +void break_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { + for (int i = SIZE - 1; i >= 0; i--) { + if (i < SIZE / 2) break; + b_out[i] = a_in[i] * scalar_in; + } + b_out[0] = scalar_in; +} + +int main(void) { + float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; + float scalar_in = 10; + float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + break_test(a_in, scalar_in, b_out); + float expected[SIZE] = {10, 0, 0, 0, 50, 40, 30, 20}; + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); + } + // 10.000000 + // 0.000000 + // 0.000000 + // 0.000000 + // 50.000000 + // 40.000000 + // 30.000000 + // 20.000000 + return 
0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/calloc.c b/src/dios-egraphs/Diospyros/llvm-output-tests/calloc.c new file mode 100644 index 00000000..d8e0f10a --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/calloc.c @@ -0,0 +1,30 @@ +#include +#include +#define SIZE 4 + +void calloc_func(int m, float q_out[SIZE][SIZE]) { + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + q_min[i * m + j] = 10.0f; + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + q_out[i][j] = q_min[i * m + j]; + } + } +} + +int main(int argc, char **argv) { + float q_out[SIZE][SIZE] = { + {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}; + calloc_func(SIZE, q_out); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("q_out: %f\n", q_out[i][j]); + assert(q_out[i][j] == 10); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/continue-w.c b/src/dios-egraphs/Diospyros/llvm-output-tests/continue-w.c new file mode 100644 index 00000000..d7b42a84 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/continue-w.c @@ -0,0 +1,36 @@ +#include +#include +#define SIZE 8 + +void continue_w_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { + int i = 0; + while (i < SIZE) { + if (i < SIZE / 2) { + i += 1; + continue; + } + b_out[i] = a_in[i] * scalar_in; + i += 1; + } +} + +int main(void) { + float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; + float scalar_in = 10; + float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + continue_w_test(a_in, scalar_in, b_out); + float expected[SIZE] = {0, 0, 0, 0, 50, 40, 30, 20}; + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); + } + // 0.000000 + // 0.000000 + // 0.000000 + // 0.000000 + // 50.000000 + // 40.000000 + // 30.000000 + // 20.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/continue.c b/src/dios-egraphs/Diospyros/llvm-output-tests/continue.c new file mode 100644 index 00000000..40bc13b8 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/continue.c @@ -0,0 +1,31 @@ +#include +#include +#define SIZE 8 + +void continue_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { + for (int i = 0; i < SIZE; i++) { + if (i < SIZE / 2) continue; + b_out[i] = a_in[i] * scalar_in; + } +} + +int main(void) { + float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; + float scalar_in = 10; + float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + continue_test(a_in, scalar_in, b_out); + float expected[SIZE] = {0, 0, 0, 0, 50, 40, 30, 20}; + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); + } + // 0.000000 + // 0.000000 + // 0.000000 + // 0.000000 + // 50.000000 + // 40.000000 + // 30.000000 + // 20.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/cube.c b/src/dios-egraphs/Diospyros/llvm-output-tests/cube.c new file mode 100644 index 00000000..38de21a5 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/cube.c @@ -0,0 +1,34 @@ +#include +#include +#include +#define SIZE 8 + +void cube(float a_in[SIZE], float b_out[SIZE]) { + for (int i = 0; i < SIZE; i++) { + b_out[i] = powf(a_in[i], 3); + } +} +int main(void) { + float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; + float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + 
cube(a_in, b_out); + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + } + assert(b_out[0] == 729); + assert(b_out[1] == 512); + assert(b_out[2] == 343); + assert(b_out[3] == 216); + assert(b_out[4] == 125); + assert(b_out[5] == 64); + assert(b_out[6] == 27); + assert(b_out[7] == 8); + // 729.000000 + // 512.000000 + // 343.000000 + // 216.000000 + // 125.000000 + // 64.000000 + // 27.000000 + // 8.000000 +} diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/five_binops.c b/src/dios-egraphs/Diospyros/llvm-output-tests/five_binops.c new file mode 100644 index 00000000..6a33c896 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/five_binops.c @@ -0,0 +1,31 @@ +#include +#include +#define SIZE 4 + +void add5(float a_in[SIZE], float b_in[SIZE], float c_in[SIZE], + float d_in[SIZE], float e_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] + b_in[0] + c_in[0] + d_in[0] + e_in[0]; + c_out[1] = a_in[1] + b_in[1] + c_in[1] + d_in[1] + e_in[1]; + c_out[2] = a_in[2] + b_in[2] + c_in[2] + d_in[2] + e_in[2]; + c_out[3] = a_in[3] + b_in[3] + c_in[3] + d_in[3] + e_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_in[SIZE] = {1, 2, 3, 4}; + float d_in[SIZE] = {5, 6, 7, 8}; + float e_in[SIZE] = {1, 2, 3, 4}; + float c_out[SIZE]; + add5(a_in, b_in, c_in, d_in, e_in, c_out); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + assert(c_out[0] == 13); + assert(c_out[1] == 18); + assert(c_out[2] == 23); + assert(c_out[3] == 28); + // expected: 13, 18, 23, 28 + return 0; +} diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/identity_matrix.c b/src/dios-egraphs/Diospyros/llvm-output-tests/identity_matrix.c new file mode 100644 index 00000000..9e5f062e --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/identity_matrix.c @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1 + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } +} + +int main(void) { + float A[SIZE * SIZE] = {0, 1, 2, 3}; + float Q[SIZE * SIZE] = {0, 1, 2, 3}; + float R[SIZE * SIZE] = {0, 1, 2, 3}; + float AExpected[SIZE * SIZE] = {0, 1, 2, 3}; + float QExpected[SIZE * SIZE] = {0, 1, 2, 3}; + float RExpected[SIZE * SIZE] = {0, 1, 2, 3}; + naive_fixed_qr_decomp(A, Q, R); + no_opt_naive_fixed_qr_decomp(AExpected, QExpected, RExpected); + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Expected Q: %f\n", QExpected[i]); + printf("Actual Q: %f\n", Q[i]); + assert(fabsf(QExpected[i] - Q[i]) < DELTA); + } + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Expected R: %f\n", RExpected[i]); + printf("Actual R: %f\n", R[i]); + assert(fabsf(RExpected[i] - R[i]) < DELTA); + } + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Expected A: %f\n", AExpected[i]); + 
printf("Actual A: %f\n", A[i]); + assert(fabsf(AExpected[i] - A[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/if-else.c b/src/dios-egraphs/Diospyros/llvm-output-tests/if-else.c new file mode 100644 index 00000000..3cd9c502 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/if-else.c @@ -0,0 +1,32 @@ +#include +#include +#define SIZE 8 + +void if_else(float a_in[SIZE], float b_out[SIZE]) { + for (int i = 0; i < SIZE; i++) { + if (i < SIZE / 2) { + b_out[i] = a_in[i]; + } else { + b_out[i] = a_in[i] + 1; + } + } +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; + float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + if_else(a_in, b_out); + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + } + assert(b_out[0] == 1); + assert(b_out[1] == 2); + assert(b_out[2] == 3); + assert(b_out[3] == 4); + assert(b_out[4] == 6); + assert(b_out[5] == 7); + assert(b_out[6] == 8); + assert(b_out[7] == 9); + // expected: 1, 2, 3, 4, 6, 7, 8, 9 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/inline-float.c b/src/dios-egraphs/Diospyros/llvm-output-tests/inline-float.c new file mode 100644 index 00000000..2b0245d8 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/inline-float.c @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + return sum; +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = no_opt_test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +float test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + return sum; +} + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +int main() { + float A[SIZE] = {1.0f}; + float expectedA[SIZE] = {1.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = 1.0f; + expectedA[i] = 1.0f; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + B[i] = 0.0f; + expectedB[i] = 0.0f; + } + float C[SIZE] = {0.0f}; + float expectedC[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + C[i] = 0.0f; + expectedC[i] = 0.0f; + } + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("C Output: %f\n", C[i]); + printf("Expected C Output: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/inline-void.c b/src/dios-egraphs/Diospyros/llvm-output-tests/inline-void.c new file mode 100644 index 00000000..71e7aa89 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/inline-void.c @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +void test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +void no_opt_test_inline(float A[SIZE], float 
B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } +} + +void no_opt_test(float A[SIZE], float B[SIZE]) { + no_opt_test_inline(A, B, SIZE); +} + +void test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } +} + +void test(float A[SIZE], float B[SIZE]) { test_inline(A, B, SIZE); } + +int main() { + float A[SIZE] = {1.0f}; + float expectedA[SIZE] = {1.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = 1.0f; + expectedA[i] = 1.0f; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + B[i] = 0.0f; + expectedB[i] = 0.0f; + } + test(A, B); + no_opt_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/load_reuse.c b/src/dios-egraphs/Diospyros/llvm-output-tests/load_reuse.c new file mode 100644 index 00000000..3a447db4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/load_reuse.c @@ -0,0 +1,81 @@ +#include +#include + +#define I_ROWS 2 +#define I_COLS 2 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void load_use_twice(float mat_in[I_ROWS][I_COLS], float f_in[F_ROWS][F_COLS], + float mat_out[O_ROWS][O_COLS], + float mat_out2[O_ROWS][O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = + mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + mat_out[outRow][outCol] += + 3 * v - + 4; // try something to use v in a different way + mat_out2[outRow][outCol] += + 2 * v + + 1; // try something to use v in a different way + } + } + } + } + } +} + +int main(void) { + float mat_in[I_ROWS][I_COLS] = {{1, 2}, {3, 4}}; + float f_in[F_ROWS][F_COLS] = {{1, 1}, {1, 1}}; + float mat_out1[O_ROWS][O_COLS] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; + float mat_out2[O_ROWS][O_COLS] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; + load_use_twice(mat_in, f_in, mat_out1, mat_out2); + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + printf("output: %f\n", mat_out1[i][j]); + printf("output: %f\n", mat_out2[i][j]); + } + } + float output1[O_ROWS][O_COLS] = {{-1, 1, 2}, {4, 14, 10}, {5, 13, 8}}; + float output2[O_ROWS][O_COLS] = {{3, 8, 5}, {10, 24, 14}, {7, 16, 9}}; + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + assert(output1[i][j] == mat_out1[i][j]); + assert(output2[i][j] == mat_out2[i][j]); + } + } +// output: -1.000000 +// output: 3.000000 +// output: 1.000000 +// output: 8.000000 +// output: 2.000000 +// output: 5.000000 + +// output: 4.000000 +// output: 10.000000 +// output: 14.000000 +// output: 24.000000 +// output: 10.000000 +// output: 14.000000 + +// output: 5.000000 +// output: 7.000000 +// output: 13.000000 +// output: 16.000000 +// output: 8.000000 +// output: 9.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/loop-inline.c b/src/dios-egraphs/Diospyros/llvm-output-tests/loop-inline.c new file mode 100644 
index 00000000..5a817002 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/loop-inline.c @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float x[SIZE], float A[SIZE]) { + for (int k = 0; k < SIZE; k++) { + float alpha = -sgn(x[k]) * naive_norm(x, k); + A[k] = alpha; + } +} + +void no_opt_sample_test(float x[SIZE], float A[SIZE]) { + for (int k = 0; k < SIZE; k++) { + float alpha = -no_opt_sgn(x[k]) * no_opt_naive_norm(x, k); + A[k] = alpha; + } +} + +int main(void) { + float x[SIZE] = {1, -1, 2, 3, 5}; + float A[SIZE] = {0}; + sample_test(x, A); + float expectedA[SIZE] = {0}; + no_opt_sample_test(x, expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/mac.c b/src/dios-egraphs/Diospyros/llvm-output-tests/mac.c new file mode 100644 index 00000000..475ba09f --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/mac.c @@ -0,0 +1,29 @@ +#include +#include +#define SIZE 4 + +void mac(float a_in[SIZE], float b_in[SIZE], float c_in[SIZE], + float d_out[SIZE]) { + d_out[0] = a_in[0] + (b_in[0] * c_in[0]); + d_out[1] = a_in[1] + (b_in[1] * c_in[1]); + d_out[2] = a_in[2] + (b_in[2] * c_in[2]); + d_out[3] = a_in[3] + (b_in[3] * c_in[3]); +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {2, 3, 4, 5}; + float c_in[SIZE] = {3, 4, 5, 6}; + float d_out[SIZE]; + mac(a_in, b_in, c_in, d_out); + assert(d_out[0] == 7); + assert(d_out[1] == 14); + assert(d_out[2] == 23); + assert(d_out[3] == 34); + printf("first: %f\n", d_out[0]); + printf("second: %f\n", d_out[1]); + printf("third: %f\n", d_out[2]); + printf("fourth: %f\n", d_out[3]); + // expected: 7, 14, 23, 34 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/malloc.c b/src/dios-egraphs/Diospyros/llvm-output-tests/malloc.c new file mode 100644 index 00000000..fb91400c --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/malloc.c @@ -0,0 +1,30 @@ +#include +#include +#define SIZE 4 + +void malloc_func(int m, float q_out[SIZE][SIZE]) { + float *q_min = (float *)malloc(sizeof(float) * m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + q_min[i * m + j] = 10.0f; + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + q_out[i][j] = q_min[i * m + j]; + } + } +} + +int main(int argc, char **argv) { + float q_out[SIZE][SIZE] = { + {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}; + malloc_func(SIZE, q_out); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("q_out: %f\n", q_out[i][j]); + assert(q_out[i][j] == 10); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/mat_mul.c 
b/src/dios-egraphs/Diospyros/llvm-output-tests/mat_mul.c new file mode 100644 index 00000000..58c9208d --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/mat_mul.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void mat_mul(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] * b_in[0] + a_in[1] * b_in[2]; + c_out[1] = a_in[0] * b_in[1] + a_in[1] * b_in[3]; + c_out[2] = a_in[2] * b_in[0] + a_in[3] * b_in[2]; + c_out[3] = a_in[2] * b_in[1] + a_in[3] * b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + mat_mul(a_in, b_in, c_out); + assert(c_out[0] == 19); + assert(c_out[1] == 22); + assert(c_out[2] == 43); + assert(c_out[3] == 50); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + // expected: 19, 22, 43, 50 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-output-tests/matrix-multiply.c new file mode 100644 index 00000000..5da28448 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/matrix-multiply.c @@ -0,0 +1,36 @@ +#include +#include + +#define A_ROWS 2 +#define A_COLS 2 +#define B_COLS 2 + +void matrix_multiply(float a_in[A_ROWS * A_COLS], float b_in[A_COLS * B_COLS], + float c_out[A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + float a_in[A_ROWS * A_COLS] = {1, 2, 3, 4}; + float b_in[A_COLS * B_COLS] = {1, 2, 3, 4}; + float c_out[A_ROWS * B_COLS] = {0, 0, 0, 0}; + matrix_multiply(a_in, b_in, c_out); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + assert(c_out[0] == 7); + assert(c_out[1] == 10); + assert(c_out[2] == 15); + assert(c_out[3] == 22); + // expected (7, 10, 15, 22) + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/mixed.c b/src/dios-egraphs/Diospyros/llvm-output-tests/mixed.c new file mode 100644 index 00000000..90c97d64 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/mixed.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] + 3; + c_out[1] = 5 + b_in[1]; + c_out[2] = 3 + b_in[2]; + c_out[3] = a_in[3] + b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 4); + assert(c_out[1] == 11); + assert(c_out[2] == 10); + assert(c_out[3] == 12); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + // expected: 4, 11, 10, 12 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/mult.c b/src/dios-egraphs/Diospyros/llvm-output-tests/mult.c new file mode 100644 index 00000000..f72d5d39 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/mult.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void prod(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] * b_in[0]; + 
c_out[1] = a_in[1] * b_in[1]; + c_out[2] = a_in[2] * b_in[2]; + c_out[3] = a_in[3] * b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + prod(a_in, b_in, c_out); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + assert(c_out[0] == 5); + assert(c_out[1] == 12); + assert(c_out[2] == 21); + assert(c_out[3] == 32); + // expected: 5, 12, 21, 32 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/multi-mat-mul.c b/src/dios-egraphs/Diospyros/llvm-output-tests/multi-mat-mul.c new file mode 100644 index 00000000..af5da2f1 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/multi-mat-mul.c @@ -0,0 +1,47 @@ +#include +#include +#define ROWS 3 +#define COLS 3 + +void matrix_multiply_3x3(float a[ROWS * COLS], float b[COLS * COLS], + float c[ROWS * COLS]) { + for (int i = 0; i < ROWS; i++) { + for (int j = 0; j < COLS; j++) { + c[j * ROWS + i] = 0; + + for (int k = 0; k < COLS; k++) { + c[j * ROWS + i] += a[k * ROWS + i] * b[j * COLS + k]; + } + } + } +} + +void multimatrix_multiply(float a_in[ROWS * COLS], float b_in[ROWS * COLS], + float c_in[ROWS * COLS], float d_out[ROWS * COLS]) { + float ab[ROWS * COLS]; + matrix_multiply_3x3(a_in, b_in, ab); + matrix_multiply_3x3(ab, c_in, d_out); +} + +int main(void) { + float a_in[ROWS * COLS] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + float b_in[ROWS * COLS] = {1, 0, 1, 0, 1, 0, 1, 0, 1}; + float c_in[ROWS * COLS] = {9, 8, 7, 6, 5, 4, 3, 2, 1}; + float d_out[ROWS * COLS] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; + multimatrix_multiply(a_in, b_in, c_in, d_out); + float expected[ROWS * COLS] = {160, 200, 240, 100, 125, 150, 40, 50, 60}; + for (int i = 0; i < ROWS * COLS; i++) { + printf("output: %f\n", d_out[i]); + assert(expected[i] == d_out[i]); + } + // output: 160.000000 + // output: 200.000000 + // output: 240.000000 + // output: 100.000000 + // output: 125.000000 + // output: 150.000000 + // output: 40.000000 + // output: 50.000000 + // output: 60.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/multiple_adds.c b/src/dios-egraphs/Diospyros/llvm-output-tests/multiple_adds.c new file mode 100644 index 00000000..33ac2dc3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/multiple_adds.c @@ -0,0 +1,29 @@ +#include +#include +#define SIZE 4 + +void madd(float a_in[SIZE], float b_in[SIZE], float c_in[SIZE], + float d_out[SIZE]) { + d_out[0] = a_in[0] + b_in[0] + c_in[0]; + d_out[1] = a_in[1] + b_in[1] + c_in[1]; + d_out[2] = a_in[2] + b_in[2] + c_in[2]; + d_out[3] = a_in[3] + b_in[3] + c_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_in[SIZE] = {9, 10, 11, 12}; + float d_out[4]; + madd(a_in, b_in, c_in, d_out); + printf("first: %f\n", d_out[0]); + printf("second: %f\n", d_out[1]); + printf("third: %f\n", d_out[2]); + printf("fourth: %f\n", d_out[3]); + assert(d_out[0] == 15); + assert(d_out[1] == 18); + assert(d_out[2] == 21); + assert(d_out[3] == 24); + // expected: 6, 8, 10, 12 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/naive-norm-inline.c b/src/dios-egraphs/Diospyros/llvm-output-tests/naive-norm-inline.c new file mode 100644 index 00000000..66605c05 --- /dev/null +++ 
b/src/dios-egraphs/Diospyros/llvm-output-tests/naive-norm-inline.c @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float B[SIZE - 1]) { + B[0] = naive_norm(A, SIZE); +} + +void no_opt_sample_test(float A[SIZE], float B[SIZE - 1]) { + B[0] = no_opt_naive_norm(A, SIZE); +} + +int main(void) { + float A[SIZE] = {1, 2, 3, 4, 5}; + float B[SIZE - 1] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, 2, 3, 4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE - 1; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/naive-norm.c b/src/dios-egraphs/Diospyros/llvm-output-tests/naive-norm.c new file mode 100644 index 00000000..1727c165 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/naive-norm.c @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +float naive_norm(float x[SIZE], int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float x[SIZE], int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +int main() { + float x[SIZE] = {1.0f}; + for (int i = 0; i < SIZE; i++) { + if (i % 2 == 0) { + x[i] = 1.0f; + } else { + x[i] = 0.0f; + } + } + float calculated = naive_norm(x, SIZE); + float expected = no_opt_naive_norm(x, SIZE); + printf("Calculated of Naive L2 Norm: %f\n", calculated); + printf("Expected of Naive L2 Norm: %f\n", expected); + assert(fabs(expected - calculated) < DELTA); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/nested-inline.c b/src/dios-egraphs/Diospyros/llvm-output-tests/nested-inline.c new file mode 100644 index 00000000..02650d06 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/nested-inline.c @@ -0,0 +1,105 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float nested_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float no_opt_nested_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = no_opt_nested_inline(A, B, n); + return prod - sum; +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = no_opt_test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +float nested_inline(float A[SIZE], float 
B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = nested_inline(A, B, n); + return prod - sum; +} + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +int main() { + float A[SIZE] = {0.0f}; + float expectedA[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = 1.0f; + expectedA[i] = 1.0f; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + B[i] = -1.0f; + expectedB[i] = -1.0f; + } + float C[SIZE] = {0.0f}; + float expectedC[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + C[i] = 0.0f; + expectedC[i] = 0.0f; + } + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("Calculated C Output: %f\n", C[i]); + printf("Expected C Output: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/out_of_order.c b/src/dios-egraphs/Diospyros/llvm-output-tests/out_of_order.c new file mode 100644 index 00000000..6c59dd20 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/out_of_order.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void oo(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] + b_in[0]; + c_out[3] = a_in[3] + b_in[3]; + c_out[2] = a_in[2] + b_in[2]; + c_out[1] = a_in[1] + b_in[1]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + oo(a_in, b_in, c_out); + assert(c_out[0] == 6); + assert(c_out[1] == 8); + assert(c_out[2] == 10); + assert(c_out[3] == 12); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + // 6, 8, 10, 12 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/point-product.c b/src/dios-egraphs/Diospyros/llvm-output-tests/point-product.c new file mode 100644 index 00000000..1d1b41e5 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/point-product.c @@ -0,0 +1,47 @@ +#include +#include + +void cross_product(float lhs[3], float rhs[3], float result[3]) + __attribute__((always_inline)); + +void cross_product(float lhs[3], float rhs[3], float result[3]) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +void point_product(float q_in[4], float p_in[4], float result_out[4]) { + float qvec[3] = {q_in[0], q_in[1], q_in[2]}; + // qvec = {0, 1, 2} + + float uv[3]; + cross_product(qvec, p_in, uv); + // uv = {1 * 2 - 2 * 1, 2 * 0 - 0 * 2, 0 * 1 - 1 * 0} = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + // uv = {0, 0 , 0} + float qxuv[3]; + cross_product(qvec, uv, qxuv); + // qxuv = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + result_out[i] = p_in[i] + q_in[3] * uv[i] + qxuv[i]; + } +} + +int main(void) { + float q_in[4] = {0, 1, 2, 3}; + float p_in[4] = {0, 1, 2, 3}; + float result_out[4] = {0, 0, 0, 0}; + 
point_product(q_in, p_in, result_out); + float expected[4] = {0, 1, 2, 0}; + for (int i = 0; i < 4; i++) { + printf("%f\n", result_out[i]); + assert(expected[i] == result_out[i]); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/q-prod.c b/src/dios-egraphs/Diospyros/llvm-output-tests/q-prod.c new file mode 100644 index 00000000..2ad30a6a --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/q-prod.c @@ -0,0 +1,72 @@ +#include +#include +#include +#include +#include +#include + +#define SIZE 4 + +__attribute__((always_inline)) void naive_cross_product(float *lhs, float *rhs, + float *result) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +__attribute__((always_inline)) void naive_point_product(float *q, float *p, + float *result) { + float qvec[3] = {q[0], q[1], q[2]}; + float uv[3]; + naive_cross_product(qvec, p, uv); + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + float qxuv[3]; + naive_cross_product(qvec, uv, qxuv); + + for (int i = 0; i < 3; i++) { + result[i] = p[i] + q[3] * uv[i] + qxuv[i]; + } +} + +void naive_quaternion_product(float *a_q, float *a_t, float *b_q, float *b_t, + float *r_q, float *r_t) { + r_q[3] = + a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * b_q[2]; + r_q[0] = + a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - a_q[2] * b_q[1]; + r_q[1] = + a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * b_q[2]; + r_q[2] = + a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * b_q[0]; + + naive_point_product(a_q, b_t, r_t); + for (int i = 0; i < 3; i++) { + r_t[i] += a_t[i]; + } +} + +int main(void) { + float a_q[SIZE] = {1, 2, 3, 4}; + float a_t[SIZE] = {1, 2, 3, 4}; + float b_q[SIZE] = {0, 0, 0, 0}; + float b_t[SIZE] = {1, 2, 3, 4}; + float r_q[SIZE] = {0, 0, 0, 0}; + float r_t[SIZE] = {0, 0, 0, 0}; + naive_quaternion_product(a_q, a_t, b_q, b_t, r_q, r_t); + float expectedq[SIZE] = {0, 0, 0, 0}; + for (int i = 0; i < SIZE; i++) { + printf("%f\n", r_q[i]); + assert(expectedq[i] == r_q[i]); + } + float expectedt[SIZE] = {2, 4, 6, 0}; + for (int i = 0; i < SIZE; i++) { + printf("%f\n", r_t[i]); + assert(expectedt[i] == r_t[i]); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-modified-no-local-array.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-modified-no-local-array.c new file mode 100644 index 00000000..6f030330 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-modified-no-local-array.c @@ -0,0 +1,184 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], + float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // No Calloc is used here. 
+ float I[SIZE * SIZE] = {0}; + // float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + float q_t[SIZE * SIZE] = {1}; + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float res[SIZE * SIZE] = {0}; + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + float I[SIZE * SIZE] = {0}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + float q_t[SIZE * SIZE] = {1}; + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float res[SIZE * SIZE] = {0}; + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + } + naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1, 2, 3, 4}; + float Q[SIZE * SIZE] = {0, 0, 0, 0}; + float R[SIZE * SIZE] = {0, 0, 0, 0}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0, 0, 0, 0}; + float expectedR[SIZE * SIZE] = {0, 0, 0, 0}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-no-local-array.c new file mode 100644 index 00000000..44b534ee --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-no-local-array.c @@ -0,0 +1,304 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + 
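+// Both norm helpers above return the Euclidean (L2) norm, sqrtf of the sum of
+// x[i] * x[i]. The Householder steps in naive_fixed_qr_decomp below use it twice:
+// once for alpha = -sgn(x[0]) * naive_norm(x, m), and once to normalize the
+// reflector v[i] = u[i] / (norm_u + 0.00001f).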
+// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], float u[SIZE], + float v[SIZE], float q_min[SIZE * SIZE], + float q_t[SIZE * SIZE], float res[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1, 2, 3, 4}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float I[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float u[SIZE] = {0.0f}; + float v[SIZE] = {0.0f}; + float q_min[SIZE * SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + float res[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, I, x, e, u, v, q_min, q_t, res); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-1.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-1.c new file mode 100644 index 00000000..8c131bfb --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-1.c @@ -0,0 +1,99 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +// float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + // float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + // float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float 
e[SIZE] = {0}; + sample_test(A, x, e); + float expectedA[SIZE] = {0}; + float expectedx[SIZE] = {0}; + float expectede[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedx, expectede); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + printf("X Output: %f\n", x[i]); + printf("Expected X Output: %f\n", expectedx[i]); + printf("E Output: %f\n", e[i]); + printf("Expected E Output: %f\n", expectede[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedx[i] - x[i]) < DELTA); + assert(fabs(expectede[i] - e[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-2.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-2.c new file mode 100644 index 00000000..367d75a4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-2.c @@ -0,0 +1,92 @@ +// Modification of test 1, with dynamically sized arrays. + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; + sample_test(A, x, e); + float expectedA[SIZE] = {0}; + float expectedX[SIZE] = {0}; + float expectedE[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedX, expectedE); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-3.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-3.c new file mode 100644 index 00000000..ebe988ae --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-3.c @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; 
i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE], + float Q[SIZE * SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m) * naive_norm(e, m); + A[k] = alpha; + + // float q_t[SIZE * SIZE] = {alpha}; + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = alpha; + } + } + } +} + +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE], + float Q[SIZE * SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m) * + no_opt_naive_norm(e, m); + A[k] = alpha; + + // float q_t[SIZE * SIZE] = {alpha}; + + // for (int i = 0; i < SIZE * SIZE; i++) { + // Q[i] = q_t[i]; + // } + + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = alpha; + } + } + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; + float Q[SIZE * SIZE] = {0}; + sample_test(A, x, e, Q); + float expectedA[SIZE] = {0}; + float expectedX[SIZE] = {0}; + float expectedE[SIZE] = {0}; + float expectedQ[SIZE * SIZE] = {0}; + no_opt_sample_test(expectedA, expectedX, expectedE, expectedQ); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } + + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-1-linear-array.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-1-linear-array.c new file mode 100644 index 00000000..97726430 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-1-linear-array.c @@ -0,0 +1,201 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 1 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +int main(void) { + float A[SIZE] = {1.0f}; + float Q[SIZE] = {0.0f}; + float R[SIZE] = 
{1.0f}; + float i[SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE] = {0.0f}; + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-1.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-1.c new file mode 100644 index 00000000..b0fc07ae --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-1.c @@ -0,0 +1,210 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 1 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float 
A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1.0f}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float i[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + float expectedi[SIZE * SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-2-2.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-2-2.c new file mode 100644 index 00000000..63ea53cd --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-2-2.c @@ -0,0 +1,217 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + 
float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1.0f, 2.0f, 3.0f, 4.0f}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {1.0f}; + float i[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {1.0f}; + float expectedi[SIZE * SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q_T Output: %f\n", q_t[i * SIZE + j]); + printf("Expected Q_T Output: %f\n", expectedq_t[i * SIZE + j]); + assert(fabs(expectedq_t[i] - q_t[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-2-linear-array.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-2-linear-array.c new file mode 100644 index 00000000..fc1db5ba --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-2-linear-array.c @@ -0,0 +1,201 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +int main(void) { + float A[SIZE] = {1.0f, 2.0f}; + float Q[SIZE] = {0.0f}; + float R[SIZE] 
= {1.0f}; + float i[SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE] = {0.0f}; + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-2.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-2.c new file mode 100644 index 00000000..4898553e --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-2.c @@ -0,0 +1,206 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], + float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE * SIZE], 
float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE * SIZE], float x[SIZE], + float e[SIZE], float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1.0f, 2.0f, 3.0f, 4.0f}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float i[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + float expectedi[SIZE * SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-3-linear-array.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-3-linear-array.c new file mode 100644 index 00000000..4a9b7256 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-3-linear-array.c @@ -0,0 +1,201 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 3 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +int main(void) { + float A[SIZE] = {1.0f, 2.0f, 3.0f}; + float Q[SIZE] = {0.0f}; + float 
R[SIZE] = {1.0f}; + float i[SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE] = {0.0f}; + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-3.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-3.c new file mode 100644 index 00000000..cdb08413 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-3.c @@ -0,0 +1,209 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 3 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void 
naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE * SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE * SIZE], float x[SIZE], + float e[SIZE], float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, 9.0f}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float i[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + float expectedi[SIZE * SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-4-linear-array.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-4-linear-array.c new file mode 100644 index 00000000..5dc845dc --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-4-linear-array.c @@ -0,0 +1,137 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 4 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return v; } + +float no_opt_sgn(float v) { return v; } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void naive_fixed_qr_decomp(float Q[SIZE], float x[SIZE], float q_t[SIZE]) { + // for (int i = 0; i < SIZE; i++) { + // R[i] = A[i]; + // } + + // for (int i = 0; i < SIZE; i++) { + // I[i] = 1.0f; + // } + + // Householder + // for (int k = 0; k < SIZE - 1; k++) { + // int k = 0; + // int m = SIZE - k; + + float alpha = -sgn(x[0]); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + // if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + // } + // } +} + +void no_opt_naive_fixed_qr_decomp(float Q[SIZE], float x[SIZE], + float q_t[SIZE]) { + // for (int i = 0; i < SIZE; i++) { + // R[i] = A[i]; + // } + + // for (int i = 0; i < SIZE; i++) { + // I[i] = 1.0f; + // } + + // Householder + // for (int k = 0; k < SIZE - 1; k++) { + // int k = 0; + // int m = SIZE - k; + + float alpha = -no_opt_sgn(x[0]); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + // if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + // } + // } +} + +int main(void) { + float A[SIZE] = {1.1f, 2.1f, 3.1f, 4.1f}; + float Q[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + Q[i] = 0.0f; + } + float R[SIZE] = {1.0f}; + float i[SIZE] = {0.0f}; + float x[SIZE] = {1.2f, 1.3f, 1.4f, 1.5f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(Q, x, q_t); + float expectedQ[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + expectedQ[i] = 0.0f; + } + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {1.2f, 1.3f, 1.4f, 1.5f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(expectedQ, expectedx, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-5-linear-array.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-5-linear-array.c new file mode 100644 index 00000000..de55e82a --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-4-5-linear-array.c @@ -0,0 +1,201 @@ +// Here I remove the need for using calloc and free by preallocating larger +// 
arrays I want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +int main(void) { + float A[SIZE] = 
{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; + float Q[SIZE] = {0.0f}; + float R[SIZE] = {1.0f}; + float i[SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE] = {0.0f}; + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/return.c b/src/dios-egraphs/Diospyros/llvm-output-tests/return.c new file mode 100644 index 00000000..02a7a6f1 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/return.c @@ -0,0 +1,37 @@ +#include +#include +#define SIZE 8 + +void return_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { + for (int i = 0; i < SIZE; i++) { + if (i == SIZE / 2) return; + b_out[i] = a_in[i] * scalar_in; + } + b_out[SIZE / 2] = a_in[SIZE / 2] * scalar_in; // shouldn't run +} + +int main(void) { + float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; + float scalar_in = 10; + float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + return_test(a_in, scalar_in, b_out); + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + } + assert(b_out[0] == 90); + assert(b_out[1] == 80); + assert(b_out[2] == 70); + assert(b_out[3] == 60); + assert(b_out[4] == 0); + assert(b_out[5] == 0); + assert(b_out[6] == 0); + assert(b_out[7] == 0); + // 90.000000 + // 80.000000 + // 70.000000 + // 60.000000 + // 0.000000 + // 0.000000 + // 0.000000 + // 0.000000 +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/scalar.c b/src/dios-egraphs/Diospyros/llvm-output-tests/scalar.c new file mode 100644 index 00000000..f5ca5592 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/scalar.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 8 + +void matrix_multiply(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { + for (int i = 0; i < SIZE; i++) { + b_out[i] = a_in[i] * scalar_in; + } +} + +int main(void) { + float a_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; + float scalar_in = 10; + float b_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; + matrix_multiply(a_in, scalar_in, b_in); + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_in[i]); + } + assert(b_in[0] == 10); + assert(b_in[1] == 20); + assert(b_in[2] == 30); + assert(b_in[3] == 40); + assert(b_in[4] == 50); + assert(b_in[5] == 60); + assert(b_in[6] == 70); + assert(b_in[7] == 80); +} diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/sgn-inline.c b/src/dios-egraphs/Diospyros/llvm-output-tests/sgn-inline.c new file mode 100644 index 00000000..e982b0eb --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/sgn-inline.c @@ -0,0 +1,42 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float sgn(float v) 
__attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +void sample_test(float A[SIZE], float B[SIZE]) { + for (int i = 0; i < SIZE; i++) { + B[i] = sgn(A[i]); + } +} + +void no_opt_sample_test(float A[SIZE], float B[SIZE]) { + for (int i = 0; i < SIZE; i++) { + B[i] = no_opt_sgn(A[i]); + } +} + +int main(void) { + float A[SIZE] = {1, -2, 0, -4, 5}; + float B[SIZE] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, -2, 0, -4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/sgn-naive-norm-inline-1.c b/src/dios-egraphs/Diospyros/llvm-output-tests/sgn-naive-norm-inline-1.c new file mode 100644 index 00000000..c04592b6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/sgn-naive-norm-inline-1.c @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float naive_norm(float *x, int m) __attribute__((always_inline)); +float sgn(float v) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float B[SIZE]) { + B[0] = naive_norm(A, SIZE); + for (int i = 1; i < SIZE; i++) { + B[i] = sgn(A[i]); + } +} + +void no_opt_sample_test(float A[SIZE], float B[SIZE]) { + B[0] = no_opt_naive_norm(A, SIZE); + for (int i = 1; i < SIZE; i++) { + B[i] = no_opt_sgn(A[i]); + } +} + +int main(void) { + float A[SIZE] = {1, -2, 0, -4, 5}; + float B[SIZE] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, -2, 0, -4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/sgn-naive-norm-inline-2.c b/src/dios-egraphs/Diospyros/llvm-output-tests/sgn-naive-norm-inline-2.c new file mode 100644 index 00000000..9613c3e4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/sgn-naive-norm-inline-2.c @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float naive_norm(float *x, int m) __attribute__((always_inline)); +float sgn(float v) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float B[SIZE]) { + float alpha = -sgn(A[0]) * naive_norm(A, SIZE); + B[0] = alpha; + for (int i = 1; i < SIZE; i++) 
{ + B[i] = sgn(A[i]); + } +} + +void no_opt_sample_test(float A[SIZE], float B[SIZE]) { + float alpha = -no_opt_sgn(A[0]) * no_opt_naive_norm(A, SIZE); + B[0] = alpha; + for (int i = 1; i < SIZE; i++) { + B[i] = no_opt_sgn(A[i]); + } +} + +int main(void) { + float A[SIZE] = {1, -2, 0, -4, 5}; + float B[SIZE] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, -2, 0, -4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/sqrt.c b/src/dios-egraphs/Diospyros/llvm-output-tests/sqrt.c new file mode 100644 index 00000000..a753d824 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/sqrt.c @@ -0,0 +1,33 @@ +#include +#include +#include +#define SIZE 8 + +void vsqrt(float a_in[SIZE], float b_out[SIZE], float c_out[SIZE]) { + for (int i = 0; i < SIZE; i++) { + b_out[i] = sqrtf(a_in[i]); + c_out[i] = sqrtf(a_in[i]); + } +} + +int main(void) { + float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; + float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + float c_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + vsqrt(a_in, b_out, c_out); + float delta = 0.00001f; + float expected[SIZE] = {3.000000f, 2.828427f, 2.645751f, 2.449490f, + 2.236068f, 2.000000f, 1.732051f, 1.414214f}; + for (int i = 0; i < SIZE; i++) { + printf("%f\n", c_out[i]); + assert(fabs(expected[i] - c_out[i]) < delta); + } + // 3.000000 + // 2.828427 + // 2.645751 + // 2.449490 + // 2.236068 + // 2.000000 + // 1.732051 + // 1.414214 +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/stencil-2d.c b/src/dios-egraphs/Diospyros/llvm-output-tests/stencil-2d.c new file mode 100644 index 00000000..38362c79 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/stencil-2d.c @@ -0,0 +1,78 @@ +#include +#include +#define ROW_SIZE 8 +#define COL_SIZE 4 +#define F_SIZE 9 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + // float mul = filter_in[k1 * 3 + k2] * + // orig_in[(r + k1) * COL_SIZE + c + k2]; + // temp += mul; + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + float orig_in[ROW_SIZE * COL_SIZE] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + float sol_out[ROW_SIZE * COL_SIZE] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + float filter_in[F_SIZE] = {1, 1, 1, 1, 1, 1, 1, 1, 1}; + stencil(orig_in, sol_out, filter_in); + float expected[ROW_SIZE * COL_SIZE] = { + 9, 9, 1, 1, 9, 9, 1, 1, 9, 9, 1, 1, 9, 9, 1, 1, + 9, 9, 1, 1, 9, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("%f\n", sol_out[i]); + assert(expected[i] == sol_out[i]); + } + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + 
// 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/ternary.c b/src/dios-egraphs/Diospyros/llvm-output-tests/ternary.c new file mode 100644 index 00000000..04acb85e --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/ternary.c @@ -0,0 +1,35 @@ +#include +#include +#define SIZE 8 + +void tern(float a_in[SIZE], float b_out[SIZE]) { + for (int i = 0; i < SIZE; i++) { + b_out[i] = (i < SIZE / 2) ? a_in[i] : 0; + } +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; + float b_out[SIZE] = {5, 6, 7, 8, 1, 2, 3, 4}; + tern(a_in, b_out); + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + } + assert(b_out[0] == 1); + assert(b_out[1] == 2); + assert(b_out[2] == 3); + assert(b_out[3] == 4); + assert(b_out[4] == 0); + assert(b_out[5] == 0); + assert(b_out[6] == 0); + assert(b_out[7] == 0); + // 1.000000 + // 2.000000 + // 3.000000 + // 4.000000 + // 0.000000 + // 0.000000 + // 0.000000 + // 0.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/transpose.c b/src/dios-egraphs/Diospyros/llvm-output-tests/transpose.c new file mode 100644 index 00000000..934d651f --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/transpose.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +void naive_transpose(float a[SIZE * SIZE], int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} + +void no_opt_naive_transpose(float a[SIZE * SIZE], int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} + +int main() { + float calculated[SIZE * SIZE] = {0}; + for (int i = 0; i < SIZE * SIZE; i++) { + if (i % 2 == 0) { + calculated[i] = 1.0f; + } else { + calculated[i] = 0.0f; + } + } + float expected[SIZE * SIZE] = {0}; + for (int i = 0; i < SIZE * SIZE; i++) { + if (i % 2 == 0) { + expected[i] = 1.0f; + } else { + expected[i] = 0.0f; + } + } + naive_transpose(calculated, SIZE); + no_opt_naive_transpose(expected, SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + printf("A Transpose Calculated: %f\n", calculated[i]); + printf("A Transpose Expected: %f\n", expected[i]); + assert(fabs(expected[i] - calculated[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/triangle.c b/src/dios-egraphs/Diospyros/llvm-output-tests/triangle.c new file mode 100644 index 00000000..344beb52 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/triangle.c @@ -0,0 +1,96 @@ +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1 + +// Triangle Access Pattern Test + +void lower_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < i; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + +void no_opt_lower_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < i; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + 
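+// Illustrative note (derived from the code, not part of the original test):
+// with the i + SIZE * j indexing and SIZE == 2, lower_triangle visits only
+// (i, j) = (1, 0), i.e. index 1, so for A1 = B1 = {0, 1, 2, 3} the single
+// updated element is C1[1] = 1 + 1 = 2. upper_triangle below visits
+// (0, 0), (0, 1) and (1, 1), i.e. indices 0, 2 and 3, giving 0, 4 and 6.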
+void upper_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i; j < SIZE; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + +void no_opt_upper_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i; j < SIZE; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + +int main(void) { + float A1[SIZE * SIZE] = {0, 1, 2, 3}; + float B1[SIZE * SIZE] = {0, 1, 2, 3}; + float C1[SIZE * SIZE] = {0, 1, 2, 3}; + + float A1Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float B1Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float C1Expected[SIZE * SIZE] = {0, 1, 2, 3}; + + lower_triangle(A1, B1, C1); + no_opt_lower_triangle(A1Expected, B1Expected, C1Expected); + + for (int i = 0; i < SIZE * SIZE; i++) { + printf("A: %f\n", A1[i]); + printf("A Expected: %f\n", A1Expected[i]); + printf("B: %f\n", B1[i]); + printf("B Expected: %f\n", B1Expected[i]); + printf("C: %f\n", C1[i]); + printf("C Expected: %f\n", C1Expected[i]); + + assert(fabsf(A1[i] - A1Expected[i]) < DELTA); + assert(fabsf(B1[i] - B1Expected[i]) < DELTA); + assert(fabsf(C1[i] - C1Expected[i]) < DELTA); + } + + float A2[SIZE * SIZE] = {0, 1, 2, 3}; + float B2[SIZE * SIZE] = {0, 1, 2, 3}; + float C2[SIZE * SIZE] = {0, 1, 2, 3}; + + float A2Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float B2Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float C2Expected[SIZE * SIZE] = {0, 1, 2, 3}; + + upper_triangle(A2, B2, C2); + no_opt_upper_triangle(A2Expected, B2Expected, C2Expected); + + for (int i = 0; i < SIZE * SIZE; i++) { + printf("A: %f\n", A2[i]); + printf("A Expected: %f\n", A2Expected[i]); + printf("B: %f\n", B2[i]); + printf("B Expected: %f\n", B2Expected[i]); + printf("C: %f\n", C2[i]); + printf("C Expected: %f\n", C2Expected[i]); + + assert(fabsf(A2[i] - A2Expected[i]) < DELTA); + assert(fabsf(B2[i] - B2Expected[i]) < DELTA); + assert(fabsf(C2[i] - C2Expected[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/turnt.toml b/src/dios-egraphs/Diospyros/llvm-output-tests/turnt.toml new file mode 100644 index 00000000..a3b83868 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/turnt.toml @@ -0,0 +1 @@ +command = "bash ../test-runner.sh run-opt-stdout c-tests/{filename}" diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/var.c b/src/dios-egraphs/Diospyros/llvm-output-tests/var.c new file mode 100644 index 00000000..486903c0 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/var.c @@ -0,0 +1,29 @@ +#include +#include +#define SIZE 4 + +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + float t1 = 10; + float t2 = 20; + c_out[0] = a_in[0] + b_in[0]; + c_out[1] = t1 + b_in[1]; + c_out[2] = a_in[2] + t2; + c_out[3] = t2 + t1; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 6); + assert(c_out[1] == 16); + assert(c_out[2] == 23); + assert(c_out[3] == 30); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + // expected: 6, 16, 23, 30 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/width5.c b/src/dios-egraphs/Diospyros/llvm-output-tests/width5.c new file mode 100644 index 00000000..a50c3e0b --- /dev/null +++ 
b/src/dios-egraphs/Diospyros/llvm-output-tests/width5.c @@ -0,0 +1,30 @@ +#include +#include +#define SIZE 5 + +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] + b_in[0]; + c_out[1] = a_in[1] + b_in[1]; + c_out[2] = a_in[2] + b_in[2]; + c_out[3] = a_in[3] + b_in[3]; + c_out[4] = a_in[4] + b_in[4]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4, 5}; + float b_in[SIZE] = {6, 7, 8, 9, 10}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 7); + assert(c_out[1] == 9); + assert(c_out[2] == 11); + assert(c_out[3] == 13); + assert(c_out[4] == 15); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + printf("fifth: %f\n", c_out[4]); + // expected: 7, 9, 11, 13, 15 + return 0; +} diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/width9.c b/src/dios-egraphs/Diospyros/llvm-output-tests/width9.c new file mode 100644 index 00000000..7b7b2e96 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/width9.c @@ -0,0 +1,42 @@ +#include +#include +#define SIZE 9 + +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] + b_in[0]; + c_out[1] = a_in[1] + b_in[1]; + c_out[2] = a_in[2] + b_in[2]; + c_out[3] = a_in[3] + b_in[3]; + c_out[4] = a_in[4] + b_in[4]; + c_out[5] = a_in[5] + b_in[5]; + c_out[6] = a_in[6] + b_in[6]; + c_out[7] = a_in[7] + b_in[7]; + c_out[8] = a_in[8] + b_in[8]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + float b_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 2); + assert(c_out[1] == 4); + assert(c_out[2] == 6); + assert(c_out[3] == 8); + assert(c_out[4] == 10); + assert(c_out[5] == 12); + assert(c_out[6] == 14); + assert(c_out[7] == 16); + assert(c_out[8] == 18); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + printf("fifth: %f\n", c_out[4]); + printf("sixth: %f\n", c_out[5]); + printf("seventh: %f\n", c_out[6]); + printf("eight: %f\n", c_out[7]); + printf("ninth: %f\n", c_out[8]); + // expected: 2, 4, 6, 8, 10, 12, 14, 16, 18 + return 0; +} diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/2d-2d-conv.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/2d-2d-conv.c new file mode 100644 index 00000000..4653f9c1 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/2d-2d-conv.c @@ -0,0 +1,58 @@ +#include +#include + +#define I_ROWS 2 +#define I_COLS 2 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS][I_COLS], float f_in[F_ROWS][F_COLS], + float mat_out[O_ROWS][O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = + mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + mat_out[outRow][outCol] += v; + // mat_out[outRow][outCol] += + // mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + } + } + } + } + } +} + +int main(void) { + float mat_in[I_ROWS][I_COLS] = {{1, 2}, {3, 4}}; + float 
f_in[F_ROWS][F_COLS] = {{1, 1}, {1, 1}}; + float mat_out[O_ROWS][O_COLS] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; + convolution(mat_in, f_in, mat_out); + float expected[O_ROWS][O_COLS] = {{1, 3, 2}, {4, 10, 6}, {3, 7, 4}}; + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + printf("output: %f\n", mat_out[i][j]); + assert(mat_out[i][j] == expected[i][j]); + } + } + // output: 1.000000 + // output: 3.000000 + // output: 2.000000 + // output: 4.000000 + // output: 10.000000 + // output: 6.000000 + // output: 3.000000 + // output: 7.000000 + // output: 4.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/2d-conv.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/2d-conv.c new file mode 100644 index 00000000..46d4d15c --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/2d-conv.c @@ -0,0 +1,58 @@ +#include +#include + +#define I_ROWS 2 +#define I_COLS 2 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + + // mat_out[outRow * O_COLS + outCol] += + // mat_in[iRow * I_COLS + iCol] * + // f_in[fRowTrans * F_COLS + fColTrans]; + } + } + } + } + } +} + +int main(void) { + float mat_in[I_ROWS * I_COLS] = {1, 2, 3, 4}; + float f_in[F_ROWS * F_COLS] = {1, 1, 1, 1}; + float mat_out[O_ROWS * O_COLS] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; + float expected[O_ROWS * O_COLS] = {1, 3, 2, 4, 10, 6, 3, 7, 4}; + convolution(mat_in, f_in, mat_out); + for (int i = 0; i < O_ROWS * O_COLS; i++) { + printf("output: %f\n", mat_out[i]); + assert(mat_out[i] == expected[i]); + } + // output: 1.000000 + // output: 3.000000 + // output: 2.000000 + // output: 4.000000 + // output: 10.000000 + // output: 6.000000 + // output: 3.000000 + // output: 7.000000 + // output: 4.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/2d-matrix-multiply-new.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/2d-matrix-multiply-new.c new file mode 100644 index 00000000..bff538bc --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/2d-matrix-multiply-new.c @@ -0,0 +1,36 @@ +#include +#include + +#define A_ROWS 2 +#define A_COLS 2 +#define B_COLS 2 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + float a_in[A_ROWS][A_COLS] = {{1, 2}, {3, 4}}; + float b_in[A_COLS][B_COLS] = {{1, 2}, {3, 4}}; + float c_out[A_ROWS][B_COLS] = {{0, 0}, {0, 0}}; + matrix_multiply(a_in, b_in, c_out); + printf("first: %f\n", c_out[0][0]); + printf("second: %f\n", c_out[0][1]); + printf("third: %f\n", c_out[1][0]); + printf("fourth: %f\n", c_out[1][1]); + 
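+    // Worked check of the asserted values (derived from the inputs above):
+    // c_out = a_in * b_in with a_in = b_in = {{1, 2}, {3, 4}}, so
+    // c_out[0][0] = 1*1 + 2*3 = 7,   c_out[0][1] = 1*2 + 2*4 = 10,
+    // c_out[1][0] = 3*1 + 4*3 = 15,  c_out[1][1] = 3*2 + 4*4 = 22.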
assert(c_out[0][0] == 7); + assert(c_out[0][1] == 10); + assert(c_out[1][0] == 15); + assert(c_out[1][1] == 22); + // expected (7, 10, 15, 22) + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/2d-matrix-multiply.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/2d-matrix-multiply.c new file mode 100644 index 00000000..371b7967 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/2d-matrix-multiply.c @@ -0,0 +1,31 @@ +#include + +#define A_ROWS 2 +#define A_COLS 2 +#define B_COLS 2 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + float a_in[A_ROWS][A_COLS] = {{1, 2}, {3, 4}}; + float b_in[A_COLS][B_COLS] = {{1, 2}, {3, 4}}; + float c_out[A_ROWS][B_COLS] = {{0, 0}, {0, 0}}; + matrix_multiply(a_in, b_in, c_out); + printf("first: %f\n", c_out[0][0]); + printf("second: %f\n", c_out[0][1]); + printf("third: %f\n", c_out[1][0]); + printf("fourth: %f\n", c_out[1][1]); + // expected (7, 10, 15, 22) + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/2d.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/2d.c new file mode 100644 index 00000000..a794d75a --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/2d.c @@ -0,0 +1,28 @@ +#include +#include +#define SIZE 4 + +void sum(float a_in[SIZE][SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0][3] + b_in[0]; + c_out[1] = a_in[1][3] + b_in[1]; + c_out[2] = a_in[2][3] + b_in[2]; + c_out[3] = a_in[3][3] + a_in[0][1]; +} + +int main(int argc, char **argv) { + float a_in[SIZE][SIZE] = { + {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 9); + assert(c_out[1] == 14); + assert(c_out[2] == 19); + assert(c_out[3] == 18); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + // expected: 9, 14, 19, 18 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/3-by-3-matrix-multiply.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/3-by-3-matrix-multiply.c new file mode 100644 index 00000000..1fde8829 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/3-by-3-matrix-multiply.c @@ -0,0 +1,43 @@ +#include +#include + +#define A_ROWS 3 +#define A_COLS 3 +#define B_COLS 3 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + float a_in[A_ROWS][A_COLS] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + float b_in[A_COLS][B_COLS] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + float c_out[A_ROWS][B_COLS] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + matrix_multiply(a_in, b_in, c_out); + float expected[A_ROWS][B_COLS] = {{30, 36, 42}, {66, 81, 96}, {102, 126, 150}}; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + assert(expected[i][j] == c_out[i][j]); + } + } + // output: 30.000000 + // output: 36.000000 + // 
output: 42.000000 + // output: 66.000000 + // output: 81.000000 + // output: 96.000000 + // output: 102.000000 + // output: 126.000000 + // output: 150.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/5-by-5-matrix-multiply.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/5-by-5-matrix-multiply.c new file mode 100644 index 00000000..bab5bad9 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/5-by-5-matrix-multiply.c @@ -0,0 +1,59 @@ +#include +#include + +#define A_ROWS 5 +#define A_COLS 5 +#define B_COLS 5 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + float a_in[A_ROWS][A_COLS] = {{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}}; + float b_in[A_COLS][B_COLS] = {{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}}; + float c_out[A_ROWS][B_COLS] = {{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}}; + matrix_multiply(a_in, b_in, c_out); + float expected[A_ROWS][B_COLS] = {{45, 60, 75, 90, 105}, {120, 160, 200, 240, 280}, {45, 60, 75, 90, 105}, {120, 160, 200, 240, 280},{45, 60, 75, 90, 105}}; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + assert(expected[i][j] == c_out[i][j]); + } + } + // output: 45.000000 + // output: 60.000000 + // output: 75.000000 + // output: 90.000000 + // output: 105.000000 + // output: 120.000000 + // output: 160.000000 + // output: 200.000000 + // output: 240.000000 + // output: 280.000000 + // output: 45.000000 + // output: 60.000000 + // output: 75.000000 + // output: 90.000000 + // output: 105.000000 + // output: 120.000000 + // output: 160.000000 + // output: 200.000000 + // output: 240.000000 + // output: 280.000000 + // output: 45.000000 + // output: 60.000000 + // output: 75.000000 + // output: 90.000000 + // output: 105.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/README.md b/src/dios-egraphs/Diospyros/no-egg-c-tests/README.md new file mode 100644 index 00000000..0809430e --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/README.md @@ -0,0 +1 @@ +Version of c-tests, except that the EGG rewriting engine is not used in any of these tests. Translation from llvm to egg and egg back to llvm occurs. 
\ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/add.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/add.c new file mode 100644 index 00000000..ebbcdf35 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/add.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] + b_in[0]; + c_out[1] = a_in[1] + b_in[1]; + c_out[2] = a_in[2] + b_in[2]; + c_out[3] = a_in[3] + b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 6); + assert(c_out[1] == 8); + assert(c_out[2] == 10); + assert(c_out[3] == 12); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + // expected: 6, 8, 10, 12 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/add_mult.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/add_mult.c new file mode 100644 index 00000000..732ba987 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/add_mult.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void add_mult(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] + b_in[0]; + c_out[1] = a_in[1] * b_in[1]; + c_out[2] = a_in[2] + b_in[2]; + c_out[3] = a_in[3] * b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {2, 3, 4, 5}; + float c_out[SIZE]; + add_mult(a_in, b_in, c_out); + assert(c_out[0] == 3); + assert(c_out[1] == 6); + assert(c_out[2] == 7); + assert(c_out[3] == 20); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + // expected:3, 6, 7, 20 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/break-w.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/break-w.c new file mode 100644 index 00000000..8afaed68 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/break-w.c @@ -0,0 +1,34 @@ +#include +#include +#define SIZE 8 + +void break_w_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { + int i = SIZE - 1; + while (i >= 0) { + if (i < SIZE / 2) break; + b_out[i] = a_in[i] * scalar_in; + i -= 1; + } + b_out[0] = scalar_in; +} + +int main(void) { + float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; + float scalar_in = 10; + float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + break_w_test(a_in, scalar_in, b_out); + float expected[SIZE] = {10, 0, 0, 0, 50, 40, 30, 20}; + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); + } + // 10.000000 + // 0.000000 + // 0.000000 + // 0.000000 + // 50.000000 + // 40.000000 + // 30.000000 + // 20.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/break.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/break.c new file mode 100644 index 00000000..1790765f --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/break.c @@ -0,0 +1,32 @@ +#include +#include +#define SIZE 8 + +void break_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { + for (int i = SIZE - 1; i >= 0; i--) { + if (i < SIZE / 2) break; + b_out[i] = a_in[i] * scalar_in; + } + b_out[0] = scalar_in; +} + +int main(void) { + float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; + float scalar_in = 10; + float b_out[SIZE] = 
{0, 0, 0, 0, 0, 0, 0, 0}; + break_test(a_in, scalar_in, b_out); + float expected[SIZE] = {10, 0, 0, 0, 50, 40, 30, 20}; + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); + } + // 10.000000 + // 0.000000 + // 0.000000 + // 0.000000 + // 50.000000 + // 40.000000 + // 30.000000 + // 20.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/calloc.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/calloc.c new file mode 100644 index 00000000..d8e0f10a --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/calloc.c @@ -0,0 +1,30 @@ +#include +#include +#define SIZE 4 + +void calloc_func(int m, float q_out[SIZE][SIZE]) { + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + q_min[i * m + j] = 10.0f; + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + q_out[i][j] = q_min[i * m + j]; + } + } +} + +int main(int argc, char **argv) { + float q_out[SIZE][SIZE] = { + {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}; + calloc_func(SIZE, q_out); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("q_out: %f\n", q_out[i][j]); + assert(q_out[i][j] == 10); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/continue-w.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/continue-w.c new file mode 100644 index 00000000..d7b42a84 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/continue-w.c @@ -0,0 +1,36 @@ +#include +#include +#define SIZE 8 + +void continue_w_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { + int i = 0; + while (i < SIZE) { + if (i < SIZE / 2) { + i += 1; + continue; + } + b_out[i] = a_in[i] * scalar_in; + i += 1; + } +} + +int main(void) { + float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; + float scalar_in = 10; + float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + continue_w_test(a_in, scalar_in, b_out); + float expected[SIZE] = {0, 0, 0, 0, 50, 40, 30, 20}; + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); + } + // 0.000000 + // 0.000000 + // 0.000000 + // 0.000000 + // 50.000000 + // 40.000000 + // 30.000000 + // 20.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/continue.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/continue.c new file mode 100644 index 00000000..40bc13b8 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/continue.c @@ -0,0 +1,31 @@ +#include +#include +#define SIZE 8 + +void continue_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { + for (int i = 0; i < SIZE; i++) { + if (i < SIZE / 2) continue; + b_out[i] = a_in[i] * scalar_in; + } +} + +int main(void) { + float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; + float scalar_in = 10; + float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + continue_test(a_in, scalar_in, b_out); + float expected[SIZE] = {0, 0, 0, 0, 50, 40, 30, 20}; + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); + } + // 0.000000 + // 0.000000 + // 0.000000 + // 0.000000 + // 50.000000 + // 40.000000 + // 30.000000 + // 20.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/cube.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/cube.c new file mode 100644 index 00000000..38de21a5 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/cube.c 
@@ -0,0 +1,34 @@ +#include +#include +#include +#define SIZE 8 + +void cube(float a_in[SIZE], float b_out[SIZE]) { + for (int i = 0; i < SIZE; i++) { + b_out[i] = powf(a_in[i], 3); + } +} +int main(void) { + float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; + float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + cube(a_in, b_out); + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + } + assert(b_out[0] == 729); + assert(b_out[1] == 512); + assert(b_out[2] == 343); + assert(b_out[3] == 216); + assert(b_out[4] == 125); + assert(b_out[5] == 64); + assert(b_out[6] == 27); + assert(b_out[7] == 8); + // 729.000000 + // 512.000000 + // 343.000000 + // 216.000000 + // 125.000000 + // 64.000000 + // 27.000000 + // 8.000000 +} diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/five_binops.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/five_binops.c new file mode 100644 index 00000000..6a33c896 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/five_binops.c @@ -0,0 +1,31 @@ +#include +#include +#define SIZE 4 + +void add5(float a_in[SIZE], float b_in[SIZE], float c_in[SIZE], + float d_in[SIZE], float e_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] + b_in[0] + c_in[0] + d_in[0] + e_in[0]; + c_out[1] = a_in[1] + b_in[1] + c_in[1] + d_in[1] + e_in[1]; + c_out[2] = a_in[2] + b_in[2] + c_in[2] + d_in[2] + e_in[2]; + c_out[3] = a_in[3] + b_in[3] + c_in[3] + d_in[3] + e_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_in[SIZE] = {1, 2, 3, 4}; + float d_in[SIZE] = {5, 6, 7, 8}; + float e_in[SIZE] = {1, 2, 3, 4}; + float c_out[SIZE]; + add5(a_in, b_in, c_in, d_in, e_in, c_out); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + assert(c_out[0] == 13); + assert(c_out[1] == 18); + assert(c_out[2] == 23); + assert(c_out[3] == 28); + // expected: 13, 18, 23, 28 + return 0; +} diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/identity_matrix.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/identity_matrix.c new file mode 100644 index 00000000..9e5f062e --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/identity_matrix.c @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1 + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } +} + +int main(void) { + float A[SIZE * SIZE] = {0, 1, 2, 3}; + float Q[SIZE * SIZE] = {0, 1, 2, 3}; + float R[SIZE * SIZE] = {0, 1, 2, 3}; + float AExpected[SIZE * SIZE] = {0, 1, 2, 3}; + float QExpected[SIZE * SIZE] = {0, 1, 2, 3}; + float RExpected[SIZE * SIZE] = {0, 1, 2, 3}; + naive_fixed_qr_decomp(A, Q, R); + no_opt_naive_fixed_qr_decomp(AExpected, QExpected, RExpected); + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Expected Q: %f\n", QExpected[i]); + printf("Actual Q: %f\n", Q[i]); + assert(fabsf(QExpected[i] - 
Q[i]) < DELTA); + } + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Expected R: %f\n", RExpected[i]); + printf("Actual R: %f\n", R[i]); + assert(fabsf(RExpected[i] - R[i]) < DELTA); + } + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Expected A: %f\n", AExpected[i]); + printf("Actual A: %f\n", A[i]); + assert(fabsf(AExpected[i] - A[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/if-else.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/if-else.c new file mode 100644 index 00000000..3cd9c502 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/if-else.c @@ -0,0 +1,32 @@ +#include +#include +#define SIZE 8 + +void if_else(float a_in[SIZE], float b_out[SIZE]) { + for (int i = 0; i < SIZE; i++) { + if (i < SIZE / 2) { + b_out[i] = a_in[i]; + } else { + b_out[i] = a_in[i] + 1; + } + } +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; + float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + if_else(a_in, b_out); + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + } + assert(b_out[0] == 1); + assert(b_out[1] == 2); + assert(b_out[2] == 3); + assert(b_out[3] == 4); + assert(b_out[4] == 6); + assert(b_out[5] == 7); + assert(b_out[6] == 8); + assert(b_out[7] == 9); + // expected: 1, 2, 3, 4, 6, 7, 8, 9 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/inline-float.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/inline-float.c new file mode 100644 index 00000000..2b0245d8 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/inline-float.c @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + return sum; +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = no_opt_test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +float test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + return sum; +} + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +int main() { + float A[SIZE] = {1.0f}; + float expectedA[SIZE] = {1.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = 1.0f; + expectedA[i] = 1.0f; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + B[i] = 0.0f; + expectedB[i] = 0.0f; + } + float C[SIZE] = {0.0f}; + float expectedC[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + C[i] = 0.0f; + expectedC[i] = 0.0f; + } + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("C Output: %f\n", C[i]); + printf("Expected C Output: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/inline-void.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/inline-void.c new file mode 100644 index 00000000..71e7aa89 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/inline-void.c @@ -0,0 +1,54 @@ 
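+// Descriptive note: test_inline() is a void helper marked
+// __attribute__((always_inline)) that writes B[i] = 2 * A[i]; test() simply
+// forwards to it. The no_opt_* duplicates do the same work and are
+// presumably kept out of the optimization pipeline (by naming convention),
+// so main() can compare the optimized and reference outputs within DELTA.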
+#include +#include +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +void test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +void no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } +} + +void no_opt_test(float A[SIZE], float B[SIZE]) { + no_opt_test_inline(A, B, SIZE); +} + +void test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } +} + +void test(float A[SIZE], float B[SIZE]) { test_inline(A, B, SIZE); } + +int main() { + float A[SIZE] = {1.0f}; + float expectedA[SIZE] = {1.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = 1.0f; + expectedA[i] = 1.0f; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + B[i] = 0.0f; + expectedB[i] = 0.0f; + } + test(A, B); + no_opt_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/load_reuse.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/load_reuse.c new file mode 100644 index 00000000..3a447db4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/load_reuse.c @@ -0,0 +1,81 @@ +#include +#include + +#define I_ROWS 2 +#define I_COLS 2 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void load_use_twice(float mat_in[I_ROWS][I_COLS], float f_in[F_ROWS][F_COLS], + float mat_out[O_ROWS][O_COLS], + float mat_out2[O_ROWS][O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = + mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + mat_out[outRow][outCol] += + 3 * v - + 4; // try something to use v in a different way + mat_out2[outRow][outCol] += + 2 * v + + 1; // try something to use v in a different way + } + } + } + } + } +} + +int main(void) { + float mat_in[I_ROWS][I_COLS] = {{1, 2}, {3, 4}}; + float f_in[F_ROWS][F_COLS] = {{1, 1}, {1, 1}}; + float mat_out1[O_ROWS][O_COLS] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; + float mat_out2[O_ROWS][O_COLS] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; + load_use_twice(mat_in, f_in, mat_out1, mat_out2); + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + printf("output: %f\n", mat_out1[i][j]); + printf("output: %f\n", mat_out2[i][j]); + } + } + float output1[O_ROWS][O_COLS] = {{-1, 1, 2}, {4, 14, 10}, {5, 13, 8}}; + float output2[O_ROWS][O_COLS] = {{3, 8, 5}, {10, 24, 14}, {7, 16, 9}}; + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + assert(output1[i][j] == mat_out1[i][j]); + assert(output2[i][j] == mat_out2[i][j]); + } + } +// output: -1.000000 +// output: 3.000000 +// output: 1.000000 +// output: 8.000000 +// output: 2.000000 +// output: 5.000000 + +// output: 4.000000 +// output: 10.000000 +// output: 14.000000 +// output: 24.000000 +// output: 10.000000 +// output: 14.000000 + +// output: 5.000000 +// output: 7.000000 +// output: 13.000000 +// output: 16.000000 
+// output: 8.000000 +// output: 9.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/loop-inline.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/loop-inline.c new file mode 100644 index 00000000..5a817002 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/loop-inline.c @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float x[SIZE], float A[SIZE]) { + for (int k = 0; k < SIZE; k++) { + float alpha = -sgn(x[k]) * naive_norm(x, k); + A[k] = alpha; + } +} + +void no_opt_sample_test(float x[SIZE], float A[SIZE]) { + for (int k = 0; k < SIZE; k++) { + float alpha = -no_opt_sgn(x[k]) * no_opt_naive_norm(x, k); + A[k] = alpha; + } +} + +int main(void) { + float x[SIZE] = {1, -1, 2, 3, 5}; + float A[SIZE] = {0}; + sample_test(x, A); + float expectedA[SIZE] = {0}; + no_opt_sample_test(x, expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/mac.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/mac.c new file mode 100644 index 00000000..475ba09f --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/mac.c @@ -0,0 +1,29 @@ +#include +#include +#define SIZE 4 + +void mac(float a_in[SIZE], float b_in[SIZE], float c_in[SIZE], + float d_out[SIZE]) { + d_out[0] = a_in[0] + (b_in[0] * c_in[0]); + d_out[1] = a_in[1] + (b_in[1] * c_in[1]); + d_out[2] = a_in[2] + (b_in[2] * c_in[2]); + d_out[3] = a_in[3] + (b_in[3] * c_in[3]); +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {2, 3, 4, 5}; + float c_in[SIZE] = {3, 4, 5, 6}; + float d_out[SIZE]; + mac(a_in, b_in, c_in, d_out); + assert(d_out[0] == 7); + assert(d_out[1] == 14); + assert(d_out[2] == 23); + assert(d_out[3] == 34); + printf("first: %f\n", d_out[0]); + printf("second: %f\n", d_out[1]); + printf("third: %f\n", d_out[2]); + printf("fourth: %f\n", d_out[3]); + // expected: 7, 14, 23, 34 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/malloc.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/malloc.c new file mode 100644 index 00000000..fb91400c --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/malloc.c @@ -0,0 +1,30 @@ +#include +#include +#define SIZE 4 + +void malloc_func(int m, float q_out[SIZE][SIZE]) { + float *q_min = (float *)malloc(sizeof(float) * m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + q_min[i * m + j] = 10.0f; + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + q_out[i][j] = q_min[i * m + j]; + } + } +} + +int main(int argc, char **argv) { + float q_out[SIZE][SIZE] = { + {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}; + malloc_func(SIZE, q_out); + for (int i = 0; i < SIZE; i++) { + for (int 
j = 0; j < SIZE; j++) { + printf("q_out: %f\n", q_out[i][j]); + assert(q_out[i][j] == 10); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/mat_mul.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/mat_mul.c new file mode 100644 index 00000000..58c9208d --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/mat_mul.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void mat_mul(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] * b_in[0] + a_in[1] * b_in[2]; + c_out[1] = a_in[0] * b_in[1] + a_in[1] * b_in[3]; + c_out[2] = a_in[2] * b_in[0] + a_in[3] * b_in[2]; + c_out[3] = a_in[2] * b_in[1] + a_in[3] * b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + mat_mul(a_in, b_in, c_out); + assert(c_out[0] == 19); + assert(c_out[1] == 22); + assert(c_out[2] == 43); + assert(c_out[3] == 50); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + // expected: 19, 22, 43, 50 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/matrix-multiply.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/matrix-multiply.c new file mode 100644 index 00000000..5da28448 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/matrix-multiply.c @@ -0,0 +1,36 @@ +#include +#include + +#define A_ROWS 2 +#define A_COLS 2 +#define B_COLS 2 + +void matrix_multiply(float a_in[A_ROWS * A_COLS], float b_in[A_COLS * B_COLS], + float c_out[A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + float a_in[A_ROWS * A_COLS] = {1, 2, 3, 4}; + float b_in[A_COLS * B_COLS] = {1, 2, 3, 4}; + float c_out[A_ROWS * B_COLS] = {0, 0, 0, 0}; + matrix_multiply(a_in, b_in, c_out); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + assert(c_out[0] == 7); + assert(c_out[1] == 10); + assert(c_out[2] == 15); + assert(c_out[3] == 22); + // expected (7, 10, 15, 22) + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/mixed.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/mixed.c new file mode 100644 index 00000000..90c97d64 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/mixed.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] + 3; + c_out[1] = 5 + b_in[1]; + c_out[2] = 3 + b_in[2]; + c_out[3] = a_in[3] + b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 4); + assert(c_out[1] == 11); + assert(c_out[2] == 10); + assert(c_out[3] == 12); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + // expected: 4, 11, 10, 12 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/mult.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/mult.c new file mode 100644 index 00000000..f72d5d39 --- /dev/null +++ 
b/src/dios-egraphs/Diospyros/no-egg-c-tests/mult.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void prod(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] * b_in[0]; + c_out[1] = a_in[1] * b_in[1]; + c_out[2] = a_in[2] * b_in[2]; + c_out[3] = a_in[3] * b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + prod(a_in, b_in, c_out); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + assert(c_out[0] == 5); + assert(c_out[1] == 12); + assert(c_out[2] == 21); + assert(c_out[3] == 32); + // expected: 5, 12, 21, 32 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/multi-mat-mul.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/multi-mat-mul.c new file mode 100644 index 00000000..af5da2f1 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/multi-mat-mul.c @@ -0,0 +1,47 @@ +#include +#include +#define ROWS 3 +#define COLS 3 + +void matrix_multiply_3x3(float a[ROWS * COLS], float b[COLS * COLS], + float c[ROWS * COLS]) { + for (int i = 0; i < ROWS; i++) { + for (int j = 0; j < COLS; j++) { + c[j * ROWS + i] = 0; + + for (int k = 0; k < COLS; k++) { + c[j * ROWS + i] += a[k * ROWS + i] * b[j * COLS + k]; + } + } + } +} + +void multimatrix_multiply(float a_in[ROWS * COLS], float b_in[ROWS * COLS], + float c_in[ROWS * COLS], float d_out[ROWS * COLS]) { + float ab[ROWS * COLS]; + matrix_multiply_3x3(a_in, b_in, ab); + matrix_multiply_3x3(ab, c_in, d_out); +} + +int main(void) { + float a_in[ROWS * COLS] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + float b_in[ROWS * COLS] = {1, 0, 1, 0, 1, 0, 1, 0, 1}; + float c_in[ROWS * COLS] = {9, 8, 7, 6, 5, 4, 3, 2, 1}; + float d_out[ROWS * COLS] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; + multimatrix_multiply(a_in, b_in, c_in, d_out); + float expected[ROWS * COLS] = {160, 200, 240, 100, 125, 150, 40, 50, 60}; + for (int i = 0; i < ROWS * COLS; i++) { + printf("output: %f\n", d_out[i]); + assert(expected[i] == d_out[i]); + } + // output: 160.000000 + // output: 200.000000 + // output: 240.000000 + // output: 100.000000 + // output: 125.000000 + // output: 150.000000 + // output: 40.000000 + // output: 50.000000 + // output: 60.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/multiple_adds.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/multiple_adds.c new file mode 100644 index 00000000..33ac2dc3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/multiple_adds.c @@ -0,0 +1,29 @@ +#include +#include +#define SIZE 4 + +void madd(float a_in[SIZE], float b_in[SIZE], float c_in[SIZE], + float d_out[SIZE]) { + d_out[0] = a_in[0] + b_in[0] + c_in[0]; + d_out[1] = a_in[1] + b_in[1] + c_in[1]; + d_out[2] = a_in[2] + b_in[2] + c_in[2]; + d_out[3] = a_in[3] + b_in[3] + c_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_in[SIZE] = {9, 10, 11, 12}; + float d_out[4]; + madd(a_in, b_in, c_in, d_out); + printf("first: %f\n", d_out[0]); + printf("second: %f\n", d_out[1]); + printf("third: %f\n", d_out[2]); + printf("fourth: %f\n", d_out[3]); + assert(d_out[0] == 15); + assert(d_out[1] == 18); + assert(d_out[2] == 21); + assert(d_out[3] == 24); + // expected: 6, 8, 10, 12 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/naive-norm-inline.c 
b/src/dios-egraphs/Diospyros/no-egg-c-tests/naive-norm-inline.c new file mode 100644 index 00000000..66605c05 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/naive-norm-inline.c @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float B[SIZE - 1]) { + B[0] = naive_norm(A, SIZE); +} + +void no_opt_sample_test(float A[SIZE], float B[SIZE - 1]) { + B[0] = no_opt_naive_norm(A, SIZE); +} + +int main(void) { + float A[SIZE] = {1, 2, 3, 4, 5}; + float B[SIZE - 1] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, 2, 3, 4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE - 1; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/naive-norm.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/naive-norm.c new file mode 100644 index 00000000..1727c165 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/naive-norm.c @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +float naive_norm(float x[SIZE], int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float x[SIZE], int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +int main() { + float x[SIZE] = {1.0f}; + for (int i = 0; i < SIZE; i++) { + if (i % 2 == 0) { + x[i] = 1.0f; + } else { + x[i] = 0.0f; + } + } + float calculated = naive_norm(x, SIZE); + float expected = no_opt_naive_norm(x, SIZE); + printf("Calculated of Naive L2 Norm: %f\n", calculated); + printf("Expected of Naive L2 Norm: %f\n", expected); + assert(fabs(expected - calculated) < DELTA); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/nested-inline.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/nested-inline.c new file mode 100644 index 00000000..02650d06 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/nested-inline.c @@ -0,0 +1,105 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float nested_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float no_opt_nested_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = no_opt_nested_inline(A, B, n); + return prod - sum; +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = no_opt_test_inline(A, B, SIZE); + 
for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +float nested_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = nested_inline(A, B, n); + return prod - sum; +} + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +int main() { + float A[SIZE] = {0.0f}; + float expectedA[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = 1.0f; + expectedA[i] = 1.0f; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + B[i] = -1.0f; + expectedB[i] = -1.0f; + } + float C[SIZE] = {0.0f}; + float expectedC[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + C[i] = 0.0f; + expectedC[i] = 0.0f; + } + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("Calculated C Output: %f\n", C[i]); + printf("Expected C Output: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/out_of_order.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/out_of_order.c new file mode 100644 index 00000000..6c59dd20 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/out_of_order.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void oo(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] + b_in[0]; + c_out[3] = a_in[3] + b_in[3]; + c_out[2] = a_in[2] + b_in[2]; + c_out[1] = a_in[1] + b_in[1]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + oo(a_in, b_in, c_out); + assert(c_out[0] == 6); + assert(c_out[1] == 8); + assert(c_out[2] == 10); + assert(c_out[3] == 12); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + // 6, 8, 10, 12 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/point-product.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/point-product.c new file mode 100644 index 00000000..1d1b41e5 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/point-product.c @@ -0,0 +1,47 @@ +#include +#include + +void cross_product(float lhs[3], float rhs[3], float result[3]) + __attribute__((always_inline)); + +void cross_product(float lhs[3], float rhs[3], float result[3]) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +void point_product(float q_in[4], float p_in[4], float result_out[4]) { + float qvec[3] = {q_in[0], q_in[1], q_in[2]}; + // qvec = {0, 1, 2} + + float uv[3]; + cross_product(qvec, p_in, uv); + // uv = {1 * 2 - 2 * 1, 2 * 0 - 0 * 2, 0 * 1 - 1 * 0} = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + // uv = {0, 0 , 0} + float qxuv[3]; + cross_product(qvec, uv, qxuv); + // qxuv = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + result_out[i] = p_in[i] + q_in[3] * uv[i] + qxuv[i]; + } +} + +int main(void) { + float q_in[4] = 
{0, 1, 2, 3}; + float p_in[4] = {0, 1, 2, 3}; + float result_out[4] = {0, 0, 0, 0}; + point_product(q_in, p_in, result_out); + float expected[4] = {0, 1, 2, 0}; + for (int i = 0; i < 4; i++) { + printf("%f\n", result_out[i]); + assert(expected[i] == result_out[i]); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/q-prod.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/q-prod.c new file mode 100644 index 00000000..2ad30a6a --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/q-prod.c @@ -0,0 +1,72 @@ +#include +#include +#include +#include +#include +#include + +#define SIZE 4 + +__attribute__((always_inline)) void naive_cross_product(float *lhs, float *rhs, + float *result) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +__attribute__((always_inline)) void naive_point_product(float *q, float *p, + float *result) { + float qvec[3] = {q[0], q[1], q[2]}; + float uv[3]; + naive_cross_product(qvec, p, uv); + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + float qxuv[3]; + naive_cross_product(qvec, uv, qxuv); + + for (int i = 0; i < 3; i++) { + result[i] = p[i] + q[3] * uv[i] + qxuv[i]; + } +} + +void naive_quaternion_product(float *a_q, float *a_t, float *b_q, float *b_t, + float *r_q, float *r_t) { + r_q[3] = + a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * b_q[2]; + r_q[0] = + a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - a_q[2] * b_q[1]; + r_q[1] = + a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * b_q[2]; + r_q[2] = + a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * b_q[0]; + + naive_point_product(a_q, b_t, r_t); + for (int i = 0; i < 3; i++) { + r_t[i] += a_t[i]; + } +} + +int main(void) { + float a_q[SIZE] = {1, 2, 3, 4}; + float a_t[SIZE] = {1, 2, 3, 4}; + float b_q[SIZE] = {0, 0, 0, 0}; + float b_t[SIZE] = {1, 2, 3, 4}; + float r_q[SIZE] = {0, 0, 0, 0}; + float r_t[SIZE] = {0, 0, 0, 0}; + naive_quaternion_product(a_q, a_t, b_q, b_t, r_q, r_t); + float expectedq[SIZE] = {0, 0, 0, 0}; + for (int i = 0; i < SIZE; i++) { + printf("%f\n", r_q[i]); + assert(expectedq[i] == r_q[i]); + } + float expectedt[SIZE] = {2, 4, 6, 0}; + for (int i = 0; i < SIZE; i++) { + printf("%f\n", r_t[i]); + assert(expectedt[i] == r_t[i]); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-modified-no-local-array.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-modified-no-local-array.c new file mode 100644 index 00000000..6f030330 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-modified-no-local-array.c @@ -0,0 +1,184 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], + float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // No Calloc is used here. 
+ float I[SIZE * SIZE] = {0}; + // float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + float q_t[SIZE * SIZE] = {1}; + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float res[SIZE * SIZE] = {0}; + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + float I[SIZE * SIZE] = {0}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + float q_t[SIZE * SIZE] = {1}; + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float res[SIZE * SIZE] = {0}; + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + } + naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1, 2, 3, 4}; + float Q[SIZE * SIZE] = {0, 0, 0, 0}; + float R[SIZE * SIZE] = {0, 0, 0, 0}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0, 0, 0, 0}; + float expectedR[SIZE * SIZE] = {0, 0, 0, 0}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-no-local-array.c new file mode 100644 index 00000000..44b534ee --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-no-local-array.c @@ -0,0 +1,304 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive 
with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], float u[SIZE], + float v[SIZE], float q_min[SIZE * SIZE], + float q_t[SIZE * SIZE], float res[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1, 2, 3, 4}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float I[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float u[SIZE] = {0.0f}; + float v[SIZE] = {0.0f}; + float q_min[SIZE * SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + float res[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, I, x, e, u, v, q_min, q_t, res); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-1.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-1.c new file mode 100644 index 00000000..8c131bfb --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-1.c @@ -0,0 +1,99 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +// float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + // float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + // float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = 
{0}; + sample_test(A, x, e); + float expectedA[SIZE] = {0}; + float expectedx[SIZE] = {0}; + float expectede[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedx, expectede); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + printf("X Output: %f\n", x[i]); + printf("Expected X Output: %f\n", expectedx[i]); + printf("E Output: %f\n", e[i]); + printf("Expected E Output: %f\n", expectede[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedx[i] - x[i]) < DELTA); + assert(fabs(expectede[i] - e[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-2.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-2.c new file mode 100644 index 00000000..367d75a4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-2.c @@ -0,0 +1,92 @@ +// Modification of test 1, with dynamically sized arrays. + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; + sample_test(A, x, e); + float expectedA[SIZE] = {0}; + float expectedX[SIZE] = {0}; + float expectedE[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedX, expectedE); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-3.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-3.c new file mode 100644 index 00000000..ebe988ae --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-3.c @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; 
+ } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE], + float Q[SIZE * SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m) * naive_norm(e, m); + A[k] = alpha; + + // float q_t[SIZE * SIZE] = {alpha}; + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = alpha; + } + } + } +} + +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE], + float Q[SIZE * SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m) * + no_opt_naive_norm(e, m); + A[k] = alpha; + + // float q_t[SIZE * SIZE] = {alpha}; + + // for (int i = 0; i < SIZE * SIZE; i++) { + // Q[i] = q_t[i]; + // } + + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = alpha; + } + } + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; + float Q[SIZE * SIZE] = {0}; + sample_test(A, x, e, Q); + float expectedA[SIZE] = {0}; + float expectedX[SIZE] = {0}; + float expectedE[SIZE] = {0}; + float expectedQ[SIZE * SIZE] = {0}; + no_opt_sample_test(expectedA, expectedX, expectedE, expectedQ); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } + + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-1-linear-array.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-1-linear-array.c new file mode 100644 index 00000000..97726430 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-1-linear-array.c @@ -0,0 +1,201 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 1 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +int main(void) { + float A[SIZE] = {1.0f}; + float Q[SIZE] = {0.0f}; + float R[SIZE] = 
{1.0f}; + float i[SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE] = {0.0f}; + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-1.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-1.c new file mode 100644 index 00000000..b0fc07ae --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-1.c @@ -0,0 +1,210 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 1 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * 
SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1.0f}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float i[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + float expectedi[SIZE * SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-2-2.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-2-2.c new file mode 100644 index 00000000..63ea53cd --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-2-2.c @@ -0,0 +1,217 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + 
float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1.0f, 2.0f, 3.0f, 4.0f}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {1.0f}; + float i[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {1.0f}; + float expectedi[SIZE * SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q_T Output: %f\n", q_t[i * SIZE + j]); + printf("Expected Q_T Output: %f\n", expectedq_t[i * SIZE + j]); + assert(fabs(expectedq_t[i] - q_t[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-2-linear-array.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-2-linear-array.c new file mode 100644 index 00000000..fc1db5ba --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-2-linear-array.c @@ -0,0 +1,201 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +int main(void) { + float A[SIZE] = {1.0f, 2.0f}; + float Q[SIZE] = {0.0f}; + float R[SIZE] 
= {1.0f}; + float i[SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE] = {0.0f}; + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-2.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-2.c new file mode 100644 index 00000000..4898553e --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-2.c @@ -0,0 +1,206 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], + float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE * SIZE], float 
x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE * SIZE], float x[SIZE], + float e[SIZE], float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1.0f, 2.0f, 3.0f, 4.0f}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float i[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + float expectedi[SIZE * SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-3-linear-array.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-3-linear-array.c new file mode 100644 index 00000000..4a9b7256 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-3-linear-array.c @@ -0,0 +1,201 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
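// (For reference, an editorial sketch of the values this SIZE = 3, flat-array
// variant should produce, assuming ordinary float arithmetic: in the k = 0
// Householder step x is set to {1, 1, 1}, so
//   alpha = -sgn(1) * naive_norm(x, 3) = -sqrtf(3.0f) ~= -1.732051,
// and that value fills q_t and is copied into Q. The k = 1 step uses
// x = {1, 1}, so q_t is overwritten with -sqrtf(2.0f) ~= -1.414214 while Q
// keeps the k = 0 values, and R simply keeps the copy of A = {1, 2, 3}.
// The no_opt_* path repeats the same computation, so the asserts in main
// compare like against like.)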
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 3 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +int main(void) { + float A[SIZE] = {1.0f, 2.0f, 3.0f}; + float Q[SIZE] = {0.0f}; + float 
R[SIZE] = {1.0f}; + float i[SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE] = {0.0f}; + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-3.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-3.c new file mode 100644 index 00000000..cdb08413 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-3.c @@ -0,0 +1,209 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 3 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float 
A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE * SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE * SIZE], float x[SIZE], + float e[SIZE], float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, 9.0f}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float i[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + float expectedi[SIZE * SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-4-linear-array.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-4-linear-array.c new file mode 100644 index 00000000..5dc845dc --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-4-linear-array.c @@ -0,0 +1,137 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
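// (Editorial working for this stripped-down SIZE = 4 variant, assuming the
// simplified sgn defined below, which just returns its argument: with
// x = {1.2f, 1.3f, 1.4f, 1.5f} the only computed value is
//   alpha = -sgn(x[0]) = -1.2f,
// so both q_t and Q should come out as four copies of -1.2, while R is never
// passed to the decomposition and keeps its initializer {1.0f, 0.0f, 0.0f, 0.0f}
// on both the optimized and no_opt_* paths, which is why all three assert
// loops in main pass.)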
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 4 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return v; } + +float no_opt_sgn(float v) { return v; } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void naive_fixed_qr_decomp(float Q[SIZE], float x[SIZE], float q_t[SIZE]) { + // for (int i = 0; i < SIZE; i++) { + // R[i] = A[i]; + // } + + // for (int i = 0; i < SIZE; i++) { + // I[i] = 1.0f; + // } + + // Householder + // for (int k = 0; k < SIZE - 1; k++) { + // int k = 0; + // int m = SIZE - k; + + float alpha = -sgn(x[0]); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + // if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + // } + // } +} + +void no_opt_naive_fixed_qr_decomp(float Q[SIZE], float x[SIZE], + float q_t[SIZE]) { + // for (int i = 0; i < SIZE; i++) { + // R[i] = A[i]; + // } + + // for (int i = 0; i < SIZE; i++) { + // I[i] = 1.0f; + // } + + // Householder + // for (int k = 0; k < SIZE - 1; k++) { + // int k = 0; + // int m = SIZE - k; + + float alpha = -no_opt_sgn(x[0]); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + // if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + // } + // } +} + +int main(void) { + float A[SIZE] = {1.1f, 2.1f, 3.1f, 4.1f}; + float Q[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + Q[i] = 0.0f; + } + float R[SIZE] = {1.0f}; + float i[SIZE] = {0.0f}; + float x[SIZE] = {1.2f, 1.3f, 1.4f, 1.5f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(Q, x, q_t); + float expectedQ[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + expectedQ[i] = 0.0f; + } + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {1.2f, 1.3f, 1.4f, 1.5f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(expectedQ, expectedx, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-5-linear-array.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-5-linear-array.c new file mode 100644 index 00000000..de55e82a --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-4-5-linear-array.c @@ -0,0 +1,201 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I 
want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +int main(void) { + float A[SIZE] = {1.0f, 
2.0f, 3.0f, 4.0f, 5.0f}; + float Q[SIZE] = {0.0f}; + float R[SIZE] = {1.0f}; + float i[SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE] = {0.0f}; + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/return.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/return.c new file mode 100644 index 00000000..02a7a6f1 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/return.c @@ -0,0 +1,37 @@ +#include +#include +#define SIZE 8 + +void return_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { + for (int i = 0; i < SIZE; i++) { + if (i == SIZE / 2) return; + b_out[i] = a_in[i] * scalar_in; + } + b_out[SIZE / 2] = a_in[SIZE / 2] * scalar_in; // shouldn't run +} + +int main(void) { + float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; + float scalar_in = 10; + float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + return_test(a_in, scalar_in, b_out); + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + } + assert(b_out[0] == 90); + assert(b_out[1] == 80); + assert(b_out[2] == 70); + assert(b_out[3] == 60); + assert(b_out[4] == 0); + assert(b_out[5] == 0); + assert(b_out[6] == 0); + assert(b_out[7] == 0); + // 90.000000 + // 80.000000 + // 70.000000 + // 60.000000 + // 0.000000 + // 0.000000 + // 0.000000 + // 0.000000 +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/scalar.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/scalar.c new file mode 100644 index 00000000..f5ca5592 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/scalar.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 8 + +void matrix_multiply(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { + for (int i = 0; i < SIZE; i++) { + b_out[i] = a_in[i] * scalar_in; + } +} + +int main(void) { + float a_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; + float scalar_in = 10; + float b_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; + matrix_multiply(a_in, scalar_in, b_in); + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_in[i]); + } + assert(b_in[0] == 10); + assert(b_in[1] == 20); + assert(b_in[2] == 30); + assert(b_in[3] == 40); + assert(b_in[4] == 50); + assert(b_in[5] == 60); + assert(b_in[6] == 70); + assert(b_in[7] == 80); +} diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/sgn-inline.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/sgn-inline.c new file mode 100644 index 00000000..e982b0eb --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/sgn-inline.c @@ -0,0 +1,42 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); + +float 
sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +void sample_test(float A[SIZE], float B[SIZE]) { + for (int i = 0; i < SIZE; i++) { + B[i] = sgn(A[i]); + } +} + +void no_opt_sample_test(float A[SIZE], float B[SIZE]) { + for (int i = 0; i < SIZE; i++) { + B[i] = no_opt_sgn(A[i]); + } +} + +int main(void) { + float A[SIZE] = {1, -2, 0, -4, 5}; + float B[SIZE] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, -2, 0, -4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/sgn-naive-norm-inline-1.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/sgn-naive-norm-inline-1.c new file mode 100644 index 00000000..c04592b6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/sgn-naive-norm-inline-1.c @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float naive_norm(float *x, int m) __attribute__((always_inline)); +float sgn(float v) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float B[SIZE]) { + B[0] = naive_norm(A, SIZE); + for (int i = 1; i < SIZE; i++) { + B[i] = sgn(A[i]); + } +} + +void no_opt_sample_test(float A[SIZE], float B[SIZE]) { + B[0] = no_opt_naive_norm(A, SIZE); + for (int i = 1; i < SIZE; i++) { + B[i] = no_opt_sgn(A[i]); + } +} + +int main(void) { + float A[SIZE] = {1, -2, 0, -4, 5}; + float B[SIZE] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, -2, 0, -4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/sgn-naive-norm-inline-2.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/sgn-naive-norm-inline-2.c new file mode 100644 index 00000000..9613c3e4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/sgn-naive-norm-inline-2.c @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float naive_norm(float *x, int m) __attribute__((always_inline)); +float sgn(float v) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float B[SIZE]) { + float alpha = -sgn(A[0]) * naive_norm(A, SIZE); + B[0] = alpha; + for (int i = 1; i < SIZE; i++) { + B[i] = sgn(A[i]); + } +} + +void 
no_opt_sample_test(float A[SIZE], float B[SIZE]) { + float alpha = -no_opt_sgn(A[0]) * no_opt_naive_norm(A, SIZE); + B[0] = alpha; + for (int i = 1; i < SIZE; i++) { + B[i] = no_opt_sgn(A[i]); + } +} + +int main(void) { + float A[SIZE] = {1, -2, 0, -4, 5}; + float B[SIZE] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, -2, 0, -4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/sqrt.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/sqrt.c new file mode 100644 index 00000000..a753d824 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/sqrt.c @@ -0,0 +1,33 @@ +#include +#include +#include +#define SIZE 8 + +void vsqrt(float a_in[SIZE], float b_out[SIZE], float c_out[SIZE]) { + for (int i = 0; i < SIZE; i++) { + b_out[i] = sqrtf(a_in[i]); + c_out[i] = sqrtf(a_in[i]); + } +} + +int main(void) { + float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; + float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + float c_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; + vsqrt(a_in, b_out, c_out); + float delta = 0.00001f; + float expected[SIZE] = {3.000000f, 2.828427f, 2.645751f, 2.449490f, + 2.236068f, 2.000000f, 1.732051f, 1.414214f}; + for (int i = 0; i < SIZE; i++) { + printf("%f\n", c_out[i]); + assert(fabs(expected[i] - c_out[i]) < delta); + } + // 3.000000 + // 2.828427 + // 2.645751 + // 2.449490 + // 2.236068 + // 2.000000 + // 1.732051 + // 1.414214 +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/stencil-2d.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/stencil-2d.c new file mode 100644 index 00000000..38362c79 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/stencil-2d.c @@ -0,0 +1,78 @@ +#include +#include +#define ROW_SIZE 8 +#define COL_SIZE 4 +#define F_SIZE 9 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + // float mul = filter_in[k1 * 3 + k2] * + // orig_in[(r + k1) * COL_SIZE + c + k2]; + // temp += mul; + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + float orig_in[ROW_SIZE * COL_SIZE] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + float sol_out[ROW_SIZE * COL_SIZE] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + float filter_in[F_SIZE] = {1, 1, 1, 1, 1, 1, 1, 1, 1}; + stencil(orig_in, sol_out, filter_in); + float expected[ROW_SIZE * COL_SIZE] = { + 9, 9, 1, 1, 9, 9, 1, 1, 9, 9, 1, 1, 9, 9, 1, 1, + 9, 9, 1, 1, 9, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("%f\n", sol_out[i]); + assert(expected[i] == sol_out[i]); + } + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + 
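    // (The pattern above follows directly from the arithmetic: every input
    // and filter entry is 1.0f, so each computed cell is a sum of nine ones,
    // i.e. 9.0; only positions with r < ROW_SIZE - 2 and c < COL_SIZE - 2 are
    // written, so columns 2 and 3 of every row, and all of the last two rows
    // listed below, keep their initial 1.0.)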
+ // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/ternary.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/ternary.c new file mode 100644 index 00000000..04acb85e --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/ternary.c @@ -0,0 +1,35 @@ +#include +#include +#define SIZE 8 + +void tern(float a_in[SIZE], float b_out[SIZE]) { + for (int i = 0; i < SIZE; i++) { + b_out[i] = (i < SIZE / 2) ? a_in[i] : 0; + } +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; + float b_out[SIZE] = {5, 6, 7, 8, 1, 2, 3, 4}; + tern(a_in, b_out); + for (int i = 0; i < SIZE; i++) { + printf("%f\n", b_out[i]); + } + assert(b_out[0] == 1); + assert(b_out[1] == 2); + assert(b_out[2] == 3); + assert(b_out[3] == 4); + assert(b_out[4] == 0); + assert(b_out[5] == 0); + assert(b_out[6] == 0); + assert(b_out[7] == 0); + // 1.000000 + // 2.000000 + // 3.000000 + // 4.000000 + // 0.000000 + // 0.000000 + // 0.000000 + // 0.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/transpose.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/transpose.c new file mode 100644 index 00000000..934d651f --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/transpose.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +void naive_transpose(float a[SIZE * SIZE], int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} + +void no_opt_naive_transpose(float a[SIZE * SIZE], int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} + +int main() { + float calculated[SIZE * SIZE] = {0}; + for (int i = 0; i < SIZE * SIZE; i++) { + if (i % 2 == 0) { + calculated[i] = 1.0f; + } else { + calculated[i] = 0.0f; + } + } + float expected[SIZE * SIZE] = {0}; + for (int i = 0; i < SIZE * SIZE; i++) { + if (i % 2 == 0) { + expected[i] = 1.0f; + } else { + expected[i] = 0.0f; + } + } + naive_transpose(calculated, SIZE); + no_opt_naive_transpose(expected, SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + printf("A Transpose Calculated: %f\n", calculated[i]); + printf("A Transpose Expected: %f\n", expected[i]); + assert(fabs(expected[i] - calculated[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/triangle.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/triangle.c new file mode 100644 index 00000000..344beb52 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/triangle.c @@ -0,0 +1,96 @@ +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1 + +// Triangle Access Pattern Test + +void lower_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < i; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + +void no_opt_lower_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < i; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + +void upper_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * 
SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i; j < SIZE; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + +void no_opt_upper_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i; j < SIZE; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + +int main(void) { + float A1[SIZE * SIZE] = {0, 1, 2, 3}; + float B1[SIZE * SIZE] = {0, 1, 2, 3}; + float C1[SIZE * SIZE] = {0, 1, 2, 3}; + + float A1Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float B1Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float C1Expected[SIZE * SIZE] = {0, 1, 2, 3}; + + lower_triangle(A1, B1, C1); + no_opt_lower_triangle(A1Expected, B1Expected, C1Expected); + + for (int i = 0; i < SIZE * SIZE; i++) { + printf("A: %f\n", A1[i]); + printf("A Expected: %f\n", A1Expected[i]); + printf("B: %f\n", B1[i]); + printf("B Expected: %f\n", B1Expected[i]); + printf("C: %f\n", C1[i]); + printf("C Expected: %f\n", C1Expected[i]); + + assert(fabsf(A1[i] - A1Expected[i]) < DELTA); + assert(fabsf(B1[i] - B1Expected[i]) < DELTA); + assert(fabsf(C1[i] - C1Expected[i]) < DELTA); + } + + float A2[SIZE * SIZE] = {0, 1, 2, 3}; + float B2[SIZE * SIZE] = {0, 1, 2, 3}; + float C2[SIZE * SIZE] = {0, 1, 2, 3}; + + float A2Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float B2Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float C2Expected[SIZE * SIZE] = {0, 1, 2, 3}; + + upper_triangle(A2, B2, C2); + no_opt_upper_triangle(A2Expected, B2Expected, C2Expected); + + for (int i = 0; i < SIZE * SIZE; i++) { + printf("A: %f\n", A2[i]); + printf("A Expected: %f\n", A2Expected[i]); + printf("B: %f\n", B2[i]); + printf("B Expected: %f\n", B2Expected[i]); + printf("C: %f\n", C2[i]); + printf("C Expected: %f\n", C2Expected[i]); + + assert(fabsf(A2[i] - A2Expected[i]) < DELTA); + assert(fabsf(B2[i] - B2Expected[i]) < DELTA); + assert(fabsf(C2[i] - C2Expected[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/turnt.toml b/src/dios-egraphs/Diospyros/no-egg-c-tests/turnt.toml new file mode 100644 index 00000000..c1dc5af6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/turnt.toml @@ -0,0 +1 @@ +command = "bash ../test-runner.sh no-opt-stdout c-tests/{filename}" diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/var.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/var.c new file mode 100644 index 00000000..486903c0 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/var.c @@ -0,0 +1,29 @@ +#include +#include +#define SIZE 4 + +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + float t1 = 10; + float t2 = 20; + c_out[0] = a_in[0] + b_in[0]; + c_out[1] = t1 + b_in[1]; + c_out[2] = a_in[2] + t2; + c_out[3] = t2 + t1; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 6); + assert(c_out[1] == 16); + assert(c_out[2] == 23); + assert(c_out[3] == 30); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + // expected: 6, 16, 23, 30 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/width5.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/width5.c new file mode 100644 index 00000000..a50c3e0b --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/width5.c @@ -0,0 +1,30 @@ +#include +#include +#define SIZE 5 + 
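// (Editorial sketch of what this test pins down: sum below is a plain
// element-wise add over five lanes, so with the inputs in main the result
// should be
//   c_out = {1+6, 2+7, 3+8, 4+9, 5+10} = {7, 9, 11, 13, 15},
// exactly the values the asserts check. Judging by the file name, SIZE = 5 is
// presumably meant to exercise a width that does not divide evenly into
// 4-wide chunks; that reading is an assumption, not something stated here.)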
+void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] + b_in[0]; + c_out[1] = a_in[1] + b_in[1]; + c_out[2] = a_in[2] + b_in[2]; + c_out[3] = a_in[3] + b_in[3]; + c_out[4] = a_in[4] + b_in[4]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4, 5}; + float b_in[SIZE] = {6, 7, 8, 9, 10}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 7); + assert(c_out[1] == 9); + assert(c_out[2] == 11); + assert(c_out[3] == 13); + assert(c_out[4] == 15); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + printf("fifth: %f\n", c_out[4]); + // expected: 7, 9, 11, 13, 15 + return 0; +} diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/width9.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/width9.c new file mode 100644 index 00000000..7b7b2e96 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/width9.c @@ -0,0 +1,42 @@ +#include +#include +#define SIZE 9 + +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] + b_in[0]; + c_out[1] = a_in[1] + b_in[1]; + c_out[2] = a_in[2] + b_in[2]; + c_out[3] = a_in[3] + b_in[3]; + c_out[4] = a_in[4] + b_in[4]; + c_out[5] = a_in[5] + b_in[5]; + c_out[6] = a_in[6] + b_in[6]; + c_out[7] = a_in[7] + b_in[7]; + c_out[8] = a_in[8] + b_in[8]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + float b_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(c_out[0] == 2); + assert(c_out[1] == 4); + assert(c_out[2] == 6); + assert(c_out[3] == 8); + assert(c_out[4] == 10); + assert(c_out[5] == 12); + assert(c_out[6] == 14); + assert(c_out[7] == 16); + assert(c_out[8] == 18); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + printf("fifth: %f\n", c_out[4]); + printf("sixth: %f\n", c_out[5]); + printf("seventh: %f\n", c_out[6]); + printf("eight: %f\n", c_out[7]); + printf("ninth: %f\n", c_out[8]); + // expected: 2, 4, 6, 8, 10, 12, 14, 16, 18 + return 0; +} diff --git a/src/dios-egraphs/Diospyros/test-runner.sh b/src/dios-egraphs/Diospyros/test-runner.sh index b2a87c53..4a84ae9c 100644 --- a/src/dios-egraphs/Diospyros/test-runner.sh +++ b/src/dios-egraphs/Diospyros/test-runner.sh @@ -1,3 +1,16 @@ cd .. 
-make run-opt test=$1 +case $1 in + run-opt) + make run-opt test=$2 + ;; + run-opt-stdout) + make run-opt-stdout test=$2 + ;; + no-opt-stdout) + make no-opt-stdout test=$2 + ;; + *) + echo "match failure" + ;; +esac cd - \ No newline at end of file From 058ab1bb6e05b802558a02f4c2fe518bc3ad6420 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 22 Apr 2022 12:52:05 -0400 Subject: [PATCH 069/143] checkpoint before changing llvm-to-egg --- src/dios-egraphs/Diospyros/Makefile | 44 +- src/dios-egraphs/Diospyros/aa.ll.orig | 516 +++++++ src/dios-egraphs/Diospyros/diospyros.cpp | 218 ++- src/dios-egraphs/Diospyros/err.txt | 312 +++++ src/dios-egraphs/Diospyros/err1.txt | 312 +++++ src/dios-egraphs/Diospyros/err2.txt | 3 + src/dios-egraphs/Diospyros/inline-float.c | 78 ++ .../Diospyros/inline-float.c.orig | 78 ++ src/dios-egraphs/Diospyros/reduce.sh | 11 + src/dios-egraphs/Diospyros/src/lib.rs | 31 +- src/dios-egraphs/Diospyros/src/llvm_to_egg.rs | 1213 +++++++++++++++++ src/dios-egraphs/Diospyros/test-runner.sh | 4 +- 12 files changed, 2784 insertions(+), 36 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/aa.ll.orig create mode 100644 src/dios-egraphs/Diospyros/err.txt create mode 100644 src/dios-egraphs/Diospyros/err1.txt create mode 100644 src/dios-egraphs/Diospyros/err2.txt create mode 100644 src/dios-egraphs/Diospyros/inline-float.c create mode 100644 src/dios-egraphs/Diospyros/inline-float.c.orig create mode 100755 src/dios-egraphs/Diospyros/reduce.sh create mode 100644 src/dios-egraphs/Diospyros/src/llvm_to_egg.rs diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index 448faecb..00ce628c 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -51,13 +51,13 @@ min-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp build/final run-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) - opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll - opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll - $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=false build/aa.ll -o build/diospyros.ll - opt -S --adce --dse build/diospyros.ll -o build/dce.ll - $(CLANG) build/dce.ll -o build/final - build/final + @$(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) + @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll + @opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll + @$(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=false build/aa.ll -o build/diospyros.ll + @opt -S --adce --dse build/diospyros.ll -o build/dce.ll + @$(CLANG) build/dce.ll -o build/final + @build/final print-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) @@ -78,21 +78,21 @@ no-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp build/final run-opt-stdout: set-up-mac 
target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) - opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll - opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll - $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=false build/aa.ll -o build/diospyros.ll - opt -S --adce --dse build/diospyros.ll -o build/dce.ll - cat build/dce.ll + @$(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) + @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll + @opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll + @$(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=false build/aa.ll -o build/diospyros.ll + @opt -S --adce --dse build/diospyros.ll -o build/dce.ll + @cat build/dce.ll no-opt-stdout: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) - opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll - opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll - $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt=false -mllvm -print=false build/aa.ll -o build/diospyros.ll - rm build/dce.ll - opt -S --adce --dse build/diospyros.ll -o build/dce.ll - cat build/dce.ll + @$(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) + @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll + @opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll + @$(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt=false -mllvm -print=false build/aa.ll -o build/diospyros.ll + @rm build/dce.ll + @opt -S --adce --dse build/diospyros.ll -o build/dce.ll + @cat build/dce.ll run-diospyros: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt=false -mllvm -print=false $(test) -o build/diospyros.ll @@ -110,10 +110,10 @@ test: set-up-mac runt.sh runt.toml target/debug/libllvmlib.$(EXT) runt target/debug/libllvmlib.$(EXT): $(LIB) - cargo build + @cargo build set-up-mac: - $(SETUP) + @$(SETUP) clean: rm -r build/* diff --git a/src/dios-egraphs/Diospyros/aa.ll.orig b/src/dios-egraphs/Diospyros/aa.ll.orig new file mode 100644 index 00000000..608340dd --- /dev/null +++ b/src/dios-egraphs/Diospyros/aa.ll.orig @@ -0,0 +1,516 @@ +; ModuleID = 'build/opt.ll' +source_filename = "c-tests/inline-float.c" +target datalayout = 
"e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +@__const.main.A = private unnamed_addr constant [5 x float] [float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00], align 16 +@__const.main.expectedA = private unnamed_addr constant [5 x float] [float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00], align 16 +@.str = private unnamed_addr constant [14 x i8] c"C Output: %f\0A\00", align 1 +@.str.1 = private unnamed_addr constant [23 x i8] c"Expected C Output: %f\0A\00", align 1 +@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 +@.str.2 = private unnamed_addr constant [23 x i8] c"c-tests/inline-float.c\00", align 1 +@.str.3 = private unnamed_addr constant [34 x i8] c"fabs(expectedC[i] - C[i]) < DELTA\00", align 1 +@.memset_pattern = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 +@.memset_pattern.1 = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 + +; Function Attrs: noinline nounwind ssp uwtable +define float @no_opt_test_inline(float* %0, float* %1, i32 %2) #0 { + %4 = icmp sgt i32 %2, 0 + %smax6 = select i1 %4, i32 %2, i32 0 + %wide.trip.count7 = zext i32 %smax6 to i64 + br i1 %4, label %.lr.ph11, label %.preheader + +.lr.ph11: ; preds = %3 + %5 = add nsw i64 %wide.trip.count7, -1 + %xtraiter14 = and i64 %wide.trip.count7, 3 + %6 = icmp ult i64 %5, 3 + br i1 %6, label %..preheader_crit_edge.unr-lcssa, label %.lr.ph11.new + +.lr.ph11.new: ; preds = %.lr.ph11 + %unroll_iter17 = and i64 %wide.trip.count7, 2147483644 + br label %13 + +..preheader_crit_edge.unr-lcssa: ; preds = %13, %.lr.ph11 + %indvars.iv9.unr = phi i64 [ 0, %.lr.ph11 ], [ %indvars.iv.next5.3, %13 ] + %lcmp.mod16.not = icmp eq i64 %xtraiter14, 0 + br i1 %lcmp.mod16.not, label %.preheader, label %.epil.preheader13 + +.epil.preheader13: ; preds = %.epil.preheader13, %..preheader_crit_edge.unr-lcssa + %indvars.iv9.epil = phi i64 [ %indvars.iv.next5.epil, %.epil.preheader13 ], [ %indvars.iv9.unr, %..preheader_crit_edge.unr-lcssa ] + %epil.iter15 = phi i64 [ %epil.iter15.sub, %.epil.preheader13 ], [ %xtraiter14, %..preheader_crit_edge.unr-lcssa ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv9.epil + %8 = load float, float* %7, align 4 + %9 = fmul float %8, 2.000000e+00 + %10 = getelementptr inbounds float, float* %1, i64 %indvars.iv9.epil + store float %9, float* %10, align 4 + %indvars.iv.next5.epil = add nuw nsw i64 %indvars.iv9.epil, 1 + %epil.iter15.sub = add i64 %epil.iter15, -1 + %epil.iter15.cmp.not = icmp eq i64 %epil.iter15.sub, 0 + br i1 %epil.iter15.cmp.not, label %.preheader, label %.epil.preheader13, !llvm.loop !3 + +.preheader: ; preds = %.epil.preheader13, %..preheader_crit_edge.unr-lcssa, %3 + br i1 %4, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %.preheader + %11 = add nsw i64 %wide.trip.count7, -1 + %xtraiter = and i64 %wide.trip.count7, 7 + %12 = icmp ult i64 %11, 7 + br i1 %12, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count7, 2147483640 + br label %30 + +13: ; preds = %13, %.lr.ph11.new + %indvars.iv9 = phi i64 [ 0, %.lr.ph11.new ], [ %indvars.iv.next5.3, %13 ] + %niter18 = phi i64 [ %unroll_iter17, %.lr.ph11.new ], [ %niter18.nsub.3, %13 ] + %14 = 
getelementptr inbounds float, float* %0, i64 %indvars.iv9 + %15 = load float, float* %14, align 4 + %16 = fmul float %15, 2.000000e+00 + %17 = getelementptr inbounds float, float* %1, i64 %indvars.iv9 + store float %16, float* %17, align 4 + %indvars.iv.next5 = or i64 %indvars.iv9, 1 + %18 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next5 + %19 = load float, float* %18, align 4 + %20 = fmul float %19, 2.000000e+00 + %21 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next5 + store float %20, float* %21, align 4 + %indvars.iv.next5.1 = or i64 %indvars.iv9, 2 + %22 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next5.1 + %23 = load float, float* %22, align 4 + %24 = fmul float %23, 2.000000e+00 + %25 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next5.1 + store float %24, float* %25, align 4 + %indvars.iv.next5.2 = or i64 %indvars.iv9, 3 + %26 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next5.2 + %27 = load float, float* %26, align 4 + %28 = fmul float %27, 2.000000e+00 + %29 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next5.2 + store float %28, float* %29, align 4 + %indvars.iv.next5.3 = add nuw nsw i64 %indvars.iv9, 4 + %niter18.nsub.3 = add i64 %niter18, -4 + %niter18.ncmp.3.not = icmp eq i64 %niter18.nsub.3, 0 + br i1 %niter18.ncmp.3.not, label %..preheader_crit_edge.unr-lcssa, label %13 + +30: ; preds = %30, %.lr.ph.new + %.014 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %54, %30 ] + %indvars.iv3 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.7, %30 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.7, %30 ] + %31 = getelementptr inbounds float, float* %1, i64 %indvars.iv3 + %32 = load float, float* %31, align 4 + %33 = fadd float %.014, %32 + %indvars.iv.next = or i64 %indvars.iv3, 1 + %34 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next + %35 = load float, float* %34, align 4 + %36 = fadd float %33, %35 + %indvars.iv.next.1 = or i64 %indvars.iv3, 2 + %37 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.1 + %38 = load float, float* %37, align 4 + %39 = fadd float %36, %38 + %indvars.iv.next.2 = or i64 %indvars.iv3, 3 + %40 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.2 + %41 = load float, float* %40, align 4 + %42 = fadd float %39, %41 + %indvars.iv.next.3 = or i64 %indvars.iv3, 4 + %43 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.3 + %44 = load float, float* %43, align 4 + %45 = fadd float %42, %44 + %indvars.iv.next.4 = or i64 %indvars.iv3, 5 + %46 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.4 + %47 = load float, float* %46, align 4 + %48 = fadd float %45, %47 + %indvars.iv.next.5 = or i64 %indvars.iv3, 6 + %49 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.5 + %50 = load float, float* %49, align 4 + %51 = fadd float %48, %50 + %indvars.iv.next.6 = or i64 %indvars.iv3, 7 + %52 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.6 + %53 = load float, float* %52, align 4 + %54 = fadd float %51, %53 + %indvars.iv.next.7 = add nuw nsw i64 %indvars.iv3, 8 + %niter.nsub.7 = add i64 %niter, -8 + %niter.ncmp.7.not = icmp eq i64 %niter.nsub.7, 0 + br i1 %niter.ncmp.7.not, label %._crit_edge.unr-lcssa, label %30 + +._crit_edge.unr-lcssa: ; preds = %30, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %54, %30 ] + %.014.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %54, %30 ] + %indvars.iv3.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.7, %30 ] + %lcmp.mod.not = icmp eq i64 
%xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.014.epil = phi float [ %57, %.epil.preheader ], [ %.014.unr, %._crit_edge.unr-lcssa ] + %indvars.iv3.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv3.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %55 = getelementptr inbounds float, float* %1, i64 %indvars.iv3.epil + %56 = load float, float* %55, align 4 + %57 = fadd float %.014.epil, %56 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv3.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %.preheader + %.01.lcssa = phi float [ 0.000000e+00, %.preheader ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %57, %.epil.preheader ] + ret float %.01.lcssa +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @no_opt_test(float* %0, float* %1, float* %2) #0 { + %4 = call float @no_opt_test_inline(float* %0, float* %1, i32 5) + store float %4, float* %2, align 4 + %5 = getelementptr inbounds float, float* %2, i64 1 + store float %4, float* %5, align 4 + %6 = getelementptr inbounds float, float* %2, i64 2 + store float %4, float* %6, align 4 + %7 = getelementptr inbounds float, float* %2, i64 3 + store float %4, float* %7, align 4 + %8 = getelementptr inbounds float, float* %2, i64 4 + store float %4, float* %8, align 4 + ret void +} + +; Function Attrs: alwaysinline nounwind ssp uwtable +define float @test_inline(float* %0, float* %1, i32 %2) #1 { + %4 = icmp sgt i32 %2, 0 + %smax6 = select i1 %4, i32 %2, i32 0 + %wide.trip.count7 = zext i32 %smax6 to i64 + br i1 %4, label %.lr.ph11, label %.preheader + +.lr.ph11: ; preds = %3 + %5 = add nsw i64 %wide.trip.count7, -1 + %xtraiter14 = and i64 %wide.trip.count7, 3 + %6 = icmp ult i64 %5, 3 + br i1 %6, label %..preheader_crit_edge.unr-lcssa, label %.lr.ph11.new + +.lr.ph11.new: ; preds = %.lr.ph11 + %unroll_iter17 = and i64 %wide.trip.count7, 2147483644 + br label %13 + +..preheader_crit_edge.unr-lcssa: ; preds = %13, %.lr.ph11 + %indvars.iv9.unr = phi i64 [ 0, %.lr.ph11 ], [ %indvars.iv.next5.3, %13 ] + %lcmp.mod16.not = icmp eq i64 %xtraiter14, 0 + br i1 %lcmp.mod16.not, label %.preheader, label %.epil.preheader13 + +.epil.preheader13: ; preds = %.epil.preheader13, %..preheader_crit_edge.unr-lcssa + %indvars.iv9.epil = phi i64 [ %indvars.iv.next5.epil, %.epil.preheader13 ], [ %indvars.iv9.unr, %..preheader_crit_edge.unr-lcssa ] + %epil.iter15 = phi i64 [ %epil.iter15.sub, %.epil.preheader13 ], [ %xtraiter14, %..preheader_crit_edge.unr-lcssa ] + %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv9.epil + %8 = load float, float* %7, align 4 + %9 = fmul float %8, 2.000000e+00 + %10 = getelementptr inbounds float, float* %1, i64 %indvars.iv9.epil + store float %9, float* %10, align 4 + %indvars.iv.next5.epil = add nuw nsw i64 %indvars.iv9.epil, 1 + %epil.iter15.sub = add i64 %epil.iter15, -1 + %epil.iter15.cmp.not = icmp eq i64 %epil.iter15.sub, 0 + br i1 %epil.iter15.cmp.not, label %.preheader, label %.epil.preheader13, !llvm.loop !6 + +.preheader: ; preds = %.epil.preheader13, %..preheader_crit_edge.unr-lcssa, %3 + br i1 %4, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %.preheader + %11 = add nsw i64 
%wide.trip.count7, -1 + %xtraiter = and i64 %wide.trip.count7, 7 + %12 = icmp ult i64 %11, 7 + br i1 %12, label %._crit_edge.unr-lcssa, label %.lr.ph.new + +.lr.ph.new: ; preds = %.lr.ph + %unroll_iter = and i64 %wide.trip.count7, 2147483640 + br label %30 + +13: ; preds = %13, %.lr.ph11.new + %indvars.iv9 = phi i64 [ 0, %.lr.ph11.new ], [ %indvars.iv.next5.3, %13 ] + %niter18 = phi i64 [ %unroll_iter17, %.lr.ph11.new ], [ %niter18.nsub.3, %13 ] + %14 = getelementptr inbounds float, float* %0, i64 %indvars.iv9 + %15 = load float, float* %14, align 4 + %16 = fmul float %15, 2.000000e+00 + %17 = getelementptr inbounds float, float* %1, i64 %indvars.iv9 + store float %16, float* %17, align 4 + %indvars.iv.next5 = or i64 %indvars.iv9, 1 + %18 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next5 + %19 = load float, float* %18, align 4 + %20 = fmul float %19, 2.000000e+00 + %21 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next5 + store float %20, float* %21, align 4 + %indvars.iv.next5.1 = or i64 %indvars.iv9, 2 + %22 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next5.1 + %23 = load float, float* %22, align 4 + %24 = fmul float %23, 2.000000e+00 + %25 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next5.1 + store float %24, float* %25, align 4 + %indvars.iv.next5.2 = or i64 %indvars.iv9, 3 + %26 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next5.2 + %27 = load float, float* %26, align 4 + %28 = fmul float %27, 2.000000e+00 + %29 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next5.2 + store float %28, float* %29, align 4 + %indvars.iv.next5.3 = add nuw nsw i64 %indvars.iv9, 4 + %niter18.nsub.3 = add i64 %niter18, -4 + %niter18.ncmp.3.not = icmp eq i64 %niter18.nsub.3, 0 + br i1 %niter18.ncmp.3.not, label %..preheader_crit_edge.unr-lcssa, label %13 + +30: ; preds = %30, %.lr.ph.new + %.014 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %54, %30 ] + %indvars.iv3 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.7, %30 ] + %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.7, %30 ] + %31 = getelementptr inbounds float, float* %1, i64 %indvars.iv3 + %32 = load float, float* %31, align 4 + %33 = fadd float %.014, %32 + %indvars.iv.next = or i64 %indvars.iv3, 1 + %34 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next + %35 = load float, float* %34, align 4 + %36 = fadd float %33, %35 + %indvars.iv.next.1 = or i64 %indvars.iv3, 2 + %37 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.1 + %38 = load float, float* %37, align 4 + %39 = fadd float %36, %38 + %indvars.iv.next.2 = or i64 %indvars.iv3, 3 + %40 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.2 + %41 = load float, float* %40, align 4 + %42 = fadd float %39, %41 + %indvars.iv.next.3 = or i64 %indvars.iv3, 4 + %43 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.3 + %44 = load float, float* %43, align 4 + %45 = fadd float %42, %44 + %indvars.iv.next.4 = or i64 %indvars.iv3, 5 + %46 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.4 + %47 = load float, float* %46, align 4 + %48 = fadd float %45, %47 + %indvars.iv.next.5 = or i64 %indvars.iv3, 6 + %49 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.5 + %50 = load float, float* %49, align 4 + %51 = fadd float %48, %50 + %indvars.iv.next.6 = or i64 %indvars.iv3, 7 + %52 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.6 + %53 = load float, float* %52, align 4 + %54 = fadd float %51, %53 + %indvars.iv.next.7 = add 
nuw nsw i64 %indvars.iv3, 8 + %niter.nsub.7 = add i64 %niter, -8 + %niter.ncmp.7.not = icmp eq i64 %niter.nsub.7, 0 + br i1 %niter.ncmp.7.not, label %._crit_edge.unr-lcssa, label %30 + +._crit_edge.unr-lcssa: ; preds = %30, %.lr.ph + %split.ph = phi float [ undef, %.lr.ph ], [ %54, %30 ] + %.014.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %54, %30 ] + %indvars.iv3.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.7, %30 ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0 + br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader + +.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa + %.014.epil = phi float [ %57, %.epil.preheader ], [ %.014.unr, %._crit_edge.unr-lcssa ] + %indvars.iv3.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv3.unr, %._crit_edge.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] + %55 = getelementptr inbounds float, float* %1, i64 %indvars.iv3.epil + %56 = load float, float* %55, align 4 + %57 = fadd float %.014.epil, %56 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv3.epil, 1 + %epil.iter.sub = add i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !7 + +._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %.preheader + %.01.lcssa = phi float [ 0.000000e+00, %.preheader ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %57, %.epil.preheader ] + ret float %.01.lcssa +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @test(float* %0, float* %1, float* %2) #0 { +.preheader4: + %3 = load float, float* %0, align 4 + %4 = fmul float %3, 2.000000e+00 + store float %4, float* %1, align 4 + %5 = getelementptr inbounds float, float* %0, i64 1 + %6 = load float, float* %5, align 4 + %7 = fmul float %6, 2.000000e+00 + %8 = getelementptr inbounds float, float* %1, i64 1 + store float %7, float* %8, align 4 + %9 = getelementptr inbounds float, float* %0, i64 2 + %10 = load float, float* %9, align 4 + %11 = fmul float %10, 2.000000e+00 + %12 = getelementptr inbounds float, float* %1, i64 2 + store float %11, float* %12, align 4 + %13 = getelementptr inbounds float, float* %0, i64 3 + %14 = load float, float* %13, align 4 + %15 = fmul float %14, 2.000000e+00 + %16 = getelementptr inbounds float, float* %1, i64 3 + store float %15, float* %16, align 4 + %17 = getelementptr inbounds float, float* %0, i64 4 + %18 = load float, float* %17, align 4 + %19 = fmul float %18, 2.000000e+00 + %20 = getelementptr inbounds float, float* %1, i64 4 + store float %19, float* %20, align 4 + %21 = fadd float %4, 0.000000e+00 + %22 = fadd float %21, %7 + %23 = fadd float %22, %11 + %24 = fadd float %23, %15 + %25 = fadd float %24, %19 + store float %25, float* %2, align 4 + %26 = getelementptr inbounds float, float* %2, i64 1 + store float %25, float* %26, align 4 + %27 = getelementptr inbounds float, float* %2, i64 2 + store float %25, float* %27, align 4 + %28 = getelementptr inbounds float, float* %2, i64 3 + store float %25, float* %28, align 4 + %29 = getelementptr inbounds float, float* %2, i64 4 + store float %25, float* %29, align 4 + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main() #0 { + %1 = alloca [5 x float], align 16 + %2 = bitcast [5 x float]* %1 to i8* + %3 = alloca [5 x float], align 16 + %4 = bitcast [5 x float]* %3 to i8* + %5 = alloca [5 x float], align 16 + %6 = bitcast [5 x float]* %5 to i8* + %7 = alloca [5 x 
float], align 16 + %8 = bitcast [5 x float]* %7 to i8* + %9 = alloca [5 x float], align 16 + %10 = bitcast [5 x float]* %9 to i8* + %11 = alloca [5 x float], align 16 + %12 = bitcast [5 x float]* %11 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(20) %2, i8* nonnull align 16 dereferenceable(20) bitcast ([5 x float]* @__const.main.A to i8*), i64 20, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(20) %4, i8* nonnull align 16 dereferenceable(20) bitcast ([5 x float]* @__const.main.expectedA to i8*), i64 20, i1 false) + call void @memset_pattern16(i8* nonnull %2, i8* bitcast ([4 x float]* @.memset_pattern to i8*), i64 20) #8 + call void @memset_pattern16(i8* nonnull %4, i8* bitcast ([4 x float]* @.memset_pattern.1 to i8*), i64 20) #8 + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(20) %6, i8 0, i64 20, i1 false) + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(20) %8, i8 0, i64 20, i1 false) + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(20) %10, i8 0, i64 20, i1 false) + call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(20) %12, i8 0, i64 20, i1 false) + %13 = getelementptr inbounds [5 x float], [5 x float]* %1, i64 0, i64 0 + %14 = getelementptr inbounds [5 x float], [5 x float]* %5, i64 0, i64 0 + %15 = getelementptr inbounds [5 x float], [5 x float]* %9, i64 0, i64 0 + call void @test(float* nonnull %13, float* nonnull %14, float* nonnull %15) + %16 = getelementptr inbounds [5 x float], [5 x float]* %3, i64 0, i64 0 + %17 = getelementptr inbounds [5 x float], [5 x float]* %7, i64 0, i64 0 + %18 = getelementptr inbounds [5 x float], [5 x float]* %11, i64 0, i64 0 + call void @no_opt_test(float* nonnull %16, float* nonnull %17, float* nonnull %18) + %19 = load float, float* %15, align 16 + %20 = fpext float %19 to double + %21 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %20) #8 + %22 = load float, float* %18, align 16 + %23 = fpext float %22 to double + %24 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %23) #8 + %25 = load float, float* %18, align 16 + %26 = load float, float* %15, align 16 + %27 = fsub float %25, %26 + %28 = call float @llvm.fabs.f32(float %27) + %29 = fcmp uge float %28, 0x3FB99999A0000000 + br i1 %29, label %44, label %30 + +30: ; preds = %0 + %31 = getelementptr inbounds [5 x float], [5 x float]* %9, i64 0, i64 1 + %32 = load float, float* %31, align 4 + %33 = fpext float %32 to double + %34 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %33) #8 + %35 = getelementptr inbounds [5 x float], [5 x float]* %11, i64 0, i64 1 + %36 = load float, float* %35, align 4 + %37 = fpext float %36 to double + %38 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %37) #8 + %39 = load float, float* %35, align 4 + %40 = load float, float* %31, align 4 + %41 = fsub float %39, %40 + %42 = call float @llvm.fabs.f32(float %41) + %43 = fcmp uge float %42, 0x3FB99999A0000000 + br i1 %43, label %44, label %45 + +44: ; preds = %73, %59, %45, %30, %0 + call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), i32 75, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.3, i64 0, i64 0)) #9 + unreachable + +45: ; preds = %30 + %46 = getelementptr inbounds [5 x float], [5 x float]* %9, i64 0, i64 2 + %47 = load float, float* %46, align 8 + %48 = fpext float %47 to double + %49 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %48) #8 + %50 = getelementptr inbounds [5 x float], [5 x float]* %11, i64 0, i64 2 + %51 = load float, float* %50, align 8 + %52 = fpext float %51 to double + %53 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %52) #8 + %54 = load float, float* %50, align 8 + %55 = load float, float* %46, align 8 + %56 = fsub float %54, %55 + %57 = call float @llvm.fabs.f32(float %56) + %58 = fcmp uge float %57, 0x3FB99999A0000000 + br i1 %58, label %44, label %59 + +59: ; preds = %45 + %60 = getelementptr inbounds [5 x float], [5 x float]* %9, i64 0, i64 3 + %61 = load float, float* %60, align 4 + %62 = fpext float %61 to double + %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %62) #8 + %64 = getelementptr inbounds [5 x float], [5 x float]* %11, i64 0, i64 3 + %65 = load float, float* %64, align 4 + %66 = fpext float %65 to double + %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %66) #8 + %68 = load float, float* %64, align 4 + %69 = load float, float* %60, align 4 + %70 = fsub float %68, %69 + %71 = call float @llvm.fabs.f32(float %70) + %72 = fcmp uge float %71, 0x3FB99999A0000000 + br i1 %72, label %44, label %73 + +73: ; preds = %59 + %74 = getelementptr inbounds [5 x float], [5 x float]* %9, i64 0, i64 4 + %75 = load float, float* %74, align 16 + %76 = fpext float %75 to double + %77 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %76) #8 + %78 = getelementptr inbounds [5 x float], [5 x float]* %11, i64 0, i64 4 + %79 = load float, float* %78, align 16 + %80 = fpext float %79 to double + %81 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %80) #8 + %82 = load float, float* %78, align 16 + %83 = load float, float* %74, align 16 + %84 = fsub float %82, %83 + %85 = call float @llvm.fabs.f32(float %84) + %86 = fcmp uge float %85, 0x3FB99999A0000000 + br i1 %86, label %44, label %87 + +87: ; preds = %73 + ret i32 0 +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #2 + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 + +declare i32 @printf(i8*, ...) #4 + +; Function Attrs: nounwind readnone speculatable willreturn +declare double @llvm.fabs.f64(double) #5 + +; Function Attrs: noreturn +declare void @__assert_rtn(i8*, i8*, i32, i8*) #6 + +; Function Attrs: argmemonly nofree +declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) #7 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.fabs.f32(float) #5 + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { argmemonly nounwind willreturn } +attributes #3 = { argmemonly nounwind willreturn writeonly } +attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { nounwind readnone speculatable willreturn } +attributes #6 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #7 = { argmemonly nofree } +attributes #8 = { nounwind } +attributes #9 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC 
Level", i32 2} +!2 = !{!"clang version 11.0.1"} +!3 = distinct !{!3, !4} +!4 = !{!"llvm.loop.unroll.disable"} +!5 = distinct !{!5, !4} +!6 = distinct !{!6, !4} +!7 = distinct !{!7, !4} diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index ce2ea935..694220fd 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -561,6 +562,41 @@ bool call_is_not_sqrt(CallInst *inst) { // will be done } +std::vector dfs_in_basic_block( + Instruction *instr, std::vector basic_block_instrs, + std::vector visited_instrs) { + if (isa(instr)) { + assert(false); + } + errs() << "Incoming instr\n"; + errs() << *instr << "\n"; + assert(std::find(basic_block_instrs.begin(), basic_block_instrs.end(), + instr) != basic_block_instrs.end()); + assert(std::find(visited_instrs.begin(), visited_instrs.end(), instr) == + visited_instrs.end()); + visited_instrs.push_back(instr); + std::vector output = {}; + int num_operands = instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Instruction *arg = dyn_cast(instr->getOperand(i)); + if (arg != NULL && !isa(arg) && + std::find(visited_instrs.begin(), visited_instrs.end(), arg) == + visited_instrs.end() && + std::find(basic_block_instrs.begin(), basic_block_instrs.end(), + arg) != basic_block_instrs.end() && + arg->getNumOperands() > 0) { + errs() << "Incoming arg\n"; + errs() << *arg << "\n"; + std::vector new_instrs = + dfs_in_basic_block(arg, basic_block_instrs, visited_instrs); + for (Instruction *new_instr : new_instrs) { + output.push_back(new_instr); + } + } + } + return output; +} + /** * Below is the main DiospyrosPass that activates the Rust lib.rs code, * which calls the Egg vectorizer and rewrites the optimized code in place. 
@@ -581,9 +617,51 @@ struct DiospyrosPass : public FunctionPass { bool has_changes = false; std::vector translated_exprs = {}; for (auto &B : F) { + // Emergency conditions + // Bail if instruction is not in list of handleable instructions + // TODO: need to identify not handleable instructions + bool has_excluded_instr = false; + for (auto &I : B) { + if (isa(I)) { + has_excluded_instr = true; + } + } + if (has_excluded_instr) { + continue; + } + + // Bail if instruction is used outside the current basic block + bool instr_used_twice = false; + for (auto &I : B) { + Instruction *original_instr = dyn_cast(&I); + for (auto &otherB : F) { + if (otherB.getName() != B.getName()) { + for (auto &otherI : otherB) { + Instruction *other_instr = + dyn_cast(&otherI); + int num_operands = other_instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Instruction *use = dyn_cast( + other_instr->getOperand(i)); + if (use != NULL) { + if (use == original_instr) { + instr_used_twice = true; + } + } + } + } + } + } + } + if (instr_used_twice) { + continue; + } + // We skip over basic blocks without floating point types bool has_float = false; for (auto &I : B) { + errs() << "All instructions\n"; + errs() << I << "\n"; if (I.getType()->isFloatTy()) { has_float = true; } @@ -598,7 +676,8 @@ struct DiospyrosPass : public FunctionPass { } continue; } - // We also skip over all basic blocks without stores + // We also skip over all basic blocks without stores or related + // memory operations bool has_store_or_mem_intrinsic = false; for (auto &I : B) { if (auto *op = dyn_cast(&I)) { @@ -649,6 +728,16 @@ struct DiospyrosPass : public FunctionPass { continue; } + // We grab all the block args: the phi nodes of the block + std::vector phi_instrs = {}; + for (auto &I : B) { + if (auto *op = dyn_cast(&I)) { + Instruction *phi = dyn_cast(&I); + assert(phi != NULL); + phi_instrs.push_back(phi); + } + } + // Grab the terminator from the LLVM Basic Block Instruction *terminator = B.getTerminator(); Instruction *cloned_terminator = terminator->clone(); @@ -743,20 +832,95 @@ struct DiospyrosPass : public FunctionPass { vectorization_accumulator.push_back(inner_vector); } + // acquire all instructions in a basic block + std::vector basic_block_instrs = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + basic_block_instrs.push_back(instr); + } + // Acquire each of the instructions in the "run" that terminates at // a store We will send these instructions to optimize. 
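A hedged note on the "Bail if instruction is used outside the current basic block" condition introduced earlier in this hunk: the patch detects escaping values by scanning the operands of every instruction in every other block of the function. The same condition can be expressed more directly through LLVM's def-use chains, as in the sketch below. This is an alternative formulation shown only for clarity, not what the patch does, and the helper name blockHasEscapingValue is made up.

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // True if any value defined in B has a user in a different basic block,
    // which is the situation the pass bails out on before vectorizing B.
    static bool blockHasEscapingValue(BasicBlock &B) {
      for (Instruction &I : B)
        for (User *U : I.users())
          if (auto *UserInst = dyn_cast<Instruction>(U))
            if (UserInst->getParent() != &B)
              return true;
      return false;
    }

Uses by PHI nodes in successor blocks count as escapes in both formulations, since a PHI is itself an instruction living in another block.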
- int vec_length = vectorization_accumulator.size(); - int counter = 0; - // std::vector translated_exprs = {}; + // maintain list of all instructions processed thus far in basic + // block via DFS + std::vector dfs_bb_instrs = {}; for (auto &vec : vectorization_accumulator) { - ++counter; if (not vec.empty()) { + // check that a instruction is not used multiple times + // within a chunk + bool instr_used_twice_in_chunk = false; + for (auto *instr : vec) { + Instruction *first_instr = + dyn_cast(unwrap(instr)); + for (auto *other_instr : vec) { + Instruction *second_instr = + dyn_cast(unwrap(instr)); + if (first_instr != second_instr) { + int num_operands = + second_instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Instruction *use = dyn_cast( + second_instr->getOperand(i)); + if (use != NULL) { + if (use == first_instr) { + instr_used_twice_in_chunk = true; + } + } + } + } + } + } + if (instr_used_twice_in_chunk) { + continue; + } + + // check that an instruction is not used + // outside the chunk + bool instr_used_twice_outside_chunk = false; + for (auto &chunk : vectorization_accumulator) { + if (chunk != vec) { + for (auto *first_instr : vec) { + Instruction *first = + dyn_cast(unwrap(first_instr)); + for (auto *second_instr : chunk) { + Instruction *second = dyn_cast( + unwrap(second_instr)); + int num_operands = second->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Instruction *use = + dyn_cast( + second->getOperand(i)); + if (use != NULL) { + if (use == first) { + instr_used_twice_outside_chunk = + true; + } + } + } + } + } + } + } + if (instr_used_twice_outside_chunk) { + continue; + } + has_changes = has_changes || true; Value *last_store = unwrap(vec.back()); IRBuilder<> builder(dyn_cast(last_store)); Instruction *store_instr = dyn_cast(last_store); + // gather all instructions + // for (Instruction *bbinst : basic_block_instrs) { + // if (bbinst == store_instr) { + // errs() << "Match!\n"; + // errs() << *store_instr << "\n"; + // } + // } + // dfs_in_basic_block(store_instr, basic_block_instrs, + // dfs_bb_instrs); if (auto *op = dyn_cast(store_instr)) { assert(isa(store_instr)); builder.SetInsertPoint(store_instr); @@ -770,7 +934,6 @@ struct DiospyrosPass : public FunctionPass { int size = pair.llvm_pointer_size; LLVMPair const *expr_array = pair.llvm_pointer; - // translated_exprs = {}; for (int i = 0; i < size; i++) { translated_exprs.push_back(expr_array[i]); } @@ -802,6 +965,47 @@ struct DiospyrosPass : public FunctionPass { translated_exprs = new_translated_exprs; } } + + // // grab unprocessed instructions + // std::vector missed_instrs = {}; + // for (auto &I : B) { + // Instruction *instr = dyn_cast(&I); + // assert(instr != NULL); + // if (std::find(dfs_bb_instrs.begin(), dfs_bb_instrs.end(), + // instr) != dfs_bb_instrs.end()) { + // errs() << "Missed instrs\n"; + // errs() << *instr << "\n"; + // missed_instrs.push_back(instr); + // } + // } + + // // add in unprocessed phi instructions at front of basic block + // BasicBlock::InstListType &intermediate_instrs = B.getInstList(); + // for (Instruction *missed_instr : missed_instrs) { + // if (isa(missed_instr)) { + // intermediate_instrs.push_front(missed_instr); + // } + // } + + // // add in the "unprocessed" instructions that dfs on memory ops + // // missed + // for (Instruction *missed_instr : missed_instrs) { + // if (!isa(missed_instr)) { + // Instruction *cloned_instr = missed_instr->clone(); + // intermediate_instrs.push_back(cloned_instr); + // for (auto &U : 
missed_instr->uses()) { + // User *user = U.getUser(); // user of the add; could + // be + // // a store, for example + // user->setOperand(U.getOperandNo(), cloned_instr); + // } + // errs() << "Adding instruction\n"; + // errs() << *missed_instr << "\n"; + // } + // } + + // delete old instructions that are memory related; adce will handle + // rest std::reverse(bb_instrs.begin(), bb_instrs.end()); for (auto &I : bb_instrs) { if (I->isTerminator()) { @@ -823,6 +1027,8 @@ struct DiospyrosPass : public FunctionPass { I->eraseFromParent(); } } + + // add back the terminator BasicBlock::InstListType &final_instrs = B.getInstList(); final_instrs.push_back(cloned_terminator); diff --git a/src/dios-egraphs/Diospyros/err.txt b/src/dios-egraphs/Diospyros/err.txt new file mode 100644 index 00000000..a6533f78 --- /dev/null +++ b/src/dios-egraphs/Diospyros/err.txt @@ -0,0 +1,312 @@ +Match! + store float %17, float* %18, align 4 +Incoming instr + store float %17, float* %18, align 4 +Incoming arg + %17 = fmul float %16, 2.000000e+00 +Incoming instr + %17 = fmul float %16, 2.000000e+00 +Incoming arg + %16 = load float, float* %15, align 4 +Incoming instr + %16 = load float, float* %15, align 4 +Incoming arg + %15 = getelementptr inbounds float, float* %0, i64 %13 +Incoming instr + %15 = getelementptr inbounds float, float* %0, i64 %13 +Incoming arg + %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] +Incoming instr + %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] +Incoming arg + %19 = add nuw nsw i64 %13, 1 +Incoming instr + %19 = add nuw nsw i64 %13, 1 +Incoming arg + %18 = getelementptr inbounds float, float* %1, i64 %13 +Incoming instr + %18 = getelementptr inbounds float, float* %1, i64 %13 +Incoming arg + %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] +Incoming instr + %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] +Incoming arg + %19 = add nuw nsw i64 %13, 1 +Incoming instr + %19 = add nuw nsw i64 %13, 1 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 4 iterations, reason: Some(Saturated) +Match! 
+ store float %37, float* %38, align 4 +Incoming instr + store float %37, float* %38, align 4 +Incoming arg + %37 = fmul float %36, 2.000000e+00 +Incoming instr + %37 = fmul float %36, 2.000000e+00 +Incoming arg + %36 = load float, float* %35, align 4 +Incoming instr + %36 = load float, float* %35, align 4 +Incoming arg + %35 = getelementptr inbounds float, float* %0, i64 %33 +Incoming instr + %35 = getelementptr inbounds float, float* %0, i64 %33 +Incoming arg + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming instr + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming arg + %54 = add nuw nsw i64 %33, 4 +Incoming instr + %54 = add nuw nsw i64 %33, 4 +Incoming arg + %38 = getelementptr inbounds float, float* %1, i64 %33 +Incoming instr + %38 = getelementptr inbounds float, float* %1, i64 %33 +Incoming arg + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming instr + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming arg + %54 = add nuw nsw i64 %33, 4 +Incoming instr + %54 = add nuw nsw i64 %33, 4 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 4 iterations, reason: Some(Saturated) +Match! + store float %42, float* %43, align 4 +Incoming instr + store float %42, float* %43, align 4 +Incoming arg + %42 = fmul float %41, 2.000000e+00 +Incoming instr + %42 = fmul float %41, 2.000000e+00 +Incoming arg + %41 = load float, float* %40, align 4 +Incoming instr + %41 = load float, float* %40, align 4 +Incoming arg + %40 = getelementptr inbounds float, float* %0, i64 %39 +Incoming instr + %40 = getelementptr inbounds float, float* %0, i64 %39 +Incoming arg + %39 = or i64 %33, 1 +Incoming instr + %39 = or i64 %33, 1 +Incoming arg + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming instr + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming arg + %54 = add nuw nsw i64 %33, 4 +Incoming instr + %54 = add nuw nsw i64 %33, 4 +Incoming arg + %43 = getelementptr inbounds float, float* %1, i64 %39 +Incoming instr + %43 = getelementptr inbounds float, float* %1, i64 %39 +Incoming arg + %39 = or i64 %33, 1 +Incoming instr + %39 = or i64 %33, 1 +Incoming arg + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming instr + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming arg + %54 = add nuw nsw i64 %33, 4 +Incoming instr + %54 = add nuw nsw i64 %33, 4 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 4 iterations, reason: Some(Saturated) +Match! 
+ store float %52, float* %53, align 4 +Incoming instr + store float %52, float* %53, align 4 +Incoming arg + %52 = fmul float %51, 2.000000e+00 +Incoming instr + %52 = fmul float %51, 2.000000e+00 +Incoming arg + %51 = load float, float* %50, align 4 +Incoming instr + %51 = load float, float* %50, align 4 +Incoming arg + %50 = getelementptr inbounds float, float* %0, i64 %49 +Incoming instr + %50 = getelementptr inbounds float, float* %0, i64 %49 +Incoming arg + %49 = or i64 %33, 3 +Incoming instr + %49 = or i64 %33, 3 +Incoming arg + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming instr + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming arg + %54 = add nuw nsw i64 %33, 4 +Incoming instr + %54 = add nuw nsw i64 %33, 4 +Incoming arg + %53 = getelementptr inbounds float, float* %1, i64 %49 +Incoming instr + %53 = getelementptr inbounds float, float* %1, i64 %49 +Incoming arg + %49 = or i64 %33, 3 +Incoming instr + %49 = or i64 %33, 3 +Incoming arg + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming instr + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming arg + %54 = add nuw nsw i64 %33, 4 +Incoming instr + %54 = add nuw nsw i64 %33, 4 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 4 iterations, reason: Some(Saturated) +Match! + store float %4, float* %1, align 4 +Incoming instr + store float %4, float* %1, align 4 +Incoming arg + %4 = fmul float %3, 2.000000e+00 +Incoming instr + %4 = fmul float %3, 2.000000e+00 +Incoming arg + %3 = load float, float* %0, align 4 +Incoming instr + %3 = load float, float* %0, align 4 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 4 iterations, reason: Some(Saturated) +Match! + store float %7, float* %8, align 4 +Incoming instr + store float %7, float* %8, align 4 +Incoming arg + %7 = fmul float %6, 2.000000e+00 +Incoming instr + %7 = fmul float %6, 2.000000e+00 +Incoming arg + %6 = load float, float* %5, align 4 +Incoming instr + %6 = load float, float* %5, align 4 +Incoming arg + %5 = getelementptr inbounds float, float* %0, i64 1 +Incoming instr + %5 = getelementptr inbounds float, float* %0, i64 1 +Incoming arg + %8 = getelementptr inbounds float, float* %1, i64 1 +Incoming instr + %8 = getelementptr inbounds float, float* %1, i64 1 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 4 iterations, reason: Some(Saturated) +Match! 
+ store float %11, float* %12, align 4 +Incoming instr + store float %11, float* %12, align 4 +Incoming arg + %11 = fmul float %10, 2.000000e+00 +Incoming instr + %11 = fmul float %10, 2.000000e+00 +Incoming arg + %10 = load float, float* %9, align 4 +Incoming instr + %10 = load float, float* %9, align 4 +Incoming arg + %9 = getelementptr inbounds float, float* %0, i64 2 +Incoming instr + %9 = getelementptr inbounds float, float* %0, i64 2 +Incoming arg + %12 = getelementptr inbounds float, float* %1, i64 2 +Incoming instr + %12 = getelementptr inbounds float, float* %1, i64 2 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 4 iterations, reason: Some(Saturated) +Match! + store float %25, float* %28, align 4 +Incoming instr + store float %25, float* %28, align 4 +Incoming arg + %25 = fadd float %24, %19 +Incoming instr + %25 = fadd float %24, %19 +Incoming arg + %24 = fadd float %23, %15 +Incoming instr + %24 = fadd float %23, %15 +Incoming arg + %23 = fadd float %22, %11 +Incoming instr + %23 = fadd float %22, %11 +Incoming arg + %22 = fadd float %21, %7 +Incoming instr + %22 = fadd float %21, %7 +Incoming arg + %21 = fadd float %4, 0.000000e+00 +Incoming instr + %21 = fadd float %4, 0.000000e+00 +Incoming arg + %4 = fmul float %3, 2.000000e+00 +Incoming instr + %4 = fmul float %3, 2.000000e+00 +Incoming arg + %3 = load float, float* %0, align 4 +Incoming instr + %3 = load float, float* %0, align 4 +Incoming arg + %7 = fmul float %6, 2.000000e+00 +Incoming instr + %7 = fmul float %6, 2.000000e+00 +Incoming arg + %6 = load float, float* %5, align 4 +Incoming instr + %6 = load float, float* %5, align 4 +Incoming arg + %5 = getelementptr inbounds float, float* %0, i64 1 +Incoming instr + %5 = getelementptr inbounds float, float* %0, i64 1 +Incoming arg + %11 = fmul float %10, 2.000000e+00 +Incoming instr + %11 = fmul float %10, 2.000000e+00 +Incoming arg + %10 = load float, float* %9, align 4 +Incoming instr + %10 = load float, float* %9, align 4 +Incoming arg + %9 = getelementptr inbounds float, float* %0, i64 2 +Incoming instr + %9 = getelementptr inbounds float, float* %0, i64 2 +Incoming arg + %15 = fmul float %14, 2.000000e+00 +Incoming instr + %15 = fmul float %14, 2.000000e+00 +Incoming arg + %14 = load float, float* %13, align 4 +Incoming instr + %14 = load float, float* %13, align 4 +Incoming arg + %13 = getelementptr inbounds float, float* %0, i64 3 +Incoming instr + %13 = getelementptr inbounds float, float* %0, i64 3 +Incoming arg + %19 = fmul float %18, 2.000000e+00 +Incoming instr + %19 = fmul float %18, 2.000000e+00 +Incoming arg + %18 = load float, float* %17, align 4 +Incoming instr + %18 = load float, float* %17, align 4 +Incoming arg + %17 = getelementptr inbounds float, float* %0, i64 4 +Incoming instr + %17 = getelementptr inbounds float, float* %0, i64 4 +Incoming arg + %28 = getelementptr inbounds float, float* %2, i64 3 +Incoming instr + %28 = getelementptr inbounds float, float* %2, i64 3 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 11 iterations, reason: Some(Saturated) diff --git a/src/dios-egraphs/Diospyros/err1.txt b/src/dios-egraphs/Diospyros/err1.txt new file mode 100644 index 
00000000..a6533f78 --- /dev/null +++ b/src/dios-egraphs/Diospyros/err1.txt @@ -0,0 +1,312 @@ +Match! + store float %17, float* %18, align 4 +Incoming instr + store float %17, float* %18, align 4 +Incoming arg + %17 = fmul float %16, 2.000000e+00 +Incoming instr + %17 = fmul float %16, 2.000000e+00 +Incoming arg + %16 = load float, float* %15, align 4 +Incoming instr + %16 = load float, float* %15, align 4 +Incoming arg + %15 = getelementptr inbounds float, float* %0, i64 %13 +Incoming instr + %15 = getelementptr inbounds float, float* %0, i64 %13 +Incoming arg + %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] +Incoming instr + %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] +Incoming arg + %19 = add nuw nsw i64 %13, 1 +Incoming instr + %19 = add nuw nsw i64 %13, 1 +Incoming arg + %18 = getelementptr inbounds float, float* %1, i64 %13 +Incoming instr + %18 = getelementptr inbounds float, float* %1, i64 %13 +Incoming arg + %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] +Incoming instr + %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] +Incoming arg + %19 = add nuw nsw i64 %13, 1 +Incoming instr + %19 = add nuw nsw i64 %13, 1 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 4 iterations, reason: Some(Saturated) +Match! + store float %37, float* %38, align 4 +Incoming instr + store float %37, float* %38, align 4 +Incoming arg + %37 = fmul float %36, 2.000000e+00 +Incoming instr + %37 = fmul float %36, 2.000000e+00 +Incoming arg + %36 = load float, float* %35, align 4 +Incoming instr + %36 = load float, float* %35, align 4 +Incoming arg + %35 = getelementptr inbounds float, float* %0, i64 %33 +Incoming instr + %35 = getelementptr inbounds float, float* %0, i64 %33 +Incoming arg + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming instr + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming arg + %54 = add nuw nsw i64 %33, 4 +Incoming instr + %54 = add nuw nsw i64 %33, 4 +Incoming arg + %38 = getelementptr inbounds float, float* %1, i64 %33 +Incoming instr + %38 = getelementptr inbounds float, float* %1, i64 %33 +Incoming arg + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming instr + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming arg + %54 = add nuw nsw i64 %33, 4 +Incoming instr + %54 = add nuw nsw i64 %33, 4 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 4 iterations, reason: Some(Saturated) +Match! 
+ store float %42, float* %43, align 4 +Incoming instr + store float %42, float* %43, align 4 +Incoming arg + %42 = fmul float %41, 2.000000e+00 +Incoming instr + %42 = fmul float %41, 2.000000e+00 +Incoming arg + %41 = load float, float* %40, align 4 +Incoming instr + %41 = load float, float* %40, align 4 +Incoming arg + %40 = getelementptr inbounds float, float* %0, i64 %39 +Incoming instr + %40 = getelementptr inbounds float, float* %0, i64 %39 +Incoming arg + %39 = or i64 %33, 1 +Incoming instr + %39 = or i64 %33, 1 +Incoming arg + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming instr + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming arg + %54 = add nuw nsw i64 %33, 4 +Incoming instr + %54 = add nuw nsw i64 %33, 4 +Incoming arg + %43 = getelementptr inbounds float, float* %1, i64 %39 +Incoming instr + %43 = getelementptr inbounds float, float* %1, i64 %39 +Incoming arg + %39 = or i64 %33, 1 +Incoming instr + %39 = or i64 %33, 1 +Incoming arg + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming instr + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming arg + %54 = add nuw nsw i64 %33, 4 +Incoming instr + %54 = add nuw nsw i64 %33, 4 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 4 iterations, reason: Some(Saturated) +Match! + store float %52, float* %53, align 4 +Incoming instr + store float %52, float* %53, align 4 +Incoming arg + %52 = fmul float %51, 2.000000e+00 +Incoming instr + %52 = fmul float %51, 2.000000e+00 +Incoming arg + %51 = load float, float* %50, align 4 +Incoming instr + %51 = load float, float* %50, align 4 +Incoming arg + %50 = getelementptr inbounds float, float* %0, i64 %49 +Incoming instr + %50 = getelementptr inbounds float, float* %0, i64 %49 +Incoming arg + %49 = or i64 %33, 3 +Incoming instr + %49 = or i64 %33, 3 +Incoming arg + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming instr + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming arg + %54 = add nuw nsw i64 %33, 4 +Incoming instr + %54 = add nuw nsw i64 %33, 4 +Incoming arg + %53 = getelementptr inbounds float, float* %1, i64 %49 +Incoming instr + %53 = getelementptr inbounds float, float* %1, i64 %49 +Incoming arg + %49 = or i64 %33, 3 +Incoming instr + %49 = or i64 %33, 3 +Incoming arg + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming instr + %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] +Incoming arg + %54 = add nuw nsw i64 %33, 4 +Incoming instr + %54 = add nuw nsw i64 %33, 4 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 4 iterations, reason: Some(Saturated) +Match! + store float %4, float* %1, align 4 +Incoming instr + store float %4, float* %1, align 4 +Incoming arg + %4 = fmul float %3, 2.000000e+00 +Incoming instr + %4 = fmul float %3, 2.000000e+00 +Incoming arg + %3 = load float, float* %0, align 4 +Incoming instr + %3 = load float, float* %0, align 4 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 4 iterations, reason: Some(Saturated) +Match! 
+ store float %7, float* %8, align 4 +Incoming instr + store float %7, float* %8, align 4 +Incoming arg + %7 = fmul float %6, 2.000000e+00 +Incoming instr + %7 = fmul float %6, 2.000000e+00 +Incoming arg + %6 = load float, float* %5, align 4 +Incoming instr + %6 = load float, float* %5, align 4 +Incoming arg + %5 = getelementptr inbounds float, float* %0, i64 1 +Incoming instr + %5 = getelementptr inbounds float, float* %0, i64 1 +Incoming arg + %8 = getelementptr inbounds float, float* %1, i64 1 +Incoming instr + %8 = getelementptr inbounds float, float* %1, i64 1 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 4 iterations, reason: Some(Saturated) +Match! + store float %11, float* %12, align 4 +Incoming instr + store float %11, float* %12, align 4 +Incoming arg + %11 = fmul float %10, 2.000000e+00 +Incoming instr + %11 = fmul float %10, 2.000000e+00 +Incoming arg + %10 = load float, float* %9, align 4 +Incoming instr + %10 = load float, float* %9, align 4 +Incoming arg + %9 = getelementptr inbounds float, float* %0, i64 2 +Incoming instr + %9 = getelementptr inbounds float, float* %0, i64 2 +Incoming arg + %12 = getelementptr inbounds float, float* %1, i64 2 +Incoming instr + %12 = getelementptr inbounds float, float* %1, i64 2 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 4 iterations, reason: Some(Saturated) +Match! + store float %25, float* %28, align 4 +Incoming instr + store float %25, float* %28, align 4 +Incoming arg + %25 = fadd float %24, %19 +Incoming instr + %25 = fadd float %24, %19 +Incoming arg + %24 = fadd float %23, %15 +Incoming instr + %24 = fadd float %23, %15 +Incoming arg + %23 = fadd float %22, %11 +Incoming instr + %23 = fadd float %22, %11 +Incoming arg + %22 = fadd float %21, %7 +Incoming instr + %22 = fadd float %21, %7 +Incoming arg + %21 = fadd float %4, 0.000000e+00 +Incoming instr + %21 = fadd float %4, 0.000000e+00 +Incoming arg + %4 = fmul float %3, 2.000000e+00 +Incoming instr + %4 = fmul float %3, 2.000000e+00 +Incoming arg + %3 = load float, float* %0, align 4 +Incoming instr + %3 = load float, float* %0, align 4 +Incoming arg + %7 = fmul float %6, 2.000000e+00 +Incoming instr + %7 = fmul float %6, 2.000000e+00 +Incoming arg + %6 = load float, float* %5, align 4 +Incoming instr + %6 = load float, float* %5, align 4 +Incoming arg + %5 = getelementptr inbounds float, float* %0, i64 1 +Incoming instr + %5 = getelementptr inbounds float, float* %0, i64 1 +Incoming arg + %11 = fmul float %10, 2.000000e+00 +Incoming instr + %11 = fmul float %10, 2.000000e+00 +Incoming arg + %10 = load float, float* %9, align 4 +Incoming instr + %10 = load float, float* %9, align 4 +Incoming arg + %9 = getelementptr inbounds float, float* %0, i64 2 +Incoming instr + %9 = getelementptr inbounds float, float* %0, i64 2 +Incoming arg + %15 = fmul float %14, 2.000000e+00 +Incoming instr + %15 = fmul float %14, 2.000000e+00 +Incoming arg + %14 = load float, float* %13, align 4 +Incoming instr + %14 = load float, float* %13, align 4 +Incoming arg + %13 = getelementptr inbounds float, float* %0, i64 3 +Incoming instr + %13 = getelementptr inbounds float, float* %0, i64 3 +Incoming arg + %19 = fmul 
float %18, 2.000000e+00 +Incoming instr + %19 = fmul float %18, 2.000000e+00 +Incoming arg + %18 = load float, float* %17, align 4 +Incoming instr + %18 = load float, float* %17, align 4 +Incoming arg + %17 = getelementptr inbounds float, float* %0, i64 4 +Incoming instr + %17 = getelementptr inbounds float, float* %0, i64 4 +Incoming arg + %28 = getelementptr inbounds float, float* %2, i64 3 +Incoming instr + %28 = getelementptr inbounds float, float* %2, i64 3 +Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop +Stopped after 11 iterations, reason: Some(Saturated) diff --git a/src/dios-egraphs/Diospyros/err2.txt b/src/dios-egraphs/Diospyros/err2.txt new file mode 100644 index 00000000..d81c92b7 --- /dev/null +++ b/src/dios-egraphs/Diospyros/err2.txt @@ -0,0 +1,3 @@ +opt: build/diospyros.ll:192:38: error: expected type + %11 = phi i64 [ 0, %.lr.ph11 ], [ , %32 ] + ^ diff --git a/src/dios-egraphs/Diospyros/inline-float.c b/src/dios-egraphs/Diospyros/inline-float.c new file mode 100644 index 00000000..2b0245d8 --- /dev/null +++ b/src/dios-egraphs/Diospyros/inline-float.c @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + return sum; +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = no_opt_test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +float test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + return sum; +} + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +int main() { + float A[SIZE] = {1.0f}; + float expectedA[SIZE] = {1.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = 1.0f; + expectedA[i] = 1.0f; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + B[i] = 0.0f; + expectedB[i] = 0.0f; + } + float C[SIZE] = {0.0f}; + float expectedC[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + C[i] = 0.0f; + expectedC[i] = 0.0f; + } + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("C Output: %f\n", C[i]); + printf("Expected C Output: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/inline-float.c.orig b/src/dios-egraphs/Diospyros/inline-float.c.orig new file mode 100644 index 00000000..2b0245d8 --- /dev/null +++ b/src/dios-egraphs/Diospyros/inline-float.c.orig @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += 
B[i]; + } + return sum; +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = no_opt_test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +float test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + return sum; +} + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +int main() { + float A[SIZE] = {1.0f}; + float expectedA[SIZE] = {1.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = 1.0f; + expectedA[i] = 1.0f; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + B[i] = 0.0f; + expectedB[i] = 0.0f; + } + float C[SIZE] = {0.0f}; + float expectedC[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + C[i] = 0.0f; + expectedC[i] = 0.0f; + } + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("C Output: %f\n", C[i]); + printf("Expected C Output: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/reduce.sh b/src/dios-egraphs/Diospyros/reduce.sh new file mode 100755 index 00000000..bb961006 --- /dev/null +++ b/src/dios-egraphs/Diospyros/reduce.sh @@ -0,0 +1,11 @@ +#!/bin/bash +/usr/local/opt/llvm/bin/clang -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll inline-float.c +opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll +opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll +/usr/local/opt/llvm/bin/clang -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.dylib -mllvm -opt -mllvm -print=false build/aa.ll -o build/diospyros.ll &> err1.txt +opt -S --adce --dse build/diospyros.ll -o build/dce.ll 2> err2.txt \ +&& $(CLANG) build/dce.ll -o build/final \ +&& build/final +output1=$(grep -c 'Error' err1.txt) +output2=$(grep -c 'Error' err2.txt) +echo $output1 || $output2 \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index a98fcb71..92e3a929 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -5,7 +5,7 @@ use libc::size_t; use llvm::{core::*, prelude::*, LLVMOpcode::*, LLVMRealPredicate}; use std::{ cmp, - collections::{BTreeMap, BTreeSet, HashMap}, + collections::{BTreeMap, BTreeSet}, ffi::CStr, mem, os::raw::c_char, @@ -49,7 +49,7 @@ extern "C" { // GEPMap : Maps the array name and array offset as symbols to the GEP // LLVM Value Ref that LLVM Generated type GEPMap = BTreeMap<(Symbol, Symbol), LLVMValueRef>; -type LLVMPairMap = HashMap; +type LLVMPairMap = BTreeMap; static mut ARG_IDX: i32 = 0; static mut CALL_IDX: i32 = 0; @@ -202,7 +202,7 @@ pub fn optimize( // llvm to egg let llvm_instrs = from_raw_parts(bb, size); let past_llvm_instrs = from_raw_parts(past_instrs, past_size); - let mut llvm_arg_pairs = HashMap::new(); + let mut llvm_arg_pairs = BTreeMap::new(); for instr_pair in past_llvm_instrs { let original_value = instr_pair.original_value; let new_value = instr_pair.new_value; @@ -268,7 +268,6 @@ pub fn optimize( // 
------------ NEW CONVERSION FROM LLVM IR TO EGG EXPRESSIONS ------- type StoreMap = BTreeMap; -// type gep_map = BTreeMap; type IdMap = BTreeSet; type SymbolMap = BTreeMap; @@ -297,14 +296,31 @@ unsafe fn get_pow2(n: u32) -> u32 { return pow; } +fn is_pow2(n: u32) -> bool { + if n == 1 { + return true; + } else if n % 2 == 1 { + return false; + } + return is_pow2(n / 2); +} + /// New Pad Vector should round the number of elements up to a power of 2, and then recursive -/// divide each into the lane width. Assumes lane width is also a power of 2 in size +/// divide each into the lane width. Assumes lane width is also a power of 2 in size. +/// Raises assertion error if width is not a power of 2 +/// If the vector has less than the width, we do not pad, and just append that vector to enodevect unsafe fn balanced_pad_vector<'a>( binop_vec: &mut Vec, enode_vec: &'a mut Vec, ) -> &'a mut Vec { let width = config::vector_width(); + assert!(is_pow2(width as u32)); let length = binop_vec.len(); + // Check vector less than width, and then return + if length < width { + enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); + return enode_vec; + } let closest_pow2 = get_pow2(cmp::max(length, width) as u32); let diff = closest_pow2 - (length as u32); for _ in 0..diff { @@ -316,6 +332,7 @@ unsafe fn balanced_pad_vector<'a>( return build_concat(width, binop_vec, enode_vec); } +/// Recursively concatenate vectors together unsafe fn build_concat<'a>( lane_width: usize, binop_vec: &mut Vec, @@ -884,7 +901,7 @@ unsafe fn sqrt32_to_egg( } unsafe fn sqrt64_to_egg( - expr: LLVMValueRef, + _expr: LLVMValueRef, _enode_vec: Vec, _next_idx: i32, _gep_map: &mut GEPMap, @@ -1202,6 +1219,8 @@ unsafe fn translate_egg( module: LLVMModuleRef, ) -> LLVMValueRef { let instr = match enode { + // VecLang::RegInfo(_) => panic!("RegInfo Currently Not Handled"), + VecLang::Reg(_) => panic!("Reg Currently Not Handled"), VecLang::Symbol(symbol) => match symbol_map.get(enode) { Some(llvm_instr) => llvm_recursive_add(builder, *llvm_instr, context, llvm_arg_pairs), None => { diff --git a/src/dios-egraphs/Diospyros/src/llvm_to_egg.rs b/src/dios-egraphs/Diospyros/src/llvm_to_egg.rs new file mode 100644 index 00000000..41cc5757 --- /dev/null +++ b/src/dios-egraphs/Diospyros/src/llvm_to_egg.rs @@ -0,0 +1,1213 @@ +extern crate llvm_sys as llvm; +use dioslib::{config, rules, veclang::VecLang}; +use egg::*; +use libc::size_t; +use llvm::{core::*, prelude::*, LLVMOpcode::*, LLVMRealPredicate}; +use std::{ + cmp, + collections::{BTreeMap, BTreeSet}, + ffi::CStr, + mem, + os::raw::c_char, + slice::from_raw_parts, +}; + +extern "C" { + fn llvm_index(val: LLVMValueRef, index: i32) -> i32; + fn llvm_name(val: LLVMValueRef) -> *const c_char; + fn isa_unop(val: LLVMValueRef) -> bool; + fn isa_bop(val: LLVMValueRef) -> bool; + fn isa_constant(val: LLVMValueRef) -> bool; + fn isa_constfp(val: LLVMValueRef) -> bool; + fn isa_gep(val: LLVMValueRef) -> bool; + fn isa_load(val: LLVMValueRef) -> bool; + fn isa_store(val: LLVMValueRef) -> bool; + fn isa_argument(val: LLVMValueRef) -> bool; + fn isa_call(val: LLVMValueRef) -> bool; + fn isa_fptrunc(val: LLVMValueRef) -> bool; + fn isa_fpext(val: LLVMValueRef) -> bool; + fn isa_alloca(val: LLVMValueRef) -> bool; + fn isa_phi(val: LLVMValueRef) -> bool; + fn _isa_sextint(val: LLVMValueRef) -> bool; + fn isa_sitofp(val: LLVMValueRef) -> bool; + fn isa_constaggregatezero(val: LLVMValueRef) -> bool; + fn _isa_constaggregate(val: LLVMValueRef) -> bool; + fn isa_integertype(val: LLVMValueRef) -> 
bool; + fn _isa_intptr(val: LLVMValueRef) -> bool; + fn isa_floatptr(val: LLVMValueRef) -> bool; + fn _isa_floattype(val: LLVMValueRef) -> bool; + fn isa_bitcast(val: LLVMValueRef) -> bool; + fn isa_sqrt32(val: LLVMValueRef) -> bool; + fn isa_sqrt64(val: LLVMValueRef) -> bool; + fn get_constant_float(val: LLVMValueRef) -> f32; + fn build_constant_float(n: f64, context: LLVMContextRef) -> LLVMValueRef; +} + +// Note: We use BTreeMaps to enforce ordering in the map +// Without ordering, tests become flaky and start failing a lot more often +// We do not use HashMaps for this reason as ordering is not enforced. +// GEPMap : Maps the array name and array offset as symbols to the GEP +// LLVM Value Ref that LLVM Generated +type GEPMap = BTreeMap<(Symbol, Symbol), LLVMValueRef>; +type LLVMPairMap = BTreeMap; + +static mut ARG_IDX: i32 = 0; +static mut CALL_IDX: i32 = 0; +static mut NODE_IDX: u32 = 0; + +unsafe fn gen_node_idx() -> u32 { + NODE_IDX += 1; + return NODE_IDX; +} + +unsafe fn gen_arg_name() -> String { + ARG_IDX += 1; + let string = "ARGUMENT".to_string(); + let result = format!("{}{}", string, ARG_IDX.to_string()); + result +} + +unsafe fn gen_call_name() -> String { + CALL_IDX += 1; + let string = "CALL".to_string(); + let result = format!("{}{}", string, CALL_IDX.to_string()); + result +} + +// Reference Comparison: https://www.reddit.com/r/rust/comments/2r3wjk/is_there_way_to_compare_objects_by_address_in_rust/ +// Compares whether addresses of LLVMValueRefs are the same. +// Not the contents of the Value Refs +fn cmp_val_ref_address(a1: &llvm::LLVMValue, a2: &llvm::LLVMValue) -> bool { + a1 as *const _ == a2 as *const _ +} + +fn _cmp_typ(a1: &LLVMTypeRef, a2: &LLVMTypeRef) -> bool { + a1 as *const _ == a2 as *const _ +} + +/// Converts LLVMValueRef binop to equivalent VecLang Binop node +unsafe fn choose_binop(bop: &LLVMValueRef, ids: [Id; 2]) -> VecLang { + match LLVMGetInstructionOpcode(*bop) { + LLVMFAdd => VecLang::Add(ids), + LLVMFMul => VecLang::Mul(ids), + LLVMFSub => VecLang::Minus(ids), + LLVMFDiv => VecLang::Div(ids), + _ => panic!("Choose_Binop: Opcode Match Error"), + } +} + +/// Translates VecLang binop expression node to the corresponding LLVMValueRef +unsafe fn translate_binop( + enode: &VecLang, + left: LLVMValueRef, + right: LLVMValueRef, + builder: LLVMBuilderRef, + name: *const c_char, +) -> LLVMValueRef { + match enode { + VecLang::VecAdd(_) | VecLang::Add(_) => LLVMBuildFAdd(builder, left, right, name), + VecLang::VecMul(_) | VecLang::Mul(_) => LLVMBuildFMul(builder, left, right, name), + VecLang::VecMinus(_) | VecLang::Minus(_) => LLVMBuildFSub(builder, left, right, name), + VecLang::VecDiv(_) | VecLang::Div(_) => LLVMBuildFDiv(builder, left, right, name), + // use binary bitwise operators for or / and + VecLang::Or(_) => LLVMBuildOr(builder, left, right, name), + VecLang::And(_) => LLVMBuildAnd(builder, left, right, name), + VecLang::Lt(_) => LLVMBuildFCmp(builder, LLVMRealPredicate::LLVMRealOLT, left, right, name), + _ => panic!("Not a vector or scalar binop."), + } +} + +/// Translates VecLang unop expression node to the corresponding LLVMValueRef +unsafe fn translate_unop( + enode: &VecLang, + n: LLVMValueRef, + builder: LLVMBuilderRef, + context: LLVMContextRef, + module: LLVMModuleRef, + name: *const c_char, +) -> LLVMValueRef { + match enode { + VecLang::Sgn(_) => { + let one = LLVMConstReal(LLVMFloatTypeInContext(context), 1 as f64); + let param_types = [ + LLVMFloatTypeInContext(context), + LLVMFloatTypeInContext(context), + ] + .as_mut_ptr(); + 
let fn_type = + LLVMFunctionType(LLVMFloatTypeInContext(context), param_types, 2, 0 as i32); + let func = + LLVMAddFunction(module, b"llvm.copysign.f32\0".as_ptr() as *const _, fn_type); + let args = [one, n].as_mut_ptr(); + LLVMBuildCall(builder, func, args, 2, name) + } + VecLang::Sqrt(_) => { + let param_types = [LLVMFloatTypeInContext(context)].as_mut_ptr(); + let fn_type = + LLVMFunctionType(LLVMFloatTypeInContext(context), param_types, 1, 0 as i32); + let func = LLVMAddFunction(module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); + let args = [n].as_mut_ptr(); + LLVMBuildCall(builder, func, args, 1, name) + } + VecLang::Neg(_) => LLVMBuildFNeg(builder, n, name), + _ => panic!("Not a scalar unop."), + } +} + +/// translate_get converts a VecLang Get Node to the corresponding LLVM Ir array name and +/// LLVM IR offset, as symbols. +unsafe fn translate_get(get: &VecLang, enode_vec: &[VecLang]) -> (Symbol, Symbol) { + match get { + VecLang::Get([sym, i]) => { + match (&enode_vec[usize::from(*sym)], &enode_vec[usize::from(*i)]) { + (VecLang::Symbol(name), VecLang::Symbol(offset)) => { + return (*name, *offset); + } + _ => panic!("Match Error: Expects Pair of Symbol, Symbol."), + } + } + _ => panic!("Match Error in Translate Get: Expects Get Enode."), + } +} + +/// Main function to optimize: Takes in a basic block of instructions, +/// optimizes it, and then translates it to LLVM IR code, in place. + +#[repr(C)] +pub struct IntLLVMPair { + node_int: u32, + arg: LLVMValueRef, +} + +#[repr(C)] +pub struct LLVMPair { + original_value: LLVMValueRef, + new_value: LLVMValueRef, +} + +#[repr(C)] +pub struct VectorPointerSize { + llvm_pointer: *const LLVMPair, + llvm_pointer_size: size_t, +} + +#[no_mangle] +pub fn optimize( + module: LLVMModuleRef, + context: LLVMContextRef, + builder: LLVMBuilderRef, + bb: *const LLVMValueRef, + size: size_t, + past_instrs: *const LLVMPair, + past_size: size_t, + run_egg: bool, + print_opt: bool, +) -> VectorPointerSize { + unsafe { + // llvm to egg + let llvm_instrs = from_raw_parts(bb, size); + let past_llvm_instrs = from_raw_parts(past_instrs, past_size); + let mut llvm_arg_pairs = BTreeMap::new(); + for instr_pair in past_llvm_instrs { + let original_value = instr_pair.original_value; + let new_value = instr_pair.new_value; + // assert!(isa_load(original_value) || isa_alloca(original_value)); + // assert!(isa_load(new_value) || isa_alloca(new_value)); + llvm_arg_pairs.insert(original_value, new_value); + } + let mut node_to_arg = Vec::new(); + let (expr, gep_map, store_map, symbol_map) = + llvm_to_egg(llvm_instrs, &mut llvm_arg_pairs, &mut node_to_arg); + + // optimization pass + if print_opt { + eprintln!("{}", expr.pretty(10)); + } + let mut best = expr.clone(); + if run_egg { + let pair = rules::run(&expr, 180, true, !run_egg); + best = pair.1; + } + if print_opt { + eprintln!("{}", best.pretty(10)); + } + + // egg to llvm + egg_to_llvm( + best, + &gep_map, + &store_map, + &symbol_map, + &mut llvm_arg_pairs, // does this work properly?, IDK? 
Need to return mut value + &node_to_arg, + module, + context, + builder, + ); + + let mut final_llvm_arg_pairs = Vec::new(); + for (unchanged_val, new_val) in llvm_arg_pairs.iter() { + let pair = LLVMPair { + original_value: *unchanged_val, + new_value: *new_val, + }; + // assert!(isa_load(*unchanged_val) || isa_alloca(*unchanged_val)); + // assert!(isa_load(*new_val) || isa_alloca(*new_val)); + final_llvm_arg_pairs.push(pair); + } + + // https://stackoverflow.com/questions/39224904/how-to-expose-a-rust-vect-to-ffi + let mut llvm_arg_pairs_boxed_slice: Box<[LLVMPair]> = + final_llvm_arg_pairs.into_boxed_slice(); + let llvm_arg_pairs_array: *mut LLVMPair = llvm_arg_pairs_boxed_slice.as_mut_ptr(); + let llvm_arg_pairs_array_len: usize = llvm_arg_pairs_boxed_slice.len(); + mem::forget(llvm_arg_pairs_boxed_slice); + + // TODO: FIX THIS + return VectorPointerSize { + llvm_pointer: llvm_arg_pairs_array, + llvm_pointer_size: llvm_arg_pairs_array_len, + }; + } +} + +// ------------ NEW CONVERSION FROM LLVM IR TO EGG EXPRESSIONS ------- + +type StoreMap = BTreeMap; +type IdMap = BTreeSet; +type SymbolMap = BTreeMap; + +enum LLVMOpType { + Argument, + Constant, + Store, + Load, + Gep, + Unop, + Bop, + Call, + FPTrunc, + SIToFP, + Bitcast, + Sqrt32, + Sqrt64, + FPExt, +} + +unsafe fn get_pow2(n: u32) -> u32 { + let mut pow = 1; + while pow < n { + pow *= 2; + } + return pow; +} + +fn is_pow2(n: u32) -> bool { + if n == 1 { + return true; + } else if n % 2 == 1 { + return false; + } + return is_pow2(n / 2); +} + +/// New Pad Vector should round the number of elements up to a power of 2, and then recursive +/// divide each into the lane width. Assumes lane width is also a power of 2 in size. +/// Raises assertion error if width is not a power of 2 +/// If the vector has less than the width, we do not pad, and just append that vector to enodevect +unsafe fn balanced_pad_vector<'a>( + binop_vec: &mut Vec, + enode_vec: &'a mut Vec, +) -> &'a mut Vec { + let width = config::vector_width(); + assert!(is_pow2(width as u32)); + let length = binop_vec.len(); + // Check vector less than width, and then return + if length < width { + enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); + return enode_vec; + } + let closest_pow2 = get_pow2(cmp::max(length, width) as u32); + let diff = closest_pow2 - (length as u32); + for _ in 0..diff { + let zero = VecLang::Num(0); + enode_vec.push(zero); + let zero_idx = enode_vec.len() - 1; + binop_vec.push(Id::from(zero_idx)); + } + return build_concat(width, binop_vec, enode_vec); +} + +/// Recursively concatenate vectors together +unsafe fn build_concat<'a>( + lane_width: usize, + binop_vec: &mut Vec, + enode_vec: &'a mut Vec, +) -> &'a mut Vec { + if binop_vec.len() == lane_width { + enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); + return enode_vec; + } + let num_binops = binop_vec.len(); + let halfway = num_binops / 2; + let (mut left, mut right) = (Vec::new(), Vec::new()); + for (i, b) in binop_vec.iter().enumerate() { + if i < halfway { + left.push(*b); + } else { + right.push(*b); + } + } + assert_eq!(left.len(), right.len()); + assert_eq!(left.len() + right.len(), num_binops); + assert_eq!(left.len() % lane_width, 0); + assert_eq!(right.len() % lane_width, 0); + let enode_vec1 = build_concat(lane_width, &mut left, enode_vec); + let idx1 = enode_vec1.len() - 1; + let enode_vec2 = build_concat(lane_width, &mut right, enode_vec1); + let idx2 = enode_vec2.len() - 1; + enode_vec2.push(VecLang::Concat([Id::from(idx1), Id::from(idx2)])); + 
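For reference, a minimal standalone sketch of the padding and chunking scheme that balanced_pad_vector and build_concat implement, with plain integers in place of egg Ids and a hypothetical pad_and_chunk helper; this is a simplified model under those assumptions, not the code in the patch.

    // Round n up to the next power of two (same loop shape as get_pow2).
    fn next_pow2(n: usize) -> usize {
        let mut p = 1;
        while p < n {
            p *= 2;
        }
        p
    }

    // Zero-pad to a power of two, then split recursively until each leaf holds
    // exactly one lane; a real pass would emit Vec/Concat e-nodes instead.
    fn pad_and_chunk(mut elems: Vec<i32>, lane_width: usize) -> Vec<Vec<i32>> {
        assert!(lane_width.is_power_of_two());
        if elems.len() < lane_width {
            return vec![elems]; // short vectors are appended unpadded
        }
        elems.resize(next_pow2(elems.len().max(lane_width)), 0);
        fn split(elems: &[i32], lane_width: usize, out: &mut Vec<Vec<i32>>) {
            if elems.len() == lane_width {
                out.push(elems.to_vec());
            } else {
                let half = elems.len() / 2;
                split(&elems[..half], lane_width, out); // left half first
                split(&elems[half..], lane_width, out); // then the right half
            }
        }
        let mut out = Vec::new();
        split(&elems, lane_width, &mut out);
        out
    }

    fn main() {
        // Five elements with lane width 4: padded to 8, split into two lanes.
        println!("{:?}", pad_and_chunk(vec![1, 2, 3, 4, 5], 4));
    }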
return enode_vec2; +} + +unsafe fn _llvm_print(inst: LLVMValueRef) -> () { + LLVMDumpValue(inst); + println!(); +} + +unsafe fn _llvm_recursive_print(inst: LLVMValueRef) -> () { + if isa_argument(inst) { + return LLVMDumpValue(inst); + } else if isa_constant(inst) { + return LLVMDumpValue(inst); + } + let num_ops = LLVMGetNumOperands(inst); + for i in 0..num_ops { + let operand = LLVMGetOperand(inst, i as u32); + _llvm_recursive_print(operand); + print!(" "); + } + println!(); + LLVMDumpValue(inst); + println!(); + return; +} + +unsafe fn llvm_recursive_add( + builder: LLVMBuilderRef, + inst: LLVMValueRef, + context: LLVMContextRef, + llvm_arg_pairs: &mut LLVMPairMap, +) -> LLVMValueRef { + let cloned_inst = LLVMInstructionClone(inst); + if isa_argument(inst) { + return inst; + } + let mut matched = false; + let mut ret_value = inst; + for (original_val, new_val) in (&*llvm_arg_pairs).iter() { + if cmp_val_ref_address(&**original_val, &*inst) { + matched = true; + ret_value = *new_val; + break; + } + } + if matched { + return ret_value; + } + if isa_constant(inst) { + return inst; + } else if isa_phi(inst) { + return inst; + } else if isa_alloca(inst) { + // We have this in the base case to stop reconstruction of allocas, + // because allocas are like loads, and should not get reconstructioned + // search the llvm_arg_pairs for allocas that were already created + let mut matched = false; + let mut ret_value = inst; + for (original_val, new_val) in (&*llvm_arg_pairs).iter() { + // let original_llvm = llvm_pair.original_value; + // let new_llvm = llvm_pair.new_value; + if cmp_val_ref_address(&**original_val, &*inst) { + matched = true; + ret_value = *new_val; + break; + } + } + if matched { + return ret_value; + } else { + // assert!(isa_load(inst) || isa_alloca(inst)); + // assert!(isa_load(cloned_inst) || isa_alloca(cloned_inst)); + llvm_arg_pairs.insert(inst, cloned_inst); + LLVMInsertIntoBuilder(builder, cloned_inst); + return cloned_inst; + } + } + let num_ops = LLVMGetNumOperands(inst); + for i in 0..num_ops { + let operand = LLVMGetOperand(inst, i as u32); + let new_operand = llvm_recursive_add(builder, operand, context, llvm_arg_pairs); + LLVMSetOperand(cloned_inst, i as u32, new_operand); + } + LLVMInsertIntoBuilder(builder, cloned_inst); + + let mut in_map = false; + for (original_inst, _) in (&*llvm_arg_pairs).iter() { + if cmp_val_ref_address(&**original_inst, &*inst) { + in_map = true; + } + } + if isa_load(inst) { + if !in_map { + // assert!(isa_load(inst) || isa_alloca(inst)); + // assert!(isa_load(cloned_inst) || isa_alloca(cloned_inst)); + llvm_arg_pairs.insert(inst, cloned_inst); + } + } + return cloned_inst; +} + +unsafe fn match_llvm_op(expr: &LLVMValueRef) -> LLVMOpType { + if isa_bop(*expr) { + return LLVMOpType::Bop; + } else if isa_unop(*expr) { + return LLVMOpType::Unop; + } else if isa_constant(*expr) { + return LLVMOpType::Constant; + } else if isa_gep(*expr) { + return LLVMOpType::Gep; + } else if isa_load(*expr) { + return LLVMOpType::Load; + } else if isa_store(*expr) { + return LLVMOpType::Store; + } else if isa_argument(*expr) { + return LLVMOpType::Argument; + } else if isa_call(*expr) { + return LLVMOpType::Call; + } else if isa_fptrunc(*expr) { + return LLVMOpType::FPTrunc; + } else if isa_sitofp(*expr) { + return LLVMOpType::SIToFP; + } else if isa_bitcast(*expr) { + return LLVMOpType::Bitcast; + } else if isa_sqrt32(*expr) { + return LLVMOpType::Sqrt32; + } else if isa_sqrt64(*expr) { + return LLVMOpType::Sqrt64; + } else if isa_fpext(*expr) { + return 
LLVMOpType::FPExt; + } else { + LLVMDumpValue(*expr); + println!(); + panic!("ref_to_egg: Unmatched case for LLVMValueRef {:?}", *expr); + } +} + +unsafe fn choose_unop(unop: &LLVMValueRef, id: Id) -> VecLang { + match LLVMGetInstructionOpcode(*unop) { + LLVMFNeg => VecLang::Neg([id]), + _ => panic!("Choose_Unop: Opcode Match Error"), + } +} + +unsafe fn arg_to_egg( + expr: LLVMValueRef, + mut enode_vec: Vec, + next_idx: i32, + _gep_map: &mut GEPMap, + _store_map: &mut StoreMap, + _id_map: &mut IdMap, + symbol_map: &mut SymbolMap, + _llvm_arg_pairs: &LLVMPairMap, + _node_to_arg: &mut Vec, +) -> (Vec, i32) { + let sym_name = gen_arg_name(); + let symbol = VecLang::Symbol(Symbol::from(sym_name)); + symbol_map.insert(symbol.clone(), expr); + enode_vec.push(symbol); + return (enode_vec, next_idx + 1); +} + +unsafe fn bop_to_egg( + expr: LLVMValueRef, + enode_vec: Vec, + next_idx: i32, + gep_map: &mut GEPMap, + store_map: &mut StoreMap, + id_map: &mut IdMap, + symbol_map: &mut SymbolMap, + llvm_arg_pairs: &LLVMPairMap, + node_to_arg: &mut Vec, +) -> (Vec, i32) { + let left = LLVMGetOperand(expr, 0); + let right = LLVMGetOperand(expr, 1); + let (v1, next_idx1) = ref_to_egg( + left, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); + let (mut v2, next_idx2) = ref_to_egg( + right, + v1, + next_idx1, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); + let ids = [ + Id::from((next_idx1 - 1) as usize), + Id::from((next_idx2 - 1) as usize), + ]; + v2.push(choose_binop(&expr, ids)); + (v2, next_idx2 + 1) +} + +unsafe fn unop_to_egg( + expr: LLVMValueRef, + enode_vec: Vec, + next_idx: i32, + gep_map: &mut GEPMap, + store_map: &mut StoreMap, + id_map: &mut IdMap, + symbol_map: &mut SymbolMap, + llvm_arg_pairs: &LLVMPairMap, + node_to_arg: &mut Vec, +) -> (Vec, i32) { + let sub_expr = LLVMGetOperand(expr, 0); + let (mut v, next_idx1) = ref_to_egg( + sub_expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); + let id = Id::from((next_idx1 - 1) as usize); + v.push(choose_unop(&expr, id)); + (v, next_idx1 + 1) +} + +unsafe fn gep_to_egg( + expr: LLVMValueRef, + mut enode_vec: Vec, + next_idx: i32, + gep_map: &mut GEPMap, + _store_map: &mut StoreMap, + _id_map: &mut IdMap, + _symbol_map: &mut SymbolMap, + _llvm_arg_pairs: &LLVMPairMap, + _node_to_arg: &mut Vec, +) -> (Vec, i32) { + // // assert!(isa_argument(expr) || isa_gep(expr) || isa_load(expr)); + // let mut enode_vec = Vec::new(); + let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); + enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); + + let num_gep_operands = LLVMGetNumOperands(expr); + let mut indices = Vec::new(); + for operand_idx in 1..num_gep_operands { + let array_offset = llvm_index(expr, operand_idx); + indices.push(array_offset); + } + let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); + let offsets_symbol = Symbol::from(&offsets_string); + enode_vec.push(VecLang::Symbol(offsets_symbol)); + + let get_node = VecLang::Get([ + Id::from((next_idx) as usize), + Id::from((next_idx + 1) as usize), + ]); + (*gep_map).insert( + (Symbol::from(array_name), Symbol::from(&offsets_string)), + expr, + ); + enode_vec.push(get_node); + + return (enode_vec, next_idx + 3); +} + +unsafe fn _address_to_egg( + expr: LLVMValueRef, + mut enode_vec: Vec, + next_idx: i32, + gep_map: &mut GEPMap, + _store_map: &mut StoreMap, + _id_map: &mut IdMap, + 
_symbol_map: &mut SymbolMap, + _llvm_arg_pairs: &LLVMPairMap, + _node_to_arg: &mut Vec, +) -> (Vec, i32) { + let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); + enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); + + let num_gep_operands = LLVMGetNumOperands(expr); + let mut indices = Vec::new(); + for operand_idx in 1..num_gep_operands { + let array_offset = llvm_index(expr, operand_idx); + indices.push(array_offset); + } + let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); + let offsets_symbol = Symbol::from(&offsets_string); + enode_vec.push(VecLang::Symbol(offsets_symbol)); + + let get_node = VecLang::Get([ + Id::from((next_idx) as usize), + Id::from((next_idx + 1) as usize), + ]); + (*gep_map).insert( + (Symbol::from(array_name), Symbol::from(&offsets_string)), + expr, + ); + enode_vec.push(get_node); + + return (enode_vec, next_idx + 3); +} + +unsafe fn sitofp_to_egg( + expr: LLVMValueRef, + mut enode_vec: Vec, + next_idx: i32, + gep_map: &mut GEPMap, + _store_map: &mut StoreMap, + _id_map: &mut IdMap, + _symbol_map: &mut SymbolMap, + _llvm_arg_pairs: &LLVMPairMap, + _node_to_arg: &mut Vec, +) -> (Vec, i32) { + let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); + enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); + + let num_gep_operands = LLVMGetNumOperands(expr); + let mut indices = Vec::new(); + for operand_idx in 1..num_gep_operands { + let array_offset = llvm_index(expr, operand_idx); + indices.push(array_offset); + } + let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); + let offsets_symbol = Symbol::from(&offsets_string); + enode_vec.push(VecLang::Symbol(offsets_symbol)); + + let get_node = VecLang::Get([ + Id::from((next_idx) as usize), + Id::from((next_idx + 1) as usize), + ]); + (*gep_map).insert( + (Symbol::from(array_name), Symbol::from(&offsets_string)), + expr, + ); + enode_vec.push(get_node); + + return (enode_vec, next_idx + 3); +} + +unsafe fn load_to_egg( + expr: LLVMValueRef, + enode_vec: Vec, + next_idx: i32, + gep_map: &mut GEPMap, + store_map: &mut StoreMap, + id_map: &mut IdMap, + symbol_map: &mut SymbolMap, + llvm_arg_pairs: &LLVMPairMap, + node_to_arg: &mut Vec, +) -> (Vec, i32) { + return gep_to_egg( + expr, // we pass the entire instruction and not just the address + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); +} + +unsafe fn store_to_egg( + expr: LLVMValueRef, + enode_vec: Vec, + next_idx: i32, + gep_map: &mut GEPMap, + store_map: &mut StoreMap, + id_map: &mut IdMap, + symbol_map: &mut SymbolMap, + llvm_arg_pairs: &LLVMPairMap, + node_to_arg: &mut Vec, +) -> (Vec, i32) { + let data = LLVMGetOperand(expr, 0); + let addr = LLVMGetOperand(expr, 1); // expected to be a gep operator or addr in LLVM + let (vec, next_idx1) = ref_to_egg( + data, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); + (*store_map).insert(next_idx1 - 1, addr); + (*id_map).insert(Id::from((next_idx1 - 1) as usize)); + return (vec, next_idx1); +} + +unsafe fn const_to_egg( + expr: LLVMValueRef, + mut enode_vec: Vec, + next_idx: i32, + _gep_map: &mut GEPMap, + _store_map: &mut StoreMap, + _id_map: &mut IdMap, + _symbol_map: &mut SymbolMap, + _llvm_arg_pairs: &LLVMPairMap, + _node_to_arg: &mut Vec, +) -> (Vec, i32) { + let value = get_constant_float(expr); + enode_vec.push(VecLang::Num(value as i32)); + (enode_vec, next_idx + 1) +} + +unsafe fn 
_load_arg_to_egg( + expr: LLVMValueRef, + mut enode_vec: Vec, + next_idx: i32, + gep_map: &mut GEPMap, + _store_map: &mut StoreMap, + _id_map: &mut IdMap, + _symbol_map: &mut SymbolMap, + _llvm_arg_pairs: &LLVMPairMap, + _node_to_arg: &mut Vec, +) -> (Vec, i32) { + // assert!(isa_argument(expr) || isa_gep(expr)); + let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); + enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); + + let num_gep_operands = LLVMGetNumOperands(expr); + let mut indices = Vec::new(); + for operand_idx in 1..num_gep_operands { + let array_offset = llvm_index(expr, operand_idx); + indices.push(array_offset); + } + let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); + let offsets_symbol = Symbol::from(&offsets_string); + enode_vec.push(VecLang::Symbol(offsets_symbol)); + + let get_node = VecLang::Get([ + Id::from((next_idx) as usize), + Id::from((next_idx + 1) as usize), + ]); + (*gep_map).insert( + (Symbol::from(array_name), Symbol::from(&offsets_string)), + expr, + ); + enode_vec.push(get_node); + + return (enode_vec, next_idx + 3); +} + +unsafe fn load_call_to_egg( + expr: LLVMValueRef, + mut enode_vec: Vec, + next_idx: i32, + gep_map: &mut GEPMap, + store_map: &mut StoreMap, + id_map: &mut IdMap, + symbol_map: &mut SymbolMap, + llvm_arg_pairs: &LLVMPairMap, + node_to_arg: &mut Vec, +) -> (Vec, i32) { + if isa_sqrt32(expr) { + return sqrt32_to_egg( + expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); + } + let call_sym_name = gen_call_name(); + let call_sym = VecLang::Symbol(Symbol::from(call_sym_name)); + symbol_map.insert(call_sym.clone(), expr); + enode_vec.push(call_sym); + return (enode_vec, next_idx + 1); +} + +unsafe fn fpext_to_egg( + expr: LLVMValueRef, + enode_vec: Vec, + next_idx: i32, + gep_map: &mut GEPMap, + store_map: &mut StoreMap, + id_map: &mut IdMap, + symbol_map: &mut SymbolMap, + llvm_arg_pairs: &LLVMPairMap, + node_to_arg: &mut Vec, +) -> (Vec, i32) { + // assert!(isa_fpext(expr)); + let operand = LLVMGetOperand(expr, 0); + return ref_to_egg( + operand, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); +} + +unsafe fn sqrt32_to_egg( + expr: LLVMValueRef, + enode_vec: Vec, + next_idx: i32, + gep_map: &mut GEPMap, + store_map: &mut StoreMap, + id_map: &mut IdMap, + symbol_map: &mut SymbolMap, + llvm_arg_pairs: &LLVMPairMap, + node_to_arg: &mut Vec, +) -> (Vec, i32) { + // assert!(isa_sqrt32(expr)); + let operand = LLVMGetOperand(expr, 0); + let (mut new_enode_vec, next_idx1) = ref_to_egg( + operand, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); + let sqrt_node = VecLang::Sqrt([Id::from((next_idx1 - 1) as usize)]); + new_enode_vec.push(sqrt_node); + return (new_enode_vec, next_idx1 + 1); +} + +unsafe fn sqrt64_to_egg( + _expr: LLVMValueRef, + _enode_vec: Vec, + _next_idx: i32, + _gep_map: &mut GEPMap, + _store_map: &mut StoreMap, + _id_map: &mut IdMap, + _symbol_map: &mut SymbolMap, + _llvm_arg_pairs: &LLVMPairMap, + _node_to_arg: &mut Vec, +) -> (Vec, i32) { + // assert!(isa_sqrt64(expr)); + panic!("Currently, we do not handle calls to sqrt.f64 without fpext and fptrunc before and after!. This is the only 'context sensitive' instance in the dispatch matching. 
") +} + +unsafe fn fptrunc_to_egg( + expr: LLVMValueRef, + enode_vec: Vec, + next_idx: i32, + gep_map: &mut GEPMap, + store_map: &mut StoreMap, + id_map: &mut IdMap, + symbol_map: &mut SymbolMap, + llvm_arg_pairs: &LLVMPairMap, + node_to_arg: &mut Vec, +) -> (Vec, i32) { + // assert!(isa_fptrunc(expr)); + let operand = LLVMGetOperand(expr, 0); + if isa_sqrt64(operand) { + return sqrt64_to_egg( + operand, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); + } + return ref_to_egg( + operand, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); +} + +unsafe fn bitcast_to_egg( + expr: LLVMValueRef, + enode_vec: Vec, + next_idx: i32, + gep_map: &mut GEPMap, + store_map: &mut StoreMap, + id_map: &mut IdMap, + symbol_map: &mut SymbolMap, + llvm_arg_pairs: &LLVMPairMap, + node_to_arg: &mut Vec, +) -> (Vec, i32) { + // assert!(isa_bitcast(expr)); + let operand = LLVMGetOperand(expr, 0); + let result = ref_to_egg( + operand, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); + return result; +} + +unsafe fn ref_to_egg( + expr: LLVMValueRef, + mut enode_vec: Vec, + next_idx: i32, + gep_map: &mut GEPMap, + store_map: &mut StoreMap, + id_map: &mut IdMap, + symbol_map: &mut SymbolMap, + llvm_arg_pairs: &LLVMPairMap, + node_to_arg: &mut Vec, +) -> (Vec, i32) { + for (original_val, _) in llvm_arg_pairs.iter() { + if cmp_val_ref_address(&**original_val, &*expr) { + // Here we create a new numbered variable node + let var_idx = gen_node_idx(); + let var_idx_str = var_idx.to_string(); + let special_var_node = VecLang::Symbol(Symbol::from(var_idx_str)); + enode_vec.push(special_var_node); + let node_to_arg_pair = IntLLVMPair { + arg: expr, + node_int: var_idx, + }; + node_to_arg.push(node_to_arg_pair); + return (enode_vec, next_idx + 1); + } + } + let (vec, next_idx) = match match_llvm_op(&expr) { + LLVMOpType::Bop => bop_to_egg( + expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ), + LLVMOpType::Unop => unop_to_egg( + expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ), + LLVMOpType::Constant => const_to_egg( + expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ), + LLVMOpType::Gep => gep_to_egg( + expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ), + LLVMOpType::Load => load_to_egg( + expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ), + LLVMOpType::Store => store_to_egg( + expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ), + LLVMOpType::Argument => arg_to_egg( + expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ), + LLVMOpType::Call => load_call_to_egg( + expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ), + LLVMOpType::FPTrunc => fptrunc_to_egg( + expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ), + LLVMOpType::FPExt => fpext_to_egg( + expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ), + 
LLVMOpType::SIToFP => sitofp_to_egg( + expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ), + LLVMOpType::Bitcast => bitcast_to_egg( + expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ), + LLVMOpType::Sqrt32 => sqrt32_to_egg( + expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ), + LLVMOpType::Sqrt64 => sqrt64_to_egg( + expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ), + }; + return (vec, next_idx); +} + +unsafe fn llvm_to_egg<'a>( + bb_vec: &[LLVMValueRef], + llvm_arg_pairs: &mut LLVMPairMap, + node_to_arg: &mut Vec, +) -> (RecExpr, GEPMap, StoreMap, SymbolMap) { + let mut enode_vec = Vec::new(); + let (mut gep_map, mut store_map, mut id_map, mut symbol_map) = ( + BTreeMap::new(), + BTreeMap::new(), + BTreeSet::new(), + BTreeMap::new(), + ); + let mut next_idx = 0; + for bop in bb_vec.iter() { + if isa_store(*bop) { + let (new_enode_vec, next_idx1) = ref_to_egg( + *bop, + enode_vec, + next_idx, + &mut gep_map, + &mut store_map, + &mut id_map, + &mut symbol_map, + llvm_arg_pairs, + node_to_arg, + ); + next_idx = next_idx1; + enode_vec = new_enode_vec; + } + } + let mut final_vec = Vec::new(); + for id in id_map.iter() { + final_vec.push(*id); + } + balanced_pad_vector(&mut final_vec, &mut enode_vec); + + let rec_expr = RecExpr::from(enode_vec); + (rec_expr, gep_map, store_map, symbol_map) +} diff --git a/src/dios-egraphs/Diospyros/test-runner.sh b/src/dios-egraphs/Diospyros/test-runner.sh index 4a84ae9c..6897dcaa 100644 --- a/src/dios-egraphs/Diospyros/test-runner.sh +++ b/src/dios-egraphs/Diospyros/test-runner.sh @@ -1,4 +1,4 @@ -cd .. +cd .. 
1> /dev/null case $1 in run-opt) make run-opt test=$2 @@ -13,4 +13,4 @@ case $1 in echo "match failure" ;; esac -cd - \ No newline at end of file +cd - 1> /dev/null \ No newline at end of file From 6d2f4d6734d0f3f89a2b6094569abe96fb29e03f Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Mon, 25 Apr 2022 23:08:21 -0400 Subject: [PATCH 070/143] rewrite mutually recursive framework --- src/dios-egraphs/Diospyros/src/lib.rs | 349 +++++++++++++++++- src/dios-egraphs/Diospyros/src/llvm_to_egg.rs | 45 +++ 2 files changed, 386 insertions(+), 8 deletions(-) diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 92e3a929..51e0c082 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -279,13 +279,14 @@ enum LLVMOpType { Gep, Unop, Bop, - Call, - FPTrunc, - SIToFP, - Bitcast, + // Call, + // FPTrunc, + // SIToFP, + // Bitcast, Sqrt32, Sqrt64, - FPExt, + // FPExt, + UnhandledLLVMOpCode, } unsafe fn get_pow2(n: u32) -> u32 { @@ -492,9 +493,10 @@ unsafe fn match_llvm_op(expr: &LLVMValueRef) -> LLVMOpType { } else if isa_fpext(*expr) { return LLVMOpType::FPExt; } else { - LLVMDumpValue(*expr); - println!(); - panic!("ref_to_egg: Unmatched case for LLVMValueRef {:?}", *expr); + return LLVMOpType::UnhandledLLVMOpCode; + // LLVMDumpValue(*expr); + // println!(); + // panic!("ref_to_egg: Unmatched case for LLVMValueRef {:?}", *expr); } } @@ -1206,6 +1208,337 @@ unsafe fn llvm_to_egg<'a>( (rec_expr, gep_map, store_map, symbol_map) } +// ---- Construction Zone ------- + +struct LLVM2EggState<'a> { + llvm2egg: &'a BTreeMap, +} + +unsafe fn new___arg_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + let sym_name = gen_arg_name(); + let symbol = VecLang::Symbol(Symbol::from(sym_name)); + symbol_map.insert(symbol.clone(), expr); + enode_vec.push(symbol); + return (enode_vec, next_idx + 1); +} + +unsafe fn new___bop_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + let left = LLVMGetOperand(expr, 0); + let right = LLVMGetOperand(expr, 1); + let (v1, next_idx1) = ref_to_egg( + left, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); + let (mut v2, next_idx2) = ref_to_egg( + right, + v1, + next_idx1, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); + let ids = [ + Id::from((next_idx1 - 1) as usize), + Id::from((next_idx2 - 1) as usize), + ]; + v2.push(choose_binop(&expr, ids)); + (v2, next_idx2 + 1) +} + +unsafe fn new___unop_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + let sub_expr = LLVMGetOperand(expr, 0); + let (mut v, next_idx1) = ref_to_egg( + sub_expr, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); + let id = Id::from((next_idx1 - 1) as usize); + v.push(choose_unop(&expr, id)); + (v, next_idx1 + 1) +} + +unsafe fn new___gep_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + // // assert!(isa_argument(expr) || isa_gep(expr) || isa_load(expr)); + // let mut enode_vec = Vec::new(); + let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); + 
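A simplified sketch of the driver loop described above, which walks backwards from each store, collects one root id per store, and would then pad and wrap the roots; the ToyInstr enum and index-based operands are stand-ins for LLVMValueRef, so this only illustrates the shape of the traversal.

    // Toy IR: operands are indices into the instruction slice.
    enum ToyInstr {
        Const(f32),
        Add(usize, usize),
        Store(usize),
    }

    // Flatten one operand tree into postfix nodes, the same shape ref_to_egg builds.
    fn translate(ir: &[ToyInstr], idx: usize, nodes: &mut Vec<String>) -> usize {
        match &ir[idx] {
            ToyInstr::Const(c) => nodes.push(format!("Num({})", c)),
            ToyInstr::Add(l, r) => {
                let li = translate(ir, *l, nodes);
                let ri = translate(ir, *r, nodes);
                nodes.push(format!("Add({}, {})", li, ri));
            }
            // A store's root is the value it writes; its address goes in store_map.
            ToyInstr::Store(v) => return translate(ir, *v, nodes),
        }
        nodes.len() - 1
    }

    fn main() {
        let chunk = [
            ToyInstr::Const(1.0),
            ToyInstr::Const(2.0),
            ToyInstr::Add(0, 1),
            ToyInstr::Store(2),
        ];
        let mut nodes = Vec::new();
        let mut roots = Vec::new();
        for (i, instr) in chunk.iter().enumerate() {
            if let ToyInstr::Store(_) = instr {
                roots.push(translate(&chunk, i, &mut nodes)); // one root per store
            }
        }
        // The real driver pads `roots` to the vector width and wraps them in a Vec node.
        println!("nodes = {:?}, roots = {:?}", nodes, roots);
    }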
enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); + + let num_gep_operands = LLVMGetNumOperands(expr); + let mut indices = Vec::new(); + for operand_idx in 1..num_gep_operands { + let array_offset = llvm_index(expr, operand_idx); + indices.push(array_offset); + } + let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); + let offsets_symbol = Symbol::from(&offsets_string); + enode_vec.push(VecLang::Symbol(offsets_symbol)); + + let get_node = VecLang::Get([ + Id::from((next_idx) as usize), + Id::from((next_idx + 1) as usize), + ]); + (*gep_map).insert( + (Symbol::from(array_name), Symbol::from(&offsets_string)), + expr, + ); + enode_vec.push(get_node); + + return (enode_vec, next_idx + 3); +} + +unsafe fn new___sitofp_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + panic!("Unhandled: TODO: Eliminate sitofp enum to force translation to a register."); +} + +unsafe fn new___load_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + return gep_to_egg( + expr, // we pass the entire instruction and not just the address + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); +} + +unsafe fn new___store_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + let data = LLVMGetOperand(expr, 0); + let addr = LLVMGetOperand(expr, 1); // expected to be a gep operator or addr in LLVM + let (vec, next_idx1) = ref_to_egg( + data, + enode_vec, + next_idx, + gep_map, + store_map, + id_map, + symbol_map, + llvm_arg_pairs, + node_to_arg, + ); + (*store_map).insert(next_idx1 - 1, addr); + (*id_map).insert(Id::from((next_idx1 - 1) as usize)); + return (vec, next_idx1); +} + +unsafe fn new___const_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + let value = get_constant_float(expr); + enode_vec.push(VecLang::Num(value as i32)); + (enode_vec, next_idx + 1) +} + +unsafe fn new___call_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + panic!("Unhandled: TODO: Eliminate call enum to force translation to a register."); +} + +unsafe fn new___fpext_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + panic!("Unhandled: TODO: Eliminate fpext enum to force translation to a register."); +} + +unsafe fn new___sqrt32_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + // assert!(isa_sqrt32(expr)); + let operand = LLVMGetOperand(expr, 0); + let (mut new_enode_vec, next_idx1) = + new___ref_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + let sqrt_node = VecLang::Sqrt([Id::from((next_idx1 - 1) as usize)]); + new_enode_vec.push(sqrt_node); + return (new_enode_vec, next_idx1 + 1); +} + +/// TODO: Fix Immediately: Remove Context Sensitive Matching!!! 
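A small sketch of the (array name, offsets string) keying that the gep_to_egg family above relies on, with String standing in for the LLVMValueRef stored in gep_map; encode_gep is a hypothetical helper that only shows how indices such as [0, 3] become the key "0,3,".

    use std::collections::BTreeMap;

    // Build the (name, "i,j,...") key used to remember a GEP address.
    fn encode_gep(array: &str, indices: &[i32]) -> (String, String) {
        let offsets: String = indices.iter().map(|i| i.to_string() + ",").collect();
        (array.to_string(), offsets)
    }

    fn main() {
        // BTreeMap keeps iteration deterministic, matching the note above about
        // flaky tests when an unordered map is used.
        let mut gep_map: BTreeMap<(String, String), String> = BTreeMap::new();
        let key = encode_gep("mat_in", &[0, 3]);
        gep_map.insert(key.clone(), "<gep instruction>".to_string());
        // Egg-to-LLVM translation later recovers the address by the same key.
        println!("{:?} -> {:?}", key, gep_map.get(&key));
    }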
+unsafe fn new___sqrt64_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + panic!("Currently, we do not handle calls to sqrt.f64 without fpext and fptrunc before and after!. This is the only 'context sensitive' instance in the dispatch matching. ") +} + +unsafe fn new___fptrunc_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + panic!("Unhandled: TODO: Eliminate fptrunc enum to force translation to a register."); +} + +unsafe fn new___bitcast_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + panic!("Unhandled: TODO: Eliminate bitcast enum to force translation to a register."); +} + +/// Translates an Unhandled OpCode to an Egg Register. +/// +/// This represents a blackbox computation that we bail on translating +/// Assumes that the OpCode is actually a computation. If not, translation fails. +unsafe fn new___unhandled_opcode_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + panic!("Unimplemented: TODO Translate to a register."); +} + +/// Recursively Translate LLVM Instruction to Egg Nodes. +/// +/// TODO: Take care of chunk boundaries: translation should never overreach a chunk +unsafe fn new___ref_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: i32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, i32) { + // If, on a different pass, the instruction was translated already, then + // just used the egg node representing the translation + if translation_metadata.llvm2egg.contains_key(&llvm_instr) { + let translated_egg_node = translation_metadata + .llvm2egg + .get(&llvm_instr) + .expect("Key must exist"); + egg_nodes.push(*translated_egg_node); + return (egg_nodes, next_node_idx + 1); + } + // Recurse Backwards on the current instruction, translating its children, + // based on the opcode of the parent. 
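A toy sketch of the memoisation step that new___ref_to_egg performs before anything else: an instruction translated on an earlier pass is reused rather than re-translated. The u32 keys stand in for LLVMValueRef addresses and the String nodes for VecLang; it is an illustration under those assumptions only.

    use std::collections::BTreeMap;

    fn ref_to_node(instr: u32, memo: &mut BTreeMap<u32, String>, nodes: &mut Vec<String>) -> usize {
        if let Some(node) = memo.get(&instr).cloned() {
            nodes.push(node); // reuse the node chosen on an earlier pass
        } else {
            let fresh = format!("Reg{}", instr); // otherwise bail out to an opaque register
            memo.insert(instr, fresh.clone());
            nodes.push(fresh);
        }
        nodes.len() - 1
    }

    fn main() {
        let mut memo = BTreeMap::new();
        let mut nodes = Vec::new();
        ref_to_node(7, &mut memo, &mut nodes);
        ref_to_node(7, &mut memo, &mut nodes); // the second reference hits the memo table
        println!("{:?}", nodes); // ["Reg7", "Reg7"]
    }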
+ return match match_llvm_op(&llvm_instr) { + LLVMOpType::Bop => new___bop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata), + LLVMOpType::Unop => { + new___unop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::Constant => { + new___const_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::Gep => new___gep_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata), + LLVMOpType::Load => { + new___load_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::Store => { + new___store_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::Argument => { + new___arg_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::Sqrt32 => { + new___sqrt32_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::Sqrt64 => { + new___sqrt64_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::UnhandledLLVMOpCode => { + new___unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + }; +} + +unsafe fn new_llvm_to_egg( + llvm_instrs_in_chunk: &[LLVMValueRef], + // TODO: feed this in as an argument llvm_instr2egg_node: BTreeMap, +) -> RecExpr { + let mut egg_nodes: Vec = Vec::new(); + + // Map from (translated / opaque) llvm instructions to egg graph nodes + let llvm_instr2egg_node: BTreeMap = BTreeMap::new(); + + // State Variable To Hold Maps During Translation + let mut llvm2egg_state = LLVM2EggState { + llvm2egg: &llvm_instr2egg_node, + }; + + // Index of next node to translate + let mut next_node_idx: i32 = 0; + + // for each store, iterate backwards from that store and translate to egg + for llvm_instr in llvm_instrs_in_chunk.iter() { + if isa_store(*llvm_instr) { + new___ref_to_egg() + } + } + + panic!("Unimplemented"); +} + +// ---- Construction Zone ------- + unsafe fn translate_egg( enode: &VecLang, vec: &[VecLang], diff --git a/src/dios-egraphs/Diospyros/src/llvm_to_egg.rs b/src/dios-egraphs/Diospyros/src/llvm_to_egg.rs index 41cc5757..c088c1a3 100644 --- a/src/dios-egraphs/Diospyros/src/llvm_to_egg.rs +++ b/src/dios-egraphs/Diospyros/src/llvm_to_egg.rs @@ -1172,6 +1172,51 @@ unsafe fn ref_to_egg( return (vec, next_idx); } +struct LLVM2EggState { + llvm2egg: &BTreeMap, +} + +unsafe fn ref_to_egg( + llvm_instr: LLVMValueRef, + next_node_idx: i64, + mut egg_nodes: Vec, + mut translation_metadata: LLVM2EggState, +) -> Vec { + if (translation_metadata.llvm2egg.contains_key(llvm_instr)) { + let translated_egg_node = translation_metadata + .llvm2egg + .get(llvm_instr) + .expect("Key must exist"); + egg_nodes.push(translated_egg_node); + return (egg_nodes, next_node_idx + 1); + } + panic!("Unimplemented"); +} + +unsafe fn llvm_to_egg(llvm_instrs_in_chunk: &[LLVMValueRef]) -> RecExpr { + let mut egg_nodes: Vec = Vec::new(); + + // Map from (translated) llvm instructions to egg graph nodes + let mut llvm_instr2egg_node: BTreeMap = BTreeMap::new(); + + // State Variable To Hold Maps During Translation + let mut llvm2egg_state = LLVM2EggState { + llvm2egg: llvm_instr2egg_node, + }; + + // Index of next node to translate + let mut next_node_idx: i64 = 0; + + // for each store, iterate backwards from that store and translate to egg + for llvm_instr in llvm_instrs_in_chunk.iter() { + if isa_store(*llvm_instr) { + ref_to_egg() + } + } + + panic!("Unimplemented"); +} + unsafe fn llvm_to_egg<'a>( bb_vec: &[LLVMValueRef], llvm_arg_pairs: &mut 
LLVMPairMap, From c07f799ef7e5689773b7af2469df8a937d2ddf41 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 26 Apr 2022 19:56:39 -0400 Subject: [PATCH 071/143] new translation llvm to egg functions --- src/dios-egraphs/Diospyros/src/lib.rs | 457 +++++++++++--------------- 1 file changed, 194 insertions(+), 263 deletions(-) diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 51e0c082..1e4f0ed3 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -51,9 +51,20 @@ extern "C" { type GEPMap = BTreeMap<(Symbol, Symbol), LLVMValueRef>; type LLVMPairMap = BTreeMap; -static mut ARG_IDX: i32 = 0; +static mut ARG_IDX: u32 = 0; static mut CALL_IDX: i32 = 0; static mut NODE_IDX: u32 = 0; +static mut REG_IDX: u32 = 0; + +unsafe fn gen_arg_idx() -> u32 { + ARG_IDX += 1; + return ARG_IDX; +} + +unsafe fn gen_reg_idx() -> u32 { + REG_IDX += 1; + return REG_IDX; +} unsafe fn gen_node_idx() -> u32 { NODE_IDX += 1; @@ -274,18 +285,13 @@ type SymbolMap = BTreeMap; enum LLVMOpType { Argument, Constant, - Store, - Load, - Gep, - Unop, - Bop, - // Call, - // FPTrunc, - // SIToFP, - // Bitcast, + FNeg, + FAdd, + FSub, + FMul, + FDiv, Sqrt32, - Sqrt64, - // FPExt, + // TODO: SGN signum UnhandledLLVMOpCode, } @@ -463,40 +469,70 @@ unsafe fn llvm_recursive_add( return cloned_inst; } -unsafe fn match_llvm_op(expr: &LLVMValueRef) -> LLVMOpType { - if isa_bop(*expr) { - return LLVMOpType::Bop; - } else if isa_unop(*expr) { - return LLVMOpType::Unop; - } else if isa_constant(*expr) { +unsafe fn isa_fadd(llvm_instr: LLVMValueRef) -> bool { + match LLVMGetInstructionOpcode(llvm_instr) { + LLVMFadd => true, + _ => false, + } +} + +unsafe fn isa_fsub(llvm_instr: LLVMValueRef) -> bool { + match LLVMGetInstructionOpcode(llvm_instr) { + LLVMFSub => true, + _ => false, + } +} + +unsafe fn isa_fmul(llvm_instr: LLVMValueRef) -> bool { + match LLVMGetInstructionOpcode(llvm_instr) { + LLVMFMul => true, + _ => false, + } +} + +unsafe fn isa_fdiv(llvm_instr: LLVMValueRef) -> bool { + match LLVMGetInstructionOpcode(llvm_instr) { + LLVMFDiv => true, + _ => false, + } +} + +unsafe fn isa_fneg(llvm_instr: LLVMValueRef) -> bool { + match LLVMGetInstructionOpcode(llvm_instr) { + LLVMFNeg => true, + _ => false, + } +} + +unsafe fn isa_supported_binop(llvm_instr: LLVMValueRef) -> bool { + return isa_fadd(llvm_instr) + || isa_fmul(llvm_instr) + || isa_fdiv(llvm_instr) + || isa_fsub(llvm_instr); +} + +unsafe fn isa_supported_unop(llvm_instr: LLVMValueRef) -> bool { + return isa_fneg(llvm_instr); + +unsafe fn match_llvm_op(llvm_instr: &LLVMValueRef) -> LLVMOpType { + if isa_fadd(*llvm_instr) { + return LLVMOpType::FAdd; + } else if isa_fsub(*llvm_instr) { + return LLVMOpType::FSub; + } else if isa_fmul(*llvm_instr) { + return LLVMOpType::FMul; + } else if isa_fdiv(*llvm_instr) { + return LLVMOpType::FDiv; + } else if isa_fneg(*llvm_instr) { + return LLVMOpType::FNeg; + } else if isa_constant(*llvm_instr) { return LLVMOpType::Constant; - } else if isa_gep(*expr) { - return LLVMOpType::Gep; - } else if isa_load(*expr) { - return LLVMOpType::Load; - } else if isa_store(*expr) { - return LLVMOpType::Store; - } else if isa_argument(*expr) { + } else if isa_argument(*llvm_instr) { return LLVMOpType::Argument; - } else if isa_call(*expr) { - return LLVMOpType::Call; - } else if isa_fptrunc(*expr) { - return LLVMOpType::FPTrunc; - } else if isa_sitofp(*expr) { - return LLVMOpType::SIToFP; - } else if isa_bitcast(*expr) { - return LLVMOpType::Bitcast; - } else 
if isa_sqrt32(*expr) { + } else if isa_sqrt32(*llvm_instr) { return LLVMOpType::Sqrt32; - } else if isa_sqrt64(*expr) { - return LLVMOpType::Sqrt64; - } else if isa_fpext(*expr) { - return LLVMOpType::FPExt; } else { return LLVMOpType::UnhandledLLVMOpCode; - // LLVMDumpValue(*expr); - // println!(); - // panic!("ref_to_egg: Unmatched case for LLVMValueRef {:?}", *expr); } } @@ -1210,240 +1246,108 @@ unsafe fn llvm_to_egg<'a>( // ---- Construction Zone ------- -struct LLVM2EggState<'a> { - llvm2egg: &'a BTreeMap, +/// LLVM2EggState Contains Egg to LLVM Translation Metadata +struct LLVM2EggState<'a, 'b> { + llvm2reg: &'a BTreeMap, + llvm2arg: &'b BTreeMap, } +/// Translates LLVM Arg to an Egg Argument Node unsafe fn new___arg_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, - next_node_idx: i32, + next_node_idx: u32, mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) { - let sym_name = gen_arg_name(); - let symbol = VecLang::Symbol(Symbol::from(sym_name)); - symbol_map.insert(symbol.clone(), expr); - enode_vec.push(symbol); - return (enode_vec, next_idx + 1); +) -> (Vec, u32) { + assert!(isa_argument(llvm_instr)); + let argument_idx = gen_arg_idx(); + let argument_node = VecLang::Arg(argument_idx); + egg_nodes.push(argument_node); + translation_metadata.llvm2arg.insert(llvm_instr, argument_node); + return (egg_nodes, next_node_idx + 1); } +/// Translates Supported Binop Instruction to an Egg Bunary Operator Node +/// +/// Supported Binary Operators are: FAdd, FSub, FMul, FDiv unsafe fn new___bop_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, - next_node_idx: i32, + next_node_idx: u32, mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) { - let left = LLVMGetOperand(expr, 0); - let right = LLVMGetOperand(expr, 1); - let (v1, next_idx1) = ref_to_egg( +) -> (Vec, u32) { + assert!(isa_supported_binop(llvm_instr)); + let left = LLVMGetOperand(llvm_instr, 0); + let right = LLVMGetOperand(llvm_instr, 1); + let (left_egg_nodes, left_next_idx) = new___ref_to_egg( left, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, + egg_nodes, + next_node_idx, + translation_metadata ); - let (mut v2, next_idx2) = ref_to_egg( + let (mut right_egg_nodes, right_next_idx) = new___ref_to_egg( right, - v1, - next_idx1, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, + left_egg_nodes, + left_next_idx, + translation_metadata ); let ids = [ - Id::from((next_idx1 - 1) as usize), - Id::from((next_idx2 - 1) as usize), + Id::from((left_next_idx - 1) as usize), + Id::from((right_next_idx - 1) as usize), ]; - v2.push(choose_binop(&expr, ids)); - (v2, next_idx2 + 1) + right_egg_nodes.push(choose_binop(&llvm_instr, ids)); + (right_egg_nodes, right_next_idx + 1) } +/// Translates Supported Unop Instruction to an Egg Unary Operator Node +/// +/// Supported Unary Operators are: FNeg unsafe fn new___unop_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, - next_node_idx: i32, + next_node_idx: u32, mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) { - let sub_expr = LLVMGetOperand(expr, 0); - let (mut v, next_idx1) = ref_to_egg( - sub_expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - let id = Id::from((next_idx1 - 1) as usize); - v.push(choose_unop(&expr, id)); - (v, next_idx1 + 1) -} - -unsafe fn new___gep_to_egg( - llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, - next_node_idx: i32, - mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) 
{ - // // assert!(isa_argument(expr) || isa_gep(expr) || isa_load(expr)); - // let mut enode_vec = Vec::new(); - let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); - enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); - - let num_gep_operands = LLVMGetNumOperands(expr); - let mut indices = Vec::new(); - for operand_idx in 1..num_gep_operands { - let array_offset = llvm_index(expr, operand_idx); - indices.push(array_offset); - } - let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); - let offsets_symbol = Symbol::from(&offsets_string); - enode_vec.push(VecLang::Symbol(offsets_symbol)); - - let get_node = VecLang::Get([ - Id::from((next_idx) as usize), - Id::from((next_idx + 1) as usize), - ]); - (*gep_map).insert( - (Symbol::from(array_name), Symbol::from(&offsets_string)), - expr, +) -> (Vec, u32) { + assert!(isa_supported_unop(llvm_instr)); + let neg_expr = LLVMGetOperand(llvm_instr, 0); + let (mut new_egg_nodes, new_next_idx) = new___ref_to_egg( + neg_expr, + egg_nodes, + next_node_idx, + translation_metadata ); - enode_vec.push(get_node); - - return (enode_vec, next_idx + 3); -} - -unsafe fn new___sitofp_to_egg( - llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, - next_node_idx: i32, - mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) { - panic!("Unhandled: TODO: Eliminate sitofp enum to force translation to a register."); -} - -unsafe fn new___load_to_egg( - llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, - next_node_idx: i32, - mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) { - return gep_to_egg( - expr, // we pass the entire instruction and not just the address - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); -} - -unsafe fn new___store_to_egg( - llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, - next_node_idx: i32, - mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) { - let data = LLVMGetOperand(expr, 0); - let addr = LLVMGetOperand(expr, 1); // expected to be a gep operator or addr in LLVM - let (vec, next_idx1) = ref_to_egg( - data, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - (*store_map).insert(next_idx1 - 1, addr); - (*id_map).insert(Id::from((next_idx1 - 1) as usize)); - return (vec, next_idx1); + let id = Id::from((new_next_idx - 1) as usize); + new_egg_nodes.push(choose_unop(&llvm_instr, id)); + (new_egg_nodes, new_next_idx + 1) } +/// Translates Const Instruction to an Egg Number Node unsafe fn new___const_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, - next_node_idx: i32, - mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) { - let value = get_constant_float(expr); - enode_vec.push(VecLang::Num(value as i32)); - (enode_vec, next_idx + 1) -} - -unsafe fn new___call_to_egg( - llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, - next_node_idx: i32, + next_node_idx: u32, mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) { - panic!("Unhandled: TODO: Eliminate call enum to force translation to a register."); -} - -unsafe fn new___fpext_to_egg( - llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, - next_node_idx: i32, - mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) { - panic!("Unhandled: TODO: Eliminate fpext enum to force translation to a register."); +) -> (Vec, u32) { + assert!(isa_constant(llvm_instr)); + let value = get_constant_float(llvm_instr); + egg_nodes.push(VecLang::Num(value as i32)); + (egg_nodes, next_node_idx + 1) } +/// 
Translates Sqrt 32 Instruction to an Egg Square Root Node unsafe fn new___sqrt32_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, - next_node_idx: i32, + next_node_idx: u32, mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) { - // assert!(isa_sqrt32(expr)); - let operand = LLVMGetOperand(expr, 0); - let (mut new_enode_vec, next_idx1) = +) -> (Vec, u32) { + assert!(isa_sqrt32(llvm_instr)); + let operand = LLVMGetOperand(llvm_instr, 0); + let (mut new_enode_vec, new_next_node_idx) = new___ref_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); - let sqrt_node = VecLang::Sqrt([Id::from((next_idx1 - 1) as usize)]); + let sqrt_node = VecLang::Sqrt([Id::from((new_next_node_idx - 1) as usize)]); new_enode_vec.push(sqrt_node); - return (new_enode_vec, next_idx1 + 1); -} - -/// TODO: Fix Immediately: Remove Context Sensitive Matching!!! -unsafe fn new___sqrt64_to_egg( - llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, - next_node_idx: i32, - mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) { - panic!("Currently, we do not handle calls to sqrt.f64 without fpext and fptrunc before and after!. This is the only 'context sensitive' instance in the dispatch matching. ") -} - -unsafe fn new___fptrunc_to_egg( - llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, - next_node_idx: i32, - mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) { - panic!("Unhandled: TODO: Eliminate fptrunc enum to force translation to a register."); -} - -unsafe fn new___bitcast_to_egg( - llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, - next_node_idx: i32, - mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) { - panic!("Unhandled: TODO: Eliminate bitcast enum to force translation to a register."); + (new_enode_vec, new_next_node_idx + 1) } /// Translates an Unhandled OpCode to an Egg Register. @@ -1453,10 +1357,14 @@ unsafe fn new___bitcast_to_egg( unsafe fn new___unhandled_opcode_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, - next_node_idx: i32, + next_node_idx: u32, mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) { - panic!("Unimplemented: TODO Translate to a register."); +) -> (Vec, u32) { + let register_idx = gen_reg_idx(); + let register_node = VecLang::Reg(register_idx); + egg_nodes.push(register_node); + translation_metadata.llvm2reg.insert(llvm_instr, register_node); + (egg_nodes, next_node_idx + 1) } /// Recursively Translate LLVM Instruction to Egg Nodes. @@ -1465,14 +1373,14 @@ unsafe fn new___unhandled_opcode_to_egg( unsafe fn new___ref_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, - next_node_idx: i32, + next_node_idx: u32, mut translation_metadata: LLVM2EggState, -) -> (Vec, i32) { +) -> (Vec, u32) { // If, on a different pass, the instruction was translated already, then // just used the egg node representing the translation - if translation_metadata.llvm2egg.contains_key(&llvm_instr) { + if translation_metadata.llvm2reg.contains_key(&llvm_instr) { let translated_egg_node = translation_metadata - .llvm2egg + .llvm2reg .get(&llvm_instr) .expect("Key must exist"); egg_nodes.push(*translated_egg_node); @@ -1481,56 +1389,79 @@ unsafe fn new___ref_to_egg( // Recurse Backwards on the current instruction, translating its children, // based on the opcode of the parent. 
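A condensed sketch of the classify-then-fall-back dispatch this patch moves to, where any opcode outside the supported set becomes an opaque register leaf instead of a panic; ToyOp and OpClass are stand-ins for the LLVM opcode and the trimmed LLVMOpType enum.

    #[derive(Debug)]
    enum ToyOp {
        FAdd,
        FMul,
        Call,
        Branch,
    }

    enum OpClass {
        Binop,
        Unhandled,
    }

    // Mirror of match_llvm_op's shape: classify first, never panic on unknowns.
    fn classify(op: &ToyOp) -> OpClass {
        match op {
            ToyOp::FAdd | ToyOp::FMul => OpClass::Binop,
            _ => OpClass::Unhandled, // calls, branches, casts, ... become registers
        }
    }

    fn main() {
        let mut next_reg = 0u32;
        for op in [ToyOp::FAdd, ToyOp::FMul, ToyOp::Call, ToyOp::Branch] {
            match classify(&op) {
                OpClass::Binop => println!("{:?} -> recurse on operands", op),
                OpClass::Unhandled => {
                    next_reg += 1;
                    println!("{:?} -> Reg{}", op, next_reg); // black-boxed but still a usable leaf
                }
            }
        }
    }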
return match match_llvm_op(&llvm_instr) { - LLVMOpType::Bop => new___bop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata), - LLVMOpType::Unop => { + LLVMOpType::FAdd | LLVMOpType::FSub | LLVMOpType::FMul | LLVMOpType::FDiv => { + new___bop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::FNeg => { new___unop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) } LLVMOpType::Constant => { new___const_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) } - LLVMOpType::Gep => new___gep_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata), - LLVMOpType::Load => { - new___load_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) - } - LLVMOpType::Store => { - new___store_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) - } LLVMOpType::Argument => { new___arg_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) } LLVMOpType::Sqrt32 => { new___sqrt32_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) } - LLVMOpType::Sqrt64 => { - new___sqrt64_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) - } LLVMOpType::UnhandledLLVMOpCode => { new___unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) } }; } -unsafe fn new_llvm_to_egg( +unsafe fn start_translating_ref_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: u32, + mut translation_metadata: LLVM2EggState, +) -> (Vec, u32) { + // TODO: We need to record the "start" instruction so we can stitch it back into the LLVM code + // This start should be placed in translation metadata + return new___ref_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); +} + +unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { + return match match_llvm_op(&llvm_instr) { + LLVMOpType::FAdd + | LLVMOpType::FMul + | LLVMOpType::FDiv + | LLVMOpType::FSub + | LLVMOpType::FNeg + | LLVMOpType::Constant + | LLVMOpType::Sqrt32 => true, + LLVMOpType::Argument | LLVMOpType::UnhandledLLVMOpCode => false, + }; +} + +unsafe fn new___llvm_to_egg( llvm_instrs_in_chunk: &[LLVMValueRef], // TODO: feed this in as an argument llvm_instr2egg_node: BTreeMap, ) -> RecExpr { let mut egg_nodes: Vec = Vec::new(); - // Map from (translated / opaque) llvm instructions to egg graph nodes - let llvm_instr2egg_node: BTreeMap = BTreeMap::new(); + // Map from (translated / opaque) llvm instructions to register egg graph nodes + let llvm_instr2reg_node: BTreeMap = BTreeMap::new(); + + // Map from (translated) llvm instructions to argument egg graph nodes + let llvm_instr2arg_node: BTreeMap = BTreeMap::new(); // State Variable To Hold Maps During Translation - let mut llvm2egg_state = LLVM2EggState { - llvm2egg: &llvm_instr2egg_node, + let mut translation_metadata = LLVM2EggState { + llvm2reg: &llvm_instr2reg_node, + llvm2arg: &llvm_instr2arg_node, }; // Index of next node to translate - let mut next_node_idx: i32 = 0; + let mut next_node_idx: u32 = 0; + // for each store, iterate backwards from that store and translate to egg for llvm_instr in llvm_instrs_in_chunk.iter() { - if isa_store(*llvm_instr) { - new___ref_to_egg() + if can_start_translation_instr(*llvm_instr) { + let (new_egg_nodes, new_next_node_idx) = start_translating_ref_to_egg(*llvm_instr, egg_nodes, next_node_idx, translation_metadata); + egg_nodes = new_egg_nodes; + next_node_idx = new_next_node_idx; } } From 5f0701d50d15a6c7d3a343b4c8576b7a0420c440 Mon Sep 17 00:00:00 2001 
From: Jonathan Tran Date: Wed, 27 Apr 2022 03:19:27 -0400 Subject: [PATCH 072/143] add restricted instructions --- src/dios-egraphs/Diospyros/src/lib.rs | 56 ++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 1e4f0ed3..3a22b6ab 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -1247,9 +1247,13 @@ unsafe fn llvm_to_egg<'a>( // ---- Construction Zone ------- /// LLVM2EggState Contains Egg to LLVM Translation Metadata -struct LLVM2EggState<'a, 'b> { +struct LLVM2EggState<'a, 'b, 'c, 'd, 'e> { llvm2reg: &'a BTreeMap, llvm2arg: &'b BTreeMap, + instructions_in_chunk: &'c BTreeSet, + restricted_instructions: &'d BTreeSet, + start_instructions: Vec, + start_ids: &'e BTreeSet, } /// Translates LLVM Arg to an Egg Argument Node @@ -1370,6 +1374,7 @@ unsafe fn new___unhandled_opcode_to_egg( /// Recursively Translate LLVM Instruction to Egg Nodes. /// /// TODO: Take care of chunk boundaries: translation should never overreach a chunk +/// TODO: May need to keep track of llvm instructions across chunks unsafe fn new___ref_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, @@ -1386,9 +1391,17 @@ unsafe fn new___ref_to_egg( egg_nodes.push(*translated_egg_node); return (egg_nodes, next_node_idx + 1); } + // If the current llvm instruction is a "restricted" instruction, do not translate, but make it a register + if translation_metadata.restricted_instructions.contains(&llvm_instr){ + return new___unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + } + // If the current llvm instruction is not in the current chunk, we must return a register + if !translation_metadata.instructions_in_chunk.contains(&llvm_instr) { + return new___unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + } // Recurse Backwards on the current instruction, translating its children, // based on the opcode of the parent. 
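A compact sketch of the guard order this patch adds in front of the opcode dispatch: reuse a memoised node, otherwise black-box restricted or out-of-chunk instructions as registers, and only then translate normally. The u32 keys and String nodes are placeholders for LLVMValueRef and VecLang.

    use std::collections::{BTreeMap, BTreeSet};

    fn guard_then_translate(
        instr: u32,
        memo: &mut BTreeMap<u32, String>,
        restricted: &BTreeSet<u32>,
        chunk: &BTreeSet<u32>,
        next_reg: &mut u32,
    ) -> String {
        if let Some(node) = memo.get(&instr) {
            return node.clone(); // already translated on a previous pass
        }
        if restricted.contains(&instr) || !chunk.contains(&instr) {
            *next_reg += 1; // never translate across a chunk boundary
            let reg = format!("Reg{}", *next_reg);
            memo.insert(instr, reg.clone());
            return reg;
        }
        format!("translate({})", instr) // normal opcode dispatch happens here
    }

    fn main() {
        let mut memo = BTreeMap::new();
        let restricted: BTreeSet<u32> = [2].into_iter().collect();
        let chunk: BTreeSet<u32> = [1, 2, 3].into_iter().collect();
        let mut next_reg = 0;
        for i in [1, 2, 4] {
            let node = guard_then_translate(i, &mut memo, &restricted, &chunk, &mut next_reg);
            println!("{} -> {}", i, node);
        }
    }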
- return match match_llvm_op(&llvm_instr) { + return match_llvm_op(&llvm_instr) { LLVMOpType::FAdd | LLVMOpType::FSub | LLVMOpType::FMul | LLVMOpType::FDiv => { new___bop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) } @@ -1416,8 +1429,8 @@ unsafe fn start_translating_ref_to_egg( next_node_idx: u32, mut translation_metadata: LLVM2EggState, ) -> (Vec, u32) { - // TODO: We need to record the "start" instruction so we can stitch it back into the LLVM code - // This start should be placed in translation metadata + translation_metadata.start_instructions.push(llvm_instr); + translation_metadata.start_ids.insert(Id::from(next_node_idx as usize)); return new___ref_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); } @@ -1436,6 +1449,7 @@ unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { unsafe fn new___llvm_to_egg( llvm_instrs_in_chunk: &[LLVMValueRef], + restricted_instrs: &[LLVMValueRef], // TODO: feed this in as an argument llvm_instr2egg_node: BTreeMap, ) -> RecExpr { let mut egg_nodes: Vec = Vec::new(); @@ -1446,16 +1460,37 @@ unsafe fn new___llvm_to_egg( // Map from (translated) llvm instructions to argument egg graph nodes let llvm_instr2arg_node: BTreeMap = BTreeMap::new(); + // Ordered Vector of Starting LLVM instructions where translation began + let start_instructions: Vec = Vec::new(); + + // Ordered Set of Instructions in Chunk + let instructions_in_chunk: BTreeSet = BTreeSet::new(); + for llvm_instr in llvm_instrs_in_chunk.iter() { + instructions_in_chunk.insert(*llvm_instr); + } + + // Ordered Set of Ids + let start_ids: BTreeSet = BTreeSet::new(); + + // Ordered Set of Instructions NOT TO BE Translated, except as registers + let restricted_instrs: BTreeSet = BTreeSet::new(); + for llvm_instr in llvm_instrs_in_chunk.iter() { + restricted_instrs.insert(*llvm_instr); + } + // State Variable To Hold Maps During Translation let mut translation_metadata = LLVM2EggState { llvm2reg: &llvm_instr2reg_node, llvm2arg: &llvm_instr2arg_node, + instructions_in_chunk: &instructions_in_chunk, + restricted_instructions: &restricted_instrs, + start_instructions: start_instructions, + start_ids: &start_ids, }; // Index of next node to translate let mut next_node_idx: u32 = 0; - // for each store, iterate backwards from that store and translate to egg for llvm_instr in llvm_instrs_in_chunk.iter() { if can_start_translation_instr(*llvm_instr) { @@ -1465,7 +1500,16 @@ unsafe fn new___llvm_to_egg( } } - panic!("Unimplemented"); + // Generate a padded vector + let mut outer_vec_ids = Vec::new(); + for id in translation_metadata.start_ids.iter() { + outer_vec_ids.push(*id); + } + balanced_pad_vector(&mut outer_vec_ids, &mut egg_nodes); + + let rec_expr = RecExpr::from(egg_nodes); + + return rec_expr; } // ---- Construction Zone ------- From 7c59f22979fc7ea62292438416ceab26ca5a2f3b Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 27 Apr 2022 03:32:49 -0400 Subject: [PATCH 073/143] remove old unusued code --- src/dios-egraphs/Diospyros/src/lib.rs | 805 ++------------------------ 1 file changed, 49 insertions(+), 756 deletions(-) diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 3a22b6ab..20127a43 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -6,7 +6,6 @@ use llvm::{core::*, prelude::*, LLVMOpcode::*, LLVMRealPredicate}; use std::{ cmp, collections::{BTreeMap, BTreeSet}, - ffi::CStr, mem, os::raw::c_char, slice::from_raw_parts, @@ -513,6 +512,7 
@@ unsafe fn isa_supported_binop(llvm_instr: LLVMValueRef) -> bool { unsafe fn isa_supported_unop(llvm_instr: LLVMValueRef) -> bool { return isa_fneg(llvm_instr); +} unsafe fn match_llvm_op(llvm_instr: &LLVMValueRef) -> LLVMOpType { if isa_fadd(*llvm_instr) { @@ -543,710 +543,9 @@ unsafe fn choose_unop(unop: &LLVMValueRef, id: Id) -> VecLang { } } -unsafe fn arg_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - _gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &LLVMPairMap, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - let sym_name = gen_arg_name(); - let symbol = VecLang::Symbol(Symbol::from(sym_name)); - symbol_map.insert(symbol.clone(), expr); - enode_vec.push(symbol); - return (enode_vec, next_idx + 1); -} - -unsafe fn bop_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - let left = LLVMGetOperand(expr, 0); - let right = LLVMGetOperand(expr, 1); - let (v1, next_idx1) = ref_to_egg( - left, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - let (mut v2, next_idx2) = ref_to_egg( - right, - v1, - next_idx1, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - let ids = [ - Id::from((next_idx1 - 1) as usize), - Id::from((next_idx2 - 1) as usize), - ]; - v2.push(choose_binop(&expr, ids)); - (v2, next_idx2 + 1) -} - -unsafe fn unop_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - let sub_expr = LLVMGetOperand(expr, 0); - let (mut v, next_idx1) = ref_to_egg( - sub_expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - let id = Id::from((next_idx1 - 1) as usize); - v.push(choose_unop(&expr, id)); - (v, next_idx1 + 1) -} - -unsafe fn gep_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &LLVMPairMap, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - // // assert!(isa_argument(expr) || isa_gep(expr) || isa_load(expr)); - // let mut enode_vec = Vec::new(); - let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); - enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); - - let num_gep_operands = LLVMGetNumOperands(expr); - let mut indices = Vec::new(); - for operand_idx in 1..num_gep_operands { - let array_offset = llvm_index(expr, operand_idx); - indices.push(array_offset); - } - let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); - let offsets_symbol = Symbol::from(&offsets_string); - enode_vec.push(VecLang::Symbol(offsets_symbol)); - - let get_node = VecLang::Get([ - Id::from((next_idx) as usize), - Id::from((next_idx + 1) as usize), - ]); - (*gep_map).insert( - (Symbol::from(array_name), Symbol::from(&offsets_string)), - expr, - ); - enode_vec.push(get_node); - - return (enode_vec, next_idx + 3); -} - -unsafe fn _address_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, 
- _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &LLVMPairMap, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); - enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); - - let num_gep_operands = LLVMGetNumOperands(expr); - let mut indices = Vec::new(); - for operand_idx in 1..num_gep_operands { - let array_offset = llvm_index(expr, operand_idx); - indices.push(array_offset); - } - let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); - let offsets_symbol = Symbol::from(&offsets_string); - enode_vec.push(VecLang::Symbol(offsets_symbol)); - - let get_node = VecLang::Get([ - Id::from((next_idx) as usize), - Id::from((next_idx + 1) as usize), - ]); - (*gep_map).insert( - (Symbol::from(array_name), Symbol::from(&offsets_string)), - expr, - ); - enode_vec.push(get_node); - - return (enode_vec, next_idx + 3); -} - -unsafe fn sitofp_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &LLVMPairMap, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); - enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); - - let num_gep_operands = LLVMGetNumOperands(expr); - let mut indices = Vec::new(); - for operand_idx in 1..num_gep_operands { - let array_offset = llvm_index(expr, operand_idx); - indices.push(array_offset); - } - let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); - let offsets_symbol = Symbol::from(&offsets_string); - enode_vec.push(VecLang::Symbol(offsets_symbol)); - - let get_node = VecLang::Get([ - Id::from((next_idx) as usize), - Id::from((next_idx + 1) as usize), - ]); - (*gep_map).insert( - (Symbol::from(array_name), Symbol::from(&offsets_string)), - expr, - ); - enode_vec.push(get_node); - - return (enode_vec, next_idx + 3); -} - -unsafe fn load_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - return gep_to_egg( - expr, // we pass the entire instruction and not just the address - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); -} - -unsafe fn store_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - let data = LLVMGetOperand(expr, 0); - let addr = LLVMGetOperand(expr, 1); // expected to be a gep operator or addr in LLVM - let (vec, next_idx1) = ref_to_egg( - data, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - (*store_map).insert(next_idx1 - 1, addr); - (*id_map).insert(Id::from((next_idx1 - 1) as usize)); - return (vec, next_idx1); -} - -unsafe fn const_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - _gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &LLVMPairMap, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - let value = get_constant_float(expr); - enode_vec.push(VecLang::Num(value as i32)); - (enode_vec, next_idx + 1) -} - -unsafe 
fn _load_arg_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &LLVMPairMap, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - // assert!(isa_argument(expr) || isa_gep(expr)); - let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); - enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); - - let num_gep_operands = LLVMGetNumOperands(expr); - let mut indices = Vec::new(); - for operand_idx in 1..num_gep_operands { - let array_offset = llvm_index(expr, operand_idx); - indices.push(array_offset); - } - let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); - let offsets_symbol = Symbol::from(&offsets_string); - enode_vec.push(VecLang::Symbol(offsets_symbol)); - - let get_node = VecLang::Get([ - Id::from((next_idx) as usize), - Id::from((next_idx + 1) as usize), - ]); - (*gep_map).insert( - (Symbol::from(array_name), Symbol::from(&offsets_string)), - expr, - ); - enode_vec.push(get_node); - - return (enode_vec, next_idx + 3); -} - -unsafe fn load_call_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - if isa_sqrt32(expr) { - return sqrt32_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - } - let call_sym_name = gen_call_name(); - let call_sym = VecLang::Symbol(Symbol::from(call_sym_name)); - symbol_map.insert(call_sym.clone(), expr); - enode_vec.push(call_sym); - return (enode_vec, next_idx + 1); -} - -unsafe fn fpext_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - // assert!(isa_fpext(expr)); - let operand = LLVMGetOperand(expr, 0); - return ref_to_egg( - operand, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); -} - -unsafe fn sqrt32_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - // assert!(isa_sqrt32(expr)); - let operand = LLVMGetOperand(expr, 0); - let (mut new_enode_vec, next_idx1) = ref_to_egg( - operand, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - let sqrt_node = VecLang::Sqrt([Id::from((next_idx1 - 1) as usize)]); - new_enode_vec.push(sqrt_node); - return (new_enode_vec, next_idx1 + 1); -} - -unsafe fn sqrt64_to_egg( - _expr: LLVMValueRef, - _enode_vec: Vec, - _next_idx: i32, - _gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &LLVMPairMap, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - // assert!(isa_sqrt64(expr)); - panic!("Currently, we do not handle calls to sqrt.f64 without fpext and fptrunc before and after!. This is the only 'context sensitive' instance in the dispatch matching. 
") -} - -unsafe fn fptrunc_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - // assert!(isa_fptrunc(expr)); - let operand = LLVMGetOperand(expr, 0); - if isa_sqrt64(operand) { - return sqrt64_to_egg( - operand, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - } - return ref_to_egg( - operand, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); -} - -unsafe fn bitcast_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - // assert!(isa_bitcast(expr)); - let operand = LLVMGetOperand(expr, 0); - let result = ref_to_egg( - operand, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - return result; -} - -unsafe fn ref_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - for (original_val, _) in llvm_arg_pairs.iter() { - if cmp_val_ref_address(&**original_val, &*expr) { - // Here we create a new numbered variable node - let var_idx = gen_node_idx(); - let var_idx_str = var_idx.to_string(); - let special_var_node = VecLang::Symbol(Symbol::from(var_idx_str)); - enode_vec.push(special_var_node); - let node_to_arg_pair = IntLLVMPair { - arg: expr, - node_int: var_idx, - }; - node_to_arg.push(node_to_arg_pair); - return (enode_vec, next_idx + 1); - } - } - let (vec, next_idx) = match match_llvm_op(&expr) { - LLVMOpType::Bop => bop_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Unop => unop_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Constant => const_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Gep => gep_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Load => load_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Store => store_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Argument => arg_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Call => load_call_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::FPTrunc => fptrunc_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::FPExt => fpext_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - 
LLVMOpType::SIToFP => sitofp_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Bitcast => bitcast_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Sqrt32 => sqrt32_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Sqrt64 => sqrt64_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - }; - return (vec, next_idx); -} - -unsafe fn llvm_to_egg<'a>( - bb_vec: &[LLVMValueRef], - llvm_arg_pairs: &mut LLVMPairMap, - node_to_arg: &mut Vec, -) -> (RecExpr, GEPMap, StoreMap, SymbolMap) { - let mut enode_vec = Vec::new(); - let (mut gep_map, mut store_map, mut id_map, mut symbol_map) = ( - BTreeMap::new(), - BTreeMap::new(), - BTreeSet::new(), - BTreeMap::new(), - ); - let mut next_idx = 0; - for bop in bb_vec.iter() { - if isa_store(*bop) { - let (new_enode_vec, next_idx1) = ref_to_egg( - *bop, - enode_vec, - next_idx, - &mut gep_map, - &mut store_map, - &mut id_map, - &mut symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - next_idx = next_idx1; - enode_vec = new_enode_vec; - } - } - let mut final_vec = Vec::new(); - for id in id_map.iter() { - final_vec.push(*id); - } - balanced_pad_vector(&mut final_vec, &mut enode_vec); - - let rec_expr = RecExpr::from(enode_vec); - (rec_expr, gep_map, store_map, symbol_map) -} - // ---- Construction Zone ------- -/// LLVM2EggState Contains Egg to LLVM Translation Metadata +/// LLVM2EggState Contains Egg to LLVM Translation Metadata struct LLVM2EggState<'a, 'b, 'c, 'd, 'e> { llvm2reg: &'a BTreeMap, llvm2arg: &'b BTreeMap, @@ -1257,7 +556,7 @@ struct LLVM2EggState<'a, 'b, 'c, 'd, 'e> { } /// Translates LLVM Arg to an Egg Argument Node -unsafe fn new___arg_to_egg( +unsafe fn arg_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, next_node_idx: u32, @@ -1267,14 +566,16 @@ unsafe fn new___arg_to_egg( let argument_idx = gen_arg_idx(); let argument_node = VecLang::Arg(argument_idx); egg_nodes.push(argument_node); - translation_metadata.llvm2arg.insert(llvm_instr, argument_node); + translation_metadata + .llvm2arg + .insert(llvm_instr, argument_node); return (egg_nodes, next_node_idx + 1); } /// Translates Supported Binop Instruction to an Egg Bunary Operator Node -/// +/// /// Supported Binary Operators are: FAdd, FSub, FMul, FDiv -unsafe fn new___bop_to_egg( +unsafe fn bop_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, next_node_idx: u32, @@ -1283,18 +584,10 @@ unsafe fn new___bop_to_egg( assert!(isa_supported_binop(llvm_instr)); let left = LLVMGetOperand(llvm_instr, 0); let right = LLVMGetOperand(llvm_instr, 1); - let (left_egg_nodes, left_next_idx) = new___ref_to_egg( - left, - egg_nodes, - next_node_idx, - translation_metadata - ); - let (mut right_egg_nodes, right_next_idx) = new___ref_to_egg( - right, - left_egg_nodes, - left_next_idx, - translation_metadata - ); + let (left_egg_nodes, left_next_idx) = + ref_to_egg(left, egg_nodes, next_node_idx, translation_metadata); + let (mut right_egg_nodes, right_next_idx) = + ref_to_egg(right, left_egg_nodes, left_next_idx, translation_metadata); let ids = [ Id::from((left_next_idx - 1) as usize), Id::from((right_next_idx - 1) as usize), @@ -1304,9 +597,9 @@ unsafe fn new___bop_to_egg( } /// Translates Supported Unop Instruction to an Egg Unary Operator Node -/// +/// 
/// Supported Unary Operators are: FNeg -unsafe fn new___unop_to_egg( +unsafe fn unop_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, next_node_idx: u32, @@ -1314,19 +607,15 @@ unsafe fn new___unop_to_egg( ) -> (Vec, u32) { assert!(isa_supported_unop(llvm_instr)); let neg_expr = LLVMGetOperand(llvm_instr, 0); - let (mut new_egg_nodes, new_next_idx) = new___ref_to_egg( - neg_expr, - egg_nodes, - next_node_idx, - translation_metadata - ); + let (mut new_egg_nodes, new_next_idx) = + ref_to_egg(neg_expr, egg_nodes, next_node_idx, translation_metadata); let id = Id::from((new_next_idx - 1) as usize); new_egg_nodes.push(choose_unop(&llvm_instr, id)); (new_egg_nodes, new_next_idx + 1) } /// Translates Const Instruction to an Egg Number Node -unsafe fn new___const_to_egg( +unsafe fn const_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, next_node_idx: u32, @@ -1339,7 +628,7 @@ unsafe fn new___const_to_egg( } /// Translates Sqrt 32 Instruction to an Egg Square Root Node -unsafe fn new___sqrt32_to_egg( +unsafe fn sqrt32_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, next_node_idx: u32, @@ -1348,7 +637,7 @@ unsafe fn new___sqrt32_to_egg( assert!(isa_sqrt32(llvm_instr)); let operand = LLVMGetOperand(llvm_instr, 0); let (mut new_enode_vec, new_next_node_idx) = - new___ref_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + ref_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); let sqrt_node = VecLang::Sqrt([Id::from((new_next_node_idx - 1) as usize)]); new_enode_vec.push(sqrt_node); (new_enode_vec, new_next_node_idx + 1) @@ -1358,7 +647,7 @@ unsafe fn new___sqrt32_to_egg( /// /// This represents a blackbox computation that we bail on translating /// Assumes that the OpCode is actually a computation. If not, translation fails. 
-unsafe fn new___unhandled_opcode_to_egg( +unsafe fn unhandled_opcode_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, next_node_idx: u32, @@ -1367,7 +656,9 @@ unsafe fn new___unhandled_opcode_to_egg( let register_idx = gen_reg_idx(); let register_node = VecLang::Reg(register_idx); egg_nodes.push(register_node); - translation_metadata.llvm2reg.insert(llvm_instr, register_node); + translation_metadata + .llvm2reg + .insert(llvm_instr, register_node); (egg_nodes, next_node_idx + 1) } @@ -1375,7 +666,7 @@ unsafe fn new___unhandled_opcode_to_egg( /// /// TODO: Take care of chunk boundaries: translation should never overreach a chunk /// TODO: May need to keep track of llvm instructions across chunks -unsafe fn new___ref_to_egg( +unsafe fn ref_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, next_node_idx: u32, @@ -1392,33 +683,33 @@ unsafe fn new___ref_to_egg( return (egg_nodes, next_node_idx + 1); } // If the current llvm instruction is a "restricted" instruction, do not translate, but make it a register - if translation_metadata.restricted_instructions.contains(&llvm_instr){ - return new___unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + if translation_metadata + .restricted_instructions + .contains(&llvm_instr) + { + return unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); } // If the current llvm instruction is not in the current chunk, we must return a register - if !translation_metadata.instructions_in_chunk.contains(&llvm_instr) { - return new___unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + if !translation_metadata + .instructions_in_chunk + .contains(&llvm_instr) + { + return unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); } // Recurse Backwards on the current instruction, translating its children, // based on the opcode of the parent. 
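// Standalone sketch (illustrative names, `usize` in place of LLVMValueRef) of
// how the llvm2reg map used above de-duplicates opaque values: the first time
// an unhandled instruction is seen it is assigned a fresh register index, and
// any later reference resolves to the same register node instead of a new one.
use std::collections::BTreeMap;

fn opaque_register(
    instr: usize,
    llvm2reg: &mut BTreeMap<usize, u32>,
    next_reg: &mut u32,
) -> u32 {
    *llvm2reg.entry(instr).or_insert_with(|| {
        let reg = *next_reg;
        *next_reg += 1;
        reg
    })
}

fn main() {
    let mut llvm2reg = BTreeMap::new();
    let mut next_reg = 0u32;
    let a = opaque_register(10, &mut llvm2reg, &mut next_reg);
    let b = opaque_register(11, &mut llvm2reg, &mut next_reg);
    let a_again = opaque_register(10, &mut llvm2reg, &mut next_reg);
    assert_eq!((a, b, a_again), (0, 1, 0));
}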
- return match_llvm_op(&llvm_instr) { + return match match_llvm_op(&llvm_instr) { LLVMOpType::FAdd | LLVMOpType::FSub | LLVMOpType::FMul | LLVMOpType::FDiv => { - new___bop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) - } - LLVMOpType::FNeg => { - new___unop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + bop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) } + LLVMOpType::FNeg => unop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata), LLVMOpType::Constant => { - new___const_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) - } - LLVMOpType::Argument => { - new___arg_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) - } - LLVMOpType::Sqrt32 => { - new___sqrt32_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + const_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) } + LLVMOpType::Argument => arg_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata), + LLVMOpType::Sqrt32 => sqrt32_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata), LLVMOpType::UnhandledLLVMOpCode => { - new___unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) } }; } @@ -1430,8 +721,10 @@ unsafe fn start_translating_ref_to_egg( mut translation_metadata: LLVM2EggState, ) -> (Vec, u32) { translation_metadata.start_instructions.push(llvm_instr); - translation_metadata.start_ids.insert(Id::from(next_node_idx as usize)); - return new___ref_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + translation_metadata + .start_ids + .insert(Id::from(next_node_idx as usize)); + return ref_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); } unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { @@ -1447,7 +740,7 @@ unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { }; } -unsafe fn new___llvm_to_egg( +unsafe fn llvm_to_egg( llvm_instrs_in_chunk: &[LLVMValueRef], restricted_instrs: &[LLVMValueRef], // TODO: feed this in as an argument llvm_instr2egg_node: BTreeMap, @@ -1456,9 +749,8 @@ unsafe fn new___llvm_to_egg( // Map from (translated / opaque) llvm instructions to register egg graph nodes let llvm_instr2reg_node: BTreeMap = BTreeMap::new(); - // Map from (translated) llvm instructions to argument egg graph nodes - let llvm_instr2arg_node: BTreeMap = BTreeMap::new(); + let llvm_instr2arg_node: BTreeMap = BTreeMap::new(); // Ordered Vector of Starting LLVM instructions where translation began let start_instructions: Vec = Vec::new(); @@ -1494,7 +786,8 @@ unsafe fn new___llvm_to_egg( // for each store, iterate backwards from that store and translate to egg for llvm_instr in llvm_instrs_in_chunk.iter() { if can_start_translation_instr(*llvm_instr) { - let (new_egg_nodes, new_next_node_idx) = start_translating_ref_to_egg(*llvm_instr, egg_nodes, next_node_idx, translation_metadata); + let (new_egg_nodes, new_next_node_idx) = + start_translating_ref_to_egg(*llvm_instr, egg_nodes, next_node_idx, translation_metadata); egg_nodes = new_egg_nodes; next_node_idx = new_next_node_idx; } From e4a133f8f2c8efed2deeffdd5bc3a7c857b359d9 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 27 Apr 2022 04:33:46 -0400 Subject: [PATCH 074/143] fix a bunch of egg to llvm translations --- src/dios-egraphs/Diospyros/src/lib.rs | 405 +++++++++++++++++++++++++- 1 file changed, 390 
insertions(+), 15 deletions(-) diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 20127a43..7d1218aa 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -543,16 +543,14 @@ unsafe fn choose_unop(unop: &LLVMValueRef, id: Id) -> VecLang { } } -// ---- Construction Zone ------- - /// LLVM2EggState Contains Egg to LLVM Translation Metadata -struct LLVM2EggState<'a, 'b, 'c, 'd, 'e> { - llvm2reg: &'a BTreeMap, - llvm2arg: &'b BTreeMap, - instructions_in_chunk: &'c BTreeSet, - restricted_instructions: &'d BTreeSet, +struct LLVM2EggState { + llvm2reg: BTreeMap, + llvm2arg: BTreeMap, + instructions_in_chunk: BTreeSet, + restricted_instructions: BTreeSet, start_instructions: Vec, - start_ids: &'e BTreeSet, + start_ids: BTreeSet, } /// Translates LLVM Arg to an Egg Argument Node @@ -743,8 +741,9 @@ unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { unsafe fn llvm_to_egg( llvm_instrs_in_chunk: &[LLVMValueRef], restricted_instrs: &[LLVMValueRef], + vectorize: bool, // TODO: feed this in as an argument llvm_instr2egg_node: BTreeMap, -) -> RecExpr { +) -> (RecExpr, LLVM2EggState) { let mut egg_nodes: Vec = Vec::new(); // Map from (translated / opaque) llvm instructions to register egg graph nodes @@ -772,12 +771,12 @@ unsafe fn llvm_to_egg( // State Variable To Hold Maps During Translation let mut translation_metadata = LLVM2EggState { - llvm2reg: &llvm_instr2reg_node, - llvm2arg: &llvm_instr2arg_node, - instructions_in_chunk: &instructions_in_chunk, - restricted_instructions: &restricted_instrs, + llvm2reg: llvm_instr2reg_node, + llvm2arg: llvm_instr2arg_node, + instructions_in_chunk: instructions_in_chunk, + restricted_instructions: restricted_instrs, start_instructions: start_instructions, - start_ids: &start_ids, + start_ids: start_ids, }; // Index of next node to translate @@ -793,6 +792,8 @@ unsafe fn llvm_to_egg( } } + // If vectorize is true, then generate the vector, with padding + // TODO: Implement a switch to not vectorize // Generate a padded vector let mut outer_vec_ids = Vec::new(); for id in translation_metadata.start_ids.iter() { @@ -802,7 +803,381 @@ unsafe fn llvm_to_egg( let rec_expr = RecExpr::from(egg_nodes); - return rec_expr; + return (rec_expr, translation_metadata); +} + +// ---- Construction Zone ------- + +/// Egg2LLVMState represent the state needed to translate from Egg to LLVM +struct Egg2LLVMState { + llvm2egg_metadata: LLVM2EggState, + egg_nodes_vector: [VecLang], + builder: LLVMBuilderRef, + context: LLVMContextRef, + module: LLVMModuleRef, +} + +unsafe fn new___arg_to_llvm( + egg_node: &VecLang, + translation_metadata: Egg2LLVMState, +) -> LLVMValueRef { + panic!("Unimplemented"); +} + +unsafe fn new___reg_to_llvm( + egg_node: &VecLang, + translation_metadata: Egg2LLVMState, +) -> LLVMValueRef { + panic!("Unimplemented"); +} + +unsafe fn new___num_to_llvm(n: &i32, md: Egg2LLVMState) -> LLVMValueRef { + LLVMConstReal(LLVMFloatTypeInContext(md.context), *n as f64) +} + +unsafe fn new___vec_to_llvm(boxed_ids: Box<[Id]>, md: Egg2LLVMState) -> LLVMValueRef { + // Convert the Boxed Ids to a Vector, and generate a vector of zeros + let idvec = boxed_ids.to_vec(); + let idvec_len = idvec.len(); + let mut zeros = Vec::new(); + for _ in 0..idvec_len { + zeros.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 0 as f64)); + } + + // Convert the Vector of Zeros to a Mut PTr to construct an LLVM Zero Vector + let zeros_ptr = zeros.as_mut_ptr(); + let mut vector = 
LLVMConstVector(zeros_ptr, idvec.len() as u32); + for (idx, &eggid) in idvec.iter().enumerate() { + let elt = &md.egg_nodes_vector[usize::from(eggid)]; + let mut elt_val = new___egg_to_llvm(elt, md); + // TODO: Can We Eliminate this BitCast in the future?? + // With the new formulation, will we ever have an integer type? + // Check if the elt is an int + if isa_integertype(elt_val) { + elt_val = LLVMBuildBitCast( + md.builder, + elt_val, + LLVMFloatTypeInContext(md.context), + b"\0".as_ptr() as *const _, + ); + } + + // Construct the Vector + vector = LLVMBuildInsertElement( + md.builder, + vector, + elt_val, + LLVMConstInt(LLVMIntTypeInContext(md.context, 32), idx as u64, 0), + b"\0".as_ptr() as *const _, + ); + } + vector +} + +// TODO: Segregate Vec and Scalar Binops? +unsafe fn new___binop_to_llvm( + binop_node: &VecLang, + left_id: &Id, + right_id: &Id, + md: Egg2LLVMState, +) -> LLVMValueRef { + let left = new___egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_id)], md); + let right = new___egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_id)], md); + + // TODO: Can We Remove these Casts? + let left = if LLVMTypeOf(left) == LLVMIntTypeInContext(md.context, 32) { + LLVMBuildBitCast( + md.builder, + left, + LLVMFloatTypeInContext(md.context), + b"\0".as_ptr() as *const _, + ) + } else { + left + }; + + // TODO: Can We Remove these Casts? + let right = if LLVMTypeOf(right) == LLVMIntTypeInContext(md.context, 32) { + LLVMBuildBitCast( + md.builder, + right, + LLVMFloatTypeInContext(md.context), + b"\0".as_ptr() as *const _, + ) + } else { + right + }; + + // TODO: Can we eliminate these cases? + if isa_constfp(left) + && !isa_constaggregatezero(left) + && isa_constfp(right) + && !isa_constaggregatezero(right) + { + let mut loses_info = 1; + let nright = LLVMConstRealGetDouble(right, &mut loses_info); + let new_right = build_constant_float(nright, md.context); + let nleft = LLVMConstRealGetDouble(left, &mut loses_info); + let new_left = build_constant_float(nleft, md.context); + translate_binop( + binop_node, + new_left, + new_right, + md.builder, + b"\0".as_ptr() as *const _, + ) + } else if isa_constfp(right) && !isa_constaggregatezero(right) { + let mut loses_info = 1; + let n = LLVMConstRealGetDouble(right, &mut loses_info); + let new_right = build_constant_float(n, md.context); + translate_binop( + binop_node, + left, + new_right, + md.builder, + b"\0".as_ptr() as *const _, + ) + } else if isa_constfp(left) && !isa_constaggregatezero(left) { + let mut loses_info = 1; + let n = LLVMConstRealGetDouble(left, &mut loses_info); + let new_left = build_constant_float(n, md.context); + translate_binop( + binop_node, + new_left, + right, + md.builder, + b"\0".as_ptr() as *const _, + ) + } else { + translate_binop( + binop_node, + left, + right, + md.builder, + b"\0".as_ptr() as *const _, + ) + } +} + +unsafe fn new___concat_to_llvm( + left_vector: &Id, + right_vector: &Id, + md: Egg2LLVMState, +) -> LLVMValueRef { + { + let trans_v1 = new___egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_vector)], md); + let mut trans_v2 = new___egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_vector)], md); + + // In LLVM, it turns out all vectors need to be length power of 2 + // if the 2 vectors are not the same size, double the length of the smaller vector by padding with 0's in it + // manually concatenate 2 vectors by using a LLVM shuffle operation. 
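// Plain-Rust sketch of the concat-by-shuffle idea described in the comment
// above (Vec<f32> stands in for LLVM vector values; the function name is an
// illustrative assumption): the shorter operand is first widened with zeros,
// then a shuffle mask [0, 1, ..., n1 + n2 - 1] selects the lanes of the
// concatenation.
fn concat_like_shuffle(v1: &[f32], v2: &[f32]) -> Vec<f32> {
    // pad the shorter vector with zeros (the code below additionally assumes
    // power-of-two sizes, with v2 at most half the size of v1)
    let mut padded_v2 = v2.to_vec();
    padded_v2.resize(v1.len().max(v2.len()), 0.0);

    // shuffle mask 0..(n1 + n2): indices below n1 pick lanes of v1,
    // the rest pick the leading lanes of the padded v2
    let lanes: Vec<f32> = v1.iter().chain(padded_v2.iter()).copied().collect();
    (0..v1.len() + v2.len()).map(|i| lanes[i]).collect()
}

fn main() {
    assert_eq!(concat_like_shuffle(&[1.0, 2.0], &[3.0]), vec![1.0, 2.0, 3.0]);
}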
+ let v1_type = LLVMTypeOf(trans_v1); + let v1_size = LLVMGetVectorSize(v1_type); + let v2_type = LLVMTypeOf(trans_v2); + let v2_size = LLVMGetVectorSize(v2_type); + + // TODO: HACKY FIX FOR NOW + // assume both v1 and v2 are pow of 2 size + // assume v2 size smaller or equal to v1 size + // assume v2 is 1/2 size of v1 + if v1_size != v2_size { + // replicate v2 size + let mut zeros = Vec::new(); + for _ in 0..v2_size { + zeros.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 0 as f64)); + } + let zeros_ptr = zeros.as_mut_ptr(); + let zeros_vector = LLVMConstVector(zeros_ptr, v2_size); + let size = 2 * v2_size; + let mut indices = Vec::new(); + for i in 0..size { + indices.push(LLVMConstInt( + LLVMIntTypeInContext(md.context, 32), + i as u64, + 0, + )); + } + let mask = indices.as_mut_ptr(); + let mask_vector = LLVMConstVector(mask, size); + trans_v2 = LLVMBuildShuffleVector( + md.builder, + trans_v2, + zeros_vector, + mask_vector, + b"\0".as_ptr() as *const _, + ); + } + + let size = v1_size + v2_size; + let mut indices = Vec::new(); + for i in 0..size { + indices.push(LLVMConstInt( + LLVMIntTypeInContext(md.context, 32), + i as u64, + 0, + )); + } + + let mask = indices.as_mut_ptr(); + let mask_vector = LLVMConstVector(mask, size); + LLVMBuildShuffleVector( + md.builder, + trans_v1, + trans_v2, + mask_vector, + b"\0".as_ptr() as *const _, + ) + } +} + +unsafe fn new___mac_to_llvm( + accumulator_vector: &Id, + left_prod_vector: &Id, + right_prod_vector: &Id, + md: Egg2LLVMState, +) -> LLVMValueRef { + let trans_acc = new___egg_to_llvm(&md.egg_nodes_vector[usize::from(*accumulator_vector)], md); + let trans_v1 = new___egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_prod_vector)], md); + let trans_v2 = new___egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_prod_vector)], md); + let vec_type = LLVMTypeOf(trans_acc); + let param_types = [vec_type, vec_type, vec_type].as_mut_ptr(); + let fn_type = LLVMFunctionType(vec_type, param_types, 3, 0 as i32); + let func = LLVMAddFunction(md.module, b"llvm.fma.f32\0".as_ptr() as *const _, fn_type); + let args = [trans_v1, trans_v2, trans_acc].as_mut_ptr(); + LLVMBuildCall(md.builder, func, args, 3, b"\0".as_ptr() as *const _) +} + +unsafe fn new___egg_to_llvm( + egg_node: &VecLang, + translation_metadata: Egg2LLVMState, +) -> LLVMValueRef { + match egg_node { + VecLang::Symbol(..) => { + panic!("Symbol was found. Egg to LLVM Translation does not handle symbol nodes.") + } + VecLang::Get(..) => { + panic!("Get was found. Egg to LLVM Translation does not handle get nodes.") + } + VecLang::Ite(..) => panic!("Ite was found. Egg to LLVM Translation does not handle ite nodes."), + VecLang::Sgn(..) => panic!("Sgn was found. Egg to LLVM Translation does not handle sgn nodes. 
TODO: In the future, tis node will be handled alongside sqrt and neg scalar nodes."), + VecLang::Arg(_) => new___arg_to_llvm(egg_node, translation_metadata), + VecLang::Reg(_) => new___reg_to_llvm(egg_node, translation_metadata), + VecLang::Num(n) => new___num_to_llvm(n, translation_metadata), + VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { + new___vec_to_llvm(*boxed_ids, translation_metadata) + } + VecLang::VecAdd([l, r]) + | VecLang::VecMinus([l, r]) + | VecLang::VecMul([l, r]) + | VecLang::VecDiv([l, r]) + | VecLang::Add([l, r]) + | VecLang::Minus([l, r]) + | VecLang::Mul([l, r]) + | VecLang::Div([l, r]) + | VecLang::Or([l, r]) + | VecLang::And([l, r]) + | VecLang::Lt([l, r]) => new___binop_to_llvm(egg_node, l, r, translation_metadata), + VecLang::Concat([v1, v2]) => new___concat_to_llvm(v1, v2, translation_metadata), + VecLang::VecMAC([acc, v1, v2]) => new___mac_to_llvm(acc, v1, v2, translation_metadata), + + + // TODO: VecNeg, VecSqrt, VecSgn all have not been tested, need test cases. + // TODO: LLVM actually supports many more vector intrinsics, including + // vector sine/cosine instructions for floats. + VecLang::VecNeg([v]) => { + let neg_vector = translate_egg( + &vec[usize::from(*v)], + vec, + gep_map, + store_map, + symbol_map, + llvm_arg_pairs, + node_to_arg_pair, + builder, + context, + module, + ); + LLVMBuildFNeg(builder, neg_vector, b"\0".as_ptr() as *const _) + } + VecLang::VecSqrt([v]) => { + let sqrt_vec = translate_egg( + &vec[usize::from(*v)], + vec, + gep_map, + store_map, + symbol_map, + llvm_arg_pairs, + node_to_arg_pair, + builder, + context, + module, + ); + let vec_type = LLVMTypeOf(sqrt_vec); + let param_types = [vec_type].as_mut_ptr(); + let fn_type = LLVMFunctionType(vec_type, param_types, 1, 0 as i32); + let func = LLVMAddFunction(module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); + let args = [sqrt_vec].as_mut_ptr(); + LLVMBuildCall(builder, func, args, 1, b"\0".as_ptr() as *const _) + } + // compliant with c++ LibMath copysign function, which differs with sgn at x = 0. 
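// Scalar sketch of the copysign-based sign computation used for VecSgn just
// below (f32::copysign from the Rust standard library stands in for the
// llvm.copysign.f32 intrinsic): sign(x) is taken as copysign(1.0, x), so it
// returns +/-1.0 at x == 0 rather than 0, matching the C++ library behaviour
// noted in the comment above.
fn sgn_via_copysign(x: f32) -> f32 {
    1.0f32.copysign(x)
}

fn main() {
    assert_eq!(sgn_via_copysign(3.5), 1.0);
    assert_eq!(sgn_via_copysign(-2.0), -1.0);
    // the only place this differs from the mathematical sign function:
    assert_eq!(sgn_via_copysign(0.0), 1.0);
    assert_eq!(sgn_via_copysign(-0.0), -1.0);
}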
+ VecLang::VecSgn([v]) => { + let sgn_vec = translate_egg( + &vec[usize::from(*v)], + vec, + gep_map, + store_map, + symbol_map, + llvm_arg_pairs, + node_to_arg_pair, + builder, + context, + module, + ); + let vec_type = LLVMTypeOf(sgn_vec); + let vec_size = LLVMGetVectorSize(vec_type); + let mut ones = Vec::new(); + for _ in 0..vec_size { + ones.push(LLVMConstReal(LLVMFloatTypeInContext(context), 1 as f64)); + } + let ones_ptr = ones.as_mut_ptr(); + let ones_vector = LLVMConstVector(ones_ptr, vec_size); + let param_types = [vec_type, vec_type].as_mut_ptr(); + let fn_type = LLVMFunctionType(vec_type, param_types, 2, 0 as i32); + let func = LLVMAddFunction(module, b"llvm.copysign.f32\0".as_ptr() as *const _, fn_type); + let args = [ones_vector, sgn_vec].as_mut_ptr(); + LLVMBuildCall(builder, func, args, 2, b"\0".as_ptr() as *const _) + } + VecLang::Sgn([n]) | VecLang::Sqrt([n]) | VecLang::Neg([n]) => { + let mut number = translate_egg( + &vec[usize::from(*n)], + vec, + gep_map, + store_map, + symbol_map, + llvm_arg_pairs, + node_to_arg_pair, + builder, + context, + module, + ); + if isa_integertype(number) { + number = LLVMBuildBitCast( + builder, + number, + LLVMFloatTypeInContext(context), + b"\0".as_ptr() as *const _, + ); + } + translate_unop( + enode, + number, + builder, + context, + module, + b"\0".as_ptr() as *const _, + ) + } + } } // ---- Construction Zone ------- From 40d5bab231f0cef8f9b4e380ba0c4bf9450f9c78 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 27 Apr 2022 04:51:02 -0400 Subject: [PATCH 075/143] rewrote entire egg to llvm recursive func --- src/dios-egraphs/Diospyros/src/lib.rs | 733 ++++---------------------- 1 file changed, 107 insertions(+), 626 deletions(-) diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 7d1218aa..9523d4ef 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -222,7 +222,7 @@ pub fn optimize( } let mut node_to_arg = Vec::new(); let (expr, gep_map, store_map, symbol_map) = - llvm_to_egg(llvm_instrs, &mut llvm_arg_pairs, &mut node_to_arg); + llvm_to_egg_main(llvm_instrs, &mut llvm_arg_pairs, &mut node_to_arg); // optimization pass if print_opt { @@ -238,7 +238,7 @@ pub fn optimize( } // egg to llvm - egg_to_llvm( + egg_to_llvm_main( best, &gep_map, &store_map, @@ -583,9 +583,9 @@ unsafe fn bop_to_egg( let left = LLVMGetOperand(llvm_instr, 0); let right = LLVMGetOperand(llvm_instr, 1); let (left_egg_nodes, left_next_idx) = - ref_to_egg(left, egg_nodes, next_node_idx, translation_metadata); + llvm_to_egg(left, egg_nodes, next_node_idx, translation_metadata); let (mut right_egg_nodes, right_next_idx) = - ref_to_egg(right, left_egg_nodes, left_next_idx, translation_metadata); + llvm_to_egg(right, left_egg_nodes, left_next_idx, translation_metadata); let ids = [ Id::from((left_next_idx - 1) as usize), Id::from((right_next_idx - 1) as usize), @@ -606,7 +606,7 @@ unsafe fn unop_to_egg( assert!(isa_supported_unop(llvm_instr)); let neg_expr = LLVMGetOperand(llvm_instr, 0); let (mut new_egg_nodes, new_next_idx) = - ref_to_egg(neg_expr, egg_nodes, next_node_idx, translation_metadata); + llvm_to_egg(neg_expr, egg_nodes, next_node_idx, translation_metadata); let id = Id::from((new_next_idx - 1) as usize); new_egg_nodes.push(choose_unop(&llvm_instr, id)); (new_egg_nodes, new_next_idx + 1) @@ -635,7 +635,7 @@ unsafe fn sqrt32_to_egg( assert!(isa_sqrt32(llvm_instr)); let operand = LLVMGetOperand(llvm_instr, 0); let (mut new_enode_vec, new_next_node_idx) = - 
ref_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + llvm_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); let sqrt_node = VecLang::Sqrt([Id::from((new_next_node_idx - 1) as usize)]); new_enode_vec.push(sqrt_node); (new_enode_vec, new_next_node_idx + 1) @@ -664,7 +664,7 @@ unsafe fn unhandled_opcode_to_egg( /// /// TODO: Take care of chunk boundaries: translation should never overreach a chunk /// TODO: May need to keep track of llvm instructions across chunks -unsafe fn ref_to_egg( +unsafe fn llvm_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, next_node_idx: u32, @@ -712,7 +712,7 @@ unsafe fn ref_to_egg( }; } -unsafe fn start_translating_ref_to_egg( +unsafe fn start_translating_llvm_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, next_node_idx: u32, @@ -722,7 +722,7 @@ unsafe fn start_translating_ref_to_egg( translation_metadata .start_ids .insert(Id::from(next_node_idx as usize)); - return ref_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + return llvm_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); } unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { @@ -738,7 +738,7 @@ unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { }; } -unsafe fn llvm_to_egg( +unsafe fn llvm_to_egg_main( llvm_instrs_in_chunk: &[LLVMValueRef], restricted_instrs: &[LLVMValueRef], vectorize: bool, @@ -786,7 +786,7 @@ unsafe fn llvm_to_egg( for llvm_instr in llvm_instrs_in_chunk.iter() { if can_start_translation_instr(*llvm_instr) { let (new_egg_nodes, new_next_node_idx) = - start_translating_ref_to_egg(*llvm_instr, egg_nodes, next_node_idx, translation_metadata); + start_translating_llvm_to_egg(*llvm_instr, egg_nodes, next_node_idx, translation_metadata); egg_nodes = new_egg_nodes; next_node_idx = new_next_node_idx; } @@ -809,33 +809,27 @@ unsafe fn llvm_to_egg( // ---- Construction Zone ------- /// Egg2LLVMState represent the state needed to translate from Egg to LLVM -struct Egg2LLVMState { +struct Egg2LLVMState<'a> { llvm2egg_metadata: LLVM2EggState, - egg_nodes_vector: [VecLang], + egg_nodes_vector: &'a [VecLang], builder: LLVMBuilderRef, context: LLVMContextRef, module: LLVMModuleRef, } -unsafe fn new___arg_to_llvm( - egg_node: &VecLang, - translation_metadata: Egg2LLVMState, -) -> LLVMValueRef { +unsafe fn arg_to_llvm(egg_node: &VecLang, translation_metadata: Egg2LLVMState) -> LLVMValueRef { panic!("Unimplemented"); } -unsafe fn new___reg_to_llvm( - egg_node: &VecLang, - translation_metadata: Egg2LLVMState, -) -> LLVMValueRef { +unsafe fn reg_to_llvm(egg_node: &VecLang, translation_metadata: Egg2LLVMState) -> LLVMValueRef { panic!("Unimplemented"); } -unsafe fn new___num_to_llvm(n: &i32, md: Egg2LLVMState) -> LLVMValueRef { +unsafe fn num_to_llvm(n: &i32, md: Egg2LLVMState) -> LLVMValueRef { LLVMConstReal(LLVMFloatTypeInContext(md.context), *n as f64) } -unsafe fn new___vec_to_llvm(boxed_ids: Box<[Id]>, md: Egg2LLVMState) -> LLVMValueRef { +unsafe fn vec_to_llvm(boxed_ids: Box<[Id]>, md: Egg2LLVMState) -> LLVMValueRef { // Convert the Boxed Ids to a Vector, and generate a vector of zeros let idvec = boxed_ids.to_vec(); let idvec_len = idvec.len(); @@ -849,7 +843,7 @@ unsafe fn new___vec_to_llvm(boxed_ids: Box<[Id]>, md: Egg2LLVMState) -> LLVMValu let mut vector = LLVMConstVector(zeros_ptr, idvec.len() as u32); for (idx, &eggid) in idvec.iter().enumerate() { let elt = &md.egg_nodes_vector[usize::from(eggid)]; - let mut elt_val = new___egg_to_llvm(elt, md); + let mut elt_val 
= egg_to_llvm(elt, md); // TODO: Can We Eliminate this BitCast in the future?? // With the new formulation, will we ever have an integer type? // Check if the elt is an int @@ -875,14 +869,14 @@ unsafe fn new___vec_to_llvm(boxed_ids: Box<[Id]>, md: Egg2LLVMState) -> LLVMValu } // TODO: Segregate Vec and Scalar Binops? -unsafe fn new___binop_to_llvm( +unsafe fn binop_to_llvm( binop_node: &VecLang, left_id: &Id, right_id: &Id, md: Egg2LLVMState, ) -> LLVMValueRef { - let left = new___egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_id)], md); - let right = new___egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_id)], md); + let left = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_id)], md); + let right = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_id)], md); // TODO: Can We Remove these Casts? let left = if LLVMTypeOf(left) == LLVMIntTypeInContext(md.context, 32) { @@ -959,14 +953,10 @@ unsafe fn new___binop_to_llvm( } } -unsafe fn new___concat_to_llvm( - left_vector: &Id, - right_vector: &Id, - md: Egg2LLVMState, -) -> LLVMValueRef { +unsafe fn concat_to_llvm(left_vector: &Id, right_vector: &Id, md: Egg2LLVMState) -> LLVMValueRef { { - let trans_v1 = new___egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_vector)], md); - let mut trans_v2 = new___egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_vector)], md); + let trans_v1 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_vector)], md); + let mut trans_v2 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_vector)], md); // In LLVM, it turns out all vectors need to be length power of 2 // if the 2 vectors are not the same size, double the length of the smaller vector by padding with 0's in it @@ -1030,15 +1020,15 @@ unsafe fn new___concat_to_llvm( } } -unsafe fn new___mac_to_llvm( +unsafe fn mac_to_llvm( accumulator_vector: &Id, left_prod_vector: &Id, right_prod_vector: &Id, md: Egg2LLVMState, ) -> LLVMValueRef { - let trans_acc = new___egg_to_llvm(&md.egg_nodes_vector[usize::from(*accumulator_vector)], md); - let trans_v1 = new___egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_prod_vector)], md); - let trans_v2 = new___egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_prod_vector)], md); + let trans_acc = egg_to_llvm(&md.egg_nodes_vector[usize::from(*accumulator_vector)], md); + let trans_v1 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_prod_vector)], md); + let trans_v2 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_prod_vector)], md); let vec_type = LLVMTypeOf(trans_acc); let param_types = [vec_type, vec_type, vec_type].as_mut_ptr(); let fn_type = LLVMFunctionType(vec_type, param_types, 3, 0 as i32); @@ -1047,10 +1037,66 @@ unsafe fn new___mac_to_llvm( LLVMBuildCall(md.builder, func, args, 3, b"\0".as_ptr() as *const _) } -unsafe fn new___egg_to_llvm( - egg_node: &VecLang, - translation_metadata: Egg2LLVMState, -) -> LLVMValueRef { +unsafe fn scalar_unop_to_llvm(n: &Id, unop_node: &VecLang, md: Egg2LLVMState) -> LLVMValueRef { + let mut number = egg_to_llvm(&md.egg_nodes_vector[usize::from(*n)], md); + if isa_integertype(number) { + number = LLVMBuildBitCast( + md.builder, + number, + LLVMFloatTypeInContext(md.context), + b"\0".as_ptr() as *const _, + ); + } + translate_unop( + unop_node, + number, + md.builder, + md.context, + md.module, + b"\0".as_ptr() as *const _, + ) +} + +unsafe fn vecneg_to_llvm(vec: &Id, md: Egg2LLVMState) -> LLVMValueRef { + let neg_vector = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); + LLVMBuildFNeg(md.builder, neg_vector, b"\0".as_ptr() as *const _) 
+} + +unsafe fn vecsqrt_to_llvm(vec: &Id, md: Egg2LLVMState) -> LLVMValueRef { + let sqrt_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); + let vec_type = LLVMTypeOf(sqrt_vec); + let param_types = [vec_type].as_mut_ptr(); + let fn_type = LLVMFunctionType(vec_type, param_types, 1, 0 as i32); + let func = LLVMAddFunction(md.module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); + let args = [sqrt_vec].as_mut_ptr(); + LLVMBuildCall(md.builder, func, args, 1, b"\0".as_ptr() as *const _) +} + +unsafe fn vecsgn_to_llvm(vec: &Id, md: Egg2LLVMState) -> LLVMValueRef { + let sgn_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); + let vec_type = LLVMTypeOf(sgn_vec); + let vec_size = LLVMGetVectorSize(vec_type); + let mut ones = Vec::new(); + for _ in 0..vec_size { + ones.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 1 as f64)); + } + let ones_ptr = ones.as_mut_ptr(); + let ones_vector = LLVMConstVector(ones_ptr, vec_size); + let param_types = [vec_type, vec_type].as_mut_ptr(); + let fn_type = LLVMFunctionType(vec_type, param_types, 2, 0 as i32); + let func = LLVMAddFunction( + md.module, + b"llvm.copysign.f32\0".as_ptr() as *const _, + fn_type, + ); + let args = [ones_vector, sgn_vec].as_mut_ptr(); + LLVMBuildCall(md.builder, func, args, 2, b"\0".as_ptr() as *const _) +} + +/// Egg To LLVM Dispatches translation of VecLanf Egg Nodes to LLVMValueRegs +/// +/// Side Effect: Builds and Insert LLVM instructions +unsafe fn egg_to_llvm(egg_node: &VecLang, translation_metadata: Egg2LLVMState) -> LLVMValueRef { match egg_node { VecLang::Symbol(..) => { panic!("Symbol was found. Egg to LLVM Translation does not handle symbol nodes.") @@ -1059,12 +1105,16 @@ unsafe fn new___egg_to_llvm( panic!("Get was found. Egg to LLVM Translation does not handle get nodes.") } VecLang::Ite(..) => panic!("Ite was found. Egg to LLVM Translation does not handle ite nodes."), + VecLang::Or([l, r]) => panic!("Or was found. Egg to LLVM Translation does not handle or nodes."), + VecLang::And([l, r]) => panic!("And was found. Egg to LLVM Translation does not handle and nodes."), + VecLang::Lt([l, r]) => panic!("Lt was found. Egg to LLVM Translation does not handle lt nodes."), VecLang::Sgn(..) => panic!("Sgn was found. Egg to LLVM Translation does not handle sgn nodes. TODO: In the future, tis node will be handled alongside sqrt and neg scalar nodes."), - VecLang::Arg(_) => new___arg_to_llvm(egg_node, translation_metadata), - VecLang::Reg(_) => new___reg_to_llvm(egg_node, translation_metadata), - VecLang::Num(n) => new___num_to_llvm(n, translation_metadata), + VecLang::VecSgn(..) => panic!("VecSgn was found. Egg to LLVM Translation does not handle vecsgn nodes. 
TODO: In the future, this node will be handled alongside VecSqrt and VecNeg vector nodes."), + VecLang::Arg(_) => arg_to_llvm(egg_node, translation_metadata), + VecLang::Reg(_) => reg_to_llvm(egg_node, translation_metadata), + VecLang::Num(n) => num_to_llvm(n, translation_metadata), VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { - new___vec_to_llvm(*boxed_ids, translation_metadata) + vec_to_llvm(*boxed_ids, translation_metadata) } VecLang::VecAdd([l, r]) | VecLang::VecMinus([l, r]) @@ -1073,593 +1123,24 @@ unsafe fn new___egg_to_llvm( | VecLang::Add([l, r]) | VecLang::Minus([l, r]) | VecLang::Mul([l, r]) - | VecLang::Div([l, r]) - | VecLang::Or([l, r]) - | VecLang::And([l, r]) - | VecLang::Lt([l, r]) => new___binop_to_llvm(egg_node, l, r, translation_metadata), - VecLang::Concat([v1, v2]) => new___concat_to_llvm(v1, v2, translation_metadata), - VecLang::VecMAC([acc, v1, v2]) => new___mac_to_llvm(acc, v1, v2, translation_metadata), - - + | VecLang::Div([l, r]) => binop_to_llvm(egg_node, l, r, translation_metadata), + VecLang::Concat([v1, v2]) => concat_to_llvm(v1, v2, translation_metadata), + VecLang::VecMAC([acc, v1, v2]) => mac_to_llvm(acc, v1, v2, translation_metadata), + + // TODO: VecNeg, VecSqrt, VecSgn all have not been tested, need test cases. // TODO: LLVM actually supports many more vector intrinsics, including // vector sine/cosine instructions for floats. - VecLang::VecNeg([v]) => { - let neg_vector = translate_egg( - &vec[usize::from(*v)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - LLVMBuildFNeg(builder, neg_vector, b"\0".as_ptr() as *const _) - } - VecLang::VecSqrt([v]) => { - let sqrt_vec = translate_egg( - &vec[usize::from(*v)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let vec_type = LLVMTypeOf(sqrt_vec); - let param_types = [vec_type].as_mut_ptr(); - let fn_type = LLVMFunctionType(vec_type, param_types, 1, 0 as i32); - let func = LLVMAddFunction(module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); - let args = [sqrt_vec].as_mut_ptr(); - LLVMBuildCall(builder, func, args, 1, b"\0".as_ptr() as *const _) - } - // compliant with c++ LibMath copysign function, which differs with sgn at x = 0. 
- VecLang::VecSgn([v]) => { - let sgn_vec = translate_egg( - &vec[usize::from(*v)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let vec_type = LLVMTypeOf(sgn_vec); - let vec_size = LLVMGetVectorSize(vec_type); - let mut ones = Vec::new(); - for _ in 0..vec_size { - ones.push(LLVMConstReal(LLVMFloatTypeInContext(context), 1 as f64)); - } - let ones_ptr = ones.as_mut_ptr(); - let ones_vector = LLVMConstVector(ones_ptr, vec_size); - let param_types = [vec_type, vec_type].as_mut_ptr(); - let fn_type = LLVMFunctionType(vec_type, param_types, 2, 0 as i32); - let func = LLVMAddFunction(module, b"llvm.copysign.f32\0".as_ptr() as *const _, fn_type); - let args = [ones_vector, sgn_vec].as_mut_ptr(); - LLVMBuildCall(builder, func, args, 2, b"\0".as_ptr() as *const _) - } - VecLang::Sgn([n]) | VecLang::Sqrt([n]) | VecLang::Neg([n]) => { - let mut number = translate_egg( - &vec[usize::from(*n)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - if isa_integertype(number) { - number = LLVMBuildBitCast( - builder, - number, - LLVMFloatTypeInContext(context), - b"\0".as_ptr() as *const _, - ); - } - translate_unop( - enode, - number, - builder, - context, - module, - b"\0".as_ptr() as *const _, - ) - } + VecLang::VecNeg([v]) => vecneg_to_llvm(v, translation_metadata), + VecLang::VecSqrt([v]) => vecsqrt_to_llvm(v, translation_metadata), + // VecSgn compliant with c++ LibMath copysign function, which differs with sgn at x = 0. + VecLang::VecSgn([v]) => vecsgn_to_llvm(v, translation_metadata), + VecLang::Sgn([n]) | VecLang::Sqrt([n]) | VecLang::Neg([n]) => scalar_unop_to_llvm(n, egg_node, translation_metadata), } } // ---- Construction Zone ------- -unsafe fn translate_egg( - enode: &VecLang, - vec: &[VecLang], - gep_map: &GEPMap, - store_map: &StoreMap, - symbol_map: &SymbolMap, - llvm_arg_pairs: &mut LLVMPairMap, - node_to_arg_pair: &Vec, - builder: LLVMBuilderRef, - context: LLVMContextRef, - module: LLVMModuleRef, -) -> LLVMValueRef { - let instr = match enode { - // VecLang::RegInfo(_) => panic!("RegInfo Currently Not Handled"), - VecLang::Reg(_) => panic!("Reg Currently Not Handled"), - VecLang::Symbol(symbol) => match symbol_map.get(enode) { - Some(llvm_instr) => llvm_recursive_add(builder, *llvm_instr, context, llvm_arg_pairs), - None => { - let mut matched = false; - let mut ret_value = LLVMBuildAdd( - builder, - LLVMConstReal(LLVMFloatTypeInContext(context), 0 as f64), - LLVMConstReal(LLVMFloatTypeInContext(context), 0 as f64), - b"nop\0".as_ptr() as *const _, - ); - for node_arg_pair in node_to_arg_pair { - let llvm_node = node_arg_pair.arg; - let node_index = node_arg_pair.node_int; - let string_node_index = node_index.to_string(); - if string_node_index.parse::().unwrap() == *symbol { - for (original_val, new_val) in (&mut *llvm_arg_pairs).iter() { - if cmp_val_ref_address(&**original_val, &*llvm_node) { - matched = true; - ret_value = *new_val; - break; - } - } - } - if matched { - break; - } - } - if matched { - ret_value - } else { - panic!("No Match in Node Arg Pair List.") - } - } - }, - VecLang::Num(n) => LLVMConstReal(LLVMFloatTypeInContext(context), *n as f64), - VecLang::Get(..) 
=> { - let (array_name, array_offsets) = translate_get(enode, vec); - let gep_value = gep_map - .get(&(array_name, array_offsets)) - .expect("Symbol map lookup error: Cannot Find GEP"); - let load_value = if isa_load(*gep_value) { - let mut matched = false; - let mut matched_expr = *gep_value; - for (original_val, new_val) in (&*llvm_arg_pairs).iter() { - if cmp_val_ref_address(&**original_val, &**gep_value) { - matched = true; - matched_expr = *new_val; - break; - } - } - if matched { - matched_expr - } else { - let addr = LLVMGetOperand(*gep_value, 0); - let new_gep = llvm_recursive_add(builder, addr, context, llvm_arg_pairs); - let new_load = LLVMBuildLoad(builder, new_gep, b"\0".as_ptr() as *const _); - llvm_arg_pairs.insert(*gep_value, new_load); - new_load - } - } else if isa_gep(*gep_value) { - let new_gep = llvm_recursive_add(builder, *gep_value, context, llvm_arg_pairs); - LLVMBuildLoad(builder, new_gep, b"\0".as_ptr() as *const _) - } else if isa_bitcast(*gep_value) { - // TODO: DO NOT REGERATE CALLS. THESE SHOULD BE CACHED!!. e.g. a CALLOC - let mut new_bitcast = llvm_recursive_add(builder, *gep_value, context, llvm_arg_pairs); - if !isa_floatptr(new_bitcast) { - let addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(new_bitcast)); - let new_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(context), addr_space); - new_bitcast = LLVMBuildBitCast( - builder, - new_bitcast, - new_ptr_type, - b"\0".as_ptr() as *const _, - ); - } - LLVMBuildLoad(builder, new_bitcast, b"\0".as_ptr() as *const _) - } else if isa_sitofp(*gep_value) { - let new_sitofp = llvm_recursive_add(builder, *gep_value, context, llvm_arg_pairs); - new_sitofp - } else if isa_argument(*gep_value) { - let new_load_value = LLVMBuildLoad(builder, *gep_value, b"\0".as_ptr() as *const _); - new_load_value - } else { - // includes isa_alloca case - let mut matched = false; - let mut matched_expr = *gep_value; - for (original_val, new_val) in (&*llvm_arg_pairs).iter() { - if cmp_val_ref_address(&**original_val, &**gep_value) { - matched = true; - matched_expr = *new_val; - break; - } - } - if matched { - matched_expr - } else { - let new_load_value = LLVMBuildLoad(builder, *gep_value, b"\0".as_ptr() as *const _); - // assert!(isa_load(*gep_value) || isa_alloca(*gep_value)); - // assert!(isa_load(new_load_value) || isa_alloca(new_load_value)); - llvm_arg_pairs.insert(*gep_value, new_load_value); - new_load_value - } - }; - // let mut matched = false; - // for (original_val, _) in (&*llvm_arg_pairs).iter() { - // if cmp_val_ref_address(&**original_val, &**gep_value) { - // matched = true; - // break; - // } - // } - // if !matched { - // llvm_arg_pairs.insert(*gep_value, load_value); - // } - load_value - } - VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { - let idvec = boxed_ids.to_vec(); - let idvec_len = idvec.len(); - let mut zeros = Vec::new(); - for _ in 0..idvec_len { - zeros.push(LLVMConstReal(LLVMFloatTypeInContext(context), 0 as f64)); - } - let zeros_ptr = zeros.as_mut_ptr(); - let mut vector = LLVMConstVector(zeros_ptr, idvec.len() as u32); - for (idx, &eggid) in idvec.iter().enumerate() { - let elt = &vec[usize::from(eggid)]; - let mut elt_val = translate_egg( - elt, - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - // check if the elt is an int - if isa_integertype(elt_val) { - elt_val = LLVMBuildBitCast( - builder, - elt_val, - LLVMFloatTypeInContext(context), - b"\0".as_ptr() as *const _, - ); - } - 
vector = LLVMBuildInsertElement( - builder, - vector, - elt_val, - LLVMConstInt(LLVMIntTypeInContext(context, 32), idx as u64, 0), - b"\0".as_ptr() as *const _, - ); - } - vector - } - VecLang::VecAdd([l, r]) - | VecLang::VecMinus([l, r]) - | VecLang::VecMul([l, r]) - | VecLang::VecDiv([l, r]) - | VecLang::Add([l, r]) - | VecLang::Minus([l, r]) - | VecLang::Mul([l, r]) - | VecLang::Div([l, r]) - | VecLang::Or([l, r]) - | VecLang::And([l, r]) - | VecLang::Lt([l, r]) => { - let left = translate_egg( - &vec[usize::from(*l)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let right = translate_egg( - &vec[usize::from(*r)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let left = if LLVMTypeOf(left) == LLVMIntTypeInContext(context, 32) { - LLVMBuildBitCast( - builder, - left, - LLVMFloatTypeInContext(context), - b"\0".as_ptr() as *const _, - ) - } else { - left - }; - let right = if LLVMTypeOf(right) == LLVMIntTypeInContext(context, 32) { - LLVMBuildBitCast( - builder, - right, - LLVMFloatTypeInContext(context), - b"\0".as_ptr() as *const _, - ) - } else { - right - }; - if isa_constfp(left) - && !isa_constaggregatezero(left) - && isa_constfp(right) - && !isa_constaggregatezero(right) - { - let mut loses_info = 1; - let nright = LLVMConstRealGetDouble(right, &mut loses_info); - let new_right = build_constant_float(nright, context); - let nleft = LLVMConstRealGetDouble(left, &mut loses_info); - let new_left = build_constant_float(nleft, context); - translate_binop( - enode, - new_left, - new_right, - builder, - b"\0".as_ptr() as *const _, - ) - } else if isa_constfp(right) && !isa_constaggregatezero(right) { - let mut loses_info = 1; - let n = LLVMConstRealGetDouble(right, &mut loses_info); - let new_right = build_constant_float(n, context); - translate_binop(enode, left, new_right, builder, b"\0".as_ptr() as *const _) - } else if isa_constfp(left) && !isa_constaggregatezero(left) { - let mut loses_info = 1; - let n = LLVMConstRealGetDouble(left, &mut loses_info); - let new_left = build_constant_float(n, context); - translate_binop(enode, new_left, right, builder, b"\0".as_ptr() as *const _) - } else { - translate_binop(enode, left, right, builder, b"\0".as_ptr() as *const _) - } - } - VecLang::Concat([v1, v2]) => { - let trans_v1 = translate_egg( - &vec[usize::from(*v1)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let mut trans_v2 = translate_egg( - &vec[usize::from(*v2)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - // it turns out all vectors need to be length power of 2 - // if the 2 vectors are not the same size, double the length of the smaller vector by padding with 0's in it - // manually concatenate 2 vectors by using a LLVM shuffle operation. 
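// (Worked example of the shuffle trick, with illustrative values: concatenating
// <2 x float> %a = <1.0, 2.0> and <2 x float> %b = <3.0, 4.0> uses the mask <0, 1, 2, 3>;
// mask indices 0..1 select lanes of the first operand and 2..3 select lanes of the
// second, giving <4 x float> <1.0, 2.0, 3.0, 4.0>.)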
- let v1_type = LLVMTypeOf(trans_v1); - let v1_size = LLVMGetVectorSize(v1_type); - let v2_type = LLVMTypeOf(trans_v2); - let v2_size = LLVMGetVectorSize(v2_type); - - // HACKY FIX FOR NOW - // assume both v1 and v2 are pow of 2 size - // assume v2 size smaller or equal to v1 size - // assume v2 is 1/2 size of v1 - if v1_size != v2_size { - // replicate v2 size - let mut zeros = Vec::new(); - for _ in 0..v2_size { - zeros.push(LLVMConstReal(LLVMFloatTypeInContext(context), 0 as f64)); - } - let zeros_ptr = zeros.as_mut_ptr(); - let zeros_vector = LLVMConstVector(zeros_ptr, v2_size); - let size = 2 * v2_size; - let mut indices = Vec::new(); - for i in 0..size { - indices.push(LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0)); - } - let mask = indices.as_mut_ptr(); - let mask_vector = LLVMConstVector(mask, size); - trans_v2 = LLVMBuildShuffleVector( - builder, - trans_v2, - zeros_vector, - mask_vector, - b"\0".as_ptr() as *const _, - ); - } - let size = v1_size + v2_size; - let mut indices = Vec::new(); - for i in 0..size { - indices.push(LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0)); - } - let mask = indices.as_mut_ptr(); - let mask_vector = LLVMConstVector(mask, size); - LLVMBuildShuffleVector( - builder, - trans_v1, - trans_v2, - mask_vector, - b"\0".as_ptr() as *const _, - ) - } - VecLang::VecMAC([acc, v1, v2]) => { - let trans_acc = translate_egg( - &vec[usize::from(*acc)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let trans_v1 = translate_egg( - &vec[usize::from(*v1)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let trans_v2 = translate_egg( - &vec[usize::from(*v2)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let vec_type = LLVMTypeOf(trans_acc); - let param_types = [vec_type, vec_type, vec_type].as_mut_ptr(); - let fn_type = LLVMFunctionType(vec_type, param_types, 3, 0 as i32); - let func = LLVMAddFunction(module, b"llvm.fma.f32\0".as_ptr() as *const _, fn_type); - let args = [trans_v1, trans_v2, trans_acc].as_mut_ptr(); - LLVMBuildCall(builder, func, args, 3, b"\0".as_ptr() as *const _) - } - // TODO: VecNeg, VecSqrt, VecSgn all have not been tested, need test cases. - // TODO: LLVM actually supports many more vector intrinsics, including - // vector sine/cosine instructions for floats. - VecLang::VecNeg([v]) => { - let neg_vector = translate_egg( - &vec[usize::from(*v)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - LLVMBuildFNeg(builder, neg_vector, b"\0".as_ptr() as *const _) - } - VecLang::VecSqrt([v]) => { - let sqrt_vec = translate_egg( - &vec[usize::from(*v)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let vec_type = LLVMTypeOf(sqrt_vec); - let param_types = [vec_type].as_mut_ptr(); - let fn_type = LLVMFunctionType(vec_type, param_types, 1, 0 as i32); - let func = LLVMAddFunction(module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); - let args = [sqrt_vec].as_mut_ptr(); - LLVMBuildCall(builder, func, args, 1, b"\0".as_ptr() as *const _) - } - // compliant with c++ LibMath copysign function, which differs with sgn at x = 0. 
- VecLang::VecSgn([v]) => { - let sgn_vec = translate_egg( - &vec[usize::from(*v)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let vec_type = LLVMTypeOf(sgn_vec); - let vec_size = LLVMGetVectorSize(vec_type); - let mut ones = Vec::new(); - for _ in 0..vec_size { - ones.push(LLVMConstReal(LLVMFloatTypeInContext(context), 1 as f64)); - } - let ones_ptr = ones.as_mut_ptr(); - let ones_vector = LLVMConstVector(ones_ptr, vec_size); - let param_types = [vec_type, vec_type].as_mut_ptr(); - let fn_type = LLVMFunctionType(vec_type, param_types, 2, 0 as i32); - let func = LLVMAddFunction(module, b"llvm.copysign.f32\0".as_ptr() as *const _, fn_type); - let args = [ones_vector, sgn_vec].as_mut_ptr(); - LLVMBuildCall(builder, func, args, 2, b"\0".as_ptr() as *const _) - } - VecLang::Sgn([n]) | VecLang::Sqrt([n]) | VecLang::Neg([n]) => { - let mut number = translate_egg( - &vec[usize::from(*n)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - if isa_integertype(number) { - number = LLVMBuildBitCast( - builder, - number, - LLVMFloatTypeInContext(context), - b"\0".as_ptr() as *const _, - ); - } - translate_unop( - enode, - number, - builder, - context, - module, - b"\0".as_ptr() as *const _, - ) - } - VecLang::Ite(..) => panic!("Ite is not handled."), - }; - return instr; -} - unsafe fn gen_type_cast( val: LLVMValueRef, typ1: LLVMTypeRef, @@ -1681,7 +1162,7 @@ unsafe fn gen_type_cast( panic!("Cannot convert between {:?} {:?}\n.", typ1, typ2); } -unsafe fn egg_to_llvm( +unsafe fn egg_to_llvm_main( expr: RecExpr, gep_map: &GEPMap, store_map: &StoreMap, From e43fb5da9a08085a86d91b9b06f56facda4c7aa7 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 27 Apr 2022 12:24:30 -0400 Subject: [PATCH 076/143] fix up arg and reg cases --- src/dios-egraphs/Diospyros/src/lib.rs | 28 +++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 9523d4ef..47be73b7 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -564,6 +564,7 @@ unsafe fn arg_to_egg( let argument_idx = gen_arg_idx(); let argument_node = VecLang::Arg(argument_idx); egg_nodes.push(argument_node); + assert!(!translation_metadata.llvm2arg.contains_key(&llvm_instr)); translation_metadata .llvm2arg .insert(llvm_instr, argument_node); @@ -654,6 +655,7 @@ unsafe fn unhandled_opcode_to_egg( let register_idx = gen_reg_idx(); let register_node = VecLang::Reg(register_idx); egg_nodes.push(register_node); + assert!(!translation_metadata.llvm2reg.contains_key(&llvm_instr)); translation_metadata .llvm2reg .insert(llvm_instr, register_node); @@ -818,11 +820,33 @@ struct Egg2LLVMState<'a> { } unsafe fn arg_to_llvm(egg_node: &VecLang, translation_metadata: Egg2LLVMState) -> LLVMValueRef { - panic!("Unimplemented"); + // TODO: Make More Efficient with BTREEMAP? + let llvm2arg = translation_metadata.llvm2egg_metadata.llvm2arg; + for (llvm_instr, arg_node) in llvm2arg.iter() { + // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. 
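// A minimal sketch of the lookup the TODO above hints at, not part of this patch: build
// a reverse index once (egg node -> LLVM value) and query it in O(log n) instead of
// scanning the forward map for every Arg/Reg node. It assumes the key type is Ord + Clone
// (egg's define_language! normally derives these for VecLang, and raw LLVMValueRef
// pointers already order by address); the names below are illustrative only.
fn reverse_index<K: Ord + Clone, V: Ord + Clone>(
    forward: &std::collections::BTreeMap<K, V>,
) -> std::collections::BTreeMap<V, K> {
    // Each (llvm_instr, egg_node) entry becomes an (egg_node, llvm_instr) entry.
    forward.iter().map(|(k, v)| (v.clone(), k.clone())).collect()
}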
+ if arg_node == egg_node { + return *llvm_instr; + } + } + panic!( + "Expected a successful lookup in llvm2arg, but cannot find Argument Egg Node: {:?}.", + egg_node + ); } unsafe fn reg_to_llvm(egg_node: &VecLang, translation_metadata: Egg2LLVMState) -> LLVMValueRef { - panic!("Unimplemented"); + // TODO: Make More Efficient with BTREEMAP? + let llvm2reg = translation_metadata.llvm2egg_metadata.llvm2reg; + for (llvm_instr, reg_node) in llvm2reg.iter() { + // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. + if reg_node == egg_node { + return *llvm_instr; + } + } + panic!( + "Expected a successful lookup in llvm2reg, but cannot find Register Egg Node: {:?}.", + egg_node + ); } unsafe fn num_to_llvm(n: &i32, md: Egg2LLVMState) -> LLVMValueRef { From f3eadd7806e6612378932f60bca5c5d9a81b3242 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sun, 1 May 2022 01:52:41 -0400 Subject: [PATCH 077/143] rust compiles --- src/dios-egraphs/Diospyros/src/lib.rs | 401 +++++++------------------- 1 file changed, 105 insertions(+), 296 deletions(-) diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 47be73b7..b56fdce9 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -12,47 +12,37 @@ use std::{ }; extern "C" { - fn llvm_index(val: LLVMValueRef, index: i32) -> i32; - fn llvm_name(val: LLVMValueRef) -> *const c_char; - fn isa_unop(val: LLVMValueRef) -> bool; - fn isa_bop(val: LLVMValueRef) -> bool; + fn _llvm_index(val: LLVMValueRef, index: i32) -> i32; + fn _llvm_name(val: LLVMValueRef) -> *const c_char; + fn _isa_unop(val: LLVMValueRef) -> bool; + fn _isa_bop(val: LLVMValueRef) -> bool; fn isa_constant(val: LLVMValueRef) -> bool; fn isa_constfp(val: LLVMValueRef) -> bool; - fn isa_gep(val: LLVMValueRef) -> bool; - fn isa_load(val: LLVMValueRef) -> bool; - fn isa_store(val: LLVMValueRef) -> bool; + fn _isa_gep(val: LLVMValueRef) -> bool; + fn _isa_load(val: LLVMValueRef) -> bool; + fn _isa_store(val: LLVMValueRef) -> bool; fn isa_argument(val: LLVMValueRef) -> bool; - fn isa_call(val: LLVMValueRef) -> bool; - fn isa_fptrunc(val: LLVMValueRef) -> bool; - fn isa_fpext(val: LLVMValueRef) -> bool; - fn isa_alloca(val: LLVMValueRef) -> bool; - fn isa_phi(val: LLVMValueRef) -> bool; + fn _isa_call(val: LLVMValueRef) -> bool; + fn _isa_fptrunc(val: LLVMValueRef) -> bool; + fn _isa_fpext(val: LLVMValueRef) -> bool; + fn _isa_alloca(val: LLVMValueRef) -> bool; + fn _isa_phi(val: LLVMValueRef) -> bool; fn _isa_sextint(val: LLVMValueRef) -> bool; - fn isa_sitofp(val: LLVMValueRef) -> bool; + fn _isa_sitofp(val: LLVMValueRef) -> bool; fn isa_constaggregatezero(val: LLVMValueRef) -> bool; fn _isa_constaggregate(val: LLVMValueRef) -> bool; fn isa_integertype(val: LLVMValueRef) -> bool; fn _isa_intptr(val: LLVMValueRef) -> bool; - fn isa_floatptr(val: LLVMValueRef) -> bool; + fn _isa_floatptr(val: LLVMValueRef) -> bool; fn _isa_floattype(val: LLVMValueRef) -> bool; - fn isa_bitcast(val: LLVMValueRef) -> bool; + fn _isa_bitcast(val: LLVMValueRef) -> bool; fn isa_sqrt32(val: LLVMValueRef) -> bool; - fn isa_sqrt64(val: LLVMValueRef) -> bool; + fn _isa_sqrt64(val: LLVMValueRef) -> bool; fn get_constant_float(val: LLVMValueRef) -> f32; fn build_constant_float(n: f64, context: LLVMContextRef) -> LLVMValueRef; } -// Note: We use BTreeMaps to enforce ordering in the map -// Without ordering, tests become flaky and start failing a lot more often -// We do not use HashMaps for 
this reason as ordering is not enforced. -// GEPMap : Maps the array name and array offset as symbols to the GEP -// LLVM Value Ref that LLVM Generated -type GEPMap = BTreeMap<(Symbol, Symbol), LLVMValueRef>; -type LLVMPairMap = BTreeMap; - static mut ARG_IDX: u32 = 0; -static mut CALL_IDX: i32 = 0; -static mut NODE_IDX: u32 = 0; static mut REG_IDX: u32 = 0; unsafe fn gen_arg_idx() -> u32 { @@ -65,29 +55,10 @@ unsafe fn gen_reg_idx() -> u32 { return REG_IDX; } -unsafe fn gen_node_idx() -> u32 { - NODE_IDX += 1; - return NODE_IDX; -} - -unsafe fn gen_arg_name() -> String { - ARG_IDX += 1; - let string = "ARGUMENT".to_string(); - let result = format!("{}{}", string, ARG_IDX.to_string()); - result -} - -unsafe fn gen_call_name() -> String { - CALL_IDX += 1; - let string = "CALL".to_string(); - let result = format!("{}{}", string, CALL_IDX.to_string()); - result -} - // Reference Comparison: https://www.reddit.com/r/rust/comments/2r3wjk/is_there_way_to_compare_objects_by_address_in_rust/ // Compares whether addresses of LLVMValueRefs are the same. // Not the contents of the Value Refs -fn cmp_val_ref_address(a1: &llvm::LLVMValue, a2: &llvm::LLVMValue) -> bool { +fn _cmp_val_ref_address(a1: &llvm::LLVMValue, a2: &llvm::LLVMValue) -> bool { a1 as *const _ == a2 as *const _ } @@ -161,20 +132,6 @@ unsafe fn translate_unop( } } -/// translate_get converts a VecLang Get Node to the corresponding LLVM Ir array name and -/// LLVM IR offset, as symbols. -unsafe fn translate_get(get: &VecLang, enode_vec: &[VecLang]) -> (Symbol, Symbol) { - match get { - VecLang::Get([sym, i]) => match (&enode_vec[usize::from(*sym)], &enode_vec[usize::from(*i)]) { - (VecLang::Symbol(name), VecLang::Symbol(offset)) => { - return (*name, *offset); - } - _ => panic!("Match Error: Expects Pair of Symbol, Symbol."), - }, - _ => panic!("Match Error in Translate Get: Expects Get Enode."), - } -} - /// Main function to optimize: Takes in a basic block of instructions, /// optimizes it, and then translates it to LLVM IR code, in place. @@ -220,35 +177,23 @@ pub fn optimize( // assert!(isa_load(new_value) || isa_alloca(new_value)); llvm_arg_pairs.insert(original_value, new_value); } - let mut node_to_arg = Vec::new(); - let (expr, gep_map, store_map, symbol_map) = - llvm_to_egg_main(llvm_instrs, &mut llvm_arg_pairs, &mut node_to_arg); + let (egg_expr, llvm2egg_metadata) = llvm_to_egg_main(llvm_instrs, &[], true); // optimization pass if print_opt { - eprintln!("{}", expr.pretty(10)); + eprintln!("{}", egg_expr.pretty(10)); } - let mut best = expr.clone(); + let mut best_egg_expr = egg_expr.clone(); if run_egg { - let pair = rules::run(&expr, 180, true, !run_egg); - best = pair.1; + let pair = rules::run(&egg_expr, 180, true, !run_egg); + best_egg_expr = pair.1; } if print_opt { - eprintln!("{}", best.pretty(10)); + eprintln!("{}", best_egg_expr.pretty(10)); } // egg to llvm - egg_to_llvm_main( - best, - &gep_map, - &store_map, - &symbol_map, - &mut llvm_arg_pairs, // does this work properly?, IDK? 
Need to return mut value - &node_to_arg, - module, - context, - builder, - ); + egg_to_llvm_main(best_egg_expr, &llvm2egg_metadata, module, context, builder); let mut final_llvm_arg_pairs = Vec::new(); for (unchanged_val, new_val) in llvm_arg_pairs.iter() { @@ -277,10 +222,6 @@ pub fn optimize( // ------------ NEW CONVERSION FROM LLVM IR TO EGG EXPRESSIONS ------- -type StoreMap = BTreeMap; -type IdMap = BTreeSet; -type SymbolMap = BTreeMap; - enum LLVMOpType { Argument, Constant, @@ -393,84 +334,9 @@ unsafe fn _llvm_recursive_print(inst: LLVMValueRef) -> () { return; } -unsafe fn llvm_recursive_add( - builder: LLVMBuilderRef, - inst: LLVMValueRef, - context: LLVMContextRef, - llvm_arg_pairs: &mut LLVMPairMap, -) -> LLVMValueRef { - let cloned_inst = LLVMInstructionClone(inst); - if isa_argument(inst) { - return inst; - } - let mut matched = false; - let mut ret_value = inst; - for (original_val, new_val) in (&*llvm_arg_pairs).iter() { - if cmp_val_ref_address(&**original_val, &*inst) { - matched = true; - ret_value = *new_val; - break; - } - } - if matched { - return ret_value; - } - if isa_constant(inst) { - return inst; - } else if isa_phi(inst) { - return inst; - } else if isa_alloca(inst) { - // We have this in the base case to stop reconstruction of allocas, - // because allocas are like loads, and should not get reconstructioned - // search the llvm_arg_pairs for allocas that were already created - let mut matched = false; - let mut ret_value = inst; - for (original_val, new_val) in (&*llvm_arg_pairs).iter() { - // let original_llvm = llvm_pair.original_value; - // let new_llvm = llvm_pair.new_value; - if cmp_val_ref_address(&**original_val, &*inst) { - matched = true; - ret_value = *new_val; - break; - } - } - if matched { - return ret_value; - } else { - // assert!(isa_load(inst) || isa_alloca(inst)); - // assert!(isa_load(cloned_inst) || isa_alloca(cloned_inst)); - llvm_arg_pairs.insert(inst, cloned_inst); - LLVMInsertIntoBuilder(builder, cloned_inst); - return cloned_inst; - } - } - let num_ops = LLVMGetNumOperands(inst); - for i in 0..num_ops { - let operand = LLVMGetOperand(inst, i as u32); - let new_operand = llvm_recursive_add(builder, operand, context, llvm_arg_pairs); - LLVMSetOperand(cloned_inst, i as u32, new_operand); - } - LLVMInsertIntoBuilder(builder, cloned_inst); - - let mut in_map = false; - for (original_inst, _) in (&*llvm_arg_pairs).iter() { - if cmp_val_ref_address(&**original_inst, &*inst) { - in_map = true; - } - } - if isa_load(inst) { - if !in_map { - // assert!(isa_load(inst) || isa_alloca(inst)); - // assert!(isa_load(cloned_inst) || isa_alloca(cloned_inst)); - llvm_arg_pairs.insert(inst, cloned_inst); - } - } - return cloned_inst; -} - unsafe fn isa_fadd(llvm_instr: LLVMValueRef) -> bool { match LLVMGetInstructionOpcode(llvm_instr) { - LLVMFadd => true, + LLVMFAdd => true, _ => false, } } @@ -544,13 +410,14 @@ unsafe fn choose_unop(unop: &LLVMValueRef, id: Id) -> VecLang { } /// LLVM2EggState Contains Egg to LLVM Translation Metadata +#[derive(Debug, Clone)] struct LLVM2EggState { llvm2reg: BTreeMap, llvm2arg: BTreeMap, instructions_in_chunk: BTreeSet, restricted_instructions: BTreeSet, start_instructions: Vec, - start_ids: BTreeSet, + start_ids: Vec, } /// Translates LLVM Arg to an Egg Argument Node @@ -558,12 +425,12 @@ unsafe fn arg_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, next_node_idx: u32, - mut translation_metadata: LLVM2EggState, + translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { assert!(isa_argument(llvm_instr)); let 
argument_idx = gen_arg_idx(); let argument_node = VecLang::Arg(argument_idx); - egg_nodes.push(argument_node); + egg_nodes.push(argument_node.clone()); assert!(!translation_metadata.llvm2arg.contains_key(&llvm_instr)); translation_metadata .llvm2arg @@ -576,9 +443,9 @@ unsafe fn arg_to_egg( /// Supported Binary Operators are: FAdd, FSub, FMul, FDiv unsafe fn bop_to_egg( llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, + egg_nodes: Vec, next_node_idx: u32, - mut translation_metadata: LLVM2EggState, + translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { assert!(isa_supported_binop(llvm_instr)); let left = LLVMGetOperand(llvm_instr, 0); @@ -600,9 +467,9 @@ unsafe fn bop_to_egg( /// Supported Unary Operators are: FNeg unsafe fn unop_to_egg( llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, + egg_nodes: Vec, next_node_idx: u32, - mut translation_metadata: LLVM2EggState, + translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { assert!(isa_supported_unop(llvm_instr)); let neg_expr = LLVMGetOperand(llvm_instr, 0); @@ -618,7 +485,7 @@ unsafe fn const_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, next_node_idx: u32, - mut translation_metadata: LLVM2EggState, + _translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { assert!(isa_constant(llvm_instr)); let value = get_constant_float(llvm_instr); @@ -629,14 +496,14 @@ unsafe fn const_to_egg( /// Translates Sqrt 32 Instruction to an Egg Square Root Node unsafe fn sqrt32_to_egg( llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, + egg_nodes: Vec, next_node_idx: u32, - mut translation_metadata: LLVM2EggState, + translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { assert!(isa_sqrt32(llvm_instr)); - let operand = LLVMGetOperand(llvm_instr, 0); + let sqrt_operand = LLVMGetOperand(llvm_instr, 0); let (mut new_enode_vec, new_next_node_idx) = - llvm_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + llvm_to_egg(sqrt_operand, egg_nodes, next_node_idx, translation_metadata); let sqrt_node = VecLang::Sqrt([Id::from((new_next_node_idx - 1) as usize)]); new_enode_vec.push(sqrt_node); (new_enode_vec, new_next_node_idx + 1) @@ -650,11 +517,11 @@ unsafe fn unhandled_opcode_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, next_node_idx: u32, - mut translation_metadata: LLVM2EggState, + translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { let register_idx = gen_reg_idx(); let register_node = VecLang::Reg(register_idx); - egg_nodes.push(register_node); + egg_nodes.push(register_node.clone()); assert!(!translation_metadata.llvm2reg.contains_key(&llvm_instr)); translation_metadata .llvm2reg @@ -670,7 +537,7 @@ unsafe fn llvm_to_egg( llvm_instr: LLVMValueRef, mut egg_nodes: Vec, next_node_idx: u32, - mut translation_metadata: LLVM2EggState, + translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { // If, on a different pass, the instruction was translated already, then // just used the egg node representing the translation @@ -679,7 +546,7 @@ unsafe fn llvm_to_egg( .llvm2reg .get(&llvm_instr) .expect("Key must exist"); - egg_nodes.push(*translated_egg_node); + egg_nodes.push(translated_egg_node.clone()); return (egg_nodes, next_node_idx + 1); } // If the current llvm instruction is a "restricted" instruction, do not translate, but make it a register @@ -716,14 +583,14 @@ unsafe fn llvm_to_egg( unsafe fn start_translating_llvm_to_egg( llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, + egg_nodes: Vec, next_node_idx: u32, - mut translation_metadata: LLVM2EggState, + translation_metadata: &mut LLVM2EggState, ) -> (Vec, 
u32) { translation_metadata.start_instructions.push(llvm_instr); translation_metadata .start_ids - .insert(Id::from(next_node_idx as usize)); + .push(Id::from(next_node_idx as usize)); return llvm_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); } @@ -743,7 +610,7 @@ unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { unsafe fn llvm_to_egg_main( llvm_instrs_in_chunk: &[LLVMValueRef], restricted_instrs: &[LLVMValueRef], - vectorize: bool, + _vectorize: bool, // TODO: feed this in as an argument llvm_instr2egg_node: BTreeMap, ) -> (RecExpr, LLVM2EggState) { let mut egg_nodes: Vec = Vec::new(); @@ -757,18 +624,18 @@ unsafe fn llvm_to_egg_main( let start_instructions: Vec = Vec::new(); // Ordered Set of Instructions in Chunk - let instructions_in_chunk: BTreeSet = BTreeSet::new(); + let mut instructions_in_chunk: BTreeSet = BTreeSet::new(); for llvm_instr in llvm_instrs_in_chunk.iter() { instructions_in_chunk.insert(*llvm_instr); } // Ordered Set of Ids - let start_ids: BTreeSet = BTreeSet::new(); + let start_ids: Vec = Vec::new(); // Ordered Set of Instructions NOT TO BE Translated, except as registers - let restricted_instrs: BTreeSet = BTreeSet::new(); - for llvm_instr in llvm_instrs_in_chunk.iter() { - restricted_instrs.insert(*llvm_instr); + let mut restricted_instrs_set: BTreeSet = BTreeSet::new(); + for llvm_instr in restricted_instrs.iter() { + restricted_instrs_set.insert(*llvm_instr); } // State Variable To Hold Maps During Translation @@ -776,7 +643,7 @@ unsafe fn llvm_to_egg_main( llvm2reg: llvm_instr2reg_node, llvm2arg: llvm_instr2arg_node, instructions_in_chunk: instructions_in_chunk, - restricted_instructions: restricted_instrs, + restricted_instructions: restricted_instrs_set, start_instructions: start_instructions, start_ids: start_ids, }; @@ -787,8 +654,12 @@ unsafe fn llvm_to_egg_main( // for each store, iterate backwards from that store and translate to egg for llvm_instr in llvm_instrs_in_chunk.iter() { if can_start_translation_instr(*llvm_instr) { - let (new_egg_nodes, new_next_node_idx) = - start_translating_llvm_to_egg(*llvm_instr, egg_nodes, next_node_idx, translation_metadata); + let (new_egg_nodes, new_next_node_idx) = start_translating_llvm_to_egg( + *llvm_instr, + egg_nodes, + next_node_idx, + &mut translation_metadata, + ); egg_nodes = new_egg_nodes; next_node_idx = new_next_node_idx; } @@ -808,8 +679,6 @@ unsafe fn llvm_to_egg_main( return (rec_expr, translation_metadata); } -// ---- Construction Zone ------- - /// Egg2LLVMState represent the state needed to translate from Egg to LLVM struct Egg2LLVMState<'a> { llvm2egg_metadata: LLVM2EggState, @@ -819,9 +688,9 @@ struct Egg2LLVMState<'a> { module: LLVMModuleRef, } -unsafe fn arg_to_llvm(egg_node: &VecLang, translation_metadata: Egg2LLVMState) -> LLVMValueRef { +unsafe fn arg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) -> LLVMValueRef { // TODO: Make More Efficient with BTREEMAP? - let llvm2arg = translation_metadata.llvm2egg_metadata.llvm2arg; + let llvm2arg = &translation_metadata.llvm2egg_metadata.llvm2arg; for (llvm_instr, arg_node) in llvm2arg.iter() { // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. 
if arg_node == egg_node { @@ -834,9 +703,9 @@ unsafe fn arg_to_llvm(egg_node: &VecLang, translation_metadata: Egg2LLVMState) - ); } -unsafe fn reg_to_llvm(egg_node: &VecLang, translation_metadata: Egg2LLVMState) -> LLVMValueRef { +unsafe fn reg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) -> LLVMValueRef { // TODO: Make More Efficient with BTREEMAP? - let llvm2reg = translation_metadata.llvm2egg_metadata.llvm2reg; + let llvm2reg = &translation_metadata.llvm2egg_metadata.llvm2reg; for (llvm_instr, reg_node) in llvm2reg.iter() { // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. if reg_node == egg_node { @@ -849,11 +718,11 @@ unsafe fn reg_to_llvm(egg_node: &VecLang, translation_metadata: Egg2LLVMState) - ); } -unsafe fn num_to_llvm(n: &i32, md: Egg2LLVMState) -> LLVMValueRef { +unsafe fn num_to_llvm(n: &i32, md: &Egg2LLVMState) -> LLVMValueRef { LLVMConstReal(LLVMFloatTypeInContext(md.context), *n as f64) } -unsafe fn vec_to_llvm(boxed_ids: Box<[Id]>, md: Egg2LLVMState) -> LLVMValueRef { +unsafe fn vec_to_llvm(boxed_ids: &Box<[Id]>, md: &Egg2LLVMState) -> LLVMValueRef { // Convert the Boxed Ids to a Vector, and generate a vector of zeros let idvec = boxed_ids.to_vec(); let idvec_len = idvec.len(); @@ -897,7 +766,7 @@ unsafe fn binop_to_llvm( binop_node: &VecLang, left_id: &Id, right_id: &Id, - md: Egg2LLVMState, + md: &Egg2LLVMState, ) -> LLVMValueRef { let left = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_id)], md); let right = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_id)], md); @@ -977,7 +846,7 @@ unsafe fn binop_to_llvm( } } -unsafe fn concat_to_llvm(left_vector: &Id, right_vector: &Id, md: Egg2LLVMState) -> LLVMValueRef { +unsafe fn concat_to_llvm(left_vector: &Id, right_vector: &Id, md: &Egg2LLVMState) -> LLVMValueRef { { let trans_v1 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_vector)], md); let mut trans_v2 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_vector)], md); @@ -1048,7 +917,7 @@ unsafe fn mac_to_llvm( accumulator_vector: &Id, left_prod_vector: &Id, right_prod_vector: &Id, - md: Egg2LLVMState, + md: &Egg2LLVMState, ) -> LLVMValueRef { let trans_acc = egg_to_llvm(&md.egg_nodes_vector[usize::from(*accumulator_vector)], md); let trans_v1 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_prod_vector)], md); @@ -1061,7 +930,7 @@ unsafe fn mac_to_llvm( LLVMBuildCall(md.builder, func, args, 3, b"\0".as_ptr() as *const _) } -unsafe fn scalar_unop_to_llvm(n: &Id, unop_node: &VecLang, md: Egg2LLVMState) -> LLVMValueRef { +unsafe fn scalar_unop_to_llvm(n: &Id, unop_node: &VecLang, md: &Egg2LLVMState) -> LLVMValueRef { let mut number = egg_to_llvm(&md.egg_nodes_vector[usize::from(*n)], md); if isa_integertype(number) { number = LLVMBuildBitCast( @@ -1081,12 +950,12 @@ unsafe fn scalar_unop_to_llvm(n: &Id, unop_node: &VecLang, md: Egg2LLVMState) -> ) } -unsafe fn vecneg_to_llvm(vec: &Id, md: Egg2LLVMState) -> LLVMValueRef { +unsafe fn vecneg_to_llvm(vec: &Id, md: &Egg2LLVMState) -> LLVMValueRef { let neg_vector = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); LLVMBuildFNeg(md.builder, neg_vector, b"\0".as_ptr() as *const _) } -unsafe fn vecsqrt_to_llvm(vec: &Id, md: Egg2LLVMState) -> LLVMValueRef { +unsafe fn vecsqrt_to_llvm(vec: &Id, md: &Egg2LLVMState) -> LLVMValueRef { let sqrt_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); let vec_type = LLVMTypeOf(sqrt_vec); let param_types = [vec_type].as_mut_ptr(); @@ -1096,7 +965,7 @@ unsafe fn 
vecsqrt_to_llvm(vec: &Id, md: Egg2LLVMState) -> LLVMValueRef { LLVMBuildCall(md.builder, func, args, 1, b"\0".as_ptr() as *const _) } -unsafe fn vecsgn_to_llvm(vec: &Id, md: Egg2LLVMState) -> LLVMValueRef { +unsafe fn vecsgn_to_llvm(vec: &Id, md: &Egg2LLVMState) -> LLVMValueRef { let sgn_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); let vec_type = LLVMTypeOf(sgn_vec); let vec_size = LLVMGetVectorSize(vec_type); @@ -1120,7 +989,7 @@ unsafe fn vecsgn_to_llvm(vec: &Id, md: Egg2LLVMState) -> LLVMValueRef { /// Egg To LLVM Dispatches translation of VecLanf Egg Nodes to LLVMValueRegs /// /// Side Effect: Builds and Insert LLVM instructions -unsafe fn egg_to_llvm(egg_node: &VecLang, translation_metadata: Egg2LLVMState) -> LLVMValueRef { +unsafe fn egg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) -> LLVMValueRef { match egg_node { VecLang::Symbol(..) => { panic!("Symbol was found. Egg to LLVM Translation does not handle symbol nodes.") @@ -1129,16 +998,16 @@ unsafe fn egg_to_llvm(egg_node: &VecLang, translation_metadata: Egg2LLVMState) - panic!("Get was found. Egg to LLVM Translation does not handle get nodes.") } VecLang::Ite(..) => panic!("Ite was found. Egg to LLVM Translation does not handle ite nodes."), - VecLang::Or([l, r]) => panic!("Or was found. Egg to LLVM Translation does not handle or nodes."), - VecLang::And([l, r]) => panic!("And was found. Egg to LLVM Translation does not handle and nodes."), - VecLang::Lt([l, r]) => panic!("Lt was found. Egg to LLVM Translation does not handle lt nodes."), + VecLang::Or(..) => panic!("Or was found. Egg to LLVM Translation does not handle or nodes."), + VecLang::And(..) => panic!("And was found. Egg to LLVM Translation does not handle and nodes."), + VecLang::Lt(..) => panic!("Lt was found. Egg to LLVM Translation does not handle lt nodes."), VecLang::Sgn(..) => panic!("Sgn was found. Egg to LLVM Translation does not handle sgn nodes. TODO: In the future, tis node will be handled alongside sqrt and neg scalar nodes."), VecLang::VecSgn(..) => panic!("VecSgn was found. Egg to LLVM Translation does not handle vecsgn nodes. 
TODO: In the future, this node will be handled alongside VecSqrt and VecNeg vector nodes."), VecLang::Arg(_) => arg_to_llvm(egg_node, translation_metadata), VecLang::Reg(_) => reg_to_llvm(egg_node, translation_metadata), VecLang::Num(n) => num_to_llvm(n, translation_metadata), VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { - vec_to_llvm(*boxed_ids, translation_metadata) + vec_to_llvm(&*boxed_ids, translation_metadata) } VecLang::VecAdd([l, r]) | VecLang::VecMinus([l, r]) @@ -1163,103 +1032,43 @@ unsafe fn egg_to_llvm(egg_node: &VecLang, translation_metadata: Egg2LLVMState) - } } -// ---- Construction Zone ------- - -unsafe fn gen_type_cast( - val: LLVMValueRef, - typ1: LLVMTypeRef, - typ2: LLVMTypeRef, - context: LLVMContextRef, - builder: LLVMBuilderRef, -) -> LLVMValueRef { - if typ1 == LLVMInt32TypeInContext(context) && typ2 == LLVMInt64TypeInContext(context) { - return LLVMBuildZExt(builder, val, typ2, b"\0".as_ptr() as *const _); - } else if typ1 == LLVMInt16TypeInContext(context) && typ2 == LLVMInt64TypeInContext(context) { - return LLVMBuildZExt(builder, val, typ2, b"\0".as_ptr() as *const _); - } else if typ1 == LLVMInt16TypeInContext(context) && typ2 == LLVMInt32TypeInContext(context) { - return LLVMBuildZExt(builder, val, typ2, b"\0".as_ptr() as *const _); - } - LLVMDumpType(typ1); - println!(); - LLVMDumpType(typ2); - println!(); - panic!("Cannot convert between {:?} {:?}\n.", typ1, typ2); -} - unsafe fn egg_to_llvm_main( expr: RecExpr, - gep_map: &GEPMap, - store_map: &StoreMap, - symbol_map: &SymbolMap, - llvm_arg_pairs: &mut LLVMPairMap, - node_to_arg_pair: &Vec, + llvm2egg_metadata: &LLVM2EggState, module: LLVMModuleRef, context: LLVMContextRef, builder: LLVMBuilderRef, ) -> () { - // in fact this will look rather similar to translation from egg to llvm - // the major differece is how we reconstruct loads and stores - // whenever we encounter a get instruction, we retranslate as a gep and then a load - // whenever we encounter an operand that is within the store map, we immediately build a store too. - // This should maintain the translation - - // Note: You must include all instructions in the basic block, up to the final store - // The builder mount location must be immediately at the beginning of the basic block to start writing instrucitons - - // Walk the RecExpr and translate it in place to LLVM - let enode_vec = expr.as_ref(); - let last_enode = enode_vec + // Walk the RecExpr of Egg Nodes and translate it in place to LLVM + let egg_nodes = expr.as_ref(); + let last_egg_node = egg_nodes .last() .expect("No match for last element of vector of Egg Terms."); - let vector = translate_egg( - last_enode, - enode_vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - - // Add in the Stores - for (i, (_, addr)) in store_map.iter().enumerate() { + let translation_metadata = Egg2LLVMState { + egg_nodes_vector: egg_nodes, + llvm2egg_metadata: llvm2egg_metadata.clone(), + builder: builder, + context: context, + module: module, + }; + let llvm_vector = egg_to_llvm(last_egg_node, &translation_metadata); + + // HERE, we stitch our work back into the current LLVM code + + // NOTE: We Assume Vectorizer will maintain relative positions of elements in vector + // Extract the elements of the vector, to be assigned back to where they are to be used. 
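// (Concretely, under that assumption: if the chunk's start instructions were s0, s1, s2, s3
// and the optimized egg expression lowered to an LLVM vector %v, then lane 0 of %v replaces
// every use of s0, lane 1 replaces every use of s1, and so on. s0..s3 and %v are
// illustrative names, not values taken from this patch.)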
+ let num_extractions = llvm2egg_metadata.start_instructions.len(); + for i in 0..num_extractions { + let old_instr = llvm2egg_metadata + .start_instructions + .get(i) + .expect("Index should be in vector."); + // Build the extracted value let index = LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0); - let mut extracted_value = - LLVMBuildExtractElement(builder, vector, index, b"\0".as_ptr() as *const _); - // check if the extracted type is an float and the address is a int ptr - let mut_addr = if !isa_floatptr(*addr) { - let addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(*addr)); - let new_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(context), addr_space); - LLVMBuildBitCast(builder, *addr, new_ptr_type, b"\0".as_ptr() as *const _) - } else { - *addr - }; - if isa_argument(mut_addr) { - if LLVMTypeOf(extracted_value) != LLVMGetElementType(LLVMTypeOf(mut_addr)) { - extracted_value = gen_type_cast( - extracted_value, - LLVMTypeOf(extracted_value), - LLVMGetElementType(LLVMTypeOf(mut_addr)), - context, - builder, - ); - } - LLVMBuildStore(builder, extracted_value, mut_addr); - } else { - let new_addr = llvm_recursive_add(builder, mut_addr, context, llvm_arg_pairs); - if LLVMTypeOf(extracted_value) != LLVMGetElementType(LLVMTypeOf(mut_addr)) { - extracted_value = gen_type_cast( - extracted_value, - LLVMTypeOf(extracted_value), - LLVMGetElementType(LLVMTypeOf(mut_addr)), - context, - builder, - ); - } - LLVMBuildStore(builder, extracted_value, new_addr); - } + let extracted_value = + LLVMBuildExtractElement(builder, llvm_vector, index, b"\0".as_ptr() as *const _); + // Replace all the uses of the old instruction with the new extracted value + // Old instruction cannot have been removed. + LLVMReplaceAllUsesWith(*old_instr, extracted_value); } } From 841318be14060a9b329180ade58343e43759b04a Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sun, 1 May 2022 03:44:09 -0400 Subject: [PATCH 078/143] fix up diospyros.cpp Still crash for simple case Trying to figure out what causes crash --- src/dios-egraphs/Diospyros/diospyros.cpp | 513 +++-------------------- src/dios-egraphs/Diospyros/src/lib.rs | 91 ++-- 2 files changed, 86 insertions(+), 518 deletions(-) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 694220fd..4eb22cbf 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -40,27 +40,13 @@ llvm::cl::opt PrintOpt("p", llvm::cl::desc("Print Egg Optimization.")); llvm::cl::alias PrintOptAlias("print", llvm::cl::desc("Alias for -p"), llvm::cl::aliasopt(PrintOpt)); -typedef struct IntLLVMPair { - uint32_t node_int; - LLVMValueRef arg; -} IntLLVMPair; - -typedef struct LLVMPair { - LLVMValueRef original_value; - LLVMValueRef new_value; -} LLVMPair; - -typedef struct VectorPointerSize { - LLVMPair const *llvm_pointer; - std::size_t llvm_pointer_size; -} VectorPointerSize; - -extern "C" VectorPointerSize optimize(LLVMModuleRef mod, LLVMContextRef context, - LLVMBuilderRef builder, - LLVMValueRef const *bb, std::size_t size, - LLVMPair const *past_instrs, - std::size_t past_size, bool run_egg, - bool print_opt); +extern "C" void optimize(LLVMModuleRef mod, LLVMContextRef context, + LLVMBuilderRef builder, + LLVMValueRef const *chunk_instrs, + std::size_t chunk_size, + LLVMValueRef const *restricted_instrs, + std::size_t restricted_size, bool run_egg, + bool print_opt); const string ARRAY_NAME = "no-array-name"; const string TEMP_NAME = "no-temp-name"; @@ -453,72 +439,6 @@ extern 
"C" LLVMValueRef build_constant_float(double n, LLVMContextRef context) { return wrap(ConstantFP::get(float_type, n)); } -Instruction *dfs_instructions(Instruction *current_instr, - std::vector &translated_exprs, - BasicBlock *B) { - Instruction *cloned_instr = current_instr->clone(); - if (isa(current_instr)) { - return current_instr; - } else if (isa(current_instr)) { - return current_instr; - } else if (isa(current_instr)) { - for (LLVMPair pair : translated_exprs) { - Instruction *original_val = - dyn_cast(unwrap(pair.original_value)); - Instruction *new_val = - dyn_cast(unwrap(pair.new_value)); - if (current_instr == original_val) { - return new_val; - } - } - LLVMPair new_pair; - // assert(isa(current_instr) || - // isa(current_instr)); assert(isa(cloned_instr) - // || isa(cloned_instr)); - new_pair.original_value = wrap(current_instr); - new_pair.new_value = wrap(cloned_instr); - translated_exprs.push_back(new_pair); - - BasicBlock::InstListType &intermediate_instrs = B->getInstList(); - intermediate_instrs.push_back(cloned_instr); - return cloned_instr; - } - - int num_operands = current_instr->getNumOperands(); - for (int i = 0; i < num_operands; i++) { - Instruction *arg = dyn_cast(current_instr->getOperand(i)); - if (arg != NULL) { - Instruction *cloned_arg = - dfs_instructions(arg, translated_exprs, B); - cloned_instr->setOperand(i, cloned_arg); - } - } - - // if (isa(current_instr)) { - bool in_map = false; - for (LLVMPair pair : translated_exprs) { - Instruction *original_val = - dyn_cast(unwrap(pair.original_value)); - if (current_instr == original_val) { - in_map = true; - } - } - if (!in_map) { - LLVMPair new_pair; - // assert(isa(current_instr) || - // isa(current_instr)); - // assert(isa(cloned_instr) || - // isa(cloned_instr)); - new_pair.original_value = wrap(current_instr); - new_pair.new_value = wrap(cloned_instr); - translated_exprs.push_back(new_pair); - } - // } - BasicBlock::InstListType &intermediate_instrs = B->getInstList(); - intermediate_instrs.push_back(cloned_instr); - return cloned_instr; -} - bool is_memset_variety(CallInst *inst) { Function *function = inst->getCalledFunction(); if (function != NULL) { @@ -562,41 +482,6 @@ bool call_is_not_sqrt(CallInst *inst) { // will be done } -std::vector dfs_in_basic_block( - Instruction *instr, std::vector basic_block_instrs, - std::vector visited_instrs) { - if (isa(instr)) { - assert(false); - } - errs() << "Incoming instr\n"; - errs() << *instr << "\n"; - assert(std::find(basic_block_instrs.begin(), basic_block_instrs.end(), - instr) != basic_block_instrs.end()); - assert(std::find(visited_instrs.begin(), visited_instrs.end(), instr) == - visited_instrs.end()); - visited_instrs.push_back(instr); - std::vector output = {}; - int num_operands = instr->getNumOperands(); - for (int i = 0; i < num_operands; i++) { - Instruction *arg = dyn_cast(instr->getOperand(i)); - if (arg != NULL && !isa(arg) && - std::find(visited_instrs.begin(), visited_instrs.end(), arg) == - visited_instrs.end() && - std::find(basic_block_instrs.begin(), basic_block_instrs.end(), - arg) != basic_block_instrs.end() && - arg->getNumOperands() > 0) { - errs() << "Incoming arg\n"; - errs() << *arg << "\n"; - std::vector new_instrs = - dfs_in_basic_block(arg, basic_block_instrs, visited_instrs); - for (Instruction *new_instr : new_instrs) { - output.push_back(new_instr); - } - } - } - return output; -} - /** * Below is the main DiospyrosPass that activates the Rust lib.rs code, * which calls the Egg vectorizer and rewrites the optimized code in 
place. @@ -615,141 +500,31 @@ struct DiospyrosPass : public FunctionPass { return false; } bool has_changes = false; - std::vector translated_exprs = {}; for (auto &B : F) { - // Emergency conditions - // Bail if instruction is not in list of handleable instructions - // TODO: need to identify not handleable instructions - bool has_excluded_instr = false; - for (auto &I : B) { - if (isa(I)) { - has_excluded_instr = true; - } - } - if (has_excluded_instr) { - continue; - } - - // Bail if instruction is used outside the current basic block - bool instr_used_twice = false; - for (auto &I : B) { - Instruction *original_instr = dyn_cast(&I); - for (auto &otherB : F) { - if (otherB.getName() != B.getName()) { - for (auto &otherI : otherB) { - Instruction *other_instr = - dyn_cast(&otherI); - int num_operands = other_instr->getNumOperands(); - for (int i = 0; i < num_operands; i++) { - Instruction *use = dyn_cast( - other_instr->getOperand(i)); - if (use != NULL) { - if (use == original_instr) { - instr_used_twice = true; - } - } - } - } - } - } - } - if (instr_used_twice) { - continue; - } + // ------------ Construction Zone --------------- + // TODO: Consider removing as the new procedure can overcome this // We skip over basic blocks without floating point types bool has_float = false; for (auto &I : B) { - errs() << "All instructions\n"; - errs() << I << "\n"; if (I.getType()->isFloatTy()) { has_float = true; } } if (!has_float) { - for (auto &I : B) { - auto *op = wrap(dyn_cast(&I)); - LLVMPair new_pair; - new_pair.original_value = op; - new_pair.new_value = op; - translated_exprs.push_back(new_pair); - } - continue; - } - // We also skip over all basic blocks without stores or related - // memory operations - bool has_store_or_mem_intrinsic = false; - for (auto &I : B) { - if (auto *op = dyn_cast(&I)) { - has_store_or_mem_intrinsic = true; - } else if (auto *op = dyn_cast(&I)) { - has_store_or_mem_intrinsic = true; - } else if (auto *op = dyn_cast(&I)) { - has_store_or_mem_intrinsic = true; - } else if (auto *op = dyn_cast(&I)) { - has_store_or_mem_intrinsic = true; - } else if (CallInst *op = dyn_cast(&I)) { - if (is_memset_variety(op)) { - has_store_or_mem_intrinsic = true; - } else if (is_memcopy_variety(op)) { - has_store_or_mem_intrinsic = true; - } else if (is_memmove_variety(op)) { - has_store_or_mem_intrinsic = true; - } - } - } - if (!has_store_or_mem_intrinsic) { - for (auto &I : B) { - auto *op = wrap(dyn_cast(&I)); - LLVMPair new_pair; - new_pair.original_value = op; - new_pair.new_value = op; - translated_exprs.push_back(new_pair); - } continue; } - // We also skip over all basic blocks with Select as that is not - // translatable into Egg - bool has_select = false; - for (auto &I : B) { - if (auto *op = dyn_cast(&I)) { - has_select = true; - } - } - if (has_select) { - for (auto &I : B) { - auto *op = wrap(dyn_cast(&I)); - LLVMPair new_pair; - new_pair.original_value = op; - new_pair.new_value = op; - translated_exprs.push_back(new_pair); - } - continue; - } - - // We grab all the block args: the phi nodes of the block - std::vector phi_instrs = {}; - for (auto &I : B) { - if (auto *op = dyn_cast(&I)) { - Instruction *phi = dyn_cast(&I); - assert(phi != NULL); - phi_instrs.push_back(phi); - } - } - - // Grab the terminator from the LLVM Basic Block - Instruction *terminator = B.getTerminator(); - Instruction *cloned_terminator = terminator->clone(); + // ------------ Construction Zone --------------- + // Assumes Alias Analysis Movement Pass has been done previously + // Pulls 
out Instructions into sections of code called "Chunks" + // std::vector> vectorization_accumulator; std::vector inner_vector = {}; - std::set store_locations; - std::vector bb_instrs = {}; for (auto &I : B) { if (auto *op = dyn_cast(&I)) { Value *store_loc = op->getOperand(1); - store_locations.insert(store_loc); inner_vector.push_back(wrap(op)); } else if (auto *op = dyn_cast(&I)) { if (!inner_vector.empty()) { @@ -758,7 +533,6 @@ struct DiospyrosPass : public FunctionPass { inner_vector = {wrap(op)}; vectorization_accumulator.push_back(inner_vector); inner_vector = {}; - store_locations.clear(); } else if (auto *op = dyn_cast(&I)) { if (!inner_vector.empty()) { vectorization_accumulator.push_back(inner_vector); @@ -766,7 +540,6 @@ struct DiospyrosPass : public FunctionPass { inner_vector = {wrap(op)}; vectorization_accumulator.push_back(inner_vector); inner_vector = {}; - store_locations.clear(); } else if (auto *op = dyn_cast(&I)) { if (!inner_vector.empty()) { vectorization_accumulator.push_back(inner_vector); @@ -774,7 +547,6 @@ struct DiospyrosPass : public FunctionPass { inner_vector = {wrap(op)}; vectorization_accumulator.push_back(inner_vector); inner_vector = {}; - store_locations.clear(); } else if (CallInst *call_inst = dyn_cast(&I)) { if (is_memset_variety(call_inst)) { if (!inner_vector.empty()) { @@ -784,7 +556,6 @@ struct DiospyrosPass : public FunctionPass { inner_vector = {wrap(memset)}; vectorization_accumulator.push_back(inner_vector); inner_vector = {}; - store_locations.clear(); } else if (is_memcopy_variety(call_inst)) { if (!inner_vector.empty()) { vectorization_accumulator.push_back(inner_vector); @@ -793,7 +564,6 @@ struct DiospyrosPass : public FunctionPass { inner_vector = {wrap(memcopy)}; vectorization_accumulator.push_back(inner_vector); inner_vector = {}; - store_locations.clear(); } else if (is_memmove_variety(call_inst)) { if (!inner_vector.empty()) { vectorization_accumulator.push_back(inner_vector); @@ -802,246 +572,69 @@ struct DiospyrosPass : public FunctionPass { inner_vector = {wrap(memmove)}; vectorization_accumulator.push_back(inner_vector); inner_vector = {}; - store_locations.clear(); } - // else if (call_is_not_sqrt(call_inst)) { - // // All Calls that are not to sqrt functions - // // are not optimized. - // errs() << "There was a call!\n"; - // errs() << *call_inst << "\n"; - // if (!inner_vector.empty()) { - // vectorization_accumulator.push_back(inner_vector); - // } - // Instruction *call = dyn_cast(call_inst); - // inner_vector = {wrap(call)}; - // vectorization_accumulator.push_back(inner_vector); - // inner_vector = {}; - // store_locations.clear(); - // } } else if (auto *op = dyn_cast(&I)) { Value *load_loc = op->getOperand(0); if (!inner_vector.empty()) { vectorization_accumulator.push_back(inner_vector); } inner_vector = {}; - store_locations.clear(); } - bb_instrs.push_back(dyn_cast(&I)); } if (!inner_vector.empty()) { vectorization_accumulator.push_back(inner_vector); } - // acquire all instructions in a basic block - std::vector basic_block_instrs = {}; - for (auto &I : B) { - Instruction *instr = dyn_cast(&I); - assert(instr != NULL); - basic_block_instrs.push_back(instr); - } - - // Acquire each of the instructions in the "run" that terminates at - // a store We will send these instructions to optimize. 
- - // maintain list of all instructions processed thus far in basic - // block via DFS - std::vector dfs_bb_instrs = {}; - for (auto &vec : vectorization_accumulator) { - if (not vec.empty()) { - // check that a instruction is not used multiple times - // within a chunk - bool instr_used_twice_in_chunk = false; - for (auto *instr : vec) { - Instruction *first_instr = - dyn_cast(unwrap(instr)); - for (auto *other_instr : vec) { - Instruction *second_instr = - dyn_cast(unwrap(instr)); - if (first_instr != second_instr) { - int num_operands = - second_instr->getNumOperands(); - for (int i = 0; i < num_operands; i++) { - Instruction *use = dyn_cast( - second_instr->getOperand(i)); - if (use != NULL) { - if (use == first_instr) { - instr_used_twice_in_chunk = true; - } - } - } - } - } - } - if (instr_used_twice_in_chunk) { - continue; - } + for (int i = 0; i < vectorization_accumulator.size(); ++i) { + auto &chunk_vector = vectorization_accumulator[i]; + if (chunk_vector.empty()) { + continue; + } - // check that an instruction is not used - // outside the chunk - bool instr_used_twice_outside_chunk = false; - for (auto &chunk : vectorization_accumulator) { - if (chunk != vec) { - for (auto *first_instr : vec) { - Instruction *first = - dyn_cast(unwrap(first_instr)); - for (auto *second_instr : chunk) { - Instruction *second = dyn_cast( - unwrap(second_instr)); - int num_operands = second->getNumOperands(); - for (int i = 0; i < num_operands; i++) { - Instruction *use = - dyn_cast( - second->getOperand(i)); - if (use != NULL) { - if (use == first) { - instr_used_twice_outside_chunk = - true; - } - } - } - } + // If an instruction is used multiple times outside the chunk, + // add it to a restricted list. + // TODO: only consider future chunks! + std::vector restricted_instrs = {}; + for (auto chunk_instr : chunk_vector) { + for (auto j = i + 1; j < vectorization_accumulator.size(); + ++j) { + // guaranteed to be a different chunk vector ahead of + // the origianl one. 
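// (Intent, as described above: an instruction from this chunk that a later chunk still
// refers to is handed to the Rust side as a "restricted" instruction, and llvm_to_egg
// then models it as an opaque Reg node instead of re-translating it. Two details worth
// double-checking here: the inner loop reads vectorization_accumulator[i] where the
// surrounding comment suggests [j], and the equality test compares chunk membership
// rather than operand uses.)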
+ bool must_restrict = false; + auto &other_chunk_vector = vectorization_accumulator[i]; + for (auto other_chunk_instr : other_chunk_vector) { + if (unwrap(chunk_instr) == + unwrap(other_chunk_instr)) { + restricted_instrs.push_back(chunk_instr); + must_restrict = true; + break; } } - } - if (instr_used_twice_outside_chunk) { - continue; - } - - has_changes = has_changes || true; - Value *last_store = unwrap(vec.back()); - IRBuilder<> builder(dyn_cast(last_store)); - Instruction *store_instr = - dyn_cast(last_store); - // gather all instructions - // for (Instruction *bbinst : basic_block_instrs) { - // if (bbinst == store_instr) { - // errs() << "Match!\n"; - // errs() << *store_instr << "\n"; - // } - // } - // dfs_in_basic_block(store_instr, basic_block_instrs, - // dfs_bb_instrs); - if (auto *op = dyn_cast(store_instr)) { - assert(isa(store_instr)); - builder.SetInsertPoint(store_instr); - builder.SetInsertPoint(&B); - Module *mod = F.getParent(); - LLVMContext &context = F.getContext(); - VectorPointerSize pair = optimize( - wrap(mod), wrap(&context), wrap(&builder), - vec.data(), vec.size(), translated_exprs.data(), - translated_exprs.size(), RunOpt, PrintOpt); - int size = pair.llvm_pointer_size; - - LLVMPair const *expr_array = pair.llvm_pointer; - for (int i = 0; i < size; i++) { - translated_exprs.push_back(expr_array[i]); + if (must_restrict) { + break; } - } else { - assert(isa(last_store) || - isa(last_store) || - isa(last_store) || - (isa(last_store) && - is_memset_variety( - dyn_cast(last_store))) || - (isa(last_store) && - is_memcopy_variety( - dyn_cast(last_store))) || - (isa(last_store) && - is_memmove_variety( - dyn_cast(last_store))) || - (isa(last_store))); - - dfs_instructions(store_instr, translated_exprs, &B); - } - - // Trim down translated_exprs - std::vector new_translated_exprs = {}; - for (int i = 0; i < translated_exprs.size(); i++) { - LLVMPair final_instr = translated_exprs.back(); - translated_exprs.pop_back(); - new_translated_exprs.push_back(final_instr); } - translated_exprs = new_translated_exprs; } - } - - // // grab unprocessed instructions - // std::vector missed_instrs = {}; - // for (auto &I : B) { - // Instruction *instr = dyn_cast(&I); - // assert(instr != NULL); - // if (std::find(dfs_bb_instrs.begin(), dfs_bb_instrs.end(), - // instr) != dfs_bb_instrs.end()) { - // errs() << "Missed instrs\n"; - // errs() << *instr << "\n"; - // missed_instrs.push_back(instr); - // } - // } - // // add in unprocessed phi instructions at front of basic block - // BasicBlock::InstListType &intermediate_instrs = B.getInstList(); - // for (Instruction *missed_instr : missed_instrs) { - // if (isa(missed_instr)) { - // intermediate_instrs.push_front(missed_instr); - // } - // } - - // // add in the "unprocessed" instructions that dfs on memory ops - // // missed - // for (Instruction *missed_instr : missed_instrs) { - // if (!isa(missed_instr)) { - // Instruction *cloned_instr = missed_instr->clone(); - // intermediate_instrs.push_back(cloned_instr); - // for (auto &U : missed_instr->uses()) { - // User *user = U.getUser(); // user of the add; could - // be - // // a store, for example - // user->setOperand(U.getOperandNo(), cloned_instr); - // } - // errs() << "Adding instruction\n"; - // errs() << *missed_instr << "\n"; - // } - // } - - // delete old instructions that are memory related; adce will handle - // rest - std::reverse(bb_instrs.begin(), bb_instrs.end()); - for (auto &I : bb_instrs) { - if (I->isTerminator()) { - I->eraseFromParent(); - } else if 
(isa(I)) { - I->eraseFromParent(); - } else if ((isa(I) && - is_memset_variety(dyn_cast(I))) || - (isa(I) && - is_memcopy_variety(dyn_cast(I))) || - (isa(I) && - is_memmove_variety(dyn_cast(I)))) { - I->eraseFromParent(); - } else if (isa(I)) { - I->eraseFromParent(); - } else if (isa(I)) { - I->eraseFromParent(); - } else if (isa(I)) { - I->eraseFromParent(); - } + has_changes = has_changes || true; + Value *last_instr_val = unwrap(chunk_vector.back()); + Instruction *last_instr = dyn_cast(last_instr_val); + assert(last_instr != NULL); + IRBuilder<> builder(last_instr); + builder.SetInsertPoint(&B); + + Module *mod = F.getParent(); + LLVMContext &context = F.getContext(); + optimize(wrap(mod), wrap(&context), wrap(&builder), + chunk_vector.data(), chunk_vector.size(), + restricted_instrs.data(), restricted_instrs.size(), + RunOpt, PrintOpt); } - // add back the terminator - BasicBlock::InstListType &final_instrs = B.getInstList(); - final_instrs.push_back(cloned_terminator); - - // Trim down translated_exprs - std::vector new_translated_exprs = {}; - for (int i = 0; i < translated_exprs.size(); i++) { - LLVMPair final_instr = translated_exprs.back(); - translated_exprs.pop_back(); - new_translated_exprs.push_back(final_instr); - } - translated_exprs = new_translated_exprs; + // TODO: delete old instructions that are memory related; adce + // will handle the remainder } - return true; + return has_changes; }; }; } // namespace diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index b56fdce9..23b7ab5e 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -6,7 +6,6 @@ use llvm::{core::*, prelude::*, LLVMOpcode::*, LLVMRealPredicate}; use std::{ cmp, collections::{BTreeMap, BTreeSet}, - mem, os::raw::c_char, slice::from_raw_parts, }; @@ -58,7 +57,7 @@ unsafe fn gen_reg_idx() -> u32 { // Reference Comparison: https://www.reddit.com/r/rust/comments/2r3wjk/is_there_way_to_compare_objects_by_address_in_rust/ // Compares whether addresses of LLVMValueRefs are the same. // Not the contents of the Value Refs -fn _cmp_val_ref_address(a1: &llvm::LLVMValue, a2: &llvm::LLVMValue) -> bool { +fn cmp_val_ref_address(a1: &llvm::LLVMValue, a2: &llvm::LLVMValue) -> bool { a1 as *const _ == a2 as *const _ } @@ -135,49 +134,32 @@ unsafe fn translate_unop( /// Main function to optimize: Takes in a basic block of instructions, /// optimizes it, and then translates it to LLVM IR code, in place. 
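The hunk below drops the IntLLVMPair/LLVMPair/VectorPointerSize plumbing and narrows optimize to the current chunk plus its restricted list, returning nothing. As a hedged sketch only, the C-side prototype implied by the new Rust signature and by the call site in the pass would look roughly like the following, assuming the usual llvm-c typedefs; the declaration actually used by the pass is not shown in this patch:

    #include <cstddef>
    #include <llvm-c/Core.h>

    // Sketch of the FFI boundary only; mirrors the #[no_mangle] Rust signature below.
    // chunk_instrs/chunk_size: instructions of the current chunk.
    // restricted_instrs/restricted_size: instructions also appearing in later chunks.
    extern "C" void optimize(LLVMModuleRef module, LLVMContextRef context,
                             LLVMBuilderRef builder,
                             LLVMValueRef const *chunk_instrs, std::size_t chunk_size,
                             LLVMValueRef const *restricted_instrs,
                             std::size_t restricted_size, bool run_egg, bool print_opt);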
-#[repr(C)] -pub struct IntLLVMPair { - node_int: u32, - arg: LLVMValueRef, -} - -#[repr(C)] -pub struct LLVMPair { - original_value: LLVMValueRef, - new_value: LLVMValueRef, -} - -#[repr(C)] -pub struct VectorPointerSize { - llvm_pointer: *const LLVMPair, - llvm_pointer_size: size_t, -} - #[no_mangle] pub fn optimize( module: LLVMModuleRef, context: LLVMContextRef, builder: LLVMBuilderRef, - bb: *const LLVMValueRef, - size: size_t, - past_instrs: *const LLVMPair, - past_size: size_t, + chunk_instrs: *const LLVMValueRef, + chunk_size: size_t, + restricted_instrs: *const LLVMValueRef, + restricted_size: size_t, run_egg: bool, print_opt: bool, -) -> VectorPointerSize { +) -> () { unsafe { + // preprocessing of instructions + let chunk_llvm_instrs = from_raw_parts(chunk_instrs, chunk_size); + let restricted_llvm_instrs = from_raw_parts(restricted_instrs, restricted_size); + // llvm to egg - let llvm_instrs = from_raw_parts(bb, size); - let past_llvm_instrs = from_raw_parts(past_instrs, past_size); - let mut llvm_arg_pairs = BTreeMap::new(); - for instr_pair in past_llvm_instrs { - let original_value = instr_pair.original_value; - let new_value = instr_pair.new_value; - // assert!(isa_load(original_value) || isa_alloca(original_value)); - // assert!(isa_load(new_value) || isa_alloca(new_value)); - llvm_arg_pairs.insert(original_value, new_value); + let (egg_expr, llvm2egg_metadata) = + llvm_to_egg_main(chunk_llvm_instrs, restricted_llvm_instrs, true); + + // Bail if no egg Nodes to optimize + if egg_expr.as_ref().is_empty() { + eprintln!("No Egg Nodes in Optimization Vector"); + return; } - let (egg_expr, llvm2egg_metadata) = llvm_to_egg_main(llvm_instrs, &[], true); // optimization pass if print_opt { @@ -194,29 +176,6 @@ pub fn optimize( // egg to llvm egg_to_llvm_main(best_egg_expr, &llvm2egg_metadata, module, context, builder); - - let mut final_llvm_arg_pairs = Vec::new(); - for (unchanged_val, new_val) in llvm_arg_pairs.iter() { - let pair = LLVMPair { - original_value: *unchanged_val, - new_value: *new_val, - }; - // assert!(isa_load(*unchanged_val) || isa_alloca(*unchanged_val)); - // assert!(isa_load(*new_val) || isa_alloca(*new_val)); - final_llvm_arg_pairs.push(pair); - } - - // https://stackoverflow.com/questions/39224904/how-to-expose-a-rust-vect-to-ffi - let mut llvm_arg_pairs_boxed_slice: Box<[LLVMPair]> = final_llvm_arg_pairs.into_boxed_slice(); - let llvm_arg_pairs_array: *mut LLVMPair = llvm_arg_pairs_boxed_slice.as_mut_ptr(); - let llvm_arg_pairs_array_len: usize = llvm_arg_pairs_boxed_slice.len(); - mem::forget(llvm_arg_pairs_boxed_slice); - - // TODO: FIX THIS - return VectorPointerSize { - llvm_pointer: llvm_arg_pairs_array, - llvm_pointer_size: llvm_arg_pairs_array_len, - }; } } @@ -638,6 +597,22 @@ unsafe fn llvm_to_egg_main( restricted_instrs_set.insert(*llvm_instr); } + // Invariant: every restricted instruction is in the chunk, using a pointer check + for restr_instr in restricted_instrs.iter() { + let mut found_match = false; + for instr in instructions_in_chunk.iter() { + if cmp_val_ref_address(&**restr_instr, &**instr) { + found_match = true; + break; + } + } + if found_match { + continue; + } + } + // Invariant: chunk instructions are not empty in size + assert!(!instructions_in_chunk.is_empty()); + // State Variable To Hold Maps During Translation let mut translation_metadata = LLVM2EggState { llvm2reg: llvm_instr2reg_node, From 760769eea5040b719cc7060c39c1ec5e4f3464dd Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sun, 1 May 2022 03:45:29 -0400 Subject: 
[PATCH 079/143] change some var names to be chunk based --- src/dios-egraphs/Diospyros/diospyros.cpp | 85 ++++++++++++------------ 1 file changed, 42 insertions(+), 43 deletions(-) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 4eb22cbf..8b1ac4bf 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -520,73 +520,73 @@ struct DiospyrosPass : public FunctionPass { // Assumes Alias Analysis Movement Pass has been done previously // Pulls out Instructions into sections of code called "Chunks" // - std::vector> vectorization_accumulator; - std::vector inner_vector = {}; + std::vector> chunk_accumulator; + std::vector chunk_vector = {}; for (auto &I : B) { if (auto *op = dyn_cast(&I)) { Value *store_loc = op->getOperand(1); - inner_vector.push_back(wrap(op)); + chunk_vector.push_back(wrap(op)); } else if (auto *op = dyn_cast(&I)) { - if (!inner_vector.empty()) { - vectorization_accumulator.push_back(inner_vector); + if (!chunk_vector.empty()) { + chunk_accumulator.push_back(chunk_vector); } - inner_vector = {wrap(op)}; - vectorization_accumulator.push_back(inner_vector); - inner_vector = {}; + chunk_vector = {wrap(op)}; + chunk_accumulator.push_back(chunk_vector); + chunk_vector = {}; } else if (auto *op = dyn_cast(&I)) { - if (!inner_vector.empty()) { - vectorization_accumulator.push_back(inner_vector); + if (!chunk_vector.empty()) { + chunk_accumulator.push_back(chunk_vector); } - inner_vector = {wrap(op)}; - vectorization_accumulator.push_back(inner_vector); - inner_vector = {}; + chunk_vector = {wrap(op)}; + chunk_accumulator.push_back(chunk_vector); + chunk_vector = {}; } else if (auto *op = dyn_cast(&I)) { - if (!inner_vector.empty()) { - vectorization_accumulator.push_back(inner_vector); + if (!chunk_vector.empty()) { + chunk_accumulator.push_back(chunk_vector); } - inner_vector = {wrap(op)}; - vectorization_accumulator.push_back(inner_vector); - inner_vector = {}; + chunk_vector = {wrap(op)}; + chunk_accumulator.push_back(chunk_vector); + chunk_vector = {}; } else if (CallInst *call_inst = dyn_cast(&I)) { if (is_memset_variety(call_inst)) { - if (!inner_vector.empty()) { - vectorization_accumulator.push_back(inner_vector); + if (!chunk_vector.empty()) { + chunk_accumulator.push_back(chunk_vector); } Instruction *memset = dyn_cast(call_inst); - inner_vector = {wrap(memset)}; - vectorization_accumulator.push_back(inner_vector); - inner_vector = {}; + chunk_vector = {wrap(memset)}; + chunk_accumulator.push_back(chunk_vector); + chunk_vector = {}; } else if (is_memcopy_variety(call_inst)) { - if (!inner_vector.empty()) { - vectorization_accumulator.push_back(inner_vector); + if (!chunk_vector.empty()) { + chunk_accumulator.push_back(chunk_vector); } Instruction *memcopy = dyn_cast(call_inst); - inner_vector = {wrap(memcopy)}; - vectorization_accumulator.push_back(inner_vector); - inner_vector = {}; + chunk_vector = {wrap(memcopy)}; + chunk_accumulator.push_back(chunk_vector); + chunk_vector = {}; } else if (is_memmove_variety(call_inst)) { - if (!inner_vector.empty()) { - vectorization_accumulator.push_back(inner_vector); + if (!chunk_vector.empty()) { + chunk_accumulator.push_back(chunk_vector); } Instruction *memmove = dyn_cast(call_inst); - inner_vector = {wrap(memmove)}; - vectorization_accumulator.push_back(inner_vector); - inner_vector = {}; + chunk_vector = {wrap(memmove)}; + chunk_accumulator.push_back(chunk_vector); + chunk_vector = {}; } } else if (auto *op = dyn_cast(&I)) { 
Value *load_loc = op->getOperand(0); - if (!inner_vector.empty()) { - vectorization_accumulator.push_back(inner_vector); + if (!chunk_vector.empty()) { + chunk_accumulator.push_back(chunk_vector); } - inner_vector = {}; + chunk_vector = {}; } } - if (!inner_vector.empty()) { - vectorization_accumulator.push_back(inner_vector); + if (!chunk_vector.empty()) { + chunk_accumulator.push_back(chunk_vector); } - for (int i = 0; i < vectorization_accumulator.size(); ++i) { - auto &chunk_vector = vectorization_accumulator[i]; + for (int i = 0; i < chunk_accumulator.size(); ++i) { + auto &chunk_vector = chunk_accumulator[i]; if (chunk_vector.empty()) { continue; } @@ -596,12 +596,11 @@ struct DiospyrosPass : public FunctionPass { // TODO: only consider future chunks! std::vector restricted_instrs = {}; for (auto chunk_instr : chunk_vector) { - for (auto j = i + 1; j < vectorization_accumulator.size(); - ++j) { + for (auto j = i + 1; j < chunk_accumulator.size(); ++j) { // guaranteed to be a different chunk vector ahead of // the origianl one. bool must_restrict = false; - auto &other_chunk_vector = vectorization_accumulator[i]; + auto &other_chunk_vector = chunk_accumulator[i]; for (auto other_chunk_instr : other_chunk_vector) { if (unwrap(chunk_instr) == unwrap(other_chunk_instr)) { From 8c0fda6ca18ffbf68061c39a81e38f7b6e15718a Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sun, 1 May 2022 03:52:26 -0400 Subject: [PATCH 080/143] remove a lot of dead programs --- src/dios-egraphs/Diospyros/aa.ll.orig | 516 --- src/dios-egraphs/Diospyros/dce.ll | 3403 ----------------- src/dios-egraphs/Diospyros/err.txt | 312 -- src/dios-egraphs/Diospyros/err1.txt | 312 -- src/dios-egraphs/Diospyros/err2.txt | 3 - src/dios-egraphs/Diospyros/inline-float.c | 78 - .../Diospyros/inline-float.c.orig | 78 - src/dios-egraphs/Diospyros/runt_FAIL.py | 22 - src/dios-egraphs/Diospyros/test_FAIL.sh | 16 - 9 files changed, 4740 deletions(-) delete mode 100644 src/dios-egraphs/Diospyros/aa.ll.orig delete mode 100644 src/dios-egraphs/Diospyros/dce.ll delete mode 100644 src/dios-egraphs/Diospyros/err.txt delete mode 100644 src/dios-egraphs/Diospyros/err1.txt delete mode 100644 src/dios-egraphs/Diospyros/err2.txt delete mode 100644 src/dios-egraphs/Diospyros/inline-float.c delete mode 100644 src/dios-egraphs/Diospyros/inline-float.c.orig delete mode 100644 src/dios-egraphs/Diospyros/runt_FAIL.py delete mode 100644 src/dios-egraphs/Diospyros/test_FAIL.sh diff --git a/src/dios-egraphs/Diospyros/aa.ll.orig b/src/dios-egraphs/Diospyros/aa.ll.orig deleted file mode 100644 index 608340dd..00000000 --- a/src/dios-egraphs/Diospyros/aa.ll.orig +++ /dev/null @@ -1,516 +0,0 @@ -; ModuleID = 'build/opt.ll' -source_filename = "c-tests/inline-float.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@__const.main.A = private unnamed_addr constant [5 x float] [float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00], align 16 -@__const.main.expectedA = private unnamed_addr constant [5 x float] [float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00], align 16 -@.str = private unnamed_addr constant [14 x i8] c"C Output: %f\0A\00", align 1 -@.str.1 = private unnamed_addr constant [23 x i8] c"Expected C Output: %f\0A\00", align 1 -@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 -@.str.2 = private unnamed_addr constant [23 x i8] 
c"c-tests/inline-float.c\00", align 1 -@.str.3 = private unnamed_addr constant [34 x i8] c"fabs(expectedC[i] - C[i]) < DELTA\00", align 1 -@.memset_pattern = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 -@.memset_pattern.1 = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_test_inline(float* %0, float* %1, i32 %2) #0 { - %4 = icmp sgt i32 %2, 0 - %smax6 = select i1 %4, i32 %2, i32 0 - %wide.trip.count7 = zext i32 %smax6 to i64 - br i1 %4, label %.lr.ph11, label %.preheader - -.lr.ph11: ; preds = %3 - %5 = add nsw i64 %wide.trip.count7, -1 - %xtraiter14 = and i64 %wide.trip.count7, 3 - %6 = icmp ult i64 %5, 3 - br i1 %6, label %..preheader_crit_edge.unr-lcssa, label %.lr.ph11.new - -.lr.ph11.new: ; preds = %.lr.ph11 - %unroll_iter17 = and i64 %wide.trip.count7, 2147483644 - br label %13 - -..preheader_crit_edge.unr-lcssa: ; preds = %13, %.lr.ph11 - %indvars.iv9.unr = phi i64 [ 0, %.lr.ph11 ], [ %indvars.iv.next5.3, %13 ] - %lcmp.mod16.not = icmp eq i64 %xtraiter14, 0 - br i1 %lcmp.mod16.not, label %.preheader, label %.epil.preheader13 - -.epil.preheader13: ; preds = %.epil.preheader13, %..preheader_crit_edge.unr-lcssa - %indvars.iv9.epil = phi i64 [ %indvars.iv.next5.epil, %.epil.preheader13 ], [ %indvars.iv9.unr, %..preheader_crit_edge.unr-lcssa ] - %epil.iter15 = phi i64 [ %epil.iter15.sub, %.epil.preheader13 ], [ %xtraiter14, %..preheader_crit_edge.unr-lcssa ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv9.epil - %8 = load float, float* %7, align 4 - %9 = fmul float %8, 2.000000e+00 - %10 = getelementptr inbounds float, float* %1, i64 %indvars.iv9.epil - store float %9, float* %10, align 4 - %indvars.iv.next5.epil = add nuw nsw i64 %indvars.iv9.epil, 1 - %epil.iter15.sub = add i64 %epil.iter15, -1 - %epil.iter15.cmp.not = icmp eq i64 %epil.iter15.sub, 0 - br i1 %epil.iter15.cmp.not, label %.preheader, label %.epil.preheader13, !llvm.loop !3 - -.preheader: ; preds = %.epil.preheader13, %..preheader_crit_edge.unr-lcssa, %3 - br i1 %4, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %.preheader - %11 = add nsw i64 %wide.trip.count7, -1 - %xtraiter = and i64 %wide.trip.count7, 7 - %12 = icmp ult i64 %11, 7 - br i1 %12, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count7, 2147483640 - br label %30 - -13: ; preds = %13, %.lr.ph11.new - %indvars.iv9 = phi i64 [ 0, %.lr.ph11.new ], [ %indvars.iv.next5.3, %13 ] - %niter18 = phi i64 [ %unroll_iter17, %.lr.ph11.new ], [ %niter18.nsub.3, %13 ] - %14 = getelementptr inbounds float, float* %0, i64 %indvars.iv9 - %15 = load float, float* %14, align 4 - %16 = fmul float %15, 2.000000e+00 - %17 = getelementptr inbounds float, float* %1, i64 %indvars.iv9 - store float %16, float* %17, align 4 - %indvars.iv.next5 = or i64 %indvars.iv9, 1 - %18 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next5 - %19 = load float, float* %18, align 4 - %20 = fmul float %19, 2.000000e+00 - %21 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next5 - store float %20, float* %21, align 4 - %indvars.iv.next5.1 = or i64 %indvars.iv9, 2 - %22 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next5.1 - %23 = load float, float* %22, align 4 - %24 = fmul float %23, 2.000000e+00 - %25 = getelementptr inbounds float, 
float* %1, i64 %indvars.iv.next5.1 - store float %24, float* %25, align 4 - %indvars.iv.next5.2 = or i64 %indvars.iv9, 3 - %26 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next5.2 - %27 = load float, float* %26, align 4 - %28 = fmul float %27, 2.000000e+00 - %29 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next5.2 - store float %28, float* %29, align 4 - %indvars.iv.next5.3 = add nuw nsw i64 %indvars.iv9, 4 - %niter18.nsub.3 = add i64 %niter18, -4 - %niter18.ncmp.3.not = icmp eq i64 %niter18.nsub.3, 0 - br i1 %niter18.ncmp.3.not, label %..preheader_crit_edge.unr-lcssa, label %13 - -30: ; preds = %30, %.lr.ph.new - %.014 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %54, %30 ] - %indvars.iv3 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.7, %30 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.7, %30 ] - %31 = getelementptr inbounds float, float* %1, i64 %indvars.iv3 - %32 = load float, float* %31, align 4 - %33 = fadd float %.014, %32 - %indvars.iv.next = or i64 %indvars.iv3, 1 - %34 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next - %35 = load float, float* %34, align 4 - %36 = fadd float %33, %35 - %indvars.iv.next.1 = or i64 %indvars.iv3, 2 - %37 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.1 - %38 = load float, float* %37, align 4 - %39 = fadd float %36, %38 - %indvars.iv.next.2 = or i64 %indvars.iv3, 3 - %40 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.2 - %41 = load float, float* %40, align 4 - %42 = fadd float %39, %41 - %indvars.iv.next.3 = or i64 %indvars.iv3, 4 - %43 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.3 - %44 = load float, float* %43, align 4 - %45 = fadd float %42, %44 - %indvars.iv.next.4 = or i64 %indvars.iv3, 5 - %46 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.4 - %47 = load float, float* %46, align 4 - %48 = fadd float %45, %47 - %indvars.iv.next.5 = or i64 %indvars.iv3, 6 - %49 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.5 - %50 = load float, float* %49, align 4 - %51 = fadd float %48, %50 - %indvars.iv.next.6 = or i64 %indvars.iv3, 7 - %52 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.6 - %53 = load float, float* %52, align 4 - %54 = fadd float %51, %53 - %indvars.iv.next.7 = add nuw nsw i64 %indvars.iv3, 8 - %niter.nsub.7 = add i64 %niter, -8 - %niter.ncmp.7.not = icmp eq i64 %niter.nsub.7, 0 - br i1 %niter.ncmp.7.not, label %._crit_edge.unr-lcssa, label %30 - -._crit_edge.unr-lcssa: ; preds = %30, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %54, %30 ] - %.014.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %54, %30 ] - %indvars.iv3.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.7, %30 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.014.epil = phi float [ %57, %.epil.preheader ], [ %.014.unr, %._crit_edge.unr-lcssa ] - %indvars.iv3.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv3.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %55 = getelementptr inbounds float, float* %1, i64 %indvars.iv3.epil - %56 = load float, float* %55, align 4 - %57 = fadd float %.014.epil, %56 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv3.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 
%epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %.preheader - %.01.lcssa = phi float [ 0.000000e+00, %.preheader ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %57, %.epil.preheader ] - ret float %.01.lcssa -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_test(float* %0, float* %1, float* %2) #0 { - %4 = call float @no_opt_test_inline(float* %0, float* %1, i32 5) - store float %4, float* %2, align 4 - %5 = getelementptr inbounds float, float* %2, i64 1 - store float %4, float* %5, align 4 - %6 = getelementptr inbounds float, float* %2, i64 2 - store float %4, float* %6, align 4 - %7 = getelementptr inbounds float, float* %2, i64 3 - store float %4, float* %7, align 4 - %8 = getelementptr inbounds float, float* %2, i64 4 - store float %4, float* %8, align 4 - ret void -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @test_inline(float* %0, float* %1, i32 %2) #1 { - %4 = icmp sgt i32 %2, 0 - %smax6 = select i1 %4, i32 %2, i32 0 - %wide.trip.count7 = zext i32 %smax6 to i64 - br i1 %4, label %.lr.ph11, label %.preheader - -.lr.ph11: ; preds = %3 - %5 = add nsw i64 %wide.trip.count7, -1 - %xtraiter14 = and i64 %wide.trip.count7, 3 - %6 = icmp ult i64 %5, 3 - br i1 %6, label %..preheader_crit_edge.unr-lcssa, label %.lr.ph11.new - -.lr.ph11.new: ; preds = %.lr.ph11 - %unroll_iter17 = and i64 %wide.trip.count7, 2147483644 - br label %13 - -..preheader_crit_edge.unr-lcssa: ; preds = %13, %.lr.ph11 - %indvars.iv9.unr = phi i64 [ 0, %.lr.ph11 ], [ %indvars.iv.next5.3, %13 ] - %lcmp.mod16.not = icmp eq i64 %xtraiter14, 0 - br i1 %lcmp.mod16.not, label %.preheader, label %.epil.preheader13 - -.epil.preheader13: ; preds = %.epil.preheader13, %..preheader_crit_edge.unr-lcssa - %indvars.iv9.epil = phi i64 [ %indvars.iv.next5.epil, %.epil.preheader13 ], [ %indvars.iv9.unr, %..preheader_crit_edge.unr-lcssa ] - %epil.iter15 = phi i64 [ %epil.iter15.sub, %.epil.preheader13 ], [ %xtraiter14, %..preheader_crit_edge.unr-lcssa ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv9.epil - %8 = load float, float* %7, align 4 - %9 = fmul float %8, 2.000000e+00 - %10 = getelementptr inbounds float, float* %1, i64 %indvars.iv9.epil - store float %9, float* %10, align 4 - %indvars.iv.next5.epil = add nuw nsw i64 %indvars.iv9.epil, 1 - %epil.iter15.sub = add i64 %epil.iter15, -1 - %epil.iter15.cmp.not = icmp eq i64 %epil.iter15.sub, 0 - br i1 %epil.iter15.cmp.not, label %.preheader, label %.epil.preheader13, !llvm.loop !6 - -.preheader: ; preds = %.epil.preheader13, %..preheader_crit_edge.unr-lcssa, %3 - br i1 %4, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %.preheader - %11 = add nsw i64 %wide.trip.count7, -1 - %xtraiter = and i64 %wide.trip.count7, 7 - %12 = icmp ult i64 %11, 7 - br i1 %12, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count7, 2147483640 - br label %30 - -13: ; preds = %13, %.lr.ph11.new - %indvars.iv9 = phi i64 [ 0, %.lr.ph11.new ], [ %indvars.iv.next5.3, %13 ] - %niter18 = phi i64 [ %unroll_iter17, %.lr.ph11.new ], [ %niter18.nsub.3, %13 ] - %14 = getelementptr inbounds float, float* %0, i64 %indvars.iv9 - %15 = load float, float* %14, align 4 - %16 = fmul float %15, 2.000000e+00 - %17 = getelementptr inbounds float, float* %1, i64 %indvars.iv9 - store float %16, float* %17, align 4 - %indvars.iv.next5 = or i64 %indvars.iv9, 1 - %18 = getelementptr inbounds float, 
float* %0, i64 %indvars.iv.next5 - %19 = load float, float* %18, align 4 - %20 = fmul float %19, 2.000000e+00 - %21 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next5 - store float %20, float* %21, align 4 - %indvars.iv.next5.1 = or i64 %indvars.iv9, 2 - %22 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next5.1 - %23 = load float, float* %22, align 4 - %24 = fmul float %23, 2.000000e+00 - %25 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next5.1 - store float %24, float* %25, align 4 - %indvars.iv.next5.2 = or i64 %indvars.iv9, 3 - %26 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next5.2 - %27 = load float, float* %26, align 4 - %28 = fmul float %27, 2.000000e+00 - %29 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next5.2 - store float %28, float* %29, align 4 - %indvars.iv.next5.3 = add nuw nsw i64 %indvars.iv9, 4 - %niter18.nsub.3 = add i64 %niter18, -4 - %niter18.ncmp.3.not = icmp eq i64 %niter18.nsub.3, 0 - br i1 %niter18.ncmp.3.not, label %..preheader_crit_edge.unr-lcssa, label %13 - -30: ; preds = %30, %.lr.ph.new - %.014 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %54, %30 ] - %indvars.iv3 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.7, %30 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.7, %30 ] - %31 = getelementptr inbounds float, float* %1, i64 %indvars.iv3 - %32 = load float, float* %31, align 4 - %33 = fadd float %.014, %32 - %indvars.iv.next = or i64 %indvars.iv3, 1 - %34 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next - %35 = load float, float* %34, align 4 - %36 = fadd float %33, %35 - %indvars.iv.next.1 = or i64 %indvars.iv3, 2 - %37 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.1 - %38 = load float, float* %37, align 4 - %39 = fadd float %36, %38 - %indvars.iv.next.2 = or i64 %indvars.iv3, 3 - %40 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.2 - %41 = load float, float* %40, align 4 - %42 = fadd float %39, %41 - %indvars.iv.next.3 = or i64 %indvars.iv3, 4 - %43 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.3 - %44 = load float, float* %43, align 4 - %45 = fadd float %42, %44 - %indvars.iv.next.4 = or i64 %indvars.iv3, 5 - %46 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.4 - %47 = load float, float* %46, align 4 - %48 = fadd float %45, %47 - %indvars.iv.next.5 = or i64 %indvars.iv3, 6 - %49 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.5 - %50 = load float, float* %49, align 4 - %51 = fadd float %48, %50 - %indvars.iv.next.6 = or i64 %indvars.iv3, 7 - %52 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.6 - %53 = load float, float* %52, align 4 - %54 = fadd float %51, %53 - %indvars.iv.next.7 = add nuw nsw i64 %indvars.iv3, 8 - %niter.nsub.7 = add i64 %niter, -8 - %niter.ncmp.7.not = icmp eq i64 %niter.nsub.7, 0 - br i1 %niter.ncmp.7.not, label %._crit_edge.unr-lcssa, label %30 - -._crit_edge.unr-lcssa: ; preds = %30, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %54, %30 ] - %.014.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %54, %30 ] - %indvars.iv3.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.7, %30 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.014.epil = phi float [ %57, %.epil.preheader ], [ %.014.unr, %._crit_edge.unr-lcssa ] - %indvars.iv3.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ 
%indvars.iv3.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %55 = getelementptr inbounds float, float* %1, i64 %indvars.iv3.epil - %56 = load float, float* %55, align 4 - %57 = fadd float %.014.epil, %56 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv3.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !7 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %.preheader - %.01.lcssa = phi float [ 0.000000e+00, %.preheader ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %57, %.epil.preheader ] - ret float %.01.lcssa -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @test(float* %0, float* %1, float* %2) #0 { -.preheader4: - %3 = load float, float* %0, align 4 - %4 = fmul float %3, 2.000000e+00 - store float %4, float* %1, align 4 - %5 = getelementptr inbounds float, float* %0, i64 1 - %6 = load float, float* %5, align 4 - %7 = fmul float %6, 2.000000e+00 - %8 = getelementptr inbounds float, float* %1, i64 1 - store float %7, float* %8, align 4 - %9 = getelementptr inbounds float, float* %0, i64 2 - %10 = load float, float* %9, align 4 - %11 = fmul float %10, 2.000000e+00 - %12 = getelementptr inbounds float, float* %1, i64 2 - store float %11, float* %12, align 4 - %13 = getelementptr inbounds float, float* %0, i64 3 - %14 = load float, float* %13, align 4 - %15 = fmul float %14, 2.000000e+00 - %16 = getelementptr inbounds float, float* %1, i64 3 - store float %15, float* %16, align 4 - %17 = getelementptr inbounds float, float* %0, i64 4 - %18 = load float, float* %17, align 4 - %19 = fmul float %18, 2.000000e+00 - %20 = getelementptr inbounds float, float* %1, i64 4 - store float %19, float* %20, align 4 - %21 = fadd float %4, 0.000000e+00 - %22 = fadd float %21, %7 - %23 = fadd float %22, %11 - %24 = fadd float %23, %15 - %25 = fadd float %24, %19 - store float %25, float* %2, align 4 - %26 = getelementptr inbounds float, float* %2, i64 1 - store float %25, float* %26, align 4 - %27 = getelementptr inbounds float, float* %2, i64 2 - store float %25, float* %27, align 4 - %28 = getelementptr inbounds float, float* %2, i64 3 - store float %25, float* %28, align 4 - %29 = getelementptr inbounds float, float* %2, i64 4 - store float %25, float* %29, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #0 { - %1 = alloca [5 x float], align 16 - %2 = bitcast [5 x float]* %1 to i8* - %3 = alloca [5 x float], align 16 - %4 = bitcast [5 x float]* %3 to i8* - %5 = alloca [5 x float], align 16 - %6 = bitcast [5 x float]* %5 to i8* - %7 = alloca [5 x float], align 16 - %8 = bitcast [5 x float]* %7 to i8* - %9 = alloca [5 x float], align 16 - %10 = bitcast [5 x float]* %9 to i8* - %11 = alloca [5 x float], align 16 - %12 = bitcast [5 x float]* %11 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(20) %2, i8* nonnull align 16 dereferenceable(20) bitcast ([5 x float]* @__const.main.A to i8*), i64 20, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(20) %4, i8* nonnull align 16 dereferenceable(20) bitcast ([5 x float]* @__const.main.expectedA to i8*), i64 20, i1 false) - call void @memset_pattern16(i8* nonnull %2, i8* bitcast ([4 x float]* @.memset_pattern to i8*), i64 20) #8 - call void @memset_pattern16(i8* nonnull %4, i8* bitcast ([4 x float]* 
@.memset_pattern.1 to i8*), i64 20) #8 - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(20) %6, i8 0, i64 20, i1 false) - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(20) %8, i8 0, i64 20, i1 false) - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(20) %10, i8 0, i64 20, i1 false) - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(20) %12, i8 0, i64 20, i1 false) - %13 = getelementptr inbounds [5 x float], [5 x float]* %1, i64 0, i64 0 - %14 = getelementptr inbounds [5 x float], [5 x float]* %5, i64 0, i64 0 - %15 = getelementptr inbounds [5 x float], [5 x float]* %9, i64 0, i64 0 - call void @test(float* nonnull %13, float* nonnull %14, float* nonnull %15) - %16 = getelementptr inbounds [5 x float], [5 x float]* %3, i64 0, i64 0 - %17 = getelementptr inbounds [5 x float], [5 x float]* %7, i64 0, i64 0 - %18 = getelementptr inbounds [5 x float], [5 x float]* %11, i64 0, i64 0 - call void @no_opt_test(float* nonnull %16, float* nonnull %17, float* nonnull %18) - %19 = load float, float* %15, align 16 - %20 = fpext float %19 to double - %21 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %20) #8 - %22 = load float, float* %18, align 16 - %23 = fpext float %22 to double - %24 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %23) #8 - %25 = load float, float* %18, align 16 - %26 = load float, float* %15, align 16 - %27 = fsub float %25, %26 - %28 = call float @llvm.fabs.f32(float %27) - %29 = fcmp uge float %28, 0x3FB99999A0000000 - br i1 %29, label %44, label %30 - -30: ; preds = %0 - %31 = getelementptr inbounds [5 x float], [5 x float]* %9, i64 0, i64 1 - %32 = load float, float* %31, align 4 - %33 = fpext float %32 to double - %34 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %33) #8 - %35 = getelementptr inbounds [5 x float], [5 x float]* %11, i64 0, i64 1 - %36 = load float, float* %35, align 4 - %37 = fpext float %36 to double - %38 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %37) #8 - %39 = load float, float* %35, align 4 - %40 = load float, float* %31, align 4 - %41 = fsub float %39, %40 - %42 = call float @llvm.fabs.f32(float %41) - %43 = fcmp uge float %42, 0x3FB99999A0000000 - br i1 %43, label %44, label %45 - -44: ; preds = %73, %59, %45, %30, %0 - call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), i32 75, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.3, i64 0, i64 0)) #9 - unreachable - -45: ; preds = %30 - %46 = getelementptr inbounds [5 x float], [5 x float]* %9, i64 0, i64 2 - %47 = load float, float* %46, align 8 - %48 = fpext float %47 to double - %49 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %48) #8 - %50 = getelementptr inbounds [5 x float], [5 x float]* %11, i64 0, i64 2 - %51 = load float, float* %50, align 8 - %52 = fpext float %51 to double - %53 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %52) #8 - %54 = load float, float* %50, align 8 - %55 = load float, float* %46, align 8 - %56 = fsub float %54, %55 - %57 = call float @llvm.fabs.f32(float %56) - %58 = fcmp uge float %57, 0x3FB99999A0000000 - br i1 %58, label %44, label %59 - -59: ; preds = %45 - %60 = getelementptr inbounds [5 x float], [5 x float]* %9, i64 0, i64 3 - %61 = load float, float* %60, align 4 - %62 = fpext float %61 to double - %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %62) #8 - %64 = getelementptr inbounds [5 x float], [5 x float]* %11, i64 0, i64 3 - %65 = load float, float* %64, align 4 - %66 = fpext float %65 to double - %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %66) #8 - %68 = load float, float* %64, align 4 - %69 = load float, float* %60, align 4 - %70 = fsub float %68, %69 - %71 = call float @llvm.fabs.f32(float %70) - %72 = fcmp uge float %71, 0x3FB99999A0000000 - br i1 %72, label %44, label %73 - -73: ; preds = %59 - %74 = getelementptr inbounds [5 x float], [5 x float]* %9, i64 0, i64 4 - %75 = load float, float* %74, align 16 - %76 = fpext float %75 to double - %77 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %76) #8 - %78 = getelementptr inbounds [5 x float], [5 x float]* %11, i64 0, i64 4 - %79 = load float, float* %78, align 16 - %80 = fpext float %79 to double - %81 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %80) #8 - %82 = load float, float* %78, align 16 - %83 = load float, float* %74, align 16 - %84 = fsub float %82, %83 - %85 = call float @llvm.fabs.f32(float %84) - %86 = fcmp uge float %85, 0x3FB99999A0000000 - br i1 %86, label %44, label %87 - -87: ; preds = %73 - ret i32 0 -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #2 - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 - -declare i32 @printf(i8*, ...) 
#4 - -; Function Attrs: nounwind readnone speculatable willreturn -declare double @llvm.fabs.f64(double) #5 - -; Function Attrs: noreturn -declare void @__assert_rtn(i8*, i8*, i32, i8*) #6 - -; Function Attrs: argmemonly nofree -declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) #7 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fabs.f32(float) #5 - -attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { argmemonly nounwind willreturn } -attributes #3 = { argmemonly nounwind willreturn writeonly } -attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { nounwind readnone speculatable willreturn } -attributes #6 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { argmemonly nofree } -attributes #8 = { nounwind } -attributes #9 = { noreturn nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} -!3 = distinct !{!3, !4} -!4 = !{!"llvm.loop.unroll.disable"} -!5 = distinct !{!5, !4} -!6 = distinct !{!6, !4} -!7 = distinct !{!7, !4} diff --git a/src/dios-egraphs/Diospyros/dce.ll b/src/dios-egraphs/Diospyros/dce.ll deleted file mode 100644 index bc275dc2..00000000 --- a/src/dios-egraphs/Diospyros/dce.ll +++ /dev/null @@ -1,3403 +0,0 @@ -; ModuleID = 'build/diospyros.ll' -source_filename = "fail-tests/qr-decomp-local-arrays.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 -@.str.1 = private unnamed_addr constant [14 x i8] c"Q 
Output: %f\0A\00", align 1 -@.str.2 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 -@.str.3 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 -@.str.4 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @sgn(float %0) #0 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_sgn(float %0) #1 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @naive_norm(float* %0, i32 %1) #0 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - %wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 - %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 
= fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32(float) #2 - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_naive_norm(float* %0, i32 %1) #1 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - %wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 - %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 = fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label 
%.epil.preheader, !llvm.loop !5 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_transpose(float* %0) #0 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - %6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_transpose(float* %0) #1 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - %6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { -.preheader: - store float 0.000000e+00, float* %2, align 4 - %3 = getelementptr float, float* %0, i32 0 - %4 = load float, float* %3, align 4 - %5 = insertelement <4 x float> zeroinitializer, float %4, i32 0 - %6 = insertelement <4 x float> %5, float 0.000000e+00, i32 1 - %7 = insertelement <4 x float> %6, float 0.000000e+00, i32 2 - %8 = insertelement <4 x float> %7, float 0.000000e+00, i32 3 - %9 = getelementptr float, float* %1, i32 0 - %10 = load float, float* %9, align 4 - %11 = insertelement <4 x float> zeroinitializer, float %10, i32 0 - %12 = insertelement <4 x float> %11, float 0.000000e+00, i32 1 - %13 = insertelement <4 x float> %12, float 0.000000e+00, i32 2 - %14 = insertelement <4 x float> %13, float 0.000000e+00, i32 3 - %15 = call <4 x float> @llvm.fma.v4f32(<4 x float> %8, <4 x float> %14, <4 x float> zeroinitializer) - %16 = extractelement <4 x float> %15, i32 0 - store float %16, float* %2, align 4 - %17 = getelementptr float, float* %0, i32 0 - %18 = load float, float* %17, align 4 - %19 = insertelement <4 x float> zeroinitializer, float %18, i32 0 - %20 = insertelement <4 x float> %19, float 1.000000e+00, i32 1 - %21 = insertelement <4 x float> %20, float 1.000000e+00, i32 2 - %22 = insertelement <4 x float> %21, float 1.000000e+00, i32 3 - %23 = insertelement <4 x float> zeroinitializer, float %10, i32 0 - %24 = insertelement <4 x float> %23, float 0.000000e+00, i32 1 - %25 = insertelement <4 x float> %24, float 0.000000e+00, i32 2 - %26 = insertelement <4 x float> %25, float 0.000000e+00, i32 3 - %27 = fmul <4 x float> %22, %26 - %28 = fadd <4 x float> %27, zeroinitializer - %29 = getelementptr float, float* %0, i32 0 - %30 = getelementptr inbounds float, float* %29, i64 1 - %31 = load float, float* %30, align 4 - %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 - %33 = insertelement <4 x float> %32, float 0.000000e+00, i32 1 - %34 = insertelement <4 x float> %33, float 0.000000e+00, i32 2 - %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 3 - %36 = getelementptr float, float* %1, i32 0 - %37 = getelementptr inbounds float, float* %36, i64 2 - %38 = load float, float* %37, align 4 - %39 = insertelement <4 x float> zeroinitializer, float %38, i32 0 - %40 = 
insertelement <4 x float> %39, float 0.000000e+00, i32 1 - %41 = insertelement <4 x float> %40, float 0.000000e+00, i32 2 - %42 = insertelement <4 x float> %41, float 0.000000e+00, i32 3 - %43 = call <4 x float> @llvm.fma.v4f32(<4 x float> %35, <4 x float> %42, <4 x float> %28) - %44 = extractelement <4 x float> %43, i32 0 - store float %44, float* %2, align 4 - %45 = extractelement <4 x float> %43, i32 1 - %46 = getelementptr float, float* %2, i32 0 - %47 = getelementptr inbounds float, float* %46, i64 1 - store float %45, float* %47, align 4 - %48 = getelementptr float, float* %0, i32 0 - %49 = load float, float* %48, align 4 - %50 = insertelement <4 x float> zeroinitializer, float %49, i32 0 - %51 = insertelement <4 x float> %50, float 0.000000e+00, i32 1 - %52 = insertelement <4 x float> %51, float 0.000000e+00, i32 2 - %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 3 - %54 = getelementptr float, float* %1, i32 0 - %55 = getelementptr inbounds float, float* %54, i64 1 - %56 = load float, float* %55, align 4 - %57 = insertelement <4 x float> zeroinitializer, float %56, i32 0 - %58 = insertelement <4 x float> %57, float 0.000000e+00, i32 1 - %59 = insertelement <4 x float> %58, float 0.000000e+00, i32 2 - %60 = insertelement <4 x float> %59, float 0.000000e+00, i32 3 - %61 = call <4 x float> @llvm.fma.v4f32(<4 x float> %53, <4 x float> %60, <4 x float> zeroinitializer) - %62 = extractelement <4 x float> %61, i32 0 - store float %62, float* %47, align 4 - %63 = insertelement <4 x float> zeroinitializer, float %49, i32 0 - %64 = insertelement <4 x float> %63, float 1.000000e+00, i32 1 - %65 = insertelement <4 x float> %64, float 1.000000e+00, i32 2 - %66 = insertelement <4 x float> %65, float 1.000000e+00, i32 3 - %67 = insertelement <4 x float> zeroinitializer, float %56, i32 0 - %68 = insertelement <4 x float> %67, float 0.000000e+00, i32 1 - %69 = insertelement <4 x float> %68, float 0.000000e+00, i32 2 - %70 = insertelement <4 x float> %69, float 0.000000e+00, i32 3 - %71 = fmul <4 x float> %66, %70 - %72 = fadd <4 x float> %71, zeroinitializer - %73 = getelementptr float, float* %0, i32 0 - %74 = getelementptr inbounds float, float* %73, i64 1 - %75 = load float, float* %74, align 4 - %76 = insertelement <4 x float> zeroinitializer, float %75, i32 0 - %77 = insertelement <4 x float> %76, float 0.000000e+00, i32 1 - %78 = insertelement <4 x float> %77, float 0.000000e+00, i32 2 - %79 = insertelement <4 x float> %78, float 0.000000e+00, i32 3 - %80 = getelementptr float, float* %1, i32 0 - %81 = getelementptr inbounds float, float* %80, i64 3 - %82 = load float, float* %81, align 4 - %83 = insertelement <4 x float> zeroinitializer, float %82, i32 0 - %84 = insertelement <4 x float> %83, float 0.000000e+00, i32 1 - %85 = insertelement <4 x float> %84, float 0.000000e+00, i32 2 - %86 = insertelement <4 x float> %85, float 0.000000e+00, i32 3 - %87 = call <4 x float> @llvm.fma.v4f32(<4 x float> %79, <4 x float> %86, <4 x float> %72) - %88 = extractelement <4 x float> %87, i32 0 - store float %88, float* %47, align 4 - %89 = extractelement <4 x float> %87, i32 1 - %90 = getelementptr float, float* %2, i32 0 - %91 = getelementptr inbounds float, float* %90, i64 2 - store float %89, float* %91, align 4 - %92 = getelementptr float, float* %0, i32 0 - %93 = getelementptr inbounds float, float* %92, i64 2 - %94 = load float, float* %93, align 4 - %95 = insertelement <4 x float> zeroinitializer, float %94, i32 0 - %96 = insertelement <4 x float> %95, float 0.000000e+00, i32 1 - %97 
= insertelement <4 x float> %96, float 0.000000e+00, i32 2 - %98 = insertelement <4 x float> %97, float 0.000000e+00, i32 3 - %99 = getelementptr float, float* %1, i32 0 - %100 = load float, float* %99, align 4 - %101 = insertelement <4 x float> zeroinitializer, float %100, i32 0 - %102 = insertelement <4 x float> %101, float 0.000000e+00, i32 1 - %103 = insertelement <4 x float> %102, float 0.000000e+00, i32 2 - %104 = insertelement <4 x float> %103, float 0.000000e+00, i32 3 - %105 = call <4 x float> @llvm.fma.v4f32(<4 x float> %98, <4 x float> %104, <4 x float> zeroinitializer) - %106 = extractelement <4 x float> %105, i32 0 - store float %106, float* %91, align 4 - %107 = insertelement <4 x float> zeroinitializer, float %94, i32 0 - %108 = insertelement <4 x float> %107, float 1.000000e+00, i32 1 - %109 = insertelement <4 x float> %108, float 1.000000e+00, i32 2 - %110 = insertelement <4 x float> %109, float 1.000000e+00, i32 3 - %111 = insertelement <4 x float> zeroinitializer, float %100, i32 0 - %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 1 - %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 2 - %114 = insertelement <4 x float> %113, float 0.000000e+00, i32 3 - %115 = fmul <4 x float> %110, %114 - %116 = fadd <4 x float> %115, zeroinitializer - %117 = getelementptr float, float* %0, i32 0 - %118 = getelementptr inbounds float, float* %117, i64 3 - %119 = load float, float* %118, align 4 - %120 = insertelement <4 x float> zeroinitializer, float %119, i32 0 - %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 1 - %122 = insertelement <4 x float> %121, float 0.000000e+00, i32 2 - %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3 - %124 = load float, float* %37, align 4 - %125 = insertelement <4 x float> zeroinitializer, float %124, i32 0 - %126 = insertelement <4 x float> %125, float 0.000000e+00, i32 1 - %127 = insertelement <4 x float> %126, float 0.000000e+00, i32 2 - %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 3 - %129 = call <4 x float> @llvm.fma.v4f32(<4 x float> %123, <4 x float> %128, <4 x float> %116) - %130 = extractelement <4 x float> %129, i32 0 - store float %130, float* %91, align 4 - %131 = extractelement <4 x float> %129, i32 1 - %132 = getelementptr float, float* %2, i32 0 - %133 = getelementptr inbounds float, float* %132, i64 3 - store float %131, float* %133, align 4 - %134 = load float, float* %93, align 4 - %135 = insertelement <4 x float> zeroinitializer, float %134, i32 0 - %136 = insertelement <4 x float> %135, float 0.000000e+00, i32 1 - %137 = insertelement <4 x float> %136, float 0.000000e+00, i32 2 - %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 3 - %139 = load float, float* %55, align 4 - %140 = insertelement <4 x float> zeroinitializer, float %139, i32 0 - %141 = insertelement <4 x float> %140, float 0.000000e+00, i32 1 - %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 2 - %143 = insertelement <4 x float> %142, float 0.000000e+00, i32 3 - %144 = call <4 x float> @llvm.fma.v4f32(<4 x float> %138, <4 x float> %143, <4 x float> zeroinitializer) - %145 = extractelement <4 x float> %144, i32 0 - store float %145, float* %133, align 4 - %146 = load float, float* %93, align 4 - %147 = insertelement <4 x float> zeroinitializer, float %146, i32 0 - %148 = insertelement <4 x float> %147, float 1.000000e+00, i32 1 - %149 = insertelement <4 x float> %148, float 1.000000e+00, i32 2 - %150 = insertelement <4 x float> %149, float 1.000000e+00, i32 3 - %151 = 
insertelement <4 x float> zeroinitializer, float %139, i32 0 - %152 = insertelement <4 x float> %151, float 0.000000e+00, i32 1 - %153 = insertelement <4 x float> %152, float 0.000000e+00, i32 2 - %154 = insertelement <4 x float> %153, float 0.000000e+00, i32 3 - %155 = fmul <4 x float> %150, %154 - %156 = fadd <4 x float> %155, zeroinitializer - %157 = load float, float* %118, align 4 - %158 = insertelement <4 x float> zeroinitializer, float %157, i32 0 - %159 = insertelement <4 x float> %158, float 0.000000e+00, i32 1 - %160 = insertelement <4 x float> %159, float 0.000000e+00, i32 2 - %161 = insertelement <4 x float> %160, float 0.000000e+00, i32 3 - %162 = load float, float* %81, align 4 - %163 = insertelement <4 x float> zeroinitializer, float %162, i32 0 - %164 = insertelement <4 x float> %163, float 0.000000e+00, i32 1 - %165 = insertelement <4 x float> %164, float 0.000000e+00, i32 2 - %166 = insertelement <4 x float> %165, float 0.000000e+00, i32 3 - %167 = call <4 x float> @llvm.fma.v4f32(<4 x float> %161, <4 x float> %166, <4 x float> %156) - %168 = extractelement <4 x float> %167, i32 0 - store float %168, float* %133, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { -.preheader: - store float 0.000000e+00, float* %2, align 4 - %3 = load float, float* %0, align 4 - %4 = load float, float* %1, align 4 - %5 = fmul float %3, %4 - %6 = fadd float %5, 0.000000e+00 - store float %6, float* %2, align 4 - %7 = getelementptr inbounds float, float* %0, i64 1 - %8 = load float, float* %7, align 4 - %9 = getelementptr inbounds float, float* %1, i64 2 - %10 = load float, float* %9, align 4 - %11 = fmul float %8, %10 - %12 = fadd float %6, %11 - store float %12, float* %2, align 4 - %13 = getelementptr inbounds float, float* %2, i64 1 - store float 0.000000e+00, float* %13, align 4 - %14 = load float, float* %0, align 4 - %15 = getelementptr inbounds float, float* %1, i64 1 - %16 = load float, float* %15, align 4 - %17 = fmul float %14, %16 - %18 = fadd float %17, 0.000000e+00 - store float %18, float* %13, align 4 - %19 = load float, float* %7, align 4 - %20 = getelementptr inbounds float, float* %1, i64 3 - %21 = load float, float* %20, align 4 - %22 = fmul float %19, %21 - %23 = fadd float %18, %22 - store float %23, float* %13, align 4 - %24 = getelementptr inbounds float, float* %0, i64 2 - %25 = getelementptr inbounds float, float* %2, i64 2 - store float 0.000000e+00, float* %25, align 4 - %26 = load float, float* %24, align 4 - %27 = load float, float* %1, align 4 - %28 = fmul float %26, %27 - %29 = fadd float %28, 0.000000e+00 - store float %29, float* %25, align 4 - %30 = getelementptr inbounds float, float* %0, i64 3 - %31 = load float, float* %30, align 4 - %32 = load float, float* %9, align 4 - %33 = fmul float %31, %32 - %34 = fadd float %29, %33 - store float %34, float* %25, align 4 - %35 = getelementptr inbounds float, float* %2, i64 3 - store float 0.000000e+00, float* %35, align 4 - %36 = load float, float* %24, align 4 - %37 = load float, float* %15, align 4 - %38 = fmul float %36, %37 - %39 = fadd float %38, 0.000000e+00 - store float %39, float* %35, align 4 - %40 = load float, float* %30, align 4 - %41 = load float, float* %20, align 4 - %42 = fmul float %40, %41 - %43 = fadd float %39, %42 - store float %43, float* %35, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) 
#1 {
-.preheader49:
- %3 = getelementptr float, float* %0, i32 0
- %4 = bitcast float* %3 to i32*
- %5 = load i32, i32* %4, align 4
- %6 = bitcast i32 %5 to float
- %7 = insertelement <4 x float> zeroinitializer, float %6, i32 0
- %8 = insertelement <4 x float> %7, float 0.000000e+00, i32 1
- %9 = insertelement <4 x float> %8, float 0.000000e+00, i32 2
- %10 = insertelement <4 x float> %9, float 0.000000e+00, i32 3
- %11 = extractelement <4 x float> %10, i32 0
- %12 = getelementptr float, float* %2, i32 0
- %13 = bitcast float* %12 to i32*
- %14 = bitcast i32* %13 to float*
- store float %11, float* %14, align 4
- %15 = getelementptr float, float* %0, i32 0
- %16 = getelementptr inbounds float, float* %15, i64 1
- %17 = bitcast float* %16 to i32*
- %18 = load i32, i32* %17, align 4
- %19 = bitcast i32 %18 to float
- %20 = insertelement <4 x float> zeroinitializer, float %19, i32 0
- %21 = insertelement <4 x float> %20, float 0.000000e+00, i32 1
- %22 = insertelement <4 x float> %21, float 0.000000e+00, i32 2
- %23 = insertelement <4 x float> %22, float 0.000000e+00, i32 3
- %24 = extractelement <4 x float> %23, i32 0
- %25 = getelementptr float, float* %2, i32 0
- %26 = getelementptr inbounds float, float* %25, i64 1
- %27 = bitcast float* %26 to i32*
- %28 = bitcast i32* %27 to float*
- store float %24, float* %28, align 4
- %29 = getelementptr float, float* %0, i32 0
- %30 = getelementptr inbounds float, float* %29, i64 2
- %31 = bitcast float* %30 to i32*
- %32 = load i32, i32* %31, align 4
- %33 = bitcast i32 %32 to float
- %34 = insertelement <4 x float> zeroinitializer, float %33, i32 0
- %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 1
- %36 = insertelement <4 x float> %35, float 0.000000e+00, i32 2
- %37 = insertelement <4 x float> %36, float 0.000000e+00, i32 3
- %38 = extractelement <4 x float> %37, i32 0
- %39 = getelementptr float, float* %2, i32 0
- %40 = getelementptr inbounds float, float* %39, i64 2
- %41 = bitcast float* %40 to i32*
- %42 = bitcast i32* %41 to float*
- store float %38, float* %42, align 4
- %43 = getelementptr float, float* %0, i32 0
- %44 = getelementptr inbounds float, float* %43, i64 3
- %45 = bitcast float* %44 to i32*
- %46 = load i32, i32* %45, align 4
- %47 = bitcast i32 %46 to float
- %48 = fneg float %47
- %49 = insertelement <4 x float> zeroinitializer, float %48, i32 0
- %50 = bitcast i32 %5 to float ; !! This references an old load: a match that should not occur?
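; A minimal sketch of the load-reuse hazard the "!!" note above refers to, using
; hypothetical pointers %p and %q that are not from this file:
;
;   %a = load float, float* %p, align 4
;   store float 0.000000e+00, float* %q, align 4  ; if %q may alias %p, %a is now stale
;   %b = load float, float* %p, align 4           ; reusing %a in place of %b is only
;                                                 ; sound if alias analysis proves
;                                                 ; %p and %q do not alias
;
; In the code above, %50 reuses %5, which was loaded before the stores through
; %14, %28, and %42; that reuse appears valid only if those stores cannot alias
; the memory read through %4.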
- %51 = bitcast i32 %5 to float - %52 = fmul float %50, %51 - %53 = fadd float %52, 0.000000e+00 - %54 = load i32, i32* %31, align 4 - %55 = bitcast i32 %54 to float - %56 = bitcast i32 %54 to float - %57 = fmul float %55, %56 - %58 = fadd float %53, %57 - %59 = call float @llvm.sqrt.f32(float %58) - %60 = bitcast i32 %5 to float - %61 = fcmp olt float %60, 0.000000e+00 - %62 = sext i1 %61 to i32 - %63 = fcmp ogt float %60, 0.000000e+00 - %64 = zext i1 %63 to i32 - %65 = add nsw i32 %62, %64 - %66 = sitofp i32 %65 to float - %67 = fneg float %66 - %68 = fmul float %59, %67 - %69 = bitcast i32 %5 to float - %70 = fadd float %69, %68 - %71 = bitcast i32 %5 to float - %72 = bitcast i32 %5 to float - %73 = fmul float %71, %72 - %74 = fadd float %73, 0.000000e+00 - %75 = bitcast i32 %54 to float - %76 = bitcast i32 %54 to float - %77 = fmul float %75, %76 - %78 = fadd float %74, %77 - %79 = call float @llvm.sqrt.f32(float %78) - %80 = fneg float %66 - %81 = fmul float %79, %80 - %82 = bitcast i32 %5 to float - %83 = fadd float %82, %81 - %84 = bitcast i32 %5 to float - %85 = bitcast i32 %5 to float - %86 = fmul float %84, %85 - %87 = fadd float %86, 0.000000e+00 - %88 = bitcast i32 %54 to float - %89 = bitcast i32 %54 to float - %90 = fmul float %88, %89 - %91 = fadd float %87, %90 - %92 = call float @llvm.sqrt.f32(float %91) - %93 = fneg float %66 - %94 = fmul float %92, %93 - %95 = bitcast i32 %5 to float - %96 = fadd float %95, %94 - %97 = fmul float %83, %96 - %98 = fadd float %97, 0.000000e+00 - %99 = bitcast i32 %5 to float - %100 = bitcast i32 %5 to float - %101 = fmul float %99, %100 - %102 = fadd float %101, 0.000000e+00 - %103 = bitcast i32 %54 to float - %104 = bitcast i32 %54 to float - %105 = fmul float %103, %104 - %106 = fadd float %102, %105 - %107 = call float @llvm.sqrt.f32(float %106) - %108 = fneg float %66 - %109 = fmul float %107, %108 - %110 = fmul float %109, 0.000000e+00 - %111 = bitcast i32 %54 to float - %112 = fadd float %111, %110 - %113 = bitcast i32 %5 to float - %114 = bitcast i32 %5 to float - %115 = fmul float %113, %114 - %116 = fadd float %115, 0.000000e+00 - %117 = bitcast i32 %54 to float - %118 = bitcast i32 %54 to float - %119 = fmul float %117, %118 - %120 = fadd float %116, %119 - %121 = call float @llvm.sqrt.f32(float %120) - %122 = fneg float %66 - %123 = fmul float %121, %122 - %124 = fmul float %123, 0.000000e+00 - %125 = bitcast i32 %54 to float - %126 = fadd float %125, %124 - %127 = fmul float %112, %126 - %128 = fadd float %98, %127 - %129 = call float @llvm.sqrt.f32(float %128) - %130 = fadd float %129, 0.000000e+00 - %131 = fdiv float %70, %130 - %132 = fmul float %131, 2.000000e+00 - %133 = bitcast i32 %5 to float - %134 = bitcast i32 %5 to float - %135 = fmul float %133, %134 - %136 = fadd float %135, 0.000000e+00 - %137 = bitcast i32 %54 to float - %138 = bitcast i32 %54 to float - %139 = fmul float %137, %138 - %140 = fadd float %136, %139 - %141 = call float @llvm.sqrt.f32(float %140) - %142 = fneg float %66 - %143 = fmul float %141, %142 - %144 = bitcast i32 %5 to float - %145 = fadd float %144, %143 - %146 = bitcast i32 %5 to float - %147 = bitcast i32 %5 to float - %148 = fmul float %146, %147 - %149 = fadd float %148, 0.000000e+00 - %150 = bitcast i32 %54 to float - %151 = bitcast i32 %54 to float - %152 = fmul float %150, %151 - %153 = fadd float %149, %152 - %154 = call float @llvm.sqrt.f32(float %153) - %155 = fneg float %66 - %156 = fmul float %154, %155 - %157 = bitcast i32 %5 to float - %158 = fadd float %157, %156 - %159 = 
bitcast i32 %5 to float - %160 = bitcast i32 %5 to float - %161 = fmul float %159, %160 - %162 = fadd float %161, 0.000000e+00 - %163 = bitcast i32 %54 to float - %164 = bitcast i32 %54 to float - %165 = fmul float %163, %164 - %166 = fadd float %162, %165 - %167 = call float @llvm.sqrt.f32(float %166) - %168 = fneg float %66 - %169 = fmul float %167, %168 - %170 = bitcast i32 %5 to float - %171 = fadd float %170, %169 - %172 = fmul float %158, %171 - %173 = fadd float %172, 0.000000e+00 - %174 = bitcast i32 %5 to float - %175 = bitcast i32 %5 to float - %176 = fmul float %174, %175 - %177 = fadd float %176, 0.000000e+00 - %178 = bitcast i32 %54 to float - %179 = bitcast i32 %54 to float - %180 = fmul float %178, %179 - %181 = fadd float %177, %180 - %182 = call float @llvm.sqrt.f32(float %181) - %183 = fneg float %66 - %184 = fmul float %182, %183 - %185 = fmul float %184, 0.000000e+00 - %186 = bitcast i32 %54 to float - %187 = fadd float %186, %185 - %188 = bitcast i32 %5 to float - %189 = bitcast i32 %5 to float - %190 = fmul float %188, %189 - %191 = fadd float %190, 0.000000e+00 - %192 = bitcast i32 %54 to float - %193 = bitcast i32 %54 to float - %194 = fmul float %192, %193 - %195 = fadd float %191, %194 - %196 = call float @llvm.sqrt.f32(float %195) - %197 = fneg float %66 - %198 = fmul float %196, %197 - %199 = fmul float %198, 0.000000e+00 - %200 = bitcast i32 %54 to float - %201 = fadd float %200, %199 - %202 = fmul float %187, %201 - %203 = fadd float %173, %202 - %204 = call float @llvm.sqrt.f32(float %203) - %205 = fadd float %204, 0.000000e+00 - %206 = fdiv float %145, %205 - %207 = fmul float %132, %206 - %208 = insertelement <4 x float> %49, float %207, i32 1 - %209 = bitcast i32 %5 to float - %210 = bitcast i32 %5 to float - %211 = fmul float %209, %210 - %212 = fadd float %211, 0.000000e+00 - %213 = bitcast i32 %54 to float - %214 = bitcast i32 %54 to float - %215 = fmul float %213, %214 - %216 = fadd float %212, %215 - %217 = call float @llvm.sqrt.f32(float %216) - %218 = fneg float %66 - %219 = fmul float %217, %218 - %220 = bitcast i32 %5 to float - %221 = fadd float %220, %219 - %222 = bitcast i32 %5 to float - %223 = bitcast i32 %5 to float - %224 = fmul float %222, %223 - %225 = fadd float %224, 0.000000e+00 - %226 = bitcast i32 %54 to float - %227 = bitcast i32 %54 to float - %228 = fmul float %226, %227 - %229 = fadd float %225, %228 - %230 = call float @llvm.sqrt.f32(float %229) - %231 = fneg float %66 - %232 = fmul float %230, %231 - %233 = bitcast i32 %5 to float - %234 = fadd float %233, %232 - %235 = bitcast i32 %5 to float - %236 = bitcast i32 %5 to float - %237 = fmul float %235, %236 - %238 = fadd float %237, 0.000000e+00 - %239 = bitcast i32 %54 to float - %240 = bitcast i32 %54 to float - %241 = fmul float %239, %240 - %242 = fadd float %238, %241 - %243 = call float @llvm.sqrt.f32(float %242) - %244 = fneg float %66 - %245 = fmul float %243, %244 - %246 = bitcast i32 %5 to float - %247 = fadd float %246, %245 - %248 = fmul float %234, %247 - %249 = fadd float %248, 0.000000e+00 - %250 = bitcast i32 %5 to float - %251 = bitcast i32 %5 to float - %252 = fmul float %250, %251 - %253 = fadd float %252, 0.000000e+00 - %254 = bitcast i32 %54 to float - %255 = bitcast i32 %54 to float - %256 = fmul float %254, %255 - %257 = fadd float %253, %256 - %258 = call float @llvm.sqrt.f32(float %257) - %259 = fneg float %66 - %260 = fmul float %258, %259 - %261 = fmul float %260, 0.000000e+00 - %262 = bitcast i32 %54 to float - %263 = fadd float %262, %261 - %264 = 
bitcast i32 %5 to float - %265 = bitcast i32 %5 to float - %266 = fmul float %264, %265 - %267 = fadd float %266, 0.000000e+00 - %268 = bitcast i32 %54 to float - %269 = bitcast i32 %54 to float - %270 = fmul float %268, %269 - %271 = fadd float %267, %270 - %272 = call float @llvm.sqrt.f32(float %271) - %273 = fneg float %66 - %274 = fmul float %272, %273 - %275 = fmul float %274, 0.000000e+00 - %276 = bitcast i32 %54 to float - %277 = fadd float %276, %275 - %278 = fmul float %263, %277 - %279 = fadd float %249, %278 - %280 = call float @llvm.sqrt.f32(float %279) - %281 = fadd float %280, 0.000000e+00 - %282 = fdiv float %221, %281 - %283 = fmul float %282, 2.000000e+00 - %284 = bitcast i32 %5 to float - %285 = bitcast i32 %5 to float - %286 = fmul float %284, %285 - %287 = fadd float %286, 0.000000e+00 - %288 = bitcast i32 %54 to float - %289 = bitcast i32 %54 to float - %290 = fmul float %288, %289 - %291 = fadd float %287, %290 - %292 = call float @llvm.sqrt.f32(float %291) - %293 = fneg float %66 - %294 = fmul float %292, %293 - %295 = fmul float %294, 0.000000e+00 - %296 = bitcast i32 %54 to float - %297 = fadd float %296, %295 - %298 = bitcast i32 %5 to float - %299 = bitcast i32 %5 to float - %300 = fmul float %298, %299 - %301 = fadd float %300, 0.000000e+00 - %302 = bitcast i32 %54 to float - %303 = bitcast i32 %54 to float - %304 = fmul float %302, %303 - %305 = fadd float %301, %304 - %306 = call float @llvm.sqrt.f32(float %305) - %307 = fneg float %66 - %308 = fmul float %306, %307 - %309 = bitcast i32 %5 to float - %310 = fadd float %309, %308 - %311 = bitcast i32 %5 to float - %312 = bitcast i32 %5 to float - %313 = fmul float %311, %312 - %314 = fadd float %313, 0.000000e+00 - %315 = bitcast i32 %54 to float - %316 = bitcast i32 %54 to float - %317 = fmul float %315, %316 - %318 = fadd float %314, %317 - %319 = call float @llvm.sqrt.f32(float %318) - %320 = fneg float %66 - %321 = fmul float %319, %320 - %322 = bitcast i32 %5 to float - %323 = fadd float %322, %321 - %324 = fmul float %310, %323 - %325 = fadd float %324, 0.000000e+00 - %326 = bitcast i32 %5 to float - %327 = bitcast i32 %5 to float - %328 = fmul float %326, %327 - %329 = fadd float %328, 0.000000e+00 - %330 = bitcast i32 %54 to float - %331 = bitcast i32 %54 to float - %332 = fmul float %330, %331 - %333 = fadd float %329, %332 - %334 = call float @llvm.sqrt.f32(float %333) - %335 = fneg float %66 - %336 = fmul float %334, %335 - %337 = fmul float %336, 0.000000e+00 - %338 = bitcast i32 %54 to float - %339 = fadd float %338, %337 - %340 = bitcast i32 %5 to float - %341 = bitcast i32 %5 to float - %342 = fmul float %340, %341 - %343 = fadd float %342, 0.000000e+00 - %344 = bitcast i32 %54 to float - %345 = bitcast i32 %54 to float - %346 = fmul float %344, %345 - %347 = fadd float %343, %346 - %348 = call float @llvm.sqrt.f32(float %347) - %349 = fneg float %66 - %350 = fmul float %348, %349 - %351 = fmul float %350, 0.000000e+00 - %352 = bitcast i32 %54 to float - %353 = fadd float %352, %351 - %354 = fmul float %339, %353 - %355 = fadd float %325, %354 - %356 = call float @llvm.sqrt.f32(float %355) - %357 = fadd float %356, 0.000000e+00 - %358 = fdiv float %297, %357 - %359 = fmul float %283, %358 - %360 = insertelement <4 x float> %208, float %359, i32 2 - %361 = bitcast i32 %5 to float - %362 = bitcast i32 %5 to float - %363 = fmul float %361, %362 - %364 = fadd float %363, 0.000000e+00 - %365 = bitcast i32 %54 to float - %366 = bitcast i32 %54 to float - %367 = fmul float %365, %366 - %368 = fadd float 
%364, %367 - %369 = call float @llvm.sqrt.f32(float %368) - %370 = fneg float %66 - %371 = fmul float %369, %370 - %372 = fmul float %371, 0.000000e+00 - %373 = bitcast i32 %54 to float - %374 = fadd float %373, %372 - %375 = bitcast i32 %5 to float - %376 = bitcast i32 %5 to float - %377 = fmul float %375, %376 - %378 = fadd float %377, 0.000000e+00 - %379 = bitcast i32 %54 to float - %380 = bitcast i32 %54 to float - %381 = fmul float %379, %380 - %382 = fadd float %378, %381 - %383 = call float @llvm.sqrt.f32(float %382) - %384 = fneg float %66 - %385 = fmul float %383, %384 - %386 = bitcast i32 %5 to float - %387 = fadd float %386, %385 - %388 = bitcast i32 %5 to float - %389 = bitcast i32 %5 to float - %390 = fmul float %388, %389 - %391 = fadd float %390, 0.000000e+00 - %392 = bitcast i32 %54 to float - %393 = bitcast i32 %54 to float - %394 = fmul float %392, %393 - %395 = fadd float %391, %394 - %396 = call float @llvm.sqrt.f32(float %395) - %397 = fneg float %66 - %398 = fmul float %396, %397 - %399 = bitcast i32 %5 to float - %400 = fadd float %399, %398 - %401 = fmul float %387, %400 - %402 = fadd float %401, 0.000000e+00 - %403 = bitcast i32 %5 to float - %404 = bitcast i32 %5 to float - %405 = fmul float %403, %404 - %406 = fadd float %405, 0.000000e+00 - %407 = bitcast i32 %54 to float - %408 = bitcast i32 %54 to float - %409 = fmul float %407, %408 - %410 = fadd float %406, %409 - %411 = call float @llvm.sqrt.f32(float %410) - %412 = fneg float %66 - %413 = fmul float %411, %412 - %414 = fmul float %413, 0.000000e+00 - %415 = bitcast i32 %54 to float - %416 = fadd float %415, %414 - %417 = bitcast i32 %5 to float - %418 = bitcast i32 %5 to float - %419 = fmul float %417, %418 - %420 = fadd float %419, 0.000000e+00 - %421 = bitcast i32 %54 to float - %422 = bitcast i32 %54 to float - %423 = fmul float %421, %422 - %424 = fadd float %420, %423 - %425 = call float @llvm.sqrt.f32(float %424) - %426 = fneg float %66 - %427 = fmul float %425, %426 - %428 = fmul float %427, 0.000000e+00 - %429 = bitcast i32 %54 to float - %430 = fadd float %429, %428 - %431 = fmul float %416, %430 - %432 = fadd float %402, %431 - %433 = call float @llvm.sqrt.f32(float %432) - %434 = fadd float %433, 0.000000e+00 - %435 = fdiv float %374, %434 - %436 = fmul float %435, 2.000000e+00 - %437 = bitcast i32 %5 to float - %438 = bitcast i32 %5 to float - %439 = fmul float %437, %438 - %440 = fadd float %439, 0.000000e+00 - %441 = bitcast i32 %54 to float - %442 = bitcast i32 %54 to float - %443 = fmul float %441, %442 - %444 = fadd float %440, %443 - %445 = call float @llvm.sqrt.f32(float %444) - %446 = fneg float %66 - %447 = fmul float %445, %446 - %448 = bitcast i32 %5 to float - %449 = fadd float %448, %447 - %450 = bitcast i32 %5 to float - %451 = bitcast i32 %5 to float - %452 = fmul float %450, %451 - %453 = fadd float %452, 0.000000e+00 - %454 = bitcast i32 %54 to float - %455 = bitcast i32 %54 to float - %456 = fmul float %454, %455 - %457 = fadd float %453, %456 - %458 = call float @llvm.sqrt.f32(float %457) - %459 = fneg float %66 - %460 = fmul float %458, %459 - %461 = bitcast i32 %5 to float - %462 = fadd float %461, %460 - %463 = bitcast i32 %5 to float - %464 = bitcast i32 %5 to float - %465 = fmul float %463, %464 - %466 = fadd float %465, 0.000000e+00 - %467 = bitcast i32 %54 to float - %468 = bitcast i32 %54 to float - %469 = fmul float %467, %468 - %470 = fadd float %466, %469 - %471 = call float @llvm.sqrt.f32(float %470) - %472 = fneg float %66 - %473 = fmul float %471, %472 - %474 = 
bitcast i32 %5 to float - %475 = fadd float %474, %473 - %476 = fmul float %462, %475 - %477 = fadd float %476, 0.000000e+00 - %478 = bitcast i32 %5 to float - %479 = bitcast i32 %5 to float - %480 = fmul float %478, %479 - %481 = fadd float %480, 0.000000e+00 - %482 = bitcast i32 %54 to float - %483 = bitcast i32 %54 to float - %484 = fmul float %482, %483 - %485 = fadd float %481, %484 - %486 = call float @llvm.sqrt.f32(float %485) - %487 = fneg float %66 - %488 = fmul float %486, %487 - %489 = fmul float %488, 0.000000e+00 - %490 = bitcast i32 %54 to float - %491 = fadd float %490, %489 - %492 = bitcast i32 %5 to float - %493 = bitcast i32 %5 to float - %494 = fmul float %492, %493 - %495 = fadd float %494, 0.000000e+00 - %496 = bitcast i32 %54 to float - %497 = bitcast i32 %54 to float - %498 = fmul float %496, %497 - %499 = fadd float %495, %498 - %500 = call float @llvm.sqrt.f32(float %499) - %501 = fneg float %66 - %502 = fmul float %500, %501 - %503 = fmul float %502, 0.000000e+00 - %504 = bitcast i32 %54 to float - %505 = fadd float %504, %503 - %506 = fmul float %491, %505 - %507 = fadd float %477, %506 - %508 = call float @llvm.sqrt.f32(float %507) - %509 = fadd float %508, 0.000000e+00 - %510 = fdiv float %449, %509 - %511 = fmul float %436, %510 - %512 = insertelement <4 x float> %360, float %511, i32 3 - %513 = fsub <4 x float> , %512 - %514 = bitcast i32 %5 to float - %515 = bitcast i32 %5 to float - %516 = fmul float %514, %515 - %517 = fadd float %516, 0.000000e+00 - %518 = bitcast i32 %54 to float - %519 = bitcast i32 %54 to float - %520 = fmul float %518, %519 - %521 = fadd float %517, %520 - %522 = call float @llvm.sqrt.f32(float %521) - %523 = fneg float %66 - %524 = fmul float %522, %523 - %525 = fmul float %524, 0.000000e+00 - %526 = bitcast i32 %54 to float - %527 = fadd float %526, %525 - %528 = bitcast i32 %5 to float - %529 = bitcast i32 %5 to float - %530 = fmul float %528, %529 - %531 = fadd float %530, 0.000000e+00 - %532 = bitcast i32 %54 to float - %533 = bitcast i32 %54 to float - %534 = fmul float %532, %533 - %535 = fadd float %531, %534 - %536 = call float @llvm.sqrt.f32(float %535) - %537 = fneg float %66 - %538 = fmul float %536, %537 - %539 = bitcast i32 %5 to float - %540 = fadd float %539, %538 - %541 = bitcast i32 %5 to float - %542 = bitcast i32 %5 to float - %543 = fmul float %541, %542 - %544 = fadd float %543, 0.000000e+00 - %545 = bitcast i32 %54 to float - %546 = bitcast i32 %54 to float - %547 = fmul float %545, %546 - %548 = fadd float %544, %547 - %549 = call float @llvm.sqrt.f32(float %548) - %550 = fneg float %66 - %551 = fmul float %549, %550 - %552 = bitcast i32 %5 to float - %553 = fadd float %552, %551 - %554 = fmul float %540, %553 - %555 = fadd float %554, 0.000000e+00 - %556 = bitcast i32 %5 to float - %557 = bitcast i32 %5 to float - %558 = fmul float %556, %557 - %559 = fadd float %558, 0.000000e+00 - %560 = bitcast i32 %54 to float - %561 = bitcast i32 %54 to float - %562 = fmul float %560, %561 - %563 = fadd float %559, %562 - %564 = call float @llvm.sqrt.f32(float %563) - %565 = fneg float %66 - %566 = fmul float %564, %565 - %567 = fmul float %566, 0.000000e+00 - %568 = bitcast i32 %54 to float - %569 = fadd float %568, %567 - %570 = bitcast i32 %5 to float - %571 = bitcast i32 %5 to float - %572 = fmul float %570, %571 - %573 = fadd float %572, 0.000000e+00 - %574 = bitcast i32 %54 to float - %575 = bitcast i32 %54 to float - %576 = fmul float %574, %575 - %577 = fadd float %573, %576 - %578 = call float 
@llvm.sqrt.f32(float %577) - %579 = fneg float %66 - %580 = fmul float %578, %579 - %581 = fmul float %580, 0.000000e+00 - %582 = bitcast i32 %54 to float - %583 = fadd float %582, %581 - %584 = fmul float %569, %583 - %585 = fadd float %555, %584 - %586 = call float @llvm.sqrt.f32(float %585) - %587 = fadd float %586, 0.000000e+00 - %588 = fdiv float %527, %587 - %589 = fmul float %588, 2.000000e+00 - %590 = bitcast i32 %5 to float - %591 = bitcast i32 %5 to float - %592 = fmul float %590, %591 - %593 = fadd float %592, 0.000000e+00 - %594 = bitcast i32 %54 to float - %595 = bitcast i32 %54 to float - %596 = fmul float %594, %595 - %597 = fadd float %593, %596 - %598 = call float @llvm.sqrt.f32(float %597) - %599 = fneg float %66 - %600 = fmul float %598, %599 - %601 = fmul float %600, 0.000000e+00 - %602 = bitcast i32 %54 to float - %603 = fadd float %602, %601 - %604 = bitcast i32 %5 to float - %605 = bitcast i32 %5 to float - %606 = fmul float %604, %605 - %607 = fadd float %606, 0.000000e+00 - %608 = bitcast i32 %54 to float - %609 = bitcast i32 %54 to float - %610 = fmul float %608, %609 - %611 = fadd float %607, %610 - %612 = call float @llvm.sqrt.f32(float %611) - %613 = fneg float %66 - %614 = fmul float %612, %613 - %615 = bitcast i32 %5 to float - %616 = fadd float %615, %614 - %617 = bitcast i32 %5 to float - %618 = bitcast i32 %5 to float - %619 = fmul float %617, %618 - %620 = fadd float %619, 0.000000e+00 - %621 = bitcast i32 %54 to float - %622 = bitcast i32 %54 to float - %623 = fmul float %621, %622 - %624 = fadd float %620, %623 - %625 = call float @llvm.sqrt.f32(float %624) - %626 = fneg float %66 - %627 = fmul float %625, %626 - %628 = bitcast i32 %5 to float - %629 = fadd float %628, %627 - %630 = fmul float %616, %629 - %631 = fadd float %630, 0.000000e+00 - %632 = bitcast i32 %5 to float - %633 = bitcast i32 %5 to float - %634 = fmul float %632, %633 - %635 = fadd float %634, 0.000000e+00 - %636 = bitcast i32 %54 to float - %637 = bitcast i32 %54 to float - %638 = fmul float %636, %637 - %639 = fadd float %635, %638 - %640 = call float @llvm.sqrt.f32(float %639) - %641 = fneg float %66 - %642 = fmul float %640, %641 - %643 = fmul float %642, 0.000000e+00 - %644 = bitcast i32 %54 to float - %645 = fadd float %644, %643 - %646 = bitcast i32 %5 to float - %647 = bitcast i32 %5 to float - %648 = fmul float %646, %647 - %649 = fadd float %648, 0.000000e+00 - %650 = bitcast i32 %54 to float - %651 = bitcast i32 %54 to float - %652 = fmul float %650, %651 - %653 = fadd float %649, %652 - %654 = call float @llvm.sqrt.f32(float %653) - %655 = fneg float %66 - %656 = fmul float %654, %655 - %657 = fmul float %656, 0.000000e+00 - %658 = bitcast i32 %54 to float - %659 = fadd float %658, %657 - %660 = fmul float %645, %659 - %661 = fadd float %631, %660 - %662 = call float @llvm.sqrt.f32(float %661) - %663 = fadd float %662, 0.000000e+00 - %664 = fdiv float %603, %663 - %665 = fmul float %589, %664 - %666 = fsub float 1.000000e+00, %665 - %667 = insertelement <4 x float> zeroinitializer, float %666, i32 0 - %668 = insertelement <4 x float> %667, float 0.000000e+00, i32 1 - %669 = insertelement <4 x float> %668, float 0.000000e+00, i32 2 - %670 = insertelement <4 x float> %669, float 0.000000e+00, i32 3 - %671 = shufflevector <4 x float> %513, <4 x float> %670, <8 x i32> - %672 = extractelement <8 x float> %671, i32 0 - %673 = getelementptr float, float* %2, i32 0 - %674 = getelementptr inbounds float, float* %673, i64 3 - %675 = bitcast float* %674 to i32* - %676 = bitcast i32* 
%675 to float* - store float %672, float* %676, align 4 - %677 = bitcast float* %1 to i8* - %678 = alloca [4 x float], align 16 - %679 = bitcast [4 x float]* %678 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %677, i8* nonnull align 16 dereferenceable(16) %679, i64 16, i1 false) - store float 0.000000e+00, float* %2, align 4 - %680 = bitcast i32 %5 to float - %681 = bitcast i32 %5 to float - %682 = fmul float %680, %681 - %683 = fadd float %682, 0.000000e+00 - %684 = load i32, i32* %31, align 4 - %685 = bitcast i32 %684 to float - %686 = bitcast i32 %684 to float - %687 = fmul float %685, %686 - %688 = fadd float %683, %687 - %689 = call float @llvm.sqrt.f32(float %688) - %690 = sext i1 %61 to i32 - %691 = zext i1 %63 to i32 - %692 = add nsw i32 %690, %691 - %693 = sitofp i32 %692 to float - %694 = fneg float %693 - %695 = fmul float %689, %694 - %696 = bitcast i32 %5 to float - %697 = fadd float %696, %695 - %698 = bitcast i32 %5 to float - %699 = bitcast i32 %5 to float - %700 = fmul float %698, %699 - %701 = fadd float %700, 0.000000e+00 - %702 = bitcast i32 %684 to float - %703 = bitcast i32 %684 to float - %704 = fmul float %702, %703 - %705 = fadd float %701, %704 - %706 = call float @llvm.sqrt.f32(float %705) - %707 = fneg float %693 - %708 = fmul float %706, %707 - %709 = bitcast i32 %5 to float - %710 = fadd float %709, %708 - %711 = bitcast i32 %5 to float - %712 = bitcast i32 %5 to float - %713 = fmul float %711, %712 - %714 = fadd float %713, 0.000000e+00 - %715 = bitcast i32 %684 to float - %716 = bitcast i32 %684 to float - %717 = fmul float %715, %716 - %718 = fadd float %714, %717 - %719 = call float @llvm.sqrt.f32(float %718) - %720 = fneg float %693 - %721 = fmul float %719, %720 - %722 = bitcast i32 %5 to float - %723 = fadd float %722, %721 - %724 = fmul float %710, %723 - %725 = fadd float %724, 0.000000e+00 - %726 = bitcast i32 %5 to float - %727 = bitcast i32 %5 to float - %728 = fmul float %726, %727 - %729 = fadd float %728, 0.000000e+00 - %730 = bitcast i32 %684 to float - %731 = bitcast i32 %684 to float - %732 = fmul float %730, %731 - %733 = fadd float %729, %732 - %734 = call float @llvm.sqrt.f32(float %733) - %735 = fneg float %693 - %736 = fmul float %734, %735 - %737 = fmul float %736, 0.000000e+00 - %738 = bitcast i32 %684 to float - %739 = fadd float %738, %737 - %740 = bitcast i32 %5 to float - %741 = bitcast i32 %5 to float - %742 = fmul float %740, %741 - %743 = fadd float %742, 0.000000e+00 - %744 = bitcast i32 %684 to float - %745 = bitcast i32 %684 to float - %746 = fmul float %744, %745 - %747 = fadd float %743, %746 - %748 = call float @llvm.sqrt.f32(float %747) - %749 = fneg float %693 - %750 = fmul float %748, %749 - %751 = fmul float %750, 0.000000e+00 - %752 = bitcast i32 %684 to float - %753 = fadd float %752, %751 - %754 = fmul float %739, %753 - %755 = fadd float %725, %754 - %756 = call float @llvm.sqrt.f32(float %755) - %757 = fadd float %756, 0.000000e+00 - %758 = fdiv float %697, %757 - %759 = fmul float %758, 2.000000e+00 - %760 = bitcast i32 %5 to float - %761 = bitcast i32 %5 to float - %762 = fmul float %760, %761 - %763 = fadd float %762, 0.000000e+00 - %764 = bitcast i32 %684 to float - %765 = bitcast i32 %684 to float - %766 = fmul float %764, %765 - %767 = fadd float %763, %766 - %768 = call float @llvm.sqrt.f32(float %767) - %769 = fneg float %693 - %770 = fmul float %768, %769 - %771 = bitcast i32 %5 to float - %772 = fadd float %771, %770 - %773 = bitcast i32 %5 to float - %774 = 
bitcast i32 %5 to float - %775 = fmul float %773, %774 - %776 = fadd float %775, 0.000000e+00 - %777 = bitcast i32 %684 to float - %778 = bitcast i32 %684 to float - %779 = fmul float %777, %778 - %780 = fadd float %776, %779 - %781 = call float @llvm.sqrt.f32(float %780) - %782 = fneg float %693 - %783 = fmul float %781, %782 - %784 = bitcast i32 %5 to float - %785 = fadd float %784, %783 - %786 = bitcast i32 %5 to float - %787 = bitcast i32 %5 to float - %788 = fmul float %786, %787 - %789 = fadd float %788, 0.000000e+00 - %790 = bitcast i32 %684 to float - %791 = bitcast i32 %684 to float - %792 = fmul float %790, %791 - %793 = fadd float %789, %792 - %794 = call float @llvm.sqrt.f32(float %793) - %795 = fneg float %693 - %796 = fmul float %794, %795 - %797 = bitcast i32 %5 to float - %798 = fadd float %797, %796 - %799 = fmul float %785, %798 - %800 = fadd float %799, 0.000000e+00 - %801 = bitcast i32 %5 to float - %802 = bitcast i32 %5 to float - %803 = fmul float %801, %802 - %804 = fadd float %803, 0.000000e+00 - %805 = bitcast i32 %684 to float - %806 = bitcast i32 %684 to float - %807 = fmul float %805, %806 - %808 = fadd float %804, %807 - %809 = call float @llvm.sqrt.f32(float %808) - %810 = fneg float %693 - %811 = fmul float %809, %810 - %812 = fmul float %811, 0.000000e+00 - %813 = bitcast i32 %684 to float - %814 = fadd float %813, %812 - %815 = bitcast i32 %5 to float - %816 = bitcast i32 %5 to float - %817 = fmul float %815, %816 - %818 = fadd float %817, 0.000000e+00 - %819 = bitcast i32 %684 to float - %820 = bitcast i32 %684 to float - %821 = fmul float %819, %820 - %822 = fadd float %818, %821 - %823 = call float @llvm.sqrt.f32(float %822) - %824 = fneg float %693 - %825 = fmul float %823, %824 - %826 = fmul float %825, 0.000000e+00 - %827 = bitcast i32 %684 to float - %828 = fadd float %827, %826 - %829 = fmul float %814, %828 - %830 = fadd float %800, %829 - %831 = call float @llvm.sqrt.f32(float %830) - %832 = fadd float %831, 0.000000e+00 - %833 = fdiv float %772, %832 - %834 = fmul float %759, %833 - %835 = fsub float 1.000000e+00, %834 - %836 = insertelement <4 x float> zeroinitializer, float %835, i32 0 - %837 = insertelement <4 x float> %836, float 0.000000e+00, i32 1 - %838 = insertelement <4 x float> %837, float 0.000000e+00, i32 2 - %839 = insertelement <4 x float> %838, float 0.000000e+00, i32 3 - %840 = getelementptr float, float* %0, i32 0 - %841 = load float, float* %840, align 4 - %842 = insertelement <4 x float> zeroinitializer, float %841, i32 0 - %843 = insertelement <4 x float> %842, float 0.000000e+00, i32 1 - %844 = insertelement <4 x float> %843, float 0.000000e+00, i32 2 - %845 = insertelement <4 x float> %844, float 0.000000e+00, i32 3 - %846 = call <4 x float> @llvm.fma.v4f32(<4 x float> %839, <4 x float> %845, <4 x float> zeroinitializer) - %847 = extractelement <4 x float> %846, i32 0 - store float %847, float* %2, align 4 - %848 = bitcast i32 %5 to float - %849 = bitcast i32 %5 to float - %850 = fmul float %848, %849 - %851 = fadd float %850, 0.000000e+00 - %852 = bitcast i32 %684 to float - %853 = bitcast i32 %684 to float - %854 = fmul float %852, %853 - %855 = fadd float %851, %854 - %856 = call float @llvm.sqrt.f32(float %855) - %857 = fneg float %693 - %858 = fmul float %856, %857 - %859 = bitcast i32 %5 to float - %860 = fadd float %859, %858 - %861 = bitcast i32 %5 to float - %862 = bitcast i32 %5 to float - %863 = fmul float %861, %862 - %864 = fadd float %863, 0.000000e+00 - %865 = bitcast i32 %684 to float - %866 = bitcast i32 
%684 to float - %867 = fmul float %865, %866 - %868 = fadd float %864, %867 - %869 = call float @llvm.sqrt.f32(float %868) - %870 = fneg float %693 - %871 = fmul float %869, %870 - %872 = bitcast i32 %5 to float - %873 = fadd float %872, %871 - %874 = bitcast i32 %5 to float - %875 = bitcast i32 %5 to float - %876 = fmul float %874, %875 - %877 = fadd float %876, 0.000000e+00 - %878 = bitcast i32 %684 to float - %879 = bitcast i32 %684 to float - %880 = fmul float %878, %879 - %881 = fadd float %877, %880 - %882 = call float @llvm.sqrt.f32(float %881) - %883 = fneg float %693 - %884 = fmul float %882, %883 - %885 = bitcast i32 %5 to float - %886 = fadd float %885, %884 - %887 = fmul float %873, %886 - %888 = fadd float %887, 0.000000e+00 - %889 = bitcast i32 %5 to float - %890 = bitcast i32 %5 to float - %891 = fmul float %889, %890 - %892 = fadd float %891, 0.000000e+00 - %893 = bitcast i32 %684 to float - %894 = bitcast i32 %684 to float - %895 = fmul float %893, %894 - %896 = fadd float %892, %895 - %897 = call float @llvm.sqrt.f32(float %896) - %898 = fneg float %693 - %899 = fmul float %897, %898 - %900 = fmul float %899, 0.000000e+00 - %901 = bitcast i32 %684 to float - %902 = fadd float %901, %900 - %903 = bitcast i32 %5 to float - %904 = bitcast i32 %5 to float - %905 = fmul float %903, %904 - %906 = fadd float %905, 0.000000e+00 - %907 = bitcast i32 %684 to float - %908 = bitcast i32 %684 to float - %909 = fmul float %907, %908 - %910 = fadd float %906, %909 - %911 = call float @llvm.sqrt.f32(float %910) - %912 = fneg float %693 - %913 = fmul float %911, %912 - %914 = fmul float %913, 0.000000e+00 - %915 = bitcast i32 %684 to float - %916 = fadd float %915, %914 - %917 = fmul float %902, %916 - %918 = fadd float %888, %917 - %919 = call float @llvm.sqrt.f32(float %918) - %920 = fadd float %919, 0.000000e+00 - %921 = fdiv float %860, %920 - %922 = fmul float %921, 2.000000e+00 - %923 = bitcast i32 %5 to float - %924 = bitcast i32 %5 to float - %925 = fmul float %923, %924 - %926 = fadd float %925, 0.000000e+00 - %927 = bitcast i32 %684 to float - %928 = bitcast i32 %684 to float - %929 = fmul float %927, %928 - %930 = fadd float %926, %929 - %931 = call float @llvm.sqrt.f32(float %930) - %932 = fneg float %693 - %933 = fmul float %931, %932 - %934 = bitcast i32 %5 to float - %935 = fadd float %934, %933 - %936 = bitcast i32 %5 to float - %937 = bitcast i32 %5 to float - %938 = fmul float %936, %937 - %939 = fadd float %938, 0.000000e+00 - %940 = bitcast i32 %684 to float - %941 = bitcast i32 %684 to float - %942 = fmul float %940, %941 - %943 = fadd float %939, %942 - %944 = call float @llvm.sqrt.f32(float %943) - %945 = fneg float %693 - %946 = fmul float %944, %945 - %947 = bitcast i32 %5 to float - %948 = fadd float %947, %946 - %949 = bitcast i32 %5 to float - %950 = bitcast i32 %5 to float - %951 = fmul float %949, %950 - %952 = fadd float %951, 0.000000e+00 - %953 = bitcast i32 %684 to float - %954 = bitcast i32 %684 to float - %955 = fmul float %953, %954 - %956 = fadd float %952, %955 - %957 = call float @llvm.sqrt.f32(float %956) - %958 = fneg float %693 - %959 = fmul float %957, %958 - %960 = bitcast i32 %5 to float - %961 = fadd float %960, %959 - %962 = fmul float %948, %961 - %963 = fadd float %962, 0.000000e+00 - %964 = bitcast i32 %5 to float - %965 = bitcast i32 %5 to float - %966 = fmul float %964, %965 - %967 = fadd float %966, 0.000000e+00 - %968 = bitcast i32 %684 to float - %969 = bitcast i32 %684 to float - %970 = fmul float %968, %969 - %971 = fadd float 
%967, %970 - %972 = call float @llvm.sqrt.f32(float %971) - %973 = fneg float %693 - %974 = fmul float %972, %973 - %975 = fmul float %974, 0.000000e+00 - %976 = bitcast i32 %684 to float - %977 = fadd float %976, %975 - %978 = bitcast i32 %5 to float - %979 = bitcast i32 %5 to float - %980 = fmul float %978, %979 - %981 = fadd float %980, 0.000000e+00 - %982 = bitcast i32 %684 to float - %983 = bitcast i32 %684 to float - %984 = fmul float %982, %983 - %985 = fadd float %981, %984 - %986 = call float @llvm.sqrt.f32(float %985) - %987 = fneg float %693 - %988 = fmul float %986, %987 - %989 = fmul float %988, 0.000000e+00 - %990 = bitcast i32 %684 to float - %991 = fadd float %990, %989 - %992 = fmul float %977, %991 - %993 = fadd float %963, %992 - %994 = call float @llvm.sqrt.f32(float %993) - %995 = fadd float %994, 0.000000e+00 - %996 = fdiv float %935, %995 - %997 = fmul float %922, %996 - %998 = fsub float 1.000000e+00, %997 - %999 = fmul float %998, %841 - %1000 = fadd float %999, 0.000000e+00 - %1001 = bitcast i32 %5 to float - %1002 = bitcast i32 %5 to float - %1003 = fmul float %1001, %1002 - %1004 = fadd float %1003, 0.000000e+00 - %1005 = bitcast i32 %684 to float - %1006 = bitcast i32 %684 to float - %1007 = fmul float %1005, %1006 - %1008 = fadd float %1004, %1007 - %1009 = call float @llvm.sqrt.f32(float %1008) - %1010 = fneg float %693 - %1011 = fmul float %1009, %1010 - %1012 = bitcast i32 %5 to float - %1013 = fadd float %1012, %1011 - %1014 = bitcast i32 %5 to float - %1015 = bitcast i32 %5 to float - %1016 = fmul float %1014, %1015 - %1017 = fadd float %1016, 0.000000e+00 - %1018 = bitcast i32 %684 to float - %1019 = bitcast i32 %684 to float - %1020 = fmul float %1018, %1019 - %1021 = fadd float %1017, %1020 - %1022 = call float @llvm.sqrt.f32(float %1021) - %1023 = fneg float %693 - %1024 = fmul float %1022, %1023 - %1025 = bitcast i32 %5 to float - %1026 = fadd float %1025, %1024 - %1027 = bitcast i32 %5 to float - %1028 = bitcast i32 %5 to float - %1029 = fmul float %1027, %1028 - %1030 = fadd float %1029, 0.000000e+00 - %1031 = bitcast i32 %684 to float - %1032 = bitcast i32 %684 to float - %1033 = fmul float %1031, %1032 - %1034 = fadd float %1030, %1033 - %1035 = call float @llvm.sqrt.f32(float %1034) - %1036 = fneg float %693 - %1037 = fmul float %1035, %1036 - %1038 = bitcast i32 %5 to float - %1039 = fadd float %1038, %1037 - %1040 = fmul float %1026, %1039 - %1041 = fadd float %1040, 0.000000e+00 - %1042 = bitcast i32 %5 to float - %1043 = bitcast i32 %5 to float - %1044 = fmul float %1042, %1043 - %1045 = fadd float %1044, 0.000000e+00 - %1046 = bitcast i32 %684 to float - %1047 = bitcast i32 %684 to float - %1048 = fmul float %1046, %1047 - %1049 = fadd float %1045, %1048 - %1050 = call float @llvm.sqrt.f32(float %1049) - %1051 = fneg float %693 - %1052 = fmul float %1050, %1051 - %1053 = fmul float %1052, 0.000000e+00 - %1054 = bitcast i32 %684 to float - %1055 = fadd float %1054, %1053 - %1056 = bitcast i32 %5 to float - %1057 = bitcast i32 %5 to float - %1058 = fmul float %1056, %1057 - %1059 = fadd float %1058, 0.000000e+00 - %1060 = bitcast i32 %684 to float - %1061 = bitcast i32 %684 to float - %1062 = fmul float %1060, %1061 - %1063 = fadd float %1059, %1062 - %1064 = call float @llvm.sqrt.f32(float %1063) - %1065 = fneg float %693 - %1066 = fmul float %1064, %1065 - %1067 = fmul float %1066, 0.000000e+00 - %1068 = bitcast i32 %684 to float - %1069 = fadd float %1068, %1067 - %1070 = fmul float %1055, %1069 - %1071 = fadd float %1041, %1070 - %1072 = 
call float @llvm.sqrt.f32(float %1071) - %1073 = fadd float %1072, 0.000000e+00 - %1074 = fdiv float %1013, %1073 - %1075 = fmul float %1074, 2.000000e+00 - %1076 = bitcast i32 %5 to float - %1077 = bitcast i32 %5 to float - %1078 = fmul float %1076, %1077 - %1079 = fadd float %1078, 0.000000e+00 - %1080 = bitcast i32 %684 to float - %1081 = bitcast i32 %684 to float - %1082 = fmul float %1080, %1081 - %1083 = fadd float %1079, %1082 - %1084 = call float @llvm.sqrt.f32(float %1083) - %1085 = fneg float %693 - %1086 = fmul float %1084, %1085 - %1087 = fmul float %1086, 0.000000e+00 - %1088 = bitcast i32 %684 to float - %1089 = fadd float %1088, %1087 - %1090 = bitcast i32 %5 to float - %1091 = bitcast i32 %5 to float - %1092 = fmul float %1090, %1091 - %1093 = fadd float %1092, 0.000000e+00 - %1094 = bitcast i32 %684 to float - %1095 = bitcast i32 %684 to float - %1096 = fmul float %1094, %1095 - %1097 = fadd float %1093, %1096 - %1098 = call float @llvm.sqrt.f32(float %1097) - %1099 = fneg float %693 - %1100 = fmul float %1098, %1099 - %1101 = bitcast i32 %5 to float - %1102 = fadd float %1101, %1100 - %1103 = bitcast i32 %5 to float - %1104 = bitcast i32 %5 to float - %1105 = fmul float %1103, %1104 - %1106 = fadd float %1105, 0.000000e+00 - %1107 = bitcast i32 %684 to float - %1108 = bitcast i32 %684 to float - %1109 = fmul float %1107, %1108 - %1110 = fadd float %1106, %1109 - %1111 = call float @llvm.sqrt.f32(float %1110) - %1112 = fneg float %693 - %1113 = fmul float %1111, %1112 - %1114 = bitcast i32 %5 to float - %1115 = fadd float %1114, %1113 - %1116 = fmul float %1102, %1115 - %1117 = fadd float %1116, 0.000000e+00 - %1118 = bitcast i32 %5 to float - %1119 = bitcast i32 %5 to float - %1120 = fmul float %1118, %1119 - %1121 = fadd float %1120, 0.000000e+00 - %1122 = bitcast i32 %684 to float - %1123 = bitcast i32 %684 to float - %1124 = fmul float %1122, %1123 - %1125 = fadd float %1121, %1124 - %1126 = call float @llvm.sqrt.f32(float %1125) - %1127 = fneg float %693 - %1128 = fmul float %1126, %1127 - %1129 = fmul float %1128, 0.000000e+00 - %1130 = bitcast i32 %684 to float - %1131 = fadd float %1130, %1129 - %1132 = bitcast i32 %5 to float - %1133 = bitcast i32 %5 to float - %1134 = fmul float %1132, %1133 - %1135 = fadd float %1134, 0.000000e+00 - %1136 = bitcast i32 %684 to float - %1137 = bitcast i32 %684 to float - %1138 = fmul float %1136, %1137 - %1139 = fadd float %1135, %1138 - %1140 = call float @llvm.sqrt.f32(float %1139) - %1141 = fneg float %693 - %1142 = fmul float %1140, %1141 - %1143 = fmul float %1142, 0.000000e+00 - %1144 = bitcast i32 %684 to float - %1145 = fadd float %1144, %1143 - %1146 = fmul float %1131, %1145 - %1147 = fadd float %1117, %1146 - %1148 = call float @llvm.sqrt.f32(float %1147) - %1149 = fadd float %1148, 0.000000e+00 - %1150 = fdiv float %1089, %1149 - %1151 = fmul float %1075, %1150 - %1152 = fneg float %1151 - %1153 = getelementptr float, float* %0, i32 0 - %1154 = getelementptr inbounds float, float* %1153, i64 2 - %1155 = load float, float* %1154, align 4 - %1156 = fmul float %1152, %1155 - %1157 = fadd float %1000, %1156 - %1158 = insertelement <4 x float> zeroinitializer, float %1157, i32 0 - %1159 = insertelement <4 x float> %1158, float 0.000000e+00, i32 1 - %1160 = insertelement <4 x float> %1159, float 0.000000e+00, i32 2 - %1161 = insertelement <4 x float> %1160, float 0.000000e+00, i32 3 - %1162 = extractelement <4 x float> %1161, i32 0 - store float %1162, float* %2, align 4 - %1163 = extractelement <4 x float> %1161, i32 1 - 
%1164 = getelementptr float, float* %2, i32 0 - %1165 = getelementptr inbounds float, float* %1164, i64 1 - store float %1163, float* %1165, align 4 - %1166 = bitcast i32 %5 to float - %1167 = bitcast i32 %5 to float - %1168 = fmul float %1166, %1167 - %1169 = fadd float %1168, 0.000000e+00 - %1170 = bitcast i32 %684 to float - %1171 = bitcast i32 %684 to float - %1172 = fmul float %1170, %1171 - %1173 = fadd float %1169, %1172 - %1174 = call float @llvm.sqrt.f32(float %1173) - %1175 = fneg float %693 - %1176 = fmul float %1174, %1175 - %1177 = bitcast i32 %5 to float - %1178 = fadd float %1177, %1176 - %1179 = bitcast i32 %5 to float - %1180 = bitcast i32 %5 to float - %1181 = fmul float %1179, %1180 - %1182 = fadd float %1181, 0.000000e+00 - %1183 = bitcast i32 %684 to float - %1184 = bitcast i32 %684 to float - %1185 = fmul float %1183, %1184 - %1186 = fadd float %1182, %1185 - %1187 = call float @llvm.sqrt.f32(float %1186) - %1188 = fneg float %693 - %1189 = fmul float %1187, %1188 - %1190 = bitcast i32 %5 to float - %1191 = fadd float %1190, %1189 - %1192 = bitcast i32 %5 to float - %1193 = bitcast i32 %5 to float - %1194 = fmul float %1192, %1193 - %1195 = fadd float %1194, 0.000000e+00 - %1196 = bitcast i32 %684 to float - %1197 = bitcast i32 %684 to float - %1198 = fmul float %1196, %1197 - %1199 = fadd float %1195, %1198 - %1200 = call float @llvm.sqrt.f32(float %1199) - %1201 = fneg float %693 - %1202 = fmul float %1200, %1201 - %1203 = bitcast i32 %5 to float - %1204 = fadd float %1203, %1202 - %1205 = fmul float %1191, %1204 - %1206 = fadd float %1205, 0.000000e+00 - %1207 = bitcast i32 %5 to float - %1208 = bitcast i32 %5 to float - %1209 = fmul float %1207, %1208 - %1210 = fadd float %1209, 0.000000e+00 - %1211 = bitcast i32 %684 to float - %1212 = bitcast i32 %684 to float - %1213 = fmul float %1211, %1212 - %1214 = fadd float %1210, %1213 - %1215 = call float @llvm.sqrt.f32(float %1214) - %1216 = fneg float %693 - %1217 = fmul float %1215, %1216 - %1218 = fmul float %1217, 0.000000e+00 - %1219 = bitcast i32 %684 to float - %1220 = fadd float %1219, %1218 - %1221 = bitcast i32 %5 to float - %1222 = bitcast i32 %5 to float - %1223 = fmul float %1221, %1222 - %1224 = fadd float %1223, 0.000000e+00 - %1225 = bitcast i32 %684 to float - %1226 = bitcast i32 %684 to float - %1227 = fmul float %1225, %1226 - %1228 = fadd float %1224, %1227 - %1229 = call float @llvm.sqrt.f32(float %1228) - %1230 = fneg float %693 - %1231 = fmul float %1229, %1230 - %1232 = fmul float %1231, 0.000000e+00 - %1233 = bitcast i32 %684 to float - %1234 = fadd float %1233, %1232 - %1235 = fmul float %1220, %1234 - %1236 = fadd float %1206, %1235 - %1237 = call float @llvm.sqrt.f32(float %1236) - %1238 = fadd float %1237, 0.000000e+00 - %1239 = fdiv float %1178, %1238 - %1240 = fmul float %1239, 2.000000e+00 - %1241 = bitcast i32 %5 to float - %1242 = bitcast i32 %5 to float - %1243 = fmul float %1241, %1242 - %1244 = fadd float %1243, 0.000000e+00 - %1245 = bitcast i32 %684 to float - %1246 = bitcast i32 %684 to float - %1247 = fmul float %1245, %1246 - %1248 = fadd float %1244, %1247 - %1249 = call float @llvm.sqrt.f32(float %1248) - %1250 = fneg float %693 - %1251 = fmul float %1249, %1250 - %1252 = bitcast i32 %5 to float - %1253 = fadd float %1252, %1251 - %1254 = bitcast i32 %5 to float - %1255 = bitcast i32 %5 to float - %1256 = fmul float %1254, %1255 - %1257 = fadd float %1256, 0.000000e+00 - %1258 = bitcast i32 %684 to float - %1259 = bitcast i32 %684 to float - %1260 = fmul float %1258, %1259 - 
%1261 = fadd float %1257, %1260 - %1262 = call float @llvm.sqrt.f32(float %1261) - %1263 = fneg float %693 - %1264 = fmul float %1262, %1263 - %1265 = bitcast i32 %5 to float - %1266 = fadd float %1265, %1264 - %1267 = bitcast i32 %5 to float - %1268 = bitcast i32 %5 to float - %1269 = fmul float %1267, %1268 - %1270 = fadd float %1269, 0.000000e+00 - %1271 = bitcast i32 %684 to float - %1272 = bitcast i32 %684 to float - %1273 = fmul float %1271, %1272 - %1274 = fadd float %1270, %1273 - %1275 = call float @llvm.sqrt.f32(float %1274) - %1276 = fneg float %693 - %1277 = fmul float %1275, %1276 - %1278 = bitcast i32 %5 to float - %1279 = fadd float %1278, %1277 - %1280 = fmul float %1266, %1279 - %1281 = fadd float %1280, 0.000000e+00 - %1282 = bitcast i32 %5 to float - %1283 = bitcast i32 %5 to float - %1284 = fmul float %1282, %1283 - %1285 = fadd float %1284, 0.000000e+00 - %1286 = bitcast i32 %684 to float - %1287 = bitcast i32 %684 to float - %1288 = fmul float %1286, %1287 - %1289 = fadd float %1285, %1288 - %1290 = call float @llvm.sqrt.f32(float %1289) - %1291 = fneg float %693 - %1292 = fmul float %1290, %1291 - %1293 = fmul float %1292, 0.000000e+00 - %1294 = bitcast i32 %684 to float - %1295 = fadd float %1294, %1293 - %1296 = bitcast i32 %5 to float - %1297 = bitcast i32 %5 to float - %1298 = fmul float %1296, %1297 - %1299 = fadd float %1298, 0.000000e+00 - %1300 = bitcast i32 %684 to float - %1301 = bitcast i32 %684 to float - %1302 = fmul float %1300, %1301 - %1303 = fadd float %1299, %1302 - %1304 = call float @llvm.sqrt.f32(float %1303) - %1305 = fneg float %693 - %1306 = fmul float %1304, %1305 - %1307 = fmul float %1306, 0.000000e+00 - %1308 = bitcast i32 %684 to float - %1309 = fadd float %1308, %1307 - %1310 = fmul float %1295, %1309 - %1311 = fadd float %1281, %1310 - %1312 = call float @llvm.sqrt.f32(float %1311) - %1313 = fadd float %1312, 0.000000e+00 - %1314 = fdiv float %1253, %1313 - %1315 = fmul float %1240, %1314 - %1316 = fsub float 1.000000e+00, %1315 - %1317 = insertelement <4 x float> zeroinitializer, float %1316, i32 0 - %1318 = insertelement <4 x float> %1317, float 0.000000e+00, i32 1 - %1319 = insertelement <4 x float> %1318, float 0.000000e+00, i32 2 - %1320 = insertelement <4 x float> %1319, float 0.000000e+00, i32 3 - %1321 = getelementptr float, float* %0, i32 0 - %1322 = getelementptr inbounds float, float* %1321, i64 1 - %1323 = load float, float* %1322, align 4 - %1324 = insertelement <4 x float> zeroinitializer, float %1323, i32 0 - %1325 = insertelement <4 x float> %1324, float 0.000000e+00, i32 1 - %1326 = insertelement <4 x float> %1325, float 0.000000e+00, i32 2 - %1327 = insertelement <4 x float> %1326, float 0.000000e+00, i32 3 - %1328 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1320, <4 x float> %1327, <4 x float> zeroinitializer) - %1329 = extractelement <4 x float> %1328, i32 0 - store float %1329, float* %1165, align 4 - %1330 = bitcast i32 %5 to float - %1331 = bitcast i32 %5 to float - %1332 = fmul float %1330, %1331 - %1333 = fadd float %1332, 0.000000e+00 - %1334 = bitcast i32 %684 to float - %1335 = bitcast i32 %684 to float - %1336 = fmul float %1334, %1335 - %1337 = fadd float %1333, %1336 - %1338 = call float @llvm.sqrt.f32(float %1337) - %1339 = fneg float %693 - %1340 = fmul float %1338, %1339 - %1341 = bitcast i32 %5 to float - %1342 = fadd float %1341, %1340 - %1343 = bitcast i32 %5 to float - %1344 = bitcast i32 %5 to float - %1345 = fmul float %1343, %1344 - %1346 = fadd float %1345, 0.000000e+00 - %1347 = bitcast i32 
%684 to float - %1348 = bitcast i32 %684 to float - %1349 = fmul float %1347, %1348 - %1350 = fadd float %1346, %1349 - %1351 = call float @llvm.sqrt.f32(float %1350) - %1352 = fneg float %693 - %1353 = fmul float %1351, %1352 - %1354 = bitcast i32 %5 to float - %1355 = fadd float %1354, %1353 - %1356 = bitcast i32 %5 to float - %1357 = bitcast i32 %5 to float - %1358 = fmul float %1356, %1357 - %1359 = fadd float %1358, 0.000000e+00 - %1360 = bitcast i32 %684 to float - %1361 = bitcast i32 %684 to float - %1362 = fmul float %1360, %1361 - %1363 = fadd float %1359, %1362 - %1364 = call float @llvm.sqrt.f32(float %1363) - %1365 = fneg float %693 - %1366 = fmul float %1364, %1365 - %1367 = bitcast i32 %5 to float - %1368 = fadd float %1367, %1366 - %1369 = fmul float %1355, %1368 - %1370 = fadd float %1369, 0.000000e+00 - %1371 = bitcast i32 %5 to float - %1372 = bitcast i32 %5 to float - %1373 = fmul float %1371, %1372 - %1374 = fadd float %1373, 0.000000e+00 - %1375 = bitcast i32 %684 to float - %1376 = bitcast i32 %684 to float - %1377 = fmul float %1375, %1376 - %1378 = fadd float %1374, %1377 - %1379 = call float @llvm.sqrt.f32(float %1378) - %1380 = fneg float %693 - %1381 = fmul float %1379, %1380 - %1382 = fmul float %1381, 0.000000e+00 - %1383 = bitcast i32 %684 to float - %1384 = fadd float %1383, %1382 - %1385 = bitcast i32 %5 to float - %1386 = bitcast i32 %5 to float - %1387 = fmul float %1385, %1386 - %1388 = fadd float %1387, 0.000000e+00 - %1389 = bitcast i32 %684 to float - %1390 = bitcast i32 %684 to float - %1391 = fmul float %1389, %1390 - %1392 = fadd float %1388, %1391 - %1393 = call float @llvm.sqrt.f32(float %1392) - %1394 = fneg float %693 - %1395 = fmul float %1393, %1394 - %1396 = fmul float %1395, 0.000000e+00 - %1397 = bitcast i32 %684 to float - %1398 = fadd float %1397, %1396 - %1399 = fmul float %1384, %1398 - %1400 = fadd float %1370, %1399 - %1401 = call float @llvm.sqrt.f32(float %1400) - %1402 = fadd float %1401, 0.000000e+00 - %1403 = fdiv float %1342, %1402 - %1404 = fmul float %1403, 2.000000e+00 - %1405 = bitcast i32 %5 to float - %1406 = bitcast i32 %5 to float - %1407 = fmul float %1405, %1406 - %1408 = fadd float %1407, 0.000000e+00 - %1409 = bitcast i32 %684 to float - %1410 = bitcast i32 %684 to float - %1411 = fmul float %1409, %1410 - %1412 = fadd float %1408, %1411 - %1413 = call float @llvm.sqrt.f32(float %1412) - %1414 = fneg float %693 - %1415 = fmul float %1413, %1414 - %1416 = bitcast i32 %5 to float - %1417 = fadd float %1416, %1415 - %1418 = bitcast i32 %5 to float - %1419 = bitcast i32 %5 to float - %1420 = fmul float %1418, %1419 - %1421 = fadd float %1420, 0.000000e+00 - %1422 = bitcast i32 %684 to float - %1423 = bitcast i32 %684 to float - %1424 = fmul float %1422, %1423 - %1425 = fadd float %1421, %1424 - %1426 = call float @llvm.sqrt.f32(float %1425) - %1427 = fneg float %693 - %1428 = fmul float %1426, %1427 - %1429 = bitcast i32 %5 to float - %1430 = fadd float %1429, %1428 - %1431 = bitcast i32 %5 to float - %1432 = bitcast i32 %5 to float - %1433 = fmul float %1431, %1432 - %1434 = fadd float %1433, 0.000000e+00 - %1435 = bitcast i32 %684 to float - %1436 = bitcast i32 %684 to float - %1437 = fmul float %1435, %1436 - %1438 = fadd float %1434, %1437 - %1439 = call float @llvm.sqrt.f32(float %1438) - %1440 = fneg float %693 - %1441 = fmul float %1439, %1440 - %1442 = bitcast i32 %5 to float - %1443 = fadd float %1442, %1441 - %1444 = fmul float %1430, %1443 - %1445 = fadd float %1444, 0.000000e+00 - %1446 = bitcast i32 %5 to 
float - %1447 = bitcast i32 %5 to float - %1448 = fmul float %1446, %1447 - %1449 = fadd float %1448, 0.000000e+00 - %1450 = bitcast i32 %684 to float - %1451 = bitcast i32 %684 to float - %1452 = fmul float %1450, %1451 - %1453 = fadd float %1449, %1452 - %1454 = call float @llvm.sqrt.f32(float %1453) - %1455 = fneg float %693 - %1456 = fmul float %1454, %1455 - %1457 = fmul float %1456, 0.000000e+00 - %1458 = bitcast i32 %684 to float - %1459 = fadd float %1458, %1457 - %1460 = bitcast i32 %5 to float - %1461 = bitcast i32 %5 to float - %1462 = fmul float %1460, %1461 - %1463 = fadd float %1462, 0.000000e+00 - %1464 = bitcast i32 %684 to float - %1465 = bitcast i32 %684 to float - %1466 = fmul float %1464, %1465 - %1467 = fadd float %1463, %1466 - %1468 = call float @llvm.sqrt.f32(float %1467) - %1469 = fneg float %693 - %1470 = fmul float %1468, %1469 - %1471 = fmul float %1470, 0.000000e+00 - %1472 = bitcast i32 %684 to float - %1473 = fadd float %1472, %1471 - %1474 = fmul float %1459, %1473 - %1475 = fadd float %1445, %1474 - %1476 = call float @llvm.sqrt.f32(float %1475) - %1477 = fadd float %1476, 0.000000e+00 - %1478 = fdiv float %1417, %1477 - %1479 = fmul float %1404, %1478 - %1480 = fsub float 1.000000e+00, %1479 - %1481 = load float, float* %1322, align 4 - %1482 = fmul float %1480, %1481 - %1483 = fadd float %1482, 0.000000e+00 - %1484 = bitcast i32 %5 to float - %1485 = bitcast i32 %5 to float - %1486 = fmul float %1484, %1485 - %1487 = fadd float %1486, 0.000000e+00 - %1488 = bitcast i32 %684 to float - %1489 = bitcast i32 %684 to float - %1490 = fmul float %1488, %1489 - %1491 = fadd float %1487, %1490 - %1492 = call float @llvm.sqrt.f32(float %1491) - %1493 = fneg float %693 - %1494 = fmul float %1492, %1493 - %1495 = bitcast i32 %5 to float - %1496 = fadd float %1495, %1494 - %1497 = bitcast i32 %5 to float - %1498 = bitcast i32 %5 to float - %1499 = fmul float %1497, %1498 - %1500 = fadd float %1499, 0.000000e+00 - %1501 = bitcast i32 %684 to float - %1502 = bitcast i32 %684 to float - %1503 = fmul float %1501, %1502 - %1504 = fadd float %1500, %1503 - %1505 = call float @llvm.sqrt.f32(float %1504) - %1506 = fneg float %693 - %1507 = fmul float %1505, %1506 - %1508 = bitcast i32 %5 to float - %1509 = fadd float %1508, %1507 - %1510 = bitcast i32 %5 to float - %1511 = bitcast i32 %5 to float - %1512 = fmul float %1510, %1511 - %1513 = fadd float %1512, 0.000000e+00 - %1514 = bitcast i32 %684 to float - %1515 = bitcast i32 %684 to float - %1516 = fmul float %1514, %1515 - %1517 = fadd float %1513, %1516 - %1518 = call float @llvm.sqrt.f32(float %1517) - %1519 = fneg float %693 - %1520 = fmul float %1518, %1519 - %1521 = bitcast i32 %5 to float - %1522 = fadd float %1521, %1520 - %1523 = fmul float %1509, %1522 - %1524 = fadd float %1523, 0.000000e+00 - %1525 = bitcast i32 %5 to float - %1526 = bitcast i32 %5 to float - %1527 = fmul float %1525, %1526 - %1528 = fadd float %1527, 0.000000e+00 - %1529 = bitcast i32 %684 to float - %1530 = bitcast i32 %684 to float - %1531 = fmul float %1529, %1530 - %1532 = fadd float %1528, %1531 - %1533 = call float @llvm.sqrt.f32(float %1532) - %1534 = fneg float %693 - %1535 = fmul float %1533, %1534 - %1536 = fmul float %1535, 0.000000e+00 - %1537 = bitcast i32 %684 to float - %1538 = fadd float %1537, %1536 - %1539 = bitcast i32 %5 to float - %1540 = bitcast i32 %5 to float - %1541 = fmul float %1539, %1540 - %1542 = fadd float %1541, 0.000000e+00 - %1543 = bitcast i32 %684 to float - %1544 = bitcast i32 %684 to float - %1545 = fmul 
float %1543, %1544 - %1546 = fadd float %1542, %1545 - %1547 = call float @llvm.sqrt.f32(float %1546) - %1548 = fneg float %693 - %1549 = fmul float %1547, %1548 - %1550 = fmul float %1549, 0.000000e+00 - %1551 = bitcast i32 %684 to float - %1552 = fadd float %1551, %1550 - %1553 = fmul float %1538, %1552 - %1554 = fadd float %1524, %1553 - %1555 = call float @llvm.sqrt.f32(float %1554) - %1556 = fadd float %1555, 0.000000e+00 - %1557 = fdiv float %1496, %1556 - %1558 = fmul float %1557, 2.000000e+00 - %1559 = bitcast i32 %5 to float - %1560 = bitcast i32 %5 to float - %1561 = fmul float %1559, %1560 - %1562 = fadd float %1561, 0.000000e+00 - %1563 = bitcast i32 %684 to float - %1564 = bitcast i32 %684 to float - %1565 = fmul float %1563, %1564 - %1566 = fadd float %1562, %1565 - %1567 = call float @llvm.sqrt.f32(float %1566) - %1568 = fneg float %693 - %1569 = fmul float %1567, %1568 - %1570 = fmul float %1569, 0.000000e+00 - %1571 = bitcast i32 %684 to float - %1572 = fadd float %1571, %1570 - %1573 = bitcast i32 %5 to float - %1574 = bitcast i32 %5 to float - %1575 = fmul float %1573, %1574 - %1576 = fadd float %1575, 0.000000e+00 - %1577 = bitcast i32 %684 to float - %1578 = bitcast i32 %684 to float - %1579 = fmul float %1577, %1578 - %1580 = fadd float %1576, %1579 - %1581 = call float @llvm.sqrt.f32(float %1580) - %1582 = fneg float %693 - %1583 = fmul float %1581, %1582 - %1584 = bitcast i32 %5 to float - %1585 = fadd float %1584, %1583 - %1586 = bitcast i32 %5 to float - %1587 = bitcast i32 %5 to float - %1588 = fmul float %1586, %1587 - %1589 = fadd float %1588, 0.000000e+00 - %1590 = bitcast i32 %684 to float - %1591 = bitcast i32 %684 to float - %1592 = fmul float %1590, %1591 - %1593 = fadd float %1589, %1592 - %1594 = call float @llvm.sqrt.f32(float %1593) - %1595 = fneg float %693 - %1596 = fmul float %1594, %1595 - %1597 = bitcast i32 %5 to float - %1598 = fadd float %1597, %1596 - %1599 = fmul float %1585, %1598 - %1600 = fadd float %1599, 0.000000e+00 - %1601 = bitcast i32 %5 to float - %1602 = bitcast i32 %5 to float - %1603 = fmul float %1601, %1602 - %1604 = fadd float %1603, 0.000000e+00 - %1605 = bitcast i32 %684 to float - %1606 = bitcast i32 %684 to float - %1607 = fmul float %1605, %1606 - %1608 = fadd float %1604, %1607 - %1609 = call float @llvm.sqrt.f32(float %1608) - %1610 = fneg float %693 - %1611 = fmul float %1609, %1610 - %1612 = fmul float %1611, 0.000000e+00 - %1613 = bitcast i32 %684 to float - %1614 = fadd float %1613, %1612 - %1615 = bitcast i32 %5 to float - %1616 = bitcast i32 %5 to float - %1617 = fmul float %1615, %1616 - %1618 = fadd float %1617, 0.000000e+00 - %1619 = bitcast i32 %684 to float - %1620 = bitcast i32 %684 to float - %1621 = fmul float %1619, %1620 - %1622 = fadd float %1618, %1621 - %1623 = call float @llvm.sqrt.f32(float %1622) - %1624 = fneg float %693 - %1625 = fmul float %1623, %1624 - %1626 = fmul float %1625, 0.000000e+00 - %1627 = bitcast i32 %684 to float - %1628 = fadd float %1627, %1626 - %1629 = fmul float %1614, %1628 - %1630 = fadd float %1600, %1629 - %1631 = call float @llvm.sqrt.f32(float %1630) - %1632 = fadd float %1631, 0.000000e+00 - %1633 = fdiv float %1572, %1632 - %1634 = fmul float %1558, %1633 - %1635 = fneg float %1634 - %1636 = getelementptr float, float* %0, i32 0 - %1637 = getelementptr inbounds float, float* %1636, i64 3 - %1638 = load float, float* %1637, align 4 - %1639 = fmul float %1635, %1638 - %1640 = fadd float %1483, %1639 - %1641 = insertelement <4 x float> zeroinitializer, float %1640, i32 0 
- %1642 = insertelement <4 x float> %1641, float 0.000000e+00, i32 1 - %1643 = insertelement <4 x float> %1642, float 0.000000e+00, i32 2 - %1644 = insertelement <4 x float> %1643, float 0.000000e+00, i32 3 - %1645 = extractelement <4 x float> %1644, i32 0 - store float %1645, float* %1165, align 4 - %1646 = extractelement <4 x float> %1644, i32 1 - %1647 = getelementptr float, float* %2, i32 0 - %1648 = getelementptr inbounds float, float* %1647, i64 2 - store float %1646, float* %1648, align 4 - %1649 = getelementptr float, float* %0, i32 0 - %1650 = bitcast float* %1649 to i32* - %1651 = load i32, i32* %1650, align 4 - %1652 = bitcast i32 %1651 to float - %1653 = bitcast i32 %1651 to float - %1654 = fmul float %1652, %1653 - %1655 = fadd float %1654, 0.000000e+00 - %1656 = bitcast i32 %684 to float - %1657 = bitcast i32 %684 to float - %1658 = fmul float %1656, %1657 - %1659 = fadd float %1655, %1658 - %1660 = call float @llvm.sqrt.f32(float %1659) - %1661 = fneg float %693 - %1662 = fmul float %1660, %1661 - %1663 = fmul float %1662, 0.000000e+00 - %1664 = bitcast i32 %684 to float - %1665 = fadd float %1664, %1663 - %1666 = bitcast i32 %1651 to float - %1667 = bitcast i32 %1651 to float - %1668 = fmul float %1666, %1667 - %1669 = fadd float %1668, 0.000000e+00 - %1670 = bitcast i32 %684 to float - %1671 = bitcast i32 %684 to float - %1672 = fmul float %1670, %1671 - %1673 = fadd float %1669, %1672 - %1674 = call float @llvm.sqrt.f32(float %1673) - %1675 = fneg float %693 - %1676 = fmul float %1674, %1675 - %1677 = bitcast i32 %1651 to float - %1678 = fadd float %1677, %1676 - %1679 = bitcast i32 %1651 to float - %1680 = bitcast i32 %1651 to float - %1681 = fmul float %1679, %1680 - %1682 = fadd float %1681, 0.000000e+00 - %1683 = bitcast i32 %684 to float - %1684 = bitcast i32 %684 to float - %1685 = fmul float %1683, %1684 - %1686 = fadd float %1682, %1685 - %1687 = call float @llvm.sqrt.f32(float %1686) - %1688 = fneg float %693 - %1689 = fmul float %1687, %1688 - %1690 = bitcast i32 %1651 to float - %1691 = fadd float %1690, %1689 - %1692 = fmul float %1678, %1691 - %1693 = fadd float %1692, 0.000000e+00 - %1694 = bitcast i32 %1651 to float - %1695 = bitcast i32 %1651 to float - %1696 = fmul float %1694, %1695 - %1697 = fadd float %1696, 0.000000e+00 - %1698 = bitcast i32 %684 to float - %1699 = bitcast i32 %684 to float - %1700 = fmul float %1698, %1699 - %1701 = fadd float %1697, %1700 - %1702 = call float @llvm.sqrt.f32(float %1701) - %1703 = fneg float %693 - %1704 = fmul float %1702, %1703 - %1705 = fmul float %1704, 0.000000e+00 - %1706 = bitcast i32 %684 to float - %1707 = fadd float %1706, %1705 - %1708 = bitcast i32 %1651 to float - %1709 = bitcast i32 %1651 to float - %1710 = fmul float %1708, %1709 - %1711 = fadd float %1710, 0.000000e+00 - %1712 = bitcast i32 %684 to float - %1713 = bitcast i32 %684 to float - %1714 = fmul float %1712, %1713 - %1715 = fadd float %1711, %1714 - %1716 = call float @llvm.sqrt.f32(float %1715) - %1717 = fneg float %693 - %1718 = fmul float %1716, %1717 - %1719 = fmul float %1718, 0.000000e+00 - %1720 = bitcast i32 %684 to float - %1721 = fadd float %1720, %1719 - %1722 = fmul float %1707, %1721 - %1723 = fadd float %1693, %1722 - %1724 = call float @llvm.sqrt.f32(float %1723) - %1725 = fadd float %1724, 0.000000e+00 - %1726 = fdiv float %1665, %1725 - %1727 = fmul float %1726, 2.000000e+00 - %1728 = bitcast i32 %1651 to float - %1729 = bitcast i32 %1651 to float - %1730 = fmul float %1728, %1729 - %1731 = fadd float %1730, 0.000000e+00 - 
%1732 = bitcast i32 %684 to float - %1733 = bitcast i32 %684 to float - %1734 = fmul float %1732, %1733 - %1735 = fadd float %1731, %1734 - %1736 = call float @llvm.sqrt.f32(float %1735) - %1737 = fneg float %693 - %1738 = fmul float %1736, %1737 - %1739 = bitcast i32 %1651 to float - %1740 = fadd float %1739, %1738 - %1741 = bitcast i32 %1651 to float - %1742 = bitcast i32 %1651 to float - %1743 = fmul float %1741, %1742 - %1744 = fadd float %1743, 0.000000e+00 - %1745 = bitcast i32 %684 to float - %1746 = bitcast i32 %684 to float - %1747 = fmul float %1745, %1746 - %1748 = fadd float %1744, %1747 - %1749 = call float @llvm.sqrt.f32(float %1748) - %1750 = fneg float %693 - %1751 = fmul float %1749, %1750 - %1752 = bitcast i32 %1651 to float - %1753 = fadd float %1752, %1751 - %1754 = bitcast i32 %1651 to float - %1755 = bitcast i32 %1651 to float - %1756 = fmul float %1754, %1755 - %1757 = fadd float %1756, 0.000000e+00 - %1758 = bitcast i32 %684 to float - %1759 = bitcast i32 %684 to float - %1760 = fmul float %1758, %1759 - %1761 = fadd float %1757, %1760 - %1762 = call float @llvm.sqrt.f32(float %1761) - %1763 = fneg float %693 - %1764 = fmul float %1762, %1763 - %1765 = bitcast i32 %1651 to float - %1766 = fadd float %1765, %1764 - %1767 = fmul float %1753, %1766 - %1768 = fadd float %1767, 0.000000e+00 - %1769 = bitcast i32 %1651 to float - %1770 = bitcast i32 %1651 to float - %1771 = fmul float %1769, %1770 - %1772 = fadd float %1771, 0.000000e+00 - %1773 = bitcast i32 %684 to float - %1774 = bitcast i32 %684 to float - %1775 = fmul float %1773, %1774 - %1776 = fadd float %1772, %1775 - %1777 = call float @llvm.sqrt.f32(float %1776) - %1778 = fneg float %693 - %1779 = fmul float %1777, %1778 - %1780 = fmul float %1779, 0.000000e+00 - %1781 = bitcast i32 %684 to float - %1782 = fadd float %1781, %1780 - %1783 = bitcast i32 %1651 to float - %1784 = bitcast i32 %1651 to float - %1785 = fmul float %1783, %1784 - %1786 = fadd float %1785, 0.000000e+00 - %1787 = bitcast i32 %684 to float - %1788 = bitcast i32 %684 to float - %1789 = fmul float %1787, %1788 - %1790 = fadd float %1786, %1789 - %1791 = call float @llvm.sqrt.f32(float %1790) - %1792 = fneg float %693 - %1793 = fmul float %1791, %1792 - %1794 = fmul float %1793, 0.000000e+00 - %1795 = bitcast i32 %684 to float - %1796 = fadd float %1795, %1794 - %1797 = fmul float %1782, %1796 - %1798 = fadd float %1768, %1797 - %1799 = call float @llvm.sqrt.f32(float %1798) - %1800 = fadd float %1799, 0.000000e+00 - %1801 = fdiv float %1740, %1800 - %1802 = fmul float %1727, %1801 - %1803 = fneg float %1802 - %1804 = insertelement <4 x float> zeroinitializer, float %1803, i32 0 - %1805 = insertelement <4 x float> %1804, float 0.000000e+00, i32 1 - %1806 = insertelement <4 x float> %1805, float 0.000000e+00, i32 2 - %1807 = insertelement <4 x float> %1806, float 0.000000e+00, i32 3 - %1808 = getelementptr float, float* %0, i32 0 - %1809 = load float, float* %1808, align 4 - %1810 = insertelement <4 x float> zeroinitializer, float %1809, i32 0 - %1811 = insertelement <4 x float> %1810, float 0.000000e+00, i32 1 - %1812 = insertelement <4 x float> %1811, float 0.000000e+00, i32 2 - %1813 = insertelement <4 x float> %1812, float 0.000000e+00, i32 3 - %1814 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1807, <4 x float> %1813, <4 x float> zeroinitializer) - %1815 = extractelement <4 x float> %1814, i32 0 - store float %1815, float* %1648, align 4 - %1816 = bitcast i32 %1651 to float - %1817 = bitcast i32 %1651 to float - %1818 = fmul float 
%1816, %1817 - %1819 = fadd float %1818, 0.000000e+00 - %1820 = bitcast i32 %684 to float - %1821 = bitcast i32 %684 to float - %1822 = fmul float %1820, %1821 - %1823 = fadd float %1819, %1822 - %1824 = call float @llvm.sqrt.f32(float %1823) - %1825 = fneg float %693 - %1826 = fmul float %1824, %1825 - %1827 = fmul float %1826, 0.000000e+00 - %1828 = bitcast i32 %684 to float - %1829 = fadd float %1828, %1827 - %1830 = bitcast i32 %1651 to float - %1831 = bitcast i32 %1651 to float - %1832 = fmul float %1830, %1831 - %1833 = fadd float %1832, 0.000000e+00 - %1834 = bitcast i32 %684 to float - %1835 = bitcast i32 %684 to float - %1836 = fmul float %1834, %1835 - %1837 = fadd float %1833, %1836 - %1838 = call float @llvm.sqrt.f32(float %1837) - %1839 = fneg float %693 - %1840 = fmul float %1838, %1839 - %1841 = bitcast i32 %1651 to float - %1842 = fadd float %1841, %1840 - %1843 = bitcast i32 %1651 to float - %1844 = bitcast i32 %1651 to float - %1845 = fmul float %1843, %1844 - %1846 = fadd float %1845, 0.000000e+00 - %1847 = bitcast i32 %684 to float - %1848 = bitcast i32 %684 to float - %1849 = fmul float %1847, %1848 - %1850 = fadd float %1846, %1849 - %1851 = call float @llvm.sqrt.f32(float %1850) - %1852 = fneg float %693 - %1853 = fmul float %1851, %1852 - %1854 = bitcast i32 %1651 to float - %1855 = fadd float %1854, %1853 - %1856 = fmul float %1842, %1855 - %1857 = fadd float %1856, 0.000000e+00 - %1858 = bitcast i32 %1651 to float - %1859 = bitcast i32 %1651 to float - %1860 = fmul float %1858, %1859 - %1861 = fadd float %1860, 0.000000e+00 - %1862 = bitcast i32 %684 to float - %1863 = bitcast i32 %684 to float - %1864 = fmul float %1862, %1863 - %1865 = fadd float %1861, %1864 - %1866 = call float @llvm.sqrt.f32(float %1865) - %1867 = fneg float %693 - %1868 = fmul float %1866, %1867 - %1869 = fmul float %1868, 0.000000e+00 - %1870 = bitcast i32 %684 to float - %1871 = fadd float %1870, %1869 - %1872 = bitcast i32 %1651 to float - %1873 = bitcast i32 %1651 to float - %1874 = fmul float %1872, %1873 - %1875 = fadd float %1874, 0.000000e+00 - %1876 = bitcast i32 %684 to float - %1877 = bitcast i32 %684 to float - %1878 = fmul float %1876, %1877 - %1879 = fadd float %1875, %1878 - %1880 = call float @llvm.sqrt.f32(float %1879) - %1881 = fneg float %693 - %1882 = fmul float %1880, %1881 - %1883 = fmul float %1882, 0.000000e+00 - %1884 = bitcast i32 %684 to float - %1885 = fadd float %1884, %1883 - %1886 = fmul float %1871, %1885 - %1887 = fadd float %1857, %1886 - %1888 = call float @llvm.sqrt.f32(float %1887) - %1889 = fadd float %1888, 0.000000e+00 - %1890 = fdiv float %1829, %1889 - %1891 = fmul float %1890, 2.000000e+00 - %1892 = bitcast i32 %1651 to float - %1893 = bitcast i32 %1651 to float - %1894 = fmul float %1892, %1893 - %1895 = fadd float %1894, 0.000000e+00 - %1896 = bitcast i32 %684 to float - %1897 = bitcast i32 %684 to float - %1898 = fmul float %1896, %1897 - %1899 = fadd float %1895, %1898 - %1900 = call float @llvm.sqrt.f32(float %1899) - %1901 = fneg float %693 - %1902 = fmul float %1900, %1901 - %1903 = bitcast i32 %1651 to float - %1904 = fadd float %1903, %1902 - %1905 = bitcast i32 %1651 to float - %1906 = bitcast i32 %1651 to float - %1907 = fmul float %1905, %1906 - %1908 = fadd float %1907, 0.000000e+00 - %1909 = bitcast i32 %684 to float - %1910 = bitcast i32 %684 to float - %1911 = fmul float %1909, %1910 - %1912 = fadd float %1908, %1911 - %1913 = call float @llvm.sqrt.f32(float %1912) - %1914 = fneg float %693 - %1915 = fmul float %1913, %1914 - %1916 = 
bitcast i32 %1651 to float - %1917 = fadd float %1916, %1915 - %1918 = bitcast i32 %1651 to float - %1919 = bitcast i32 %1651 to float - %1920 = fmul float %1918, %1919 - %1921 = fadd float %1920, 0.000000e+00 - %1922 = bitcast i32 %684 to float - %1923 = bitcast i32 %684 to float - %1924 = fmul float %1922, %1923 - %1925 = fadd float %1921, %1924 - %1926 = call float @llvm.sqrt.f32(float %1925) - %1927 = fneg float %693 - %1928 = fmul float %1926, %1927 - %1929 = bitcast i32 %1651 to float - %1930 = fadd float %1929, %1928 - %1931 = fmul float %1917, %1930 - %1932 = fadd float %1931, 0.000000e+00 - %1933 = bitcast i32 %1651 to float - %1934 = bitcast i32 %1651 to float - %1935 = fmul float %1933, %1934 - %1936 = fadd float %1935, 0.000000e+00 - %1937 = bitcast i32 %684 to float - %1938 = bitcast i32 %684 to float - %1939 = fmul float %1937, %1938 - %1940 = fadd float %1936, %1939 - %1941 = call float @llvm.sqrt.f32(float %1940) - %1942 = fneg float %693 - %1943 = fmul float %1941, %1942 - %1944 = fmul float %1943, 0.000000e+00 - %1945 = bitcast i32 %684 to float - %1946 = fadd float %1945, %1944 - %1947 = bitcast i32 %1651 to float - %1948 = bitcast i32 %1651 to float - %1949 = fmul float %1947, %1948 - %1950 = fadd float %1949, 0.000000e+00 - %1951 = bitcast i32 %684 to float - %1952 = bitcast i32 %684 to float - %1953 = fmul float %1951, %1952 - %1954 = fadd float %1950, %1953 - %1955 = call float @llvm.sqrt.f32(float %1954) - %1956 = fneg float %693 - %1957 = fmul float %1955, %1956 - %1958 = fmul float %1957, 0.000000e+00 - %1959 = bitcast i32 %684 to float - %1960 = fadd float %1959, %1958 - %1961 = fmul float %1946, %1960 - %1962 = fadd float %1932, %1961 - %1963 = call float @llvm.sqrt.f32(float %1962) - %1964 = fadd float %1963, 0.000000e+00 - %1965 = fdiv float %1904, %1964 - %1966 = fmul float %1891, %1965 - %1967 = fneg float %1966 - %1968 = fmul float %1967, %1809 - %1969 = fadd float %1968, 0.000000e+00 - %1970 = bitcast i32 %1651 to float - %1971 = bitcast i32 %1651 to float - %1972 = fmul float %1970, %1971 - %1973 = fadd float %1972, 0.000000e+00 - %1974 = bitcast i32 %684 to float - %1975 = bitcast i32 %684 to float - %1976 = fmul float %1974, %1975 - %1977 = fadd float %1973, %1976 - %1978 = call float @llvm.sqrt.f32(float %1977) - %1979 = fneg float %693 - %1980 = fmul float %1978, %1979 - %1981 = fmul float %1980, 0.000000e+00 - %1982 = bitcast i32 %684 to float - %1983 = fadd float %1982, %1981 - %1984 = bitcast i32 %1651 to float - %1985 = bitcast i32 %1651 to float - %1986 = fmul float %1984, %1985 - %1987 = fadd float %1986, 0.000000e+00 - %1988 = bitcast i32 %684 to float - %1989 = bitcast i32 %684 to float - %1990 = fmul float %1988, %1989 - %1991 = fadd float %1987, %1990 - %1992 = call float @llvm.sqrt.f32(float %1991) - %1993 = fneg float %693 - %1994 = fmul float %1992, %1993 - %1995 = bitcast i32 %1651 to float - %1996 = fadd float %1995, %1994 - %1997 = bitcast i32 %1651 to float - %1998 = bitcast i32 %1651 to float - %1999 = fmul float %1997, %1998 - %2000 = fadd float %1999, 0.000000e+00 - %2001 = bitcast i32 %684 to float - %2002 = bitcast i32 %684 to float - %2003 = fmul float %2001, %2002 - %2004 = fadd float %2000, %2003 - %2005 = call float @llvm.sqrt.f32(float %2004) - %2006 = fneg float %693 - %2007 = fmul float %2005, %2006 - %2008 = bitcast i32 %1651 to float - %2009 = fadd float %2008, %2007 - %2010 = fmul float %1996, %2009 - %2011 = fadd float %2010, 0.000000e+00 - %2012 = bitcast i32 %1651 to float - %2013 = bitcast i32 %1651 to float - %2014 
= fmul float %2012, %2013 - %2015 = fadd float %2014, 0.000000e+00 - %2016 = bitcast i32 %684 to float - %2017 = bitcast i32 %684 to float - %2018 = fmul float %2016, %2017 - %2019 = fadd float %2015, %2018 - %2020 = call float @llvm.sqrt.f32(float %2019) - %2021 = fneg float %693 - %2022 = fmul float %2020, %2021 - %2023 = fmul float %2022, 0.000000e+00 - %2024 = bitcast i32 %684 to float - %2025 = fadd float %2024, %2023 - %2026 = bitcast i32 %1651 to float - %2027 = bitcast i32 %1651 to float - %2028 = fmul float %2026, %2027 - %2029 = fadd float %2028, 0.000000e+00 - %2030 = bitcast i32 %684 to float - %2031 = bitcast i32 %684 to float - %2032 = fmul float %2030, %2031 - %2033 = fadd float %2029, %2032 - %2034 = call float @llvm.sqrt.f32(float %2033) - %2035 = fneg float %693 - %2036 = fmul float %2034, %2035 - %2037 = fmul float %2036, 0.000000e+00 - %2038 = bitcast i32 %684 to float - %2039 = fadd float %2038, %2037 - %2040 = fmul float %2025, %2039 - %2041 = fadd float %2011, %2040 - %2042 = call float @llvm.sqrt.f32(float %2041) - %2043 = fadd float %2042, 0.000000e+00 - %2044 = fdiv float %1983, %2043 - %2045 = fmul float %2044, 2.000000e+00 - %2046 = bitcast i32 %1651 to float - %2047 = bitcast i32 %1651 to float - %2048 = fmul float %2046, %2047 - %2049 = fadd float %2048, 0.000000e+00 - %2050 = bitcast i32 %684 to float - %2051 = bitcast i32 %684 to float - %2052 = fmul float %2050, %2051 - %2053 = fadd float %2049, %2052 - %2054 = call float @llvm.sqrt.f32(float %2053) - %2055 = fneg float %693 - %2056 = fmul float %2054, %2055 - %2057 = fmul float %2056, 0.000000e+00 - %2058 = bitcast i32 %684 to float - %2059 = fadd float %2058, %2057 - %2060 = bitcast i32 %1651 to float - %2061 = bitcast i32 %1651 to float - %2062 = fmul float %2060, %2061 - %2063 = fadd float %2062, 0.000000e+00 - %2064 = bitcast i32 %684 to float - %2065 = bitcast i32 %684 to float - %2066 = fmul float %2064, %2065 - %2067 = fadd float %2063, %2066 - %2068 = call float @llvm.sqrt.f32(float %2067) - %2069 = fneg float %693 - %2070 = fmul float %2068, %2069 - %2071 = bitcast i32 %1651 to float - %2072 = fadd float %2071, %2070 - %2073 = bitcast i32 %1651 to float - %2074 = bitcast i32 %1651 to float - %2075 = fmul float %2073, %2074 - %2076 = fadd float %2075, 0.000000e+00 - %2077 = bitcast i32 %684 to float - %2078 = bitcast i32 %684 to float - %2079 = fmul float %2077, %2078 - %2080 = fadd float %2076, %2079 - %2081 = call float @llvm.sqrt.f32(float %2080) - %2082 = fneg float %693 - %2083 = fmul float %2081, %2082 - %2084 = bitcast i32 %1651 to float - %2085 = fadd float %2084, %2083 - %2086 = fmul float %2072, %2085 - %2087 = fadd float %2086, 0.000000e+00 - %2088 = bitcast i32 %1651 to float - %2089 = bitcast i32 %1651 to float - %2090 = fmul float %2088, %2089 - %2091 = fadd float %2090, 0.000000e+00 - %2092 = bitcast i32 %684 to float - %2093 = bitcast i32 %684 to float - %2094 = fmul float %2092, %2093 - %2095 = fadd float %2091, %2094 - %2096 = call float @llvm.sqrt.f32(float %2095) - %2097 = fneg float %693 - %2098 = fmul float %2096, %2097 - %2099 = fmul float %2098, 0.000000e+00 - %2100 = bitcast i32 %684 to float - %2101 = fadd float %2100, %2099 - %2102 = bitcast i32 %1651 to float - %2103 = bitcast i32 %1651 to float - %2104 = fmul float %2102, %2103 - %2105 = fadd float %2104, 0.000000e+00 - %2106 = bitcast i32 %684 to float - %2107 = bitcast i32 %684 to float - %2108 = fmul float %2106, %2107 - %2109 = fadd float %2105, %2108 - %2110 = call float @llvm.sqrt.f32(float %2109) - %2111 = fneg 
float %693 - %2112 = fmul float %2110, %2111 - %2113 = fmul float %2112, 0.000000e+00 - %2114 = bitcast i32 %684 to float - %2115 = fadd float %2114, %2113 - %2116 = fmul float %2101, %2115 - %2117 = fadd float %2087, %2116 - %2118 = call float @llvm.sqrt.f32(float %2117) - %2119 = fadd float %2118, 0.000000e+00 - %2120 = fdiv float %2059, %2119 - %2121 = fmul float %2045, %2120 - %2122 = fsub float 1.000000e+00, %2121 - %2123 = load float, float* %1154, align 4 - %2124 = fmul float %2122, %2123 - %2125 = fadd float %1969, %2124 - %2126 = insertelement <4 x float> zeroinitializer, float %2125, i32 0 - %2127 = insertelement <4 x float> %2126, float 0.000000e+00, i32 1 - %2128 = insertelement <4 x float> %2127, float 0.000000e+00, i32 2 - %2129 = insertelement <4 x float> %2128, float 0.000000e+00, i32 3 - %2130 = extractelement <4 x float> %2129, i32 0 - store float %2130, float* %1648, align 4 - %2131 = extractelement <4 x float> %2129, i32 1 - %2132 = getelementptr float, float* %2, i32 0 - %2133 = getelementptr inbounds float, float* %2132, i64 3 - store float %2131, float* %2133, align 4 - %2134 = bitcast i32 %1651 to float - %2135 = bitcast i32 %1651 to float - %2136 = fmul float %2134, %2135 - %2137 = fadd float %2136, 0.000000e+00 - %2138 = bitcast i32 %684 to float - %2139 = bitcast i32 %684 to float - %2140 = fmul float %2138, %2139 - %2141 = fadd float %2137, %2140 - %2142 = call float @llvm.sqrt.f32(float %2141) - %2143 = fneg float %693 - %2144 = fmul float %2142, %2143 - %2145 = fmul float %2144, 0.000000e+00 - %2146 = bitcast i32 %684 to float - %2147 = fadd float %2146, %2145 - %2148 = bitcast i32 %1651 to float - %2149 = bitcast i32 %1651 to float - %2150 = fmul float %2148, %2149 - %2151 = fadd float %2150, 0.000000e+00 - %2152 = bitcast i32 %684 to float - %2153 = bitcast i32 %684 to float - %2154 = fmul float %2152, %2153 - %2155 = fadd float %2151, %2154 - %2156 = call float @llvm.sqrt.f32(float %2155) - %2157 = fneg float %693 - %2158 = fmul float %2156, %2157 - %2159 = bitcast i32 %1651 to float - %2160 = fadd float %2159, %2158 - %2161 = bitcast i32 %1651 to float - %2162 = bitcast i32 %1651 to float - %2163 = fmul float %2161, %2162 - %2164 = fadd float %2163, 0.000000e+00 - %2165 = bitcast i32 %684 to float - %2166 = bitcast i32 %684 to float - %2167 = fmul float %2165, %2166 - %2168 = fadd float %2164, %2167 - %2169 = call float @llvm.sqrt.f32(float %2168) - %2170 = fneg float %693 - %2171 = fmul float %2169, %2170 - %2172 = bitcast i32 %1651 to float - %2173 = fadd float %2172, %2171 - %2174 = fmul float %2160, %2173 - %2175 = fadd float %2174, 0.000000e+00 - %2176 = bitcast i32 %1651 to float - %2177 = bitcast i32 %1651 to float - %2178 = fmul float %2176, %2177 - %2179 = fadd float %2178, 0.000000e+00 - %2180 = bitcast i32 %684 to float - %2181 = bitcast i32 %684 to float - %2182 = fmul float %2180, %2181 - %2183 = fadd float %2179, %2182 - %2184 = call float @llvm.sqrt.f32(float %2183) - %2185 = fneg float %693 - %2186 = fmul float %2184, %2185 - %2187 = fmul float %2186, 0.000000e+00 - %2188 = bitcast i32 %684 to float - %2189 = fadd float %2188, %2187 - %2190 = bitcast i32 %1651 to float - %2191 = bitcast i32 %1651 to float - %2192 = fmul float %2190, %2191 - %2193 = fadd float %2192, 0.000000e+00 - %2194 = bitcast i32 %684 to float - %2195 = bitcast i32 %684 to float - %2196 = fmul float %2194, %2195 - %2197 = fadd float %2193, %2196 - %2198 = call float @llvm.sqrt.f32(float %2197) - %2199 = fneg float %693 - %2200 = fmul float %2198, %2199 - %2201 = fmul 
float %2200, 0.000000e+00 - %2202 = bitcast i32 %684 to float - %2203 = fadd float %2202, %2201 - %2204 = fmul float %2189, %2203 - %2205 = fadd float %2175, %2204 - %2206 = call float @llvm.sqrt.f32(float %2205) - %2207 = fadd float %2206, 0.000000e+00 - %2208 = fdiv float %2147, %2207 - %2209 = fmul float %2208, 2.000000e+00 - %2210 = bitcast i32 %1651 to float - %2211 = bitcast i32 %1651 to float - %2212 = fmul float %2210, %2211 - %2213 = fadd float %2212, 0.000000e+00 - %2214 = bitcast i32 %684 to float - %2215 = bitcast i32 %684 to float - %2216 = fmul float %2214, %2215 - %2217 = fadd float %2213, %2216 - %2218 = call float @llvm.sqrt.f32(float %2217) - %2219 = fneg float %693 - %2220 = fmul float %2218, %2219 - %2221 = bitcast i32 %1651 to float - %2222 = fadd float %2221, %2220 - %2223 = bitcast i32 %1651 to float - %2224 = bitcast i32 %1651 to float - %2225 = fmul float %2223, %2224 - %2226 = fadd float %2225, 0.000000e+00 - %2227 = bitcast i32 %684 to float - %2228 = bitcast i32 %684 to float - %2229 = fmul float %2227, %2228 - %2230 = fadd float %2226, %2229 - %2231 = call float @llvm.sqrt.f32(float %2230) - %2232 = fneg float %693 - %2233 = fmul float %2231, %2232 - %2234 = bitcast i32 %1651 to float - %2235 = fadd float %2234, %2233 - %2236 = bitcast i32 %1651 to float - %2237 = bitcast i32 %1651 to float - %2238 = fmul float %2236, %2237 - %2239 = fadd float %2238, 0.000000e+00 - %2240 = bitcast i32 %684 to float - %2241 = bitcast i32 %684 to float - %2242 = fmul float %2240, %2241 - %2243 = fadd float %2239, %2242 - %2244 = call float @llvm.sqrt.f32(float %2243) - %2245 = fneg float %693 - %2246 = fmul float %2244, %2245 - %2247 = bitcast i32 %1651 to float - %2248 = fadd float %2247, %2246 - %2249 = fmul float %2235, %2248 - %2250 = fadd float %2249, 0.000000e+00 - %2251 = bitcast i32 %1651 to float - %2252 = bitcast i32 %1651 to float - %2253 = fmul float %2251, %2252 - %2254 = fadd float %2253, 0.000000e+00 - %2255 = bitcast i32 %684 to float - %2256 = bitcast i32 %684 to float - %2257 = fmul float %2255, %2256 - %2258 = fadd float %2254, %2257 - %2259 = call float @llvm.sqrt.f32(float %2258) - %2260 = fneg float %693 - %2261 = fmul float %2259, %2260 - %2262 = fmul float %2261, 0.000000e+00 - %2263 = bitcast i32 %684 to float - %2264 = fadd float %2263, %2262 - %2265 = bitcast i32 %1651 to float - %2266 = bitcast i32 %1651 to float - %2267 = fmul float %2265, %2266 - %2268 = fadd float %2267, 0.000000e+00 - %2269 = bitcast i32 %684 to float - %2270 = bitcast i32 %684 to float - %2271 = fmul float %2269, %2270 - %2272 = fadd float %2268, %2271 - %2273 = call float @llvm.sqrt.f32(float %2272) - %2274 = fneg float %693 - %2275 = fmul float %2273, %2274 - %2276 = fmul float %2275, 0.000000e+00 - %2277 = bitcast i32 %684 to float - %2278 = fadd float %2277, %2276 - %2279 = fmul float %2264, %2278 - %2280 = fadd float %2250, %2279 - %2281 = call float @llvm.sqrt.f32(float %2280) - %2282 = fadd float %2281, 0.000000e+00 - %2283 = fdiv float %2222, %2282 - %2284 = fmul float %2209, %2283 - %2285 = fneg float %2284 - %2286 = insertelement <4 x float> zeroinitializer, float %2285, i32 0 - %2287 = insertelement <4 x float> %2286, float 0.000000e+00, i32 1 - %2288 = insertelement <4 x float> %2287, float 0.000000e+00, i32 2 - %2289 = insertelement <4 x float> %2288, float 0.000000e+00, i32 3 - %2290 = load float, float* %1322, align 4 - %2291 = insertelement <4 x float> zeroinitializer, float %2290, i32 0 - %2292 = insertelement <4 x float> %2291, float 0.000000e+00, i32 1 - %2293 
= insertelement <4 x float> %2292, float 0.000000e+00, i32 2 - %2294 = insertelement <4 x float> %2293, float 0.000000e+00, i32 3 - %2295 = call <4 x float> @llvm.fma.v4f32(<4 x float> %2289, <4 x float> %2294, <4 x float> zeroinitializer) - %2296 = extractelement <4 x float> %2295, i32 0 - store float %2296, float* %2133, align 4 - %2297 = bitcast i32 %1651 to float - %2298 = bitcast i32 %1651 to float - %2299 = fmul float %2297, %2298 - %2300 = fadd float %2299, 0.000000e+00 - %2301 = bitcast i32 %684 to float - %2302 = bitcast i32 %684 to float - %2303 = fmul float %2301, %2302 - %2304 = fadd float %2300, %2303 - %2305 = call float @llvm.sqrt.f32(float %2304) - %2306 = fneg float %693 - %2307 = fmul float %2305, %2306 - %2308 = fmul float %2307, 0.000000e+00 - %2309 = bitcast i32 %684 to float - %2310 = fadd float %2309, %2308 - %2311 = bitcast i32 %1651 to float - %2312 = bitcast i32 %1651 to float - %2313 = fmul float %2311, %2312 - %2314 = fadd float %2313, 0.000000e+00 - %2315 = bitcast i32 %684 to float - %2316 = bitcast i32 %684 to float - %2317 = fmul float %2315, %2316 - %2318 = fadd float %2314, %2317 - %2319 = call float @llvm.sqrt.f32(float %2318) - %2320 = fneg float %693 - %2321 = fmul float %2319, %2320 - %2322 = bitcast i32 %1651 to float - %2323 = fadd float %2322, %2321 - %2324 = bitcast i32 %1651 to float - %2325 = bitcast i32 %1651 to float - %2326 = fmul float %2324, %2325 - %2327 = fadd float %2326, 0.000000e+00 - %2328 = bitcast i32 %684 to float - %2329 = bitcast i32 %684 to float - %2330 = fmul float %2328, %2329 - %2331 = fadd float %2327, %2330 - %2332 = call float @llvm.sqrt.f32(float %2331) - %2333 = fneg float %693 - %2334 = fmul float %2332, %2333 - %2335 = bitcast i32 %1651 to float - %2336 = fadd float %2335, %2334 - %2337 = fmul float %2323, %2336 - %2338 = fadd float %2337, 0.000000e+00 - %2339 = bitcast i32 %1651 to float - %2340 = bitcast i32 %1651 to float - %2341 = fmul float %2339, %2340 - %2342 = fadd float %2341, 0.000000e+00 - %2343 = bitcast i32 %684 to float - %2344 = bitcast i32 %684 to float - %2345 = fmul float %2343, %2344 - %2346 = fadd float %2342, %2345 - %2347 = call float @llvm.sqrt.f32(float %2346) - %2348 = fneg float %693 - %2349 = fmul float %2347, %2348 - %2350 = fmul float %2349, 0.000000e+00 - %2351 = bitcast i32 %684 to float - %2352 = fadd float %2351, %2350 - %2353 = bitcast i32 %1651 to float - %2354 = bitcast i32 %1651 to float - %2355 = fmul float %2353, %2354 - %2356 = fadd float %2355, 0.000000e+00 - %2357 = bitcast i32 %684 to float - %2358 = bitcast i32 %684 to float - %2359 = fmul float %2357, %2358 - %2360 = fadd float %2356, %2359 - %2361 = call float @llvm.sqrt.f32(float %2360) - %2362 = fneg float %693 - %2363 = fmul float %2361, %2362 - %2364 = fmul float %2363, 0.000000e+00 - %2365 = bitcast i32 %684 to float - %2366 = fadd float %2365, %2364 - %2367 = fmul float %2352, %2366 - %2368 = fadd float %2338, %2367 - %2369 = call float @llvm.sqrt.f32(float %2368) - %2370 = fadd float %2369, 0.000000e+00 - %2371 = fdiv float %2310, %2370 - %2372 = fmul float %2371, 2.000000e+00 - %2373 = bitcast i32 %1651 to float - %2374 = bitcast i32 %1651 to float - %2375 = fmul float %2373, %2374 - %2376 = fadd float %2375, 0.000000e+00 - %2377 = bitcast i32 %684 to float - %2378 = bitcast i32 %684 to float - %2379 = fmul float %2377, %2378 - %2380 = fadd float %2376, %2379 - %2381 = call float @llvm.sqrt.f32(float %2380) - %2382 = fneg float %693 - %2383 = fmul float %2381, %2382 - %2384 = bitcast i32 %1651 to float - %2385 = 
fadd float %2384, %2383 - %2386 = bitcast i32 %1651 to float - %2387 = bitcast i32 %1651 to float - %2388 = fmul float %2386, %2387 - %2389 = fadd float %2388, 0.000000e+00 - %2390 = bitcast i32 %684 to float - %2391 = bitcast i32 %684 to float - %2392 = fmul float %2390, %2391 - %2393 = fadd float %2389, %2392 - %2394 = call float @llvm.sqrt.f32(float %2393) - %2395 = fneg float %693 - %2396 = fmul float %2394, %2395 - %2397 = bitcast i32 %1651 to float - %2398 = fadd float %2397, %2396 - %2399 = bitcast i32 %1651 to float - %2400 = bitcast i32 %1651 to float - %2401 = fmul float %2399, %2400 - %2402 = fadd float %2401, 0.000000e+00 - %2403 = bitcast i32 %684 to float - %2404 = bitcast i32 %684 to float - %2405 = fmul float %2403, %2404 - %2406 = fadd float %2402, %2405 - %2407 = call float @llvm.sqrt.f32(float %2406) - %2408 = fneg float %693 - %2409 = fmul float %2407, %2408 - %2410 = bitcast i32 %1651 to float - %2411 = fadd float %2410, %2409 - %2412 = fmul float %2398, %2411 - %2413 = fadd float %2412, 0.000000e+00 - %2414 = bitcast i32 %1651 to float - %2415 = bitcast i32 %1651 to float - %2416 = fmul float %2414, %2415 - %2417 = fadd float %2416, 0.000000e+00 - %2418 = bitcast i32 %684 to float - %2419 = bitcast i32 %684 to float - %2420 = fmul float %2418, %2419 - %2421 = fadd float %2417, %2420 - %2422 = call float @llvm.sqrt.f32(float %2421) - %2423 = fneg float %693 - %2424 = fmul float %2422, %2423 - %2425 = fmul float %2424, 0.000000e+00 - %2426 = bitcast i32 %684 to float - %2427 = fadd float %2426, %2425 - %2428 = bitcast i32 %1651 to float - %2429 = bitcast i32 %1651 to float - %2430 = fmul float %2428, %2429 - %2431 = fadd float %2430, 0.000000e+00 - %2432 = bitcast i32 %684 to float - %2433 = bitcast i32 %684 to float - %2434 = fmul float %2432, %2433 - %2435 = fadd float %2431, %2434 - %2436 = call float @llvm.sqrt.f32(float %2435) - %2437 = fneg float %693 - %2438 = fmul float %2436, %2437 - %2439 = fmul float %2438, 0.000000e+00 - %2440 = bitcast i32 %684 to float - %2441 = fadd float %2440, %2439 - %2442 = fmul float %2427, %2441 - %2443 = fadd float %2413, %2442 - %2444 = call float @llvm.sqrt.f32(float %2443) - %2445 = fadd float %2444, 0.000000e+00 - %2446 = fdiv float %2385, %2445 - %2447 = fmul float %2372, %2446 - %2448 = fneg float %2447 - %2449 = fmul float %2448, %2290 - %2450 = fadd float %2449, 0.000000e+00 - %2451 = bitcast i32 %1651 to float - %2452 = bitcast i32 %1651 to float - %2453 = fmul float %2451, %2452 - %2454 = fadd float %2453, 0.000000e+00 - %2455 = bitcast i32 %684 to float - %2456 = bitcast i32 %684 to float - %2457 = fmul float %2455, %2456 - %2458 = fadd float %2454, %2457 - %2459 = call float @llvm.sqrt.f32(float %2458) - %2460 = fneg float %693 - %2461 = fmul float %2459, %2460 - %2462 = fmul float %2461, 0.000000e+00 - %2463 = bitcast i32 %684 to float - %2464 = fadd float %2463, %2462 - %2465 = bitcast i32 %1651 to float - %2466 = bitcast i32 %1651 to float - %2467 = fmul float %2465, %2466 - %2468 = fadd float %2467, 0.000000e+00 - %2469 = bitcast i32 %684 to float - %2470 = bitcast i32 %684 to float - %2471 = fmul float %2469, %2470 - %2472 = fadd float %2468, %2471 - %2473 = call float @llvm.sqrt.f32(float %2472) - %2474 = fneg float %693 - %2475 = fmul float %2473, %2474 - %2476 = bitcast i32 %1651 to float - %2477 = fadd float %2476, %2475 - %2478 = bitcast i32 %1651 to float - %2479 = bitcast i32 %1651 to float - %2480 = fmul float %2478, %2479 - %2481 = fadd float %2480, 0.000000e+00 - %2482 = bitcast i32 %684 to float - %2483 
= bitcast i32 %684 to float - %2484 = fmul float %2482, %2483 - %2485 = fadd float %2481, %2484 - %2486 = call float @llvm.sqrt.f32(float %2485) - %2487 = fneg float %693 - %2488 = fmul float %2486, %2487 - %2489 = bitcast i32 %1651 to float - %2490 = fadd float %2489, %2488 - %2491 = fmul float %2477, %2490 - %2492 = fadd float %2491, 0.000000e+00 - %2493 = bitcast i32 %1651 to float - %2494 = bitcast i32 %1651 to float - %2495 = fmul float %2493, %2494 - %2496 = fadd float %2495, 0.000000e+00 - %2497 = bitcast i32 %684 to float - %2498 = bitcast i32 %684 to float - %2499 = fmul float %2497, %2498 - %2500 = fadd float %2496, %2499 - %2501 = call float @llvm.sqrt.f32(float %2500) - %2502 = fneg float %693 - %2503 = fmul float %2501, %2502 - %2504 = fmul float %2503, 0.000000e+00 - %2505 = bitcast i32 %684 to float - %2506 = fadd float %2505, %2504 - %2507 = bitcast i32 %1651 to float - %2508 = bitcast i32 %1651 to float - %2509 = fmul float %2507, %2508 - %2510 = fadd float %2509, 0.000000e+00 - %2511 = bitcast i32 %684 to float - %2512 = bitcast i32 %684 to float - %2513 = fmul float %2511, %2512 - %2514 = fadd float %2510, %2513 - %2515 = call float @llvm.sqrt.f32(float %2514) - %2516 = fneg float %693 - %2517 = fmul float %2515, %2516 - %2518 = fmul float %2517, 0.000000e+00 - %2519 = bitcast i32 %684 to float - %2520 = fadd float %2519, %2518 - %2521 = fmul float %2506, %2520 - %2522 = fadd float %2492, %2521 - %2523 = call float @llvm.sqrt.f32(float %2522) - %2524 = fadd float %2523, 0.000000e+00 - %2525 = fdiv float %2464, %2524 - %2526 = fmul float %2525, 2.000000e+00 - %2527 = bitcast i32 %1651 to float - %2528 = bitcast i32 %1651 to float - %2529 = fmul float %2527, %2528 - %2530 = fadd float %2529, 0.000000e+00 - %2531 = bitcast i32 %684 to float - %2532 = bitcast i32 %684 to float - %2533 = fmul float %2531, %2532 - %2534 = fadd float %2530, %2533 - %2535 = call float @llvm.sqrt.f32(float %2534) - %2536 = fneg float %693 - %2537 = fmul float %2535, %2536 - %2538 = fmul float %2537, 0.000000e+00 - %2539 = bitcast i32 %684 to float - %2540 = fadd float %2539, %2538 - %2541 = bitcast i32 %1651 to float - %2542 = bitcast i32 %1651 to float - %2543 = fmul float %2541, %2542 - %2544 = fadd float %2543, 0.000000e+00 - %2545 = bitcast i32 %684 to float - %2546 = bitcast i32 %684 to float - %2547 = fmul float %2545, %2546 - %2548 = fadd float %2544, %2547 - %2549 = call float @llvm.sqrt.f32(float %2548) - %2550 = fneg float %693 - %2551 = fmul float %2549, %2550 - %2552 = bitcast i32 %1651 to float - %2553 = fadd float %2552, %2551 - %2554 = bitcast i32 %1651 to float - %2555 = bitcast i32 %1651 to float - %2556 = fmul float %2554, %2555 - %2557 = fadd float %2556, 0.000000e+00 - %2558 = bitcast i32 %684 to float - %2559 = bitcast i32 %684 to float - %2560 = fmul float %2558, %2559 - %2561 = fadd float %2557, %2560 - %2562 = call float @llvm.sqrt.f32(float %2561) - %2563 = fneg float %693 - %2564 = fmul float %2562, %2563 - %2565 = bitcast i32 %1651 to float - %2566 = fadd float %2565, %2564 - %2567 = fmul float %2553, %2566 - %2568 = fadd float %2567, 0.000000e+00 - %2569 = bitcast i32 %1651 to float - %2570 = bitcast i32 %1651 to float - %2571 = fmul float %2569, %2570 - %2572 = fadd float %2571, 0.000000e+00 - %2573 = bitcast i32 %684 to float - %2574 = bitcast i32 %684 to float - %2575 = fmul float %2573, %2574 - %2576 = fadd float %2572, %2575 - %2577 = call float @llvm.sqrt.f32(float %2576) - %2578 = fneg float %693 - %2579 = fmul float %2577, %2578 - %2580 = fmul float %2579, 
0.000000e+00 - %2581 = bitcast i32 %684 to float - %2582 = fadd float %2581, %2580 - %2583 = bitcast i32 %1651 to float - %2584 = bitcast i32 %1651 to float - %2585 = fmul float %2583, %2584 - %2586 = fadd float %2585, 0.000000e+00 - %2587 = bitcast i32 %684 to float - %2588 = bitcast i32 %684 to float - %2589 = fmul float %2587, %2588 - %2590 = fadd float %2586, %2589 - %2591 = call float @llvm.sqrt.f32(float %2590) - %2592 = fneg float %693 - %2593 = fmul float %2591, %2592 - %2594 = fmul float %2593, 0.000000e+00 - %2595 = bitcast i32 %684 to float - %2596 = fadd float %2595, %2594 - %2597 = fmul float %2582, %2596 - %2598 = fadd float %2568, %2597 - %2599 = call float @llvm.sqrt.f32(float %2598) - %2600 = fadd float %2599, 0.000000e+00 - %2601 = fdiv float %2540, %2600 - %2602 = fmul float %2526, %2601 - %2603 = fsub float 1.000000e+00, %2602 - %2604 = load float, float* %1637, align 4 - %2605 = fmul float %2603, %2604 - %2606 = fadd float %2450, %2605 - %2607 = insertelement <4 x float> zeroinitializer, float %2606, i32 0 - %2608 = insertelement <4 x float> %2607, float 0.000000e+00, i32 1 - %2609 = insertelement <4 x float> %2608, float 0.000000e+00, i32 2 - %2610 = insertelement <4 x float> %2609, float 0.000000e+00, i32 3 - %2611 = extractelement <4 x float> %2610, i32 0 - store float %2611, float* %2133, align 4 - %2612 = getelementptr float, float* %1, i32 0 - %2613 = getelementptr inbounds float, float* %2612, i64 2 - %2614 = bitcast float* %2613 to i32* - %2615 = load i32, i32* %2614, align 4 - %2616 = bitcast i32 %2615 to float - %2617 = insertelement <4 x float> zeroinitializer, float %2616, i32 0 - %2618 = getelementptr float, float* %1, i32 0 - %2619 = getelementptr inbounds float, float* %2618, i64 1 - %2620 = bitcast float* %2619 to i32* - %2621 = load i32, i32* %2620, align 4 - %2622 = bitcast i32 %2621 to float - %2623 = insertelement <4 x float> %2617, float %2622, i32 1 - %2624 = insertelement <4 x float> %2623, float 0.000000e+00, i32 2 - %2625 = insertelement <4 x float> %2624, float 0.000000e+00, i32 3 - %2626 = extractelement <4 x float> %2625, i32 0 - %2627 = bitcast i32* %2620 to float* - store float %2626, float* %2627, align 4 - %2628 = extractelement <4 x float> %2625, i32 1 - %2629 = bitcast i32* %2614 to float* - store float %2628, float* %2629, align 4 - ret void -} - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { -.preheader13: - %3 = bitcast float* %2 to i8* - %4 = bitcast float* %0 to i8* - %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) - %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #8 - %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 - %8 = bitcast i8* %7 to float* - store float 1.000000e+00, float* %8, align 4 - %9 = getelementptr inbounds i8, i8* %7, i64 8 - %10 = getelementptr inbounds i8, i8* %7, i64 12 - %11 = bitcast i8* %10 to float* - store float 1.000000e+00, float* %11, align 4 - %12 = bitcast float* %1 to i8* - %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) - %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %15 = bitcast i8* %14 to float* - %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %17 = bitcast i8* %16 to float* - %18 = bitcast float* %2 to i32* - %19 = load i32, i32* %18, 
align 4 - %20 = bitcast i8* %14 to i32* - store i32 %19, i32* %20, align 4 - %21 = bitcast i8* %7 to i32* - %22 = load i32, i32* %21, align 4 - %23 = bitcast i8* %16 to i32* - store i32 %22, i32* %23, align 4 - %24 = getelementptr inbounds float, float* %2, i64 2 - %25 = bitcast float* %24 to i32* - %26 = load i32, i32* %25, align 4 - %27 = getelementptr inbounds i8, i8* %14, i64 4 - %28 = bitcast i8* %27 to i32* - store i32 %26, i32* %28, align 4 - %29 = bitcast i8* %9 to i32* - %30 = load i32, i32* %29, align 4 - %31 = getelementptr inbounds i8, i8* %16, i64 4 - %32 = bitcast i8* %31 to i32* - store i32 %30, i32* %32, align 4 - %33 = load float, float* %15, align 4 - %34 = call float @no_opt_sgn(float %33) - %35 = fneg float %34 - %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) - %37 = fmul float %36, %35 - %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %39 = bitcast i8* %38 to float* - %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %41 = load float, float* %15, align 4 - %42 = load float, float* %17, align 4 - %43 = fmul float %37, %42 - %44 = fadd float %41, %43 - store float %44, float* %39, align 4 - %45 = bitcast i8* %27 to float* - %46 = load float, float* %45, align 4 - %47 = bitcast i8* %31 to float* - %48 = load float, float* %47, align 4 - %49 = fmul float %37, %48 - %50 = fadd float %46, %49 - %51 = getelementptr inbounds i8, i8* %38, i64 4 - %52 = bitcast i8* %51 to float* - store float %50, float* %52, align 4 - %53 = bitcast i8* %40 to float* - %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) - %55 = fadd float %54, 0x3EE4F8B580000000 - %56 = load float, float* %39, align 4 - %57 = fdiv float %56, %55 - store float %57, float* %53, align 4 - %58 = load float, float* %52, align 4 - %59 = fdiv float %58, %55 - %60 = getelementptr inbounds i8, i8* %40, i64 4 - %61 = bitcast i8* %60 to float* - store float %59, float* %61, align 4 - %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 - %63 = bitcast i8* %62 to float* - %64 = load float, float* %53, align 4 - %65 = fmul float %64, 2.000000e+00 - %66 = fmul float %65, %64 - %67 = fsub float 1.000000e+00, %66 - store float %67, float* %63, align 4 - %68 = load float, float* %53, align 4 - %69 = fmul float %68, 2.000000e+00 - %70 = load float, float* %61, align 4 - %71 = fmul float %69, %70 - %72 = fsub float 0.000000e+00, %71 - %73 = getelementptr inbounds i8, i8* %62, i64 4 - %74 = bitcast i8* %73 to float* - store float %72, float* %74, align 4 - %75 = load float, float* %61, align 4 - %76 = fmul float %75, 2.000000e+00 - %77 = load float, float* %53, align 4 - %78 = fmul float %76, %77 - %79 = fsub float 0.000000e+00, %78 - %80 = getelementptr inbounds i8, i8* %62, i64 8 - %81 = bitcast i8* %80 to float* - store float %79, float* %81, align 4 - %82 = load float, float* %61, align 4 - %83 = fmul float %82, 2.000000e+00 - %84 = fmul float %83, %82 - %85 = fsub float 1.000000e+00, %84 - %86 = getelementptr inbounds i8, i8* %62, i64 12 - %87 = bitcast i8* %86 to float* - store float %85, float* %87, align 4 - %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 - %89 = bitcast i8* %88 to float* - %90 = bitcast i8* %62 to i32* - %91 = load i32, i32* %90, align 4 - %92 = bitcast i8* %88 to i32* - store i32 %91, i32* %92, align 4 - %93 = bitcast i8* %73 to i32* - %94 = load i32, i32* %93, align 4 - %95 = getelementptr inbounds i8, i8* %88, i64 4 - %96 = bitcast i8* %95 to i32* - store i32 %94, i32* %96, align 4 - %97 = bitcast i8* 
%80 to i32* - %98 = load i32, i32* %97, align 4 - %99 = getelementptr inbounds i8, i8* %88, i64 8 - %100 = bitcast i8* %99 to i32* - store i32 %98, i32* %100, align 4 - %101 = bitcast i8* %86 to i32* - %102 = load i32, i32* %101, align 4 - %103 = getelementptr inbounds i8, i8* %88, i64 12 - %104 = bitcast i8* %103 to i32* - store i32 %102, i32* %104, align 4 - %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #8 - call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) - call void @free(i8* %14) - call void @free(i8* %16) - call void @free(i8* %38) - call void @free(i8* %40) - call void @free(i8* %62) - call void @free(i8* %88) - call void @no_opt_naive_fixed_transpose(float* %1) - ret void -} - -; Function Attrs: nounwind -declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 - -; Function Attrs: nounwind readnone speculatable willreturn -declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 - -; Function Attrs: allocsize(0,1) -declare i8* @calloc(i64, i64) #5 - -declare void @free(i8*) #6 - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #1 { -.preheader6: - %0 = alloca i64, align 8 - %1 = alloca [4 x float], align 16 - %2 = alloca [4 x float], align 16 - %3 = alloca [4 x float], align 16 - %4 = alloca [4 x float], align 16 - %5 = alloca [4 x float], align 16 - %6 = call i64 @time(i64* null) #8 - store i64 %6, i64* %0, align 8 - %7 = call i64 @time(i64* nonnull %0) #8 - %8 = trunc i64 %7 to i32 - call void @srand(i32 %8) #8 - %9 = call i32 @rand() #8 - %10 = sitofp i32 %9 to float - %11 = fdiv float %10, 0x41747AE140000000 - %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 - store float %11, float* %12, align 16 - %13 = fpext float %11 to double - %14 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %13) #8 - %15 = call i32 @rand() #8 - %16 = sitofp i32 %15 to float - %17 = fdiv float %16, 0x41747AE140000000 - %18 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 - store float %17, float* %18, align 4 - %19 = fpext float %17 to double - %20 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %19) #8 - %21 = call i32 @rand() #8 - %22 = sitofp i32 %21 to float - %23 = fdiv float %22, 0x41747AE140000000 - %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 - store float %23, float* %24, align 8 - %25 = fpext float %23 to double - %26 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %25) #8 - %27 = call i32 @rand() #8 - %28 = sitofp i32 %27 to float - %29 = fdiv float %28, 0x41747AE140000000 - %30 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 - store float %29, float* %30, align 4 - %31 = fpext float %29 to double - %32 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %31) #8 - %33 = bitcast [4 x float]* %2 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %33, i8 0, i64 16, i1 false) - %34 = bitcast [4 x float]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %34, i8 0, i64 16, i1 false) - %35 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %36 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %35, float* nonnull %36) - %37 = bitcast [4 x float]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %37, i8 0, i64 16, i1 false) - %38 = bitcast [4 x float]* %5 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %38, i8 0, i64 16, i1 false) - %39 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - %40 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 - call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %39, float* nonnull %40) - %41 = load float, float* %35, align 16 - %42 = fpext float %41 to double - %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %42) #8 - %44 = load float, float* %39, align 16 - %45 = fpext float %44 to double - %46 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %45) #8 - %47 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 - %48 = load float, float* %47, align 4 - %49 = fpext float %48 to double - %50 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %49) #8 - %51 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 - %52 = load float, float* %51, align 4 - %53 = fpext float %52 to double - %54 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %53) #8 - %55 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 - %56 = load float, float* %55, align 8 - %57 = fpext float %56 to double - %58 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %57) #8 - %59 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 - %60 = load float, float* %59, align 8 - %61 = fpext float %60 to double - %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %61) #8 - %63 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 - %64 = load float, float* %63, align 4 - %65 = fpext float %64 to double - %66 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %65) #8 - %67 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 - %68 = load float, float* %67, align 4 - %69 = fpext float %68 to double - %70 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %69) #8 - %71 = load float, float* %36, align 16 - %72 = fpext float %71 to double - %73 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %72) #8 - %74 = load float, float* %40, align 16 - %75 = fpext float %74 to double - %76 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %75) #8 - %77 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 - %78 = load float, float* %77, align 4 - %79 = fpext float %78 to double - %80 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %79) #8 - %81 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 - %82 = load float, float* %81, align 4 - %83 = fpext float %82 to double - %84 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %83) #8 - %85 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 - %86 = load float, float* %85, align 8 - %87 = fpext float %86 to double - %88 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %87) #8 - %89 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 - %90 = load float, float* %89, align 8 - %91 = fpext float %90 to double - %92 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %91) #8 - %93 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 - %94 = load float, float* %93, align 4 - %95 = fpext float %94 to double - %96 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %95) #8 - %97 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 - %98 = load float, float* %97, align 4 - %99 = fpext float %98 to double - %100 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %99) #8 - ret i32 0 -} - -declare i64 @time(i64*) #6 - -declare void @srand(i32) #6 - -declare i32 @rand() #6 - -declare i32 @printf(i8*, ...) 
#6 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #7 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #2 - -attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone speculatable willreturn } -attributes #3 = { argmemonly nounwind willreturn writeonly } -attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { argmemonly nounwind willreturn } -attributes #8 = { nounwind } -attributes #9 = { nounwind allocsize(0,1) } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} -!3 = distinct !{!3, !4} -!4 = !{!"llvm.loop.unroll.disable"} -!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/err.txt b/src/dios-egraphs/Diospyros/err.txt deleted file mode 100644 index a6533f78..00000000 --- a/src/dios-egraphs/Diospyros/err.txt +++ /dev/null @@ -1,312 +0,0 @@ -Match! 
- store float %17, float* %18, align 4 -Incoming instr - store float %17, float* %18, align 4 -Incoming arg - %17 = fmul float %16, 2.000000e+00 -Incoming instr - %17 = fmul float %16, 2.000000e+00 -Incoming arg - %16 = load float, float* %15, align 4 -Incoming instr - %16 = load float, float* %15, align 4 -Incoming arg - %15 = getelementptr inbounds float, float* %0, i64 %13 -Incoming instr - %15 = getelementptr inbounds float, float* %0, i64 %13 -Incoming arg - %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] -Incoming instr - %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] -Incoming arg - %19 = add nuw nsw i64 %13, 1 -Incoming instr - %19 = add nuw nsw i64 %13, 1 -Incoming arg - %18 = getelementptr inbounds float, float* %1, i64 %13 -Incoming instr - %18 = getelementptr inbounds float, float* %1, i64 %13 -Incoming arg - %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] -Incoming instr - %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] -Incoming arg - %19 = add nuw nsw i64 %13, 1 -Incoming instr - %19 = add nuw nsw i64 %13, 1 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 4 iterations, reason: Some(Saturated) -Match! - store float %37, float* %38, align 4 -Incoming instr - store float %37, float* %38, align 4 -Incoming arg - %37 = fmul float %36, 2.000000e+00 -Incoming instr - %37 = fmul float %36, 2.000000e+00 -Incoming arg - %36 = load float, float* %35, align 4 -Incoming instr - %36 = load float, float* %35, align 4 -Incoming arg - %35 = getelementptr inbounds float, float* %0, i64 %33 -Incoming instr - %35 = getelementptr inbounds float, float* %0, i64 %33 -Incoming arg - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming instr - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming arg - %54 = add nuw nsw i64 %33, 4 -Incoming instr - %54 = add nuw nsw i64 %33, 4 -Incoming arg - %38 = getelementptr inbounds float, float* %1, i64 %33 -Incoming instr - %38 = getelementptr inbounds float, float* %1, i64 %33 -Incoming arg - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming instr - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming arg - %54 = add nuw nsw i64 %33, 4 -Incoming instr - %54 = add nuw nsw i64 %33, 4 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 4 iterations, reason: Some(Saturated) -Match! 
- store float %42, float* %43, align 4 -Incoming instr - store float %42, float* %43, align 4 -Incoming arg - %42 = fmul float %41, 2.000000e+00 -Incoming instr - %42 = fmul float %41, 2.000000e+00 -Incoming arg - %41 = load float, float* %40, align 4 -Incoming instr - %41 = load float, float* %40, align 4 -Incoming arg - %40 = getelementptr inbounds float, float* %0, i64 %39 -Incoming instr - %40 = getelementptr inbounds float, float* %0, i64 %39 -Incoming arg - %39 = or i64 %33, 1 -Incoming instr - %39 = or i64 %33, 1 -Incoming arg - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming instr - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming arg - %54 = add nuw nsw i64 %33, 4 -Incoming instr - %54 = add nuw nsw i64 %33, 4 -Incoming arg - %43 = getelementptr inbounds float, float* %1, i64 %39 -Incoming instr - %43 = getelementptr inbounds float, float* %1, i64 %39 -Incoming arg - %39 = or i64 %33, 1 -Incoming instr - %39 = or i64 %33, 1 -Incoming arg - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming instr - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming arg - %54 = add nuw nsw i64 %33, 4 -Incoming instr - %54 = add nuw nsw i64 %33, 4 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 4 iterations, reason: Some(Saturated) -Match! - store float %52, float* %53, align 4 -Incoming instr - store float %52, float* %53, align 4 -Incoming arg - %52 = fmul float %51, 2.000000e+00 -Incoming instr - %52 = fmul float %51, 2.000000e+00 -Incoming arg - %51 = load float, float* %50, align 4 -Incoming instr - %51 = load float, float* %50, align 4 -Incoming arg - %50 = getelementptr inbounds float, float* %0, i64 %49 -Incoming instr - %50 = getelementptr inbounds float, float* %0, i64 %49 -Incoming arg - %49 = or i64 %33, 3 -Incoming instr - %49 = or i64 %33, 3 -Incoming arg - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming instr - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming arg - %54 = add nuw nsw i64 %33, 4 -Incoming instr - %54 = add nuw nsw i64 %33, 4 -Incoming arg - %53 = getelementptr inbounds float, float* %1, i64 %49 -Incoming instr - %53 = getelementptr inbounds float, float* %1, i64 %49 -Incoming arg - %49 = or i64 %33, 3 -Incoming instr - %49 = or i64 %33, 3 -Incoming arg - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming instr - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming arg - %54 = add nuw nsw i64 %33, 4 -Incoming instr - %54 = add nuw nsw i64 %33, 4 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 4 iterations, reason: Some(Saturated) -Match! - store float %4, float* %1, align 4 -Incoming instr - store float %4, float* %1, align 4 -Incoming arg - %4 = fmul float %3, 2.000000e+00 -Incoming instr - %4 = fmul float %3, 2.000000e+00 -Incoming arg - %3 = load float, float* %0, align 4 -Incoming instr - %3 = load float, float* %0, align 4 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 4 iterations, reason: Some(Saturated) -Match! 
- store float %7, float* %8, align 4 -Incoming instr - store float %7, float* %8, align 4 -Incoming arg - %7 = fmul float %6, 2.000000e+00 -Incoming instr - %7 = fmul float %6, 2.000000e+00 -Incoming arg - %6 = load float, float* %5, align 4 -Incoming instr - %6 = load float, float* %5, align 4 -Incoming arg - %5 = getelementptr inbounds float, float* %0, i64 1 -Incoming instr - %5 = getelementptr inbounds float, float* %0, i64 1 -Incoming arg - %8 = getelementptr inbounds float, float* %1, i64 1 -Incoming instr - %8 = getelementptr inbounds float, float* %1, i64 1 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 4 iterations, reason: Some(Saturated) -Match! - store float %11, float* %12, align 4 -Incoming instr - store float %11, float* %12, align 4 -Incoming arg - %11 = fmul float %10, 2.000000e+00 -Incoming instr - %11 = fmul float %10, 2.000000e+00 -Incoming arg - %10 = load float, float* %9, align 4 -Incoming instr - %10 = load float, float* %9, align 4 -Incoming arg - %9 = getelementptr inbounds float, float* %0, i64 2 -Incoming instr - %9 = getelementptr inbounds float, float* %0, i64 2 -Incoming arg - %12 = getelementptr inbounds float, float* %1, i64 2 -Incoming instr - %12 = getelementptr inbounds float, float* %1, i64 2 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 4 iterations, reason: Some(Saturated) -Match! - store float %25, float* %28, align 4 -Incoming instr - store float %25, float* %28, align 4 -Incoming arg - %25 = fadd float %24, %19 -Incoming instr - %25 = fadd float %24, %19 -Incoming arg - %24 = fadd float %23, %15 -Incoming instr - %24 = fadd float %23, %15 -Incoming arg - %23 = fadd float %22, %11 -Incoming instr - %23 = fadd float %22, %11 -Incoming arg - %22 = fadd float %21, %7 -Incoming instr - %22 = fadd float %21, %7 -Incoming arg - %21 = fadd float %4, 0.000000e+00 -Incoming instr - %21 = fadd float %4, 0.000000e+00 -Incoming arg - %4 = fmul float %3, 2.000000e+00 -Incoming instr - %4 = fmul float %3, 2.000000e+00 -Incoming arg - %3 = load float, float* %0, align 4 -Incoming instr - %3 = load float, float* %0, align 4 -Incoming arg - %7 = fmul float %6, 2.000000e+00 -Incoming instr - %7 = fmul float %6, 2.000000e+00 -Incoming arg - %6 = load float, float* %5, align 4 -Incoming instr - %6 = load float, float* %5, align 4 -Incoming arg - %5 = getelementptr inbounds float, float* %0, i64 1 -Incoming instr - %5 = getelementptr inbounds float, float* %0, i64 1 -Incoming arg - %11 = fmul float %10, 2.000000e+00 -Incoming instr - %11 = fmul float %10, 2.000000e+00 -Incoming arg - %10 = load float, float* %9, align 4 -Incoming instr - %10 = load float, float* %9, align 4 -Incoming arg - %9 = getelementptr inbounds float, float* %0, i64 2 -Incoming instr - %9 = getelementptr inbounds float, float* %0, i64 2 -Incoming arg - %15 = fmul float %14, 2.000000e+00 -Incoming instr - %15 = fmul float %14, 2.000000e+00 -Incoming arg - %14 = load float, float* %13, align 4 -Incoming instr - %14 = load float, float* %13, align 4 -Incoming arg - %13 = getelementptr inbounds float, float* %0, i64 3 -Incoming instr - %13 = getelementptr inbounds float, float* %0, i64 3 -Incoming arg - %19 = fmul 
float %18, 2.000000e+00 -Incoming instr - %19 = fmul float %18, 2.000000e+00 -Incoming arg - %18 = load float, float* %17, align 4 -Incoming instr - %18 = load float, float* %17, align 4 -Incoming arg - %17 = getelementptr inbounds float, float* %0, i64 4 -Incoming instr - %17 = getelementptr inbounds float, float* %0, i64 4 -Incoming arg - %28 = getelementptr inbounds float, float* %2, i64 3 -Incoming instr - %28 = getelementptr inbounds float, float* %2, i64 3 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 11 iterations, reason: Some(Saturated) diff --git a/src/dios-egraphs/Diospyros/err1.txt b/src/dios-egraphs/Diospyros/err1.txt deleted file mode 100644 index a6533f78..00000000 --- a/src/dios-egraphs/Diospyros/err1.txt +++ /dev/null @@ -1,312 +0,0 @@ -Match! - store float %17, float* %18, align 4 -Incoming instr - store float %17, float* %18, align 4 -Incoming arg - %17 = fmul float %16, 2.000000e+00 -Incoming instr - %17 = fmul float %16, 2.000000e+00 -Incoming arg - %16 = load float, float* %15, align 4 -Incoming instr - %16 = load float, float* %15, align 4 -Incoming arg - %15 = getelementptr inbounds float, float* %0, i64 %13 -Incoming instr - %15 = getelementptr inbounds float, float* %0, i64 %13 -Incoming arg - %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] -Incoming instr - %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] -Incoming arg - %19 = add nuw nsw i64 %13, 1 -Incoming instr - %19 = add nuw nsw i64 %13, 1 -Incoming arg - %18 = getelementptr inbounds float, float* %1, i64 %13 -Incoming instr - %18 = getelementptr inbounds float, float* %1, i64 %13 -Incoming arg - %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] -Incoming instr - %13 = phi i64 [ %19, %.epil.preheader13 ], [ %11, %..preheader_crit_edge.unr-lcssa ] -Incoming arg - %19 = add nuw nsw i64 %13, 1 -Incoming instr - %19 = add nuw nsw i64 %13, 1 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 4 iterations, reason: Some(Saturated) -Match! 
- store float %37, float* %38, align 4 -Incoming instr - store float %37, float* %38, align 4 -Incoming arg - %37 = fmul float %36, 2.000000e+00 -Incoming instr - %37 = fmul float %36, 2.000000e+00 -Incoming arg - %36 = load float, float* %35, align 4 -Incoming instr - %36 = load float, float* %35, align 4 -Incoming arg - %35 = getelementptr inbounds float, float* %0, i64 %33 -Incoming instr - %35 = getelementptr inbounds float, float* %0, i64 %33 -Incoming arg - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming instr - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming arg - %54 = add nuw nsw i64 %33, 4 -Incoming instr - %54 = add nuw nsw i64 %33, 4 -Incoming arg - %38 = getelementptr inbounds float, float* %1, i64 %33 -Incoming instr - %38 = getelementptr inbounds float, float* %1, i64 %33 -Incoming arg - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming instr - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming arg - %54 = add nuw nsw i64 %33, 4 -Incoming instr - %54 = add nuw nsw i64 %33, 4 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 4 iterations, reason: Some(Saturated) -Match! - store float %42, float* %43, align 4 -Incoming instr - store float %42, float* %43, align 4 -Incoming arg - %42 = fmul float %41, 2.000000e+00 -Incoming instr - %42 = fmul float %41, 2.000000e+00 -Incoming arg - %41 = load float, float* %40, align 4 -Incoming instr - %41 = load float, float* %40, align 4 -Incoming arg - %40 = getelementptr inbounds float, float* %0, i64 %39 -Incoming instr - %40 = getelementptr inbounds float, float* %0, i64 %39 -Incoming arg - %39 = or i64 %33, 1 -Incoming instr - %39 = or i64 %33, 1 -Incoming arg - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming instr - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming arg - %54 = add nuw nsw i64 %33, 4 -Incoming instr - %54 = add nuw nsw i64 %33, 4 -Incoming arg - %43 = getelementptr inbounds float, float* %1, i64 %39 -Incoming instr - %43 = getelementptr inbounds float, float* %1, i64 %39 -Incoming arg - %39 = or i64 %33, 1 -Incoming instr - %39 = or i64 %33, 1 -Incoming arg - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming instr - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming arg - %54 = add nuw nsw i64 %33, 4 -Incoming instr - %54 = add nuw nsw i64 %33, 4 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 4 iterations, reason: Some(Saturated) -Match! 
- store float %52, float* %53, align 4 -Incoming instr - store float %52, float* %53, align 4 -Incoming arg - %52 = fmul float %51, 2.000000e+00 -Incoming instr - %52 = fmul float %51, 2.000000e+00 -Incoming arg - %51 = load float, float* %50, align 4 -Incoming instr - %51 = load float, float* %50, align 4 -Incoming arg - %50 = getelementptr inbounds float, float* %0, i64 %49 -Incoming instr - %50 = getelementptr inbounds float, float* %0, i64 %49 -Incoming arg - %49 = or i64 %33, 3 -Incoming instr - %49 = or i64 %33, 3 -Incoming arg - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming instr - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming arg - %54 = add nuw nsw i64 %33, 4 -Incoming instr - %54 = add nuw nsw i64 %33, 4 -Incoming arg - %53 = getelementptr inbounds float, float* %1, i64 %49 -Incoming instr - %53 = getelementptr inbounds float, float* %1, i64 %49 -Incoming arg - %49 = or i64 %33, 3 -Incoming instr - %49 = or i64 %33, 3 -Incoming arg - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming instr - %33 = phi i64 [ 0, %.lr.ph11.new ], [ %54, %32 ] -Incoming arg - %54 = add nuw nsw i64 %33, 4 -Incoming instr - %54 = add nuw nsw i64 %33, 4 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 4 iterations, reason: Some(Saturated) -Match! - store float %4, float* %1, align 4 -Incoming instr - store float %4, float* %1, align 4 -Incoming arg - %4 = fmul float %3, 2.000000e+00 -Incoming instr - %4 = fmul float %3, 2.000000e+00 -Incoming arg - %3 = load float, float* %0, align 4 -Incoming instr - %3 = load float, float* %0, align 4 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 4 iterations, reason: Some(Saturated) -Match! - store float %7, float* %8, align 4 -Incoming instr - store float %7, float* %8, align 4 -Incoming arg - %7 = fmul float %6, 2.000000e+00 -Incoming instr - %7 = fmul float %6, 2.000000e+00 -Incoming arg - %6 = load float, float* %5, align 4 -Incoming instr - %6 = load float, float* %5, align 4 -Incoming arg - %5 = getelementptr inbounds float, float* %0, i64 1 -Incoming instr - %5 = getelementptr inbounds float, float* %0, i64 1 -Incoming arg - %8 = getelementptr inbounds float, float* %1, i64 1 -Incoming instr - %8 = getelementptr inbounds float, float* %1, i64 1 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 4 iterations, reason: Some(Saturated) -Match! 
- store float %11, float* %12, align 4 -Incoming instr - store float %11, float* %12, align 4 -Incoming arg - %11 = fmul float %10, 2.000000e+00 -Incoming instr - %11 = fmul float %10, 2.000000e+00 -Incoming arg - %10 = load float, float* %9, align 4 -Incoming instr - %10 = load float, float* %9, align 4 -Incoming arg - %9 = getelementptr inbounds float, float* %0, i64 2 -Incoming instr - %9 = getelementptr inbounds float, float* %0, i64 2 -Incoming arg - %12 = getelementptr inbounds float, float* %1, i64 2 -Incoming instr - %12 = getelementptr inbounds float, float* %1, i64 2 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 4 iterations, reason: Some(Saturated) -Match! - store float %25, float* %28, align 4 -Incoming instr - store float %25, float* %28, align 4 -Incoming arg - %25 = fadd float %24, %19 -Incoming instr - %25 = fadd float %24, %19 -Incoming arg - %24 = fadd float %23, %15 -Incoming instr - %24 = fadd float %23, %15 -Incoming arg - %23 = fadd float %22, %11 -Incoming instr - %23 = fadd float %22, %11 -Incoming arg - %22 = fadd float %21, %7 -Incoming instr - %22 = fadd float %21, %7 -Incoming arg - %21 = fadd float %4, 0.000000e+00 -Incoming instr - %21 = fadd float %4, 0.000000e+00 -Incoming arg - %4 = fmul float %3, 2.000000e+00 -Incoming instr - %4 = fmul float %3, 2.000000e+00 -Incoming arg - %3 = load float, float* %0, align 4 -Incoming instr - %3 = load float, float* %0, align 4 -Incoming arg - %7 = fmul float %6, 2.000000e+00 -Incoming instr - %7 = fmul float %6, 2.000000e+00 -Incoming arg - %6 = load float, float* %5, align 4 -Incoming instr - %6 = load float, float* %5, align 4 -Incoming arg - %5 = getelementptr inbounds float, float* %0, i64 1 -Incoming instr - %5 = getelementptr inbounds float, float* %0, i64 1 -Incoming arg - %11 = fmul float %10, 2.000000e+00 -Incoming instr - %11 = fmul float %10, 2.000000e+00 -Incoming arg - %10 = load float, float* %9, align 4 -Incoming instr - %10 = load float, float* %9, align 4 -Incoming arg - %9 = getelementptr inbounds float, float* %0, i64 2 -Incoming instr - %9 = getelementptr inbounds float, float* %0, i64 2 -Incoming arg - %15 = fmul float %14, 2.000000e+00 -Incoming instr - %15 = fmul float %14, 2.000000e+00 -Incoming arg - %14 = load float, float* %13, align 4 -Incoming instr - %14 = load float, float* %13, align 4 -Incoming arg - %13 = getelementptr inbounds float, float* %0, i64 3 -Incoming instr - %13 = getelementptr inbounds float, float* %0, i64 3 -Incoming arg - %19 = fmul float %18, 2.000000e+00 -Incoming instr - %19 = fmul float %18, 2.000000e+00 -Incoming arg - %18 = load float, float* %17, align 4 -Incoming instr - %18 = load float, float* %17, align 4 -Incoming arg - %17 = getelementptr inbounds float, float* %0, i64 4 -Incoming instr - %17 = getelementptr inbounds float, float* %0, i64 4 -Incoming arg - %28 = getelementptr inbounds float, float* %2, i64 3 -Incoming instr - %28 = getelementptr inbounds float, float* %2, i64 3 -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 11 iterations, reason: Some(Saturated) diff --git a/src/dios-egraphs/Diospyros/err2.txt b/src/dios-egraphs/Diospyros/err2.txt deleted file mode 100644 
index d81c92b7..00000000 --- a/src/dios-egraphs/Diospyros/err2.txt +++ /dev/null @@ -1,3 +0,0 @@ -opt: build/diospyros.ll:192:38: error: expected type - %11 = phi i64 [ 0, %.lr.ph11 ], [ , %32 ] - ^ diff --git a/src/dios-egraphs/Diospyros/inline-float.c b/src/dios-egraphs/Diospyros/inline-float.c deleted file mode 100644 index 2b0245d8..00000000 --- a/src/dios-egraphs/Diospyros/inline-float.c +++ /dev/null @@ -1,78 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#define SIZE 5 -#define DELTA 0.1f - -float test_inline(float A[SIZE], float B[SIZE], int n) - __attribute__((always_inline)); - -float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { - for (int i = 0; i < n; i++) { - B[i] = 2 * A[i]; - } - float sum = 0.0f; - for (int i = 0; i < n; i++) { - sum += B[i]; - } - return sum; -} - -void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { - float result = no_opt_test_inline(A, B, SIZE); - for (int i = 0; i < SIZE; i++) { - C[i] = result; - } -} - -float test_inline(float A[SIZE], float B[SIZE], int n) { - for (int i = 0; i < n; i++) { - B[i] = 2 * A[i]; - } - float sum = 0.0f; - for (int i = 0; i < n; i++) { - sum += B[i]; - } - return sum; -} - -void test(float A[SIZE], float B[SIZE], float C[SIZE]) { - float result = test_inline(A, B, SIZE); - for (int i = 0; i < SIZE; i++) { - C[i] = result; - } -} - -int main() { - float A[SIZE] = {1.0f}; - float expectedA[SIZE] = {1.0f}; - for (int i = 0; i < SIZE; i++) { - A[i] = 1.0f; - expectedA[i] = 1.0f; - } - float B[SIZE] = {0.0f}; - float expectedB[SIZE] = {0.0f}; - for (int i = 0; i < SIZE; i++) { - B[i] = 0.0f; - expectedB[i] = 0.0f; - } - float C[SIZE] = {0.0f}; - float expectedC[SIZE] = {0.0f}; - for (int i = 0; i < SIZE; i++) { - C[i] = 0.0f; - expectedC[i] = 0.0f; - } - test(A, B, C); - no_opt_test(expectedA, expectedB, expectedC); - for (int i = 0; i < SIZE; i++) { - printf("C Output: %f\n", C[i]); - printf("Expected C Output: %f\n", expectedC[i]); - assert(fabs(expectedC[i] - C[i]) < DELTA); - } - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/inline-float.c.orig b/src/dios-egraphs/Diospyros/inline-float.c.orig deleted file mode 100644 index 2b0245d8..00000000 --- a/src/dios-egraphs/Diospyros/inline-float.c.orig +++ /dev/null @@ -1,78 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#define SIZE 5 -#define DELTA 0.1f - -float test_inline(float A[SIZE], float B[SIZE], int n) - __attribute__((always_inline)); - -float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { - for (int i = 0; i < n; i++) { - B[i] = 2 * A[i]; - } - float sum = 0.0f; - for (int i = 0; i < n; i++) { - sum += B[i]; - } - return sum; -} - -void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { - float result = no_opt_test_inline(A, B, SIZE); - for (int i = 0; i < SIZE; i++) { - C[i] = result; - } -} - -float test_inline(float A[SIZE], float B[SIZE], int n) { - for (int i = 0; i < n; i++) { - B[i] = 2 * A[i]; - } - float sum = 0.0f; - for (int i = 0; i < n; i++) { - sum += B[i]; - } - return sum; -} - -void test(float A[SIZE], float B[SIZE], float C[SIZE]) { - float result = test_inline(A, B, SIZE); - for (int i = 0; i < SIZE; i++) { - C[i] = result; - } -} - -int main() { - float A[SIZE] = {1.0f}; - float expectedA[SIZE] = {1.0f}; - for (int i = 0; i < SIZE; i++) { - A[i] = 1.0f; - expectedA[i] = 1.0f; - } - float B[SIZE] = {0.0f}; - float expectedB[SIZE] = {0.0f}; - for (int i = 0; i < SIZE; i++) { - B[i] = 0.0f; - expectedB[i] = 
0.0f; - } - float C[SIZE] = {0.0f}; - float expectedC[SIZE] = {0.0f}; - for (int i = 0; i < SIZE; i++) { - C[i] = 0.0f; - expectedC[i] = 0.0f; - } - test(A, B, C); - no_opt_test(expectedA, expectedB, expectedC); - for (int i = 0; i < SIZE; i++) { - printf("C Output: %f\n", C[i]); - printf("Expected C Output: %f\n", expectedC[i]); - assert(fabs(expectedC[i] - C[i]) < DELTA); - } - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/runt_FAIL.py b/src/dios-egraphs/Diospyros/runt_FAIL.py deleted file mode 100644 index 15fdaef9..00000000 --- a/src/dios-egraphs/Diospyros/runt_FAIL.py +++ /dev/null @@ -1,22 +0,0 @@ -import click -import subprocess -import sys - -# I'd like for this to be able to detect if there is an "error" in a file -# e.g. an assertion error -# I'd like to grep for an error strings, and then report if the test passed or failed. - - -@click.command() -@click.argument('test_file', - type=click.Path(exists=True), - metavar='') -def run(test_file): - test_path = [f"test={test_file}"] - cmd = subprocess.run(["make", "run-opt"] + test_path, - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - sys.stdout.write(cmd.stdout.decode('utf-8')) - - -if __name__ == "__main__": - run() diff --git a/src/dios-egraphs/Diospyros/test_FAIL.sh b/src/dios-egraphs/Diospyros/test_FAIL.sh deleted file mode 100644 index 39e18e85..00000000 --- a/src/dios-egraphs/Diospyros/test_FAIL.sh +++ /dev/null @@ -1,16 +0,0 @@ -FILE=target/debug/libllvmlib.so - -if ! [ -f $FILE ]; then - FILE=target/debug/libllvmlib.dylib -fi - -if [[ "$OSTYPE" == "darwin"* ]]; then - CLANG=/usr/local/opt/llvm/bin/clang -else - CLANG=clang -fi - -TEST=./llvm-tests/add.c - -$CLANG -emit-llvm -S -Xclang -disable-O0-optnone $TEST \ -| opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce -o build/sample - From 158ae2c9c47ea93eff32c9a9f25d6f95b592afe6 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sun, 1 May 2022 16:05:45 -0400 Subject: [PATCH 081/143] remove flaky test outputs --- src/dios-egraphs/Diospyros/failed-test/aa.ll | 750 --- .../Diospyros/failed-test/clang.ll | 2293 ------- src/dios-egraphs/Diospyros/failed-test/dce.ll | 4711 -------------- .../Diospyros/failed-test/diospyros.ll | 5489 ----------------- src/dios-egraphs/Diospyros/failed-test/final | Bin 13628 -> 0 bytes src/dios-egraphs/Diospyros/failed-test/opt.ll | 750 --- .../Diospyros/flaky-outputs/diff-aa.txt | 0 .../Diospyros/flaky-outputs/diff-clang.txt | 0 .../Diospyros/flaky-outputs/diff-dce.txt | 5416 ---------------- .../flaky-outputs/diff-diospyros.txt | 5450 ---------------- .../Diospyros/flaky-outputs/diff-final.txt | 1 - .../Diospyros/flaky-outputs/diff-opt.txt | 0 .../Diospyros/flaky-outputs/flaky-aa.ll | 828 --- .../Diospyros/flaky-outputs/flaky-clang.ll | 2356 ------- .../Diospyros/flaky-outputs/flaky-dce.ll | 3482 ----------- .../flaky-outputs/flaky-diospyros.ll | 4260 ------------- .../Diospyros/flaky-outputs/flaky-final | Bin 13676 -> 0 bytes .../Diospyros/flaky-outputs/flaky-opt.ll | 828 --- 18 files changed, 36614 deletions(-) delete mode 100644 src/dios-egraphs/Diospyros/failed-test/aa.ll delete mode 100644 src/dios-egraphs/Diospyros/failed-test/clang.ll delete mode 100644 src/dios-egraphs/Diospyros/failed-test/dce.ll delete mode 100644 src/dios-egraphs/Diospyros/failed-test/diospyros.ll delete mode 100755 
src/dios-egraphs/Diospyros/failed-test/final delete mode 100644 src/dios-egraphs/Diospyros/failed-test/opt.ll delete mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/diff-aa.txt delete mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/diff-clang.txt delete mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/diff-dce.txt delete mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/diff-diospyros.txt delete mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/diff-final.txt delete mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/diff-opt.txt delete mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/flaky-aa.ll delete mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/flaky-clang.ll delete mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/flaky-dce.ll delete mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/flaky-diospyros.ll delete mode 100755 src/dios-egraphs/Diospyros/flaky-outputs/flaky-final delete mode 100644 src/dios-egraphs/Diospyros/flaky-outputs/flaky-opt.ll diff --git a/src/dios-egraphs/Diospyros/failed-test/aa.ll b/src/dios-egraphs/Diospyros/failed-test/aa.ll deleted file mode 100644 index 5be2aa79..00000000 --- a/src/dios-egraphs/Diospyros/failed-test/aa.ll +++ /dev/null @@ -1,750 +0,0 @@ -; ModuleID = 'build/opt.ll' -source_filename = "fail-tests/qr-decomp-local-arrays.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 -@.str.1 = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 -@.str.2 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 -@.str.3 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 -@.str.4 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @sgn(float %0) #0 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_sgn(float %0) #1 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @naive_norm(float* %0, i32 %1) #0 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - %wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 - %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load 
float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 = fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32(float) #2 - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_naive_norm(float* %0, i32 %1) #1 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - %wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 - %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul 
float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 = fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_transpose(float* %0) #0 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - %6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_transpose(float* %0) #1 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - %6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { -.preheader: - store float 0.000000e+00, float* %2, align 4 - %3 = load float, float* %0, align 4 - %4 = load float, float* %1, align 4 - %5 = fmul float %3, %4 - %6 = fadd float %5, 0.000000e+00 - store float %6, float* %2, align 4 - %7 = getelementptr inbounds float, float* %0, i64 1 - %8 = load float, float* %7, align 4 - %9 = getelementptr inbounds float, float* %1, i64 2 - %10 = load float, float* %9, align 4 - %11 = fmul float %8, %10 - %12 = fadd float %6, %11 - store float %12, float* %2, align 4 - %13 = getelementptr inbounds float, float* %2, i64 1 - store float 0.000000e+00, float* %13, align 4 - %14 = load float, float* %0, align 4 - %15 = getelementptr 
inbounds float, float* %1, i64 1 - %16 = load float, float* %15, align 4 - %17 = fmul float %14, %16 - %18 = fadd float %17, 0.000000e+00 - store float %18, float* %13, align 4 - %19 = load float, float* %7, align 4 - %20 = getelementptr inbounds float, float* %1, i64 3 - %21 = load float, float* %20, align 4 - %22 = fmul float %19, %21 - %23 = fadd float %18, %22 - store float %23, float* %13, align 4 - %24 = getelementptr inbounds float, float* %0, i64 2 - %25 = getelementptr inbounds float, float* %2, i64 2 - store float 0.000000e+00, float* %25, align 4 - %26 = load float, float* %24, align 4 - %27 = load float, float* %1, align 4 - %28 = fmul float %26, %27 - %29 = fadd float %28, 0.000000e+00 - store float %29, float* %25, align 4 - %30 = getelementptr inbounds float, float* %0, i64 3 - %31 = load float, float* %30, align 4 - %32 = load float, float* %9, align 4 - %33 = fmul float %31, %32 - %34 = fadd float %29, %33 - store float %34, float* %25, align 4 - %35 = getelementptr inbounds float, float* %2, i64 3 - store float 0.000000e+00, float* %35, align 4 - %36 = load float, float* %24, align 4 - %37 = load float, float* %15, align 4 - %38 = fmul float %36, %37 - %39 = fadd float %38, 0.000000e+00 - store float %39, float* %35, align 4 - %40 = load float, float* %30, align 4 - %41 = load float, float* %20, align 4 - %42 = fmul float %40, %41 - %43 = fadd float %39, %42 - store float %43, float* %35, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { -.preheader: - store float 0.000000e+00, float* %2, align 4 - %3 = load float, float* %0, align 4 - %4 = load float, float* %1, align 4 - %5 = fmul float %3, %4 - %6 = fadd float %5, 0.000000e+00 - store float %6, float* %2, align 4 - %7 = getelementptr inbounds float, float* %0, i64 1 - %8 = load float, float* %7, align 4 - %9 = getelementptr inbounds float, float* %1, i64 2 - %10 = load float, float* %9, align 4 - %11 = fmul float %8, %10 - %12 = fadd float %6, %11 - store float %12, float* %2, align 4 - %13 = getelementptr inbounds float, float* %2, i64 1 - store float 0.000000e+00, float* %13, align 4 - %14 = load float, float* %0, align 4 - %15 = getelementptr inbounds float, float* %1, i64 1 - %16 = load float, float* %15, align 4 - %17 = fmul float %14, %16 - %18 = fadd float %17, 0.000000e+00 - store float %18, float* %13, align 4 - %19 = load float, float* %7, align 4 - %20 = getelementptr inbounds float, float* %1, i64 3 - %21 = load float, float* %20, align 4 - %22 = fmul float %19, %21 - %23 = fadd float %18, %22 - store float %23, float* %13, align 4 - %24 = getelementptr inbounds float, float* %0, i64 2 - %25 = getelementptr inbounds float, float* %2, i64 2 - store float 0.000000e+00, float* %25, align 4 - %26 = load float, float* %24, align 4 - %27 = load float, float* %1, align 4 - %28 = fmul float %26, %27 - %29 = fadd float %28, 0.000000e+00 - store float %29, float* %25, align 4 - %30 = getelementptr inbounds float, float* %0, i64 3 - %31 = load float, float* %30, align 4 - %32 = load float, float* %9, align 4 - %33 = fmul float %31, %32 - %34 = fadd float %29, %33 - store float %34, float* %25, align 4 - %35 = getelementptr inbounds float, float* %2, i64 3 - store float 0.000000e+00, float* %35, align 4 - %36 = load float, float* %24, align 4 - %37 = load float, float* %15, align 4 - %38 = fmul float %36, %37 - %39 = fadd float %38, 0.000000e+00 - store float %39, float* %35, align 4 - %40 = load float, float* 
%30, align 4 - %41 = load float, float* %20, align 4 - %42 = fmul float %40, %41 - %43 = fadd float %39, %42 - store float %43, float* %35, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { -.preheader49: - %3 = bitcast float* %1 to i8* - %4 = alloca [4 x float], align 16 - %5 = bitcast [4 x float]* %4 to i8* - %6 = bitcast float* %0 to i32* - %7 = load i32, i32* %6, align 4 - %8 = bitcast float* %2 to i32* - store i32 %7, i32* %8, align 4 - %9 = getelementptr inbounds float, float* %0, i64 1 - %10 = bitcast float* %9 to i32* - %11 = load i32, i32* %10, align 4 - %12 = getelementptr inbounds float, float* %2, i64 1 - %13 = bitcast float* %12 to i32* - store i32 %11, i32* %13, align 4 - %14 = getelementptr inbounds float, float* %0, i64 2 - %15 = bitcast float* %14 to i32* - %16 = load i32, i32* %15, align 4 - %17 = getelementptr inbounds float, float* %2, i64 2 - %18 = bitcast float* %17 to i32* - store i32 %16, i32* %18, align 4 - %19 = getelementptr inbounds float, float* %0, i64 3 - %20 = bitcast float* %19 to i32* - %21 = load i32, i32* %20, align 4 - %22 = getelementptr inbounds float, float* %2, i64 3 - %23 = bitcast float* %22 to i32* - store i32 %21, i32* %23, align 4 - %24 = bitcast i32 %7 to float - %25 = fcmp ogt float %24, 0.000000e+00 - %26 = zext i1 %25 to i32 - %27 = fcmp olt float %24, 0.000000e+00 - %.neg = sext i1 %27 to i32 - %28 = add nsw i32 %.neg, %26 - %29 = sitofp i32 %28 to float - %30 = fmul float %24, %24 - %31 = fadd float %30, 0.000000e+00 - %32 = bitcast i32 %16 to float - %33 = fmul float %32, %32 - %34 = fadd float %31, %33 - %35 = call float @llvm.sqrt.f32(float %34) #8 - %36 = fneg float %29 - %37 = fmul float %35, %36 - %38 = fadd float %24, %37 - %39 = fmul float %37, 0.000000e+00 - %40 = fadd float %32, %39 - %41 = fmul float %38, %38 - %42 = fadd float %41, 0.000000e+00 - %43 = fmul float %40, %40 - %44 = fadd float %42, %43 - %45 = call float @llvm.sqrt.f32(float %44) #8 - %46 = fadd float %45, 0x3EE4F8B580000000 - %47 = fdiv float %38, %46 - %48 = fdiv float %40, %46 - %49 = fmul float %47, 2.000000e+00 - %50 = fmul float %49, %47 - %51 = fsub float 1.000000e+00, %50 - %52 = fmul float %49, %48 - %53 = fsub float 0.000000e+00, %52 - %54 = fmul float %48, 2.000000e+00 - %55 = fmul float %54, %47 - %56 = fsub float 0.000000e+00, %55 - %57 = fmul float %54, %48 - %58 = fsub float 1.000000e+00, %57 - %59 = bitcast float %51 to i32 - %60 = bitcast [4 x float]* %4 to i32* - store i32 %59, i32* %60, align 16 - %61 = bitcast float %53 to i32 - %62 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 - %63 = bitcast float* %62 to i32* - store i32 %61, i32* %63, align 4 - %64 = bitcast float %56 to i32 - %65 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 - %66 = bitcast float* %65 to i32* - store i32 %64, i32* %66, align 8 - %67 = bitcast float %58 to i32 - %68 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 - %69 = bitcast float* %68 to i32* - store i32 %67, i32* %69, align 4 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %3, i8* nonnull align 16 dereferenceable(16) %5, i64 16, i1 false) - store float 0.000000e+00, float* %2, align 4 - %70 = load float, float* %0, align 4 - %71 = fmul float %51, %70 - %72 = fadd float %71, 0.000000e+00 - store float %72, float* %2, align 4 - %73 = load float, float* %14, align 4 - %74 = fmul float %53, %73 - %75 = fadd float %72, %74 - store 
float %75, float* %2, align 4 - store float 0.000000e+00, float* %12, align 4 - %76 = load float, float* %9, align 4 - %77 = fmul float %51, %76 - %78 = fadd float %77, 0.000000e+00 - store float %78, float* %12, align 4 - %79 = load float, float* %19, align 4 - %80 = fmul float %53, %79 - %81 = fadd float %78, %80 - store float %81, float* %12, align 4 - store float 0.000000e+00, float* %17, align 4 - %82 = load float, float* %0, align 4 - %83 = fmul float %56, %82 - %84 = fadd float %83, 0.000000e+00 - store float %84, float* %17, align 4 - %85 = load float, float* %14, align 4 - %86 = fmul float %58, %85 - %87 = fadd float %84, %86 - store float %87, float* %17, align 4 - store float 0.000000e+00, float* %22, align 4 - %88 = load float, float* %9, align 4 - %89 = fmul float %56, %88 - %90 = fadd float %89, 0.000000e+00 - store float %90, float* %22, align 4 - %91 = load float, float* %19, align 4 - %92 = fmul float %58, %91 - %93 = fadd float %90, %92 - store float %93, float* %22, align 4 - %94 = getelementptr inbounds float, float* %1, i64 1 - %95 = bitcast float* %94 to i32* - %96 = load i32, i32* %95, align 4 - %97 = getelementptr inbounds float, float* %1, i64 2 - %98 = bitcast float* %97 to i32* - %99 = load i32, i32* %98, align 4 - store i32 %99, i32* %95, align 4 - store i32 %96, i32* %98, align 4 - ret void -} - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { -.preheader13: - %3 = bitcast float* %2 to i8* - %4 = bitcast float* %0 to i8* - %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) - %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #8 - %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 - %8 = bitcast i8* %7 to float* - store float 1.000000e+00, float* %8, align 4 - %9 = getelementptr inbounds i8, i8* %7, i64 8 - %10 = getelementptr inbounds i8, i8* %7, i64 12 - %11 = bitcast i8* %10 to float* - store float 1.000000e+00, float* %11, align 4 - %12 = bitcast float* %1 to i8* - %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) - %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %15 = bitcast i8* %14 to float* - %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %17 = bitcast i8* %16 to float* - %18 = bitcast float* %2 to i32* - %19 = load i32, i32* %18, align 4 - %20 = bitcast i8* %14 to i32* - store i32 %19, i32* %20, align 4 - %21 = bitcast i8* %7 to i32* - %22 = load i32, i32* %21, align 4 - %23 = bitcast i8* %16 to i32* - store i32 %22, i32* %23, align 4 - %24 = getelementptr inbounds float, float* %2, i64 2 - %25 = bitcast float* %24 to i32* - %26 = load i32, i32* %25, align 4 - %27 = getelementptr inbounds i8, i8* %14, i64 4 - %28 = bitcast i8* %27 to i32* - store i32 %26, i32* %28, align 4 - %29 = bitcast i8* %9 to i32* - %30 = load i32, i32* %29, align 4 - %31 = getelementptr inbounds i8, i8* %16, i64 4 - %32 = bitcast i8* %31 to i32* - store i32 %30, i32* %32, align 4 - %33 = load float, float* %15, align 4 - %34 = call float @no_opt_sgn(float %33) - %35 = fneg float %34 - %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) - %37 = fmul float %36, %35 - %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %39 = bitcast i8* %38 to float* - %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 
2) #9 - %41 = load float, float* %15, align 4 - %42 = load float, float* %17, align 4 - %43 = fmul float %37, %42 - %44 = fadd float %41, %43 - store float %44, float* %39, align 4 - %45 = bitcast i8* %27 to float* - %46 = load float, float* %45, align 4 - %47 = bitcast i8* %31 to float* - %48 = load float, float* %47, align 4 - %49 = fmul float %37, %48 - %50 = fadd float %46, %49 - %51 = getelementptr inbounds i8, i8* %38, i64 4 - %52 = bitcast i8* %51 to float* - store float %50, float* %52, align 4 - %53 = bitcast i8* %40 to float* - %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) - %55 = fadd float %54, 0x3EE4F8B580000000 - %56 = load float, float* %39, align 4 - %57 = fdiv float %56, %55 - store float %57, float* %53, align 4 - %58 = load float, float* %52, align 4 - %59 = fdiv float %58, %55 - %60 = getelementptr inbounds i8, i8* %40, i64 4 - %61 = bitcast i8* %60 to float* - store float %59, float* %61, align 4 - %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 - %63 = bitcast i8* %62 to float* - %64 = load float, float* %53, align 4 - %65 = fmul float %64, 2.000000e+00 - %66 = fmul float %65, %64 - %67 = fsub float 1.000000e+00, %66 - store float %67, float* %63, align 4 - %68 = load float, float* %53, align 4 - %69 = fmul float %68, 2.000000e+00 - %70 = load float, float* %61, align 4 - %71 = fmul float %69, %70 - %72 = fsub float 0.000000e+00, %71 - %73 = getelementptr inbounds i8, i8* %62, i64 4 - %74 = bitcast i8* %73 to float* - store float %72, float* %74, align 4 - %75 = load float, float* %61, align 4 - %76 = fmul float %75, 2.000000e+00 - %77 = load float, float* %53, align 4 - %78 = fmul float %76, %77 - %79 = fsub float 0.000000e+00, %78 - %80 = getelementptr inbounds i8, i8* %62, i64 8 - %81 = bitcast i8* %80 to float* - store float %79, float* %81, align 4 - %82 = load float, float* %61, align 4 - %83 = fmul float %82, 2.000000e+00 - %84 = fmul float %83, %82 - %85 = fsub float 1.000000e+00, %84 - %86 = getelementptr inbounds i8, i8* %62, i64 12 - %87 = bitcast i8* %86 to float* - store float %85, float* %87, align 4 - %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 - %89 = bitcast i8* %88 to float* - %90 = bitcast i8* %62 to i32* - %91 = load i32, i32* %90, align 4 - %92 = bitcast i8* %88 to i32* - store i32 %91, i32* %92, align 4 - %93 = bitcast i8* %73 to i32* - %94 = load i32, i32* %93, align 4 - %95 = getelementptr inbounds i8, i8* %88, i64 4 - %96 = bitcast i8* %95 to i32* - store i32 %94, i32* %96, align 4 - %97 = bitcast i8* %80 to i32* - %98 = load i32, i32* %97, align 4 - %99 = getelementptr inbounds i8, i8* %88, i64 8 - %100 = bitcast i8* %99 to i32* - store i32 %98, i32* %100, align 4 - %101 = bitcast i8* %86 to i32* - %102 = load i32, i32* %101, align 4 - %103 = getelementptr inbounds i8, i8* %88, i64 12 - %104 = bitcast i8* %103 to i32* - store i32 %102, i32* %104, align 4 - %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #8 - call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) - call void @free(i8* %14) - call void @free(i8* %16) - call void @free(i8* %38) - call void @free(i8* %40) - call void @free(i8* %62) - call void @free(i8* %88) - call void @no_opt_naive_fixed_transpose(float* %1) - ret void -} - -; Function Attrs: nounwind -declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 - -; Function Attrs: nounwind readnone speculatable willreturn -declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 - -; Function Attrs: allocsize(0,1) 
-declare i8* @calloc(i64, i64) #5 - -declare void @free(i8*) #6 - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #1 { -.preheader6: - %0 = alloca i64, align 8 - %1 = alloca [4 x float], align 16 - %2 = alloca [4 x float], align 16 - %3 = alloca [4 x float], align 16 - %4 = alloca [4 x float], align 16 - %5 = alloca [4 x float], align 16 - %6 = call i64 @time(i64* null) #8 - store i64 %6, i64* %0, align 8 - %7 = call i64 @time(i64* nonnull %0) #8 - %8 = trunc i64 %7 to i32 - call void @srand(i32 %8) #8 - %9 = call i32 @rand() #8 - %10 = sitofp i32 %9 to float - %11 = fdiv float %10, 0x41747AE140000000 - %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 - store float %11, float* %12, align 16 - %13 = fpext float %11 to double - %14 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %13) #8 - %15 = call i32 @rand() #8 - %16 = sitofp i32 %15 to float - %17 = fdiv float %16, 0x41747AE140000000 - %18 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 - store float %17, float* %18, align 4 - %19 = fpext float %17 to double - %20 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %19) #8 - %21 = call i32 @rand() #8 - %22 = sitofp i32 %21 to float - %23 = fdiv float %22, 0x41747AE140000000 - %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 - store float %23, float* %24, align 8 - %25 = fpext float %23 to double - %26 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %25) #8 - %27 = call i32 @rand() #8 - %28 = sitofp i32 %27 to float - %29 = fdiv float %28, 0x41747AE140000000 - %30 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 - store float %29, float* %30, align 4 - %31 = fpext float %29 to double - %32 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %31) #8 - %33 = bitcast [4 x float]* %2 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %33, i8 0, i64 16, i1 false) - %34 = bitcast [4 x float]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %34, i8 0, i64 16, i1 false) - %35 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %36 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %35, float* nonnull %36) - %37 = bitcast [4 x float]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %37, i8 0, i64 16, i1 false) - %38 = bitcast [4 x float]* %5 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %38, i8 0, i64 16, i1 false) - %39 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - %40 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 - call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %39, float* nonnull %40) - %41 = load float, float* %35, align 16 - %42 = fpext float %41 to double - %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %42) #8 - %44 = load float, float* %39, align 16 - %45 = fpext float %44 to double - %46 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %45) #8 - %47 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 - %48 = load float, float* %47, align 4 - %49 = fpext float %48 to double - %50 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %49) #8 - %51 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 - %52 = load float, float* %51, align 4 - %53 = fpext float %52 to double - %54 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %53) #8 - %55 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 - %56 = load float, float* %55, align 8 - %57 = fpext float %56 to double - %58 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %57) #8 - %59 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 - %60 = load float, float* %59, align 8 - %61 = fpext float %60 to double - %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %61) #8 - %63 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 - %64 = load float, float* %63, align 4 - %65 = fpext float %64 to double - %66 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %65) #8 - %67 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 - %68 = load float, float* %67, align 4 - %69 = fpext float %68 to double - %70 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %69) #8 - %71 = load float, float* %36, align 16 - %72 = fpext float %71 to double - %73 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %72) #8 - %74 = load float, float* %40, align 16 - %75 = fpext float %74 to double - %76 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %75) #8 - %77 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 - %78 = load float, float* %77, align 4 - %79 = fpext float %78 to double - %80 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %79) #8 - %81 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 - %82 = load float, float* %81, align 4 - %83 = fpext float %82 to double - %84 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %83) #8 - %85 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 - %86 = load float, float* %85, align 8 - %87 = fpext float %86 to double - %88 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %87) #8 - %89 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 - %90 = load float, float* %89, align 8 - %91 = fpext float %90 to double - %92 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %91) #8 - %93 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 - %94 = load float, float* %93, align 4 - %95 = fpext float %94 to double - %96 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %95) #8 - %97 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 - %98 = load float, float* %97, align 4 - %99 = fpext float %98 to double - %100 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %99) #8 - ret i32 0 -} - -declare i64 @time(i64*) #6 - -declare void @srand(i32) #6 - -declare i32 @rand() #6 - -declare i32 @printf(i8*, ...) #6 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #7 - -attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone speculatable willreturn } -attributes #3 = { argmemonly nounwind willreturn writeonly } -attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" 
"target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { argmemonly nounwind willreturn } -attributes #8 = { nounwind } -attributes #9 = { nounwind allocsize(0,1) } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} -!3 = distinct !{!3, !4} -!4 = !{!"llvm.loop.unroll.disable"} -!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/failed-test/clang.ll b/src/dios-egraphs/Diospyros/failed-test/clang.ll deleted file mode 100644 index 35018816..00000000 --- a/src/dios-egraphs/Diospyros/failed-test/clang.ll +++ /dev/null @@ -1,2293 +0,0 @@ -; ModuleID = 'fail-tests/qr-decomp-local-arrays.c' -source_filename = "fail-tests/qr-decomp-local-arrays.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 -@.str.1 = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 -@.str.2 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 -@.str.3 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 -@.str.4 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @sgn(float %0) #0 { - %2 = alloca float, align 4 - store float %0, float* %2, align 4 - %3 = load float, float* %2, align 4 - %4 = fcmp ogt float %3, 0.000000e+00 - %5 = zext i1 %4 to i32 - %6 = load float, float* %2, align 4 - %7 = fcmp olt float %6, 0.000000e+00 - %8 = zext i1 %7 to i32 - %9 = sub nsw i32 %5, %8 - %10 = sitofp i32 %9 to float - ret float %10 -} - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_sgn(float %0) #1 { - %2 = alloca float, align 4 - store float %0, float* %2, align 4 - %3 = load float, float* %2, align 4 - %4 = fcmp ogt float %3, 0.000000e+00 - %5 = zext i1 %4 to i32 - %6 = load float, float* %2, align 4 - %7 = fcmp olt float %6, 0.000000e+00 - %8 = zext i1 %7 to i32 - %9 = sub nsw i32 %5, %8 - %10 = sitofp i32 %9 to float - ret float %10 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @naive_norm(float* %0, i32 %1) #0 { - %3 = alloca float*, align 8 - %4 = alloca i32, align 4 - %5 = alloca float, align 4 - %6 = alloca i32, align 4 - store float* %0, float** %3, align 8 - store i32 %1, i32* %4, align 4 - store float 0.000000e+00, float* %5, align 4 - store i32 0, i32* %6, align 4 - br label %7 - -7: ; preds = %25, %2 - %8 = load i32, i32* %6, align 4 - %9 = load i32, i32* %4, align 4 - %10 = icmp slt i32 %8, %9 - br i1 %10, label %11, label %28 - -11: ; preds = %7 - %12 = load float*, float** %3, align 8 - %13 = load i32, i32* %6, align 4 - %14 = sext i32 %13 to i64 - %15 = getelementptr inbounds float, float* %12, i64 %14 - %16 = load float, float* %15, align 4 - %17 = load float*, float** %3, align 8 - %18 = load i32, i32* %6, align 4 - %19 = sext i32 %18 to i64 - %20 = getelementptr inbounds float, float* %17, i64 %19 - %21 = load float, float* %20, align 4 - %22 = fmul float %16, %21 - %23 = load float, float* %5, align 4 - %24 = fadd float %23, %22 - store float %24, float* %5, align 4 - br label %25 - -25: ; preds = %11 - %26 = load i32, i32* %6, align 4 - %27 = add nsw i32 %26, 1 - store i32 %27, i32* %6, align 4 - br label %7 - -28: ; 
preds = %7 - %29 = load float, float* %5, align 4 - %30 = call float @llvm.sqrt.f32(float %29) - ret float %30 -} - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32(float) #2 - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_naive_norm(float* %0, i32 %1) #1 { - %3 = alloca float*, align 8 - %4 = alloca i32, align 4 - %5 = alloca float, align 4 - %6 = alloca i32, align 4 - store float* %0, float** %3, align 8 - store i32 %1, i32* %4, align 4 - store float 0.000000e+00, float* %5, align 4 - store i32 0, i32* %6, align 4 - br label %7 - -7: ; preds = %25, %2 - %8 = load i32, i32* %6, align 4 - %9 = load i32, i32* %4, align 4 - %10 = icmp slt i32 %8, %9 - br i1 %10, label %11, label %28 - -11: ; preds = %7 - %12 = load float*, float** %3, align 8 - %13 = load i32, i32* %6, align 4 - %14 = sext i32 %13 to i64 - %15 = getelementptr inbounds float, float* %12, i64 %14 - %16 = load float, float* %15, align 4 - %17 = load float*, float** %3, align 8 - %18 = load i32, i32* %6, align 4 - %19 = sext i32 %18 to i64 - %20 = getelementptr inbounds float, float* %17, i64 %19 - %21 = load float, float* %20, align 4 - %22 = fmul float %16, %21 - %23 = load float, float* %5, align 4 - %24 = fadd float %23, %22 - store float %24, float* %5, align 4 - br label %25 - -25: ; preds = %11 - %26 = load i32, i32* %6, align 4 - %27 = add nsw i32 %26, 1 - store i32 %27, i32* %6, align 4 - br label %7 - -28: ; preds = %7 - %29 = load float, float* %5, align 4 - %30 = call float @llvm.sqrt.f32(float %29) - ret float %30 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_transpose(float* %0) #0 { - %2 = alloca float*, align 8 - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca float, align 4 - store float* %0, float** %2, align 8 - store i32 0, i32* %3, align 4 - br label %6 - -6: ; preds = %51, %1 - %7 = load i32, i32* %3, align 4 - %8 = icmp slt i32 %7, 2 - br i1 %8, label %9, label %54 - -9: ; preds = %6 - %10 = load i32, i32* %3, align 4 - %11 = add nsw i32 %10, 1 - store i32 %11, i32* %4, align 4 - br label %12 - -12: ; preds = %47, %9 - %13 = load i32, i32* %4, align 4 - %14 = icmp slt i32 %13, 2 - br i1 %14, label %15, label %50 - -15: ; preds = %12 - %16 = load float*, float** %2, align 8 - %17 = load i32, i32* %3, align 4 - %18 = mul nsw i32 %17, 2 - %19 = load i32, i32* %4, align 4 - %20 = add nsw i32 %18, %19 - %21 = sext i32 %20 to i64 - %22 = getelementptr inbounds float, float* %16, i64 %21 - %23 = load float, float* %22, align 4 - store float %23, float* %5, align 4 - %24 = load float*, float** %2, align 8 - %25 = load i32, i32* %4, align 4 - %26 = mul nsw i32 %25, 2 - %27 = load i32, i32* %3, align 4 - %28 = add nsw i32 %26, %27 - %29 = sext i32 %28 to i64 - %30 = getelementptr inbounds float, float* %24, i64 %29 - %31 = load float, float* %30, align 4 - %32 = load float*, float** %2, align 8 - %33 = load i32, i32* %3, align 4 - %34 = mul nsw i32 %33, 2 - %35 = load i32, i32* %4, align 4 - %36 = add nsw i32 %34, %35 - %37 = sext i32 %36 to i64 - %38 = getelementptr inbounds float, float* %32, i64 %37 - store float %31, float* %38, align 4 - %39 = load float, float* %5, align 4 - %40 = load float*, float** %2, align 8 - %41 = load i32, i32* %4, align 4 - %42 = mul nsw i32 %41, 2 - %43 = load i32, i32* %3, align 4 - %44 = add nsw i32 %42, %43 - %45 = sext i32 %44 to i64 - %46 = getelementptr inbounds float, float* %40, i64 %45 - store float %39, float* %46, align 4 - br label %47 - -47: ; preds 
= %15 - %48 = load i32, i32* %4, align 4 - %49 = add nsw i32 %48, 1 - store i32 %49, i32* %4, align 4 - br label %12 - -50: ; preds = %12 - br label %51 - -51: ; preds = %50 - %52 = load i32, i32* %3, align 4 - %53 = add nsw i32 %52, 1 - store i32 %53, i32* %3, align 4 - br label %6 - -54: ; preds = %6 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_transpose(float* %0) #1 { - %2 = alloca float*, align 8 - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca float, align 4 - store float* %0, float** %2, align 8 - store i32 0, i32* %3, align 4 - br label %6 - -6: ; preds = %51, %1 - %7 = load i32, i32* %3, align 4 - %8 = icmp slt i32 %7, 2 - br i1 %8, label %9, label %54 - -9: ; preds = %6 - %10 = load i32, i32* %3, align 4 - %11 = add nsw i32 %10, 1 - store i32 %11, i32* %4, align 4 - br label %12 - -12: ; preds = %47, %9 - %13 = load i32, i32* %4, align 4 - %14 = icmp slt i32 %13, 2 - br i1 %14, label %15, label %50 - -15: ; preds = %12 - %16 = load float*, float** %2, align 8 - %17 = load i32, i32* %3, align 4 - %18 = mul nsw i32 %17, 2 - %19 = load i32, i32* %4, align 4 - %20 = add nsw i32 %18, %19 - %21 = sext i32 %20 to i64 - %22 = getelementptr inbounds float, float* %16, i64 %21 - %23 = load float, float* %22, align 4 - store float %23, float* %5, align 4 - %24 = load float*, float** %2, align 8 - %25 = load i32, i32* %4, align 4 - %26 = mul nsw i32 %25, 2 - %27 = load i32, i32* %3, align 4 - %28 = add nsw i32 %26, %27 - %29 = sext i32 %28 to i64 - %30 = getelementptr inbounds float, float* %24, i64 %29 - %31 = load float, float* %30, align 4 - %32 = load float*, float** %2, align 8 - %33 = load i32, i32* %3, align 4 - %34 = mul nsw i32 %33, 2 - %35 = load i32, i32* %4, align 4 - %36 = add nsw i32 %34, %35 - %37 = sext i32 %36 to i64 - %38 = getelementptr inbounds float, float* %32, i64 %37 - store float %31, float* %38, align 4 - %39 = load float, float* %5, align 4 - %40 = load float*, float** %2, align 8 - %41 = load i32, i32* %4, align 4 - %42 = mul nsw i32 %41, 2 - %43 = load i32, i32* %3, align 4 - %44 = add nsw i32 %42, %43 - %45 = sext i32 %44 to i64 - %46 = getelementptr inbounds float, float* %40, i64 %45 - store float %39, float* %46, align 4 - br label %47 - -47: ; preds = %15 - %48 = load i32, i32* %4, align 4 - %49 = add nsw i32 %48, 1 - store i32 %49, i32* %4, align 4 - br label %12 - -50: ; preds = %12 - br label %51 - -51: ; preds = %50 - %52 = load i32, i32* %3, align 4 - %53 = add nsw i32 %52, 1 - store i32 %53, i32* %3, align 4 - br label %6 - -54: ; preds = %6 - ret void -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - %7 = alloca i32, align 4 - %8 = alloca i32, align 4 - %9 = alloca i32, align 4 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - store i32 0, i32* %7, align 4 - br label %10 - -10: ; preds = %63, %3 - %11 = load i32, i32* %7, align 4 - %12 = icmp slt i32 %11, 2 - br i1 %12, label %13, label %66 - -13: ; preds = %10 - store i32 0, i32* %8, align 4 - br label %14 - -14: ; preds = %59, %13 - %15 = load i32, i32* %8, align 4 - %16 = icmp slt i32 %15, 2 - br i1 %16, label %17, label %62 - -17: ; preds = %14 - %18 = load float*, float** %6, align 8 - %19 = load i32, i32* %7, align 4 - %20 = mul nsw i32 2, %19 - %21 = load i32, i32* %8, align 4 - 
%22 = add nsw i32 %20, %21 - %23 = sext i32 %22 to i64 - %24 = getelementptr inbounds float, float* %18, i64 %23 - store float 0.000000e+00, float* %24, align 4 - store i32 0, i32* %9, align 4 - br label %25 - -25: ; preds = %55, %17 - %26 = load i32, i32* %9, align 4 - %27 = icmp slt i32 %26, 2 - br i1 %27, label %28, label %58 - -28: ; preds = %25 - %29 = load float*, float** %4, align 8 - %30 = load i32, i32* %7, align 4 - %31 = mul nsw i32 2, %30 - %32 = load i32, i32* %9, align 4 - %33 = add nsw i32 %31, %32 - %34 = sext i32 %33 to i64 - %35 = getelementptr inbounds float, float* %29, i64 %34 - %36 = load float, float* %35, align 4 - %37 = load float*, float** %5, align 8 - %38 = load i32, i32* %9, align 4 - %39 = mul nsw i32 2, %38 - %40 = load i32, i32* %8, align 4 - %41 = add nsw i32 %39, %40 - %42 = sext i32 %41 to i64 - %43 = getelementptr inbounds float, float* %37, i64 %42 - %44 = load float, float* %43, align 4 - %45 = fmul float %36, %44 - %46 = load float*, float** %6, align 8 - %47 = load i32, i32* %7, align 4 - %48 = mul nsw i32 2, %47 - %49 = load i32, i32* %8, align 4 - %50 = add nsw i32 %48, %49 - %51 = sext i32 %50 to i64 - %52 = getelementptr inbounds float, float* %46, i64 %51 - %53 = load float, float* %52, align 4 - %54 = fadd float %53, %45 - store float %54, float* %52, align 4 - br label %55 - -55: ; preds = %28 - %56 = load i32, i32* %9, align 4 - %57 = add nsw i32 %56, 1 - store i32 %57, i32* %9, align 4 - br label %25 - -58: ; preds = %25 - br label %59 - -59: ; preds = %58 - %60 = load i32, i32* %8, align 4 - %61 = add nsw i32 %60, 1 - store i32 %61, i32* %8, align 4 - br label %14 - -62: ; preds = %14 - br label %63 - -63: ; preds = %62 - %64 = load i32, i32* %7, align 4 - %65 = add nsw i32 %64, 1 - store i32 %65, i32* %7, align 4 - br label %10 - -66: ; preds = %10 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - %7 = alloca i32, align 4 - %8 = alloca i32, align 4 - %9 = alloca i32, align 4 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - store i32 0, i32* %7, align 4 - br label %10 - -10: ; preds = %63, %3 - %11 = load i32, i32* %7, align 4 - %12 = icmp slt i32 %11, 2 - br i1 %12, label %13, label %66 - -13: ; preds = %10 - store i32 0, i32* %8, align 4 - br label %14 - -14: ; preds = %59, %13 - %15 = load i32, i32* %8, align 4 - %16 = icmp slt i32 %15, 2 - br i1 %16, label %17, label %62 - -17: ; preds = %14 - %18 = load float*, float** %6, align 8 - %19 = load i32, i32* %7, align 4 - %20 = mul nsw i32 2, %19 - %21 = load i32, i32* %8, align 4 - %22 = add nsw i32 %20, %21 - %23 = sext i32 %22 to i64 - %24 = getelementptr inbounds float, float* %18, i64 %23 - store float 0.000000e+00, float* %24, align 4 - store i32 0, i32* %9, align 4 - br label %25 - -25: ; preds = %55, %17 - %26 = load i32, i32* %9, align 4 - %27 = icmp slt i32 %26, 2 - br i1 %27, label %28, label %58 - -28: ; preds = %25 - %29 = load float*, float** %4, align 8 - %30 = load i32, i32* %7, align 4 - %31 = mul nsw i32 2, %30 - %32 = load i32, i32* %9, align 4 - %33 = add nsw i32 %31, %32 - %34 = sext i32 %33 to i64 - %35 = getelementptr inbounds float, float* %29, i64 %34 - %36 = load float, float* %35, align 4 - %37 = load float*, float** %5, align 8 - %38 = load i32, i32* %9, align 4 - %39 = mul nsw i32 2, %38 - %40 = load 
i32, i32* %8, align 4 - %41 = add nsw i32 %39, %40 - %42 = sext i32 %41 to i64 - %43 = getelementptr inbounds float, float* %37, i64 %42 - %44 = load float, float* %43, align 4 - %45 = fmul float %36, %44 - %46 = load float*, float** %6, align 8 - %47 = load i32, i32* %7, align 4 - %48 = mul nsw i32 2, %47 - %49 = load i32, i32* %8, align 4 - %50 = add nsw i32 %48, %49 - %51 = sext i32 %50 to i64 - %52 = getelementptr inbounds float, float* %46, i64 %51 - %53 = load float, float* %52, align 4 - %54 = fadd float %53, %45 - store float %54, float* %52, align 4 - br label %55 - -55: ; preds = %28 - %56 = load i32, i32* %9, align 4 - %57 = add nsw i32 %56, 1 - store i32 %57, i32* %9, align 4 - br label %25 - -58: ; preds = %25 - br label %59 - -59: ; preds = %58 - %60 = load i32, i32* %8, align 4 - %61 = add nsw i32 %60, 1 - store i32 %61, i32* %8, align 4 - br label %14 - -62: ; preds = %14 - br label %63 - -63: ; preds = %62 - %64 = load i32, i32* %7, align 4 - %65 = add nsw i32 %64, 1 - store i32 %65, i32* %7, align 4 - br label %10 - -66: ; preds = %10 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { - %4 = alloca float*, align 8 - %5 = alloca i32, align 4 - %6 = alloca float, align 4 - %7 = alloca i32, align 4 - %8 = alloca float*, align 8 - %9 = alloca i32, align 4 - %10 = alloca float, align 4 - %11 = alloca i32, align 4 - %12 = alloca float*, align 8 - %13 = alloca float*, align 8 - %14 = alloca float*, align 8 - %15 = alloca i32, align 4 - %16 = alloca i32, align 4 - %17 = alloca i32, align 4 - %18 = alloca float*, align 8 - %19 = alloca float*, align 8 - %20 = alloca float*, align 8 - %21 = alloca i32, align 4 - %22 = alloca i32, align 4 - %23 = alloca i32, align 4 - %24 = alloca float*, align 8 - %25 = alloca float*, align 8 - %26 = alloca float*, align 8 - %27 = alloca i32, align 4 - %28 = alloca i32, align 4 - %29 = alloca i32, align 4 - %30 = alloca float*, align 8 - %31 = alloca i32, align 4 - %32 = alloca i32, align 4 - %33 = alloca float, align 4 - %34 = alloca float, align 4 - %35 = alloca float*, align 8 - %36 = alloca float*, align 8 - %37 = alloca float*, align 8 - %38 = alloca i32, align 4 - %39 = alloca [4 x float], align 16 - %40 = alloca i32, align 4 - %41 = alloca i32, align 4 - %42 = alloca i32, align 4 - %43 = alloca i32, align 4 - %44 = alloca [2 x float], align 4 - %45 = alloca [2 x float], align 4 - %46 = alloca i32, align 4 - %47 = alloca i32, align 4 - %48 = alloca i32, align 4 - %49 = alloca float, align 4 - %50 = alloca [2 x float], align 4 - %51 = alloca [2 x float], align 4 - %52 = alloca i32, align 4 - %53 = alloca i32, align 4 - %54 = alloca float, align 4 - %55 = alloca i32, align 4 - %56 = alloca [4 x float], align 16 - %57 = alloca i32, align 4 - %58 = alloca i32, align 4 - %59 = alloca i32, align 4 - %60 = alloca float, align 4 - %61 = alloca [4 x float], align 16 - %62 = alloca i32, align 4 - %63 = alloca i32, align 4 - %64 = alloca i32, align 4 - %65 = alloca float, align 4 - %66 = alloca i32, align 4 - %67 = alloca [4 x float], align 16 - %68 = alloca i32, align 4 - %69 = alloca i32, align 4 - %70 = alloca i32, align 4 - store float* %0, float** %35, align 8 - store float* %1, float** %36, align 8 - store float* %2, float** %37, align 8 - store i32 0, i32* %38, align 4 - br label %71 - -71: ; preds = %84, %3 - %72 = load i32, i32* %38, align 4 - %73 = icmp slt i32 %72, 4 - br i1 %73, label %74, label %87 - -74: ; preds = %71 - %75 = load float*, float** %35, 
align 8 - %76 = load i32, i32* %38, align 4 - %77 = sext i32 %76 to i64 - %78 = getelementptr inbounds float, float* %75, i64 %77 - %79 = load float, float* %78, align 4 - %80 = load float*, float** %37, align 8 - %81 = load i32, i32* %38, align 4 - %82 = sext i32 %81 to i64 - %83 = getelementptr inbounds float, float* %80, i64 %82 - store float %79, float* %83, align 4 - br label %84 - -84: ; preds = %74 - %85 = load i32, i32* %38, align 4 - %86 = add nsw i32 %85, 1 - store i32 %86, i32* %38, align 4 - br label %71 - -87: ; preds = %71 - %88 = bitcast [4 x float]* %39 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %88, i8 0, i64 16, i1 false) - store i32 0, i32* %40, align 4 - br label %89 - -89: ; preds = %112, %87 - %90 = load i32, i32* %40, align 4 - %91 = icmp slt i32 %90, 2 - br i1 %91, label %92, label %115 - -92: ; preds = %89 - store i32 0, i32* %41, align 4 - br label %93 - -93: ; preds = %108, %92 - %94 = load i32, i32* %41, align 4 - %95 = icmp slt i32 %94, 2 - br i1 %95, label %96, label %111 - -96: ; preds = %93 - %97 = load i32, i32* %40, align 4 - %98 = load i32, i32* %41, align 4 - %99 = icmp eq i32 %97, %98 - %100 = zext i1 %99 to i32 - %101 = sitofp i32 %100 to float - %102 = load i32, i32* %40, align 4 - %103 = mul nsw i32 %102, 2 - %104 = load i32, i32* %41, align 4 - %105 = add nsw i32 %103, %104 - %106 = sext i32 %105 to i64 - %107 = getelementptr inbounds [4 x float], [4 x float]* %39, i64 0, i64 %106 - store float %101, float* %107, align 4 - br label %108 - -108: ; preds = %96 - %109 = load i32, i32* %41, align 4 - %110 = add nsw i32 %109, 1 - store i32 %110, i32* %41, align 4 - br label %93 - -111: ; preds = %93 - br label %112 - -112: ; preds = %111 - %113 = load i32, i32* %40, align 4 - %114 = add nsw i32 %113, 1 - store i32 %114, i32* %40, align 4 - br label %89 - -115: ; preds = %89 - store i32 0, i32* %42, align 4 - br label %116 - -116: ; preds = %643, %115 - %117 = load i32, i32* %42, align 4 - %118 = icmp slt i32 %117, 1 - br i1 %118, label %119, label %646 - -119: ; preds = %116 - %120 = load i32, i32* %42, align 4 - %121 = sub nsw i32 2, %120 - store i32 %121, i32* %43, align 4 - %122 = bitcast [2 x float]* %44 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %122, i8 0, i64 8, i1 false) - %123 = bitcast [2 x float]* %45 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %123, i8 0, i64 8, i1 false) - store i32 0, i32* %46, align 4 - br label %124 - -124: ; preds = %134, %119 - %125 = load i32, i32* %46, align 4 - %126 = icmp slt i32 %125, 2 - br i1 %126, label %127, label %137 - -127: ; preds = %124 - %128 = load i32, i32* %46, align 4 - %129 = sext i32 %128 to i64 - %130 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 %129 - store float 0.000000e+00, float* %130, align 4 - %131 = load i32, i32* %46, align 4 - %132 = sext i32 %131 to i64 - %133 = getelementptr inbounds [2 x float], [2 x float]* %45, i64 0, i64 %132 - store float 0.000000e+00, float* %133, align 4 - br label %134 - -134: ; preds = %127 - %135 = load i32, i32* %46, align 4 - %136 = add nsw i32 %135, 1 - store i32 %136, i32* %46, align 4 - br label %124 - -137: ; preds = %124 - store i32 0, i32* %47, align 4 - br label %138 - -138: ; preds = %167, %137 - %139 = load i32, i32* %47, align 4 - %140 = load i32, i32* %43, align 4 - %141 = icmp slt i32 %139, %140 - br i1 %141, label %142, label %170 - -142: ; preds = %138 - %143 = load i32, i32* %42, align 4 - %144 = load i32, i32* %47, align 4 - %145 = add nsw i32 %143, %144 - store i32 %145, i32* %48, align 4 - 
%146 = load float*, float** %37, align 8 - %147 = load i32, i32* %48, align 4 - %148 = mul nsw i32 %147, 2 - %149 = load i32, i32* %42, align 4 - %150 = add nsw i32 %148, %149 - %151 = sext i32 %150 to i64 - %152 = getelementptr inbounds float, float* %146, i64 %151 - %153 = load float, float* %152, align 4 - %154 = load i32, i32* %47, align 4 - %155 = sext i32 %154 to i64 - %156 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 %155 - store float %153, float* %156, align 4 - %157 = load i32, i32* %48, align 4 - %158 = mul nsw i32 %157, 2 - %159 = load i32, i32* %42, align 4 - %160 = add nsw i32 %158, %159 - %161 = sext i32 %160 to i64 - %162 = getelementptr inbounds [4 x float], [4 x float]* %39, i64 0, i64 %161 - %163 = load float, float* %162, align 4 - %164 = load i32, i32* %47, align 4 - %165 = sext i32 %164 to i64 - %166 = getelementptr inbounds [2 x float], [2 x float]* %45, i64 0, i64 %165 - store float %163, float* %166, align 4 - br label %167 - -167: ; preds = %142 - %168 = load i32, i32* %47, align 4 - %169 = add nsw i32 %168, 1 - store i32 %169, i32* %47, align 4 - br label %138 - -170: ; preds = %138 - %171 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 0 - %172 = load float, float* %171, align 4 - store float %172, float* %34, align 4 - %173 = load float, float* %34, align 4 - %174 = fcmp ogt float %173, 0.000000e+00 - %175 = zext i1 %174 to i32 - %176 = load float, float* %34, align 4 - %177 = fcmp olt float %176, 0.000000e+00 - %178 = zext i1 %177 to i32 - %179 = sub nsw i32 %175, %178 - %180 = sitofp i32 %179 to float - %181 = fneg float %180 - %182 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 0 - %183 = load i32, i32* %43, align 4 - store float* %182, float** %4, align 8 - store i32 %183, i32* %5, align 4 - store float 0.000000e+00, float* %6, align 4 - store i32 0, i32* %7, align 4 - br label %184 - -184: ; preds = %188, %170 - %185 = load i32, i32* %7, align 4 - %186 = load i32, i32* %5, align 4 - %187 = icmp slt i32 %185, %186 - br i1 %187, label %188, label %204 - -188: ; preds = %184 - %189 = load float*, float** %4, align 8 - %190 = load i32, i32* %7, align 4 - %191 = sext i32 %190 to i64 - %192 = getelementptr inbounds float, float* %189, i64 %191 - %193 = load float, float* %192, align 4 - %194 = load float*, float** %4, align 8 - %195 = load i32, i32* %7, align 4 - %196 = sext i32 %195 to i64 - %197 = getelementptr inbounds float, float* %194, i64 %196 - %198 = load float, float* %197, align 4 - %199 = fmul float %193, %198 - %200 = load float, float* %6, align 4 - %201 = fadd float %200, %199 - store float %201, float* %6, align 4 - %202 = load i32, i32* %7, align 4 - %203 = add nsw i32 %202, 1 - store i32 %203, i32* %7, align 4 - br label %184 - -204: ; preds = %184 - %205 = load float, float* %6, align 4 - %206 = call float @llvm.sqrt.f32(float %205) #7 - %207 = fmul float %181, %206 - store float %207, float* %49, align 4 - %208 = bitcast [2 x float]* %50 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %208, i8 0, i64 8, i1 false) - %209 = bitcast [2 x float]* %51 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %209, i8 0, i64 8, i1 false) - store i32 0, i32* %52, align 4 - br label %210 - -210: ; preds = %220, %204 - %211 = load i32, i32* %52, align 4 - %212 = icmp slt i32 %211, 2 - br i1 %212, label %213, label %223 - -213: ; preds = %210 - %214 = load i32, i32* %52, align 4 - %215 = sext i32 %214 to i64 - %216 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 %215 - 
store float 0.000000e+00, float* %216, align 4 - %217 = load i32, i32* %52, align 4 - %218 = sext i32 %217 to i64 - %219 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %218 - store float 0.000000e+00, float* %219, align 4 - br label %220 - -220: ; preds = %213 - %221 = load i32, i32* %52, align 4 - %222 = add nsw i32 %221, 1 - store i32 %222, i32* %52, align 4 - br label %210 - -223: ; preds = %210 - store i32 0, i32* %53, align 4 - br label %224 - -224: ; preds = %243, %223 - %225 = load i32, i32* %53, align 4 - %226 = load i32, i32* %43, align 4 - %227 = icmp slt i32 %225, %226 - br i1 %227, label %228, label %246 - -228: ; preds = %224 - %229 = load i32, i32* %53, align 4 - %230 = sext i32 %229 to i64 - %231 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 %230 - %232 = load float, float* %231, align 4 - %233 = load float, float* %49, align 4 - %234 = load i32, i32* %53, align 4 - %235 = sext i32 %234 to i64 - %236 = getelementptr inbounds [2 x float], [2 x float]* %45, i64 0, i64 %235 - %237 = load float, float* %236, align 4 - %238 = fmul float %233, %237 - %239 = fadd float %232, %238 - %240 = load i32, i32* %53, align 4 - %241 = sext i32 %240 to i64 - %242 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 %241 - store float %239, float* %242, align 4 - br label %243 - -243: ; preds = %228 - %244 = load i32, i32* %53, align 4 - %245 = add nsw i32 %244, 1 - store i32 %245, i32* %53, align 4 - br label %224 - -246: ; preds = %224 - %247 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 0 - %248 = load i32, i32* %43, align 4 - store float* %247, float** %8, align 8 - store i32 %248, i32* %9, align 4 - store float 0.000000e+00, float* %10, align 4 - store i32 0, i32* %11, align 4 - br label %249 - -249: ; preds = %253, %246 - %250 = load i32, i32* %11, align 4 - %251 = load i32, i32* %9, align 4 - %252 = icmp slt i32 %250, %251 - br i1 %252, label %253, label %269 - -253: ; preds = %249 - %254 = load float*, float** %8, align 8 - %255 = load i32, i32* %11, align 4 - %256 = sext i32 %255 to i64 - %257 = getelementptr inbounds float, float* %254, i64 %256 - %258 = load float, float* %257, align 4 - %259 = load float*, float** %8, align 8 - %260 = load i32, i32* %11, align 4 - %261 = sext i32 %260 to i64 - %262 = getelementptr inbounds float, float* %259, i64 %261 - %263 = load float, float* %262, align 4 - %264 = fmul float %258, %263 - %265 = load float, float* %10, align 4 - %266 = fadd float %265, %264 - store float %266, float* %10, align 4 - %267 = load i32, i32* %11, align 4 - %268 = add nsw i32 %267, 1 - store i32 %268, i32* %11, align 4 - br label %249 - -269: ; preds = %249 - %270 = load float, float* %10, align 4 - %271 = call float @llvm.sqrt.f32(float %270) #7 - store float %271, float* %54, align 4 - store i32 0, i32* %55, align 4 - br label %272 - -272: ; preds = %287, %269 - %273 = load i32, i32* %55, align 4 - %274 = load i32, i32* %43, align 4 - %275 = icmp slt i32 %273, %274 - br i1 %275, label %276, label %290 - -276: ; preds = %272 - %277 = load i32, i32* %55, align 4 - %278 = sext i32 %277 to i64 - %279 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 %278 - %280 = load float, float* %279, align 4 - %281 = load float, float* %54, align 4 - %282 = fadd float %281, 0x3EE4F8B580000000 - %283 = fdiv float %280, %282 - %284 = load i32, i32* %55, align 4 - %285 = sext i32 %284 to i64 - %286 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %285 - store float %283, 
float* %286, align 4 - br label %287 - -287: ; preds = %276 - %288 = load i32, i32* %55, align 4 - %289 = add nsw i32 %288, 1 - store i32 %289, i32* %55, align 4 - br label %272 - -290: ; preds = %272 - %291 = bitcast [4 x float]* %56 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %291, i8 0, i64 16, i1 false) - store i32 0, i32* %57, align 4 - br label %292 - -292: ; preds = %299, %290 - %293 = load i32, i32* %57, align 4 - %294 = icmp slt i32 %293, 4 - br i1 %294, label %295, label %302 - -295: ; preds = %292 - %296 = load i32, i32* %57, align 4 - %297 = sext i32 %296 to i64 - %298 = getelementptr inbounds [4 x float], [4 x float]* %56, i64 0, i64 %297 - store float 0.000000e+00, float* %298, align 4 - br label %299 - -299: ; preds = %295 - %300 = load i32, i32* %57, align 4 - %301 = add nsw i32 %300, 1 - store i32 %301, i32* %57, align 4 - br label %292 - -302: ; preds = %292 - store i32 0, i32* %58, align 4 - br label %303 - -303: ; preds = %341, %302 - %304 = load i32, i32* %58, align 4 - %305 = load i32, i32* %43, align 4 - %306 = icmp slt i32 %304, %305 - br i1 %306, label %307, label %344 - -307: ; preds = %303 - store i32 0, i32* %59, align 4 - br label %308 - -308: ; preds = %337, %307 - %309 = load i32, i32* %59, align 4 - %310 = load i32, i32* %43, align 4 - %311 = icmp slt i32 %309, %310 - br i1 %311, label %312, label %340 - -312: ; preds = %308 - %313 = load i32, i32* %58, align 4 - %314 = load i32, i32* %59, align 4 - %315 = icmp eq i32 %313, %314 - %316 = zext i1 %315 to i64 - %317 = select i1 %315, float 1.000000e+00, float 0.000000e+00 - %318 = load i32, i32* %58, align 4 - %319 = sext i32 %318 to i64 - %320 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %319 - %321 = load float, float* %320, align 4 - %322 = fmul float 2.000000e+00, %321 - %323 = load i32, i32* %59, align 4 - %324 = sext i32 %323 to i64 - %325 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %324 - %326 = load float, float* %325, align 4 - %327 = fmul float %322, %326 - %328 = fsub float %317, %327 - store float %328, float* %60, align 4 - %329 = load float, float* %60, align 4 - %330 = load i32, i32* %58, align 4 - %331 = load i32, i32* %43, align 4 - %332 = mul nsw i32 %330, %331 - %333 = load i32, i32* %59, align 4 - %334 = add nsw i32 %332, %333 - %335 = sext i32 %334 to i64 - %336 = getelementptr inbounds [4 x float], [4 x float]* %56, i64 0, i64 %335 - store float %329, float* %336, align 4 - br label %337 - -337: ; preds = %312 - %338 = load i32, i32* %59, align 4 - %339 = add nsw i32 %338, 1 - store i32 %339, i32* %59, align 4 - br label %308 - -340: ; preds = %308 - br label %341 - -341: ; preds = %340 - %342 = load i32, i32* %58, align 4 - %343 = add nsw i32 %342, 1 - store i32 %343, i32* %58, align 4 - br label %303 - -344: ; preds = %303 - %345 = bitcast [4 x float]* %61 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %345, i8 0, i64 16, i1 false) - store i32 0, i32* %62, align 4 - br label %346 - -346: ; preds = %353, %344 - %347 = load i32, i32* %62, align 4 - %348 = icmp slt i32 %347, 4 - br i1 %348, label %349, label %356 - -349: ; preds = %346 - %350 = load i32, i32* %62, align 4 - %351 = sext i32 %350 to i64 - %352 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 %351 - store float 0.000000e+00, float* %352, align 4 - br label %353 - -353: ; preds = %349 - %354 = load i32, i32* %62, align 4 - %355 = add nsw i32 %354, 1 - store i32 %355, i32* %62, align 4 - br label %346 - -356: ; preds = %346 - store i32 0, i32* %63, 
align 4 - br label %357 - -357: ; preds = %403, %356 - %358 = load i32, i32* %63, align 4 - %359 = icmp slt i32 %358, 2 - br i1 %359, label %360, label %406 - -360: ; preds = %357 - store i32 0, i32* %64, align 4 - br label %361 - -361: ; preds = %399, %360 - %362 = load i32, i32* %64, align 4 - %363 = icmp slt i32 %362, 2 - br i1 %363, label %364, label %402 - -364: ; preds = %361 - %365 = load i32, i32* %63, align 4 - %366 = load i32, i32* %42, align 4 - %367 = icmp slt i32 %365, %366 - br i1 %367, label %372, label %368 - -368: ; preds = %364 - %369 = load i32, i32* %64, align 4 - %370 = load i32, i32* %42, align 4 - %371 = icmp slt i32 %369, %370 - br i1 %371, label %372, label %378 - -372: ; preds = %368, %364 - %373 = load i32, i32* %63, align 4 - %374 = load i32, i32* %64, align 4 - %375 = icmp eq i32 %373, %374 - %376 = zext i1 %375 to i64 - %377 = select i1 %375, float 1.000000e+00, float 0.000000e+00 - store float %377, float* %65, align 4 - br label %391 - -378: ; preds = %368 - %379 = load i32, i32* %63, align 4 - %380 = load i32, i32* %42, align 4 - %381 = sub nsw i32 %379, %380 - %382 = load i32, i32* %43, align 4 - %383 = mul nsw i32 %381, %382 - %384 = load i32, i32* %64, align 4 - %385 = load i32, i32* %42, align 4 - %386 = sub nsw i32 %384, %385 - %387 = add nsw i32 %383, %386 - %388 = sext i32 %387 to i64 - %389 = getelementptr inbounds [4 x float], [4 x float]* %56, i64 0, i64 %388 - %390 = load float, float* %389, align 4 - store float %390, float* %65, align 4 - br label %391 - -391: ; preds = %378, %372 - %392 = load float, float* %65, align 4 - %393 = load i32, i32* %63, align 4 - %394 = mul nsw i32 %393, 2 - %395 = load i32, i32* %64, align 4 - %396 = add nsw i32 %394, %395 - %397 = sext i32 %396 to i64 - %398 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 %397 - store float %392, float* %398, align 4 - br label %399 - -399: ; preds = %391 - %400 = load i32, i32* %64, align 4 - %401 = add nsw i32 %400, 1 - store i32 %401, i32* %64, align 4 - br label %361 - -402: ; preds = %361 - br label %403 - -403: ; preds = %402 - %404 = load i32, i32* %63, align 4 - %405 = add nsw i32 %404, 1 - store i32 %405, i32* %63, align 4 - br label %357 - -406: ; preds = %357 - %407 = load i32, i32* %42, align 4 - %408 = icmp eq i32 %407, 0 - br i1 %408, label %409, label %483 - -409: ; preds = %406 - store i32 0, i32* %66, align 4 - br label %410 - -410: ; preds = %422, %409 - %411 = load i32, i32* %66, align 4 - %412 = icmp slt i32 %411, 4 - br i1 %412, label %413, label %425 - -413: ; preds = %410 - %414 = load i32, i32* %66, align 4 - %415 = sext i32 %414 to i64 - %416 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 %415 - %417 = load float, float* %416, align 4 - %418 = load float*, float** %36, align 8 - %419 = load i32, i32* %66, align 4 - %420 = sext i32 %419 to i64 - %421 = getelementptr inbounds float, float* %418, i64 %420 - store float %417, float* %421, align 4 - br label %422 - -422: ; preds = %413 - %423 = load i32, i32* %66, align 4 - %424 = add nsw i32 %423, 1 - store i32 %424, i32* %66, align 4 - br label %410 - -425: ; preds = %410 - %426 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 0 - %427 = load float*, float** %35, align 8 - %428 = load float*, float** %37, align 8 - store float* %426, float** %12, align 8 - store float* %427, float** %13, align 8 - store float* %428, float** %14, align 8 - store i32 0, i32* %15, align 4 - br label %429 - -429: ; preds = %479, %425 - %430 = load i32, i32* %15, align 4 - 
%431 = icmp slt i32 %430, 2 - br i1 %431, label %432, label %482 - -432: ; preds = %429 - store i32 0, i32* %16, align 4 - br label %433 - -433: ; preds = %476, %432 - %434 = load i32, i32* %16, align 4 - %435 = icmp slt i32 %434, 2 - br i1 %435, label %436, label %479 - -436: ; preds = %433 - %437 = load float*, float** %14, align 8 - %438 = load i32, i32* %15, align 4 - %439 = mul nsw i32 2, %438 - %440 = load i32, i32* %16, align 4 - %441 = add nsw i32 %439, %440 - %442 = sext i32 %441 to i64 - %443 = getelementptr inbounds float, float* %437, i64 %442 - store float 0.000000e+00, float* %443, align 4 - store i32 0, i32* %17, align 4 - br label %444 - -444: ; preds = %447, %436 - %445 = load i32, i32* %17, align 4 - %446 = icmp slt i32 %445, 2 - br i1 %446, label %447, label %476 - -447: ; preds = %444 - %448 = load float*, float** %12, align 8 - %449 = load i32, i32* %15, align 4 - %450 = mul nsw i32 2, %449 - %451 = load i32, i32* %17, align 4 - %452 = add nsw i32 %450, %451 - %453 = sext i32 %452 to i64 - %454 = getelementptr inbounds float, float* %448, i64 %453 - %455 = load float, float* %454, align 4 - %456 = load float*, float** %13, align 8 - %457 = load i32, i32* %17, align 4 - %458 = mul nsw i32 2, %457 - %459 = load i32, i32* %16, align 4 - %460 = add nsw i32 %458, %459 - %461 = sext i32 %460 to i64 - %462 = getelementptr inbounds float, float* %456, i64 %461 - %463 = load float, float* %462, align 4 - %464 = fmul float %455, %463 - %465 = load float*, float** %14, align 8 - %466 = load i32, i32* %15, align 4 - %467 = mul nsw i32 2, %466 - %468 = load i32, i32* %16, align 4 - %469 = add nsw i32 %467, %468 - %470 = sext i32 %469 to i64 - %471 = getelementptr inbounds float, float* %465, i64 %470 - %472 = load float, float* %471, align 4 - %473 = fadd float %472, %464 - store float %473, float* %471, align 4 - %474 = load i32, i32* %17, align 4 - %475 = add nsw i32 %474, 1 - store i32 %475, i32* %17, align 4 - br label %444 - -476: ; preds = %444 - %477 = load i32, i32* %16, align 4 - %478 = add nsw i32 %477, 1 - store i32 %478, i32* %16, align 4 - br label %433 - -479: ; preds = %433 - %480 = load i32, i32* %15, align 4 - %481 = add nsw i32 %480, 1 - store i32 %481, i32* %15, align 4 - br label %429 - -482: ; preds = %429 - br label %642 - -483: ; preds = %406 - %484 = bitcast [4 x float]* %67 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %484, i8 0, i64 16, i1 false) - store i32 0, i32* %68, align 4 - br label %485 - -485: ; preds = %492, %483 - %486 = load i32, i32* %68, align 4 - %487 = icmp slt i32 %486, 4 - br i1 %487, label %488, label %495 - -488: ; preds = %485 - %489 = load i32, i32* %68, align 4 - %490 = sext i32 %489 to i64 - %491 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 %490 - store float 0.000000e+00, float* %491, align 4 - br label %492 - -492: ; preds = %488 - %493 = load i32, i32* %68, align 4 - %494 = add nsw i32 %493, 1 - store i32 %494, i32* %68, align 4 - br label %485 - -495: ; preds = %485 - %496 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 0 - %497 = load float*, float** %36, align 8 - %498 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 0 - store float* %496, float** %18, align 8 - store float* %497, float** %19, align 8 - store float* %498, float** %20, align 8 - store i32 0, i32* %21, align 4 - br label %499 - -499: ; preds = %549, %495 - %500 = load i32, i32* %21, align 4 - %501 = icmp slt i32 %500, 2 - br i1 %501, label %502, label %552 - -502: ; preds = %499 - store i32 0, 
i32* %22, align 4 - br label %503 - -503: ; preds = %546, %502 - %504 = load i32, i32* %22, align 4 - %505 = icmp slt i32 %504, 2 - br i1 %505, label %506, label %549 - -506: ; preds = %503 - %507 = load float*, float** %20, align 8 - %508 = load i32, i32* %21, align 4 - %509 = mul nsw i32 2, %508 - %510 = load i32, i32* %22, align 4 - %511 = add nsw i32 %509, %510 - %512 = sext i32 %511 to i64 - %513 = getelementptr inbounds float, float* %507, i64 %512 - store float 0.000000e+00, float* %513, align 4 - store i32 0, i32* %23, align 4 - br label %514 - -514: ; preds = %517, %506 - %515 = load i32, i32* %23, align 4 - %516 = icmp slt i32 %515, 2 - br i1 %516, label %517, label %546 - -517: ; preds = %514 - %518 = load float*, float** %18, align 8 - %519 = load i32, i32* %21, align 4 - %520 = mul nsw i32 2, %519 - %521 = load i32, i32* %23, align 4 - %522 = add nsw i32 %520, %521 - %523 = sext i32 %522 to i64 - %524 = getelementptr inbounds float, float* %518, i64 %523 - %525 = load float, float* %524, align 4 - %526 = load float*, float** %19, align 8 - %527 = load i32, i32* %23, align 4 - %528 = mul nsw i32 2, %527 - %529 = load i32, i32* %22, align 4 - %530 = add nsw i32 %528, %529 - %531 = sext i32 %530 to i64 - %532 = getelementptr inbounds float, float* %526, i64 %531 - %533 = load float, float* %532, align 4 - %534 = fmul float %525, %533 - %535 = load float*, float** %20, align 8 - %536 = load i32, i32* %21, align 4 - %537 = mul nsw i32 2, %536 - %538 = load i32, i32* %22, align 4 - %539 = add nsw i32 %537, %538 - %540 = sext i32 %539 to i64 - %541 = getelementptr inbounds float, float* %535, i64 %540 - %542 = load float, float* %541, align 4 - %543 = fadd float %542, %534 - store float %543, float* %541, align 4 - %544 = load i32, i32* %23, align 4 - %545 = add nsw i32 %544, 1 - store i32 %545, i32* %23, align 4 - br label %514 - -546: ; preds = %514 - %547 = load i32, i32* %22, align 4 - %548 = add nsw i32 %547, 1 - store i32 %548, i32* %22, align 4 - br label %503 - -549: ; preds = %503 - %550 = load i32, i32* %21, align 4 - %551 = add nsw i32 %550, 1 - store i32 %551, i32* %21, align 4 - br label %499 - -552: ; preds = %499 - store i32 0, i32* %69, align 4 - br label %553 - -553: ; preds = %565, %552 - %554 = load i32, i32* %69, align 4 - %555 = icmp slt i32 %554, 4 - br i1 %555, label %556, label %568 - -556: ; preds = %553 - %557 = load i32, i32* %69, align 4 - %558 = sext i32 %557 to i64 - %559 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 %558 - %560 = load float, float* %559, align 4 - %561 = load float*, float** %36, align 8 - %562 = load i32, i32* %69, align 4 - %563 = sext i32 %562 to i64 - %564 = getelementptr inbounds float, float* %561, i64 %563 - store float %560, float* %564, align 4 - br label %565 - -565: ; preds = %556 - %566 = load i32, i32* %69, align 4 - %567 = add nsw i32 %566, 1 - store i32 %567, i32* %69, align 4 - br label %553 - -568: ; preds = %553 - %569 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 0 - %570 = load float*, float** %37, align 8 - %571 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 0 - store float* %569, float** %24, align 8 - store float* %570, float** %25, align 8 - store float* %571, float** %26, align 8 - store i32 0, i32* %27, align 4 - br label %572 - -572: ; preds = %622, %568 - %573 = load i32, i32* %27, align 4 - %574 = icmp slt i32 %573, 2 - br i1 %574, label %575, label %625 - -575: ; preds = %572 - store i32 0, i32* %28, align 4 - br label %576 - -576: ; preds = 
%619, %575 - %577 = load i32, i32* %28, align 4 - %578 = icmp slt i32 %577, 2 - br i1 %578, label %579, label %622 - -579: ; preds = %576 - %580 = load float*, float** %26, align 8 - %581 = load i32, i32* %27, align 4 - %582 = mul nsw i32 2, %581 - %583 = load i32, i32* %28, align 4 - %584 = add nsw i32 %582, %583 - %585 = sext i32 %584 to i64 - %586 = getelementptr inbounds float, float* %580, i64 %585 - store float 0.000000e+00, float* %586, align 4 - store i32 0, i32* %29, align 4 - br label %587 - -587: ; preds = %590, %579 - %588 = load i32, i32* %29, align 4 - %589 = icmp slt i32 %588, 2 - br i1 %589, label %590, label %619 - -590: ; preds = %587 - %591 = load float*, float** %24, align 8 - %592 = load i32, i32* %27, align 4 - %593 = mul nsw i32 2, %592 - %594 = load i32, i32* %29, align 4 - %595 = add nsw i32 %593, %594 - %596 = sext i32 %595 to i64 - %597 = getelementptr inbounds float, float* %591, i64 %596 - %598 = load float, float* %597, align 4 - %599 = load float*, float** %25, align 8 - %600 = load i32, i32* %29, align 4 - %601 = mul nsw i32 2, %600 - %602 = load i32, i32* %28, align 4 - %603 = add nsw i32 %601, %602 - %604 = sext i32 %603 to i64 - %605 = getelementptr inbounds float, float* %599, i64 %604 - %606 = load float, float* %605, align 4 - %607 = fmul float %598, %606 - %608 = load float*, float** %26, align 8 - %609 = load i32, i32* %27, align 4 - %610 = mul nsw i32 2, %609 - %611 = load i32, i32* %28, align 4 - %612 = add nsw i32 %610, %611 - %613 = sext i32 %612 to i64 - %614 = getelementptr inbounds float, float* %608, i64 %613 - %615 = load float, float* %614, align 4 - %616 = fadd float %615, %607 - store float %616, float* %614, align 4 - %617 = load i32, i32* %29, align 4 - %618 = add nsw i32 %617, 1 - store i32 %618, i32* %29, align 4 - br label %587 - -619: ; preds = %587 - %620 = load i32, i32* %28, align 4 - %621 = add nsw i32 %620, 1 - store i32 %621, i32* %28, align 4 - br label %576 - -622: ; preds = %576 - %623 = load i32, i32* %27, align 4 - %624 = add nsw i32 %623, 1 - store i32 %624, i32* %27, align 4 - br label %572 - -625: ; preds = %572 - store i32 0, i32* %70, align 4 - br label %626 - -626: ; preds = %638, %625 - %627 = load i32, i32* %70, align 4 - %628 = icmp slt i32 %627, 4 - br i1 %628, label %629, label %641 - -629: ; preds = %626 - %630 = load i32, i32* %70, align 4 - %631 = sext i32 %630 to i64 - %632 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 %631 - %633 = load float, float* %632, align 4 - %634 = load float*, float** %37, align 8 - %635 = load i32, i32* %70, align 4 - %636 = sext i32 %635 to i64 - %637 = getelementptr inbounds float, float* %634, i64 %636 - store float %633, float* %637, align 4 - br label %638 - -638: ; preds = %629 - %639 = load i32, i32* %70, align 4 - %640 = add nsw i32 %639, 1 - store i32 %640, i32* %70, align 4 - br label %626 - -641: ; preds = %626 - br label %642 - -642: ; preds = %641, %482 - br label %643 - -643: ; preds = %642 - %644 = load i32, i32* %42, align 4 - %645 = add nsw i32 %644, 1 - store i32 %645, i32* %42, align 4 - br label %116 - -646: ; preds = %116 - %647 = load float*, float** %36, align 8 - store float* %647, float** %30, align 8 - store i32 0, i32* %31, align 4 - br label %648 - -648: ; preds = %691, %646 - %649 = load i32, i32* %31, align 4 - %650 = icmp slt i32 %649, 2 - br i1 %650, label %651, label %694 - -651: ; preds = %648 - %652 = load i32, i32* %31, align 4 - %653 = add nsw i32 %652, 1 - store i32 %653, i32* %32, align 4 - br label %654 - -654: ; preds 
= %657, %651 - %655 = load i32, i32* %32, align 4 - %656 = icmp slt i32 %655, 2 - br i1 %656, label %657, label %691 - -657: ; preds = %654 - %658 = load float*, float** %30, align 8 - %659 = load i32, i32* %31, align 4 - %660 = mul nsw i32 %659, 2 - %661 = load i32, i32* %32, align 4 - %662 = add nsw i32 %660, %661 - %663 = sext i32 %662 to i64 - %664 = getelementptr inbounds float, float* %658, i64 %663 - %665 = load float, float* %664, align 4 - store float %665, float* %33, align 4 - %666 = load float*, float** %30, align 8 - %667 = load i32, i32* %32, align 4 - %668 = mul nsw i32 %667, 2 - %669 = load i32, i32* %31, align 4 - %670 = add nsw i32 %668, %669 - %671 = sext i32 %670 to i64 - %672 = getelementptr inbounds float, float* %666, i64 %671 - %673 = load float, float* %672, align 4 - %674 = load float*, float** %30, align 8 - %675 = load i32, i32* %31, align 4 - %676 = mul nsw i32 %675, 2 - %677 = load i32, i32* %32, align 4 - %678 = add nsw i32 %676, %677 - %679 = sext i32 %678 to i64 - %680 = getelementptr inbounds float, float* %674, i64 %679 - store float %673, float* %680, align 4 - %681 = load float, float* %33, align 4 - %682 = load float*, float** %30, align 8 - %683 = load i32, i32* %32, align 4 - %684 = mul nsw i32 %683, 2 - %685 = load i32, i32* %31, align 4 - %686 = add nsw i32 %684, %685 - %687 = sext i32 %686 to i64 - %688 = getelementptr inbounds float, float* %682, i64 %687 - store float %681, float* %688, align 4 - %689 = load i32, i32* %32, align 4 - %690 = add nsw i32 %689, 1 - store i32 %690, i32* %32, align 4 - br label %654 - -691: ; preds = %654 - %692 = load i32, i32* %31, align 4 - %693 = add nsw i32 %692, 1 - store i32 %693, i32* %31, align 4 - br label %648 - -694: ; preds = %648 - ret void -} - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - %7 = alloca float*, align 8 - %8 = alloca i32, align 4 - %9 = alloca i32, align 4 - %10 = alloca i32, align 4 - %11 = alloca i32, align 4 - %12 = alloca float*, align 8 - %13 = alloca float*, align 8 - %14 = alloca i32, align 4 - %15 = alloca i32, align 4 - %16 = alloca float, align 4 - %17 = alloca float*, align 8 - %18 = alloca float*, align 8 - %19 = alloca i32, align 4 - %20 = alloca float, align 4 - %21 = alloca i32, align 4 - %22 = alloca float*, align 8 - %23 = alloca i32, align 4 - %24 = alloca i32, align 4 - %25 = alloca float, align 4 - %26 = alloca float*, align 8 - %27 = alloca i32, align 4 - %28 = alloca i32, align 4 - %29 = alloca float, align 4 - %30 = alloca float*, align 8 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - %31 = load float*, float** %6, align 8 - %32 = bitcast float* %31 to i8* - %33 = load float*, float** %4, align 8 - %34 = bitcast float* %33 to i8* - %35 = load float*, float** %6, align 8 - %36 = bitcast float* %35 to i8* - %37 = call i64 @llvm.objectsize.i64.p0i8(i8* %36, i1 false, i1 true, i1 false) - %38 = call i8* @__memcpy_chk(i8* %32, i8* %34, i64 16, i64 %37) #7 - %39 = call i8* @calloc(i64 4, i64 4) #8 - %40 = bitcast i8* %39 to float* - store float* %40, float** %7, align 8 - store i32 0, i32* %8, align 4 - br label %41 - -41: ; preds = %65, %3 - %42 = load i32, i32* %8, align 4 - %43 = 
icmp slt i32 %42, 2 - br i1 %43, label %44, label %68 - -44: ; preds = %41 - store i32 0, i32* %9, align 4 - br label %45 - -45: ; preds = %61, %44 - %46 = load i32, i32* %9, align 4 - %47 = icmp slt i32 %46, 2 - br i1 %47, label %48, label %64 - -48: ; preds = %45 - %49 = load i32, i32* %8, align 4 - %50 = load i32, i32* %9, align 4 - %51 = icmp eq i32 %49, %50 - %52 = zext i1 %51 to i32 - %53 = sitofp i32 %52 to float - %54 = load float*, float** %7, align 8 - %55 = load i32, i32* %8, align 4 - %56 = mul nsw i32 %55, 2 - %57 = load i32, i32* %9, align 4 - %58 = add nsw i32 %56, %57 - %59 = sext i32 %58 to i64 - %60 = getelementptr inbounds float, float* %54, i64 %59 - store float %53, float* %60, align 4 - br label %61 - -61: ; preds = %48 - %62 = load i32, i32* %9, align 4 - %63 = add nsw i32 %62, 1 - store i32 %63, i32* %9, align 4 - br label %45 - -64: ; preds = %45 - br label %65 - -65: ; preds = %64 - %66 = load i32, i32* %8, align 4 - %67 = add nsw i32 %66, 1 - store i32 %67, i32* %8, align 4 - br label %41 - -68: ; preds = %41 - store i32 0, i32* %10, align 4 - br label %69 - -69: ; preds = %343, %68 - %70 = load i32, i32* %10, align 4 - %71 = icmp slt i32 %70, 1 - br i1 %71, label %72, label %346 - -72: ; preds = %69 - %73 = load i32, i32* %10, align 4 - %74 = sub nsw i32 2, %73 - store i32 %74, i32* %11, align 4 - %75 = load i32, i32* %11, align 4 - %76 = sext i32 %75 to i64 - %77 = call i8* @calloc(i64 4, i64 %76) #8 - %78 = bitcast i8* %77 to float* - store float* %78, float** %12, align 8 - %79 = load i32, i32* %11, align 4 - %80 = sext i32 %79 to i64 - %81 = call i8* @calloc(i64 4, i64 %80) #8 - %82 = bitcast i8* %81 to float* - store float* %82, float** %13, align 8 - store i32 0, i32* %14, align 4 - br label %83 - -83: ; preds = %115, %72 - %84 = load i32, i32* %14, align 4 - %85 = load i32, i32* %11, align 4 - %86 = icmp slt i32 %84, %85 - br i1 %86, label %87, label %118 - -87: ; preds = %83 - %88 = load i32, i32* %10, align 4 - %89 = load i32, i32* %14, align 4 - %90 = add nsw i32 %88, %89 - store i32 %90, i32* %15, align 4 - %91 = load float*, float** %6, align 8 - %92 = load i32, i32* %15, align 4 - %93 = mul nsw i32 %92, 2 - %94 = load i32, i32* %10, align 4 - %95 = add nsw i32 %93, %94 - %96 = sext i32 %95 to i64 - %97 = getelementptr inbounds float, float* %91, i64 %96 - %98 = load float, float* %97, align 4 - %99 = load float*, float** %12, align 8 - %100 = load i32, i32* %14, align 4 - %101 = sext i32 %100 to i64 - %102 = getelementptr inbounds float, float* %99, i64 %101 - store float %98, float* %102, align 4 - %103 = load float*, float** %7, align 8 - %104 = load i32, i32* %15, align 4 - %105 = mul nsw i32 %104, 2 - %106 = load i32, i32* %10, align 4 - %107 = add nsw i32 %105, %106 - %108 = sext i32 %107 to i64 - %109 = getelementptr inbounds float, float* %103, i64 %108 - %110 = load float, float* %109, align 4 - %111 = load float*, float** %13, align 8 - %112 = load i32, i32* %14, align 4 - %113 = sext i32 %112 to i64 - %114 = getelementptr inbounds float, float* %111, i64 %113 - store float %110, float* %114, align 4 - br label %115 - -115: ; preds = %87 - %116 = load i32, i32* %14, align 4 - %117 = add nsw i32 %116, 1 - store i32 %117, i32* %14, align 4 - br label %83 - -118: ; preds = %83 - %119 = load float*, float** %12, align 8 - %120 = getelementptr inbounds float, float* %119, i64 0 - %121 = load float, float* %120, align 4 - %122 = call float @no_opt_sgn(float %121) - %123 = fneg float %122 - %124 = load float*, float** %12, align 8 - %125 = load 
i32, i32* %11, align 4 - %126 = call float @no_opt_naive_norm(float* %124, i32 %125) - %127 = fmul float %123, %126 - store float %127, float* %16, align 4 - %128 = load i32, i32* %11, align 4 - %129 = sext i32 %128 to i64 - %130 = call i8* @calloc(i64 4, i64 %129) #8 - %131 = bitcast i8* %130 to float* - store float* %131, float** %17, align 8 - %132 = load i32, i32* %11, align 4 - %133 = sext i32 %132 to i64 - %134 = call i8* @calloc(i64 4, i64 %133) #8 - %135 = bitcast i8* %134 to float* - store float* %135, float** %18, align 8 - store i32 0, i32* %19, align 4 - br label %136 - -136: ; preds = %158, %118 - %137 = load i32, i32* %19, align 4 - %138 = load i32, i32* %11, align 4 - %139 = icmp slt i32 %137, %138 - br i1 %139, label %140, label %161 - -140: ; preds = %136 - %141 = load float*, float** %12, align 8 - %142 = load i32, i32* %19, align 4 - %143 = sext i32 %142 to i64 - %144 = getelementptr inbounds float, float* %141, i64 %143 - %145 = load float, float* %144, align 4 - %146 = load float, float* %16, align 4 - %147 = load float*, float** %13, align 8 - %148 = load i32, i32* %19, align 4 - %149 = sext i32 %148 to i64 - %150 = getelementptr inbounds float, float* %147, i64 %149 - %151 = load float, float* %150, align 4 - %152 = fmul float %146, %151 - %153 = fadd float %145, %152 - %154 = load float*, float** %17, align 8 - %155 = load i32, i32* %19, align 4 - %156 = sext i32 %155 to i64 - %157 = getelementptr inbounds float, float* %154, i64 %156 - store float %153, float* %157, align 4 - br label %158 - -158: ; preds = %140 - %159 = load i32, i32* %19, align 4 - %160 = add nsw i32 %159, 1 - store i32 %160, i32* %19, align 4 - br label %136 - -161: ; preds = %136 - %162 = load float*, float** %17, align 8 - %163 = load i32, i32* %11, align 4 - %164 = call float @no_opt_naive_norm(float* %162, i32 %163) - store float %164, float* %20, align 4 - store i32 0, i32* %21, align 4 - br label %165 - -165: ; preds = %182, %161 - %166 = load i32, i32* %21, align 4 - %167 = load i32, i32* %11, align 4 - %168 = icmp slt i32 %166, %167 - br i1 %168, label %169, label %185 - -169: ; preds = %165 - %170 = load float*, float** %17, align 8 - %171 = load i32, i32* %21, align 4 - %172 = sext i32 %171 to i64 - %173 = getelementptr inbounds float, float* %170, i64 %172 - %174 = load float, float* %173, align 4 - %175 = load float, float* %20, align 4 - %176 = fadd float %175, 0x3EE4F8B580000000 - %177 = fdiv float %174, %176 - %178 = load float*, float** %18, align 8 - %179 = load i32, i32* %21, align 4 - %180 = sext i32 %179 to i64 - %181 = getelementptr inbounds float, float* %178, i64 %180 - store float %177, float* %181, align 4 - br label %182 - -182: ; preds = %169 - %183 = load i32, i32* %21, align 4 - %184 = add nsw i32 %183, 1 - store i32 %184, i32* %21, align 4 - br label %165 - -185: ; preds = %165 - %186 = load i32, i32* %11, align 4 - %187 = load i32, i32* %11, align 4 - %188 = mul nsw i32 %186, %187 - %189 = sext i32 %188 to i64 - %190 = call i8* @calloc(i64 4, i64 %189) #8 - %191 = bitcast i8* %190 to float* - store float* %191, float** %22, align 8 - store i32 0, i32* %23, align 4 - br label %192 - -192: ; preds = %233, %185 - %193 = load i32, i32* %23, align 4 - %194 = load i32, i32* %11, align 4 - %195 = icmp slt i32 %193, %194 - br i1 %195, label %196, label %236 - -196: ; preds = %192 - store i32 0, i32* %24, align 4 - br label %197 - -197: ; preds = %229, %196 - %198 = load i32, i32* %24, align 4 - %199 = load i32, i32* %11, align 4 - %200 = icmp slt i32 %198, %199 - br i1 
%200, label %201, label %232 - -201: ; preds = %197 - %202 = load i32, i32* %23, align 4 - %203 = load i32, i32* %24, align 4 - %204 = icmp eq i32 %202, %203 - %205 = zext i1 %204 to i64 - %206 = select i1 %204, float 1.000000e+00, float 0.000000e+00 - %207 = load float*, float** %18, align 8 - %208 = load i32, i32* %23, align 4 - %209 = sext i32 %208 to i64 - %210 = getelementptr inbounds float, float* %207, i64 %209 - %211 = load float, float* %210, align 4 - %212 = fmul float 2.000000e+00, %211 - %213 = load float*, float** %18, align 8 - %214 = load i32, i32* %24, align 4 - %215 = sext i32 %214 to i64 - %216 = getelementptr inbounds float, float* %213, i64 %215 - %217 = load float, float* %216, align 4 - %218 = fmul float %212, %217 - %219 = fsub float %206, %218 - store float %219, float* %25, align 4 - %220 = load float, float* %25, align 4 - %221 = load float*, float** %22, align 8 - %222 = load i32, i32* %23, align 4 - %223 = load i32, i32* %11, align 4 - %224 = mul nsw i32 %222, %223 - %225 = load i32, i32* %24, align 4 - %226 = add nsw i32 %224, %225 - %227 = sext i32 %226 to i64 - %228 = getelementptr inbounds float, float* %221, i64 %227 - store float %220, float* %228, align 4 - br label %229 - -229: ; preds = %201 - %230 = load i32, i32* %24, align 4 - %231 = add nsw i32 %230, 1 - store i32 %231, i32* %24, align 4 - br label %197 - -232: ; preds = %197 - br label %233 - -233: ; preds = %232 - %234 = load i32, i32* %23, align 4 - %235 = add nsw i32 %234, 1 - store i32 %235, i32* %23, align 4 - br label %192 - -236: ; preds = %192 - %237 = call i8* @calloc(i64 4, i64 4) #8 - %238 = bitcast i8* %237 to float* - store float* %238, float** %26, align 8 - store i32 0, i32* %27, align 4 - br label %239 - -239: ; preds = %287, %236 - %240 = load i32, i32* %27, align 4 - %241 = icmp slt i32 %240, 2 - br i1 %241, label %242, label %290 - -242: ; preds = %239 - store i32 0, i32* %28, align 4 - br label %243 - -243: ; preds = %283, %242 - %244 = load i32, i32* %28, align 4 - %245 = icmp slt i32 %244, 2 - br i1 %245, label %246, label %286 - -246: ; preds = %243 - %247 = load i32, i32* %27, align 4 - %248 = load i32, i32* %10, align 4 - %249 = icmp slt i32 %247, %248 - br i1 %249, label %254, label %250 - -250: ; preds = %246 - %251 = load i32, i32* %28, align 4 - %252 = load i32, i32* %10, align 4 - %253 = icmp slt i32 %251, %252 - br i1 %253, label %254, label %260 - -254: ; preds = %250, %246 - %255 = load i32, i32* %27, align 4 - %256 = load i32, i32* %28, align 4 - %257 = icmp eq i32 %255, %256 - %258 = zext i1 %257 to i64 - %259 = select i1 %257, float 1.000000e+00, float 0.000000e+00 - store float %259, float* %29, align 4 - br label %274 - -260: ; preds = %250 - %261 = load float*, float** %22, align 8 - %262 = load i32, i32* %27, align 4 - %263 = load i32, i32* %10, align 4 - %264 = sub nsw i32 %262, %263 - %265 = load i32, i32* %11, align 4 - %266 = mul nsw i32 %264, %265 - %267 = load i32, i32* %28, align 4 - %268 = load i32, i32* %10, align 4 - %269 = sub nsw i32 %267, %268 - %270 = add nsw i32 %266, %269 - %271 = sext i32 %270 to i64 - %272 = getelementptr inbounds float, float* %261, i64 %271 - %273 = load float, float* %272, align 4 - store float %273, float* %29, align 4 - br label %274 - -274: ; preds = %260, %254 - %275 = load float, float* %29, align 4 - %276 = load float*, float** %26, align 8 - %277 = load i32, i32* %27, align 4 - %278 = mul nsw i32 %277, 2 - %279 = load i32, i32* %28, align 4 - %280 = add nsw i32 %278, %279 - %281 = sext i32 %280 to i64 - %282 = 
getelementptr inbounds float, float* %276, i64 %281 - store float %275, float* %282, align 4 - br label %283 - -283: ; preds = %274 - %284 = load i32, i32* %28, align 4 - %285 = add nsw i32 %284, 1 - store i32 %285, i32* %28, align 4 - br label %243 - -286: ; preds = %243 - br label %287 - -287: ; preds = %286 - %288 = load i32, i32* %27, align 4 - %289 = add nsw i32 %288, 1 - store i32 %289, i32* %27, align 4 - br label %239 - -290: ; preds = %239 - %291 = load i32, i32* %10, align 4 - %292 = icmp eq i32 %291, 0 - br i1 %292, label %293, label %305 - -293: ; preds = %290 - %294 = load float*, float** %5, align 8 - %295 = bitcast float* %294 to i8* - %296 = load float*, float** %26, align 8 - %297 = bitcast float* %296 to i8* - %298 = load float*, float** %5, align 8 - %299 = bitcast float* %298 to i8* - %300 = call i64 @llvm.objectsize.i64.p0i8(i8* %299, i1 false, i1 true, i1 false) - %301 = call i8* @__memcpy_chk(i8* %295, i8* %297, i64 16, i64 %300) #7 - %302 = load float*, float** %26, align 8 - %303 = load float*, float** %4, align 8 - %304 = load float*, float** %6, align 8 - call void @no_opt_naive_fixed_matrix_multiply(float* %302, float* %303, float* %304) - br label %330 - -305: ; preds = %290 - %306 = call i8* @calloc(i64 4, i64 4) #8 - %307 = bitcast i8* %306 to float* - store float* %307, float** %30, align 8 - %308 = load float*, float** %26, align 8 - %309 = load float*, float** %5, align 8 - %310 = load float*, float** %30, align 8 - call void @no_opt_naive_fixed_matrix_multiply(float* %308, float* %309, float* %310) - %311 = load float*, float** %5, align 8 - %312 = bitcast float* %311 to i8* - %313 = load float*, float** %30, align 8 - %314 = bitcast float* %313 to i8* - %315 = load float*, float** %5, align 8 - %316 = bitcast float* %315 to i8* - %317 = call i64 @llvm.objectsize.i64.p0i8(i8* %316, i1 false, i1 true, i1 false) - %318 = call i8* @__memcpy_chk(i8* %312, i8* %314, i64 16, i64 %317) #7 - %319 = load float*, float** %26, align 8 - %320 = load float*, float** %6, align 8 - %321 = load float*, float** %30, align 8 - call void @no_opt_naive_fixed_matrix_multiply(float* %319, float* %320, float* %321) - %322 = load float*, float** %6, align 8 - %323 = bitcast float* %322 to i8* - %324 = load float*, float** %30, align 8 - %325 = bitcast float* %324 to i8* - %326 = load float*, float** %6, align 8 - %327 = bitcast float* %326 to i8* - %328 = call i64 @llvm.objectsize.i64.p0i8(i8* %327, i1 false, i1 true, i1 false) - %329 = call i8* @__memcpy_chk(i8* %323, i8* %325, i64 16, i64 %328) #7 - br label %330 - -330: ; preds = %305, %293 - %331 = load float*, float** %12, align 8 - %332 = bitcast float* %331 to i8* - call void @free(i8* %332) - %333 = load float*, float** %13, align 8 - %334 = bitcast float* %333 to i8* - call void @free(i8* %334) - %335 = load float*, float** %17, align 8 - %336 = bitcast float* %335 to i8* - call void @free(i8* %336) - %337 = load float*, float** %18, align 8 - %338 = bitcast float* %337 to i8* - call void @free(i8* %338) - %339 = load float*, float** %22, align 8 - %340 = bitcast float* %339 to i8* - call void @free(i8* %340) - %341 = load float*, float** %26, align 8 - %342 = bitcast float* %341 to i8* - call void @free(i8* %342) - br label %343 - -343: ; preds = %330 - %344 = load i32, i32* %10, align 4 - %345 = add nsw i32 %344, 1 - store i32 %345, i32* %10, align 4 - br label %69 - -346: ; preds = %69 - %347 = load float*, float** %5, align 8 - call void @no_opt_naive_fixed_transpose(float* %347) - ret void -} - -; Function Attrs: 
nounwind -declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 - -; Function Attrs: nounwind readnone speculatable willreturn -declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 - -; Function Attrs: allocsize(0,1) -declare i8* @calloc(i64, i64) #5 - -declare void @free(i8*) #6 - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #1 { - %1 = alloca i32, align 4 - %2 = alloca i64, align 8 - %3 = alloca [4 x float], align 16 - %4 = alloca i32, align 4 - %5 = alloca [4 x float], align 16 - %6 = alloca [4 x float], align 16 - %7 = alloca [4 x float], align 16 - %8 = alloca [4 x float], align 16 - %9 = alloca i32, align 4 - %10 = alloca i32, align 4 - %11 = alloca i32, align 4 - %12 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %13 = call i64 @time(i64* null) - store i64 %13, i64* %2, align 8 - %14 = call i64 @time(i64* %2) - %15 = trunc i64 %14 to i32 - call void @srand(i32 %15) - %16 = bitcast [4 x float]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %16, i8 0, i64 16, i1 false) - store i32 0, i32* %4, align 4 - br label %17 - -17: ; preds = %33, %0 - %18 = load i32, i32* %4, align 4 - %19 = icmp slt i32 %18, 4 - br i1 %19, label %20, label %36 - -20: ; preds = %17 - %21 = call i32 @rand() - %22 = sitofp i32 %21 to float - %23 = fdiv float %22, 0x41747AE140000000 - %24 = load i32, i32* %4, align 4 - %25 = sext i32 %24 to i64 - %26 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 %25 - store float %23, float* %26, align 4 - %27 = load i32, i32* %4, align 4 - %28 = sext i32 %27 to i64 - %29 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 %28 - %30 = load float, float* %29, align 4 - %31 = fpext float %30 to double - %32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %31) - br label %33 - -33: ; preds = %20 - %34 = load i32, i32* %4, align 4 - %35 = add nsw i32 %34, 1 - store i32 %35, i32* %4, align 4 - br label %17 - -36: ; preds = %17 - %37 = bitcast [4 x float]* %5 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %37, i8 0, i64 16, i1 false) - %38 = bitcast [4 x float]* %6 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %38, i8 0, i64 16, i1 false) - %39 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - %40 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 - %41 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - call void @naive_fixed_qr_decomp(float* %39, float* %40, float* %41) - %42 = bitcast [4 x float]* %7 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %42, i8 0, i64 16, i1 false) - %43 = bitcast [4 x float]* %8 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %43, i8 0, i64 16, i1 false) - %44 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - %45 = getelementptr inbounds [4 x float], [4 x float]* %7, i64 0, i64 0 - %46 = getelementptr inbounds [4 x float], [4 x float]* %8, i64 0, i64 0 - call void @no_opt_naive_fixed_qr_decomp(float* %44, float* %45, float* %46) - store i32 0, i32* %9, align 4 - br label %47 - -47: ; preds = %77, %36 - %48 = load i32, i32* %9, align 4 - %49 = icmp slt i32 %48, 2 - br i1 %49, label %50, label %80 - -50: ; preds = %47 - store i32 0, i32* %10, align 4 - br label %51 - -51: ; preds = %73, %50 - %52 = load i32, i32* %10, align 4 - %53 = icmp slt i32 %52, 2 - br i1 %53, label %54, label %76 - -54: ; preds = %51 - %55 = load i32, i32* %9, align 4 - %56 = mul nsw i32 %55, 2 - %57 = load i32, i32* %10, align 
4 - %58 = add nsw i32 %56, %57 - %59 = sext i32 %58 to i64 - %60 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 %59 - %61 = load float, float* %60, align 4 - %62 = fpext float %61 to double - %63 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %62) - %64 = load i32, i32* %9, align 4 - %65 = mul nsw i32 %64, 2 - %66 = load i32, i32* %10, align 4 - %67 = add nsw i32 %65, %66 - %68 = sext i32 %67 to i64 - %69 = getelementptr inbounds [4 x float], [4 x float]* %7, i64 0, i64 %68 - %70 = load float, float* %69, align 4 - %71 = fpext float %70 to double - %72 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %71) - br label %73 - -73: ; preds = %54 - %74 = load i32, i32* %10, align 4 - %75 = add nsw i32 %74, 1 - store i32 %75, i32* %10, align 4 - br label %51 - -76: ; preds = %51 - br label %77 - -77: ; preds = %76 - %78 = load i32, i32* %9, align 4 - %79 = add nsw i32 %78, 1 - store i32 %79, i32* %9, align 4 - br label %47 - -80: ; preds = %47 - store i32 0, i32* %11, align 4 - br label %81 - -81: ; preds = %111, %80 - %82 = load i32, i32* %11, align 4 - %83 = icmp slt i32 %82, 2 - br i1 %83, label %84, label %114 - -84: ; preds = %81 - store i32 0, i32* %12, align 4 - br label %85 - -85: ; preds = %107, %84 - %86 = load i32, i32* %12, align 4 - %87 = icmp slt i32 %86, 2 - br i1 %87, label %88, label %110 - -88: ; preds = %85 - %89 = load i32, i32* %11, align 4 - %90 = mul nsw i32 %89, 2 - %91 = load i32, i32* %12, align 4 - %92 = add nsw i32 %90, %91 - %93 = sext i32 %92 to i64 - %94 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 %93 - %95 = load float, float* %94, align 4 - %96 = fpext float %95 to double - %97 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %96) - %98 = load i32, i32* %11, align 4 - %99 = mul nsw i32 %98, 2 - %100 = load i32, i32* %12, align 4 - %101 = add nsw i32 %99, %100 - %102 = sext i32 %101 to i64 - %103 = getelementptr inbounds [4 x float], [4 x float]* %8, i64 0, i64 %102 - %104 = load float, float* %103, align 4 - %105 = fpext float %104 to double - %106 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %105) - br label %107 - -107: ; preds = %88 - %108 = load i32, i32* %12, align 4 - %109 = add nsw i32 %108, 1 - store i32 %109, i32* %12, align 4 - br label %85 - -110: ; preds = %85 - br label %111 - -111: ; preds = %110 - %112 = load i32, i32* %11, align 4 - %113 = add nsw i32 %112, 1 - store i32 %113, i32* %11, align 4 - br label %81 - -114: ; preds = %81 - %115 = load i32, i32* %1, align 4 - ret i32 %115 -} - -declare i64 @time(i64*) #6 - -declare void @srand(i32) #6 - -declare i32 @rand() #6 - -declare i32 @printf(i8*, ...) 
#6
-
-attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone speculatable willreturn }
-attributes #3 = { argmemonly nounwind willreturn writeonly }
-attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #7 = { nounwind }
-attributes #8 = { allocsize(0,1) }
-
-!llvm.module.flags = !{!0, !1}
-!llvm.ident = !{!2}
-
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{i32 7, !"PIC Level", i32 2}
-!2 = !{!"clang version 11.0.1"}
diff --git a/src/dios-egraphs/Diospyros/failed-test/dce.ll b/src/dios-egraphs/Diospyros/failed-test/dce.ll
deleted file mode 100644
index 19512d60..00000000
--- a/src/dios-egraphs/Diospyros/failed-test/dce.ll
+++ /dev/null
@@ -1,4711 +0,0 @@
-; ModuleID = 'build/diospyros.ll'
-source_filename = "fail-tests/qr-decomp-local-arrays.c"
-target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.14.0"
-
-@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1
-@.str.1 = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1
-@.str.2 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1
-@.str.3 = private
unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 -@.str.4 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @sgn(float %0) #0 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_sgn(float %0) #1 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @naive_norm(float* %0, i32 %1) #0 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - %wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 - %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 = fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - 
%epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32(float) #2 - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_naive_norm(float* %0, i32 %1) #1 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - %wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 - %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 = fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], 
[ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_transpose(float* %0) #0 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - %6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_transpose(float* %0) #1 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - %6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { -.preheader: - store float 0.000000e+00, float* %2, align 4 - %3 = getelementptr float, float* %0, i32 0 - %4 = load float, float* %3, align 4 - %5 = insertelement <4 x float> zeroinitializer, float %4, i32 0 - %6 = insertelement <4 x float> %5, float 0.000000e+00, i32 1 - %7 = insertelement <4 x float> %6, float 0.000000e+00, i32 2 - %8 = insertelement <4 x float> %7, float 0.000000e+00, i32 3 - %9 = getelementptr float, float* %1, i32 0 - %10 = load float, float* %9, align 4 - %11 = insertelement <4 x float> zeroinitializer, float %10, i32 0 - %12 = insertelement <4 x float> %11, float 0.000000e+00, i32 1 - %13 = insertelement <4 x float> %12, float 0.000000e+00, i32 2 - %14 = insertelement <4 x float> %13, float 0.000000e+00, i32 3 - %15 = call <4 x float> @llvm.fma.v4f32(<4 x float> %8, <4 x float> %14, <4 x float> zeroinitializer) - %16 = extractelement <4 x float> %15, i32 0 - store float %16, float* %2, align 4 - %17 = getelementptr float, float* %0, i32 0 - %18 = load float, float* %17, align 4 - %19 = insertelement <4 x float> zeroinitializer, float %18, i32 0 - %20 = insertelement <4 x float> %19, float 1.000000e+00, i32 1 - %21 = insertelement <4 x float> %20, float 1.000000e+00, i32 2 - %22 = insertelement <4 x float> %21, float 1.000000e+00, i32 3 - %23 = insertelement <4 x float> zeroinitializer, float %10, i32 0 - %24 = insertelement <4 x float> %23, float 0.000000e+00, i32 1 - %25 = insertelement <4 x float> %24, float 0.000000e+00, i32 2 - %26 = insertelement <4 x float> %25, float 0.000000e+00, i32 3 - %27 = fmul <4 x float> %22, %26 - %28 = fadd <4 x float> %27, zeroinitializer - %29 = getelementptr float, float* %0, i32 0 - %30 = getelementptr inbounds float, float* %29, i64 1 - %31 = load float, float* %30, align 4 - %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 - %33 = insertelement <4 x float> %32, float 0.000000e+00, i32 1 - %34 = insertelement <4 x float> %33, float 0.000000e+00, i32 2 - %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 3 - %36 = getelementptr float, float* %1, i32 0 - %37 = getelementptr inbounds float, float* %36, i64 2 - %38 = load float, float* %37, align 4 - %39 = insertelement <4 x float> zeroinitializer, float %38, i32 0 - %40 = insertelement <4 x float> %39, float 0.000000e+00, i32 1 - %41 = insertelement <4 x float> %40, float 0.000000e+00, i32 2 - %42 = insertelement <4 x float> 
%41, float 0.000000e+00, i32 3 - %43 = call <4 x float> @llvm.fma.v4f32(<4 x float> %35, <4 x float> %42, <4 x float> %28) - %44 = extractelement <4 x float> %43, i32 0 - store float %44, float* %2, align 4 - %45 = extractelement <4 x float> %43, i32 1 - %46 = getelementptr float, float* %2, i32 0 - %47 = getelementptr inbounds float, float* %46, i64 1 - store float %45, float* %47, align 4 - %48 = getelementptr float, float* %0, i32 0 - %49 = load float, float* %48, align 4 - %50 = insertelement <4 x float> zeroinitializer, float %49, i32 0 - %51 = insertelement <4 x float> %50, float 0.000000e+00, i32 1 - %52 = insertelement <4 x float> %51, float 0.000000e+00, i32 2 - %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 3 - %54 = getelementptr float, float* %1, i32 0 - %55 = getelementptr inbounds float, float* %54, i64 1 - %56 = load float, float* %55, align 4 - %57 = insertelement <4 x float> zeroinitializer, float %56, i32 0 - %58 = insertelement <4 x float> %57, float 0.000000e+00, i32 1 - %59 = insertelement <4 x float> %58, float 0.000000e+00, i32 2 - %60 = insertelement <4 x float> %59, float 0.000000e+00, i32 3 - %61 = call <4 x float> @llvm.fma.v4f32(<4 x float> %53, <4 x float> %60, <4 x float> zeroinitializer) - %62 = extractelement <4 x float> %61, i32 0 - %63 = getelementptr float, float* %2, i32 0 - %64 = getelementptr inbounds float, float* %63, i64 1 - store float %62, float* %64, align 4 - %65 = insertelement <4 x float> zeroinitializer, float %49, i32 0 - %66 = insertelement <4 x float> %65, float 1.000000e+00, i32 1 - %67 = insertelement <4 x float> %66, float 1.000000e+00, i32 2 - %68 = insertelement <4 x float> %67, float 1.000000e+00, i32 3 - %69 = insertelement <4 x float> zeroinitializer, float %56, i32 0 - %70 = insertelement <4 x float> %69, float 0.000000e+00, i32 1 - %71 = insertelement <4 x float> %70, float 0.000000e+00, i32 2 - %72 = insertelement <4 x float> %71, float 0.000000e+00, i32 3 - %73 = fmul <4 x float> %68, %72 - %74 = fadd <4 x float> %73, zeroinitializer - %75 = getelementptr float, float* %0, i32 0 - %76 = getelementptr inbounds float, float* %75, i64 1 - %77 = load float, float* %76, align 4 - %78 = insertelement <4 x float> zeroinitializer, float %77, i32 0 - %79 = insertelement <4 x float> %78, float 0.000000e+00, i32 1 - %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 2 - %81 = insertelement <4 x float> %80, float 0.000000e+00, i32 3 - %82 = getelementptr float, float* %1, i32 0 - %83 = getelementptr inbounds float, float* %82, i64 3 - %84 = load float, float* %83, align 4 - %85 = insertelement <4 x float> zeroinitializer, float %84, i32 0 - %86 = insertelement <4 x float> %85, float 0.000000e+00, i32 1 - %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 2 - %88 = insertelement <4 x float> %87, float 0.000000e+00, i32 3 - %89 = call <4 x float> @llvm.fma.v4f32(<4 x float> %81, <4 x float> %88, <4 x float> %74) - %90 = extractelement <4 x float> %89, i32 0 - %91 = getelementptr float, float* %2, i32 0 - %92 = getelementptr inbounds float, float* %91, i64 1 - store float %90, float* %92, align 4 - %93 = extractelement <4 x float> %89, i32 1 - %94 = getelementptr float, float* %2, i32 0 - %95 = getelementptr inbounds float, float* %94, i64 2 - store float %93, float* %95, align 4 - %96 = getelementptr float, float* %0, i32 0 - %97 = getelementptr inbounds float, float* %96, i64 2 - %98 = load float, float* %97, align 4 - %99 = insertelement <4 x float> zeroinitializer, float %98, i32 0 - %100 = insertelement 
<4 x float> %99, float 0.000000e+00, i32 1 - %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 2 - %102 = insertelement <4 x float> %101, float 0.000000e+00, i32 3 - %103 = getelementptr float, float* %1, i32 0 - %104 = load float, float* %103, align 4 - %105 = insertelement <4 x float> zeroinitializer, float %104, i32 0 - %106 = insertelement <4 x float> %105, float 0.000000e+00, i32 1 - %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 2 - %108 = insertelement <4 x float> %107, float 0.000000e+00, i32 3 - %109 = call <4 x float> @llvm.fma.v4f32(<4 x float> %102, <4 x float> %108, <4 x float> zeroinitializer) - %110 = extractelement <4 x float> %109, i32 0 - %111 = getelementptr float, float* %2, i32 0 - %112 = getelementptr inbounds float, float* %111, i64 2 - store float %110, float* %112, align 4 - %113 = insertelement <4 x float> zeroinitializer, float %98, i32 0 - %114 = insertelement <4 x float> %113, float 1.000000e+00, i32 1 - %115 = insertelement <4 x float> %114, float 1.000000e+00, i32 2 - %116 = insertelement <4 x float> %115, float 1.000000e+00, i32 3 - %117 = insertelement <4 x float> zeroinitializer, float %104, i32 0 - %118 = insertelement <4 x float> %117, float 0.000000e+00, i32 1 - %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 2 - %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 3 - %121 = fmul <4 x float> %116, %120 - %122 = fadd <4 x float> %121, zeroinitializer - %123 = getelementptr float, float* %0, i32 0 - %124 = getelementptr inbounds float, float* %123, i64 3 - %125 = load float, float* %124, align 4 - %126 = insertelement <4 x float> zeroinitializer, float %125, i32 0 - %127 = insertelement <4 x float> %126, float 0.000000e+00, i32 1 - %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 2 - %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 3 - %130 = getelementptr float, float* %1, i32 0 - %131 = getelementptr inbounds float, float* %130, i64 2 - %132 = load float, float* %131, align 4 - %133 = insertelement <4 x float> zeroinitializer, float %132, i32 0 - %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 1 - %135 = insertelement <4 x float> %134, float 0.000000e+00, i32 2 - %136 = insertelement <4 x float> %135, float 0.000000e+00, i32 3 - %137 = call <4 x float> @llvm.fma.v4f32(<4 x float> %129, <4 x float> %136, <4 x float> %122) - %138 = extractelement <4 x float> %137, i32 0 - %139 = getelementptr float, float* %2, i32 0 - %140 = getelementptr inbounds float, float* %139, i64 2 - store float %138, float* %140, align 4 - %141 = extractelement <4 x float> %137, i32 1 - %142 = getelementptr float, float* %2, i32 0 - %143 = getelementptr inbounds float, float* %142, i64 3 - store float %141, float* %143, align 4 - %144 = getelementptr float, float* %0, i32 0 - %145 = getelementptr inbounds float, float* %144, i64 2 - %146 = load float, float* %145, align 4 - %147 = insertelement <4 x float> zeroinitializer, float %146, i32 0 - %148 = insertelement <4 x float> %147, float 0.000000e+00, i32 1 - %149 = insertelement <4 x float> %148, float 0.000000e+00, i32 2 - %150 = insertelement <4 x float> %149, float 0.000000e+00, i32 3 - %151 = getelementptr float, float* %1, i32 0 - %152 = getelementptr inbounds float, float* %151, i64 1 - %153 = load float, float* %152, align 4 - %154 = insertelement <4 x float> zeroinitializer, float %153, i32 0 - %155 = insertelement <4 x float> %154, float 0.000000e+00, i32 1 - %156 = insertelement <4 x float> %155, float 0.000000e+00, i32 2 - 
%157 = insertelement <4 x float> %156, float 0.000000e+00, i32 3 - %158 = call <4 x float> @llvm.fma.v4f32(<4 x float> %150, <4 x float> %157, <4 x float> zeroinitializer) - %159 = extractelement <4 x float> %158, i32 0 - %160 = getelementptr float, float* %2, i32 0 - %161 = getelementptr inbounds float, float* %160, i64 3 - store float %159, float* %161, align 4 - %162 = insertelement <4 x float> zeroinitializer, float %146, i32 0 - %163 = insertelement <4 x float> %162, float 1.000000e+00, i32 1 - %164 = insertelement <4 x float> %163, float 1.000000e+00, i32 2 - %165 = insertelement <4 x float> %164, float 1.000000e+00, i32 3 - %166 = insertelement <4 x float> zeroinitializer, float %153, i32 0 - %167 = insertelement <4 x float> %166, float 0.000000e+00, i32 1 - %168 = insertelement <4 x float> %167, float 0.000000e+00, i32 2 - %169 = insertelement <4 x float> %168, float 0.000000e+00, i32 3 - %170 = fmul <4 x float> %165, %169 - %171 = fadd <4 x float> %170, zeroinitializer - %172 = getelementptr float, float* %0, i32 0 - %173 = getelementptr inbounds float, float* %172, i64 3 - %174 = load float, float* %173, align 4 - %175 = insertelement <4 x float> zeroinitializer, float %174, i32 0 - %176 = insertelement <4 x float> %175, float 0.000000e+00, i32 1 - %177 = insertelement <4 x float> %176, float 0.000000e+00, i32 2 - %178 = insertelement <4 x float> %177, float 0.000000e+00, i32 3 - %179 = getelementptr float, float* %1, i32 0 - %180 = getelementptr inbounds float, float* %179, i64 3 - %181 = load float, float* %180, align 4 - %182 = insertelement <4 x float> zeroinitializer, float %181, i32 0 - %183 = insertelement <4 x float> %182, float 0.000000e+00, i32 1 - %184 = insertelement <4 x float> %183, float 0.000000e+00, i32 2 - %185 = insertelement <4 x float> %184, float 0.000000e+00, i32 3 - %186 = call <4 x float> @llvm.fma.v4f32(<4 x float> %178, <4 x float> %185, <4 x float> %171) - %187 = extractelement <4 x float> %186, i32 0 - %188 = getelementptr float, float* %2, i32 0 - %189 = getelementptr inbounds float, float* %188, i64 3 - store float %187, float* %189, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { -.preheader: - store float 0.000000e+00, float* %2, align 4 - %3 = load float, float* %0, align 4 - %4 = load float, float* %1, align 4 - %5 = fmul float %3, %4 - %6 = fadd float %5, 0.000000e+00 - store float %6, float* %2, align 4 - %7 = getelementptr inbounds float, float* %0, i64 1 - %8 = load float, float* %7, align 4 - %9 = getelementptr inbounds float, float* %1, i64 2 - %10 = load float, float* %9, align 4 - %11 = fmul float %8, %10 - %12 = fadd float %6, %11 - store float %12, float* %2, align 4 - %13 = getelementptr inbounds float, float* %2, i64 1 - store float 0.000000e+00, float* %13, align 4 - %14 = load float, float* %0, align 4 - %15 = getelementptr inbounds float, float* %1, i64 1 - %16 = load float, float* %15, align 4 - %17 = fmul float %14, %16 - %18 = fadd float %17, 0.000000e+00 - store float %18, float* %13, align 4 - %19 = load float, float* %7, align 4 - %20 = getelementptr inbounds float, float* %1, i64 3 - %21 = load float, float* %20, align 4 - %22 = fmul float %19, %21 - %23 = fadd float %18, %22 - store float %23, float* %13, align 4 - %24 = getelementptr inbounds float, float* %0, i64 2 - %25 = getelementptr inbounds float, float* %2, i64 2 - store float 0.000000e+00, float* %25, align 4 - %26 = load float, float* %24, align 4 - %27 
= load float, float* %1, align 4 - %28 = fmul float %26, %27 - %29 = fadd float %28, 0.000000e+00 - store float %29, float* %25, align 4 - %30 = getelementptr inbounds float, float* %0, i64 3 - %31 = load float, float* %30, align 4 - %32 = load float, float* %9, align 4 - %33 = fmul float %31, %32 - %34 = fadd float %29, %33 - store float %34, float* %25, align 4 - %35 = getelementptr inbounds float, float* %2, i64 3 - store float 0.000000e+00, float* %35, align 4 - %36 = load float, float* %24, align 4 - %37 = load float, float* %15, align 4 - %38 = fmul float %36, %37 - %39 = fadd float %38, 0.000000e+00 - store float %39, float* %35, align 4 - %40 = load float, float* %30, align 4 - %41 = load float, float* %20, align 4 - %42 = fmul float %40, %41 - %43 = fadd float %39, %42 - store float %43, float* %35, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { -.preheader49: - %3 = getelementptr float, float* %0, i32 0 - %4 = bitcast float* %3 to i32* - %5 = load i32, i32* %4, align 4 - %6 = bitcast i32 %5 to float - %7 = insertelement <4 x float> zeroinitializer, float %6, i32 0 - %8 = insertelement <4 x float> %7, float 0.000000e+00, i32 1 - %9 = insertelement <4 x float> %8, float 0.000000e+00, i32 2 - %10 = insertelement <4 x float> %9, float 0.000000e+00, i32 3 - %11 = extractelement <4 x float> %10, i32 0 - %12 = getelementptr float, float* %2, i32 0 - %13 = bitcast float* %12 to i32* - %14 = bitcast i32* %13 to float* - store float %11, float* %14, align 4 - %15 = getelementptr float, float* %0, i32 0 - %16 = getelementptr inbounds float, float* %15, i64 1 - %17 = bitcast float* %16 to i32* - %18 = load i32, i32* %17, align 4 - %19 = bitcast i32 %18 to float - %20 = insertelement <4 x float> zeroinitializer, float %19, i32 0 - %21 = insertelement <4 x float> %20, float 0.000000e+00, i32 1 - %22 = insertelement <4 x float> %21, float 0.000000e+00, i32 2 - %23 = insertelement <4 x float> %22, float 0.000000e+00, i32 3 - %24 = extractelement <4 x float> %23, i32 0 - %25 = getelementptr float, float* %2, i32 0 - %26 = getelementptr inbounds float, float* %25, i64 1 - %27 = bitcast float* %26 to i32* - %28 = bitcast i32* %27 to float* - store float %24, float* %28, align 4 - %29 = getelementptr float, float* %0, i32 0 - %30 = getelementptr inbounds float, float* %29, i64 2 - %31 = bitcast float* %30 to i32* - %32 = load i32, i32* %31, align 4 - %33 = bitcast i32 %32 to float - %34 = insertelement <4 x float> zeroinitializer, float %33, i32 0 - %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 1 - %36 = insertelement <4 x float> %35, float 0.000000e+00, i32 2 - %37 = insertelement <4 x float> %36, float 0.000000e+00, i32 3 - %38 = extractelement <4 x float> %37, i32 0 - %39 = getelementptr float, float* %2, i32 0 - %40 = getelementptr inbounds float, float* %39, i64 2 - %41 = bitcast float* %40 to i32* - %42 = bitcast i32* %41 to float* - store float %38, float* %42, align 4 - %43 = getelementptr float, float* %0, i32 0 - %44 = getelementptr inbounds float, float* %43, i64 3 - %45 = bitcast float* %44 to i32* - %46 = load i32, i32* %45, align 4 - %47 = bitcast i32 %46 to float - %48 = fneg float %47 - %49 = insertelement <4 x float> zeroinitializer, float %48, i32 0 - %50 = bitcast i32 %5 to float - %51 = bitcast i32 %5 to float - %52 = fmul float %50, %51 - %53 = fadd float %52, 0.000000e+00 - %54 = bitcast i32 %32 to float - %55 = bitcast i32 %32 to float - %56 = fmul float %54, %55 
- %57 = fadd float %53, %56 - %58 = call float @llvm.sqrt.f32(float %57) - %59 = bitcast i32 %5 to float - %60 = fcmp olt float %59, 0.000000e+00 - %61 = sext i1 %60 to i32 - %62 = bitcast i32 %5 to float - %63 = fcmp ogt float %62, 0.000000e+00 - %64 = zext i1 %63 to i32 - %65 = add nsw i32 %61, %64 - %66 = sitofp i32 %65 to float - %67 = fneg float %66 - %68 = fmul float %58, %67 - %69 = bitcast i32 %5 to float - %70 = fadd float %69, %68 - %71 = bitcast i32 %5 to float - %72 = bitcast i32 %5 to float - %73 = fmul float %71, %72 - %74 = fadd float %73, 0.000000e+00 - %75 = bitcast i32 %32 to float - %76 = bitcast i32 %32 to float - %77 = fmul float %75, %76 - %78 = fadd float %74, %77 - %79 = call float @llvm.sqrt.f32(float %78) - %80 = bitcast i32 %5 to float - %81 = fcmp olt float %80, 0.000000e+00 - %82 = sext i1 %81 to i32 - %83 = bitcast i32 %5 to float - %84 = fcmp ogt float %83, 0.000000e+00 - %85 = zext i1 %84 to i32 - %86 = add nsw i32 %82, %85 - %87 = sitofp i32 %86 to float - %88 = fneg float %87 - %89 = fmul float %79, %88 - %90 = bitcast i32 %5 to float - %91 = fadd float %90, %89 - %92 = bitcast i32 %5 to float - %93 = bitcast i32 %5 to float - %94 = fmul float %92, %93 - %95 = fadd float %94, 0.000000e+00 - %96 = bitcast i32 %32 to float - %97 = bitcast i32 %32 to float - %98 = fmul float %96, %97 - %99 = fadd float %95, %98 - %100 = call float @llvm.sqrt.f32(float %99) - %101 = bitcast i32 %5 to float - %102 = fcmp olt float %101, 0.000000e+00 - %103 = sext i1 %102 to i32 - %104 = bitcast i32 %5 to float - %105 = fcmp ogt float %104, 0.000000e+00 - %106 = zext i1 %105 to i32 - %107 = add nsw i32 %103, %106 - %108 = sitofp i32 %107 to float - %109 = fneg float %108 - %110 = fmul float %100, %109 - %111 = bitcast i32 %5 to float - %112 = fadd float %111, %110 - %113 = fmul float %91, %112 - %114 = fadd float %113, 0.000000e+00 - %115 = bitcast i32 %5 to float - %116 = bitcast i32 %5 to float - %117 = fmul float %115, %116 - %118 = fadd float %117, 0.000000e+00 - %119 = bitcast i32 %32 to float - %120 = bitcast i32 %32 to float - %121 = fmul float %119, %120 - %122 = fadd float %118, %121 - %123 = call float @llvm.sqrt.f32(float %122) - %124 = bitcast i32 %5 to float - %125 = fcmp olt float %124, 0.000000e+00 - %126 = sext i1 %125 to i32 - %127 = bitcast i32 %5 to float - %128 = fcmp ogt float %127, 0.000000e+00 - %129 = zext i1 %128 to i32 - %130 = add nsw i32 %126, %129 - %131 = sitofp i32 %130 to float - %132 = fneg float %131 - %133 = fmul float %123, %132 - %134 = fmul float %133, 0.000000e+00 - %135 = bitcast i32 %32 to float - %136 = fadd float %135, %134 - %137 = bitcast i32 %5 to float - %138 = bitcast i32 %5 to float - %139 = fmul float %137, %138 - %140 = fadd float %139, 0.000000e+00 - %141 = bitcast i32 %32 to float - %142 = bitcast i32 %32 to float - %143 = fmul float %141, %142 - %144 = fadd float %140, %143 - %145 = call float @llvm.sqrt.f32(float %144) - %146 = bitcast i32 %5 to float - %147 = fcmp olt float %146, 0.000000e+00 - %148 = sext i1 %147 to i32 - %149 = bitcast i32 %5 to float - %150 = fcmp ogt float %149, 0.000000e+00 - %151 = zext i1 %150 to i32 - %152 = add nsw i32 %148, %151 - %153 = sitofp i32 %152 to float - %154 = fneg float %153 - %155 = fmul float %145, %154 - %156 = fmul float %155, 0.000000e+00 - %157 = bitcast i32 %32 to float - %158 = fadd float %157, %156 - %159 = fmul float %136, %158 - %160 = fadd float %114, %159 - %161 = call float @llvm.sqrt.f32(float %160) - %162 = fadd float %161, 0.000000e+00 - %163 = fdiv float %70, %162 - 
%164 = fmul float %163, 2.000000e+00 - %165 = bitcast i32 %5 to float - %166 = bitcast i32 %5 to float - %167 = fmul float %165, %166 - %168 = fadd float %167, 0.000000e+00 - %169 = bitcast i32 %32 to float - %170 = bitcast i32 %32 to float - %171 = fmul float %169, %170 - %172 = fadd float %168, %171 - %173 = call float @llvm.sqrt.f32(float %172) - %174 = bitcast i32 %5 to float - %175 = fcmp olt float %174, 0.000000e+00 - %176 = sext i1 %175 to i32 - %177 = bitcast i32 %5 to float - %178 = fcmp ogt float %177, 0.000000e+00 - %179 = zext i1 %178 to i32 - %180 = add nsw i32 %176, %179 - %181 = sitofp i32 %180 to float - %182 = fneg float %181 - %183 = fmul float %173, %182 - %184 = bitcast i32 %5 to float - %185 = fadd float %184, %183 - %186 = bitcast i32 %5 to float - %187 = bitcast i32 %5 to float - %188 = fmul float %186, %187 - %189 = fadd float %188, 0.000000e+00 - %190 = bitcast i32 %32 to float - %191 = bitcast i32 %32 to float - %192 = fmul float %190, %191 - %193 = fadd float %189, %192 - %194 = call float @llvm.sqrt.f32(float %193) - %195 = bitcast i32 %5 to float - %196 = fcmp olt float %195, 0.000000e+00 - %197 = sext i1 %196 to i32 - %198 = bitcast i32 %5 to float - %199 = fcmp ogt float %198, 0.000000e+00 - %200 = zext i1 %199 to i32 - %201 = add nsw i32 %197, %200 - %202 = sitofp i32 %201 to float - %203 = fneg float %202 - %204 = fmul float %194, %203 - %205 = bitcast i32 %5 to float - %206 = fadd float %205, %204 - %207 = bitcast i32 %5 to float - %208 = bitcast i32 %5 to float - %209 = fmul float %207, %208 - %210 = fadd float %209, 0.000000e+00 - %211 = bitcast i32 %32 to float - %212 = bitcast i32 %32 to float - %213 = fmul float %211, %212 - %214 = fadd float %210, %213 - %215 = call float @llvm.sqrt.f32(float %214) - %216 = bitcast i32 %5 to float - %217 = fcmp olt float %216, 0.000000e+00 - %218 = sext i1 %217 to i32 - %219 = bitcast i32 %5 to float - %220 = fcmp ogt float %219, 0.000000e+00 - %221 = zext i1 %220 to i32 - %222 = add nsw i32 %218, %221 - %223 = sitofp i32 %222 to float - %224 = fneg float %223 - %225 = fmul float %215, %224 - %226 = bitcast i32 %5 to float - %227 = fadd float %226, %225 - %228 = fmul float %206, %227 - %229 = fadd float %228, 0.000000e+00 - %230 = bitcast i32 %5 to float - %231 = bitcast i32 %5 to float - %232 = fmul float %230, %231 - %233 = fadd float %232, 0.000000e+00 - %234 = bitcast i32 %32 to float - %235 = bitcast i32 %32 to float - %236 = fmul float %234, %235 - %237 = fadd float %233, %236 - %238 = call float @llvm.sqrt.f32(float %237) - %239 = bitcast i32 %5 to float - %240 = fcmp olt float %239, 0.000000e+00 - %241 = sext i1 %240 to i32 - %242 = bitcast i32 %5 to float - %243 = fcmp ogt float %242, 0.000000e+00 - %244 = zext i1 %243 to i32 - %245 = add nsw i32 %241, %244 - %246 = sitofp i32 %245 to float - %247 = fneg float %246 - %248 = fmul float %238, %247 - %249 = fmul float %248, 0.000000e+00 - %250 = bitcast i32 %32 to float - %251 = fadd float %250, %249 - %252 = bitcast i32 %5 to float - %253 = bitcast i32 %5 to float - %254 = fmul float %252, %253 - %255 = fadd float %254, 0.000000e+00 - %256 = bitcast i32 %32 to float - %257 = bitcast i32 %32 to float - %258 = fmul float %256, %257 - %259 = fadd float %255, %258 - %260 = call float @llvm.sqrt.f32(float %259) - %261 = bitcast i32 %5 to float - %262 = fcmp olt float %261, 0.000000e+00 - %263 = sext i1 %262 to i32 - %264 = bitcast i32 %5 to float - %265 = fcmp ogt float %264, 0.000000e+00 - %266 = zext i1 %265 to i32 - %267 = add nsw i32 %263, %266 - %268 = sitofp 
i32 %267 to float - %269 = fneg float %268 - %270 = fmul float %260, %269 - %271 = fmul float %270, 0.000000e+00 - %272 = bitcast i32 %32 to float - %273 = fadd float %272, %271 - %274 = fmul float %251, %273 - %275 = fadd float %229, %274 - %276 = call float @llvm.sqrt.f32(float %275) - %277 = fadd float %276, 0.000000e+00 - %278 = fdiv float %185, %277 - %279 = fmul float %164, %278 - %280 = insertelement <4 x float> %49, float %279, i32 1 - %281 = bitcast i32 %5 to float - %282 = bitcast i32 %5 to float - %283 = fmul float %281, %282 - %284 = fadd float %283, 0.000000e+00 - %285 = bitcast i32 %32 to float - %286 = bitcast i32 %32 to float - %287 = fmul float %285, %286 - %288 = fadd float %284, %287 - %289 = call float @llvm.sqrt.f32(float %288) - %290 = bitcast i32 %5 to float - %291 = fcmp olt float %290, 0.000000e+00 - %292 = sext i1 %291 to i32 - %293 = bitcast i32 %5 to float - %294 = fcmp ogt float %293, 0.000000e+00 - %295 = zext i1 %294 to i32 - %296 = add nsw i32 %292, %295 - %297 = sitofp i32 %296 to float - %298 = fneg float %297 - %299 = fmul float %289, %298 - %300 = bitcast i32 %5 to float - %301 = fadd float %300, %299 - %302 = bitcast i32 %5 to float - %303 = bitcast i32 %5 to float - %304 = fmul float %302, %303 - %305 = fadd float %304, 0.000000e+00 - %306 = bitcast i32 %32 to float - %307 = bitcast i32 %32 to float - %308 = fmul float %306, %307 - %309 = fadd float %305, %308 - %310 = call float @llvm.sqrt.f32(float %309) - %311 = bitcast i32 %5 to float - %312 = fcmp olt float %311, 0.000000e+00 - %313 = sext i1 %312 to i32 - %314 = bitcast i32 %5 to float - %315 = fcmp ogt float %314, 0.000000e+00 - %316 = zext i1 %315 to i32 - %317 = add nsw i32 %313, %316 - %318 = sitofp i32 %317 to float - %319 = fneg float %318 - %320 = fmul float %310, %319 - %321 = bitcast i32 %5 to float - %322 = fadd float %321, %320 - %323 = bitcast i32 %5 to float - %324 = bitcast i32 %5 to float - %325 = fmul float %323, %324 - %326 = fadd float %325, 0.000000e+00 - %327 = bitcast i32 %32 to float - %328 = bitcast i32 %32 to float - %329 = fmul float %327, %328 - %330 = fadd float %326, %329 - %331 = call float @llvm.sqrt.f32(float %330) - %332 = bitcast i32 %5 to float - %333 = fcmp olt float %332, 0.000000e+00 - %334 = sext i1 %333 to i32 - %335 = bitcast i32 %5 to float - %336 = fcmp ogt float %335, 0.000000e+00 - %337 = zext i1 %336 to i32 - %338 = add nsw i32 %334, %337 - %339 = sitofp i32 %338 to float - %340 = fneg float %339 - %341 = fmul float %331, %340 - %342 = bitcast i32 %5 to float - %343 = fadd float %342, %341 - %344 = fmul float %322, %343 - %345 = fadd float %344, 0.000000e+00 - %346 = bitcast i32 %5 to float - %347 = bitcast i32 %5 to float - %348 = fmul float %346, %347 - %349 = fadd float %348, 0.000000e+00 - %350 = bitcast i32 %32 to float - %351 = bitcast i32 %32 to float - %352 = fmul float %350, %351 - %353 = fadd float %349, %352 - %354 = call float @llvm.sqrt.f32(float %353) - %355 = bitcast i32 %5 to float - %356 = fcmp olt float %355, 0.000000e+00 - %357 = sext i1 %356 to i32 - %358 = bitcast i32 %5 to float - %359 = fcmp ogt float %358, 0.000000e+00 - %360 = zext i1 %359 to i32 - %361 = add nsw i32 %357, %360 - %362 = sitofp i32 %361 to float - %363 = fneg float %362 - %364 = fmul float %354, %363 - %365 = fmul float %364, 0.000000e+00 - %366 = bitcast i32 %32 to float - %367 = fadd float %366, %365 - %368 = bitcast i32 %5 to float - %369 = bitcast i32 %5 to float - %370 = fmul float %368, %369 - %371 = fadd float %370, 0.000000e+00 - %372 = bitcast i32 %32 
to float - %373 = bitcast i32 %32 to float - %374 = fmul float %372, %373 - %375 = fadd float %371, %374 - %376 = call float @llvm.sqrt.f32(float %375) - %377 = bitcast i32 %5 to float - %378 = fcmp olt float %377, 0.000000e+00 - %379 = sext i1 %378 to i32 - %380 = bitcast i32 %5 to float - %381 = fcmp ogt float %380, 0.000000e+00 - %382 = zext i1 %381 to i32 - %383 = add nsw i32 %379, %382 - %384 = sitofp i32 %383 to float - %385 = fneg float %384 - %386 = fmul float %376, %385 - %387 = fmul float %386, 0.000000e+00 - %388 = bitcast i32 %32 to float - %389 = fadd float %388, %387 - %390 = fmul float %367, %389 - %391 = fadd float %345, %390 - %392 = call float @llvm.sqrt.f32(float %391) - %393 = fadd float %392, 0.000000e+00 - %394 = fdiv float %301, %393 - %395 = fmul float %394, 2.000000e+00 - %396 = bitcast i32 %5 to float - %397 = bitcast i32 %5 to float - %398 = fmul float %396, %397 - %399 = fadd float %398, 0.000000e+00 - %400 = bitcast i32 %32 to float - %401 = bitcast i32 %32 to float - %402 = fmul float %400, %401 - %403 = fadd float %399, %402 - %404 = call float @llvm.sqrt.f32(float %403) - %405 = bitcast i32 %5 to float - %406 = fcmp olt float %405, 0.000000e+00 - %407 = sext i1 %406 to i32 - %408 = bitcast i32 %5 to float - %409 = fcmp ogt float %408, 0.000000e+00 - %410 = zext i1 %409 to i32 - %411 = add nsw i32 %407, %410 - %412 = sitofp i32 %411 to float - %413 = fneg float %412 - %414 = fmul float %404, %413 - %415 = fmul float %414, 0.000000e+00 - %416 = bitcast i32 %32 to float - %417 = fadd float %416, %415 - %418 = bitcast i32 %5 to float - %419 = bitcast i32 %5 to float - %420 = fmul float %418, %419 - %421 = fadd float %420, 0.000000e+00 - %422 = bitcast i32 %32 to float - %423 = bitcast i32 %32 to float - %424 = fmul float %422, %423 - %425 = fadd float %421, %424 - %426 = call float @llvm.sqrt.f32(float %425) - %427 = bitcast i32 %5 to float - %428 = fcmp olt float %427, 0.000000e+00 - %429 = sext i1 %428 to i32 - %430 = bitcast i32 %5 to float - %431 = fcmp ogt float %430, 0.000000e+00 - %432 = zext i1 %431 to i32 - %433 = add nsw i32 %429, %432 - %434 = sitofp i32 %433 to float - %435 = fneg float %434 - %436 = fmul float %426, %435 - %437 = bitcast i32 %5 to float - %438 = fadd float %437, %436 - %439 = bitcast i32 %5 to float - %440 = bitcast i32 %5 to float - %441 = fmul float %439, %440 - %442 = fadd float %441, 0.000000e+00 - %443 = bitcast i32 %32 to float - %444 = bitcast i32 %32 to float - %445 = fmul float %443, %444 - %446 = fadd float %442, %445 - %447 = call float @llvm.sqrt.f32(float %446) - %448 = bitcast i32 %5 to float - %449 = fcmp olt float %448, 0.000000e+00 - %450 = sext i1 %449 to i32 - %451 = bitcast i32 %5 to float - %452 = fcmp ogt float %451, 0.000000e+00 - %453 = zext i1 %452 to i32 - %454 = add nsw i32 %450, %453 - %455 = sitofp i32 %454 to float - %456 = fneg float %455 - %457 = fmul float %447, %456 - %458 = bitcast i32 %5 to float - %459 = fadd float %458, %457 - %460 = fmul float %438, %459 - %461 = fadd float %460, 0.000000e+00 - %462 = bitcast i32 %5 to float - %463 = bitcast i32 %5 to float - %464 = fmul float %462, %463 - %465 = fadd float %464, 0.000000e+00 - %466 = bitcast i32 %32 to float - %467 = bitcast i32 %32 to float - %468 = fmul float %466, %467 - %469 = fadd float %465, %468 - %470 = call float @llvm.sqrt.f32(float %469) - %471 = bitcast i32 %5 to float - %472 = fcmp olt float %471, 0.000000e+00 - %473 = sext i1 %472 to i32 - %474 = bitcast i32 %5 to float - %475 = fcmp ogt float %474, 0.000000e+00 - %476 = zext i1 
%475 to i32 - %477 = add nsw i32 %473, %476 - %478 = sitofp i32 %477 to float - %479 = fneg float %478 - %480 = fmul float %470, %479 - %481 = fmul float %480, 0.000000e+00 - %482 = bitcast i32 %32 to float - %483 = fadd float %482, %481 - %484 = bitcast i32 %5 to float - %485 = bitcast i32 %5 to float - %486 = fmul float %484, %485 - %487 = fadd float %486, 0.000000e+00 - %488 = bitcast i32 %32 to float - %489 = bitcast i32 %32 to float - %490 = fmul float %488, %489 - %491 = fadd float %487, %490 - %492 = call float @llvm.sqrt.f32(float %491) - %493 = bitcast i32 %5 to float - %494 = fcmp olt float %493, 0.000000e+00 - %495 = sext i1 %494 to i32 - %496 = bitcast i32 %5 to float - %497 = fcmp ogt float %496, 0.000000e+00 - %498 = zext i1 %497 to i32 - %499 = add nsw i32 %495, %498 - %500 = sitofp i32 %499 to float - %501 = fneg float %500 - %502 = fmul float %492, %501 - %503 = fmul float %502, 0.000000e+00 - %504 = bitcast i32 %32 to float - %505 = fadd float %504, %503 - %506 = fmul float %483, %505 - %507 = fadd float %461, %506 - %508 = call float @llvm.sqrt.f32(float %507) - %509 = fadd float %508, 0.000000e+00 - %510 = fdiv float %417, %509 - %511 = fmul float %395, %510 - %512 = insertelement <4 x float> %280, float %511, i32 2 - %513 = bitcast i32 %5 to float - %514 = bitcast i32 %5 to float - %515 = fmul float %513, %514 - %516 = fadd float %515, 0.000000e+00 - %517 = bitcast i32 %32 to float - %518 = bitcast i32 %32 to float - %519 = fmul float %517, %518 - %520 = fadd float %516, %519 - %521 = call float @llvm.sqrt.f32(float %520) - %522 = bitcast i32 %5 to float - %523 = fcmp olt float %522, 0.000000e+00 - %524 = sext i1 %523 to i32 - %525 = bitcast i32 %5 to float - %526 = fcmp ogt float %525, 0.000000e+00 - %527 = zext i1 %526 to i32 - %528 = add nsw i32 %524, %527 - %529 = sitofp i32 %528 to float - %530 = fneg float %529 - %531 = fmul float %521, %530 - %532 = fmul float %531, 0.000000e+00 - %533 = bitcast i32 %32 to float - %534 = fadd float %533, %532 - %535 = bitcast i32 %5 to float - %536 = bitcast i32 %5 to float - %537 = fmul float %535, %536 - %538 = fadd float %537, 0.000000e+00 - %539 = bitcast i32 %32 to float - %540 = bitcast i32 %32 to float - %541 = fmul float %539, %540 - %542 = fadd float %538, %541 - %543 = call float @llvm.sqrt.f32(float %542) - %544 = bitcast i32 %5 to float - %545 = fcmp olt float %544, 0.000000e+00 - %546 = sext i1 %545 to i32 - %547 = bitcast i32 %5 to float - %548 = fcmp ogt float %547, 0.000000e+00 - %549 = zext i1 %548 to i32 - %550 = add nsw i32 %546, %549 - %551 = sitofp i32 %550 to float - %552 = fneg float %551 - %553 = fmul float %543, %552 - %554 = bitcast i32 %5 to float - %555 = fadd float %554, %553 - %556 = bitcast i32 %5 to float - %557 = bitcast i32 %5 to float - %558 = fmul float %556, %557 - %559 = fadd float %558, 0.000000e+00 - %560 = bitcast i32 %32 to float - %561 = bitcast i32 %32 to float - %562 = fmul float %560, %561 - %563 = fadd float %559, %562 - %564 = call float @llvm.sqrt.f32(float %563) - %565 = bitcast i32 %5 to float - %566 = fcmp olt float %565, 0.000000e+00 - %567 = sext i1 %566 to i32 - %568 = bitcast i32 %5 to float - %569 = fcmp ogt float %568, 0.000000e+00 - %570 = zext i1 %569 to i32 - %571 = add nsw i32 %567, %570 - %572 = sitofp i32 %571 to float - %573 = fneg float %572 - %574 = fmul float %564, %573 - %575 = bitcast i32 %5 to float - %576 = fadd float %575, %574 - %577 = fmul float %555, %576 - %578 = fadd float %577, 0.000000e+00 - %579 = bitcast i32 %5 to float - %580 = bitcast i32 %5 to 
float - %581 = fmul float %579, %580 - %582 = fadd float %581, 0.000000e+00 - %583 = bitcast i32 %32 to float - %584 = bitcast i32 %32 to float - %585 = fmul float %583, %584 - %586 = fadd float %582, %585 - %587 = call float @llvm.sqrt.f32(float %586) - %588 = bitcast i32 %5 to float - %589 = fcmp olt float %588, 0.000000e+00 - %590 = sext i1 %589 to i32 - %591 = bitcast i32 %5 to float - %592 = fcmp ogt float %591, 0.000000e+00 - %593 = zext i1 %592 to i32 - %594 = add nsw i32 %590, %593 - %595 = sitofp i32 %594 to float - %596 = fneg float %595 - %597 = fmul float %587, %596 - %598 = fmul float %597, 0.000000e+00 - %599 = bitcast i32 %32 to float - %600 = fadd float %599, %598 - %601 = bitcast i32 %5 to float - %602 = bitcast i32 %5 to float - %603 = fmul float %601, %602 - %604 = fadd float %603, 0.000000e+00 - %605 = bitcast i32 %32 to float - %606 = bitcast i32 %32 to float - %607 = fmul float %605, %606 - %608 = fadd float %604, %607 - %609 = call float @llvm.sqrt.f32(float %608) - %610 = bitcast i32 %5 to float - %611 = fcmp olt float %610, 0.000000e+00 - %612 = sext i1 %611 to i32 - %613 = bitcast i32 %5 to float - %614 = fcmp ogt float %613, 0.000000e+00 - %615 = zext i1 %614 to i32 - %616 = add nsw i32 %612, %615 - %617 = sitofp i32 %616 to float - %618 = fneg float %617 - %619 = fmul float %609, %618 - %620 = fmul float %619, 0.000000e+00 - %621 = bitcast i32 %32 to float - %622 = fadd float %621, %620 - %623 = fmul float %600, %622 - %624 = fadd float %578, %623 - %625 = call float @llvm.sqrt.f32(float %624) - %626 = fadd float %625, 0.000000e+00 - %627 = fdiv float %534, %626 - %628 = fmul float %627, 2.000000e+00 - %629 = bitcast i32 %5 to float - %630 = bitcast i32 %5 to float - %631 = fmul float %629, %630 - %632 = fadd float %631, 0.000000e+00 - %633 = bitcast i32 %32 to float - %634 = bitcast i32 %32 to float - %635 = fmul float %633, %634 - %636 = fadd float %632, %635 - %637 = call float @llvm.sqrt.f32(float %636) - %638 = bitcast i32 %5 to float - %639 = fcmp olt float %638, 0.000000e+00 - %640 = sext i1 %639 to i32 - %641 = bitcast i32 %5 to float - %642 = fcmp ogt float %641, 0.000000e+00 - %643 = zext i1 %642 to i32 - %644 = add nsw i32 %640, %643 - %645 = sitofp i32 %644 to float - %646 = fneg float %645 - %647 = fmul float %637, %646 - %648 = bitcast i32 %5 to float - %649 = fadd float %648, %647 - %650 = bitcast i32 %5 to float - %651 = bitcast i32 %5 to float - %652 = fmul float %650, %651 - %653 = fadd float %652, 0.000000e+00 - %654 = bitcast i32 %32 to float - %655 = bitcast i32 %32 to float - %656 = fmul float %654, %655 - %657 = fadd float %653, %656 - %658 = call float @llvm.sqrt.f32(float %657) - %659 = bitcast i32 %5 to float - %660 = fcmp olt float %659, 0.000000e+00 - %661 = sext i1 %660 to i32 - %662 = bitcast i32 %5 to float - %663 = fcmp ogt float %662, 0.000000e+00 - %664 = zext i1 %663 to i32 - %665 = add nsw i32 %661, %664 - %666 = sitofp i32 %665 to float - %667 = fneg float %666 - %668 = fmul float %658, %667 - %669 = bitcast i32 %5 to float - %670 = fadd float %669, %668 - %671 = bitcast i32 %5 to float - %672 = bitcast i32 %5 to float - %673 = fmul float %671, %672 - %674 = fadd float %673, 0.000000e+00 - %675 = bitcast i32 %32 to float - %676 = bitcast i32 %32 to float - %677 = fmul float %675, %676 - %678 = fadd float %674, %677 - %679 = call float @llvm.sqrt.f32(float %678) - %680 = bitcast i32 %5 to float - %681 = fcmp olt float %680, 0.000000e+00 - %682 = sext i1 %681 to i32 - %683 = bitcast i32 %5 to float - %684 = fcmp ogt float %683, 
0.000000e+00 - %685 = zext i1 %684 to i32 - %686 = add nsw i32 %682, %685 - %687 = sitofp i32 %686 to float - %688 = fneg float %687 - %689 = fmul float %679, %688 - %690 = bitcast i32 %5 to float - %691 = fadd float %690, %689 - %692 = fmul float %670, %691 - %693 = fadd float %692, 0.000000e+00 - %694 = bitcast i32 %5 to float - %695 = bitcast i32 %5 to float - %696 = fmul float %694, %695 - %697 = fadd float %696, 0.000000e+00 - %698 = bitcast i32 %32 to float - %699 = bitcast i32 %32 to float - %700 = fmul float %698, %699 - %701 = fadd float %697, %700 - %702 = call float @llvm.sqrt.f32(float %701) - %703 = bitcast i32 %5 to float - %704 = fcmp olt float %703, 0.000000e+00 - %705 = sext i1 %704 to i32 - %706 = bitcast i32 %5 to float - %707 = fcmp ogt float %706, 0.000000e+00 - %708 = zext i1 %707 to i32 - %709 = add nsw i32 %705, %708 - %710 = sitofp i32 %709 to float - %711 = fneg float %710 - %712 = fmul float %702, %711 - %713 = fmul float %712, 0.000000e+00 - %714 = bitcast i32 %32 to float - %715 = fadd float %714, %713 - %716 = bitcast i32 %5 to float - %717 = bitcast i32 %5 to float - %718 = fmul float %716, %717 - %719 = fadd float %718, 0.000000e+00 - %720 = bitcast i32 %32 to float - %721 = bitcast i32 %32 to float - %722 = fmul float %720, %721 - %723 = fadd float %719, %722 - %724 = call float @llvm.sqrt.f32(float %723) - %725 = bitcast i32 %5 to float - %726 = fcmp olt float %725, 0.000000e+00 - %727 = sext i1 %726 to i32 - %728 = bitcast i32 %5 to float - %729 = fcmp ogt float %728, 0.000000e+00 - %730 = zext i1 %729 to i32 - %731 = add nsw i32 %727, %730 - %732 = sitofp i32 %731 to float - %733 = fneg float %732 - %734 = fmul float %724, %733 - %735 = fmul float %734, 0.000000e+00 - %736 = bitcast i32 %32 to float - %737 = fadd float %736, %735 - %738 = fmul float %715, %737 - %739 = fadd float %693, %738 - %740 = call float @llvm.sqrt.f32(float %739) - %741 = fadd float %740, 0.000000e+00 - %742 = fdiv float %649, %741 - %743 = fmul float %628, %742 - %744 = insertelement <4 x float> %512, float %743, i32 3 - %745 = fsub <4 x float> , %744 - %746 = bitcast i32 %5 to float - %747 = bitcast i32 %5 to float - %748 = fmul float %746, %747 - %749 = fadd float %748, 0.000000e+00 - %750 = bitcast i32 %32 to float - %751 = bitcast i32 %32 to float - %752 = fmul float %750, %751 - %753 = fadd float %749, %752 - %754 = call float @llvm.sqrt.f32(float %753) - %755 = bitcast i32 %5 to float - %756 = fcmp olt float %755, 0.000000e+00 - %757 = sext i1 %756 to i32 - %758 = bitcast i32 %5 to float - %759 = fcmp ogt float %758, 0.000000e+00 - %760 = zext i1 %759 to i32 - %761 = add nsw i32 %757, %760 - %762 = sitofp i32 %761 to float - %763 = fneg float %762 - %764 = fmul float %754, %763 - %765 = fmul float %764, 0.000000e+00 - %766 = bitcast i32 %32 to float - %767 = fadd float %766, %765 - %768 = bitcast i32 %5 to float - %769 = bitcast i32 %5 to float - %770 = fmul float %768, %769 - %771 = fadd float %770, 0.000000e+00 - %772 = bitcast i32 %32 to float - %773 = bitcast i32 %32 to float - %774 = fmul float %772, %773 - %775 = fadd float %771, %774 - %776 = call float @llvm.sqrt.f32(float %775) - %777 = bitcast i32 %5 to float - %778 = fcmp olt float %777, 0.000000e+00 - %779 = sext i1 %778 to i32 - %780 = bitcast i32 %5 to float - %781 = fcmp ogt float %780, 0.000000e+00 - %782 = zext i1 %781 to i32 - %783 = add nsw i32 %779, %782 - %784 = sitofp i32 %783 to float - %785 = fneg float %784 - %786 = fmul float %776, %785 - %787 = bitcast i32 %5 to float - %788 = fadd float %787, 
%786 - %789 = bitcast i32 %5 to float - %790 = bitcast i32 %5 to float - %791 = fmul float %789, %790 - %792 = fadd float %791, 0.000000e+00 - %793 = bitcast i32 %32 to float - %794 = bitcast i32 %32 to float - %795 = fmul float %793, %794 - %796 = fadd float %792, %795 - %797 = call float @llvm.sqrt.f32(float %796) - %798 = bitcast i32 %5 to float - %799 = fcmp olt float %798, 0.000000e+00 - %800 = sext i1 %799 to i32 - %801 = bitcast i32 %5 to float - %802 = fcmp ogt float %801, 0.000000e+00 - %803 = zext i1 %802 to i32 - %804 = add nsw i32 %800, %803 - %805 = sitofp i32 %804 to float - %806 = fneg float %805 - %807 = fmul float %797, %806 - %808 = bitcast i32 %5 to float - %809 = fadd float %808, %807 - %810 = fmul float %788, %809 - %811 = fadd float %810, 0.000000e+00 - %812 = bitcast i32 %5 to float - %813 = bitcast i32 %5 to float - %814 = fmul float %812, %813 - %815 = fadd float %814, 0.000000e+00 - %816 = bitcast i32 %32 to float - %817 = bitcast i32 %32 to float - %818 = fmul float %816, %817 - %819 = fadd float %815, %818 - %820 = call float @llvm.sqrt.f32(float %819) - %821 = bitcast i32 %5 to float - %822 = fcmp olt float %821, 0.000000e+00 - %823 = sext i1 %822 to i32 - %824 = bitcast i32 %5 to float - %825 = fcmp ogt float %824, 0.000000e+00 - %826 = zext i1 %825 to i32 - %827 = add nsw i32 %823, %826 - %828 = sitofp i32 %827 to float - %829 = fneg float %828 - %830 = fmul float %820, %829 - %831 = fmul float %830, 0.000000e+00 - %832 = bitcast i32 %32 to float - %833 = fadd float %832, %831 - %834 = bitcast i32 %5 to float - %835 = bitcast i32 %5 to float - %836 = fmul float %834, %835 - %837 = fadd float %836, 0.000000e+00 - %838 = bitcast i32 %32 to float - %839 = bitcast i32 %32 to float - %840 = fmul float %838, %839 - %841 = fadd float %837, %840 - %842 = call float @llvm.sqrt.f32(float %841) - %843 = bitcast i32 %5 to float - %844 = fcmp olt float %843, 0.000000e+00 - %845 = sext i1 %844 to i32 - %846 = bitcast i32 %5 to float - %847 = fcmp ogt float %846, 0.000000e+00 - %848 = zext i1 %847 to i32 - %849 = add nsw i32 %845, %848 - %850 = sitofp i32 %849 to float - %851 = fneg float %850 - %852 = fmul float %842, %851 - %853 = fmul float %852, 0.000000e+00 - %854 = bitcast i32 %32 to float - %855 = fadd float %854, %853 - %856 = fmul float %833, %855 - %857 = fadd float %811, %856 - %858 = call float @llvm.sqrt.f32(float %857) - %859 = fadd float %858, 0.000000e+00 - %860 = fdiv float %767, %859 - %861 = fmul float %860, 2.000000e+00 - %862 = bitcast i32 %5 to float - %863 = bitcast i32 %5 to float - %864 = fmul float %862, %863 - %865 = fadd float %864, 0.000000e+00 - %866 = bitcast i32 %32 to float - %867 = bitcast i32 %32 to float - %868 = fmul float %866, %867 - %869 = fadd float %865, %868 - %870 = call float @llvm.sqrt.f32(float %869) - %871 = bitcast i32 %5 to float - %872 = fcmp olt float %871, 0.000000e+00 - %873 = sext i1 %872 to i32 - %874 = bitcast i32 %5 to float - %875 = fcmp ogt float %874, 0.000000e+00 - %876 = zext i1 %875 to i32 - %877 = add nsw i32 %873, %876 - %878 = sitofp i32 %877 to float - %879 = fneg float %878 - %880 = fmul float %870, %879 - %881 = fmul float %880, 0.000000e+00 - %882 = bitcast i32 %32 to float - %883 = fadd float %882, %881 - %884 = bitcast i32 %5 to float - %885 = bitcast i32 %5 to float - %886 = fmul float %884, %885 - %887 = fadd float %886, 0.000000e+00 - %888 = bitcast i32 %32 to float - %889 = bitcast i32 %32 to float - %890 = fmul float %888, %889 - %891 = fadd float %887, %890 - %892 = call float 
@llvm.sqrt.f32(float %891) - %893 = bitcast i32 %5 to float - %894 = fcmp olt float %893, 0.000000e+00 - %895 = sext i1 %894 to i32 - %896 = bitcast i32 %5 to float - %897 = fcmp ogt float %896, 0.000000e+00 - %898 = zext i1 %897 to i32 - %899 = add nsw i32 %895, %898 - %900 = sitofp i32 %899 to float - %901 = fneg float %900 - %902 = fmul float %892, %901 - %903 = bitcast i32 %5 to float - %904 = fadd float %903, %902 - %905 = bitcast i32 %5 to float - %906 = bitcast i32 %5 to float - %907 = fmul float %905, %906 - %908 = fadd float %907, 0.000000e+00 - %909 = bitcast i32 %32 to float - %910 = bitcast i32 %32 to float - %911 = fmul float %909, %910 - %912 = fadd float %908, %911 - %913 = call float @llvm.sqrt.f32(float %912) - %914 = bitcast i32 %5 to float - %915 = fcmp olt float %914, 0.000000e+00 - %916 = sext i1 %915 to i32 - %917 = bitcast i32 %5 to float - %918 = fcmp ogt float %917, 0.000000e+00 - %919 = zext i1 %918 to i32 - %920 = add nsw i32 %916, %919 - %921 = sitofp i32 %920 to float - %922 = fneg float %921 - %923 = fmul float %913, %922 - %924 = bitcast i32 %5 to float - %925 = fadd float %924, %923 - %926 = fmul float %904, %925 - %927 = fadd float %926, 0.000000e+00 - %928 = bitcast i32 %5 to float - %929 = bitcast i32 %5 to float - %930 = fmul float %928, %929 - %931 = fadd float %930, 0.000000e+00 - %932 = bitcast i32 %32 to float - %933 = bitcast i32 %32 to float - %934 = fmul float %932, %933 - %935 = fadd float %931, %934 - %936 = call float @llvm.sqrt.f32(float %935) - %937 = bitcast i32 %5 to float - %938 = fcmp olt float %937, 0.000000e+00 - %939 = sext i1 %938 to i32 - %940 = bitcast i32 %5 to float - %941 = fcmp ogt float %940, 0.000000e+00 - %942 = zext i1 %941 to i32 - %943 = add nsw i32 %939, %942 - %944 = sitofp i32 %943 to float - %945 = fneg float %944 - %946 = fmul float %936, %945 - %947 = fmul float %946, 0.000000e+00 - %948 = bitcast i32 %32 to float - %949 = fadd float %948, %947 - %950 = bitcast i32 %5 to float - %951 = bitcast i32 %5 to float - %952 = fmul float %950, %951 - %953 = fadd float %952, 0.000000e+00 - %954 = bitcast i32 %32 to float - %955 = bitcast i32 %32 to float - %956 = fmul float %954, %955 - %957 = fadd float %953, %956 - %958 = call float @llvm.sqrt.f32(float %957) - %959 = bitcast i32 %5 to float - %960 = fcmp olt float %959, 0.000000e+00 - %961 = sext i1 %960 to i32 - %962 = bitcast i32 %5 to float - %963 = fcmp ogt float %962, 0.000000e+00 - %964 = zext i1 %963 to i32 - %965 = add nsw i32 %961, %964 - %966 = sitofp i32 %965 to float - %967 = fneg float %966 - %968 = fmul float %958, %967 - %969 = fmul float %968, 0.000000e+00 - %970 = bitcast i32 %32 to float - %971 = fadd float %970, %969 - %972 = fmul float %949, %971 - %973 = fadd float %927, %972 - %974 = call float @llvm.sqrt.f32(float %973) - %975 = fadd float %974, 0.000000e+00 - %976 = fdiv float %883, %975 - %977 = fmul float %861, %976 - %978 = fsub float 1.000000e+00, %977 - %979 = insertelement <4 x float> zeroinitializer, float %978, i32 0 - %980 = insertelement <4 x float> %979, float 0.000000e+00, i32 1 - %981 = insertelement <4 x float> %980, float 0.000000e+00, i32 2 - %982 = insertelement <4 x float> %981, float 0.000000e+00, i32 3 - %983 = shufflevector <4 x float> %745, <4 x float> %982, <8 x i32> - %984 = extractelement <8 x float> %983, i32 0 - %985 = getelementptr float, float* %2, i32 0 - %986 = getelementptr inbounds float, float* %985, i64 3 - %987 = bitcast float* %986 to i32* - %988 = bitcast i32* %987 to float* - store float %984, float* %988, align 
4 - %989 = bitcast float* %1 to i8* - %990 = alloca [4 x float], align 16 - %991 = bitcast [4 x float]* %990 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %989, i8* nonnull align 16 dereferenceable(16) %991, i64 16, i1 false) - store float 0.000000e+00, float* %2, align 4 - %992 = bitcast i32 %5 to float - %993 = bitcast i32 %5 to float - %994 = fmul float %992, %993 - %995 = fadd float %994, 0.000000e+00 - %996 = bitcast i32 %32 to float - %997 = bitcast i32 %32 to float - %998 = fmul float %996, %997 - %999 = fadd float %995, %998 - %1000 = call float @llvm.sqrt.f32(float %999) - %1001 = bitcast i32 %5 to float - %1002 = fcmp olt float %1001, 0.000000e+00 - %1003 = sext i1 %1002 to i32 - %1004 = bitcast i32 %5 to float - %1005 = fcmp ogt float %1004, 0.000000e+00 - %1006 = zext i1 %1005 to i32 - %1007 = add nsw i32 %1003, %1006 - %1008 = sitofp i32 %1007 to float - %1009 = fneg float %1008 - %1010 = fmul float %1000, %1009 - %1011 = bitcast i32 %5 to float - %1012 = fadd float %1011, %1010 - %1013 = bitcast i32 %5 to float - %1014 = bitcast i32 %5 to float - %1015 = fmul float %1013, %1014 - %1016 = fadd float %1015, 0.000000e+00 - %1017 = bitcast i32 %32 to float - %1018 = bitcast i32 %32 to float - %1019 = fmul float %1017, %1018 - %1020 = fadd float %1016, %1019 - %1021 = call float @llvm.sqrt.f32(float %1020) - %1022 = bitcast i32 %5 to float - %1023 = fcmp olt float %1022, 0.000000e+00 - %1024 = sext i1 %1023 to i32 - %1025 = bitcast i32 %5 to float - %1026 = fcmp ogt float %1025, 0.000000e+00 - %1027 = zext i1 %1026 to i32 - %1028 = add nsw i32 %1024, %1027 - %1029 = sitofp i32 %1028 to float - %1030 = fneg float %1029 - %1031 = fmul float %1021, %1030 - %1032 = bitcast i32 %5 to float - %1033 = fadd float %1032, %1031 - %1034 = bitcast i32 %5 to float - %1035 = bitcast i32 %5 to float - %1036 = fmul float %1034, %1035 - %1037 = fadd float %1036, 0.000000e+00 - %1038 = bitcast i32 %32 to float - %1039 = bitcast i32 %32 to float - %1040 = fmul float %1038, %1039 - %1041 = fadd float %1037, %1040 - %1042 = call float @llvm.sqrt.f32(float %1041) - %1043 = bitcast i32 %5 to float - %1044 = fcmp olt float %1043, 0.000000e+00 - %1045 = sext i1 %1044 to i32 - %1046 = bitcast i32 %5 to float - %1047 = fcmp ogt float %1046, 0.000000e+00 - %1048 = zext i1 %1047 to i32 - %1049 = add nsw i32 %1045, %1048 - %1050 = sitofp i32 %1049 to float - %1051 = fneg float %1050 - %1052 = fmul float %1042, %1051 - %1053 = bitcast i32 %5 to float - %1054 = fadd float %1053, %1052 - %1055 = fmul float %1033, %1054 - %1056 = fadd float %1055, 0.000000e+00 - %1057 = bitcast i32 %5 to float - %1058 = bitcast i32 %5 to float - %1059 = fmul float %1057, %1058 - %1060 = fadd float %1059, 0.000000e+00 - %1061 = bitcast i32 %32 to float - %1062 = bitcast i32 %32 to float - %1063 = fmul float %1061, %1062 - %1064 = fadd float %1060, %1063 - %1065 = call float @llvm.sqrt.f32(float %1064) - %1066 = bitcast i32 %5 to float - %1067 = fcmp olt float %1066, 0.000000e+00 - %1068 = sext i1 %1067 to i32 - %1069 = bitcast i32 %5 to float - %1070 = fcmp ogt float %1069, 0.000000e+00 - %1071 = zext i1 %1070 to i32 - %1072 = add nsw i32 %1068, %1071 - %1073 = sitofp i32 %1072 to float - %1074 = fneg float %1073 - %1075 = fmul float %1065, %1074 - %1076 = fmul float %1075, 0.000000e+00 - %1077 = bitcast i32 %32 to float - %1078 = fadd float %1077, %1076 - %1079 = bitcast i32 %5 to float - %1080 = bitcast i32 %5 to float - %1081 = fmul float %1079, %1080 - %1082 = fadd float %1081, 
0.000000e+00 - %1083 = bitcast i32 %32 to float - %1084 = bitcast i32 %32 to float - %1085 = fmul float %1083, %1084 - %1086 = fadd float %1082, %1085 - %1087 = call float @llvm.sqrt.f32(float %1086) - %1088 = bitcast i32 %5 to float - %1089 = fcmp olt float %1088, 0.000000e+00 - %1090 = sext i1 %1089 to i32 - %1091 = bitcast i32 %5 to float - %1092 = fcmp ogt float %1091, 0.000000e+00 - %1093 = zext i1 %1092 to i32 - %1094 = add nsw i32 %1090, %1093 - %1095 = sitofp i32 %1094 to float - %1096 = fneg float %1095 - %1097 = fmul float %1087, %1096 - %1098 = fmul float %1097, 0.000000e+00 - %1099 = bitcast i32 %32 to float - %1100 = fadd float %1099, %1098 - %1101 = fmul float %1078, %1100 - %1102 = fadd float %1056, %1101 - %1103 = call float @llvm.sqrt.f32(float %1102) - %1104 = fadd float %1103, 0.000000e+00 - %1105 = fdiv float %1012, %1104 - %1106 = fmul float %1105, 2.000000e+00 - %1107 = bitcast i32 %5 to float - %1108 = bitcast i32 %5 to float - %1109 = fmul float %1107, %1108 - %1110 = fadd float %1109, 0.000000e+00 - %1111 = bitcast i32 %32 to float - %1112 = bitcast i32 %32 to float - %1113 = fmul float %1111, %1112 - %1114 = fadd float %1110, %1113 - %1115 = call float @llvm.sqrt.f32(float %1114) - %1116 = bitcast i32 %5 to float - %1117 = fcmp olt float %1116, 0.000000e+00 - %1118 = sext i1 %1117 to i32 - %1119 = bitcast i32 %5 to float - %1120 = fcmp ogt float %1119, 0.000000e+00 - %1121 = zext i1 %1120 to i32 - %1122 = add nsw i32 %1118, %1121 - %1123 = sitofp i32 %1122 to float - %1124 = fneg float %1123 - %1125 = fmul float %1115, %1124 - %1126 = bitcast i32 %5 to float - %1127 = fadd float %1126, %1125 - %1128 = bitcast i32 %5 to float - %1129 = bitcast i32 %5 to float - %1130 = fmul float %1128, %1129 - %1131 = fadd float %1130, 0.000000e+00 - %1132 = bitcast i32 %32 to float - %1133 = bitcast i32 %32 to float - %1134 = fmul float %1132, %1133 - %1135 = fadd float %1131, %1134 - %1136 = call float @llvm.sqrt.f32(float %1135) - %1137 = bitcast i32 %5 to float - %1138 = fcmp olt float %1137, 0.000000e+00 - %1139 = sext i1 %1138 to i32 - %1140 = bitcast i32 %5 to float - %1141 = fcmp ogt float %1140, 0.000000e+00 - %1142 = zext i1 %1141 to i32 - %1143 = add nsw i32 %1139, %1142 - %1144 = sitofp i32 %1143 to float - %1145 = fneg float %1144 - %1146 = fmul float %1136, %1145 - %1147 = bitcast i32 %5 to float - %1148 = fadd float %1147, %1146 - %1149 = bitcast i32 %5 to float - %1150 = bitcast i32 %5 to float - %1151 = fmul float %1149, %1150 - %1152 = fadd float %1151, 0.000000e+00 - %1153 = bitcast i32 %32 to float - %1154 = bitcast i32 %32 to float - %1155 = fmul float %1153, %1154 - %1156 = fadd float %1152, %1155 - %1157 = call float @llvm.sqrt.f32(float %1156) - %1158 = bitcast i32 %5 to float - %1159 = fcmp olt float %1158, 0.000000e+00 - %1160 = sext i1 %1159 to i32 - %1161 = bitcast i32 %5 to float - %1162 = fcmp ogt float %1161, 0.000000e+00 - %1163 = zext i1 %1162 to i32 - %1164 = add nsw i32 %1160, %1163 - %1165 = sitofp i32 %1164 to float - %1166 = fneg float %1165 - %1167 = fmul float %1157, %1166 - %1168 = bitcast i32 %5 to float - %1169 = fadd float %1168, %1167 - %1170 = fmul float %1148, %1169 - %1171 = fadd float %1170, 0.000000e+00 - %1172 = bitcast i32 %5 to float - %1173 = bitcast i32 %5 to float - %1174 = fmul float %1172, %1173 - %1175 = fadd float %1174, 0.000000e+00 - %1176 = bitcast i32 %32 to float - %1177 = bitcast i32 %32 to float - %1178 = fmul float %1176, %1177 - %1179 = fadd float %1175, %1178 - %1180 = call float @llvm.sqrt.f32(float %1179) - 
%1181 = bitcast i32 %5 to float - %1182 = fcmp olt float %1181, 0.000000e+00 - %1183 = sext i1 %1182 to i32 - %1184 = bitcast i32 %5 to float - %1185 = fcmp ogt float %1184, 0.000000e+00 - %1186 = zext i1 %1185 to i32 - %1187 = add nsw i32 %1183, %1186 - %1188 = sitofp i32 %1187 to float - %1189 = fneg float %1188 - %1190 = fmul float %1180, %1189 - %1191 = fmul float %1190, 0.000000e+00 - %1192 = bitcast i32 %32 to float - %1193 = fadd float %1192, %1191 - %1194 = bitcast i32 %5 to float - %1195 = bitcast i32 %5 to float - %1196 = fmul float %1194, %1195 - %1197 = fadd float %1196, 0.000000e+00 - %1198 = bitcast i32 %32 to float - %1199 = bitcast i32 %32 to float - %1200 = fmul float %1198, %1199 - %1201 = fadd float %1197, %1200 - %1202 = call float @llvm.sqrt.f32(float %1201) - %1203 = bitcast i32 %5 to float - %1204 = fcmp olt float %1203, 0.000000e+00 - %1205 = sext i1 %1204 to i32 - %1206 = bitcast i32 %5 to float - %1207 = fcmp ogt float %1206, 0.000000e+00 - %1208 = zext i1 %1207 to i32 - %1209 = add nsw i32 %1205, %1208 - %1210 = sitofp i32 %1209 to float - %1211 = fneg float %1210 - %1212 = fmul float %1202, %1211 - %1213 = fmul float %1212, 0.000000e+00 - %1214 = bitcast i32 %32 to float - %1215 = fadd float %1214, %1213 - %1216 = fmul float %1193, %1215 - %1217 = fadd float %1171, %1216 - %1218 = call float @llvm.sqrt.f32(float %1217) - %1219 = fadd float %1218, 0.000000e+00 - %1220 = fdiv float %1127, %1219 - %1221 = fmul float %1106, %1220 - %1222 = fsub float 1.000000e+00, %1221 - %1223 = insertelement <4 x float> zeroinitializer, float %1222, i32 0 - %1224 = insertelement <4 x float> %1223, float 0.000000e+00, i32 1 - %1225 = insertelement <4 x float> %1224, float 0.000000e+00, i32 2 - %1226 = insertelement <4 x float> %1225, float 0.000000e+00, i32 3 - %1227 = getelementptr float, float* %0, i32 0 - %1228 = load float, float* %1227, align 4 - %1229 = insertelement <4 x float> zeroinitializer, float %1228, i32 0 - %1230 = insertelement <4 x float> %1229, float 0.000000e+00, i32 1 - %1231 = insertelement <4 x float> %1230, float 0.000000e+00, i32 2 - %1232 = insertelement <4 x float> %1231, float 0.000000e+00, i32 3 - %1233 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1226, <4 x float> %1232, <4 x float> zeroinitializer) - %1234 = extractelement <4 x float> %1233, i32 0 - store float %1234, float* %2, align 4 - %1235 = bitcast i32 %5 to float - %1236 = bitcast i32 %5 to float - %1237 = fmul float %1235, %1236 - %1238 = fadd float %1237, 0.000000e+00 - %1239 = bitcast i32 %32 to float - %1240 = bitcast i32 %32 to float - %1241 = fmul float %1239, %1240 - %1242 = fadd float %1238, %1241 - %1243 = call float @llvm.sqrt.f32(float %1242) - %1244 = bitcast i32 %5 to float - %1245 = fcmp olt float %1244, 0.000000e+00 - %1246 = sext i1 %1245 to i32 - %1247 = bitcast i32 %5 to float - %1248 = fcmp ogt float %1247, 0.000000e+00 - %1249 = zext i1 %1248 to i32 - %1250 = add nsw i32 %1246, %1249 - %1251 = sitofp i32 %1250 to float - %1252 = fneg float %1251 - %1253 = fmul float %1243, %1252 - %1254 = bitcast i32 %5 to float - %1255 = fadd float %1254, %1253 - %1256 = bitcast i32 %5 to float - %1257 = bitcast i32 %5 to float - %1258 = fmul float %1256, %1257 - %1259 = fadd float %1258, 0.000000e+00 - %1260 = bitcast i32 %32 to float - %1261 = bitcast i32 %32 to float - %1262 = fmul float %1260, %1261 - %1263 = fadd float %1259, %1262 - %1264 = call float @llvm.sqrt.f32(float %1263) - %1265 = bitcast i32 %5 to float - %1266 = fcmp olt float %1265, 0.000000e+00 - %1267 = sext i1 %1266 to 
i32 - %1268 = bitcast i32 %5 to float - %1269 = fcmp ogt float %1268, 0.000000e+00 - %1270 = zext i1 %1269 to i32 - %1271 = add nsw i32 %1267, %1270 - %1272 = sitofp i32 %1271 to float - %1273 = fneg float %1272 - %1274 = fmul float %1264, %1273 - %1275 = bitcast i32 %5 to float - %1276 = fadd float %1275, %1274 - %1277 = bitcast i32 %5 to float - %1278 = bitcast i32 %5 to float - %1279 = fmul float %1277, %1278 - %1280 = fadd float %1279, 0.000000e+00 - %1281 = bitcast i32 %32 to float - %1282 = bitcast i32 %32 to float - %1283 = fmul float %1281, %1282 - %1284 = fadd float %1280, %1283 - %1285 = call float @llvm.sqrt.f32(float %1284) - %1286 = bitcast i32 %5 to float - %1287 = fcmp olt float %1286, 0.000000e+00 - %1288 = sext i1 %1287 to i32 - %1289 = bitcast i32 %5 to float - %1290 = fcmp ogt float %1289, 0.000000e+00 - %1291 = zext i1 %1290 to i32 - %1292 = add nsw i32 %1288, %1291 - %1293 = sitofp i32 %1292 to float - %1294 = fneg float %1293 - %1295 = fmul float %1285, %1294 - %1296 = bitcast i32 %5 to float - %1297 = fadd float %1296, %1295 - %1298 = fmul float %1276, %1297 - %1299 = fadd float %1298, 0.000000e+00 - %1300 = bitcast i32 %5 to float - %1301 = bitcast i32 %5 to float - %1302 = fmul float %1300, %1301 - %1303 = fadd float %1302, 0.000000e+00 - %1304 = bitcast i32 %32 to float - %1305 = bitcast i32 %32 to float - %1306 = fmul float %1304, %1305 - %1307 = fadd float %1303, %1306 - %1308 = call float @llvm.sqrt.f32(float %1307) - %1309 = bitcast i32 %5 to float - %1310 = fcmp olt float %1309, 0.000000e+00 - %1311 = sext i1 %1310 to i32 - %1312 = bitcast i32 %5 to float - %1313 = fcmp ogt float %1312, 0.000000e+00 - %1314 = zext i1 %1313 to i32 - %1315 = add nsw i32 %1311, %1314 - %1316 = sitofp i32 %1315 to float - %1317 = fneg float %1316 - %1318 = fmul float %1308, %1317 - %1319 = fmul float %1318, 0.000000e+00 - %1320 = bitcast i32 %32 to float - %1321 = fadd float %1320, %1319 - %1322 = bitcast i32 %5 to float - %1323 = bitcast i32 %5 to float - %1324 = fmul float %1322, %1323 - %1325 = fadd float %1324, 0.000000e+00 - %1326 = bitcast i32 %32 to float - %1327 = bitcast i32 %32 to float - %1328 = fmul float %1326, %1327 - %1329 = fadd float %1325, %1328 - %1330 = call float @llvm.sqrt.f32(float %1329) - %1331 = bitcast i32 %5 to float - %1332 = fcmp olt float %1331, 0.000000e+00 - %1333 = sext i1 %1332 to i32 - %1334 = bitcast i32 %5 to float - %1335 = fcmp ogt float %1334, 0.000000e+00 - %1336 = zext i1 %1335 to i32 - %1337 = add nsw i32 %1333, %1336 - %1338 = sitofp i32 %1337 to float - %1339 = fneg float %1338 - %1340 = fmul float %1330, %1339 - %1341 = fmul float %1340, 0.000000e+00 - %1342 = bitcast i32 %32 to float - %1343 = fadd float %1342, %1341 - %1344 = fmul float %1321, %1343 - %1345 = fadd float %1299, %1344 - %1346 = call float @llvm.sqrt.f32(float %1345) - %1347 = fadd float %1346, 0.000000e+00 - %1348 = fdiv float %1255, %1347 - %1349 = fmul float %1348, 2.000000e+00 - %1350 = bitcast i32 %5 to float - %1351 = bitcast i32 %5 to float - %1352 = fmul float %1350, %1351 - %1353 = fadd float %1352, 0.000000e+00 - %1354 = bitcast i32 %32 to float - %1355 = bitcast i32 %32 to float - %1356 = fmul float %1354, %1355 - %1357 = fadd float %1353, %1356 - %1358 = call float @llvm.sqrt.f32(float %1357) - %1359 = bitcast i32 %5 to float - %1360 = fcmp olt float %1359, 0.000000e+00 - %1361 = sext i1 %1360 to i32 - %1362 = bitcast i32 %5 to float - %1363 = fcmp ogt float %1362, 0.000000e+00 - %1364 = zext i1 %1363 to i32 - %1365 = add nsw i32 %1361, %1364 - %1366 = 
sitofp i32 %1365 to float - %1367 = fneg float %1366 - %1368 = fmul float %1358, %1367 - %1369 = bitcast i32 %5 to float - %1370 = fadd float %1369, %1368 - %1371 = bitcast i32 %5 to float - %1372 = bitcast i32 %5 to float - %1373 = fmul float %1371, %1372 - %1374 = fadd float %1373, 0.000000e+00 - %1375 = bitcast i32 %32 to float - %1376 = bitcast i32 %32 to float - %1377 = fmul float %1375, %1376 - %1378 = fadd float %1374, %1377 - %1379 = call float @llvm.sqrt.f32(float %1378) - %1380 = bitcast i32 %5 to float - %1381 = fcmp olt float %1380, 0.000000e+00 - %1382 = sext i1 %1381 to i32 - %1383 = bitcast i32 %5 to float - %1384 = fcmp ogt float %1383, 0.000000e+00 - %1385 = zext i1 %1384 to i32 - %1386 = add nsw i32 %1382, %1385 - %1387 = sitofp i32 %1386 to float - %1388 = fneg float %1387 - %1389 = fmul float %1379, %1388 - %1390 = bitcast i32 %5 to float - %1391 = fadd float %1390, %1389 - %1392 = bitcast i32 %5 to float - %1393 = bitcast i32 %5 to float - %1394 = fmul float %1392, %1393 - %1395 = fadd float %1394, 0.000000e+00 - %1396 = bitcast i32 %32 to float - %1397 = bitcast i32 %32 to float - %1398 = fmul float %1396, %1397 - %1399 = fadd float %1395, %1398 - %1400 = call float @llvm.sqrt.f32(float %1399) - %1401 = bitcast i32 %5 to float - %1402 = fcmp olt float %1401, 0.000000e+00 - %1403 = sext i1 %1402 to i32 - %1404 = bitcast i32 %5 to float - %1405 = fcmp ogt float %1404, 0.000000e+00 - %1406 = zext i1 %1405 to i32 - %1407 = add nsw i32 %1403, %1406 - %1408 = sitofp i32 %1407 to float - %1409 = fneg float %1408 - %1410 = fmul float %1400, %1409 - %1411 = bitcast i32 %5 to float - %1412 = fadd float %1411, %1410 - %1413 = fmul float %1391, %1412 - %1414 = fadd float %1413, 0.000000e+00 - %1415 = bitcast i32 %5 to float - %1416 = bitcast i32 %5 to float - %1417 = fmul float %1415, %1416 - %1418 = fadd float %1417, 0.000000e+00 - %1419 = bitcast i32 %32 to float - %1420 = bitcast i32 %32 to float - %1421 = fmul float %1419, %1420 - %1422 = fadd float %1418, %1421 - %1423 = call float @llvm.sqrt.f32(float %1422) - %1424 = bitcast i32 %5 to float - %1425 = fcmp olt float %1424, 0.000000e+00 - %1426 = sext i1 %1425 to i32 - %1427 = bitcast i32 %5 to float - %1428 = fcmp ogt float %1427, 0.000000e+00 - %1429 = zext i1 %1428 to i32 - %1430 = add nsw i32 %1426, %1429 - %1431 = sitofp i32 %1430 to float - %1432 = fneg float %1431 - %1433 = fmul float %1423, %1432 - %1434 = fmul float %1433, 0.000000e+00 - %1435 = bitcast i32 %32 to float - %1436 = fadd float %1435, %1434 - %1437 = bitcast i32 %5 to float - %1438 = bitcast i32 %5 to float - %1439 = fmul float %1437, %1438 - %1440 = fadd float %1439, 0.000000e+00 - %1441 = bitcast i32 %32 to float - %1442 = bitcast i32 %32 to float - %1443 = fmul float %1441, %1442 - %1444 = fadd float %1440, %1443 - %1445 = call float @llvm.sqrt.f32(float %1444) - %1446 = bitcast i32 %5 to float - %1447 = fcmp olt float %1446, 0.000000e+00 - %1448 = sext i1 %1447 to i32 - %1449 = bitcast i32 %5 to float - %1450 = fcmp ogt float %1449, 0.000000e+00 - %1451 = zext i1 %1450 to i32 - %1452 = add nsw i32 %1448, %1451 - %1453 = sitofp i32 %1452 to float - %1454 = fneg float %1453 - %1455 = fmul float %1445, %1454 - %1456 = fmul float %1455, 0.000000e+00 - %1457 = bitcast i32 %32 to float - %1458 = fadd float %1457, %1456 - %1459 = fmul float %1436, %1458 - %1460 = fadd float %1414, %1459 - %1461 = call float @llvm.sqrt.f32(float %1460) - %1462 = fadd float %1461, 0.000000e+00 - %1463 = fdiv float %1370, %1462 - %1464 = fmul float %1349, %1463 - %1465 = fsub 
float 1.000000e+00, %1464 - %1466 = fmul float %1465, %1228 - %1467 = fadd float %1466, 0.000000e+00 - %1468 = bitcast i32 %5 to float - %1469 = bitcast i32 %5 to float - %1470 = fmul float %1468, %1469 - %1471 = fadd float %1470, 0.000000e+00 - %1472 = bitcast i32 %32 to float - %1473 = bitcast i32 %32 to float - %1474 = fmul float %1472, %1473 - %1475 = fadd float %1471, %1474 - %1476 = call float @llvm.sqrt.f32(float %1475) - %1477 = bitcast i32 %5 to float - %1478 = fcmp olt float %1477, 0.000000e+00 - %1479 = sext i1 %1478 to i32 - %1480 = bitcast i32 %5 to float - %1481 = fcmp ogt float %1480, 0.000000e+00 - %1482 = zext i1 %1481 to i32 - %1483 = add nsw i32 %1479, %1482 - %1484 = sitofp i32 %1483 to float - %1485 = fneg float %1484 - %1486 = fmul float %1476, %1485 - %1487 = bitcast i32 %5 to float - %1488 = fadd float %1487, %1486 - %1489 = bitcast i32 %5 to float - %1490 = bitcast i32 %5 to float - %1491 = fmul float %1489, %1490 - %1492 = fadd float %1491, 0.000000e+00 - %1493 = bitcast i32 %32 to float - %1494 = bitcast i32 %32 to float - %1495 = fmul float %1493, %1494 - %1496 = fadd float %1492, %1495 - %1497 = call float @llvm.sqrt.f32(float %1496) - %1498 = bitcast i32 %5 to float - %1499 = fcmp olt float %1498, 0.000000e+00 - %1500 = sext i1 %1499 to i32 - %1501 = bitcast i32 %5 to float - %1502 = fcmp ogt float %1501, 0.000000e+00 - %1503 = zext i1 %1502 to i32 - %1504 = add nsw i32 %1500, %1503 - %1505 = sitofp i32 %1504 to float - %1506 = fneg float %1505 - %1507 = fmul float %1497, %1506 - %1508 = bitcast i32 %5 to float - %1509 = fadd float %1508, %1507 - %1510 = bitcast i32 %5 to float - %1511 = bitcast i32 %5 to float - %1512 = fmul float %1510, %1511 - %1513 = fadd float %1512, 0.000000e+00 - %1514 = bitcast i32 %32 to float - %1515 = bitcast i32 %32 to float - %1516 = fmul float %1514, %1515 - %1517 = fadd float %1513, %1516 - %1518 = call float @llvm.sqrt.f32(float %1517) - %1519 = bitcast i32 %5 to float - %1520 = fcmp olt float %1519, 0.000000e+00 - %1521 = sext i1 %1520 to i32 - %1522 = bitcast i32 %5 to float - %1523 = fcmp ogt float %1522, 0.000000e+00 - %1524 = zext i1 %1523 to i32 - %1525 = add nsw i32 %1521, %1524 - %1526 = sitofp i32 %1525 to float - %1527 = fneg float %1526 - %1528 = fmul float %1518, %1527 - %1529 = bitcast i32 %5 to float - %1530 = fadd float %1529, %1528 - %1531 = fmul float %1509, %1530 - %1532 = fadd float %1531, 0.000000e+00 - %1533 = bitcast i32 %5 to float - %1534 = bitcast i32 %5 to float - %1535 = fmul float %1533, %1534 - %1536 = fadd float %1535, 0.000000e+00 - %1537 = bitcast i32 %32 to float - %1538 = bitcast i32 %32 to float - %1539 = fmul float %1537, %1538 - %1540 = fadd float %1536, %1539 - %1541 = call float @llvm.sqrt.f32(float %1540) - %1542 = bitcast i32 %5 to float - %1543 = fcmp olt float %1542, 0.000000e+00 - %1544 = sext i1 %1543 to i32 - %1545 = bitcast i32 %5 to float - %1546 = fcmp ogt float %1545, 0.000000e+00 - %1547 = zext i1 %1546 to i32 - %1548 = add nsw i32 %1544, %1547 - %1549 = sitofp i32 %1548 to float - %1550 = fneg float %1549 - %1551 = fmul float %1541, %1550 - %1552 = fmul float %1551, 0.000000e+00 - %1553 = bitcast i32 %32 to float - %1554 = fadd float %1553, %1552 - %1555 = bitcast i32 %5 to float - %1556 = bitcast i32 %5 to float - %1557 = fmul float %1555, %1556 - %1558 = fadd float %1557, 0.000000e+00 - %1559 = bitcast i32 %32 to float - %1560 = bitcast i32 %32 to float - %1561 = fmul float %1559, %1560 - %1562 = fadd float %1558, %1561 - %1563 = call float @llvm.sqrt.f32(float %1562) - 
%1564 = bitcast i32 %5 to float - %1565 = fcmp olt float %1564, 0.000000e+00 - %1566 = sext i1 %1565 to i32 - %1567 = bitcast i32 %5 to float - %1568 = fcmp ogt float %1567, 0.000000e+00 - %1569 = zext i1 %1568 to i32 - %1570 = add nsw i32 %1566, %1569 - %1571 = sitofp i32 %1570 to float - %1572 = fneg float %1571 - %1573 = fmul float %1563, %1572 - %1574 = fmul float %1573, 0.000000e+00 - %1575 = bitcast i32 %32 to float - %1576 = fadd float %1575, %1574 - %1577 = fmul float %1554, %1576 - %1578 = fadd float %1532, %1577 - %1579 = call float @llvm.sqrt.f32(float %1578) - %1580 = fadd float %1579, 0.000000e+00 - %1581 = fdiv float %1488, %1580 - %1582 = fmul float %1581, 2.000000e+00 - %1583 = bitcast i32 %5 to float - %1584 = bitcast i32 %5 to float - %1585 = fmul float %1583, %1584 - %1586 = fadd float %1585, 0.000000e+00 - %1587 = bitcast i32 %32 to float - %1588 = bitcast i32 %32 to float - %1589 = fmul float %1587, %1588 - %1590 = fadd float %1586, %1589 - %1591 = call float @llvm.sqrt.f32(float %1590) - %1592 = bitcast i32 %5 to float - %1593 = fcmp olt float %1592, 0.000000e+00 - %1594 = sext i1 %1593 to i32 - %1595 = bitcast i32 %5 to float - %1596 = fcmp ogt float %1595, 0.000000e+00 - %1597 = zext i1 %1596 to i32 - %1598 = add nsw i32 %1594, %1597 - %1599 = sitofp i32 %1598 to float - %1600 = fneg float %1599 - %1601 = fmul float %1591, %1600 - %1602 = fmul float %1601, 0.000000e+00 - %1603 = bitcast i32 %32 to float - %1604 = fadd float %1603, %1602 - %1605 = bitcast i32 %5 to float - %1606 = bitcast i32 %5 to float - %1607 = fmul float %1605, %1606 - %1608 = fadd float %1607, 0.000000e+00 - %1609 = bitcast i32 %32 to float - %1610 = bitcast i32 %32 to float - %1611 = fmul float %1609, %1610 - %1612 = fadd float %1608, %1611 - %1613 = call float @llvm.sqrt.f32(float %1612) - %1614 = bitcast i32 %5 to float - %1615 = fcmp olt float %1614, 0.000000e+00 - %1616 = sext i1 %1615 to i32 - %1617 = bitcast i32 %5 to float - %1618 = fcmp ogt float %1617, 0.000000e+00 - %1619 = zext i1 %1618 to i32 - %1620 = add nsw i32 %1616, %1619 - %1621 = sitofp i32 %1620 to float - %1622 = fneg float %1621 - %1623 = fmul float %1613, %1622 - %1624 = bitcast i32 %5 to float - %1625 = fadd float %1624, %1623 - %1626 = bitcast i32 %5 to float - %1627 = bitcast i32 %5 to float - %1628 = fmul float %1626, %1627 - %1629 = fadd float %1628, 0.000000e+00 - %1630 = bitcast i32 %32 to float - %1631 = bitcast i32 %32 to float - %1632 = fmul float %1630, %1631 - %1633 = fadd float %1629, %1632 - %1634 = call float @llvm.sqrt.f32(float %1633) - %1635 = bitcast i32 %5 to float - %1636 = fcmp olt float %1635, 0.000000e+00 - %1637 = sext i1 %1636 to i32 - %1638 = bitcast i32 %5 to float - %1639 = fcmp ogt float %1638, 0.000000e+00 - %1640 = zext i1 %1639 to i32 - %1641 = add nsw i32 %1637, %1640 - %1642 = sitofp i32 %1641 to float - %1643 = fneg float %1642 - %1644 = fmul float %1634, %1643 - %1645 = bitcast i32 %5 to float - %1646 = fadd float %1645, %1644 - %1647 = fmul float %1625, %1646 - %1648 = fadd float %1647, 0.000000e+00 - %1649 = bitcast i32 %5 to float - %1650 = bitcast i32 %5 to float - %1651 = fmul float %1649, %1650 - %1652 = fadd float %1651, 0.000000e+00 - %1653 = bitcast i32 %32 to float - %1654 = bitcast i32 %32 to float - %1655 = fmul float %1653, %1654 - %1656 = fadd float %1652, %1655 - %1657 = call float @llvm.sqrt.f32(float %1656) - %1658 = bitcast i32 %5 to float - %1659 = fcmp olt float %1658, 0.000000e+00 - %1660 = sext i1 %1659 to i32 - %1661 = bitcast i32 %5 to float - %1662 = fcmp ogt 
float %1661, 0.000000e+00 - %1663 = zext i1 %1662 to i32 - %1664 = add nsw i32 %1660, %1663 - %1665 = sitofp i32 %1664 to float - %1666 = fneg float %1665 - %1667 = fmul float %1657, %1666 - %1668 = fmul float %1667, 0.000000e+00 - %1669 = bitcast i32 %32 to float - %1670 = fadd float %1669, %1668 - %1671 = bitcast i32 %5 to float - %1672 = bitcast i32 %5 to float - %1673 = fmul float %1671, %1672 - %1674 = fadd float %1673, 0.000000e+00 - %1675 = bitcast i32 %32 to float - %1676 = bitcast i32 %32 to float - %1677 = fmul float %1675, %1676 - %1678 = fadd float %1674, %1677 - %1679 = call float @llvm.sqrt.f32(float %1678) - %1680 = bitcast i32 %5 to float - %1681 = fcmp olt float %1680, 0.000000e+00 - %1682 = sext i1 %1681 to i32 - %1683 = bitcast i32 %5 to float - %1684 = fcmp ogt float %1683, 0.000000e+00 - %1685 = zext i1 %1684 to i32 - %1686 = add nsw i32 %1682, %1685 - %1687 = sitofp i32 %1686 to float - %1688 = fneg float %1687 - %1689 = fmul float %1679, %1688 - %1690 = fmul float %1689, 0.000000e+00 - %1691 = bitcast i32 %32 to float - %1692 = fadd float %1691, %1690 - %1693 = fmul float %1670, %1692 - %1694 = fadd float %1648, %1693 - %1695 = call float @llvm.sqrt.f32(float %1694) - %1696 = fadd float %1695, 0.000000e+00 - %1697 = fdiv float %1604, %1696 - %1698 = fmul float %1582, %1697 - %1699 = fneg float %1698 - %1700 = getelementptr float, float* %0, i32 0 - %1701 = getelementptr inbounds float, float* %1700, i64 2 - %1702 = load float, float* %1701, align 4 - %1703 = fmul float %1699, %1702 - %1704 = fadd float %1467, %1703 - %1705 = insertelement <4 x float> zeroinitializer, float %1704, i32 0 - %1706 = insertelement <4 x float> %1705, float 0.000000e+00, i32 1 - %1707 = insertelement <4 x float> %1706, float 0.000000e+00, i32 2 - %1708 = insertelement <4 x float> %1707, float 0.000000e+00, i32 3 - %1709 = extractelement <4 x float> %1708, i32 0 - store float %1709, float* %2, align 4 - %1710 = extractelement <4 x float> %1708, i32 1 - %1711 = getelementptr float, float* %2, i32 0 - %1712 = getelementptr inbounds float, float* %1711, i64 1 - store float %1710, float* %1712, align 4 - %1713 = bitcast i32 %5 to float - %1714 = bitcast i32 %5 to float - %1715 = fmul float %1713, %1714 - %1716 = fadd float %1715, 0.000000e+00 - %1717 = bitcast i32 %32 to float - %1718 = bitcast i32 %32 to float - %1719 = fmul float %1717, %1718 - %1720 = fadd float %1716, %1719 - %1721 = call float @llvm.sqrt.f32(float %1720) - %1722 = bitcast i32 %5 to float - %1723 = fcmp olt float %1722, 0.000000e+00 - %1724 = sext i1 %1723 to i32 - %1725 = bitcast i32 %5 to float - %1726 = fcmp ogt float %1725, 0.000000e+00 - %1727 = zext i1 %1726 to i32 - %1728 = add nsw i32 %1724, %1727 - %1729 = sitofp i32 %1728 to float - %1730 = fneg float %1729 - %1731 = fmul float %1721, %1730 - %1732 = bitcast i32 %5 to float - %1733 = fadd float %1732, %1731 - %1734 = bitcast i32 %5 to float - %1735 = bitcast i32 %5 to float - %1736 = fmul float %1734, %1735 - %1737 = fadd float %1736, 0.000000e+00 - %1738 = bitcast i32 %32 to float - %1739 = bitcast i32 %32 to float - %1740 = fmul float %1738, %1739 - %1741 = fadd float %1737, %1740 - %1742 = call float @llvm.sqrt.f32(float %1741) - %1743 = bitcast i32 %5 to float - %1744 = fcmp olt float %1743, 0.000000e+00 - %1745 = sext i1 %1744 to i32 - %1746 = bitcast i32 %5 to float - %1747 = fcmp ogt float %1746, 0.000000e+00 - %1748 = zext i1 %1747 to i32 - %1749 = add nsw i32 %1745, %1748 - %1750 = sitofp i32 %1749 to float - %1751 = fneg float %1750 - %1752 = fmul float 
%1742, %1751 - %1753 = bitcast i32 %5 to float - %1754 = fadd float %1753, %1752 - %1755 = bitcast i32 %5 to float - %1756 = bitcast i32 %5 to float - %1757 = fmul float %1755, %1756 - %1758 = fadd float %1757, 0.000000e+00 - %1759 = bitcast i32 %32 to float - %1760 = bitcast i32 %32 to float - %1761 = fmul float %1759, %1760 - %1762 = fadd float %1758, %1761 - %1763 = call float @llvm.sqrt.f32(float %1762) - %1764 = bitcast i32 %5 to float - %1765 = fcmp olt float %1764, 0.000000e+00 - %1766 = sext i1 %1765 to i32 - %1767 = bitcast i32 %5 to float - %1768 = fcmp ogt float %1767, 0.000000e+00 - %1769 = zext i1 %1768 to i32 - %1770 = add nsw i32 %1766, %1769 - %1771 = sitofp i32 %1770 to float - %1772 = fneg float %1771 - %1773 = fmul float %1763, %1772 - %1774 = bitcast i32 %5 to float - %1775 = fadd float %1774, %1773 - %1776 = fmul float %1754, %1775 - %1777 = fadd float %1776, 0.000000e+00 - %1778 = bitcast i32 %5 to float - %1779 = bitcast i32 %5 to float - %1780 = fmul float %1778, %1779 - %1781 = fadd float %1780, 0.000000e+00 - %1782 = bitcast i32 %32 to float - %1783 = bitcast i32 %32 to float - %1784 = fmul float %1782, %1783 - %1785 = fadd float %1781, %1784 - %1786 = call float @llvm.sqrt.f32(float %1785) - %1787 = bitcast i32 %5 to float - %1788 = fcmp olt float %1787, 0.000000e+00 - %1789 = sext i1 %1788 to i32 - %1790 = bitcast i32 %5 to float - %1791 = fcmp ogt float %1790, 0.000000e+00 - %1792 = zext i1 %1791 to i32 - %1793 = add nsw i32 %1789, %1792 - %1794 = sitofp i32 %1793 to float - %1795 = fneg float %1794 - %1796 = fmul float %1786, %1795 - %1797 = fmul float %1796, 0.000000e+00 - %1798 = bitcast i32 %32 to float - %1799 = fadd float %1798, %1797 - %1800 = bitcast i32 %5 to float - %1801 = bitcast i32 %5 to float - %1802 = fmul float %1800, %1801 - %1803 = fadd float %1802, 0.000000e+00 - %1804 = bitcast i32 %32 to float - %1805 = bitcast i32 %32 to float - %1806 = fmul float %1804, %1805 - %1807 = fadd float %1803, %1806 - %1808 = call float @llvm.sqrt.f32(float %1807) - %1809 = bitcast i32 %5 to float - %1810 = fcmp olt float %1809, 0.000000e+00 - %1811 = sext i1 %1810 to i32 - %1812 = bitcast i32 %5 to float - %1813 = fcmp ogt float %1812, 0.000000e+00 - %1814 = zext i1 %1813 to i32 - %1815 = add nsw i32 %1811, %1814 - %1816 = sitofp i32 %1815 to float - %1817 = fneg float %1816 - %1818 = fmul float %1808, %1817 - %1819 = fmul float %1818, 0.000000e+00 - %1820 = bitcast i32 %32 to float - %1821 = fadd float %1820, %1819 - %1822 = fmul float %1799, %1821 - %1823 = fadd float %1777, %1822 - %1824 = call float @llvm.sqrt.f32(float %1823) - %1825 = fadd float %1824, 0.000000e+00 - %1826 = fdiv float %1733, %1825 - %1827 = fmul float %1826, 2.000000e+00 - %1828 = bitcast i32 %5 to float - %1829 = bitcast i32 %5 to float - %1830 = fmul float %1828, %1829 - %1831 = fadd float %1830, 0.000000e+00 - %1832 = bitcast i32 %32 to float - %1833 = bitcast i32 %32 to float - %1834 = fmul float %1832, %1833 - %1835 = fadd float %1831, %1834 - %1836 = call float @llvm.sqrt.f32(float %1835) - %1837 = bitcast i32 %5 to float - %1838 = fcmp olt float %1837, 0.000000e+00 - %1839 = sext i1 %1838 to i32 - %1840 = bitcast i32 %5 to float - %1841 = fcmp ogt float %1840, 0.000000e+00 - %1842 = zext i1 %1841 to i32 - %1843 = add nsw i32 %1839, %1842 - %1844 = sitofp i32 %1843 to float - %1845 = fneg float %1844 - %1846 = fmul float %1836, %1845 - %1847 = bitcast i32 %5 to float - %1848 = fadd float %1847, %1846 - %1849 = bitcast i32 %5 to float - %1850 = bitcast i32 %5 to float - %1851 = 
fmul float %1849, %1850 - %1852 = fadd float %1851, 0.000000e+00 - %1853 = bitcast i32 %32 to float - %1854 = bitcast i32 %32 to float - %1855 = fmul float %1853, %1854 - %1856 = fadd float %1852, %1855 - %1857 = call float @llvm.sqrt.f32(float %1856) - %1858 = bitcast i32 %5 to float - %1859 = fcmp olt float %1858, 0.000000e+00 - %1860 = sext i1 %1859 to i32 - %1861 = bitcast i32 %5 to float - %1862 = fcmp ogt float %1861, 0.000000e+00 - %1863 = zext i1 %1862 to i32 - %1864 = add nsw i32 %1860, %1863 - %1865 = sitofp i32 %1864 to float - %1866 = fneg float %1865 - %1867 = fmul float %1857, %1866 - %1868 = bitcast i32 %5 to float - %1869 = fadd float %1868, %1867 - %1870 = bitcast i32 %5 to float - %1871 = bitcast i32 %5 to float - %1872 = fmul float %1870, %1871 - %1873 = fadd float %1872, 0.000000e+00 - %1874 = bitcast i32 %32 to float - %1875 = bitcast i32 %32 to float - %1876 = fmul float %1874, %1875 - %1877 = fadd float %1873, %1876 - %1878 = call float @llvm.sqrt.f32(float %1877) - %1879 = bitcast i32 %5 to float - %1880 = fcmp olt float %1879, 0.000000e+00 - %1881 = sext i1 %1880 to i32 - %1882 = bitcast i32 %5 to float - %1883 = fcmp ogt float %1882, 0.000000e+00 - %1884 = zext i1 %1883 to i32 - %1885 = add nsw i32 %1881, %1884 - %1886 = sitofp i32 %1885 to float - %1887 = fneg float %1886 - %1888 = fmul float %1878, %1887 - %1889 = bitcast i32 %5 to float - %1890 = fadd float %1889, %1888 - %1891 = fmul float %1869, %1890 - %1892 = fadd float %1891, 0.000000e+00 - %1893 = bitcast i32 %5 to float - %1894 = bitcast i32 %5 to float - %1895 = fmul float %1893, %1894 - %1896 = fadd float %1895, 0.000000e+00 - %1897 = bitcast i32 %32 to float - %1898 = bitcast i32 %32 to float - %1899 = fmul float %1897, %1898 - %1900 = fadd float %1896, %1899 - %1901 = call float @llvm.sqrt.f32(float %1900) - %1902 = bitcast i32 %5 to float - %1903 = fcmp olt float %1902, 0.000000e+00 - %1904 = sext i1 %1903 to i32 - %1905 = bitcast i32 %5 to float - %1906 = fcmp ogt float %1905, 0.000000e+00 - %1907 = zext i1 %1906 to i32 - %1908 = add nsw i32 %1904, %1907 - %1909 = sitofp i32 %1908 to float - %1910 = fneg float %1909 - %1911 = fmul float %1901, %1910 - %1912 = fmul float %1911, 0.000000e+00 - %1913 = bitcast i32 %32 to float - %1914 = fadd float %1913, %1912 - %1915 = bitcast i32 %5 to float - %1916 = bitcast i32 %5 to float - %1917 = fmul float %1915, %1916 - %1918 = fadd float %1917, 0.000000e+00 - %1919 = bitcast i32 %32 to float - %1920 = bitcast i32 %32 to float - %1921 = fmul float %1919, %1920 - %1922 = fadd float %1918, %1921 - %1923 = call float @llvm.sqrt.f32(float %1922) - %1924 = bitcast i32 %5 to float - %1925 = fcmp olt float %1924, 0.000000e+00 - %1926 = sext i1 %1925 to i32 - %1927 = bitcast i32 %5 to float - %1928 = fcmp ogt float %1927, 0.000000e+00 - %1929 = zext i1 %1928 to i32 - %1930 = add nsw i32 %1926, %1929 - %1931 = sitofp i32 %1930 to float - %1932 = fneg float %1931 - %1933 = fmul float %1923, %1932 - %1934 = fmul float %1933, 0.000000e+00 - %1935 = bitcast i32 %32 to float - %1936 = fadd float %1935, %1934 - %1937 = fmul float %1914, %1936 - %1938 = fadd float %1892, %1937 - %1939 = call float @llvm.sqrt.f32(float %1938) - %1940 = fadd float %1939, 0.000000e+00 - %1941 = fdiv float %1848, %1940 - %1942 = fmul float %1827, %1941 - %1943 = fsub float 1.000000e+00, %1942 - %1944 = insertelement <4 x float> zeroinitializer, float %1943, i32 0 - %1945 = insertelement <4 x float> %1944, float 0.000000e+00, i32 1 - %1946 = insertelement <4 x float> %1945, float 0.000000e+00, 
i32 2 - %1947 = insertelement <4 x float> %1946, float 0.000000e+00, i32 3 - %1948 = getelementptr float, float* %0, i32 0 - %1949 = getelementptr inbounds float, float* %1948, i64 1 - %1950 = load float, float* %1949, align 4 - %1951 = insertelement <4 x float> zeroinitializer, float %1950, i32 0 - %1952 = insertelement <4 x float> %1951, float 0.000000e+00, i32 1 - %1953 = insertelement <4 x float> %1952, float 0.000000e+00, i32 2 - %1954 = insertelement <4 x float> %1953, float 0.000000e+00, i32 3 - %1955 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1947, <4 x float> %1954, <4 x float> zeroinitializer) - %1956 = extractelement <4 x float> %1955, i32 0 - %1957 = getelementptr float, float* %2, i32 0 - %1958 = getelementptr inbounds float, float* %1957, i64 1 - store float %1956, float* %1958, align 4 - %1959 = bitcast i32 %5 to float - %1960 = bitcast i32 %5 to float - %1961 = fmul float %1959, %1960 - %1962 = fadd float %1961, 0.000000e+00 - %1963 = bitcast i32 %32 to float - %1964 = bitcast i32 %32 to float - %1965 = fmul float %1963, %1964 - %1966 = fadd float %1962, %1965 - %1967 = call float @llvm.sqrt.f32(float %1966) - %1968 = bitcast i32 %5 to float - %1969 = fcmp olt float %1968, 0.000000e+00 - %1970 = sext i1 %1969 to i32 - %1971 = bitcast i32 %5 to float - %1972 = fcmp ogt float %1971, 0.000000e+00 - %1973 = zext i1 %1972 to i32 - %1974 = add nsw i32 %1970, %1973 - %1975 = sitofp i32 %1974 to float - %1976 = fneg float %1975 - %1977 = fmul float %1967, %1976 - %1978 = bitcast i32 %5 to float - %1979 = fadd float %1978, %1977 - %1980 = bitcast i32 %5 to float - %1981 = bitcast i32 %5 to float - %1982 = fmul float %1980, %1981 - %1983 = fadd float %1982, 0.000000e+00 - %1984 = bitcast i32 %32 to float - %1985 = bitcast i32 %32 to float - %1986 = fmul float %1984, %1985 - %1987 = fadd float %1983, %1986 - %1988 = call float @llvm.sqrt.f32(float %1987) - %1989 = bitcast i32 %5 to float - %1990 = fcmp olt float %1989, 0.000000e+00 - %1991 = sext i1 %1990 to i32 - %1992 = bitcast i32 %5 to float - %1993 = fcmp ogt float %1992, 0.000000e+00 - %1994 = zext i1 %1993 to i32 - %1995 = add nsw i32 %1991, %1994 - %1996 = sitofp i32 %1995 to float - %1997 = fneg float %1996 - %1998 = fmul float %1988, %1997 - %1999 = bitcast i32 %5 to float - %2000 = fadd float %1999, %1998 - %2001 = bitcast i32 %5 to float - %2002 = bitcast i32 %5 to float - %2003 = fmul float %2001, %2002 - %2004 = fadd float %2003, 0.000000e+00 - %2005 = bitcast i32 %32 to float - %2006 = bitcast i32 %32 to float - %2007 = fmul float %2005, %2006 - %2008 = fadd float %2004, %2007 - %2009 = call float @llvm.sqrt.f32(float %2008) - %2010 = bitcast i32 %5 to float - %2011 = fcmp olt float %2010, 0.000000e+00 - %2012 = sext i1 %2011 to i32 - %2013 = bitcast i32 %5 to float - %2014 = fcmp ogt float %2013, 0.000000e+00 - %2015 = zext i1 %2014 to i32 - %2016 = add nsw i32 %2012, %2015 - %2017 = sitofp i32 %2016 to float - %2018 = fneg float %2017 - %2019 = fmul float %2009, %2018 - %2020 = bitcast i32 %5 to float - %2021 = fadd float %2020, %2019 - %2022 = fmul float %2000, %2021 - %2023 = fadd float %2022, 0.000000e+00 - %2024 = bitcast i32 %5 to float - %2025 = bitcast i32 %5 to float - %2026 = fmul float %2024, %2025 - %2027 = fadd float %2026, 0.000000e+00 - %2028 = bitcast i32 %32 to float - %2029 = bitcast i32 %32 to float - %2030 = fmul float %2028, %2029 - %2031 = fadd float %2027, %2030 - %2032 = call float @llvm.sqrt.f32(float %2031) - %2033 = bitcast i32 %5 to float - %2034 = fcmp olt float %2033, 0.000000e+00 - 
%2035 = sext i1 %2034 to i32 - %2036 = bitcast i32 %5 to float - %2037 = fcmp ogt float %2036, 0.000000e+00 - %2038 = zext i1 %2037 to i32 - %2039 = add nsw i32 %2035, %2038 - %2040 = sitofp i32 %2039 to float - %2041 = fneg float %2040 - %2042 = fmul float %2032, %2041 - %2043 = fmul float %2042, 0.000000e+00 - %2044 = bitcast i32 %32 to float - %2045 = fadd float %2044, %2043 - %2046 = bitcast i32 %5 to float - %2047 = bitcast i32 %5 to float - %2048 = fmul float %2046, %2047 - %2049 = fadd float %2048, 0.000000e+00 - %2050 = bitcast i32 %32 to float - %2051 = bitcast i32 %32 to float - %2052 = fmul float %2050, %2051 - %2053 = fadd float %2049, %2052 - %2054 = call float @llvm.sqrt.f32(float %2053) - %2055 = bitcast i32 %5 to float - %2056 = fcmp olt float %2055, 0.000000e+00 - %2057 = sext i1 %2056 to i32 - %2058 = bitcast i32 %5 to float - %2059 = fcmp ogt float %2058, 0.000000e+00 - %2060 = zext i1 %2059 to i32 - %2061 = add nsw i32 %2057, %2060 - %2062 = sitofp i32 %2061 to float - %2063 = fneg float %2062 - %2064 = fmul float %2054, %2063 - %2065 = fmul float %2064, 0.000000e+00 - %2066 = bitcast i32 %32 to float - %2067 = fadd float %2066, %2065 - %2068 = fmul float %2045, %2067 - %2069 = fadd float %2023, %2068 - %2070 = call float @llvm.sqrt.f32(float %2069) - %2071 = fadd float %2070, 0.000000e+00 - %2072 = fdiv float %1979, %2071 - %2073 = fmul float %2072, 2.000000e+00 - %2074 = bitcast i32 %5 to float - %2075 = bitcast i32 %5 to float - %2076 = fmul float %2074, %2075 - %2077 = fadd float %2076, 0.000000e+00 - %2078 = bitcast i32 %32 to float - %2079 = bitcast i32 %32 to float - %2080 = fmul float %2078, %2079 - %2081 = fadd float %2077, %2080 - %2082 = call float @llvm.sqrt.f32(float %2081) - %2083 = bitcast i32 %5 to float - %2084 = fcmp olt float %2083, 0.000000e+00 - %2085 = sext i1 %2084 to i32 - %2086 = bitcast i32 %5 to float - %2087 = fcmp ogt float %2086, 0.000000e+00 - %2088 = zext i1 %2087 to i32 - %2089 = add nsw i32 %2085, %2088 - %2090 = sitofp i32 %2089 to float - %2091 = fneg float %2090 - %2092 = fmul float %2082, %2091 - %2093 = bitcast i32 %5 to float - %2094 = fadd float %2093, %2092 - %2095 = bitcast i32 %5 to float - %2096 = bitcast i32 %5 to float - %2097 = fmul float %2095, %2096 - %2098 = fadd float %2097, 0.000000e+00 - %2099 = bitcast i32 %32 to float - %2100 = bitcast i32 %32 to float - %2101 = fmul float %2099, %2100 - %2102 = fadd float %2098, %2101 - %2103 = call float @llvm.sqrt.f32(float %2102) - %2104 = bitcast i32 %5 to float - %2105 = fcmp olt float %2104, 0.000000e+00 - %2106 = sext i1 %2105 to i32 - %2107 = bitcast i32 %5 to float - %2108 = fcmp ogt float %2107, 0.000000e+00 - %2109 = zext i1 %2108 to i32 - %2110 = add nsw i32 %2106, %2109 - %2111 = sitofp i32 %2110 to float - %2112 = fneg float %2111 - %2113 = fmul float %2103, %2112 - %2114 = bitcast i32 %5 to float - %2115 = fadd float %2114, %2113 - %2116 = bitcast i32 %5 to float - %2117 = bitcast i32 %5 to float - %2118 = fmul float %2116, %2117 - %2119 = fadd float %2118, 0.000000e+00 - %2120 = bitcast i32 %32 to float - %2121 = bitcast i32 %32 to float - %2122 = fmul float %2120, %2121 - %2123 = fadd float %2119, %2122 - %2124 = call float @llvm.sqrt.f32(float %2123) - %2125 = bitcast i32 %5 to float - %2126 = fcmp olt float %2125, 0.000000e+00 - %2127 = sext i1 %2126 to i32 - %2128 = bitcast i32 %5 to float - %2129 = fcmp ogt float %2128, 0.000000e+00 - %2130 = zext i1 %2129 to i32 - %2131 = add nsw i32 %2127, %2130 - %2132 = sitofp i32 %2131 to float - %2133 = fneg float %2132 
- %2134 = fmul float %2124, %2133 - %2135 = bitcast i32 %5 to float - %2136 = fadd float %2135, %2134 - %2137 = fmul float %2115, %2136 - %2138 = fadd float %2137, 0.000000e+00 - %2139 = bitcast i32 %5 to float - %2140 = bitcast i32 %5 to float - %2141 = fmul float %2139, %2140 - %2142 = fadd float %2141, 0.000000e+00 - %2143 = bitcast i32 %32 to float - %2144 = bitcast i32 %32 to float - %2145 = fmul float %2143, %2144 - %2146 = fadd float %2142, %2145 - %2147 = call float @llvm.sqrt.f32(float %2146) - %2148 = bitcast i32 %5 to float - %2149 = fcmp olt float %2148, 0.000000e+00 - %2150 = sext i1 %2149 to i32 - %2151 = bitcast i32 %5 to float - %2152 = fcmp ogt float %2151, 0.000000e+00 - %2153 = zext i1 %2152 to i32 - %2154 = add nsw i32 %2150, %2153 - %2155 = sitofp i32 %2154 to float - %2156 = fneg float %2155 - %2157 = fmul float %2147, %2156 - %2158 = fmul float %2157, 0.000000e+00 - %2159 = bitcast i32 %32 to float - %2160 = fadd float %2159, %2158 - %2161 = bitcast i32 %5 to float - %2162 = bitcast i32 %5 to float - %2163 = fmul float %2161, %2162 - %2164 = fadd float %2163, 0.000000e+00 - %2165 = bitcast i32 %32 to float - %2166 = bitcast i32 %32 to float - %2167 = fmul float %2165, %2166 - %2168 = fadd float %2164, %2167 - %2169 = call float @llvm.sqrt.f32(float %2168) - %2170 = bitcast i32 %5 to float - %2171 = fcmp olt float %2170, 0.000000e+00 - %2172 = sext i1 %2171 to i32 - %2173 = bitcast i32 %5 to float - %2174 = fcmp ogt float %2173, 0.000000e+00 - %2175 = zext i1 %2174 to i32 - %2176 = add nsw i32 %2172, %2175 - %2177 = sitofp i32 %2176 to float - %2178 = fneg float %2177 - %2179 = fmul float %2169, %2178 - %2180 = fmul float %2179, 0.000000e+00 - %2181 = bitcast i32 %32 to float - %2182 = fadd float %2181, %2180 - %2183 = fmul float %2160, %2182 - %2184 = fadd float %2138, %2183 - %2185 = call float @llvm.sqrt.f32(float %2184) - %2186 = fadd float %2185, 0.000000e+00 - %2187 = fdiv float %2094, %2186 - %2188 = fmul float %2073, %2187 - %2189 = fsub float 1.000000e+00, %2188 - %2190 = fmul float %2189, %1950 - %2191 = fadd float %2190, 0.000000e+00 - %2192 = bitcast i32 %5 to float - %2193 = bitcast i32 %5 to float - %2194 = fmul float %2192, %2193 - %2195 = fadd float %2194, 0.000000e+00 - %2196 = bitcast i32 %32 to float - %2197 = bitcast i32 %32 to float - %2198 = fmul float %2196, %2197 - %2199 = fadd float %2195, %2198 - %2200 = call float @llvm.sqrt.f32(float %2199) - %2201 = bitcast i32 %5 to float - %2202 = fcmp olt float %2201, 0.000000e+00 - %2203 = sext i1 %2202 to i32 - %2204 = bitcast i32 %5 to float - %2205 = fcmp ogt float %2204, 0.000000e+00 - %2206 = zext i1 %2205 to i32 - %2207 = add nsw i32 %2203, %2206 - %2208 = sitofp i32 %2207 to float - %2209 = fneg float %2208 - %2210 = fmul float %2200, %2209 - %2211 = bitcast i32 %5 to float - %2212 = fadd float %2211, %2210 - %2213 = bitcast i32 %5 to float - %2214 = bitcast i32 %5 to float - %2215 = fmul float %2213, %2214 - %2216 = fadd float %2215, 0.000000e+00 - %2217 = bitcast i32 %32 to float - %2218 = bitcast i32 %32 to float - %2219 = fmul float %2217, %2218 - %2220 = fadd float %2216, %2219 - %2221 = call float @llvm.sqrt.f32(float %2220) - %2222 = bitcast i32 %5 to float - %2223 = fcmp olt float %2222, 0.000000e+00 - %2224 = sext i1 %2223 to i32 - %2225 = bitcast i32 %5 to float - %2226 = fcmp ogt float %2225, 0.000000e+00 - %2227 = zext i1 %2226 to i32 - %2228 = add nsw i32 %2224, %2227 - %2229 = sitofp i32 %2228 to float - %2230 = fneg float %2229 - %2231 = fmul float %2221, %2230 - %2232 = bitcast 
i32 %5 to float - %2233 = fadd float %2232, %2231 - %2234 = bitcast i32 %5 to float - %2235 = bitcast i32 %5 to float - %2236 = fmul float %2234, %2235 - %2237 = fadd float %2236, 0.000000e+00 - %2238 = bitcast i32 %32 to float - %2239 = bitcast i32 %32 to float - %2240 = fmul float %2238, %2239 - %2241 = fadd float %2237, %2240 - %2242 = call float @llvm.sqrt.f32(float %2241) - %2243 = bitcast i32 %5 to float - %2244 = fcmp olt float %2243, 0.000000e+00 - %2245 = sext i1 %2244 to i32 - %2246 = bitcast i32 %5 to float - %2247 = fcmp ogt float %2246, 0.000000e+00 - %2248 = zext i1 %2247 to i32 - %2249 = add nsw i32 %2245, %2248 - %2250 = sitofp i32 %2249 to float - %2251 = fneg float %2250 - %2252 = fmul float %2242, %2251 - %2253 = bitcast i32 %5 to float - %2254 = fadd float %2253, %2252 - %2255 = fmul float %2233, %2254 - %2256 = fadd float %2255, 0.000000e+00 - %2257 = bitcast i32 %5 to float - %2258 = bitcast i32 %5 to float - %2259 = fmul float %2257, %2258 - %2260 = fadd float %2259, 0.000000e+00 - %2261 = bitcast i32 %32 to float - %2262 = bitcast i32 %32 to float - %2263 = fmul float %2261, %2262 - %2264 = fadd float %2260, %2263 - %2265 = call float @llvm.sqrt.f32(float %2264) - %2266 = bitcast i32 %5 to float - %2267 = fcmp olt float %2266, 0.000000e+00 - %2268 = sext i1 %2267 to i32 - %2269 = bitcast i32 %5 to float - %2270 = fcmp ogt float %2269, 0.000000e+00 - %2271 = zext i1 %2270 to i32 - %2272 = add nsw i32 %2268, %2271 - %2273 = sitofp i32 %2272 to float - %2274 = fneg float %2273 - %2275 = fmul float %2265, %2274 - %2276 = fmul float %2275, 0.000000e+00 - %2277 = bitcast i32 %32 to float - %2278 = fadd float %2277, %2276 - %2279 = bitcast i32 %5 to float - %2280 = bitcast i32 %5 to float - %2281 = fmul float %2279, %2280 - %2282 = fadd float %2281, 0.000000e+00 - %2283 = bitcast i32 %32 to float - %2284 = bitcast i32 %32 to float - %2285 = fmul float %2283, %2284 - %2286 = fadd float %2282, %2285 - %2287 = call float @llvm.sqrt.f32(float %2286) - %2288 = bitcast i32 %5 to float - %2289 = fcmp olt float %2288, 0.000000e+00 - %2290 = sext i1 %2289 to i32 - %2291 = bitcast i32 %5 to float - %2292 = fcmp ogt float %2291, 0.000000e+00 - %2293 = zext i1 %2292 to i32 - %2294 = add nsw i32 %2290, %2293 - %2295 = sitofp i32 %2294 to float - %2296 = fneg float %2295 - %2297 = fmul float %2287, %2296 - %2298 = fmul float %2297, 0.000000e+00 - %2299 = bitcast i32 %32 to float - %2300 = fadd float %2299, %2298 - %2301 = fmul float %2278, %2300 - %2302 = fadd float %2256, %2301 - %2303 = call float @llvm.sqrt.f32(float %2302) - %2304 = fadd float %2303, 0.000000e+00 - %2305 = fdiv float %2212, %2304 - %2306 = fmul float %2305, 2.000000e+00 - %2307 = bitcast i32 %5 to float - %2308 = bitcast i32 %5 to float - %2309 = fmul float %2307, %2308 - %2310 = fadd float %2309, 0.000000e+00 - %2311 = bitcast i32 %32 to float - %2312 = bitcast i32 %32 to float - %2313 = fmul float %2311, %2312 - %2314 = fadd float %2310, %2313 - %2315 = call float @llvm.sqrt.f32(float %2314) - %2316 = bitcast i32 %5 to float - %2317 = fcmp olt float %2316, 0.000000e+00 - %2318 = sext i1 %2317 to i32 - %2319 = bitcast i32 %5 to float - %2320 = fcmp ogt float %2319, 0.000000e+00 - %2321 = zext i1 %2320 to i32 - %2322 = add nsw i32 %2318, %2321 - %2323 = sitofp i32 %2322 to float - %2324 = fneg float %2323 - %2325 = fmul float %2315, %2324 - %2326 = fmul float %2325, 0.000000e+00 - %2327 = bitcast i32 %32 to float - %2328 = fadd float %2327, %2326 - %2329 = bitcast i32 %5 to float - %2330 = bitcast i32 %5 to float - 
%2331 = fmul float %2329, %2330 - %2332 = fadd float %2331, 0.000000e+00 - %2333 = bitcast i32 %32 to float - %2334 = bitcast i32 %32 to float - %2335 = fmul float %2333, %2334 - %2336 = fadd float %2332, %2335 - %2337 = call float @llvm.sqrt.f32(float %2336) - %2338 = bitcast i32 %5 to float - %2339 = fcmp olt float %2338, 0.000000e+00 - %2340 = sext i1 %2339 to i32 - %2341 = bitcast i32 %5 to float - %2342 = fcmp ogt float %2341, 0.000000e+00 - %2343 = zext i1 %2342 to i32 - %2344 = add nsw i32 %2340, %2343 - %2345 = sitofp i32 %2344 to float - %2346 = fneg float %2345 - %2347 = fmul float %2337, %2346 - %2348 = bitcast i32 %5 to float - %2349 = fadd float %2348, %2347 - %2350 = bitcast i32 %5 to float - %2351 = bitcast i32 %5 to float - %2352 = fmul float %2350, %2351 - %2353 = fadd float %2352, 0.000000e+00 - %2354 = bitcast i32 %32 to float - %2355 = bitcast i32 %32 to float - %2356 = fmul float %2354, %2355 - %2357 = fadd float %2353, %2356 - %2358 = call float @llvm.sqrt.f32(float %2357) - %2359 = bitcast i32 %5 to float - %2360 = fcmp olt float %2359, 0.000000e+00 - %2361 = sext i1 %2360 to i32 - %2362 = bitcast i32 %5 to float - %2363 = fcmp ogt float %2362, 0.000000e+00 - %2364 = zext i1 %2363 to i32 - %2365 = add nsw i32 %2361, %2364 - %2366 = sitofp i32 %2365 to float - %2367 = fneg float %2366 - %2368 = fmul float %2358, %2367 - %2369 = bitcast i32 %5 to float - %2370 = fadd float %2369, %2368 - %2371 = fmul float %2349, %2370 - %2372 = fadd float %2371, 0.000000e+00 - %2373 = bitcast i32 %5 to float - %2374 = bitcast i32 %5 to float - %2375 = fmul float %2373, %2374 - %2376 = fadd float %2375, 0.000000e+00 - %2377 = bitcast i32 %32 to float - %2378 = bitcast i32 %32 to float - %2379 = fmul float %2377, %2378 - %2380 = fadd float %2376, %2379 - %2381 = call float @llvm.sqrt.f32(float %2380) - %2382 = bitcast i32 %5 to float - %2383 = fcmp olt float %2382, 0.000000e+00 - %2384 = sext i1 %2383 to i32 - %2385 = bitcast i32 %5 to float - %2386 = fcmp ogt float %2385, 0.000000e+00 - %2387 = zext i1 %2386 to i32 - %2388 = add nsw i32 %2384, %2387 - %2389 = sitofp i32 %2388 to float - %2390 = fneg float %2389 - %2391 = fmul float %2381, %2390 - %2392 = fmul float %2391, 0.000000e+00 - %2393 = bitcast i32 %32 to float - %2394 = fadd float %2393, %2392 - %2395 = bitcast i32 %5 to float - %2396 = bitcast i32 %5 to float - %2397 = fmul float %2395, %2396 - %2398 = fadd float %2397, 0.000000e+00 - %2399 = bitcast i32 %32 to float - %2400 = bitcast i32 %32 to float - %2401 = fmul float %2399, %2400 - %2402 = fadd float %2398, %2401 - %2403 = call float @llvm.sqrt.f32(float %2402) - %2404 = bitcast i32 %5 to float - %2405 = fcmp olt float %2404, 0.000000e+00 - %2406 = sext i1 %2405 to i32 - %2407 = bitcast i32 %5 to float - %2408 = fcmp ogt float %2407, 0.000000e+00 - %2409 = zext i1 %2408 to i32 - %2410 = add nsw i32 %2406, %2409 - %2411 = sitofp i32 %2410 to float - %2412 = fneg float %2411 - %2413 = fmul float %2403, %2412 - %2414 = fmul float %2413, 0.000000e+00 - %2415 = bitcast i32 %32 to float - %2416 = fadd float %2415, %2414 - %2417 = fmul float %2394, %2416 - %2418 = fadd float %2372, %2417 - %2419 = call float @llvm.sqrt.f32(float %2418) - %2420 = fadd float %2419, 0.000000e+00 - %2421 = fdiv float %2328, %2420 - %2422 = fmul float %2306, %2421 - %2423 = fneg float %2422 - %2424 = getelementptr float, float* %0, i32 0 - %2425 = getelementptr inbounds float, float* %2424, i64 3 - %2426 = load float, float* %2425, align 4 - %2427 = fmul float %2423, %2426 - %2428 = fadd float 
%2191, %2427 - %2429 = insertelement <4 x float> zeroinitializer, float %2428, i32 0 - %2430 = insertelement <4 x float> %2429, float 0.000000e+00, i32 1 - %2431 = insertelement <4 x float> %2430, float 0.000000e+00, i32 2 - %2432 = insertelement <4 x float> %2431, float 0.000000e+00, i32 3 - %2433 = extractelement <4 x float> %2432, i32 0 - %2434 = getelementptr float, float* %2, i32 0 - %2435 = getelementptr inbounds float, float* %2434, i64 1 - store float %2433, float* %2435, align 4 - %2436 = extractelement <4 x float> %2432, i32 1 - %2437 = getelementptr float, float* %2, i32 0 - %2438 = getelementptr inbounds float, float* %2437, i64 2 - store float %2436, float* %2438, align 4 - %2439 = bitcast i32 %5 to float - %2440 = bitcast i32 %5 to float - %2441 = fmul float %2439, %2440 - %2442 = fadd float %2441, 0.000000e+00 - %2443 = bitcast i32 %32 to float - %2444 = bitcast i32 %32 to float - %2445 = fmul float %2443, %2444 - %2446 = fadd float %2442, %2445 - %2447 = call float @llvm.sqrt.f32(float %2446) - %2448 = bitcast i32 %5 to float - %2449 = fcmp olt float %2448, 0.000000e+00 - %2450 = sext i1 %2449 to i32 - %2451 = bitcast i32 %5 to float - %2452 = fcmp ogt float %2451, 0.000000e+00 - %2453 = zext i1 %2452 to i32 - %2454 = add nsw i32 %2450, %2453 - %2455 = sitofp i32 %2454 to float - %2456 = fneg float %2455 - %2457 = fmul float %2447, %2456 - %2458 = fmul float %2457, 0.000000e+00 - %2459 = bitcast i32 %32 to float - %2460 = fadd float %2459, %2458 - %2461 = bitcast i32 %5 to float - %2462 = bitcast i32 %5 to float - %2463 = fmul float %2461, %2462 - %2464 = fadd float %2463, 0.000000e+00 - %2465 = bitcast i32 %32 to float - %2466 = bitcast i32 %32 to float - %2467 = fmul float %2465, %2466 - %2468 = fadd float %2464, %2467 - %2469 = call float @llvm.sqrt.f32(float %2468) - %2470 = bitcast i32 %5 to float - %2471 = fcmp olt float %2470, 0.000000e+00 - %2472 = sext i1 %2471 to i32 - %2473 = bitcast i32 %5 to float - %2474 = fcmp ogt float %2473, 0.000000e+00 - %2475 = zext i1 %2474 to i32 - %2476 = add nsw i32 %2472, %2475 - %2477 = sitofp i32 %2476 to float - %2478 = fneg float %2477 - %2479 = fmul float %2469, %2478 - %2480 = bitcast i32 %5 to float - %2481 = fadd float %2480, %2479 - %2482 = bitcast i32 %5 to float - %2483 = bitcast i32 %5 to float - %2484 = fmul float %2482, %2483 - %2485 = fadd float %2484, 0.000000e+00 - %2486 = bitcast i32 %32 to float - %2487 = bitcast i32 %32 to float - %2488 = fmul float %2486, %2487 - %2489 = fadd float %2485, %2488 - %2490 = call float @llvm.sqrt.f32(float %2489) - %2491 = bitcast i32 %5 to float - %2492 = fcmp olt float %2491, 0.000000e+00 - %2493 = sext i1 %2492 to i32 - %2494 = bitcast i32 %5 to float - %2495 = fcmp ogt float %2494, 0.000000e+00 - %2496 = zext i1 %2495 to i32 - %2497 = add nsw i32 %2493, %2496 - %2498 = sitofp i32 %2497 to float - %2499 = fneg float %2498 - %2500 = fmul float %2490, %2499 - %2501 = bitcast i32 %5 to float - %2502 = fadd float %2501, %2500 - %2503 = fmul float %2481, %2502 - %2504 = fadd float %2503, 0.000000e+00 - %2505 = bitcast i32 %5 to float - %2506 = bitcast i32 %5 to float - %2507 = fmul float %2505, %2506 - %2508 = fadd float %2507, 0.000000e+00 - %2509 = bitcast i32 %32 to float - %2510 = bitcast i32 %32 to float - %2511 = fmul float %2509, %2510 - %2512 = fadd float %2508, %2511 - %2513 = call float @llvm.sqrt.f32(float %2512) - %2514 = bitcast i32 %5 to float - %2515 = fcmp olt float %2514, 0.000000e+00 - %2516 = sext i1 %2515 to i32 - %2517 = bitcast i32 %5 to float - %2518 = fcmp ogt 
float %2517, 0.000000e+00 - %2519 = zext i1 %2518 to i32 - %2520 = add nsw i32 %2516, %2519 - %2521 = sitofp i32 %2520 to float - %2522 = fneg float %2521 - %2523 = fmul float %2513, %2522 - %2524 = fmul float %2523, 0.000000e+00 - %2525 = bitcast i32 %32 to float - %2526 = fadd float %2525, %2524 - %2527 = bitcast i32 %5 to float - %2528 = bitcast i32 %5 to float - %2529 = fmul float %2527, %2528 - %2530 = fadd float %2529, 0.000000e+00 - %2531 = bitcast i32 %32 to float - %2532 = bitcast i32 %32 to float - %2533 = fmul float %2531, %2532 - %2534 = fadd float %2530, %2533 - %2535 = call float @llvm.sqrt.f32(float %2534) - %2536 = bitcast i32 %5 to float - %2537 = fcmp olt float %2536, 0.000000e+00 - %2538 = sext i1 %2537 to i32 - %2539 = bitcast i32 %5 to float - %2540 = fcmp ogt float %2539, 0.000000e+00 - %2541 = zext i1 %2540 to i32 - %2542 = add nsw i32 %2538, %2541 - %2543 = sitofp i32 %2542 to float - %2544 = fneg float %2543 - %2545 = fmul float %2535, %2544 - %2546 = fmul float %2545, 0.000000e+00 - %2547 = bitcast i32 %32 to float - %2548 = fadd float %2547, %2546 - %2549 = fmul float %2526, %2548 - %2550 = fadd float %2504, %2549 - %2551 = call float @llvm.sqrt.f32(float %2550) - %2552 = fadd float %2551, 0.000000e+00 - %2553 = fdiv float %2460, %2552 - %2554 = fmul float %2553, 2.000000e+00 - %2555 = bitcast i32 %5 to float - %2556 = bitcast i32 %5 to float - %2557 = fmul float %2555, %2556 - %2558 = fadd float %2557, 0.000000e+00 - %2559 = bitcast i32 %32 to float - %2560 = bitcast i32 %32 to float - %2561 = fmul float %2559, %2560 - %2562 = fadd float %2558, %2561 - %2563 = call float @llvm.sqrt.f32(float %2562) - %2564 = bitcast i32 %5 to float - %2565 = fcmp olt float %2564, 0.000000e+00 - %2566 = sext i1 %2565 to i32 - %2567 = bitcast i32 %5 to float - %2568 = fcmp ogt float %2567, 0.000000e+00 - %2569 = zext i1 %2568 to i32 - %2570 = add nsw i32 %2566, %2569 - %2571 = sitofp i32 %2570 to float - %2572 = fneg float %2571 - %2573 = fmul float %2563, %2572 - %2574 = bitcast i32 %5 to float - %2575 = fadd float %2574, %2573 - %2576 = bitcast i32 %5 to float - %2577 = bitcast i32 %5 to float - %2578 = fmul float %2576, %2577 - %2579 = fadd float %2578, 0.000000e+00 - %2580 = bitcast i32 %32 to float - %2581 = bitcast i32 %32 to float - %2582 = fmul float %2580, %2581 - %2583 = fadd float %2579, %2582 - %2584 = call float @llvm.sqrt.f32(float %2583) - %2585 = bitcast i32 %5 to float - %2586 = fcmp olt float %2585, 0.000000e+00 - %2587 = sext i1 %2586 to i32 - %2588 = bitcast i32 %5 to float - %2589 = fcmp ogt float %2588, 0.000000e+00 - %2590 = zext i1 %2589 to i32 - %2591 = add nsw i32 %2587, %2590 - %2592 = sitofp i32 %2591 to float - %2593 = fneg float %2592 - %2594 = fmul float %2584, %2593 - %2595 = bitcast i32 %5 to float - %2596 = fadd float %2595, %2594 - %2597 = bitcast i32 %5 to float - %2598 = bitcast i32 %5 to float - %2599 = fmul float %2597, %2598 - %2600 = fadd float %2599, 0.000000e+00 - %2601 = bitcast i32 %32 to float - %2602 = bitcast i32 %32 to float - %2603 = fmul float %2601, %2602 - %2604 = fadd float %2600, %2603 - %2605 = call float @llvm.sqrt.f32(float %2604) - %2606 = bitcast i32 %5 to float - %2607 = fcmp olt float %2606, 0.000000e+00 - %2608 = sext i1 %2607 to i32 - %2609 = bitcast i32 %5 to float - %2610 = fcmp ogt float %2609, 0.000000e+00 - %2611 = zext i1 %2610 to i32 - %2612 = add nsw i32 %2608, %2611 - %2613 = sitofp i32 %2612 to float - %2614 = fneg float %2613 - %2615 = fmul float %2605, %2614 - %2616 = bitcast i32 %5 to float - %2617 = fadd 
float %2616, %2615 - %2618 = fmul float %2596, %2617 - %2619 = fadd float %2618, 0.000000e+00 - %2620 = bitcast i32 %5 to float - %2621 = bitcast i32 %5 to float - %2622 = fmul float %2620, %2621 - %2623 = fadd float %2622, 0.000000e+00 - %2624 = bitcast i32 %32 to float - %2625 = bitcast i32 %32 to float - %2626 = fmul float %2624, %2625 - %2627 = fadd float %2623, %2626 - %2628 = call float @llvm.sqrt.f32(float %2627) - %2629 = bitcast i32 %5 to float - %2630 = fcmp olt float %2629, 0.000000e+00 - %2631 = sext i1 %2630 to i32 - %2632 = bitcast i32 %5 to float - %2633 = fcmp ogt float %2632, 0.000000e+00 - %2634 = zext i1 %2633 to i32 - %2635 = add nsw i32 %2631, %2634 - %2636 = sitofp i32 %2635 to float - %2637 = fneg float %2636 - %2638 = fmul float %2628, %2637 - %2639 = fmul float %2638, 0.000000e+00 - %2640 = bitcast i32 %32 to float - %2641 = fadd float %2640, %2639 - %2642 = bitcast i32 %5 to float - %2643 = bitcast i32 %5 to float - %2644 = fmul float %2642, %2643 - %2645 = fadd float %2644, 0.000000e+00 - %2646 = bitcast i32 %32 to float - %2647 = bitcast i32 %32 to float - %2648 = fmul float %2646, %2647 - %2649 = fadd float %2645, %2648 - %2650 = call float @llvm.sqrt.f32(float %2649) - %2651 = bitcast i32 %5 to float - %2652 = fcmp olt float %2651, 0.000000e+00 - %2653 = sext i1 %2652 to i32 - %2654 = bitcast i32 %5 to float - %2655 = fcmp ogt float %2654, 0.000000e+00 - %2656 = zext i1 %2655 to i32 - %2657 = add nsw i32 %2653, %2656 - %2658 = sitofp i32 %2657 to float - %2659 = fneg float %2658 - %2660 = fmul float %2650, %2659 - %2661 = fmul float %2660, 0.000000e+00 - %2662 = bitcast i32 %32 to float - %2663 = fadd float %2662, %2661 - %2664 = fmul float %2641, %2663 - %2665 = fadd float %2619, %2664 - %2666 = call float @llvm.sqrt.f32(float %2665) - %2667 = fadd float %2666, 0.000000e+00 - %2668 = fdiv float %2575, %2667 - %2669 = fmul float %2554, %2668 - %2670 = fneg float %2669 - %2671 = insertelement <4 x float> zeroinitializer, float %2670, i32 0 - %2672 = insertelement <4 x float> %2671, float 0.000000e+00, i32 1 - %2673 = insertelement <4 x float> %2672, float 0.000000e+00, i32 2 - %2674 = insertelement <4 x float> %2673, float 0.000000e+00, i32 3 - %2675 = getelementptr float, float* %0, i32 0 - %2676 = load float, float* %2675, align 4 - %2677 = insertelement <4 x float> zeroinitializer, float %2676, i32 0 - %2678 = insertelement <4 x float> %2677, float 0.000000e+00, i32 1 - %2679 = insertelement <4 x float> %2678, float 0.000000e+00, i32 2 - %2680 = insertelement <4 x float> %2679, float 0.000000e+00, i32 3 - %2681 = call <4 x float> @llvm.fma.v4f32(<4 x float> %2674, <4 x float> %2680, <4 x float> zeroinitializer) - %2682 = extractelement <4 x float> %2681, i32 0 - %2683 = getelementptr float, float* %2, i32 0 - %2684 = getelementptr inbounds float, float* %2683, i64 2 - store float %2682, float* %2684, align 4 - %2685 = bitcast i32 %5 to float - %2686 = bitcast i32 %5 to float - %2687 = fmul float %2685, %2686 - %2688 = fadd float %2687, 0.000000e+00 - %2689 = bitcast i32 %32 to float - %2690 = bitcast i32 %32 to float - %2691 = fmul float %2689, %2690 - %2692 = fadd float %2688, %2691 - %2693 = call float @llvm.sqrt.f32(float %2692) - %2694 = bitcast i32 %5 to float - %2695 = fcmp olt float %2694, 0.000000e+00 - %2696 = sext i1 %2695 to i32 - %2697 = bitcast i32 %5 to float - %2698 = fcmp ogt float %2697, 0.000000e+00 - %2699 = zext i1 %2698 to i32 - %2700 = add nsw i32 %2696, %2699 - %2701 = sitofp i32 %2700 to float - %2702 = fneg float %2701 - %2703 = fmul 
float %2693, %2702 - %2704 = fmul float %2703, 0.000000e+00 - %2705 = bitcast i32 %32 to float - %2706 = fadd float %2705, %2704 - %2707 = bitcast i32 %5 to float - %2708 = bitcast i32 %5 to float - %2709 = fmul float %2707, %2708 - %2710 = fadd float %2709, 0.000000e+00 - %2711 = bitcast i32 %32 to float - %2712 = bitcast i32 %32 to float - %2713 = fmul float %2711, %2712 - %2714 = fadd float %2710, %2713 - %2715 = call float @llvm.sqrt.f32(float %2714) - %2716 = bitcast i32 %5 to float - %2717 = fcmp olt float %2716, 0.000000e+00 - %2718 = sext i1 %2717 to i32 - %2719 = bitcast i32 %5 to float - %2720 = fcmp ogt float %2719, 0.000000e+00 - %2721 = zext i1 %2720 to i32 - %2722 = add nsw i32 %2718, %2721 - %2723 = sitofp i32 %2722 to float - %2724 = fneg float %2723 - %2725 = fmul float %2715, %2724 - %2726 = bitcast i32 %5 to float - %2727 = fadd float %2726, %2725 - %2728 = bitcast i32 %5 to float - %2729 = bitcast i32 %5 to float - %2730 = fmul float %2728, %2729 - %2731 = fadd float %2730, 0.000000e+00 - %2732 = bitcast i32 %32 to float - %2733 = bitcast i32 %32 to float - %2734 = fmul float %2732, %2733 - %2735 = fadd float %2731, %2734 - %2736 = call float @llvm.sqrt.f32(float %2735) - %2737 = bitcast i32 %5 to float - %2738 = fcmp olt float %2737, 0.000000e+00 - %2739 = sext i1 %2738 to i32 - %2740 = bitcast i32 %5 to float - %2741 = fcmp ogt float %2740, 0.000000e+00 - %2742 = zext i1 %2741 to i32 - %2743 = add nsw i32 %2739, %2742 - %2744 = sitofp i32 %2743 to float - %2745 = fneg float %2744 - %2746 = fmul float %2736, %2745 - %2747 = bitcast i32 %5 to float - %2748 = fadd float %2747, %2746 - %2749 = fmul float %2727, %2748 - %2750 = fadd float %2749, 0.000000e+00 - %2751 = bitcast i32 %5 to float - %2752 = bitcast i32 %5 to float - %2753 = fmul float %2751, %2752 - %2754 = fadd float %2753, 0.000000e+00 - %2755 = bitcast i32 %32 to float - %2756 = bitcast i32 %32 to float - %2757 = fmul float %2755, %2756 - %2758 = fadd float %2754, %2757 - %2759 = call float @llvm.sqrt.f32(float %2758) - %2760 = bitcast i32 %5 to float - %2761 = fcmp olt float %2760, 0.000000e+00 - %2762 = sext i1 %2761 to i32 - %2763 = bitcast i32 %5 to float - %2764 = fcmp ogt float %2763, 0.000000e+00 - %2765 = zext i1 %2764 to i32 - %2766 = add nsw i32 %2762, %2765 - %2767 = sitofp i32 %2766 to float - %2768 = fneg float %2767 - %2769 = fmul float %2759, %2768 - %2770 = fmul float %2769, 0.000000e+00 - %2771 = bitcast i32 %32 to float - %2772 = fadd float %2771, %2770 - %2773 = bitcast i32 %5 to float - %2774 = bitcast i32 %5 to float - %2775 = fmul float %2773, %2774 - %2776 = fadd float %2775, 0.000000e+00 - %2777 = bitcast i32 %32 to float - %2778 = bitcast i32 %32 to float - %2779 = fmul float %2777, %2778 - %2780 = fadd float %2776, %2779 - %2781 = call float @llvm.sqrt.f32(float %2780) - %2782 = bitcast i32 %5 to float - %2783 = fcmp olt float %2782, 0.000000e+00 - %2784 = sext i1 %2783 to i32 - %2785 = bitcast i32 %5 to float - %2786 = fcmp ogt float %2785, 0.000000e+00 - %2787 = zext i1 %2786 to i32 - %2788 = add nsw i32 %2784, %2787 - %2789 = sitofp i32 %2788 to float - %2790 = fneg float %2789 - %2791 = fmul float %2781, %2790 - %2792 = fmul float %2791, 0.000000e+00 - %2793 = bitcast i32 %32 to float - %2794 = fadd float %2793, %2792 - %2795 = fmul float %2772, %2794 - %2796 = fadd float %2750, %2795 - %2797 = call float @llvm.sqrt.f32(float %2796) - %2798 = fadd float %2797, 0.000000e+00 - %2799 = fdiv float %2706, %2798 - %2800 = fmul float %2799, 2.000000e+00 - %2801 = bitcast i32 %5 to float 
- %2802 = bitcast i32 %5 to float - %2803 = fmul float %2801, %2802 - %2804 = fadd float %2803, 0.000000e+00 - %2805 = bitcast i32 %32 to float - %2806 = bitcast i32 %32 to float - %2807 = fmul float %2805, %2806 - %2808 = fadd float %2804, %2807 - %2809 = call float @llvm.sqrt.f32(float %2808) - %2810 = bitcast i32 %5 to float - %2811 = fcmp olt float %2810, 0.000000e+00 - %2812 = sext i1 %2811 to i32 - %2813 = bitcast i32 %5 to float - %2814 = fcmp ogt float %2813, 0.000000e+00 - %2815 = zext i1 %2814 to i32 - %2816 = add nsw i32 %2812, %2815 - %2817 = sitofp i32 %2816 to float - %2818 = fneg float %2817 - %2819 = fmul float %2809, %2818 - %2820 = bitcast i32 %5 to float - %2821 = fadd float %2820, %2819 - %2822 = bitcast i32 %5 to float - %2823 = bitcast i32 %5 to float - %2824 = fmul float %2822, %2823 - %2825 = fadd float %2824, 0.000000e+00 - %2826 = bitcast i32 %32 to float - %2827 = bitcast i32 %32 to float - %2828 = fmul float %2826, %2827 - %2829 = fadd float %2825, %2828 - %2830 = call float @llvm.sqrt.f32(float %2829) - %2831 = bitcast i32 %5 to float - %2832 = fcmp olt float %2831, 0.000000e+00 - %2833 = sext i1 %2832 to i32 - %2834 = bitcast i32 %5 to float - %2835 = fcmp ogt float %2834, 0.000000e+00 - %2836 = zext i1 %2835 to i32 - %2837 = add nsw i32 %2833, %2836 - %2838 = sitofp i32 %2837 to float - %2839 = fneg float %2838 - %2840 = fmul float %2830, %2839 - %2841 = bitcast i32 %5 to float - %2842 = fadd float %2841, %2840 - %2843 = bitcast i32 %5 to float - %2844 = bitcast i32 %5 to float - %2845 = fmul float %2843, %2844 - %2846 = fadd float %2845, 0.000000e+00 - %2847 = bitcast i32 %32 to float - %2848 = bitcast i32 %32 to float - %2849 = fmul float %2847, %2848 - %2850 = fadd float %2846, %2849 - %2851 = call float @llvm.sqrt.f32(float %2850) - %2852 = bitcast i32 %5 to float - %2853 = fcmp olt float %2852, 0.000000e+00 - %2854 = sext i1 %2853 to i32 - %2855 = bitcast i32 %5 to float - %2856 = fcmp ogt float %2855, 0.000000e+00 - %2857 = zext i1 %2856 to i32 - %2858 = add nsw i32 %2854, %2857 - %2859 = sitofp i32 %2858 to float - %2860 = fneg float %2859 - %2861 = fmul float %2851, %2860 - %2862 = bitcast i32 %5 to float - %2863 = fadd float %2862, %2861 - %2864 = fmul float %2842, %2863 - %2865 = fadd float %2864, 0.000000e+00 - %2866 = bitcast i32 %5 to float - %2867 = bitcast i32 %5 to float - %2868 = fmul float %2866, %2867 - %2869 = fadd float %2868, 0.000000e+00 - %2870 = bitcast i32 %32 to float - %2871 = bitcast i32 %32 to float - %2872 = fmul float %2870, %2871 - %2873 = fadd float %2869, %2872 - %2874 = call float @llvm.sqrt.f32(float %2873) - %2875 = bitcast i32 %5 to float - %2876 = fcmp olt float %2875, 0.000000e+00 - %2877 = sext i1 %2876 to i32 - %2878 = bitcast i32 %5 to float - %2879 = fcmp ogt float %2878, 0.000000e+00 - %2880 = zext i1 %2879 to i32 - %2881 = add nsw i32 %2877, %2880 - %2882 = sitofp i32 %2881 to float - %2883 = fneg float %2882 - %2884 = fmul float %2874, %2883 - %2885 = fmul float %2884, 0.000000e+00 - %2886 = bitcast i32 %32 to float - %2887 = fadd float %2886, %2885 - %2888 = bitcast i32 %5 to float - %2889 = bitcast i32 %5 to float - %2890 = fmul float %2888, %2889 - %2891 = fadd float %2890, 0.000000e+00 - %2892 = bitcast i32 %32 to float - %2893 = bitcast i32 %32 to float - %2894 = fmul float %2892, %2893 - %2895 = fadd float %2891, %2894 - %2896 = call float @llvm.sqrt.f32(float %2895) - %2897 = bitcast i32 %5 to float - %2898 = fcmp olt float %2897, 0.000000e+00 - %2899 = sext i1 %2898 to i32 - %2900 = bitcast i32 %5 to 
float - %2901 = fcmp ogt float %2900, 0.000000e+00 - %2902 = zext i1 %2901 to i32 - %2903 = add nsw i32 %2899, %2902 - %2904 = sitofp i32 %2903 to float - %2905 = fneg float %2904 - %2906 = fmul float %2896, %2905 - %2907 = fmul float %2906, 0.000000e+00 - %2908 = bitcast i32 %32 to float - %2909 = fadd float %2908, %2907 - %2910 = fmul float %2887, %2909 - %2911 = fadd float %2865, %2910 - %2912 = call float @llvm.sqrt.f32(float %2911) - %2913 = fadd float %2912, 0.000000e+00 - %2914 = fdiv float %2821, %2913 - %2915 = fmul float %2800, %2914 - %2916 = fneg float %2915 - %2917 = fmul float %2916, %2676 - %2918 = fadd float %2917, 0.000000e+00 - %2919 = bitcast i32 %5 to float - %2920 = bitcast i32 %5 to float - %2921 = fmul float %2919, %2920 - %2922 = fadd float %2921, 0.000000e+00 - %2923 = bitcast i32 %32 to float - %2924 = bitcast i32 %32 to float - %2925 = fmul float %2923, %2924 - %2926 = fadd float %2922, %2925 - %2927 = call float @llvm.sqrt.f32(float %2926) - %2928 = bitcast i32 %5 to float - %2929 = fcmp olt float %2928, 0.000000e+00 - %2930 = sext i1 %2929 to i32 - %2931 = bitcast i32 %5 to float - %2932 = fcmp ogt float %2931, 0.000000e+00 - %2933 = zext i1 %2932 to i32 - %2934 = add nsw i32 %2930, %2933 - %2935 = sitofp i32 %2934 to float - %2936 = fneg float %2935 - %2937 = fmul float %2927, %2936 - %2938 = fmul float %2937, 0.000000e+00 - %2939 = bitcast i32 %32 to float - %2940 = fadd float %2939, %2938 - %2941 = bitcast i32 %5 to float - %2942 = bitcast i32 %5 to float - %2943 = fmul float %2941, %2942 - %2944 = fadd float %2943, 0.000000e+00 - %2945 = bitcast i32 %32 to float - %2946 = bitcast i32 %32 to float - %2947 = fmul float %2945, %2946 - %2948 = fadd float %2944, %2947 - %2949 = call float @llvm.sqrt.f32(float %2948) - %2950 = bitcast i32 %5 to float - %2951 = fcmp olt float %2950, 0.000000e+00 - %2952 = sext i1 %2951 to i32 - %2953 = bitcast i32 %5 to float - %2954 = fcmp ogt float %2953, 0.000000e+00 - %2955 = zext i1 %2954 to i32 - %2956 = add nsw i32 %2952, %2955 - %2957 = sitofp i32 %2956 to float - %2958 = fneg float %2957 - %2959 = fmul float %2949, %2958 - %2960 = bitcast i32 %5 to float - %2961 = fadd float %2960, %2959 - %2962 = bitcast i32 %5 to float - %2963 = bitcast i32 %5 to float - %2964 = fmul float %2962, %2963 - %2965 = fadd float %2964, 0.000000e+00 - %2966 = bitcast i32 %32 to float - %2967 = bitcast i32 %32 to float - %2968 = fmul float %2966, %2967 - %2969 = fadd float %2965, %2968 - %2970 = call float @llvm.sqrt.f32(float %2969) - %2971 = bitcast i32 %5 to float - %2972 = fcmp olt float %2971, 0.000000e+00 - %2973 = sext i1 %2972 to i32 - %2974 = bitcast i32 %5 to float - %2975 = fcmp ogt float %2974, 0.000000e+00 - %2976 = zext i1 %2975 to i32 - %2977 = add nsw i32 %2973, %2976 - %2978 = sitofp i32 %2977 to float - %2979 = fneg float %2978 - %2980 = fmul float %2970, %2979 - %2981 = bitcast i32 %5 to float - %2982 = fadd float %2981, %2980 - %2983 = fmul float %2961, %2982 - %2984 = fadd float %2983, 0.000000e+00 - %2985 = bitcast i32 %5 to float - %2986 = bitcast i32 %5 to float - %2987 = fmul float %2985, %2986 - %2988 = fadd float %2987, 0.000000e+00 - %2989 = bitcast i32 %32 to float - %2990 = bitcast i32 %32 to float - %2991 = fmul float %2989, %2990 - %2992 = fadd float %2988, %2991 - %2993 = call float @llvm.sqrt.f32(float %2992) - %2994 = bitcast i32 %5 to float - %2995 = fcmp olt float %2994, 0.000000e+00 - %2996 = sext i1 %2995 to i32 - %2997 = bitcast i32 %5 to float - %2998 = fcmp ogt float %2997, 0.000000e+00 - %2999 = zext 
i1 %2998 to i32 - %3000 = add nsw i32 %2996, %2999 - %3001 = sitofp i32 %3000 to float - %3002 = fneg float %3001 - %3003 = fmul float %2993, %3002 - %3004 = fmul float %3003, 0.000000e+00 - %3005 = bitcast i32 %32 to float - %3006 = fadd float %3005, %3004 - %3007 = bitcast i32 %5 to float - %3008 = bitcast i32 %5 to float - %3009 = fmul float %3007, %3008 - %3010 = fadd float %3009, 0.000000e+00 - %3011 = bitcast i32 %32 to float - %3012 = bitcast i32 %32 to float - %3013 = fmul float %3011, %3012 - %3014 = fadd float %3010, %3013 - %3015 = call float @llvm.sqrt.f32(float %3014) - %3016 = bitcast i32 %5 to float - %3017 = fcmp olt float %3016, 0.000000e+00 - %3018 = sext i1 %3017 to i32 - %3019 = bitcast i32 %5 to float - %3020 = fcmp ogt float %3019, 0.000000e+00 - %3021 = zext i1 %3020 to i32 - %3022 = add nsw i32 %3018, %3021 - %3023 = sitofp i32 %3022 to float - %3024 = fneg float %3023 - %3025 = fmul float %3015, %3024 - %3026 = fmul float %3025, 0.000000e+00 - %3027 = bitcast i32 %32 to float - %3028 = fadd float %3027, %3026 - %3029 = fmul float %3006, %3028 - %3030 = fadd float %2984, %3029 - %3031 = call float @llvm.sqrt.f32(float %3030) - %3032 = fadd float %3031, 0.000000e+00 - %3033 = fdiv float %2940, %3032 - %3034 = fmul float %3033, 2.000000e+00 - %3035 = bitcast i32 %5 to float - %3036 = bitcast i32 %5 to float - %3037 = fmul float %3035, %3036 - %3038 = fadd float %3037, 0.000000e+00 - %3039 = bitcast i32 %32 to float - %3040 = bitcast i32 %32 to float - %3041 = fmul float %3039, %3040 - %3042 = fadd float %3038, %3041 - %3043 = call float @llvm.sqrt.f32(float %3042) - %3044 = bitcast i32 %5 to float - %3045 = fcmp olt float %3044, 0.000000e+00 - %3046 = sext i1 %3045 to i32 - %3047 = bitcast i32 %5 to float - %3048 = fcmp ogt float %3047, 0.000000e+00 - %3049 = zext i1 %3048 to i32 - %3050 = add nsw i32 %3046, %3049 - %3051 = sitofp i32 %3050 to float - %3052 = fneg float %3051 - %3053 = fmul float %3043, %3052 - %3054 = fmul float %3053, 0.000000e+00 - %3055 = bitcast i32 %32 to float - %3056 = fadd float %3055, %3054 - %3057 = bitcast i32 %5 to float - %3058 = bitcast i32 %5 to float - %3059 = fmul float %3057, %3058 - %3060 = fadd float %3059, 0.000000e+00 - %3061 = bitcast i32 %32 to float - %3062 = bitcast i32 %32 to float - %3063 = fmul float %3061, %3062 - %3064 = fadd float %3060, %3063 - %3065 = call float @llvm.sqrt.f32(float %3064) - %3066 = bitcast i32 %5 to float - %3067 = fcmp olt float %3066, 0.000000e+00 - %3068 = sext i1 %3067 to i32 - %3069 = bitcast i32 %5 to float - %3070 = fcmp ogt float %3069, 0.000000e+00 - %3071 = zext i1 %3070 to i32 - %3072 = add nsw i32 %3068, %3071 - %3073 = sitofp i32 %3072 to float - %3074 = fneg float %3073 - %3075 = fmul float %3065, %3074 - %3076 = bitcast i32 %5 to float - %3077 = fadd float %3076, %3075 - %3078 = bitcast i32 %5 to float - %3079 = bitcast i32 %5 to float - %3080 = fmul float %3078, %3079 - %3081 = fadd float %3080, 0.000000e+00 - %3082 = bitcast i32 %32 to float - %3083 = bitcast i32 %32 to float - %3084 = fmul float %3082, %3083 - %3085 = fadd float %3081, %3084 - %3086 = call float @llvm.sqrt.f32(float %3085) - %3087 = bitcast i32 %5 to float - %3088 = fcmp olt float %3087, 0.000000e+00 - %3089 = sext i1 %3088 to i32 - %3090 = bitcast i32 %5 to float - %3091 = fcmp ogt float %3090, 0.000000e+00 - %3092 = zext i1 %3091 to i32 - %3093 = add nsw i32 %3089, %3092 - %3094 = sitofp i32 %3093 to float - %3095 = fneg float %3094 - %3096 = fmul float %3086, %3095 - %3097 = bitcast i32 %5 to float - %3098 = 
fadd float %3097, %3096 - %3099 = fmul float %3077, %3098 - %3100 = fadd float %3099, 0.000000e+00 - %3101 = bitcast i32 %5 to float - %3102 = bitcast i32 %5 to float - %3103 = fmul float %3101, %3102 - %3104 = fadd float %3103, 0.000000e+00 - %3105 = bitcast i32 %32 to float - %3106 = bitcast i32 %32 to float - %3107 = fmul float %3105, %3106 - %3108 = fadd float %3104, %3107 - %3109 = call float @llvm.sqrt.f32(float %3108) - %3110 = bitcast i32 %5 to float - %3111 = fcmp olt float %3110, 0.000000e+00 - %3112 = sext i1 %3111 to i32 - %3113 = bitcast i32 %5 to float - %3114 = fcmp ogt float %3113, 0.000000e+00 - %3115 = zext i1 %3114 to i32 - %3116 = add nsw i32 %3112, %3115 - %3117 = sitofp i32 %3116 to float - %3118 = fneg float %3117 - %3119 = fmul float %3109, %3118 - %3120 = fmul float %3119, 0.000000e+00 - %3121 = bitcast i32 %32 to float - %3122 = fadd float %3121, %3120 - %3123 = bitcast i32 %5 to float - %3124 = bitcast i32 %5 to float - %3125 = fmul float %3123, %3124 - %3126 = fadd float %3125, 0.000000e+00 - %3127 = bitcast i32 %32 to float - %3128 = bitcast i32 %32 to float - %3129 = fmul float %3127, %3128 - %3130 = fadd float %3126, %3129 - %3131 = call float @llvm.sqrt.f32(float %3130) - %3132 = bitcast i32 %5 to float - %3133 = fcmp olt float %3132, 0.000000e+00 - %3134 = sext i1 %3133 to i32 - %3135 = bitcast i32 %5 to float - %3136 = fcmp ogt float %3135, 0.000000e+00 - %3137 = zext i1 %3136 to i32 - %3138 = add nsw i32 %3134, %3137 - %3139 = sitofp i32 %3138 to float - %3140 = fneg float %3139 - %3141 = fmul float %3131, %3140 - %3142 = fmul float %3141, 0.000000e+00 - %3143 = bitcast i32 %32 to float - %3144 = fadd float %3143, %3142 - %3145 = fmul float %3122, %3144 - %3146 = fadd float %3100, %3145 - %3147 = call float @llvm.sqrt.f32(float %3146) - %3148 = fadd float %3147, 0.000000e+00 - %3149 = fdiv float %3056, %3148 - %3150 = fmul float %3034, %3149 - %3151 = fsub float 1.000000e+00, %3150 - %3152 = getelementptr float, float* %0, i32 0 - %3153 = getelementptr inbounds float, float* %3152, i64 2 - %3154 = load float, float* %3153, align 4 - %3155 = fmul float %3151, %3154 - %3156 = fadd float %2918, %3155 - %3157 = insertelement <4 x float> zeroinitializer, float %3156, i32 0 - %3158 = insertelement <4 x float> %3157, float 0.000000e+00, i32 1 - %3159 = insertelement <4 x float> %3158, float 0.000000e+00, i32 2 - %3160 = insertelement <4 x float> %3159, float 0.000000e+00, i32 3 - %3161 = extractelement <4 x float> %3160, i32 0 - %3162 = getelementptr float, float* %2, i32 0 - %3163 = getelementptr inbounds float, float* %3162, i64 2 - store float %3161, float* %3163, align 4 - %3164 = extractelement <4 x float> %3160, i32 1 - %3165 = getelementptr float, float* %2, i32 0 - %3166 = getelementptr inbounds float, float* %3165, i64 3 - store float %3164, float* %3166, align 4 - %3167 = bitcast i32 %5 to float - %3168 = bitcast i32 %5 to float - %3169 = fmul float %3167, %3168 - %3170 = fadd float %3169, 0.000000e+00 - %3171 = bitcast i32 %32 to float - %3172 = bitcast i32 %32 to float - %3173 = fmul float %3171, %3172 - %3174 = fadd float %3170, %3173 - %3175 = call float @llvm.sqrt.f32(float %3174) - %3176 = bitcast i32 %5 to float - %3177 = fcmp olt float %3176, 0.000000e+00 - %3178 = sext i1 %3177 to i32 - %3179 = bitcast i32 %5 to float - %3180 = fcmp ogt float %3179, 0.000000e+00 - %3181 = zext i1 %3180 to i32 - %3182 = add nsw i32 %3178, %3181 - %3183 = sitofp i32 %3182 to float - %3184 = fneg float %3183 - %3185 = fmul float %3175, %3184 - %3186 = fmul float 
%3185, 0.000000e+00 - %3187 = bitcast i32 %32 to float - %3188 = fadd float %3187, %3186 - %3189 = bitcast i32 %5 to float - %3190 = bitcast i32 %5 to float - %3191 = fmul float %3189, %3190 - %3192 = fadd float %3191, 0.000000e+00 - %3193 = bitcast i32 %32 to float - %3194 = bitcast i32 %32 to float - %3195 = fmul float %3193, %3194 - %3196 = fadd float %3192, %3195 - %3197 = call float @llvm.sqrt.f32(float %3196) - %3198 = bitcast i32 %5 to float - %3199 = fcmp olt float %3198, 0.000000e+00 - %3200 = sext i1 %3199 to i32 - %3201 = bitcast i32 %5 to float - %3202 = fcmp ogt float %3201, 0.000000e+00 - %3203 = zext i1 %3202 to i32 - %3204 = add nsw i32 %3200, %3203 - %3205 = sitofp i32 %3204 to float - %3206 = fneg float %3205 - %3207 = fmul float %3197, %3206 - %3208 = bitcast i32 %5 to float - %3209 = fadd float %3208, %3207 - %3210 = bitcast i32 %5 to float - %3211 = bitcast i32 %5 to float - %3212 = fmul float %3210, %3211 - %3213 = fadd float %3212, 0.000000e+00 - %3214 = bitcast i32 %32 to float - %3215 = bitcast i32 %32 to float - %3216 = fmul float %3214, %3215 - %3217 = fadd float %3213, %3216 - %3218 = call float @llvm.sqrt.f32(float %3217) - %3219 = bitcast i32 %5 to float - %3220 = fcmp olt float %3219, 0.000000e+00 - %3221 = sext i1 %3220 to i32 - %3222 = bitcast i32 %5 to float - %3223 = fcmp ogt float %3222, 0.000000e+00 - %3224 = zext i1 %3223 to i32 - %3225 = add nsw i32 %3221, %3224 - %3226 = sitofp i32 %3225 to float - %3227 = fneg float %3226 - %3228 = fmul float %3218, %3227 - %3229 = bitcast i32 %5 to float - %3230 = fadd float %3229, %3228 - %3231 = fmul float %3209, %3230 - %3232 = fadd float %3231, 0.000000e+00 - %3233 = bitcast i32 %5 to float - %3234 = bitcast i32 %5 to float - %3235 = fmul float %3233, %3234 - %3236 = fadd float %3235, 0.000000e+00 - %3237 = bitcast i32 %32 to float - %3238 = bitcast i32 %32 to float - %3239 = fmul float %3237, %3238 - %3240 = fadd float %3236, %3239 - %3241 = call float @llvm.sqrt.f32(float %3240) - %3242 = bitcast i32 %5 to float - %3243 = fcmp olt float %3242, 0.000000e+00 - %3244 = sext i1 %3243 to i32 - %3245 = bitcast i32 %5 to float - %3246 = fcmp ogt float %3245, 0.000000e+00 - %3247 = zext i1 %3246 to i32 - %3248 = add nsw i32 %3244, %3247 - %3249 = sitofp i32 %3248 to float - %3250 = fneg float %3249 - %3251 = fmul float %3241, %3250 - %3252 = fmul float %3251, 0.000000e+00 - %3253 = bitcast i32 %32 to float - %3254 = fadd float %3253, %3252 - %3255 = bitcast i32 %5 to float - %3256 = bitcast i32 %5 to float - %3257 = fmul float %3255, %3256 - %3258 = fadd float %3257, 0.000000e+00 - %3259 = bitcast i32 %32 to float - %3260 = bitcast i32 %32 to float - %3261 = fmul float %3259, %3260 - %3262 = fadd float %3258, %3261 - %3263 = call float @llvm.sqrt.f32(float %3262) - %3264 = bitcast i32 %5 to float - %3265 = fcmp olt float %3264, 0.000000e+00 - %3266 = sext i1 %3265 to i32 - %3267 = bitcast i32 %5 to float - %3268 = fcmp ogt float %3267, 0.000000e+00 - %3269 = zext i1 %3268 to i32 - %3270 = add nsw i32 %3266, %3269 - %3271 = sitofp i32 %3270 to float - %3272 = fneg float %3271 - %3273 = fmul float %3263, %3272 - %3274 = fmul float %3273, 0.000000e+00 - %3275 = bitcast i32 %32 to float - %3276 = fadd float %3275, %3274 - %3277 = fmul float %3254, %3276 - %3278 = fadd float %3232, %3277 - %3279 = call float @llvm.sqrt.f32(float %3278) - %3280 = fadd float %3279, 0.000000e+00 - %3281 = fdiv float %3188, %3280 - %3282 = fmul float %3281, 2.000000e+00 - %3283 = bitcast i32 %5 to float - %3284 = bitcast i32 %5 to float - 
%3285 = fmul float %3283, %3284 - %3286 = fadd float %3285, 0.000000e+00 - %3287 = bitcast i32 %32 to float - %3288 = bitcast i32 %32 to float - %3289 = fmul float %3287, %3288 - %3290 = fadd float %3286, %3289 - %3291 = call float @llvm.sqrt.f32(float %3290) - %3292 = bitcast i32 %5 to float - %3293 = fcmp olt float %3292, 0.000000e+00 - %3294 = sext i1 %3293 to i32 - %3295 = bitcast i32 %5 to float - %3296 = fcmp ogt float %3295, 0.000000e+00 - %3297 = zext i1 %3296 to i32 - %3298 = add nsw i32 %3294, %3297 - %3299 = sitofp i32 %3298 to float - %3300 = fneg float %3299 - %3301 = fmul float %3291, %3300 - %3302 = bitcast i32 %5 to float - %3303 = fadd float %3302, %3301 - %3304 = bitcast i32 %5 to float - %3305 = bitcast i32 %5 to float - %3306 = fmul float %3304, %3305 - %3307 = fadd float %3306, 0.000000e+00 - %3308 = bitcast i32 %32 to float - %3309 = bitcast i32 %32 to float - %3310 = fmul float %3308, %3309 - %3311 = fadd float %3307, %3310 - %3312 = call float @llvm.sqrt.f32(float %3311) - %3313 = bitcast i32 %5 to float - %3314 = fcmp olt float %3313, 0.000000e+00 - %3315 = sext i1 %3314 to i32 - %3316 = bitcast i32 %5 to float - %3317 = fcmp ogt float %3316, 0.000000e+00 - %3318 = zext i1 %3317 to i32 - %3319 = add nsw i32 %3315, %3318 - %3320 = sitofp i32 %3319 to float - %3321 = fneg float %3320 - %3322 = fmul float %3312, %3321 - %3323 = bitcast i32 %5 to float - %3324 = fadd float %3323, %3322 - %3325 = bitcast i32 %5 to float - %3326 = bitcast i32 %5 to float - %3327 = fmul float %3325, %3326 - %3328 = fadd float %3327, 0.000000e+00 - %3329 = bitcast i32 %32 to float - %3330 = bitcast i32 %32 to float - %3331 = fmul float %3329, %3330 - %3332 = fadd float %3328, %3331 - %3333 = call float @llvm.sqrt.f32(float %3332) - %3334 = bitcast i32 %5 to float - %3335 = fcmp olt float %3334, 0.000000e+00 - %3336 = sext i1 %3335 to i32 - %3337 = bitcast i32 %5 to float - %3338 = fcmp ogt float %3337, 0.000000e+00 - %3339 = zext i1 %3338 to i32 - %3340 = add nsw i32 %3336, %3339 - %3341 = sitofp i32 %3340 to float - %3342 = fneg float %3341 - %3343 = fmul float %3333, %3342 - %3344 = bitcast i32 %5 to float - %3345 = fadd float %3344, %3343 - %3346 = fmul float %3324, %3345 - %3347 = fadd float %3346, 0.000000e+00 - %3348 = bitcast i32 %5 to float - %3349 = bitcast i32 %5 to float - %3350 = fmul float %3348, %3349 - %3351 = fadd float %3350, 0.000000e+00 - %3352 = bitcast i32 %32 to float - %3353 = bitcast i32 %32 to float - %3354 = fmul float %3352, %3353 - %3355 = fadd float %3351, %3354 - %3356 = call float @llvm.sqrt.f32(float %3355) - %3357 = bitcast i32 %5 to float - %3358 = fcmp olt float %3357, 0.000000e+00 - %3359 = sext i1 %3358 to i32 - %3360 = bitcast i32 %5 to float - %3361 = fcmp ogt float %3360, 0.000000e+00 - %3362 = zext i1 %3361 to i32 - %3363 = add nsw i32 %3359, %3362 - %3364 = sitofp i32 %3363 to float - %3365 = fneg float %3364 - %3366 = fmul float %3356, %3365 - %3367 = fmul float %3366, 0.000000e+00 - %3368 = bitcast i32 %32 to float - %3369 = fadd float %3368, %3367 - %3370 = bitcast i32 %5 to float - %3371 = bitcast i32 %5 to float - %3372 = fmul float %3370, %3371 - %3373 = fadd float %3372, 0.000000e+00 - %3374 = bitcast i32 %32 to float - %3375 = bitcast i32 %32 to float - %3376 = fmul float %3374, %3375 - %3377 = fadd float %3373, %3376 - %3378 = call float @llvm.sqrt.f32(float %3377) - %3379 = bitcast i32 %5 to float - %3380 = fcmp olt float %3379, 0.000000e+00 - %3381 = sext i1 %3380 to i32 - %3382 = bitcast i32 %5 to float - %3383 = fcmp ogt float %3382, 
0.000000e+00 - %3384 = zext i1 %3383 to i32 - %3385 = add nsw i32 %3381, %3384 - %3386 = sitofp i32 %3385 to float - %3387 = fneg float %3386 - %3388 = fmul float %3378, %3387 - %3389 = fmul float %3388, 0.000000e+00 - %3390 = bitcast i32 %32 to float - %3391 = fadd float %3390, %3389 - %3392 = fmul float %3369, %3391 - %3393 = fadd float %3347, %3392 - %3394 = call float @llvm.sqrt.f32(float %3393) - %3395 = fadd float %3394, 0.000000e+00 - %3396 = fdiv float %3303, %3395 - %3397 = fmul float %3282, %3396 - %3398 = fneg float %3397 - %3399 = insertelement <4 x float> zeroinitializer, float %3398, i32 0 - %3400 = insertelement <4 x float> %3399, float 0.000000e+00, i32 1 - %3401 = insertelement <4 x float> %3400, float 0.000000e+00, i32 2 - %3402 = insertelement <4 x float> %3401, float 0.000000e+00, i32 3 - %3403 = getelementptr float, float* %0, i32 0 - %3404 = getelementptr inbounds float, float* %3403, i64 1 - %3405 = load float, float* %3404, align 4 - %3406 = insertelement <4 x float> zeroinitializer, float %3405, i32 0 - %3407 = insertelement <4 x float> %3406, float 0.000000e+00, i32 1 - %3408 = insertelement <4 x float> %3407, float 0.000000e+00, i32 2 - %3409 = insertelement <4 x float> %3408, float 0.000000e+00, i32 3 - %3410 = call <4 x float> @llvm.fma.v4f32(<4 x float> %3402, <4 x float> %3409, <4 x float> zeroinitializer) - %3411 = extractelement <4 x float> %3410, i32 0 - %3412 = getelementptr float, float* %2, i32 0 - %3413 = getelementptr inbounds float, float* %3412, i64 3 - store float %3411, float* %3413, align 4 - %3414 = bitcast i32 %5 to float - %3415 = bitcast i32 %5 to float - %3416 = fmul float %3414, %3415 - %3417 = fadd float %3416, 0.000000e+00 - %3418 = bitcast i32 %32 to float - %3419 = bitcast i32 %32 to float - %3420 = fmul float %3418, %3419 - %3421 = fadd float %3417, %3420 - %3422 = call float @llvm.sqrt.f32(float %3421) - %3423 = bitcast i32 %5 to float - %3424 = fcmp olt float %3423, 0.000000e+00 - %3425 = sext i1 %3424 to i32 - %3426 = bitcast i32 %5 to float - %3427 = fcmp ogt float %3426, 0.000000e+00 - %3428 = zext i1 %3427 to i32 - %3429 = add nsw i32 %3425, %3428 - %3430 = sitofp i32 %3429 to float - %3431 = fneg float %3430 - %3432 = fmul float %3422, %3431 - %3433 = fmul float %3432, 0.000000e+00 - %3434 = bitcast i32 %32 to float - %3435 = fadd float %3434, %3433 - %3436 = bitcast i32 %5 to float - %3437 = bitcast i32 %5 to float - %3438 = fmul float %3436, %3437 - %3439 = fadd float %3438, 0.000000e+00 - %3440 = bitcast i32 %32 to float - %3441 = bitcast i32 %32 to float - %3442 = fmul float %3440, %3441 - %3443 = fadd float %3439, %3442 - %3444 = call float @llvm.sqrt.f32(float %3443) - %3445 = bitcast i32 %5 to float - %3446 = fcmp olt float %3445, 0.000000e+00 - %3447 = sext i1 %3446 to i32 - %3448 = bitcast i32 %5 to float - %3449 = fcmp ogt float %3448, 0.000000e+00 - %3450 = zext i1 %3449 to i32 - %3451 = add nsw i32 %3447, %3450 - %3452 = sitofp i32 %3451 to float - %3453 = fneg float %3452 - %3454 = fmul float %3444, %3453 - %3455 = bitcast i32 %5 to float - %3456 = fadd float %3455, %3454 - %3457 = bitcast i32 %5 to float - %3458 = bitcast i32 %5 to float - %3459 = fmul float %3457, %3458 - %3460 = fadd float %3459, 0.000000e+00 - %3461 = bitcast i32 %32 to float - %3462 = bitcast i32 %32 to float - %3463 = fmul float %3461, %3462 - %3464 = fadd float %3460, %3463 - %3465 = call float @llvm.sqrt.f32(float %3464) - %3466 = bitcast i32 %5 to float - %3467 = fcmp olt float %3466, 0.000000e+00 - %3468 = sext i1 %3467 to i32 - %3469 = 
bitcast i32 %5 to float - %3470 = fcmp ogt float %3469, 0.000000e+00 - %3471 = zext i1 %3470 to i32 - %3472 = add nsw i32 %3468, %3471 - %3473 = sitofp i32 %3472 to float - %3474 = fneg float %3473 - %3475 = fmul float %3465, %3474 - %3476 = bitcast i32 %5 to float - %3477 = fadd float %3476, %3475 - %3478 = fmul float %3456, %3477 - %3479 = fadd float %3478, 0.000000e+00 - %3480 = bitcast i32 %5 to float - %3481 = bitcast i32 %5 to float - %3482 = fmul float %3480, %3481 - %3483 = fadd float %3482, 0.000000e+00 - %3484 = bitcast i32 %32 to float - %3485 = bitcast i32 %32 to float - %3486 = fmul float %3484, %3485 - %3487 = fadd float %3483, %3486 - %3488 = call float @llvm.sqrt.f32(float %3487) - %3489 = bitcast i32 %5 to float - %3490 = fcmp olt float %3489, 0.000000e+00 - %3491 = sext i1 %3490 to i32 - %3492 = bitcast i32 %5 to float - %3493 = fcmp ogt float %3492, 0.000000e+00 - %3494 = zext i1 %3493 to i32 - %3495 = add nsw i32 %3491, %3494 - %3496 = sitofp i32 %3495 to float - %3497 = fneg float %3496 - %3498 = fmul float %3488, %3497 - %3499 = fmul float %3498, 0.000000e+00 - %3500 = bitcast i32 %32 to float - %3501 = fadd float %3500, %3499 - %3502 = bitcast i32 %5 to float - %3503 = bitcast i32 %5 to float - %3504 = fmul float %3502, %3503 - %3505 = fadd float %3504, 0.000000e+00 - %3506 = bitcast i32 %32 to float - %3507 = bitcast i32 %32 to float - %3508 = fmul float %3506, %3507 - %3509 = fadd float %3505, %3508 - %3510 = call float @llvm.sqrt.f32(float %3509) - %3511 = bitcast i32 %5 to float - %3512 = fcmp olt float %3511, 0.000000e+00 - %3513 = sext i1 %3512 to i32 - %3514 = bitcast i32 %5 to float - %3515 = fcmp ogt float %3514, 0.000000e+00 - %3516 = zext i1 %3515 to i32 - %3517 = add nsw i32 %3513, %3516 - %3518 = sitofp i32 %3517 to float - %3519 = fneg float %3518 - %3520 = fmul float %3510, %3519 - %3521 = fmul float %3520, 0.000000e+00 - %3522 = bitcast i32 %32 to float - %3523 = fadd float %3522, %3521 - %3524 = fmul float %3501, %3523 - %3525 = fadd float %3479, %3524 - %3526 = call float @llvm.sqrt.f32(float %3525) - %3527 = fadd float %3526, 0.000000e+00 - %3528 = fdiv float %3435, %3527 - %3529 = fmul float %3528, 2.000000e+00 - %3530 = bitcast i32 %5 to float - %3531 = bitcast i32 %5 to float - %3532 = fmul float %3530, %3531 - %3533 = fadd float %3532, 0.000000e+00 - %3534 = bitcast i32 %32 to float - %3535 = bitcast i32 %32 to float - %3536 = fmul float %3534, %3535 - %3537 = fadd float %3533, %3536 - %3538 = call float @llvm.sqrt.f32(float %3537) - %3539 = bitcast i32 %5 to float - %3540 = fcmp olt float %3539, 0.000000e+00 - %3541 = sext i1 %3540 to i32 - %3542 = bitcast i32 %5 to float - %3543 = fcmp ogt float %3542, 0.000000e+00 - %3544 = zext i1 %3543 to i32 - %3545 = add nsw i32 %3541, %3544 - %3546 = sitofp i32 %3545 to float - %3547 = fneg float %3546 - %3548 = fmul float %3538, %3547 - %3549 = bitcast i32 %5 to float - %3550 = fadd float %3549, %3548 - %3551 = bitcast i32 %5 to float - %3552 = bitcast i32 %5 to float - %3553 = fmul float %3551, %3552 - %3554 = fadd float %3553, 0.000000e+00 - %3555 = bitcast i32 %32 to float - %3556 = bitcast i32 %32 to float - %3557 = fmul float %3555, %3556 - %3558 = fadd float %3554, %3557 - %3559 = call float @llvm.sqrt.f32(float %3558) - %3560 = bitcast i32 %5 to float - %3561 = fcmp olt float %3560, 0.000000e+00 - %3562 = sext i1 %3561 to i32 - %3563 = bitcast i32 %5 to float - %3564 = fcmp ogt float %3563, 0.000000e+00 - %3565 = zext i1 %3564 to i32 - %3566 = add nsw i32 %3562, %3565 - %3567 = sitofp i32 %3566 
to float - %3568 = fneg float %3567 - %3569 = fmul float %3559, %3568 - %3570 = bitcast i32 %5 to float - %3571 = fadd float %3570, %3569 - %3572 = bitcast i32 %5 to float - %3573 = bitcast i32 %5 to float - %3574 = fmul float %3572, %3573 - %3575 = fadd float %3574, 0.000000e+00 - %3576 = bitcast i32 %32 to float - %3577 = bitcast i32 %32 to float - %3578 = fmul float %3576, %3577 - %3579 = fadd float %3575, %3578 - %3580 = call float @llvm.sqrt.f32(float %3579) - %3581 = bitcast i32 %5 to float - %3582 = fcmp olt float %3581, 0.000000e+00 - %3583 = sext i1 %3582 to i32 - %3584 = bitcast i32 %5 to float - %3585 = fcmp ogt float %3584, 0.000000e+00 - %3586 = zext i1 %3585 to i32 - %3587 = add nsw i32 %3583, %3586 - %3588 = sitofp i32 %3587 to float - %3589 = fneg float %3588 - %3590 = fmul float %3580, %3589 - %3591 = bitcast i32 %5 to float - %3592 = fadd float %3591, %3590 - %3593 = fmul float %3571, %3592 - %3594 = fadd float %3593, 0.000000e+00 - %3595 = bitcast i32 %5 to float - %3596 = bitcast i32 %5 to float - %3597 = fmul float %3595, %3596 - %3598 = fadd float %3597, 0.000000e+00 - %3599 = bitcast i32 %32 to float - %3600 = bitcast i32 %32 to float - %3601 = fmul float %3599, %3600 - %3602 = fadd float %3598, %3601 - %3603 = call float @llvm.sqrt.f32(float %3602) - %3604 = bitcast i32 %5 to float - %3605 = fcmp olt float %3604, 0.000000e+00 - %3606 = sext i1 %3605 to i32 - %3607 = bitcast i32 %5 to float - %3608 = fcmp ogt float %3607, 0.000000e+00 - %3609 = zext i1 %3608 to i32 - %3610 = add nsw i32 %3606, %3609 - %3611 = sitofp i32 %3610 to float - %3612 = fneg float %3611 - %3613 = fmul float %3603, %3612 - %3614 = fmul float %3613, 0.000000e+00 - %3615 = bitcast i32 %32 to float - %3616 = fadd float %3615, %3614 - %3617 = bitcast i32 %5 to float - %3618 = bitcast i32 %5 to float - %3619 = fmul float %3617, %3618 - %3620 = fadd float %3619, 0.000000e+00 - %3621 = bitcast i32 %32 to float - %3622 = bitcast i32 %32 to float - %3623 = fmul float %3621, %3622 - %3624 = fadd float %3620, %3623 - %3625 = call float @llvm.sqrt.f32(float %3624) - %3626 = bitcast i32 %5 to float - %3627 = fcmp olt float %3626, 0.000000e+00 - %3628 = sext i1 %3627 to i32 - %3629 = bitcast i32 %5 to float - %3630 = fcmp ogt float %3629, 0.000000e+00 - %3631 = zext i1 %3630 to i32 - %3632 = add nsw i32 %3628, %3631 - %3633 = sitofp i32 %3632 to float - %3634 = fneg float %3633 - %3635 = fmul float %3625, %3634 - %3636 = fmul float %3635, 0.000000e+00 - %3637 = bitcast i32 %32 to float - %3638 = fadd float %3637, %3636 - %3639 = fmul float %3616, %3638 - %3640 = fadd float %3594, %3639 - %3641 = call float @llvm.sqrt.f32(float %3640) - %3642 = fadd float %3641, 0.000000e+00 - %3643 = fdiv float %3550, %3642 - %3644 = fmul float %3529, %3643 - %3645 = fneg float %3644 - %3646 = fmul float %3645, %3405 - %3647 = fadd float %3646, 0.000000e+00 - %3648 = bitcast i32 %5 to float - %3649 = bitcast i32 %5 to float - %3650 = fmul float %3648, %3649 - %3651 = fadd float %3650, 0.000000e+00 - %3652 = bitcast i32 %32 to float - %3653 = bitcast i32 %32 to float - %3654 = fmul float %3652, %3653 - %3655 = fadd float %3651, %3654 - %3656 = call float @llvm.sqrt.f32(float %3655) - %3657 = bitcast i32 %5 to float - %3658 = fcmp olt float %3657, 0.000000e+00 - %3659 = sext i1 %3658 to i32 - %3660 = bitcast i32 %5 to float - %3661 = fcmp ogt float %3660, 0.000000e+00 - %3662 = zext i1 %3661 to i32 - %3663 = add nsw i32 %3659, %3662 - %3664 = sitofp i32 %3663 to float - %3665 = fneg float %3664 - %3666 = fmul float %3656, 
%3665 - %3667 = fmul float %3666, 0.000000e+00 - %3668 = bitcast i32 %32 to float - %3669 = fadd float %3668, %3667 - %3670 = bitcast i32 %5 to float - %3671 = bitcast i32 %5 to float - %3672 = fmul float %3670, %3671 - %3673 = fadd float %3672, 0.000000e+00 - %3674 = bitcast i32 %32 to float - %3675 = bitcast i32 %32 to float - %3676 = fmul float %3674, %3675 - %3677 = fadd float %3673, %3676 - %3678 = call float @llvm.sqrt.f32(float %3677) - %3679 = bitcast i32 %5 to float - %3680 = fcmp olt float %3679, 0.000000e+00 - %3681 = sext i1 %3680 to i32 - %3682 = bitcast i32 %5 to float - %3683 = fcmp ogt float %3682, 0.000000e+00 - %3684 = zext i1 %3683 to i32 - %3685 = add nsw i32 %3681, %3684 - %3686 = sitofp i32 %3685 to float - %3687 = fneg float %3686 - %3688 = fmul float %3678, %3687 - %3689 = bitcast i32 %5 to float - %3690 = fadd float %3689, %3688 - %3691 = bitcast i32 %5 to float - %3692 = bitcast i32 %5 to float - %3693 = fmul float %3691, %3692 - %3694 = fadd float %3693, 0.000000e+00 - %3695 = bitcast i32 %32 to float - %3696 = bitcast i32 %32 to float - %3697 = fmul float %3695, %3696 - %3698 = fadd float %3694, %3697 - %3699 = call float @llvm.sqrt.f32(float %3698) - %3700 = bitcast i32 %5 to float - %3701 = fcmp olt float %3700, 0.000000e+00 - %3702 = sext i1 %3701 to i32 - %3703 = bitcast i32 %5 to float - %3704 = fcmp ogt float %3703, 0.000000e+00 - %3705 = zext i1 %3704 to i32 - %3706 = add nsw i32 %3702, %3705 - %3707 = sitofp i32 %3706 to float - %3708 = fneg float %3707 - %3709 = fmul float %3699, %3708 - %3710 = bitcast i32 %5 to float - %3711 = fadd float %3710, %3709 - %3712 = fmul float %3690, %3711 - %3713 = fadd float %3712, 0.000000e+00 - %3714 = bitcast i32 %5 to float - %3715 = bitcast i32 %5 to float - %3716 = fmul float %3714, %3715 - %3717 = fadd float %3716, 0.000000e+00 - %3718 = bitcast i32 %32 to float - %3719 = bitcast i32 %32 to float - %3720 = fmul float %3718, %3719 - %3721 = fadd float %3717, %3720 - %3722 = call float @llvm.sqrt.f32(float %3721) - %3723 = bitcast i32 %5 to float - %3724 = fcmp olt float %3723, 0.000000e+00 - %3725 = sext i1 %3724 to i32 - %3726 = bitcast i32 %5 to float - %3727 = fcmp ogt float %3726, 0.000000e+00 - %3728 = zext i1 %3727 to i32 - %3729 = add nsw i32 %3725, %3728 - %3730 = sitofp i32 %3729 to float - %3731 = fneg float %3730 - %3732 = fmul float %3722, %3731 - %3733 = fmul float %3732, 0.000000e+00 - %3734 = bitcast i32 %32 to float - %3735 = fadd float %3734, %3733 - %3736 = bitcast i32 %5 to float - %3737 = bitcast i32 %5 to float - %3738 = fmul float %3736, %3737 - %3739 = fadd float %3738, 0.000000e+00 - %3740 = bitcast i32 %32 to float - %3741 = bitcast i32 %32 to float - %3742 = fmul float %3740, %3741 - %3743 = fadd float %3739, %3742 - %3744 = call float @llvm.sqrt.f32(float %3743) - %3745 = bitcast i32 %5 to float - %3746 = fcmp olt float %3745, 0.000000e+00 - %3747 = sext i1 %3746 to i32 - %3748 = bitcast i32 %5 to float - %3749 = fcmp ogt float %3748, 0.000000e+00 - %3750 = zext i1 %3749 to i32 - %3751 = add nsw i32 %3747, %3750 - %3752 = sitofp i32 %3751 to float - %3753 = fneg float %3752 - %3754 = fmul float %3744, %3753 - %3755 = fmul float %3754, 0.000000e+00 - %3756 = bitcast i32 %32 to float - %3757 = fadd float %3756, %3755 - %3758 = fmul float %3735, %3757 - %3759 = fadd float %3713, %3758 - %3760 = call float @llvm.sqrt.f32(float %3759) - %3761 = fadd float %3760, 0.000000e+00 - %3762 = fdiv float %3669, %3761 - %3763 = fmul float %3762, 2.000000e+00 - %3764 = bitcast i32 %5 to float - %3765 = 
bitcast i32 %5 to float - %3766 = fmul float %3764, %3765 - %3767 = fadd float %3766, 0.000000e+00 - %3768 = bitcast i32 %32 to float - %3769 = bitcast i32 %32 to float - %3770 = fmul float %3768, %3769 - %3771 = fadd float %3767, %3770 - %3772 = call float @llvm.sqrt.f32(float %3771) - %3773 = bitcast i32 %5 to float - %3774 = fcmp olt float %3773, 0.000000e+00 - %3775 = sext i1 %3774 to i32 - %3776 = bitcast i32 %5 to float - %3777 = fcmp ogt float %3776, 0.000000e+00 - %3778 = zext i1 %3777 to i32 - %3779 = add nsw i32 %3775, %3778 - %3780 = sitofp i32 %3779 to float - %3781 = fneg float %3780 - %3782 = fmul float %3772, %3781 - %3783 = fmul float %3782, 0.000000e+00 - %3784 = bitcast i32 %32 to float - %3785 = fadd float %3784, %3783 - %3786 = bitcast i32 %5 to float - %3787 = bitcast i32 %5 to float - %3788 = fmul float %3786, %3787 - %3789 = fadd float %3788, 0.000000e+00 - %3790 = bitcast i32 %32 to float - %3791 = bitcast i32 %32 to float - %3792 = fmul float %3790, %3791 - %3793 = fadd float %3789, %3792 - %3794 = call float @llvm.sqrt.f32(float %3793) - %3795 = bitcast i32 %5 to float - %3796 = fcmp olt float %3795, 0.000000e+00 - %3797 = sext i1 %3796 to i32 - %3798 = bitcast i32 %5 to float - %3799 = fcmp ogt float %3798, 0.000000e+00 - %3800 = zext i1 %3799 to i32 - %3801 = add nsw i32 %3797, %3800 - %3802 = sitofp i32 %3801 to float - %3803 = fneg float %3802 - %3804 = fmul float %3794, %3803 - %3805 = bitcast i32 %5 to float - %3806 = fadd float %3805, %3804 - %3807 = bitcast i32 %5 to float - %3808 = bitcast i32 %5 to float - %3809 = fmul float %3807, %3808 - %3810 = fadd float %3809, 0.000000e+00 - %3811 = bitcast i32 %32 to float - %3812 = bitcast i32 %32 to float - %3813 = fmul float %3811, %3812 - %3814 = fadd float %3810, %3813 - %3815 = call float @llvm.sqrt.f32(float %3814) - %3816 = bitcast i32 %5 to float - %3817 = fcmp olt float %3816, 0.000000e+00 - %3818 = sext i1 %3817 to i32 - %3819 = bitcast i32 %5 to float - %3820 = fcmp ogt float %3819, 0.000000e+00 - %3821 = zext i1 %3820 to i32 - %3822 = add nsw i32 %3818, %3821 - %3823 = sitofp i32 %3822 to float - %3824 = fneg float %3823 - %3825 = fmul float %3815, %3824 - %3826 = bitcast i32 %5 to float - %3827 = fadd float %3826, %3825 - %3828 = fmul float %3806, %3827 - %3829 = fadd float %3828, 0.000000e+00 - %3830 = bitcast i32 %5 to float - %3831 = bitcast i32 %5 to float - %3832 = fmul float %3830, %3831 - %3833 = fadd float %3832, 0.000000e+00 - %3834 = bitcast i32 %32 to float - %3835 = bitcast i32 %32 to float - %3836 = fmul float %3834, %3835 - %3837 = fadd float %3833, %3836 - %3838 = call float @llvm.sqrt.f32(float %3837) - %3839 = bitcast i32 %5 to float - %3840 = fcmp olt float %3839, 0.000000e+00 - %3841 = sext i1 %3840 to i32 - %3842 = bitcast i32 %5 to float - %3843 = fcmp ogt float %3842, 0.000000e+00 - %3844 = zext i1 %3843 to i32 - %3845 = add nsw i32 %3841, %3844 - %3846 = sitofp i32 %3845 to float - %3847 = fneg float %3846 - %3848 = fmul float %3838, %3847 - %3849 = fmul float %3848, 0.000000e+00 - %3850 = bitcast i32 %32 to float - %3851 = fadd float %3850, %3849 - %3852 = bitcast i32 %5 to float - %3853 = bitcast i32 %5 to float - %3854 = fmul float %3852, %3853 - %3855 = fadd float %3854, 0.000000e+00 - %3856 = bitcast i32 %32 to float - %3857 = bitcast i32 %32 to float - %3858 = fmul float %3856, %3857 - %3859 = fadd float %3855, %3858 - %3860 = call float @llvm.sqrt.f32(float %3859) - %3861 = bitcast i32 %5 to float - %3862 = fcmp olt float %3861, 0.000000e+00 - %3863 = sext i1 %3862 to i32 
- %3864 = bitcast i32 %5 to float - %3865 = fcmp ogt float %3864, 0.000000e+00 - %3866 = zext i1 %3865 to i32 - %3867 = add nsw i32 %3863, %3866 - %3868 = sitofp i32 %3867 to float - %3869 = fneg float %3868 - %3870 = fmul float %3860, %3869 - %3871 = fmul float %3870, 0.000000e+00 - %3872 = bitcast i32 %32 to float - %3873 = fadd float %3872, %3871 - %3874 = fmul float %3851, %3873 - %3875 = fadd float %3829, %3874 - %3876 = call float @llvm.sqrt.f32(float %3875) - %3877 = fadd float %3876, 0.000000e+00 - %3878 = fdiv float %3785, %3877 - %3879 = fmul float %3763, %3878 - %3880 = fsub float 1.000000e+00, %3879 - %3881 = getelementptr float, float* %0, i32 0 - %3882 = getelementptr inbounds float, float* %3881, i64 3 - %3883 = load float, float* %3882, align 4 - %3884 = fmul float %3880, %3883 - %3885 = fadd float %3647, %3884 - %3886 = insertelement <4 x float> zeroinitializer, float %3885, i32 0 - %3887 = insertelement <4 x float> %3886, float 0.000000e+00, i32 1 - %3888 = insertelement <4 x float> %3887, float 0.000000e+00, i32 2 - %3889 = insertelement <4 x float> %3888, float 0.000000e+00, i32 3 - %3890 = extractelement <4 x float> %3889, i32 0 - %3891 = getelementptr float, float* %2, i32 0 - %3892 = getelementptr inbounds float, float* %3891, i64 3 - store float %3890, float* %3892, align 4 - %3893 = getelementptr float, float* %1, i32 0 - %3894 = getelementptr inbounds float, float* %3893, i64 2 - %3895 = bitcast float* %3894 to i32* - %3896 = load i32, i32* %3895, align 4 - %3897 = bitcast i32 %3896 to float - %3898 = insertelement <4 x float> zeroinitializer, float %3897, i32 0 - %3899 = getelementptr float, float* %1, i32 0 - %3900 = getelementptr inbounds float, float* %3899, i64 1 - %3901 = bitcast float* %3900 to i32* - %3902 = load i32, i32* %3901, align 4 - %3903 = bitcast i32 %3902 to float - %3904 = insertelement <4 x float> %3898, float %3903, i32 1 - %3905 = insertelement <4 x float> %3904, float 0.000000e+00, i32 2 - %3906 = insertelement <4 x float> %3905, float 0.000000e+00, i32 3 - %3907 = extractelement <4 x float> %3906, i32 0 - %3908 = getelementptr float, float* %1, i32 0 - %3909 = getelementptr inbounds float, float* %3908, i64 1 - %3910 = bitcast float* %3909 to i32* - %3911 = bitcast i32* %3910 to float* - store float %3907, float* %3911, align 4 - %3912 = extractelement <4 x float> %3906, i32 1 - %3913 = getelementptr float, float* %1, i32 0 - %3914 = getelementptr inbounds float, float* %3913, i64 2 - %3915 = bitcast float* %3914 to i32* - %3916 = bitcast i32* %3915 to float* - store float %3912, float* %3916, align 4 - ret void -} - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { -.preheader13: - %3 = bitcast float* %2 to i8* - %4 = bitcast float* %0 to i8* - %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) - %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #8 - %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 - %8 = bitcast i8* %7 to float* - store float 1.000000e+00, float* %8, align 4 - %9 = getelementptr inbounds i8, i8* %7, i64 8 - %10 = getelementptr inbounds i8, i8* %7, i64 12 - %11 = bitcast i8* %10 to float* - store float 1.000000e+00, float* %11, align 4 - %12 = bitcast float* %1 to i8* - %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) 
- %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %15 = bitcast i8* %14 to float* - %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %17 = bitcast i8* %16 to float* - %18 = bitcast float* %2 to i32* - %19 = load i32, i32* %18, align 4 - %20 = bitcast i8* %14 to i32* - store i32 %19, i32* %20, align 4 - %21 = bitcast i8* %7 to i32* - %22 = load i32, i32* %21, align 4 - %23 = bitcast i8* %16 to i32* - store i32 %22, i32* %23, align 4 - %24 = getelementptr inbounds float, float* %2, i64 2 - %25 = bitcast float* %24 to i32* - %26 = load i32, i32* %25, align 4 - %27 = getelementptr inbounds i8, i8* %14, i64 4 - %28 = bitcast i8* %27 to i32* - store i32 %26, i32* %28, align 4 - %29 = bitcast i8* %9 to i32* - %30 = load i32, i32* %29, align 4 - %31 = getelementptr inbounds i8, i8* %16, i64 4 - %32 = bitcast i8* %31 to i32* - store i32 %30, i32* %32, align 4 - %33 = load float, float* %15, align 4 - %34 = call float @no_opt_sgn(float %33) - %35 = fneg float %34 - %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) - %37 = fmul float %36, %35 - %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %39 = bitcast i8* %38 to float* - %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %41 = load float, float* %15, align 4 - %42 = load float, float* %17, align 4 - %43 = fmul float %37, %42 - %44 = fadd float %41, %43 - store float %44, float* %39, align 4 - %45 = bitcast i8* %27 to float* - %46 = load float, float* %45, align 4 - %47 = bitcast i8* %31 to float* - %48 = load float, float* %47, align 4 - %49 = fmul float %37, %48 - %50 = fadd float %46, %49 - %51 = getelementptr inbounds i8, i8* %38, i64 4 - %52 = bitcast i8* %51 to float* - store float %50, float* %52, align 4 - %53 = bitcast i8* %40 to float* - %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) - %55 = fadd float %54, 0x3EE4F8B580000000 - %56 = load float, float* %39, align 4 - %57 = fdiv float %56, %55 - store float %57, float* %53, align 4 - %58 = load float, float* %52, align 4 - %59 = fdiv float %58, %55 - %60 = getelementptr inbounds i8, i8* %40, i64 4 - %61 = bitcast i8* %60 to float* - store float %59, float* %61, align 4 - %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 - %63 = bitcast i8* %62 to float* - %64 = load float, float* %53, align 4 - %65 = fmul float %64, 2.000000e+00 - %66 = fmul float %65, %64 - %67 = fsub float 1.000000e+00, %66 - store float %67, float* %63, align 4 - %68 = load float, float* %53, align 4 - %69 = fmul float %68, 2.000000e+00 - %70 = load float, float* %61, align 4 - %71 = fmul float %69, %70 - %72 = fsub float 0.000000e+00, %71 - %73 = getelementptr inbounds i8, i8* %62, i64 4 - %74 = bitcast i8* %73 to float* - store float %72, float* %74, align 4 - %75 = load float, float* %61, align 4 - %76 = fmul float %75, 2.000000e+00 - %77 = load float, float* %53, align 4 - %78 = fmul float %76, %77 - %79 = fsub float 0.000000e+00, %78 - %80 = getelementptr inbounds i8, i8* %62, i64 8 - %81 = bitcast i8* %80 to float* - store float %79, float* %81, align 4 - %82 = load float, float* %61, align 4 - %83 = fmul float %82, 2.000000e+00 - %84 = fmul float %83, %82 - %85 = fsub float 1.000000e+00, %84 - %86 = getelementptr inbounds i8, i8* %62, i64 12 - %87 = bitcast i8* %86 to float* - store float %85, float* %87, align 4 - %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 - %89 = bitcast i8* %88 to float* - %90 = bitcast i8* %62 to i32* - %91 = load i32, i32* %90, align 4 - %92 
= bitcast i8* %88 to i32* - store i32 %91, i32* %92, align 4 - %93 = bitcast i8* %73 to i32* - %94 = load i32, i32* %93, align 4 - %95 = getelementptr inbounds i8, i8* %88, i64 4 - %96 = bitcast i8* %95 to i32* - store i32 %94, i32* %96, align 4 - %97 = bitcast i8* %80 to i32* - %98 = load i32, i32* %97, align 4 - %99 = getelementptr inbounds i8, i8* %88, i64 8 - %100 = bitcast i8* %99 to i32* - store i32 %98, i32* %100, align 4 - %101 = bitcast i8* %86 to i32* - %102 = load i32, i32* %101, align 4 - %103 = getelementptr inbounds i8, i8* %88, i64 12 - %104 = bitcast i8* %103 to i32* - store i32 %102, i32* %104, align 4 - %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #8 - call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) - call void @free(i8* %14) - call void @free(i8* %16) - call void @free(i8* %38) - call void @free(i8* %40) - call void @free(i8* %62) - call void @free(i8* %88) - call void @no_opt_naive_fixed_transpose(float* %1) - ret void -} - -; Function Attrs: nounwind -declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 - -; Function Attrs: nounwind readnone speculatable willreturn -declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 - -; Function Attrs: allocsize(0,1) -declare i8* @calloc(i64, i64) #5 - -declare void @free(i8*) #6 - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #1 { -.preheader6: - %0 = alloca i64, align 8 - %1 = alloca [4 x float], align 16 - %2 = alloca [4 x float], align 16 - %3 = alloca [4 x float], align 16 - %4 = alloca [4 x float], align 16 - %5 = alloca [4 x float], align 16 - %6 = call i64 @time(i64* null) #8 - store i64 %6, i64* %0, align 8 - %7 = call i64 @time(i64* nonnull %0) #8 - %8 = trunc i64 %7 to i32 - call void @srand(i32 %8) #8 - %9 = call i32 @rand() #8 - %10 = sitofp i32 %9 to float - %11 = fdiv float %10, 0x41747AE140000000 - %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 - store float %11, float* %12, align 16 - %13 = fpext float %11 to double - %14 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %13) #8 - %15 = call i32 @rand() #8 - %16 = sitofp i32 %15 to float - %17 = fdiv float %16, 0x41747AE140000000 - %18 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 - store float %17, float* %18, align 4 - %19 = fpext float %17 to double - %20 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %19) #8 - %21 = call i32 @rand() #8 - %22 = sitofp i32 %21 to float - %23 = fdiv float %22, 0x41747AE140000000 - %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 - store float %23, float* %24, align 8 - %25 = fpext float %23 to double - %26 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %25) #8 - %27 = call i32 @rand() #8 - %28 = sitofp i32 %27 to float - %29 = fdiv float %28, 0x41747AE140000000 - %30 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 - store float %29, float* %30, align 4 - %31 = fpext float %29 to double - %32 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %31) #8 - %33 = bitcast [4 x float]* %2 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %33, i8 0, i64 16, i1 false) - %34 = bitcast [4 x float]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %34, i8 0, i64 16, i1 false) - %35 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %36 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %35, float* nonnull %36) - %37 = bitcast [4 x float]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %37, i8 0, i64 16, i1 false) - %38 = bitcast [4 x float]* %5 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %38, i8 0, i64 16, i1 false) - %39 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - %40 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 - call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %39, float* nonnull %40) - %41 = load float, float* %35, align 16 - %42 = fpext float %41 to double - %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %42) #8 - %44 = load float, float* %39, align 16 - %45 = fpext float %44 to double - %46 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %45) #8 - %47 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 - %48 = load float, float* %47, align 4 - %49 = fpext float %48 to double - %50 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %49) #8 - %51 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 - %52 = load float, float* %51, align 4 - %53 = fpext float %52 to double - %54 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %53) #8 - %55 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 - %56 = load float, float* %55, align 8 - %57 = fpext float %56 to double - %58 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %57) #8 - %59 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 - %60 = load float, float* %59, align 8 - %61 = fpext float %60 to double - %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %61) #8 - %63 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 - %64 = load float, float* %63, align 4 - %65 = fpext float %64 to double - %66 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %65) #8 - %67 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 - %68 = load float, float* %67, align 4 - %69 = fpext float %68 to double - %70 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %69) #8 - %71 = load float, float* %36, align 16 - %72 = fpext float %71 to double - %73 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %72) #8 - %74 = load float, float* %40, align 16 - %75 = fpext float %74 to double - %76 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %75) #8 - %77 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 - %78 = load float, float* %77, align 4 - %79 = fpext float %78 to double - %80 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %79) #8 - %81 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 - %82 = load float, float* %81, align 4 - %83 = fpext float %82 to double - %84 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %83) #8 - %85 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 - %86 = load float, float* %85, align 8 - %87 = fpext float %86 to double - %88 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %87) #8 - %89 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 - %90 = load float, float* %89, align 8 - %91 = fpext float %90 to double - %92 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %91) #8 - %93 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 - %94 = load float, float* %93, align 4 - %95 = fpext float %94 to double - %96 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %95) #8 - %97 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 - %98 = load float, float* %97, align 4 - %99 = fpext float %98 to double - %100 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %99) #8 - ret i32 0 -} - -declare i64 @time(i64*) #6 - -declare void @srand(i32) #6 - -declare i32 @rand() #6 - -declare i32 @printf(i8*, ...) 
#6 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #7 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #2 - -attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone speculatable willreturn } -attributes #3 = { argmemonly nounwind willreturn writeonly } -attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { argmemonly nounwind willreturn } -attributes #8 = { nounwind } -attributes #9 = { nounwind allocsize(0,1) } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} -!3 = distinct !{!3, !4} -!4 = !{!"llvm.loop.unroll.disable"} -!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/failed-test/diospyros.ll b/src/dios-egraphs/Diospyros/failed-test/diospyros.ll deleted file mode 100644 index 548f014e..00000000 --- a/src/dios-egraphs/Diospyros/failed-test/diospyros.ll +++ /dev/null @@ -1,5489 +0,0 @@ -; ModuleID 
= 'build/aa.ll' -source_filename = "fail-tests/qr-decomp-local-arrays.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 -@.str.1 = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 -@.str.2 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 -@.str.3 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 -@.str.4 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @sgn(float %0) #0 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_sgn(float %0) #1 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @naive_norm(float* %0, i32 %1) #0 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - %wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 - %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = 
phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 = fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32(float) #2 - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_naive_norm(float* %0, i32 %1) #1 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - %wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 - %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 
%indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 = fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_transpose(float* %0) #0 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - %6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_transpose(float* %0) #1 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - %6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { -.preheader: - %3 = load float, float* %0, align 4 - %4 = load float, float* %1, align 4 - %5 = fmul float %3, %4 - %6 = fadd float %5, 0.000000e+00 - %7 = getelementptr inbounds float, float* %0, i64 1 - %8 = load float, float* %7, align 4 - %9 = getelementptr inbounds float, float* %1, i64 2 - %10 = load float, float* %9, align 4 - %11 = fmul float %8, %10 - %12 = fadd float %6, %11 - %13 = getelementptr inbounds float, float* %2, i64 1 - %14 = load float, float* %0, align 4 - %15 = getelementptr inbounds float, float* %1, i64 1 - %16 = load float, float* %15, align 4 - %17 = fmul float %14, %16 - %18 = fadd float %17, 0.000000e+00 - %19 = load float, float* %7, align 4 - %20 = getelementptr inbounds float, float* %1, i64 3 - %21 = load float, float* %20, align 4 - %22 = fmul float %19, %21 - %23 = fadd float %18, %22 - %24 = getelementptr inbounds float, float* %0, i64 2 - %25 = getelementptr inbounds float, float* %2, i64 2 - %26 = load float, float* %24, align 4 - %27 = load float, float* %1, align 4 - %28 = fmul float %26, %27 - %29 = fadd float %28, 0.000000e+00 - %30 = getelementptr inbounds float, float* %0, i64 3 - %31 = load float, float* %30, align 4 - %32 = load float, float* %9, align 4 - %33 = fmul float %31, %32 - %34 = fadd float %29, %33 - %35 = getelementptr inbounds float, float* %2, i64 3 - %36 = load float, float* %24, align 4 - %37 = load float, float* %15, align 4 - %38 = fmul float %36, %37 - %39 = fadd float %38, 0.000000e+00 - %40 = load float, float* %30, align 4 - %41 = load float, float* %20, align 4 - %42 = fmul float %40, %41 - %43 = fadd float %39, %42 - store float 0.000000e+00, float* %2, align 4 - %44 = getelementptr float, float* %0, i32 0 - %45 = load float, float* %44, align 4 - %46 = insertelement <4 x float> zeroinitializer, float %45, i32 0 - %47 = insertelement <4 x float> %46, float 0.000000e+00, i32 1 - %48 = insertelement <4 x float> %47, 
float 0.000000e+00, i32 2 - %49 = insertelement <4 x float> %48, float 0.000000e+00, i32 3 - %50 = getelementptr float, float* %1, i32 0 - %51 = load float, float* %50, align 4 - %52 = insertelement <4 x float> zeroinitializer, float %51, i32 0 - %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 1 - %54 = insertelement <4 x float> %53, float 0.000000e+00, i32 2 - %55 = insertelement <4 x float> %54, float 0.000000e+00, i32 3 - %56 = call <4 x float> @llvm.fma.f32(<4 x float> %49, <4 x float> %55, <4 x float> zeroinitializer) - %57 = extractelement <4 x float> %56, i32 0 - store float %57, float* %2, align 4 - %58 = getelementptr float, float* %0, i32 0 - %59 = load float, float* %58, align 4 - %60 = insertelement <4 x float> zeroinitializer, float %59, i32 0 - %61 = insertelement <4 x float> %60, float 1.000000e+00, i32 1 - %62 = insertelement <4 x float> %61, float 1.000000e+00, i32 2 - %63 = insertelement <4 x float> %62, float 1.000000e+00, i32 3 - %64 = insertelement <4 x float> zeroinitializer, float %51, i32 0 - %65 = insertelement <4 x float> %64, float 0.000000e+00, i32 1 - %66 = insertelement <4 x float> %65, float 0.000000e+00, i32 2 - %67 = insertelement <4 x float> %66, float 0.000000e+00, i32 3 - %68 = fmul <4 x float> %63, %67 - %69 = fadd <4 x float> %68, zeroinitializer - %70 = getelementptr float, float* %0, i32 0 - %71 = getelementptr inbounds float, float* %70, i64 1 - %72 = load float, float* %71, align 4 - %73 = insertelement <4 x float> zeroinitializer, float %72, i32 0 - %74 = insertelement <4 x float> %73, float 0.000000e+00, i32 1 - %75 = insertelement <4 x float> %74, float 0.000000e+00, i32 2 - %76 = insertelement <4 x float> %75, float 0.000000e+00, i32 3 - %77 = getelementptr float, float* %1, i32 0 - %78 = getelementptr inbounds float, float* %77, i64 2 - %79 = load float, float* %78, align 4 - %80 = insertelement <4 x float> zeroinitializer, float %79, i32 0 - %81 = insertelement <4 x float> %80, float 0.000000e+00, i32 1 - %82 = insertelement <4 x float> %81, float 0.000000e+00, i32 2 - %83 = insertelement <4 x float> %82, float 0.000000e+00, i32 3 - %84 = call <4 x float> @llvm.fma.f32.1(<4 x float> %76, <4 x float> %83, <4 x float> %69) - %85 = extractelement <4 x float> %84, i32 0 - store float %85, float* %2, align 4 - %86 = extractelement <4 x float> %84, i32 1 - %87 = getelementptr float, float* %2, i32 0 - %88 = getelementptr inbounds float, float* %87, i64 1 - store float %86, float* %88, align 4 - %89 = getelementptr float, float* %0, i32 0 - %90 = load float, float* %89, align 4 - %91 = insertelement <4 x float> zeroinitializer, float %90, i32 0 - %92 = insertelement <4 x float> %91, float 0.000000e+00, i32 1 - %93 = insertelement <4 x float> %92, float 0.000000e+00, i32 2 - %94 = insertelement <4 x float> %93, float 0.000000e+00, i32 3 - %95 = getelementptr float, float* %1, i32 0 - %96 = getelementptr inbounds float, float* %95, i64 1 - %97 = load float, float* %96, align 4 - %98 = insertelement <4 x float> zeroinitializer, float %97, i32 0 - %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 1 - %100 = insertelement <4 x float> %99, float 0.000000e+00, i32 2 - %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 3 - %102 = call <4 x float> @llvm.fma.f32.2(<4 x float> %94, <4 x float> %101, <4 x float> zeroinitializer) - %103 = extractelement <4 x float> %102, i32 0 - %104 = getelementptr float, float* %2, i32 0 - %105 = getelementptr inbounds float, float* %104, i64 1 - store float %103, float* %105, align 4 - %106 
= insertelement <4 x float> zeroinitializer, float %90, i32 0 - %107 = insertelement <4 x float> %106, float 1.000000e+00, i32 1 - %108 = insertelement <4 x float> %107, float 1.000000e+00, i32 2 - %109 = insertelement <4 x float> %108, float 1.000000e+00, i32 3 - %110 = insertelement <4 x float> zeroinitializer, float %97, i32 0 - %111 = insertelement <4 x float> %110, float 0.000000e+00, i32 1 - %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 2 - %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 3 - %114 = fmul <4 x float> %109, %113 - %115 = fadd <4 x float> %114, zeroinitializer - %116 = getelementptr float, float* %0, i32 0 - %117 = getelementptr inbounds float, float* %116, i64 1 - %118 = load float, float* %117, align 4 - %119 = insertelement <4 x float> zeroinitializer, float %118, i32 0 - %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 1 - %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 2 - %122 = insertelement <4 x float> %121, float 0.000000e+00, i32 3 - %123 = getelementptr float, float* %1, i32 0 - %124 = getelementptr inbounds float, float* %123, i64 3 - %125 = load float, float* %124, align 4 - %126 = insertelement <4 x float> zeroinitializer, float %125, i32 0 - %127 = insertelement <4 x float> %126, float 0.000000e+00, i32 1 - %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 2 - %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 3 - %130 = call <4 x float> @llvm.fma.f32.3(<4 x float> %122, <4 x float> %129, <4 x float> %115) - %131 = extractelement <4 x float> %130, i32 0 - %132 = getelementptr float, float* %2, i32 0 - %133 = getelementptr inbounds float, float* %132, i64 1 - store float %131, float* %133, align 4 - %134 = extractelement <4 x float> %130, i32 1 - %135 = getelementptr float, float* %2, i32 0 - %136 = getelementptr inbounds float, float* %135, i64 2 - store float %134, float* %136, align 4 - %137 = getelementptr float, float* %0, i32 0 - %138 = getelementptr inbounds float, float* %137, i64 2 - %139 = load float, float* %138, align 4 - %140 = insertelement <4 x float> zeroinitializer, float %139, i32 0 - %141 = insertelement <4 x float> %140, float 0.000000e+00, i32 1 - %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 2 - %143 = insertelement <4 x float> %142, float 0.000000e+00, i32 3 - %144 = getelementptr float, float* %1, i32 0 - %145 = load float, float* %144, align 4 - %146 = insertelement <4 x float> zeroinitializer, float %145, i32 0 - %147 = insertelement <4 x float> %146, float 0.000000e+00, i32 1 - %148 = insertelement <4 x float> %147, float 0.000000e+00, i32 2 - %149 = insertelement <4 x float> %148, float 0.000000e+00, i32 3 - %150 = call <4 x float> @llvm.fma.f32.4(<4 x float> %143, <4 x float> %149, <4 x float> zeroinitializer) - %151 = extractelement <4 x float> %150, i32 0 - %152 = getelementptr float, float* %2, i32 0 - %153 = getelementptr inbounds float, float* %152, i64 2 - store float %151, float* %153, align 4 - %154 = insertelement <4 x float> zeroinitializer, float %139, i32 0 - %155 = insertelement <4 x float> %154, float 1.000000e+00, i32 1 - %156 = insertelement <4 x float> %155, float 1.000000e+00, i32 2 - %157 = insertelement <4 x float> %156, float 1.000000e+00, i32 3 - %158 = insertelement <4 x float> zeroinitializer, float %145, i32 0 - %159 = insertelement <4 x float> %158, float 0.000000e+00, i32 1 - %160 = insertelement <4 x float> %159, float 0.000000e+00, i32 2 - %161 = insertelement <4 x float> %160, float 0.000000e+00, 
i32 3 - %162 = fmul <4 x float> %157, %161 - %163 = fadd <4 x float> %162, zeroinitializer - %164 = getelementptr float, float* %0, i32 0 - %165 = getelementptr inbounds float, float* %164, i64 3 - %166 = load float, float* %165, align 4 - %167 = insertelement <4 x float> zeroinitializer, float %166, i32 0 - %168 = insertelement <4 x float> %167, float 0.000000e+00, i32 1 - %169 = insertelement <4 x float> %168, float 0.000000e+00, i32 2 - %170 = insertelement <4 x float> %169, float 0.000000e+00, i32 3 - %171 = getelementptr float, float* %1, i32 0 - %172 = getelementptr inbounds float, float* %171, i64 2 - %173 = load float, float* %172, align 4 - %174 = insertelement <4 x float> zeroinitializer, float %173, i32 0 - %175 = insertelement <4 x float> %174, float 0.000000e+00, i32 1 - %176 = insertelement <4 x float> %175, float 0.000000e+00, i32 2 - %177 = insertelement <4 x float> %176, float 0.000000e+00, i32 3 - %178 = call <4 x float> @llvm.fma.f32.5(<4 x float> %170, <4 x float> %177, <4 x float> %163) - %179 = extractelement <4 x float> %178, i32 0 - %180 = getelementptr float, float* %2, i32 0 - %181 = getelementptr inbounds float, float* %180, i64 2 - store float %179, float* %181, align 4 - %182 = extractelement <4 x float> %178, i32 1 - %183 = getelementptr float, float* %2, i32 0 - %184 = getelementptr inbounds float, float* %183, i64 3 - store float %182, float* %184, align 4 - %185 = getelementptr float, float* %0, i32 0 - %186 = getelementptr inbounds float, float* %185, i64 2 - %187 = load float, float* %186, align 4 - %188 = insertelement <4 x float> zeroinitializer, float %187, i32 0 - %189 = insertelement <4 x float> %188, float 0.000000e+00, i32 1 - %190 = insertelement <4 x float> %189, float 0.000000e+00, i32 2 - %191 = insertelement <4 x float> %190, float 0.000000e+00, i32 3 - %192 = getelementptr float, float* %1, i32 0 - %193 = getelementptr inbounds float, float* %192, i64 1 - %194 = load float, float* %193, align 4 - %195 = insertelement <4 x float> zeroinitializer, float %194, i32 0 - %196 = insertelement <4 x float> %195, float 0.000000e+00, i32 1 - %197 = insertelement <4 x float> %196, float 0.000000e+00, i32 2 - %198 = insertelement <4 x float> %197, float 0.000000e+00, i32 3 - %199 = call <4 x float> @llvm.fma.f32.6(<4 x float> %191, <4 x float> %198, <4 x float> zeroinitializer) - %200 = extractelement <4 x float> %199, i32 0 - %201 = getelementptr float, float* %2, i32 0 - %202 = getelementptr inbounds float, float* %201, i64 3 - store float %200, float* %202, align 4 - %203 = insertelement <4 x float> zeroinitializer, float %187, i32 0 - %204 = insertelement <4 x float> %203, float 1.000000e+00, i32 1 - %205 = insertelement <4 x float> %204, float 1.000000e+00, i32 2 - %206 = insertelement <4 x float> %205, float 1.000000e+00, i32 3 - %207 = insertelement <4 x float> zeroinitializer, float %194, i32 0 - %208 = insertelement <4 x float> %207, float 0.000000e+00, i32 1 - %209 = insertelement <4 x float> %208, float 0.000000e+00, i32 2 - %210 = insertelement <4 x float> %209, float 0.000000e+00, i32 3 - %211 = fmul <4 x float> %206, %210 - %212 = fadd <4 x float> %211, zeroinitializer - %213 = getelementptr float, float* %0, i32 0 - %214 = getelementptr inbounds float, float* %213, i64 3 - %215 = load float, float* %214, align 4 - %216 = insertelement <4 x float> zeroinitializer, float %215, i32 0 - %217 = insertelement <4 x float> %216, float 0.000000e+00, i32 1 - %218 = insertelement <4 x float> %217, float 0.000000e+00, i32 2 - %219 = insertelement <4 x 
float> %218, float 0.000000e+00, i32 3 - %220 = getelementptr float, float* %1, i32 0 - %221 = getelementptr inbounds float, float* %220, i64 3 - %222 = load float, float* %221, align 4 - %223 = insertelement <4 x float> zeroinitializer, float %222, i32 0 - %224 = insertelement <4 x float> %223, float 0.000000e+00, i32 1 - %225 = insertelement <4 x float> %224, float 0.000000e+00, i32 2 - %226 = insertelement <4 x float> %225, float 0.000000e+00, i32 3 - %227 = call <4 x float> @llvm.fma.f32.7(<4 x float> %219, <4 x float> %226, <4 x float> %212) - %228 = extractelement <4 x float> %227, i32 0 - %229 = getelementptr float, float* %2, i32 0 - %230 = getelementptr inbounds float, float* %229, i64 3 - store float %228, float* %230, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { -.preheader: - store float 0.000000e+00, float* %2, align 4 - %3 = load float, float* %0, align 4 - %4 = load float, float* %1, align 4 - %5 = fmul float %3, %4 - %6 = fadd float %5, 0.000000e+00 - store float %6, float* %2, align 4 - %7 = getelementptr inbounds float, float* %0, i64 1 - %8 = load float, float* %7, align 4 - %9 = getelementptr inbounds float, float* %1, i64 2 - %10 = load float, float* %9, align 4 - %11 = fmul float %8, %10 - %12 = fadd float %6, %11 - store float %12, float* %2, align 4 - %13 = getelementptr inbounds float, float* %2, i64 1 - store float 0.000000e+00, float* %13, align 4 - %14 = load float, float* %0, align 4 - %15 = getelementptr inbounds float, float* %1, i64 1 - %16 = load float, float* %15, align 4 - %17 = fmul float %14, %16 - %18 = fadd float %17, 0.000000e+00 - store float %18, float* %13, align 4 - %19 = load float, float* %7, align 4 - %20 = getelementptr inbounds float, float* %1, i64 3 - %21 = load float, float* %20, align 4 - %22 = fmul float %19, %21 - %23 = fadd float %18, %22 - store float %23, float* %13, align 4 - %24 = getelementptr inbounds float, float* %0, i64 2 - %25 = getelementptr inbounds float, float* %2, i64 2 - store float 0.000000e+00, float* %25, align 4 - %26 = load float, float* %24, align 4 - %27 = load float, float* %1, align 4 - %28 = fmul float %26, %27 - %29 = fadd float %28, 0.000000e+00 - store float %29, float* %25, align 4 - %30 = getelementptr inbounds float, float* %0, i64 3 - %31 = load float, float* %30, align 4 - %32 = load float, float* %9, align 4 - %33 = fmul float %31, %32 - %34 = fadd float %29, %33 - store float %34, float* %25, align 4 - %35 = getelementptr inbounds float, float* %2, i64 3 - store float 0.000000e+00, float* %35, align 4 - %36 = load float, float* %24, align 4 - %37 = load float, float* %15, align 4 - %38 = fmul float %36, %37 - %39 = fadd float %38, 0.000000e+00 - store float %39, float* %35, align 4 - %40 = load float, float* %30, align 4 - %41 = load float, float* %20, align 4 - %42 = fmul float %40, %41 - %43 = fadd float %39, %42 - store float %43, float* %35, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { -.preheader49: - %3 = bitcast float* %1 to i8* - %4 = alloca [4 x float], align 16 - %5 = bitcast [4 x float]* %4 to i8* - %6 = bitcast float* %0 to i32* - %7 = load i32, i32* %6, align 4 - %8 = bitcast float* %2 to i32* - %9 = getelementptr inbounds float, float* %0, i64 1 - %10 = bitcast float* %9 to i32* - %11 = load i32, i32* %10, align 4 - %12 = getelementptr inbounds float, float* %2, i64 1 - 
%13 = bitcast float* %12 to i32* - %14 = getelementptr inbounds float, float* %0, i64 2 - %15 = bitcast float* %14 to i32* - %16 = load i32, i32* %15, align 4 - %17 = getelementptr inbounds float, float* %2, i64 2 - %18 = bitcast float* %17 to i32* - %19 = getelementptr inbounds float, float* %0, i64 3 - %20 = bitcast float* %19 to i32* - %21 = load i32, i32* %20, align 4 - %22 = getelementptr inbounds float, float* %2, i64 3 - %23 = bitcast float* %22 to i32* - %24 = bitcast i32 %7 to float - %25 = fcmp ogt float %24, 0.000000e+00 - %26 = zext i1 %25 to i32 - %27 = fcmp olt float %24, 0.000000e+00 - %.neg = sext i1 %27 to i32 - %28 = add nsw i32 %.neg, %26 - %29 = sitofp i32 %28 to float - %30 = fmul float %24, %24 - %31 = fadd float %30, 0.000000e+00 - %32 = bitcast i32 %16 to float - %33 = fmul float %32, %32 - %34 = fadd float %31, %33 - %35 = call float @llvm.sqrt.f32(float %34) #8 - %36 = fneg float %29 - %37 = fmul float %35, %36 - %38 = fadd float %24, %37 - %39 = fmul float %37, 0.000000e+00 - %40 = fadd float %32, %39 - %41 = fmul float %38, %38 - %42 = fadd float %41, 0.000000e+00 - %43 = fmul float %40, %40 - %44 = fadd float %42, %43 - %45 = call float @llvm.sqrt.f32(float %44) #8 - %46 = fadd float %45, 0x3EE4F8B580000000 - %47 = fdiv float %38, %46 - %48 = fdiv float %40, %46 - %49 = fmul float %47, 2.000000e+00 - %50 = fmul float %49, %47 - %51 = fsub float 1.000000e+00, %50 - %52 = fmul float %49, %48 - %53 = fsub float 0.000000e+00, %52 - %54 = fmul float %48, 2.000000e+00 - %55 = fmul float %54, %47 - %56 = fsub float 0.000000e+00, %55 - %57 = fmul float %54, %48 - %58 = fsub float 1.000000e+00, %57 - %59 = bitcast float %51 to i32 - %60 = bitcast [4 x float]* %4 to i32* - %61 = bitcast float %53 to i32 - %62 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 - %63 = bitcast float* %62 to i32* - %64 = bitcast float %56 to i32 - %65 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 - %66 = bitcast float* %65 to i32* - %67 = bitcast float %58 to i32 - %68 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 - %69 = bitcast float* %68 to i32* - %70 = load float, float* %0, align 4 - %71 = fmul float %51, %70 - %72 = fadd float %71, 0.000000e+00 - %73 = load float, float* %14, align 4 - %74 = fmul float %53, %73 - %75 = fadd float %72, %74 - %76 = load float, float* %9, align 4 - %77 = fmul float %51, %76 - %78 = fadd float %77, 0.000000e+00 - %79 = load float, float* %19, align 4 - %80 = fmul float %53, %79 - %81 = fadd float %78, %80 - %82 = load float, float* %0, align 4 - %83 = fmul float %56, %82 - %84 = fadd float %83, 0.000000e+00 - %85 = load float, float* %14, align 4 - %86 = fmul float %58, %85 - %87 = fadd float %84, %86 - %88 = load float, float* %9, align 4 - %89 = fmul float %56, %88 - %90 = fadd float %89, 0.000000e+00 - %91 = load float, float* %19, align 4 - %92 = fmul float %58, %91 - %93 = fadd float %90, %92 - %94 = getelementptr inbounds float, float* %1, i64 1 - %95 = bitcast float* %94 to i32* - %96 = load i32, i32* %95, align 4 - %97 = getelementptr inbounds float, float* %1, i64 2 - %98 = bitcast float* %97 to i32* - %99 = load i32, i32* %98, align 4 - %100 = getelementptr float, float* %0, i32 0 - %101 = bitcast float* %100 to i32* - %102 = load i32, i32* %101, align 4 - %103 = bitcast i32 %102 to float - %104 = insertelement <4 x float> zeroinitializer, float %103, i32 0 - %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 1 - %106 = insertelement <4 x float> %105, float 0.000000e+00, 
i32 2 - %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 3 - %108 = extractelement <4 x float> %107, i32 0 - %109 = bitcast i32* %8 to float* - %110 = getelementptr float, float* %2, i32 0 - %111 = bitcast float* %110 to i32* - %112 = bitcast i32* %111 to float* - store float %108, float* %112, align 4 - %113 = getelementptr float, float* %0, i32 0 - %114 = getelementptr inbounds float, float* %113, i64 1 - %115 = bitcast float* %114 to i32* - %116 = load i32, i32* %115, align 4 - %117 = bitcast i32 %116 to float - %118 = insertelement <4 x float> zeroinitializer, float %117, i32 0 - %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 1 - %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 2 - %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 3 - %122 = extractelement <4 x float> %121, i32 0 - %123 = bitcast i32* %13 to float* - %124 = getelementptr float, float* %2, i32 0 - %125 = getelementptr inbounds float, float* %124, i64 1 - %126 = bitcast float* %125 to i32* - %127 = bitcast i32* %126 to float* - store float %122, float* %127, align 4 - %128 = getelementptr float, float* %0, i32 0 - %129 = getelementptr inbounds float, float* %128, i64 2 - %130 = bitcast float* %129 to i32* - %131 = load i32, i32* %130, align 4 - %132 = bitcast i32 %131 to float - %133 = insertelement <4 x float> zeroinitializer, float %132, i32 0 - %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 1 - %135 = insertelement <4 x float> %134, float 0.000000e+00, i32 2 - %136 = insertelement <4 x float> %135, float 0.000000e+00, i32 3 - %137 = extractelement <4 x float> %136, i32 0 - %138 = bitcast i32* %18 to float* - %139 = getelementptr float, float* %2, i32 0 - %140 = getelementptr inbounds float, float* %139, i64 2 - %141 = bitcast float* %140 to i32* - %142 = bitcast i32* %141 to float* - store float %137, float* %142, align 4 - %143 = getelementptr float, float* %0, i32 0 - %144 = getelementptr inbounds float, float* %143, i64 3 - %145 = bitcast float* %144 to i32* - %146 = load i32, i32* %145, align 4 - %147 = bitcast i32 %146 to float - %148 = fneg float %147 - %149 = insertelement <4 x float> zeroinitializer, float %148, i32 0 - %150 = bitcast i32 %102 to float - %151 = bitcast i32 %102 to float - %152 = fmul float %150, %151 - %153 = fadd float %152, 0.000000e+00 - %154 = bitcast i32 %131 to float - %155 = bitcast i32 %131 to float - %156 = fmul float %154, %155 - %157 = fadd float %153, %156 - %158 = call float @llvm.sqrt.f32.8(float %157) - %159 = bitcast i32 %102 to float - %160 = fcmp olt float %159, 0.000000e+00 - %161 = sext i1 %160 to i32 - %162 = bitcast i32 %102 to float - %163 = fcmp ogt float %162, 0.000000e+00 - %164 = zext i1 %163 to i32 - %165 = add nsw i32 %161, %164 - %166 = sitofp i32 %165 to float - %167 = fneg float %166 - %168 = fmul float %158, %167 - %169 = bitcast i32 %102 to float - %170 = fadd float %169, %168 - %171 = bitcast i32 %102 to float - %172 = bitcast i32 %102 to float - %173 = fmul float %171, %172 - %174 = fadd float %173, 0.000000e+00 - %175 = bitcast i32 %131 to float - %176 = bitcast i32 %131 to float - %177 = fmul float %175, %176 - %178 = fadd float %174, %177 - %179 = call float @llvm.sqrt.f32.9(float %178) - %180 = bitcast i32 %102 to float - %181 = fcmp olt float %180, 0.000000e+00 - %182 = sext i1 %181 to i32 - %183 = bitcast i32 %102 to float - %184 = fcmp ogt float %183, 0.000000e+00 - %185 = zext i1 %184 to i32 - %186 = add nsw i32 %182, %185 - %187 = sitofp i32 %186 to float - %188 = fneg float 
%187 - %189 = fmul float %179, %188 - %190 = bitcast i32 %102 to float - %191 = fadd float %190, %189 - %192 = bitcast i32 %102 to float - %193 = bitcast i32 %102 to float - %194 = fmul float %192, %193 - %195 = fadd float %194, 0.000000e+00 - %196 = bitcast i32 %131 to float - %197 = bitcast i32 %131 to float - %198 = fmul float %196, %197 - %199 = fadd float %195, %198 - %200 = call float @llvm.sqrt.f32.10(float %199) - %201 = bitcast i32 %102 to float - %202 = fcmp olt float %201, 0.000000e+00 - %203 = sext i1 %202 to i32 - %204 = bitcast i32 %102 to float - %205 = fcmp ogt float %204, 0.000000e+00 - %206 = zext i1 %205 to i32 - %207 = add nsw i32 %203, %206 - %208 = sitofp i32 %207 to float - %209 = fneg float %208 - %210 = fmul float %200, %209 - %211 = bitcast i32 %102 to float - %212 = fadd float %211, %210 - %213 = fmul float %191, %212 - %214 = fadd float %213, 0.000000e+00 - %215 = bitcast i32 %102 to float - %216 = bitcast i32 %102 to float - %217 = fmul float %215, %216 - %218 = fadd float %217, 0.000000e+00 - %219 = bitcast i32 %131 to float - %220 = bitcast i32 %131 to float - %221 = fmul float %219, %220 - %222 = fadd float %218, %221 - %223 = call float @llvm.sqrt.f32.11(float %222) - %224 = bitcast i32 %102 to float - %225 = fcmp olt float %224, 0.000000e+00 - %226 = sext i1 %225 to i32 - %227 = bitcast i32 %102 to float - %228 = fcmp ogt float %227, 0.000000e+00 - %229 = zext i1 %228 to i32 - %230 = add nsw i32 %226, %229 - %231 = sitofp i32 %230 to float - %232 = fneg float %231 - %233 = fmul float %223, %232 - %234 = fmul float %233, 0.000000e+00 - %235 = bitcast i32 %131 to float - %236 = fadd float %235, %234 - %237 = bitcast i32 %102 to float - %238 = bitcast i32 %102 to float - %239 = fmul float %237, %238 - %240 = fadd float %239, 0.000000e+00 - %241 = bitcast i32 %131 to float - %242 = bitcast i32 %131 to float - %243 = fmul float %241, %242 - %244 = fadd float %240, %243 - %245 = call float @llvm.sqrt.f32.12(float %244) - %246 = bitcast i32 %102 to float - %247 = fcmp olt float %246, 0.000000e+00 - %248 = sext i1 %247 to i32 - %249 = bitcast i32 %102 to float - %250 = fcmp ogt float %249, 0.000000e+00 - %251 = zext i1 %250 to i32 - %252 = add nsw i32 %248, %251 - %253 = sitofp i32 %252 to float - %254 = fneg float %253 - %255 = fmul float %245, %254 - %256 = fmul float %255, 0.000000e+00 - %257 = bitcast i32 %131 to float - %258 = fadd float %257, %256 - %259 = fmul float %236, %258 - %260 = fadd float %214, %259 - %261 = call float @llvm.sqrt.f32.13(float %260) - %262 = fadd float %261, 0.000000e+00 - %263 = fdiv float %170, %262 - %264 = fmul float %263, 2.000000e+00 - %265 = bitcast i32 %102 to float - %266 = bitcast i32 %102 to float - %267 = fmul float %265, %266 - %268 = fadd float %267, 0.000000e+00 - %269 = bitcast i32 %131 to float - %270 = bitcast i32 %131 to float - %271 = fmul float %269, %270 - %272 = fadd float %268, %271 - %273 = call float @llvm.sqrt.f32.14(float %272) - %274 = bitcast i32 %102 to float - %275 = fcmp olt float %274, 0.000000e+00 - %276 = sext i1 %275 to i32 - %277 = bitcast i32 %102 to float - %278 = fcmp ogt float %277, 0.000000e+00 - %279 = zext i1 %278 to i32 - %280 = add nsw i32 %276, %279 - %281 = sitofp i32 %280 to float - %282 = fneg float %281 - %283 = fmul float %273, %282 - %284 = bitcast i32 %102 to float - %285 = fadd float %284, %283 - %286 = bitcast i32 %102 to float - %287 = bitcast i32 %102 to float - %288 = fmul float %286, %287 - %289 = fadd float %288, 0.000000e+00 - %290 = bitcast i32 %131 to float - %291 = 
bitcast i32 %131 to float - %292 = fmul float %290, %291 - %293 = fadd float %289, %292 - %294 = call float @llvm.sqrt.f32.15(float %293) - %295 = bitcast i32 %102 to float - %296 = fcmp olt float %295, 0.000000e+00 - %297 = sext i1 %296 to i32 - %298 = bitcast i32 %102 to float - %299 = fcmp ogt float %298, 0.000000e+00 - %300 = zext i1 %299 to i32 - %301 = add nsw i32 %297, %300 - %302 = sitofp i32 %301 to float - %303 = fneg float %302 - %304 = fmul float %294, %303 - %305 = bitcast i32 %102 to float - %306 = fadd float %305, %304 - %307 = bitcast i32 %102 to float - %308 = bitcast i32 %102 to float - %309 = fmul float %307, %308 - %310 = fadd float %309, 0.000000e+00 - %311 = bitcast i32 %131 to float - %312 = bitcast i32 %131 to float - %313 = fmul float %311, %312 - %314 = fadd float %310, %313 - %315 = call float @llvm.sqrt.f32.16(float %314) - %316 = bitcast i32 %102 to float - %317 = fcmp olt float %316, 0.000000e+00 - %318 = sext i1 %317 to i32 - %319 = bitcast i32 %102 to float - %320 = fcmp ogt float %319, 0.000000e+00 - %321 = zext i1 %320 to i32 - %322 = add nsw i32 %318, %321 - %323 = sitofp i32 %322 to float - %324 = fneg float %323 - %325 = fmul float %315, %324 - %326 = bitcast i32 %102 to float - %327 = fadd float %326, %325 - %328 = fmul float %306, %327 - %329 = fadd float %328, 0.000000e+00 - %330 = bitcast i32 %102 to float - %331 = bitcast i32 %102 to float - %332 = fmul float %330, %331 - %333 = fadd float %332, 0.000000e+00 - %334 = bitcast i32 %131 to float - %335 = bitcast i32 %131 to float - %336 = fmul float %334, %335 - %337 = fadd float %333, %336 - %338 = call float @llvm.sqrt.f32.17(float %337) - %339 = bitcast i32 %102 to float - %340 = fcmp olt float %339, 0.000000e+00 - %341 = sext i1 %340 to i32 - %342 = bitcast i32 %102 to float - %343 = fcmp ogt float %342, 0.000000e+00 - %344 = zext i1 %343 to i32 - %345 = add nsw i32 %341, %344 - %346 = sitofp i32 %345 to float - %347 = fneg float %346 - %348 = fmul float %338, %347 - %349 = fmul float %348, 0.000000e+00 - %350 = bitcast i32 %131 to float - %351 = fadd float %350, %349 - %352 = bitcast i32 %102 to float - %353 = bitcast i32 %102 to float - %354 = fmul float %352, %353 - %355 = fadd float %354, 0.000000e+00 - %356 = bitcast i32 %131 to float - %357 = bitcast i32 %131 to float - %358 = fmul float %356, %357 - %359 = fadd float %355, %358 - %360 = call float @llvm.sqrt.f32.18(float %359) - %361 = bitcast i32 %102 to float - %362 = fcmp olt float %361, 0.000000e+00 - %363 = sext i1 %362 to i32 - %364 = bitcast i32 %102 to float - %365 = fcmp ogt float %364, 0.000000e+00 - %366 = zext i1 %365 to i32 - %367 = add nsw i32 %363, %366 - %368 = sitofp i32 %367 to float - %369 = fneg float %368 - %370 = fmul float %360, %369 - %371 = fmul float %370, 0.000000e+00 - %372 = bitcast i32 %131 to float - %373 = fadd float %372, %371 - %374 = fmul float %351, %373 - %375 = fadd float %329, %374 - %376 = call float @llvm.sqrt.f32.19(float %375) - %377 = fadd float %376, 0.000000e+00 - %378 = fdiv float %285, %377 - %379 = fmul float %264, %378 - %380 = insertelement <4 x float> %149, float %379, i32 1 - %381 = bitcast i32 %102 to float - %382 = bitcast i32 %102 to float - %383 = fmul float %381, %382 - %384 = fadd float %383, 0.000000e+00 - %385 = bitcast i32 %131 to float - %386 = bitcast i32 %131 to float - %387 = fmul float %385, %386 - %388 = fadd float %384, %387 - %389 = call float @llvm.sqrt.f32.20(float %388) - %390 = bitcast i32 %102 to float - %391 = fcmp olt float %390, 0.000000e+00 - %392 = sext i1 %391 
to i32 - %393 = bitcast i32 %102 to float - %394 = fcmp ogt float %393, 0.000000e+00 - %395 = zext i1 %394 to i32 - %396 = add nsw i32 %392, %395 - %397 = sitofp i32 %396 to float - %398 = fneg float %397 - %399 = fmul float %389, %398 - %400 = bitcast i32 %102 to float - %401 = fadd float %400, %399 - %402 = bitcast i32 %102 to float - %403 = bitcast i32 %102 to float - %404 = fmul float %402, %403 - %405 = fadd float %404, 0.000000e+00 - %406 = bitcast i32 %131 to float - %407 = bitcast i32 %131 to float - %408 = fmul float %406, %407 - %409 = fadd float %405, %408 - %410 = call float @llvm.sqrt.f32.21(float %409) - %411 = bitcast i32 %102 to float - %412 = fcmp olt float %411, 0.000000e+00 - %413 = sext i1 %412 to i32 - %414 = bitcast i32 %102 to float - %415 = fcmp ogt float %414, 0.000000e+00 - %416 = zext i1 %415 to i32 - %417 = add nsw i32 %413, %416 - %418 = sitofp i32 %417 to float - %419 = fneg float %418 - %420 = fmul float %410, %419 - %421 = bitcast i32 %102 to float - %422 = fadd float %421, %420 - %423 = bitcast i32 %102 to float - %424 = bitcast i32 %102 to float - %425 = fmul float %423, %424 - %426 = fadd float %425, 0.000000e+00 - %427 = bitcast i32 %131 to float - %428 = bitcast i32 %131 to float - %429 = fmul float %427, %428 - %430 = fadd float %426, %429 - %431 = call float @llvm.sqrt.f32.22(float %430) - %432 = bitcast i32 %102 to float - %433 = fcmp olt float %432, 0.000000e+00 - %434 = sext i1 %433 to i32 - %435 = bitcast i32 %102 to float - %436 = fcmp ogt float %435, 0.000000e+00 - %437 = zext i1 %436 to i32 - %438 = add nsw i32 %434, %437 - %439 = sitofp i32 %438 to float - %440 = fneg float %439 - %441 = fmul float %431, %440 - %442 = bitcast i32 %102 to float - %443 = fadd float %442, %441 - %444 = fmul float %422, %443 - %445 = fadd float %444, 0.000000e+00 - %446 = bitcast i32 %102 to float - %447 = bitcast i32 %102 to float - %448 = fmul float %446, %447 - %449 = fadd float %448, 0.000000e+00 - %450 = bitcast i32 %131 to float - %451 = bitcast i32 %131 to float - %452 = fmul float %450, %451 - %453 = fadd float %449, %452 - %454 = call float @llvm.sqrt.f32.23(float %453) - %455 = bitcast i32 %102 to float - %456 = fcmp olt float %455, 0.000000e+00 - %457 = sext i1 %456 to i32 - %458 = bitcast i32 %102 to float - %459 = fcmp ogt float %458, 0.000000e+00 - %460 = zext i1 %459 to i32 - %461 = add nsw i32 %457, %460 - %462 = sitofp i32 %461 to float - %463 = fneg float %462 - %464 = fmul float %454, %463 - %465 = fmul float %464, 0.000000e+00 - %466 = bitcast i32 %131 to float - %467 = fadd float %466, %465 - %468 = bitcast i32 %102 to float - %469 = bitcast i32 %102 to float - %470 = fmul float %468, %469 - %471 = fadd float %470, 0.000000e+00 - %472 = bitcast i32 %131 to float - %473 = bitcast i32 %131 to float - %474 = fmul float %472, %473 - %475 = fadd float %471, %474 - %476 = call float @llvm.sqrt.f32.24(float %475) - %477 = bitcast i32 %102 to float - %478 = fcmp olt float %477, 0.000000e+00 - %479 = sext i1 %478 to i32 - %480 = bitcast i32 %102 to float - %481 = fcmp ogt float %480, 0.000000e+00 - %482 = zext i1 %481 to i32 - %483 = add nsw i32 %479, %482 - %484 = sitofp i32 %483 to float - %485 = fneg float %484 - %486 = fmul float %476, %485 - %487 = fmul float %486, 0.000000e+00 - %488 = bitcast i32 %131 to float - %489 = fadd float %488, %487 - %490 = fmul float %467, %489 - %491 = fadd float %445, %490 - %492 = call float @llvm.sqrt.f32.25(float %491) - %493 = fadd float %492, 0.000000e+00 - %494 = fdiv float %401, %493 - %495 = fmul float %494, 
2.000000e+00 - %496 = bitcast i32 %102 to float - %497 = bitcast i32 %102 to float - %498 = fmul float %496, %497 - %499 = fadd float %498, 0.000000e+00 - %500 = bitcast i32 %131 to float - %501 = bitcast i32 %131 to float - %502 = fmul float %500, %501 - %503 = fadd float %499, %502 - %504 = call float @llvm.sqrt.f32.26(float %503) - %505 = bitcast i32 %102 to float - %506 = fcmp olt float %505, 0.000000e+00 - %507 = sext i1 %506 to i32 - %508 = bitcast i32 %102 to float - %509 = fcmp ogt float %508, 0.000000e+00 - %510 = zext i1 %509 to i32 - %511 = add nsw i32 %507, %510 - %512 = sitofp i32 %511 to float - %513 = fneg float %512 - %514 = fmul float %504, %513 - %515 = fmul float %514, 0.000000e+00 - %516 = bitcast i32 %131 to float - %517 = fadd float %516, %515 - %518 = bitcast i32 %102 to float - %519 = bitcast i32 %102 to float - %520 = fmul float %518, %519 - %521 = fadd float %520, 0.000000e+00 - %522 = bitcast i32 %131 to float - %523 = bitcast i32 %131 to float - %524 = fmul float %522, %523 - %525 = fadd float %521, %524 - %526 = call float @llvm.sqrt.f32.27(float %525) - %527 = bitcast i32 %102 to float - %528 = fcmp olt float %527, 0.000000e+00 - %529 = sext i1 %528 to i32 - %530 = bitcast i32 %102 to float - %531 = fcmp ogt float %530, 0.000000e+00 - %532 = zext i1 %531 to i32 - %533 = add nsw i32 %529, %532 - %534 = sitofp i32 %533 to float - %535 = fneg float %534 - %536 = fmul float %526, %535 - %537 = bitcast i32 %102 to float - %538 = fadd float %537, %536 - %539 = bitcast i32 %102 to float - %540 = bitcast i32 %102 to float - %541 = fmul float %539, %540 - %542 = fadd float %541, 0.000000e+00 - %543 = bitcast i32 %131 to float - %544 = bitcast i32 %131 to float - %545 = fmul float %543, %544 - %546 = fadd float %542, %545 - %547 = call float @llvm.sqrt.f32.28(float %546) - %548 = bitcast i32 %102 to float - %549 = fcmp olt float %548, 0.000000e+00 - %550 = sext i1 %549 to i32 - %551 = bitcast i32 %102 to float - %552 = fcmp ogt float %551, 0.000000e+00 - %553 = zext i1 %552 to i32 - %554 = add nsw i32 %550, %553 - %555 = sitofp i32 %554 to float - %556 = fneg float %555 - %557 = fmul float %547, %556 - %558 = bitcast i32 %102 to float - %559 = fadd float %558, %557 - %560 = fmul float %538, %559 - %561 = fadd float %560, 0.000000e+00 - %562 = bitcast i32 %102 to float - %563 = bitcast i32 %102 to float - %564 = fmul float %562, %563 - %565 = fadd float %564, 0.000000e+00 - %566 = bitcast i32 %131 to float - %567 = bitcast i32 %131 to float - %568 = fmul float %566, %567 - %569 = fadd float %565, %568 - %570 = call float @llvm.sqrt.f32.29(float %569) - %571 = bitcast i32 %102 to float - %572 = fcmp olt float %571, 0.000000e+00 - %573 = sext i1 %572 to i32 - %574 = bitcast i32 %102 to float - %575 = fcmp ogt float %574, 0.000000e+00 - %576 = zext i1 %575 to i32 - %577 = add nsw i32 %573, %576 - %578 = sitofp i32 %577 to float - %579 = fneg float %578 - %580 = fmul float %570, %579 - %581 = fmul float %580, 0.000000e+00 - %582 = bitcast i32 %131 to float - %583 = fadd float %582, %581 - %584 = bitcast i32 %102 to float - %585 = bitcast i32 %102 to float - %586 = fmul float %584, %585 - %587 = fadd float %586, 0.000000e+00 - %588 = bitcast i32 %131 to float - %589 = bitcast i32 %131 to float - %590 = fmul float %588, %589 - %591 = fadd float %587, %590 - %592 = call float @llvm.sqrt.f32.30(float %591) - %593 = bitcast i32 %102 to float - %594 = fcmp olt float %593, 0.000000e+00 - %595 = sext i1 %594 to i32 - %596 = bitcast i32 %102 to float - %597 = fcmp ogt float %596, 
0.000000e+00 - %598 = zext i1 %597 to i32 - %599 = add nsw i32 %595, %598 - %600 = sitofp i32 %599 to float - %601 = fneg float %600 - %602 = fmul float %592, %601 - %603 = fmul float %602, 0.000000e+00 - %604 = bitcast i32 %131 to float - %605 = fadd float %604, %603 - %606 = fmul float %583, %605 - %607 = fadd float %561, %606 - %608 = call float @llvm.sqrt.f32.31(float %607) - %609 = fadd float %608, 0.000000e+00 - %610 = fdiv float %517, %609 - %611 = fmul float %495, %610 - %612 = insertelement <4 x float> %380, float %611, i32 2 - %613 = bitcast i32 %102 to float - %614 = bitcast i32 %102 to float - %615 = fmul float %613, %614 - %616 = fadd float %615, 0.000000e+00 - %617 = bitcast i32 %131 to float - %618 = bitcast i32 %131 to float - %619 = fmul float %617, %618 - %620 = fadd float %616, %619 - %621 = call float @llvm.sqrt.f32.32(float %620) - %622 = bitcast i32 %102 to float - %623 = fcmp olt float %622, 0.000000e+00 - %624 = sext i1 %623 to i32 - %625 = bitcast i32 %102 to float - %626 = fcmp ogt float %625, 0.000000e+00 - %627 = zext i1 %626 to i32 - %628 = add nsw i32 %624, %627 - %629 = sitofp i32 %628 to float - %630 = fneg float %629 - %631 = fmul float %621, %630 - %632 = fmul float %631, 0.000000e+00 - %633 = bitcast i32 %131 to float - %634 = fadd float %633, %632 - %635 = bitcast i32 %102 to float - %636 = bitcast i32 %102 to float - %637 = fmul float %635, %636 - %638 = fadd float %637, 0.000000e+00 - %639 = bitcast i32 %131 to float - %640 = bitcast i32 %131 to float - %641 = fmul float %639, %640 - %642 = fadd float %638, %641 - %643 = call float @llvm.sqrt.f32.33(float %642) - %644 = bitcast i32 %102 to float - %645 = fcmp olt float %644, 0.000000e+00 - %646 = sext i1 %645 to i32 - %647 = bitcast i32 %102 to float - %648 = fcmp ogt float %647, 0.000000e+00 - %649 = zext i1 %648 to i32 - %650 = add nsw i32 %646, %649 - %651 = sitofp i32 %650 to float - %652 = fneg float %651 - %653 = fmul float %643, %652 - %654 = bitcast i32 %102 to float - %655 = fadd float %654, %653 - %656 = bitcast i32 %102 to float - %657 = bitcast i32 %102 to float - %658 = fmul float %656, %657 - %659 = fadd float %658, 0.000000e+00 - %660 = bitcast i32 %131 to float - %661 = bitcast i32 %131 to float - %662 = fmul float %660, %661 - %663 = fadd float %659, %662 - %664 = call float @llvm.sqrt.f32.34(float %663) - %665 = bitcast i32 %102 to float - %666 = fcmp olt float %665, 0.000000e+00 - %667 = sext i1 %666 to i32 - %668 = bitcast i32 %102 to float - %669 = fcmp ogt float %668, 0.000000e+00 - %670 = zext i1 %669 to i32 - %671 = add nsw i32 %667, %670 - %672 = sitofp i32 %671 to float - %673 = fneg float %672 - %674 = fmul float %664, %673 - %675 = bitcast i32 %102 to float - %676 = fadd float %675, %674 - %677 = fmul float %655, %676 - %678 = fadd float %677, 0.000000e+00 - %679 = bitcast i32 %102 to float - %680 = bitcast i32 %102 to float - %681 = fmul float %679, %680 - %682 = fadd float %681, 0.000000e+00 - %683 = bitcast i32 %131 to float - %684 = bitcast i32 %131 to float - %685 = fmul float %683, %684 - %686 = fadd float %682, %685 - %687 = call float @llvm.sqrt.f32.35(float %686) - %688 = bitcast i32 %102 to float - %689 = fcmp olt float %688, 0.000000e+00 - %690 = sext i1 %689 to i32 - %691 = bitcast i32 %102 to float - %692 = fcmp ogt float %691, 0.000000e+00 - %693 = zext i1 %692 to i32 - %694 = add nsw i32 %690, %693 - %695 = sitofp i32 %694 to float - %696 = fneg float %695 - %697 = fmul float %687, %696 - %698 = fmul float %697, 0.000000e+00 - %699 = bitcast i32 %131 to float 
- %700 = fadd float %699, %698 - %701 = bitcast i32 %102 to float - %702 = bitcast i32 %102 to float - %703 = fmul float %701, %702 - %704 = fadd float %703, 0.000000e+00 - %705 = bitcast i32 %131 to float - %706 = bitcast i32 %131 to float - %707 = fmul float %705, %706 - %708 = fadd float %704, %707 - %709 = call float @llvm.sqrt.f32.36(float %708) - %710 = bitcast i32 %102 to float - %711 = fcmp olt float %710, 0.000000e+00 - %712 = sext i1 %711 to i32 - %713 = bitcast i32 %102 to float - %714 = fcmp ogt float %713, 0.000000e+00 - %715 = zext i1 %714 to i32 - %716 = add nsw i32 %712, %715 - %717 = sitofp i32 %716 to float - %718 = fneg float %717 - %719 = fmul float %709, %718 - %720 = fmul float %719, 0.000000e+00 - %721 = bitcast i32 %131 to float - %722 = fadd float %721, %720 - %723 = fmul float %700, %722 - %724 = fadd float %678, %723 - %725 = call float @llvm.sqrt.f32.37(float %724) - %726 = fadd float %725, 0.000000e+00 - %727 = fdiv float %634, %726 - %728 = fmul float %727, 2.000000e+00 - %729 = bitcast i32 %102 to float - %730 = bitcast i32 %102 to float - %731 = fmul float %729, %730 - %732 = fadd float %731, 0.000000e+00 - %733 = bitcast i32 %131 to float - %734 = bitcast i32 %131 to float - %735 = fmul float %733, %734 - %736 = fadd float %732, %735 - %737 = call float @llvm.sqrt.f32.38(float %736) - %738 = bitcast i32 %102 to float - %739 = fcmp olt float %738, 0.000000e+00 - %740 = sext i1 %739 to i32 - %741 = bitcast i32 %102 to float - %742 = fcmp ogt float %741, 0.000000e+00 - %743 = zext i1 %742 to i32 - %744 = add nsw i32 %740, %743 - %745 = sitofp i32 %744 to float - %746 = fneg float %745 - %747 = fmul float %737, %746 - %748 = bitcast i32 %102 to float - %749 = fadd float %748, %747 - %750 = bitcast i32 %102 to float - %751 = bitcast i32 %102 to float - %752 = fmul float %750, %751 - %753 = fadd float %752, 0.000000e+00 - %754 = bitcast i32 %131 to float - %755 = bitcast i32 %131 to float - %756 = fmul float %754, %755 - %757 = fadd float %753, %756 - %758 = call float @llvm.sqrt.f32.39(float %757) - %759 = bitcast i32 %102 to float - %760 = fcmp olt float %759, 0.000000e+00 - %761 = sext i1 %760 to i32 - %762 = bitcast i32 %102 to float - %763 = fcmp ogt float %762, 0.000000e+00 - %764 = zext i1 %763 to i32 - %765 = add nsw i32 %761, %764 - %766 = sitofp i32 %765 to float - %767 = fneg float %766 - %768 = fmul float %758, %767 - %769 = bitcast i32 %102 to float - %770 = fadd float %769, %768 - %771 = bitcast i32 %102 to float - %772 = bitcast i32 %102 to float - %773 = fmul float %771, %772 - %774 = fadd float %773, 0.000000e+00 - %775 = bitcast i32 %131 to float - %776 = bitcast i32 %131 to float - %777 = fmul float %775, %776 - %778 = fadd float %774, %777 - %779 = call float @llvm.sqrt.f32.40(float %778) - %780 = bitcast i32 %102 to float - %781 = fcmp olt float %780, 0.000000e+00 - %782 = sext i1 %781 to i32 - %783 = bitcast i32 %102 to float - %784 = fcmp ogt float %783, 0.000000e+00 - %785 = zext i1 %784 to i32 - %786 = add nsw i32 %782, %785 - %787 = sitofp i32 %786 to float - %788 = fneg float %787 - %789 = fmul float %779, %788 - %790 = bitcast i32 %102 to float - %791 = fadd float %790, %789 - %792 = fmul float %770, %791 - %793 = fadd float %792, 0.000000e+00 - %794 = bitcast i32 %102 to float - %795 = bitcast i32 %102 to float - %796 = fmul float %794, %795 - %797 = fadd float %796, 0.000000e+00 - %798 = bitcast i32 %131 to float - %799 = bitcast i32 %131 to float - %800 = fmul float %798, %799 - %801 = fadd float %797, %800 - %802 = call float 
@llvm.sqrt.f32.41(float %801) - %803 = bitcast i32 %102 to float - %804 = fcmp olt float %803, 0.000000e+00 - %805 = sext i1 %804 to i32 - %806 = bitcast i32 %102 to float - %807 = fcmp ogt float %806, 0.000000e+00 - %808 = zext i1 %807 to i32 - %809 = add nsw i32 %805, %808 - %810 = sitofp i32 %809 to float - %811 = fneg float %810 - %812 = fmul float %802, %811 - %813 = fmul float %812, 0.000000e+00 - %814 = bitcast i32 %131 to float - %815 = fadd float %814, %813 - %816 = bitcast i32 %102 to float - %817 = bitcast i32 %102 to float - %818 = fmul float %816, %817 - %819 = fadd float %818, 0.000000e+00 - %820 = bitcast i32 %131 to float - %821 = bitcast i32 %131 to float - %822 = fmul float %820, %821 - %823 = fadd float %819, %822 - %824 = call float @llvm.sqrt.f32.42(float %823) - %825 = bitcast i32 %102 to float - %826 = fcmp olt float %825, 0.000000e+00 - %827 = sext i1 %826 to i32 - %828 = bitcast i32 %102 to float - %829 = fcmp ogt float %828, 0.000000e+00 - %830 = zext i1 %829 to i32 - %831 = add nsw i32 %827, %830 - %832 = sitofp i32 %831 to float - %833 = fneg float %832 - %834 = fmul float %824, %833 - %835 = fmul float %834, 0.000000e+00 - %836 = bitcast i32 %131 to float - %837 = fadd float %836, %835 - %838 = fmul float %815, %837 - %839 = fadd float %793, %838 - %840 = call float @llvm.sqrt.f32.43(float %839) - %841 = fadd float %840, 0.000000e+00 - %842 = fdiv float %749, %841 - %843 = fmul float %728, %842 - %844 = insertelement <4 x float> %612, float %843, i32 3 - %845 = fsub <4 x float> , %844 - %846 = bitcast i32 %102 to float - %847 = bitcast i32 %102 to float - %848 = fmul float %846, %847 - %849 = fadd float %848, 0.000000e+00 - %850 = bitcast i32 %131 to float - %851 = bitcast i32 %131 to float - %852 = fmul float %850, %851 - %853 = fadd float %849, %852 - %854 = call float @llvm.sqrt.f32.44(float %853) - %855 = bitcast i32 %102 to float - %856 = fcmp olt float %855, 0.000000e+00 - %857 = sext i1 %856 to i32 - %858 = bitcast i32 %102 to float - %859 = fcmp ogt float %858, 0.000000e+00 - %860 = zext i1 %859 to i32 - %861 = add nsw i32 %857, %860 - %862 = sitofp i32 %861 to float - %863 = fneg float %862 - %864 = fmul float %854, %863 - %865 = fmul float %864, 0.000000e+00 - %866 = bitcast i32 %131 to float - %867 = fadd float %866, %865 - %868 = bitcast i32 %102 to float - %869 = bitcast i32 %102 to float - %870 = fmul float %868, %869 - %871 = fadd float %870, 0.000000e+00 - %872 = bitcast i32 %131 to float - %873 = bitcast i32 %131 to float - %874 = fmul float %872, %873 - %875 = fadd float %871, %874 - %876 = call float @llvm.sqrt.f32.45(float %875) - %877 = bitcast i32 %102 to float - %878 = fcmp olt float %877, 0.000000e+00 - %879 = sext i1 %878 to i32 - %880 = bitcast i32 %102 to float - %881 = fcmp ogt float %880, 0.000000e+00 - %882 = zext i1 %881 to i32 - %883 = add nsw i32 %879, %882 - %884 = sitofp i32 %883 to float - %885 = fneg float %884 - %886 = fmul float %876, %885 - %887 = bitcast i32 %102 to float - %888 = fadd float %887, %886 - %889 = bitcast i32 %102 to float - %890 = bitcast i32 %102 to float - %891 = fmul float %889, %890 - %892 = fadd float %891, 0.000000e+00 - %893 = bitcast i32 %131 to float - %894 = bitcast i32 %131 to float - %895 = fmul float %893, %894 - %896 = fadd float %892, %895 - %897 = call float @llvm.sqrt.f32.46(float %896) - %898 = bitcast i32 %102 to float - %899 = fcmp olt float %898, 0.000000e+00 - %900 = sext i1 %899 to i32 - %901 = bitcast i32 %102 to float - %902 = fcmp ogt float %901, 0.000000e+00 - %903 = zext i1 %902 
to i32 - %904 = add nsw i32 %900, %903 - %905 = sitofp i32 %904 to float - %906 = fneg float %905 - %907 = fmul float %897, %906 - %908 = bitcast i32 %102 to float - %909 = fadd float %908, %907 - %910 = fmul float %888, %909 - %911 = fadd float %910, 0.000000e+00 - %912 = bitcast i32 %102 to float - %913 = bitcast i32 %102 to float - %914 = fmul float %912, %913 - %915 = fadd float %914, 0.000000e+00 - %916 = bitcast i32 %131 to float - %917 = bitcast i32 %131 to float - %918 = fmul float %916, %917 - %919 = fadd float %915, %918 - %920 = call float @llvm.sqrt.f32.47(float %919) - %921 = bitcast i32 %102 to float - %922 = fcmp olt float %921, 0.000000e+00 - %923 = sext i1 %922 to i32 - %924 = bitcast i32 %102 to float - %925 = fcmp ogt float %924, 0.000000e+00 - %926 = zext i1 %925 to i32 - %927 = add nsw i32 %923, %926 - %928 = sitofp i32 %927 to float - %929 = fneg float %928 - %930 = fmul float %920, %929 - %931 = fmul float %930, 0.000000e+00 - %932 = bitcast i32 %131 to float - %933 = fadd float %932, %931 - %934 = bitcast i32 %102 to float - %935 = bitcast i32 %102 to float - %936 = fmul float %934, %935 - %937 = fadd float %936, 0.000000e+00 - %938 = bitcast i32 %131 to float - %939 = bitcast i32 %131 to float - %940 = fmul float %938, %939 - %941 = fadd float %937, %940 - %942 = call float @llvm.sqrt.f32.48(float %941) - %943 = bitcast i32 %102 to float - %944 = fcmp olt float %943, 0.000000e+00 - %945 = sext i1 %944 to i32 - %946 = bitcast i32 %102 to float - %947 = fcmp ogt float %946, 0.000000e+00 - %948 = zext i1 %947 to i32 - %949 = add nsw i32 %945, %948 - %950 = sitofp i32 %949 to float - %951 = fneg float %950 - %952 = fmul float %942, %951 - %953 = fmul float %952, 0.000000e+00 - %954 = bitcast i32 %131 to float - %955 = fadd float %954, %953 - %956 = fmul float %933, %955 - %957 = fadd float %911, %956 - %958 = call float @llvm.sqrt.f32.49(float %957) - %959 = fadd float %958, 0.000000e+00 - %960 = fdiv float %867, %959 - %961 = fmul float %960, 2.000000e+00 - %962 = bitcast i32 %102 to float - %963 = bitcast i32 %102 to float - %964 = fmul float %962, %963 - %965 = fadd float %964, 0.000000e+00 - %966 = bitcast i32 %131 to float - %967 = bitcast i32 %131 to float - %968 = fmul float %966, %967 - %969 = fadd float %965, %968 - %970 = call float @llvm.sqrt.f32.50(float %969) - %971 = bitcast i32 %102 to float - %972 = fcmp olt float %971, 0.000000e+00 - %973 = sext i1 %972 to i32 - %974 = bitcast i32 %102 to float - %975 = fcmp ogt float %974, 0.000000e+00 - %976 = zext i1 %975 to i32 - %977 = add nsw i32 %973, %976 - %978 = sitofp i32 %977 to float - %979 = fneg float %978 - %980 = fmul float %970, %979 - %981 = fmul float %980, 0.000000e+00 - %982 = bitcast i32 %131 to float - %983 = fadd float %982, %981 - %984 = bitcast i32 %102 to float - %985 = bitcast i32 %102 to float - %986 = fmul float %984, %985 - %987 = fadd float %986, 0.000000e+00 - %988 = bitcast i32 %131 to float - %989 = bitcast i32 %131 to float - %990 = fmul float %988, %989 - %991 = fadd float %987, %990 - %992 = call float @llvm.sqrt.f32.51(float %991) - %993 = bitcast i32 %102 to float - %994 = fcmp olt float %993, 0.000000e+00 - %995 = sext i1 %994 to i32 - %996 = bitcast i32 %102 to float - %997 = fcmp ogt float %996, 0.000000e+00 - %998 = zext i1 %997 to i32 - %999 = add nsw i32 %995, %998 - %1000 = sitofp i32 %999 to float - %1001 = fneg float %1000 - %1002 = fmul float %992, %1001 - %1003 = bitcast i32 %102 to float - %1004 = fadd float %1003, %1002 - %1005 = bitcast i32 %102 to float - %1006 = 
bitcast i32 %102 to float - %1007 = fmul float %1005, %1006 - %1008 = fadd float %1007, 0.000000e+00 - %1009 = bitcast i32 %131 to float - %1010 = bitcast i32 %131 to float - %1011 = fmul float %1009, %1010 - %1012 = fadd float %1008, %1011 - %1013 = call float @llvm.sqrt.f32.52(float %1012) - %1014 = bitcast i32 %102 to float - %1015 = fcmp olt float %1014, 0.000000e+00 - %1016 = sext i1 %1015 to i32 - %1017 = bitcast i32 %102 to float - %1018 = fcmp ogt float %1017, 0.000000e+00 - %1019 = zext i1 %1018 to i32 - %1020 = add nsw i32 %1016, %1019 - %1021 = sitofp i32 %1020 to float - %1022 = fneg float %1021 - %1023 = fmul float %1013, %1022 - %1024 = bitcast i32 %102 to float - %1025 = fadd float %1024, %1023 - %1026 = fmul float %1004, %1025 - %1027 = fadd float %1026, 0.000000e+00 - %1028 = bitcast i32 %102 to float - %1029 = bitcast i32 %102 to float - %1030 = fmul float %1028, %1029 - %1031 = fadd float %1030, 0.000000e+00 - %1032 = bitcast i32 %131 to float - %1033 = bitcast i32 %131 to float - %1034 = fmul float %1032, %1033 - %1035 = fadd float %1031, %1034 - %1036 = call float @llvm.sqrt.f32.53(float %1035) - %1037 = bitcast i32 %102 to float - %1038 = fcmp olt float %1037, 0.000000e+00 - %1039 = sext i1 %1038 to i32 - %1040 = bitcast i32 %102 to float - %1041 = fcmp ogt float %1040, 0.000000e+00 - %1042 = zext i1 %1041 to i32 - %1043 = add nsw i32 %1039, %1042 - %1044 = sitofp i32 %1043 to float - %1045 = fneg float %1044 - %1046 = fmul float %1036, %1045 - %1047 = fmul float %1046, 0.000000e+00 - %1048 = bitcast i32 %131 to float - %1049 = fadd float %1048, %1047 - %1050 = bitcast i32 %102 to float - %1051 = bitcast i32 %102 to float - %1052 = fmul float %1050, %1051 - %1053 = fadd float %1052, 0.000000e+00 - %1054 = bitcast i32 %131 to float - %1055 = bitcast i32 %131 to float - %1056 = fmul float %1054, %1055 - %1057 = fadd float %1053, %1056 - %1058 = call float @llvm.sqrt.f32.54(float %1057) - %1059 = bitcast i32 %102 to float - %1060 = fcmp olt float %1059, 0.000000e+00 - %1061 = sext i1 %1060 to i32 - %1062 = bitcast i32 %102 to float - %1063 = fcmp ogt float %1062, 0.000000e+00 - %1064 = zext i1 %1063 to i32 - %1065 = add nsw i32 %1061, %1064 - %1066 = sitofp i32 %1065 to float - %1067 = fneg float %1066 - %1068 = fmul float %1058, %1067 - %1069 = fmul float %1068, 0.000000e+00 - %1070 = bitcast i32 %131 to float - %1071 = fadd float %1070, %1069 - %1072 = fmul float %1049, %1071 - %1073 = fadd float %1027, %1072 - %1074 = call float @llvm.sqrt.f32.55(float %1073) - %1075 = fadd float %1074, 0.000000e+00 - %1076 = fdiv float %983, %1075 - %1077 = fmul float %961, %1076 - %1078 = fsub float 1.000000e+00, %1077 - %1079 = insertelement <4 x float> zeroinitializer, float %1078, i32 0 - %1080 = insertelement <4 x float> %1079, float 0.000000e+00, i32 1 - %1081 = insertelement <4 x float> %1080, float 0.000000e+00, i32 2 - %1082 = insertelement <4 x float> %1081, float 0.000000e+00, i32 3 - %1083 = shufflevector <4 x float> %845, <4 x float> %1082, <8 x i32> - %1084 = extractelement <8 x float> %1083, i32 0 - %1085 = bitcast i32* %23 to float* - %1086 = getelementptr float, float* %2, i32 0 - %1087 = getelementptr inbounds float, float* %1086, i64 3 - %1088 = bitcast float* %1087 to i32* - %1089 = bitcast i32* %1088 to float* - store float %1084, float* %1089, align 4 - %1090 = extractelement <8 x float> %1083, i32 1 - %1091 = bitcast i32* %60 to float* - %1092 = alloca [4 x float], align 16 - %1093 = bitcast [4 x float]* %1092 to i32* - %1094 = bitcast i32* %1093 to float* - 
store float %1090, float* %1094, align 4 - %1095 = extractelement <8 x float> %1083, i32 2 - %1096 = bitcast i32* %63 to float* - %1097 = getelementptr inbounds [4 x float], [4 x float]* %1092, i64 0, i64 1 - %1098 = bitcast float* %1097 to i32* - %1099 = bitcast i32* %1098 to float* - store float %1095, float* %1099, align 4 - %1100 = extractelement <8 x float> %1083, i32 3 - %1101 = bitcast i32* %66 to float* - %1102 = getelementptr inbounds [4 x float], [4 x float]* %1092, i64 0, i64 2 - %1103 = bitcast float* %1102 to i32* - %1104 = bitcast i32* %1103 to float* - store float %1100, float* %1104, align 4 - %1105 = extractelement <8 x float> %1083, i32 4 - %1106 = bitcast i32* %69 to float* - %1107 = getelementptr inbounds [4 x float], [4 x float]* %1092, i64 0, i64 3 - %1108 = bitcast float* %1107 to i32* - %1109 = bitcast i32* %1108 to float* - store float %1105, float* %1109, align 4 - %1110 = bitcast float* %1 to i8* - %1111 = alloca [4 x float], align 16 - %1112 = bitcast [4 x float]* %1111 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %1110, i8* nonnull align 16 dereferenceable(16) %1112, i64 16, i1 false) - store float 0.000000e+00, float* %2, align 4 - %1113 = bitcast i32 %102 to float - %1114 = bitcast i32 %102 to float - %1115 = fmul float %1113, %1114 - %1116 = fadd float %1115, 0.000000e+00 - %1117 = bitcast i32 %131 to float - %1118 = bitcast i32 %131 to float - %1119 = fmul float %1117, %1118 - %1120 = fadd float %1116, %1119 - %1121 = call float @llvm.sqrt.f32.56(float %1120) - %1122 = bitcast i32 %102 to float - %1123 = fcmp olt float %1122, 0.000000e+00 - %1124 = sext i1 %1123 to i32 - %1125 = bitcast i32 %102 to float - %1126 = fcmp ogt float %1125, 0.000000e+00 - %1127 = zext i1 %1126 to i32 - %1128 = add nsw i32 %1124, %1127 - %1129 = sitofp i32 %1128 to float - %1130 = fneg float %1129 - %1131 = fmul float %1121, %1130 - %1132 = bitcast i32 %102 to float - %1133 = fadd float %1132, %1131 - %1134 = bitcast i32 %102 to float - %1135 = bitcast i32 %102 to float - %1136 = fmul float %1134, %1135 - %1137 = fadd float %1136, 0.000000e+00 - %1138 = bitcast i32 %131 to float - %1139 = bitcast i32 %131 to float - %1140 = fmul float %1138, %1139 - %1141 = fadd float %1137, %1140 - %1142 = call float @llvm.sqrt.f32.57(float %1141) - %1143 = bitcast i32 %102 to float - %1144 = fcmp olt float %1143, 0.000000e+00 - %1145 = sext i1 %1144 to i32 - %1146 = bitcast i32 %102 to float - %1147 = fcmp ogt float %1146, 0.000000e+00 - %1148 = zext i1 %1147 to i32 - %1149 = add nsw i32 %1145, %1148 - %1150 = sitofp i32 %1149 to float - %1151 = fneg float %1150 - %1152 = fmul float %1142, %1151 - %1153 = bitcast i32 %102 to float - %1154 = fadd float %1153, %1152 - %1155 = bitcast i32 %102 to float - %1156 = bitcast i32 %102 to float - %1157 = fmul float %1155, %1156 - %1158 = fadd float %1157, 0.000000e+00 - %1159 = bitcast i32 %131 to float - %1160 = bitcast i32 %131 to float - %1161 = fmul float %1159, %1160 - %1162 = fadd float %1158, %1161 - %1163 = call float @llvm.sqrt.f32.58(float %1162) - %1164 = bitcast i32 %102 to float - %1165 = fcmp olt float %1164, 0.000000e+00 - %1166 = sext i1 %1165 to i32 - %1167 = bitcast i32 %102 to float - %1168 = fcmp ogt float %1167, 0.000000e+00 - %1169 = zext i1 %1168 to i32 - %1170 = add nsw i32 %1166, %1169 - %1171 = sitofp i32 %1170 to float - %1172 = fneg float %1171 - %1173 = fmul float %1163, %1172 - %1174 = bitcast i32 %102 to float - %1175 = fadd float %1174, %1173 - %1176 = fmul float %1154, %1175 - 
%1177 = fadd float %1176, 0.000000e+00 - %1178 = bitcast i32 %102 to float - %1179 = bitcast i32 %102 to float - %1180 = fmul float %1178, %1179 - %1181 = fadd float %1180, 0.000000e+00 - %1182 = bitcast i32 %131 to float - %1183 = bitcast i32 %131 to float - %1184 = fmul float %1182, %1183 - %1185 = fadd float %1181, %1184 - %1186 = call float @llvm.sqrt.f32.59(float %1185) - %1187 = bitcast i32 %102 to float - %1188 = fcmp olt float %1187, 0.000000e+00 - %1189 = sext i1 %1188 to i32 - %1190 = bitcast i32 %102 to float - %1191 = fcmp ogt float %1190, 0.000000e+00 - %1192 = zext i1 %1191 to i32 - %1193 = add nsw i32 %1189, %1192 - %1194 = sitofp i32 %1193 to float - %1195 = fneg float %1194 - %1196 = fmul float %1186, %1195 - %1197 = fmul float %1196, 0.000000e+00 - %1198 = bitcast i32 %131 to float - %1199 = fadd float %1198, %1197 - %1200 = bitcast i32 %102 to float - %1201 = bitcast i32 %102 to float - %1202 = fmul float %1200, %1201 - %1203 = fadd float %1202, 0.000000e+00 - %1204 = bitcast i32 %131 to float - %1205 = bitcast i32 %131 to float - %1206 = fmul float %1204, %1205 - %1207 = fadd float %1203, %1206 - %1208 = call float @llvm.sqrt.f32.60(float %1207) - %1209 = bitcast i32 %102 to float - %1210 = fcmp olt float %1209, 0.000000e+00 - %1211 = sext i1 %1210 to i32 - %1212 = bitcast i32 %102 to float - %1213 = fcmp ogt float %1212, 0.000000e+00 - %1214 = zext i1 %1213 to i32 - %1215 = add nsw i32 %1211, %1214 - %1216 = sitofp i32 %1215 to float - %1217 = fneg float %1216 - %1218 = fmul float %1208, %1217 - %1219 = fmul float %1218, 0.000000e+00 - %1220 = bitcast i32 %131 to float - %1221 = fadd float %1220, %1219 - %1222 = fmul float %1199, %1221 - %1223 = fadd float %1177, %1222 - %1224 = call float @llvm.sqrt.f32.61(float %1223) - %1225 = fadd float %1224, 0.000000e+00 - %1226 = fdiv float %1133, %1225 - %1227 = fmul float %1226, 2.000000e+00 - %1228 = bitcast i32 %102 to float - %1229 = bitcast i32 %102 to float - %1230 = fmul float %1228, %1229 - %1231 = fadd float %1230, 0.000000e+00 - %1232 = bitcast i32 %131 to float - %1233 = bitcast i32 %131 to float - %1234 = fmul float %1232, %1233 - %1235 = fadd float %1231, %1234 - %1236 = call float @llvm.sqrt.f32.62(float %1235) - %1237 = bitcast i32 %102 to float - %1238 = fcmp olt float %1237, 0.000000e+00 - %1239 = sext i1 %1238 to i32 - %1240 = bitcast i32 %102 to float - %1241 = fcmp ogt float %1240, 0.000000e+00 - %1242 = zext i1 %1241 to i32 - %1243 = add nsw i32 %1239, %1242 - %1244 = sitofp i32 %1243 to float - %1245 = fneg float %1244 - %1246 = fmul float %1236, %1245 - %1247 = bitcast i32 %102 to float - %1248 = fadd float %1247, %1246 - %1249 = bitcast i32 %102 to float - %1250 = bitcast i32 %102 to float - %1251 = fmul float %1249, %1250 - %1252 = fadd float %1251, 0.000000e+00 - %1253 = bitcast i32 %131 to float - %1254 = bitcast i32 %131 to float - %1255 = fmul float %1253, %1254 - %1256 = fadd float %1252, %1255 - %1257 = call float @llvm.sqrt.f32.63(float %1256) - %1258 = bitcast i32 %102 to float - %1259 = fcmp olt float %1258, 0.000000e+00 - %1260 = sext i1 %1259 to i32 - %1261 = bitcast i32 %102 to float - %1262 = fcmp ogt float %1261, 0.000000e+00 - %1263 = zext i1 %1262 to i32 - %1264 = add nsw i32 %1260, %1263 - %1265 = sitofp i32 %1264 to float - %1266 = fneg float %1265 - %1267 = fmul float %1257, %1266 - %1268 = bitcast i32 %102 to float - %1269 = fadd float %1268, %1267 - %1270 = bitcast i32 %102 to float - %1271 = bitcast i32 %102 to float - %1272 = fmul float %1270, %1271 - %1273 = fadd float %1272, 
0.000000e+00 - %1274 = bitcast i32 %131 to float - %1275 = bitcast i32 %131 to float - %1276 = fmul float %1274, %1275 - %1277 = fadd float %1273, %1276 - %1278 = call float @llvm.sqrt.f32.64(float %1277) - %1279 = bitcast i32 %102 to float - %1280 = fcmp olt float %1279, 0.000000e+00 - %1281 = sext i1 %1280 to i32 - %1282 = bitcast i32 %102 to float - %1283 = fcmp ogt float %1282, 0.000000e+00 - %1284 = zext i1 %1283 to i32 - %1285 = add nsw i32 %1281, %1284 - %1286 = sitofp i32 %1285 to float - %1287 = fneg float %1286 - %1288 = fmul float %1278, %1287 - %1289 = bitcast i32 %102 to float - %1290 = fadd float %1289, %1288 - %1291 = fmul float %1269, %1290 - %1292 = fadd float %1291, 0.000000e+00 - %1293 = bitcast i32 %102 to float - %1294 = bitcast i32 %102 to float - %1295 = fmul float %1293, %1294 - %1296 = fadd float %1295, 0.000000e+00 - %1297 = bitcast i32 %131 to float - %1298 = bitcast i32 %131 to float - %1299 = fmul float %1297, %1298 - %1300 = fadd float %1296, %1299 - %1301 = call float @llvm.sqrt.f32.65(float %1300) - %1302 = bitcast i32 %102 to float - %1303 = fcmp olt float %1302, 0.000000e+00 - %1304 = sext i1 %1303 to i32 - %1305 = bitcast i32 %102 to float - %1306 = fcmp ogt float %1305, 0.000000e+00 - %1307 = zext i1 %1306 to i32 - %1308 = add nsw i32 %1304, %1307 - %1309 = sitofp i32 %1308 to float - %1310 = fneg float %1309 - %1311 = fmul float %1301, %1310 - %1312 = fmul float %1311, 0.000000e+00 - %1313 = bitcast i32 %131 to float - %1314 = fadd float %1313, %1312 - %1315 = bitcast i32 %102 to float - %1316 = bitcast i32 %102 to float - %1317 = fmul float %1315, %1316 - %1318 = fadd float %1317, 0.000000e+00 - %1319 = bitcast i32 %131 to float - %1320 = bitcast i32 %131 to float - %1321 = fmul float %1319, %1320 - %1322 = fadd float %1318, %1321 - %1323 = call float @llvm.sqrt.f32.66(float %1322) - %1324 = bitcast i32 %102 to float - %1325 = fcmp olt float %1324, 0.000000e+00 - %1326 = sext i1 %1325 to i32 - %1327 = bitcast i32 %102 to float - %1328 = fcmp ogt float %1327, 0.000000e+00 - %1329 = zext i1 %1328 to i32 - %1330 = add nsw i32 %1326, %1329 - %1331 = sitofp i32 %1330 to float - %1332 = fneg float %1331 - %1333 = fmul float %1323, %1332 - %1334 = fmul float %1333, 0.000000e+00 - %1335 = bitcast i32 %131 to float - %1336 = fadd float %1335, %1334 - %1337 = fmul float %1314, %1336 - %1338 = fadd float %1292, %1337 - %1339 = call float @llvm.sqrt.f32.67(float %1338) - %1340 = fadd float %1339, 0.000000e+00 - %1341 = fdiv float %1248, %1340 - %1342 = fmul float %1227, %1341 - %1343 = fsub float 1.000000e+00, %1342 - %1344 = insertelement <4 x float> zeroinitializer, float %1343, i32 0 - %1345 = insertelement <4 x float> %1344, float 0.000000e+00, i32 1 - %1346 = insertelement <4 x float> %1345, float 0.000000e+00, i32 2 - %1347 = insertelement <4 x float> %1346, float 0.000000e+00, i32 3 - %1348 = getelementptr float, float* %0, i32 0 - %1349 = load float, float* %1348, align 4 - %1350 = insertelement <4 x float> zeroinitializer, float %1349, i32 0 - %1351 = insertelement <4 x float> %1350, float 0.000000e+00, i32 1 - %1352 = insertelement <4 x float> %1351, float 0.000000e+00, i32 2 - %1353 = insertelement <4 x float> %1352, float 0.000000e+00, i32 3 - %1354 = call <4 x float> @llvm.fma.f32.68(<4 x float> %1347, <4 x float> %1353, <4 x float> zeroinitializer) - %1355 = extractelement <4 x float> %1354, i32 0 - store float %1355, float* %2, align 4 - %1356 = bitcast i32 %102 to float - %1357 = bitcast i32 %102 to float - %1358 = fmul float %1356, %1357 - %1359 = 
fadd float %1358, 0.000000e+00 - %1360 = bitcast i32 %131 to float - %1361 = bitcast i32 %131 to float - %1362 = fmul float %1360, %1361 - %1363 = fadd float %1359, %1362 - %1364 = call float @llvm.sqrt.f32.69(float %1363) - %1365 = bitcast i32 %102 to float - %1366 = fcmp olt float %1365, 0.000000e+00 - %1367 = sext i1 %1366 to i32 - %1368 = bitcast i32 %102 to float - %1369 = fcmp ogt float %1368, 0.000000e+00 - %1370 = zext i1 %1369 to i32 - %1371 = add nsw i32 %1367, %1370 - %1372 = sitofp i32 %1371 to float - %1373 = fneg float %1372 - %1374 = fmul float %1364, %1373 - %1375 = bitcast i32 %102 to float - %1376 = fadd float %1375, %1374 - %1377 = bitcast i32 %102 to float - %1378 = bitcast i32 %102 to float - %1379 = fmul float %1377, %1378 - %1380 = fadd float %1379, 0.000000e+00 - %1381 = bitcast i32 %131 to float - %1382 = bitcast i32 %131 to float - %1383 = fmul float %1381, %1382 - %1384 = fadd float %1380, %1383 - %1385 = call float @llvm.sqrt.f32.70(float %1384) - %1386 = bitcast i32 %102 to float - %1387 = fcmp olt float %1386, 0.000000e+00 - %1388 = sext i1 %1387 to i32 - %1389 = bitcast i32 %102 to float - %1390 = fcmp ogt float %1389, 0.000000e+00 - %1391 = zext i1 %1390 to i32 - %1392 = add nsw i32 %1388, %1391 - %1393 = sitofp i32 %1392 to float - %1394 = fneg float %1393 - %1395 = fmul float %1385, %1394 - %1396 = bitcast i32 %102 to float - %1397 = fadd float %1396, %1395 - %1398 = bitcast i32 %102 to float - %1399 = bitcast i32 %102 to float - %1400 = fmul float %1398, %1399 - %1401 = fadd float %1400, 0.000000e+00 - %1402 = bitcast i32 %131 to float - %1403 = bitcast i32 %131 to float - %1404 = fmul float %1402, %1403 - %1405 = fadd float %1401, %1404 - %1406 = call float @llvm.sqrt.f32.71(float %1405) - %1407 = bitcast i32 %102 to float - %1408 = fcmp olt float %1407, 0.000000e+00 - %1409 = sext i1 %1408 to i32 - %1410 = bitcast i32 %102 to float - %1411 = fcmp ogt float %1410, 0.000000e+00 - %1412 = zext i1 %1411 to i32 - %1413 = add nsw i32 %1409, %1412 - %1414 = sitofp i32 %1413 to float - %1415 = fneg float %1414 - %1416 = fmul float %1406, %1415 - %1417 = bitcast i32 %102 to float - %1418 = fadd float %1417, %1416 - %1419 = fmul float %1397, %1418 - %1420 = fadd float %1419, 0.000000e+00 - %1421 = bitcast i32 %102 to float - %1422 = bitcast i32 %102 to float - %1423 = fmul float %1421, %1422 - %1424 = fadd float %1423, 0.000000e+00 - %1425 = bitcast i32 %131 to float - %1426 = bitcast i32 %131 to float - %1427 = fmul float %1425, %1426 - %1428 = fadd float %1424, %1427 - %1429 = call float @llvm.sqrt.f32.72(float %1428) - %1430 = bitcast i32 %102 to float - %1431 = fcmp olt float %1430, 0.000000e+00 - %1432 = sext i1 %1431 to i32 - %1433 = bitcast i32 %102 to float - %1434 = fcmp ogt float %1433, 0.000000e+00 - %1435 = zext i1 %1434 to i32 - %1436 = add nsw i32 %1432, %1435 - %1437 = sitofp i32 %1436 to float - %1438 = fneg float %1437 - %1439 = fmul float %1429, %1438 - %1440 = fmul float %1439, 0.000000e+00 - %1441 = bitcast i32 %131 to float - %1442 = fadd float %1441, %1440 - %1443 = bitcast i32 %102 to float - %1444 = bitcast i32 %102 to float - %1445 = fmul float %1443, %1444 - %1446 = fadd float %1445, 0.000000e+00 - %1447 = bitcast i32 %131 to float - %1448 = bitcast i32 %131 to float - %1449 = fmul float %1447, %1448 - %1450 = fadd float %1446, %1449 - %1451 = call float @llvm.sqrt.f32.73(float %1450) - %1452 = bitcast i32 %102 to float - %1453 = fcmp olt float %1452, 0.000000e+00 - %1454 = sext i1 %1453 to i32 - %1455 = bitcast i32 %102 to float - %1456 
= fcmp ogt float %1455, 0.000000e+00 - %1457 = zext i1 %1456 to i32 - %1458 = add nsw i32 %1454, %1457 - %1459 = sitofp i32 %1458 to float - %1460 = fneg float %1459 - %1461 = fmul float %1451, %1460 - %1462 = fmul float %1461, 0.000000e+00 - %1463 = bitcast i32 %131 to float - %1464 = fadd float %1463, %1462 - %1465 = fmul float %1442, %1464 - %1466 = fadd float %1420, %1465 - %1467 = call float @llvm.sqrt.f32.74(float %1466) - %1468 = fadd float %1467, 0.000000e+00 - %1469 = fdiv float %1376, %1468 - %1470 = fmul float %1469, 2.000000e+00 - %1471 = bitcast i32 %102 to float - %1472 = bitcast i32 %102 to float - %1473 = fmul float %1471, %1472 - %1474 = fadd float %1473, 0.000000e+00 - %1475 = bitcast i32 %131 to float - %1476 = bitcast i32 %131 to float - %1477 = fmul float %1475, %1476 - %1478 = fadd float %1474, %1477 - %1479 = call float @llvm.sqrt.f32.75(float %1478) - %1480 = bitcast i32 %102 to float - %1481 = fcmp olt float %1480, 0.000000e+00 - %1482 = sext i1 %1481 to i32 - %1483 = bitcast i32 %102 to float - %1484 = fcmp ogt float %1483, 0.000000e+00 - %1485 = zext i1 %1484 to i32 - %1486 = add nsw i32 %1482, %1485 - %1487 = sitofp i32 %1486 to float - %1488 = fneg float %1487 - %1489 = fmul float %1479, %1488 - %1490 = bitcast i32 %102 to float - %1491 = fadd float %1490, %1489 - %1492 = bitcast i32 %102 to float - %1493 = bitcast i32 %102 to float - %1494 = fmul float %1492, %1493 - %1495 = fadd float %1494, 0.000000e+00 - %1496 = bitcast i32 %131 to float - %1497 = bitcast i32 %131 to float - %1498 = fmul float %1496, %1497 - %1499 = fadd float %1495, %1498 - %1500 = call float @llvm.sqrt.f32.76(float %1499) - %1501 = bitcast i32 %102 to float - %1502 = fcmp olt float %1501, 0.000000e+00 - %1503 = sext i1 %1502 to i32 - %1504 = bitcast i32 %102 to float - %1505 = fcmp ogt float %1504, 0.000000e+00 - %1506 = zext i1 %1505 to i32 - %1507 = add nsw i32 %1503, %1506 - %1508 = sitofp i32 %1507 to float - %1509 = fneg float %1508 - %1510 = fmul float %1500, %1509 - %1511 = bitcast i32 %102 to float - %1512 = fadd float %1511, %1510 - %1513 = bitcast i32 %102 to float - %1514 = bitcast i32 %102 to float - %1515 = fmul float %1513, %1514 - %1516 = fadd float %1515, 0.000000e+00 - %1517 = bitcast i32 %131 to float - %1518 = bitcast i32 %131 to float - %1519 = fmul float %1517, %1518 - %1520 = fadd float %1516, %1519 - %1521 = call float @llvm.sqrt.f32.77(float %1520) - %1522 = bitcast i32 %102 to float - %1523 = fcmp olt float %1522, 0.000000e+00 - %1524 = sext i1 %1523 to i32 - %1525 = bitcast i32 %102 to float - %1526 = fcmp ogt float %1525, 0.000000e+00 - %1527 = zext i1 %1526 to i32 - %1528 = add nsw i32 %1524, %1527 - %1529 = sitofp i32 %1528 to float - %1530 = fneg float %1529 - %1531 = fmul float %1521, %1530 - %1532 = bitcast i32 %102 to float - %1533 = fadd float %1532, %1531 - %1534 = fmul float %1512, %1533 - %1535 = fadd float %1534, 0.000000e+00 - %1536 = bitcast i32 %102 to float - %1537 = bitcast i32 %102 to float - %1538 = fmul float %1536, %1537 - %1539 = fadd float %1538, 0.000000e+00 - %1540 = bitcast i32 %131 to float - %1541 = bitcast i32 %131 to float - %1542 = fmul float %1540, %1541 - %1543 = fadd float %1539, %1542 - %1544 = call float @llvm.sqrt.f32.78(float %1543) - %1545 = bitcast i32 %102 to float - %1546 = fcmp olt float %1545, 0.000000e+00 - %1547 = sext i1 %1546 to i32 - %1548 = bitcast i32 %102 to float - %1549 = fcmp ogt float %1548, 0.000000e+00 - %1550 = zext i1 %1549 to i32 - %1551 = add nsw i32 %1547, %1550 - %1552 = sitofp i32 %1551 to float - 
%1553 = fneg float %1552 - %1554 = fmul float %1544, %1553 - %1555 = fmul float %1554, 0.000000e+00 - %1556 = bitcast i32 %131 to float - %1557 = fadd float %1556, %1555 - %1558 = bitcast i32 %102 to float - %1559 = bitcast i32 %102 to float - %1560 = fmul float %1558, %1559 - %1561 = fadd float %1560, 0.000000e+00 - %1562 = bitcast i32 %131 to float - %1563 = bitcast i32 %131 to float - %1564 = fmul float %1562, %1563 - %1565 = fadd float %1561, %1564 - %1566 = call float @llvm.sqrt.f32.79(float %1565) - %1567 = bitcast i32 %102 to float - %1568 = fcmp olt float %1567, 0.000000e+00 - %1569 = sext i1 %1568 to i32 - %1570 = bitcast i32 %102 to float - %1571 = fcmp ogt float %1570, 0.000000e+00 - %1572 = zext i1 %1571 to i32 - %1573 = add nsw i32 %1569, %1572 - %1574 = sitofp i32 %1573 to float - %1575 = fneg float %1574 - %1576 = fmul float %1566, %1575 - %1577 = fmul float %1576, 0.000000e+00 - %1578 = bitcast i32 %131 to float - %1579 = fadd float %1578, %1577 - %1580 = fmul float %1557, %1579 - %1581 = fadd float %1535, %1580 - %1582 = call float @llvm.sqrt.f32.80(float %1581) - %1583 = fadd float %1582, 0.000000e+00 - %1584 = fdiv float %1491, %1583 - %1585 = fmul float %1470, %1584 - %1586 = fsub float 1.000000e+00, %1585 - %1587 = fmul float %1586, %1349 - %1588 = fadd float %1587, 0.000000e+00 - %1589 = bitcast i32 %102 to float - %1590 = bitcast i32 %102 to float - %1591 = fmul float %1589, %1590 - %1592 = fadd float %1591, 0.000000e+00 - %1593 = bitcast i32 %131 to float - %1594 = bitcast i32 %131 to float - %1595 = fmul float %1593, %1594 - %1596 = fadd float %1592, %1595 - %1597 = call float @llvm.sqrt.f32.81(float %1596) - %1598 = bitcast i32 %102 to float - %1599 = fcmp olt float %1598, 0.000000e+00 - %1600 = sext i1 %1599 to i32 - %1601 = bitcast i32 %102 to float - %1602 = fcmp ogt float %1601, 0.000000e+00 - %1603 = zext i1 %1602 to i32 - %1604 = add nsw i32 %1600, %1603 - %1605 = sitofp i32 %1604 to float - %1606 = fneg float %1605 - %1607 = fmul float %1597, %1606 - %1608 = bitcast i32 %102 to float - %1609 = fadd float %1608, %1607 - %1610 = bitcast i32 %102 to float - %1611 = bitcast i32 %102 to float - %1612 = fmul float %1610, %1611 - %1613 = fadd float %1612, 0.000000e+00 - %1614 = bitcast i32 %131 to float - %1615 = bitcast i32 %131 to float - %1616 = fmul float %1614, %1615 - %1617 = fadd float %1613, %1616 - %1618 = call float @llvm.sqrt.f32.82(float %1617) - %1619 = bitcast i32 %102 to float - %1620 = fcmp olt float %1619, 0.000000e+00 - %1621 = sext i1 %1620 to i32 - %1622 = bitcast i32 %102 to float - %1623 = fcmp ogt float %1622, 0.000000e+00 - %1624 = zext i1 %1623 to i32 - %1625 = add nsw i32 %1621, %1624 - %1626 = sitofp i32 %1625 to float - %1627 = fneg float %1626 - %1628 = fmul float %1618, %1627 - %1629 = bitcast i32 %102 to float - %1630 = fadd float %1629, %1628 - %1631 = bitcast i32 %102 to float - %1632 = bitcast i32 %102 to float - %1633 = fmul float %1631, %1632 - %1634 = fadd float %1633, 0.000000e+00 - %1635 = bitcast i32 %131 to float - %1636 = bitcast i32 %131 to float - %1637 = fmul float %1635, %1636 - %1638 = fadd float %1634, %1637 - %1639 = call float @llvm.sqrt.f32.83(float %1638) - %1640 = bitcast i32 %102 to float - %1641 = fcmp olt float %1640, 0.000000e+00 - %1642 = sext i1 %1641 to i32 - %1643 = bitcast i32 %102 to float - %1644 = fcmp ogt float %1643, 0.000000e+00 - %1645 = zext i1 %1644 to i32 - %1646 = add nsw i32 %1642, %1645 - %1647 = sitofp i32 %1646 to float - %1648 = fneg float %1647 - %1649 = fmul float %1639, %1648 - %1650 
= bitcast i32 %102 to float - %1651 = fadd float %1650, %1649 - %1652 = fmul float %1630, %1651 - %1653 = fadd float %1652, 0.000000e+00 - %1654 = bitcast i32 %102 to float - %1655 = bitcast i32 %102 to float - %1656 = fmul float %1654, %1655 - %1657 = fadd float %1656, 0.000000e+00 - %1658 = bitcast i32 %131 to float - %1659 = bitcast i32 %131 to float - %1660 = fmul float %1658, %1659 - %1661 = fadd float %1657, %1660 - %1662 = call float @llvm.sqrt.f32.84(float %1661) - %1663 = bitcast i32 %102 to float - %1664 = fcmp olt float %1663, 0.000000e+00 - %1665 = sext i1 %1664 to i32 - %1666 = bitcast i32 %102 to float - %1667 = fcmp ogt float %1666, 0.000000e+00 - %1668 = zext i1 %1667 to i32 - %1669 = add nsw i32 %1665, %1668 - %1670 = sitofp i32 %1669 to float - %1671 = fneg float %1670 - %1672 = fmul float %1662, %1671 - %1673 = fmul float %1672, 0.000000e+00 - %1674 = bitcast i32 %131 to float - %1675 = fadd float %1674, %1673 - %1676 = bitcast i32 %102 to float - %1677 = bitcast i32 %102 to float - %1678 = fmul float %1676, %1677 - %1679 = fadd float %1678, 0.000000e+00 - %1680 = bitcast i32 %131 to float - %1681 = bitcast i32 %131 to float - %1682 = fmul float %1680, %1681 - %1683 = fadd float %1679, %1682 - %1684 = call float @llvm.sqrt.f32.85(float %1683) - %1685 = bitcast i32 %102 to float - %1686 = fcmp olt float %1685, 0.000000e+00 - %1687 = sext i1 %1686 to i32 - %1688 = bitcast i32 %102 to float - %1689 = fcmp ogt float %1688, 0.000000e+00 - %1690 = zext i1 %1689 to i32 - %1691 = add nsw i32 %1687, %1690 - %1692 = sitofp i32 %1691 to float - %1693 = fneg float %1692 - %1694 = fmul float %1684, %1693 - %1695 = fmul float %1694, 0.000000e+00 - %1696 = bitcast i32 %131 to float - %1697 = fadd float %1696, %1695 - %1698 = fmul float %1675, %1697 - %1699 = fadd float %1653, %1698 - %1700 = call float @llvm.sqrt.f32.86(float %1699) - %1701 = fadd float %1700, 0.000000e+00 - %1702 = fdiv float %1609, %1701 - %1703 = fmul float %1702, 2.000000e+00 - %1704 = bitcast i32 %102 to float - %1705 = bitcast i32 %102 to float - %1706 = fmul float %1704, %1705 - %1707 = fadd float %1706, 0.000000e+00 - %1708 = bitcast i32 %131 to float - %1709 = bitcast i32 %131 to float - %1710 = fmul float %1708, %1709 - %1711 = fadd float %1707, %1710 - %1712 = call float @llvm.sqrt.f32.87(float %1711) - %1713 = bitcast i32 %102 to float - %1714 = fcmp olt float %1713, 0.000000e+00 - %1715 = sext i1 %1714 to i32 - %1716 = bitcast i32 %102 to float - %1717 = fcmp ogt float %1716, 0.000000e+00 - %1718 = zext i1 %1717 to i32 - %1719 = add nsw i32 %1715, %1718 - %1720 = sitofp i32 %1719 to float - %1721 = fneg float %1720 - %1722 = fmul float %1712, %1721 - %1723 = fmul float %1722, 0.000000e+00 - %1724 = bitcast i32 %131 to float - %1725 = fadd float %1724, %1723 - %1726 = bitcast i32 %102 to float - %1727 = bitcast i32 %102 to float - %1728 = fmul float %1726, %1727 - %1729 = fadd float %1728, 0.000000e+00 - %1730 = bitcast i32 %131 to float - %1731 = bitcast i32 %131 to float - %1732 = fmul float %1730, %1731 - %1733 = fadd float %1729, %1732 - %1734 = call float @llvm.sqrt.f32.88(float %1733) - %1735 = bitcast i32 %102 to float - %1736 = fcmp olt float %1735, 0.000000e+00 - %1737 = sext i1 %1736 to i32 - %1738 = bitcast i32 %102 to float - %1739 = fcmp ogt float %1738, 0.000000e+00 - %1740 = zext i1 %1739 to i32 - %1741 = add nsw i32 %1737, %1740 - %1742 = sitofp i32 %1741 to float - %1743 = fneg float %1742 - %1744 = fmul float %1734, %1743 - %1745 = bitcast i32 %102 to float - %1746 = fadd float %1745, 
%1744 - %1747 = bitcast i32 %102 to float - %1748 = bitcast i32 %102 to float - %1749 = fmul float %1747, %1748 - %1750 = fadd float %1749, 0.000000e+00 - %1751 = bitcast i32 %131 to float - %1752 = bitcast i32 %131 to float - %1753 = fmul float %1751, %1752 - %1754 = fadd float %1750, %1753 - %1755 = call float @llvm.sqrt.f32.89(float %1754) - %1756 = bitcast i32 %102 to float - %1757 = fcmp olt float %1756, 0.000000e+00 - %1758 = sext i1 %1757 to i32 - %1759 = bitcast i32 %102 to float - %1760 = fcmp ogt float %1759, 0.000000e+00 - %1761 = zext i1 %1760 to i32 - %1762 = add nsw i32 %1758, %1761 - %1763 = sitofp i32 %1762 to float - %1764 = fneg float %1763 - %1765 = fmul float %1755, %1764 - %1766 = bitcast i32 %102 to float - %1767 = fadd float %1766, %1765 - %1768 = fmul float %1746, %1767 - %1769 = fadd float %1768, 0.000000e+00 - %1770 = bitcast i32 %102 to float - %1771 = bitcast i32 %102 to float - %1772 = fmul float %1770, %1771 - %1773 = fadd float %1772, 0.000000e+00 - %1774 = bitcast i32 %131 to float - %1775 = bitcast i32 %131 to float - %1776 = fmul float %1774, %1775 - %1777 = fadd float %1773, %1776 - %1778 = call float @llvm.sqrt.f32.90(float %1777) - %1779 = bitcast i32 %102 to float - %1780 = fcmp olt float %1779, 0.000000e+00 - %1781 = sext i1 %1780 to i32 - %1782 = bitcast i32 %102 to float - %1783 = fcmp ogt float %1782, 0.000000e+00 - %1784 = zext i1 %1783 to i32 - %1785 = add nsw i32 %1781, %1784 - %1786 = sitofp i32 %1785 to float - %1787 = fneg float %1786 - %1788 = fmul float %1778, %1787 - %1789 = fmul float %1788, 0.000000e+00 - %1790 = bitcast i32 %131 to float - %1791 = fadd float %1790, %1789 - %1792 = bitcast i32 %102 to float - %1793 = bitcast i32 %102 to float - %1794 = fmul float %1792, %1793 - %1795 = fadd float %1794, 0.000000e+00 - %1796 = bitcast i32 %131 to float - %1797 = bitcast i32 %131 to float - %1798 = fmul float %1796, %1797 - %1799 = fadd float %1795, %1798 - %1800 = call float @llvm.sqrt.f32.91(float %1799) - %1801 = bitcast i32 %102 to float - %1802 = fcmp olt float %1801, 0.000000e+00 - %1803 = sext i1 %1802 to i32 - %1804 = bitcast i32 %102 to float - %1805 = fcmp ogt float %1804, 0.000000e+00 - %1806 = zext i1 %1805 to i32 - %1807 = add nsw i32 %1803, %1806 - %1808 = sitofp i32 %1807 to float - %1809 = fneg float %1808 - %1810 = fmul float %1800, %1809 - %1811 = fmul float %1810, 0.000000e+00 - %1812 = bitcast i32 %131 to float - %1813 = fadd float %1812, %1811 - %1814 = fmul float %1791, %1813 - %1815 = fadd float %1769, %1814 - %1816 = call float @llvm.sqrt.f32.92(float %1815) - %1817 = fadd float %1816, 0.000000e+00 - %1818 = fdiv float %1725, %1817 - %1819 = fmul float %1703, %1818 - %1820 = fneg float %1819 - %1821 = getelementptr float, float* %0, i32 0 - %1822 = getelementptr inbounds float, float* %1821, i64 2 - %1823 = load float, float* %1822, align 4 - %1824 = fmul float %1820, %1823 - %1825 = fadd float %1588, %1824 - %1826 = insertelement <4 x float> zeroinitializer, float %1825, i32 0 - %1827 = insertelement <4 x float> %1826, float 0.000000e+00, i32 1 - %1828 = insertelement <4 x float> %1827, float 0.000000e+00, i32 2 - %1829 = insertelement <4 x float> %1828, float 0.000000e+00, i32 3 - %1830 = extractelement <4 x float> %1829, i32 0 - store float %1830, float* %2, align 4 - %1831 = extractelement <4 x float> %1829, i32 1 - %1832 = getelementptr float, float* %2, i32 0 - %1833 = getelementptr inbounds float, float* %1832, i64 1 - store float %1831, float* %1833, align 4 - %1834 = bitcast i32 %102 to float - %1835 = 
bitcast i32 %102 to float - %1836 = fmul float %1834, %1835 - %1837 = fadd float %1836, 0.000000e+00 - %1838 = bitcast i32 %131 to float - %1839 = bitcast i32 %131 to float - %1840 = fmul float %1838, %1839 - %1841 = fadd float %1837, %1840 - %1842 = call float @llvm.sqrt.f32.93(float %1841) - %1843 = bitcast i32 %102 to float - %1844 = fcmp olt float %1843, 0.000000e+00 - %1845 = sext i1 %1844 to i32 - %1846 = bitcast i32 %102 to float - %1847 = fcmp ogt float %1846, 0.000000e+00 - %1848 = zext i1 %1847 to i32 - %1849 = add nsw i32 %1845, %1848 - %1850 = sitofp i32 %1849 to float - %1851 = fneg float %1850 - %1852 = fmul float %1842, %1851 - %1853 = bitcast i32 %102 to float - %1854 = fadd float %1853, %1852 - %1855 = bitcast i32 %102 to float - %1856 = bitcast i32 %102 to float - %1857 = fmul float %1855, %1856 - %1858 = fadd float %1857, 0.000000e+00 - %1859 = bitcast i32 %131 to float - %1860 = bitcast i32 %131 to float - %1861 = fmul float %1859, %1860 - %1862 = fadd float %1858, %1861 - %1863 = call float @llvm.sqrt.f32.94(float %1862) - %1864 = bitcast i32 %102 to float - %1865 = fcmp olt float %1864, 0.000000e+00 - %1866 = sext i1 %1865 to i32 - %1867 = bitcast i32 %102 to float - %1868 = fcmp ogt float %1867, 0.000000e+00 - %1869 = zext i1 %1868 to i32 - %1870 = add nsw i32 %1866, %1869 - %1871 = sitofp i32 %1870 to float - %1872 = fneg float %1871 - %1873 = fmul float %1863, %1872 - %1874 = bitcast i32 %102 to float - %1875 = fadd float %1874, %1873 - %1876 = bitcast i32 %102 to float - %1877 = bitcast i32 %102 to float - %1878 = fmul float %1876, %1877 - %1879 = fadd float %1878, 0.000000e+00 - %1880 = bitcast i32 %131 to float - %1881 = bitcast i32 %131 to float - %1882 = fmul float %1880, %1881 - %1883 = fadd float %1879, %1882 - %1884 = call float @llvm.sqrt.f32.95(float %1883) - %1885 = bitcast i32 %102 to float - %1886 = fcmp olt float %1885, 0.000000e+00 - %1887 = sext i1 %1886 to i32 - %1888 = bitcast i32 %102 to float - %1889 = fcmp ogt float %1888, 0.000000e+00 - %1890 = zext i1 %1889 to i32 - %1891 = add nsw i32 %1887, %1890 - %1892 = sitofp i32 %1891 to float - %1893 = fneg float %1892 - %1894 = fmul float %1884, %1893 - %1895 = bitcast i32 %102 to float - %1896 = fadd float %1895, %1894 - %1897 = fmul float %1875, %1896 - %1898 = fadd float %1897, 0.000000e+00 - %1899 = bitcast i32 %102 to float - %1900 = bitcast i32 %102 to float - %1901 = fmul float %1899, %1900 - %1902 = fadd float %1901, 0.000000e+00 - %1903 = bitcast i32 %131 to float - %1904 = bitcast i32 %131 to float - %1905 = fmul float %1903, %1904 - %1906 = fadd float %1902, %1905 - %1907 = call float @llvm.sqrt.f32.96(float %1906) - %1908 = bitcast i32 %102 to float - %1909 = fcmp olt float %1908, 0.000000e+00 - %1910 = sext i1 %1909 to i32 - %1911 = bitcast i32 %102 to float - %1912 = fcmp ogt float %1911, 0.000000e+00 - %1913 = zext i1 %1912 to i32 - %1914 = add nsw i32 %1910, %1913 - %1915 = sitofp i32 %1914 to float - %1916 = fneg float %1915 - %1917 = fmul float %1907, %1916 - %1918 = fmul float %1917, 0.000000e+00 - %1919 = bitcast i32 %131 to float - %1920 = fadd float %1919, %1918 - %1921 = bitcast i32 %102 to float - %1922 = bitcast i32 %102 to float - %1923 = fmul float %1921, %1922 - %1924 = fadd float %1923, 0.000000e+00 - %1925 = bitcast i32 %131 to float - %1926 = bitcast i32 %131 to float - %1927 = fmul float %1925, %1926 - %1928 = fadd float %1924, %1927 - %1929 = call float @llvm.sqrt.f32.97(float %1928) - %1930 = bitcast i32 %102 to float - %1931 = fcmp olt float %1930, 0.000000e+00 - 
%1932 = sext i1 %1931 to i32 - %1933 = bitcast i32 %102 to float - %1934 = fcmp ogt float %1933, 0.000000e+00 - %1935 = zext i1 %1934 to i32 - %1936 = add nsw i32 %1932, %1935 - %1937 = sitofp i32 %1936 to float - %1938 = fneg float %1937 - %1939 = fmul float %1929, %1938 - %1940 = fmul float %1939, 0.000000e+00 - %1941 = bitcast i32 %131 to float - %1942 = fadd float %1941, %1940 - %1943 = fmul float %1920, %1942 - %1944 = fadd float %1898, %1943 - %1945 = call float @llvm.sqrt.f32.98(float %1944) - %1946 = fadd float %1945, 0.000000e+00 - %1947 = fdiv float %1854, %1946 - %1948 = fmul float %1947, 2.000000e+00 - %1949 = bitcast i32 %102 to float - %1950 = bitcast i32 %102 to float - %1951 = fmul float %1949, %1950 - %1952 = fadd float %1951, 0.000000e+00 - %1953 = bitcast i32 %131 to float - %1954 = bitcast i32 %131 to float - %1955 = fmul float %1953, %1954 - %1956 = fadd float %1952, %1955 - %1957 = call float @llvm.sqrt.f32.99(float %1956) - %1958 = bitcast i32 %102 to float - %1959 = fcmp olt float %1958, 0.000000e+00 - %1960 = sext i1 %1959 to i32 - %1961 = bitcast i32 %102 to float - %1962 = fcmp ogt float %1961, 0.000000e+00 - %1963 = zext i1 %1962 to i32 - %1964 = add nsw i32 %1960, %1963 - %1965 = sitofp i32 %1964 to float - %1966 = fneg float %1965 - %1967 = fmul float %1957, %1966 - %1968 = bitcast i32 %102 to float - %1969 = fadd float %1968, %1967 - %1970 = bitcast i32 %102 to float - %1971 = bitcast i32 %102 to float - %1972 = fmul float %1970, %1971 - %1973 = fadd float %1972, 0.000000e+00 - %1974 = bitcast i32 %131 to float - %1975 = bitcast i32 %131 to float - %1976 = fmul float %1974, %1975 - %1977 = fadd float %1973, %1976 - %1978 = call float @llvm.sqrt.f32.100(float %1977) - %1979 = bitcast i32 %102 to float - %1980 = fcmp olt float %1979, 0.000000e+00 - %1981 = sext i1 %1980 to i32 - %1982 = bitcast i32 %102 to float - %1983 = fcmp ogt float %1982, 0.000000e+00 - %1984 = zext i1 %1983 to i32 - %1985 = add nsw i32 %1981, %1984 - %1986 = sitofp i32 %1985 to float - %1987 = fneg float %1986 - %1988 = fmul float %1978, %1987 - %1989 = bitcast i32 %102 to float - %1990 = fadd float %1989, %1988 - %1991 = bitcast i32 %102 to float - %1992 = bitcast i32 %102 to float - %1993 = fmul float %1991, %1992 - %1994 = fadd float %1993, 0.000000e+00 - %1995 = bitcast i32 %131 to float - %1996 = bitcast i32 %131 to float - %1997 = fmul float %1995, %1996 - %1998 = fadd float %1994, %1997 - %1999 = call float @llvm.sqrt.f32.101(float %1998) - %2000 = bitcast i32 %102 to float - %2001 = fcmp olt float %2000, 0.000000e+00 - %2002 = sext i1 %2001 to i32 - %2003 = bitcast i32 %102 to float - %2004 = fcmp ogt float %2003, 0.000000e+00 - %2005 = zext i1 %2004 to i32 - %2006 = add nsw i32 %2002, %2005 - %2007 = sitofp i32 %2006 to float - %2008 = fneg float %2007 - %2009 = fmul float %1999, %2008 - %2010 = bitcast i32 %102 to float - %2011 = fadd float %2010, %2009 - %2012 = fmul float %1990, %2011 - %2013 = fadd float %2012, 0.000000e+00 - %2014 = bitcast i32 %102 to float - %2015 = bitcast i32 %102 to float - %2016 = fmul float %2014, %2015 - %2017 = fadd float %2016, 0.000000e+00 - %2018 = bitcast i32 %131 to float - %2019 = bitcast i32 %131 to float - %2020 = fmul float %2018, %2019 - %2021 = fadd float %2017, %2020 - %2022 = call float @llvm.sqrt.f32.102(float %2021) - %2023 = bitcast i32 %102 to float - %2024 = fcmp olt float %2023, 0.000000e+00 - %2025 = sext i1 %2024 to i32 - %2026 = bitcast i32 %102 to float - %2027 = fcmp ogt float %2026, 0.000000e+00 - %2028 = zext i1 %2027 to 
i32 - %2029 = add nsw i32 %2025, %2028 - %2030 = sitofp i32 %2029 to float - %2031 = fneg float %2030 - %2032 = fmul float %2022, %2031 - %2033 = fmul float %2032, 0.000000e+00 - %2034 = bitcast i32 %131 to float - %2035 = fadd float %2034, %2033 - %2036 = bitcast i32 %102 to float - %2037 = bitcast i32 %102 to float - %2038 = fmul float %2036, %2037 - %2039 = fadd float %2038, 0.000000e+00 - %2040 = bitcast i32 %131 to float - %2041 = bitcast i32 %131 to float - %2042 = fmul float %2040, %2041 - %2043 = fadd float %2039, %2042 - %2044 = call float @llvm.sqrt.f32.103(float %2043) - %2045 = bitcast i32 %102 to float - %2046 = fcmp olt float %2045, 0.000000e+00 - %2047 = sext i1 %2046 to i32 - %2048 = bitcast i32 %102 to float - %2049 = fcmp ogt float %2048, 0.000000e+00 - %2050 = zext i1 %2049 to i32 - %2051 = add nsw i32 %2047, %2050 - %2052 = sitofp i32 %2051 to float - %2053 = fneg float %2052 - %2054 = fmul float %2044, %2053 - %2055 = fmul float %2054, 0.000000e+00 - %2056 = bitcast i32 %131 to float - %2057 = fadd float %2056, %2055 - %2058 = fmul float %2035, %2057 - %2059 = fadd float %2013, %2058 - %2060 = call float @llvm.sqrt.f32.104(float %2059) - %2061 = fadd float %2060, 0.000000e+00 - %2062 = fdiv float %1969, %2061 - %2063 = fmul float %1948, %2062 - %2064 = fsub float 1.000000e+00, %2063 - %2065 = insertelement <4 x float> zeroinitializer, float %2064, i32 0 - %2066 = insertelement <4 x float> %2065, float 0.000000e+00, i32 1 - %2067 = insertelement <4 x float> %2066, float 0.000000e+00, i32 2 - %2068 = insertelement <4 x float> %2067, float 0.000000e+00, i32 3 - %2069 = getelementptr float, float* %0, i32 0 - %2070 = getelementptr inbounds float, float* %2069, i64 1 - %2071 = load float, float* %2070, align 4 - %2072 = insertelement <4 x float> zeroinitializer, float %2071, i32 0 - %2073 = insertelement <4 x float> %2072, float 0.000000e+00, i32 1 - %2074 = insertelement <4 x float> %2073, float 0.000000e+00, i32 2 - %2075 = insertelement <4 x float> %2074, float 0.000000e+00, i32 3 - %2076 = call <4 x float> @llvm.fma.f32.105(<4 x float> %2068, <4 x float> %2075, <4 x float> zeroinitializer) - %2077 = extractelement <4 x float> %2076, i32 0 - %2078 = getelementptr float, float* %2, i32 0 - %2079 = getelementptr inbounds float, float* %2078, i64 1 - store float %2077, float* %2079, align 4 - %2080 = bitcast i32 %102 to float - %2081 = bitcast i32 %102 to float - %2082 = fmul float %2080, %2081 - %2083 = fadd float %2082, 0.000000e+00 - %2084 = bitcast i32 %131 to float - %2085 = bitcast i32 %131 to float - %2086 = fmul float %2084, %2085 - %2087 = fadd float %2083, %2086 - %2088 = call float @llvm.sqrt.f32.106(float %2087) - %2089 = bitcast i32 %102 to float - %2090 = fcmp olt float %2089, 0.000000e+00 - %2091 = sext i1 %2090 to i32 - %2092 = bitcast i32 %102 to float - %2093 = fcmp ogt float %2092, 0.000000e+00 - %2094 = zext i1 %2093 to i32 - %2095 = add nsw i32 %2091, %2094 - %2096 = sitofp i32 %2095 to float - %2097 = fneg float %2096 - %2098 = fmul float %2088, %2097 - %2099 = bitcast i32 %102 to float - %2100 = fadd float %2099, %2098 - %2101 = bitcast i32 %102 to float - %2102 = bitcast i32 %102 to float - %2103 = fmul float %2101, %2102 - %2104 = fadd float %2103, 0.000000e+00 - %2105 = bitcast i32 %131 to float - %2106 = bitcast i32 %131 to float - %2107 = fmul float %2105, %2106 - %2108 = fadd float %2104, %2107 - %2109 = call float @llvm.sqrt.f32.107(float %2108) - %2110 = bitcast i32 %102 to float - %2111 = fcmp olt float %2110, 0.000000e+00 - %2112 = sext i1 
%2111 to i32 - %2113 = bitcast i32 %102 to float - %2114 = fcmp ogt float %2113, 0.000000e+00 - %2115 = zext i1 %2114 to i32 - %2116 = add nsw i32 %2112, %2115 - %2117 = sitofp i32 %2116 to float - %2118 = fneg float %2117 - %2119 = fmul float %2109, %2118 - %2120 = bitcast i32 %102 to float - %2121 = fadd float %2120, %2119 - %2122 = bitcast i32 %102 to float - %2123 = bitcast i32 %102 to float - %2124 = fmul float %2122, %2123 - %2125 = fadd float %2124, 0.000000e+00 - %2126 = bitcast i32 %131 to float - %2127 = bitcast i32 %131 to float - %2128 = fmul float %2126, %2127 - %2129 = fadd float %2125, %2128 - %2130 = call float @llvm.sqrt.f32.108(float %2129) - %2131 = bitcast i32 %102 to float - %2132 = fcmp olt float %2131, 0.000000e+00 - %2133 = sext i1 %2132 to i32 - %2134 = bitcast i32 %102 to float - %2135 = fcmp ogt float %2134, 0.000000e+00 - %2136 = zext i1 %2135 to i32 - %2137 = add nsw i32 %2133, %2136 - %2138 = sitofp i32 %2137 to float - %2139 = fneg float %2138 - %2140 = fmul float %2130, %2139 - %2141 = bitcast i32 %102 to float - %2142 = fadd float %2141, %2140 - %2143 = fmul float %2121, %2142 - %2144 = fadd float %2143, 0.000000e+00 - %2145 = bitcast i32 %102 to float - %2146 = bitcast i32 %102 to float - %2147 = fmul float %2145, %2146 - %2148 = fadd float %2147, 0.000000e+00 - %2149 = bitcast i32 %131 to float - %2150 = bitcast i32 %131 to float - %2151 = fmul float %2149, %2150 - %2152 = fadd float %2148, %2151 - %2153 = call float @llvm.sqrt.f32.109(float %2152) - %2154 = bitcast i32 %102 to float - %2155 = fcmp olt float %2154, 0.000000e+00 - %2156 = sext i1 %2155 to i32 - %2157 = bitcast i32 %102 to float - %2158 = fcmp ogt float %2157, 0.000000e+00 - %2159 = zext i1 %2158 to i32 - %2160 = add nsw i32 %2156, %2159 - %2161 = sitofp i32 %2160 to float - %2162 = fneg float %2161 - %2163 = fmul float %2153, %2162 - %2164 = fmul float %2163, 0.000000e+00 - %2165 = bitcast i32 %131 to float - %2166 = fadd float %2165, %2164 - %2167 = bitcast i32 %102 to float - %2168 = bitcast i32 %102 to float - %2169 = fmul float %2167, %2168 - %2170 = fadd float %2169, 0.000000e+00 - %2171 = bitcast i32 %131 to float - %2172 = bitcast i32 %131 to float - %2173 = fmul float %2171, %2172 - %2174 = fadd float %2170, %2173 - %2175 = call float @llvm.sqrt.f32.110(float %2174) - %2176 = bitcast i32 %102 to float - %2177 = fcmp olt float %2176, 0.000000e+00 - %2178 = sext i1 %2177 to i32 - %2179 = bitcast i32 %102 to float - %2180 = fcmp ogt float %2179, 0.000000e+00 - %2181 = zext i1 %2180 to i32 - %2182 = add nsw i32 %2178, %2181 - %2183 = sitofp i32 %2182 to float - %2184 = fneg float %2183 - %2185 = fmul float %2175, %2184 - %2186 = fmul float %2185, 0.000000e+00 - %2187 = bitcast i32 %131 to float - %2188 = fadd float %2187, %2186 - %2189 = fmul float %2166, %2188 - %2190 = fadd float %2144, %2189 - %2191 = call float @llvm.sqrt.f32.111(float %2190) - %2192 = fadd float %2191, 0.000000e+00 - %2193 = fdiv float %2100, %2192 - %2194 = fmul float %2193, 2.000000e+00 - %2195 = bitcast i32 %102 to float - %2196 = bitcast i32 %102 to float - %2197 = fmul float %2195, %2196 - %2198 = fadd float %2197, 0.000000e+00 - %2199 = bitcast i32 %131 to float - %2200 = bitcast i32 %131 to float - %2201 = fmul float %2199, %2200 - %2202 = fadd float %2198, %2201 - %2203 = call float @llvm.sqrt.f32.112(float %2202) - %2204 = bitcast i32 %102 to float - %2205 = fcmp olt float %2204, 0.000000e+00 - %2206 = sext i1 %2205 to i32 - %2207 = bitcast i32 %102 to float - %2208 = fcmp ogt float %2207, 0.000000e+00 - 
%2209 = zext i1 %2208 to i32 - %2210 = add nsw i32 %2206, %2209 - %2211 = sitofp i32 %2210 to float - %2212 = fneg float %2211 - %2213 = fmul float %2203, %2212 - %2214 = bitcast i32 %102 to float - %2215 = fadd float %2214, %2213 - %2216 = bitcast i32 %102 to float - %2217 = bitcast i32 %102 to float - %2218 = fmul float %2216, %2217 - %2219 = fadd float %2218, 0.000000e+00 - %2220 = bitcast i32 %131 to float - %2221 = bitcast i32 %131 to float - %2222 = fmul float %2220, %2221 - %2223 = fadd float %2219, %2222 - %2224 = call float @llvm.sqrt.f32.113(float %2223) - %2225 = bitcast i32 %102 to float - %2226 = fcmp olt float %2225, 0.000000e+00 - %2227 = sext i1 %2226 to i32 - %2228 = bitcast i32 %102 to float - %2229 = fcmp ogt float %2228, 0.000000e+00 - %2230 = zext i1 %2229 to i32 - %2231 = add nsw i32 %2227, %2230 - %2232 = sitofp i32 %2231 to float - %2233 = fneg float %2232 - %2234 = fmul float %2224, %2233 - %2235 = bitcast i32 %102 to float - %2236 = fadd float %2235, %2234 - %2237 = bitcast i32 %102 to float - %2238 = bitcast i32 %102 to float - %2239 = fmul float %2237, %2238 - %2240 = fadd float %2239, 0.000000e+00 - %2241 = bitcast i32 %131 to float - %2242 = bitcast i32 %131 to float - %2243 = fmul float %2241, %2242 - %2244 = fadd float %2240, %2243 - %2245 = call float @llvm.sqrt.f32.114(float %2244) - %2246 = bitcast i32 %102 to float - %2247 = fcmp olt float %2246, 0.000000e+00 - %2248 = sext i1 %2247 to i32 - %2249 = bitcast i32 %102 to float - %2250 = fcmp ogt float %2249, 0.000000e+00 - %2251 = zext i1 %2250 to i32 - %2252 = add nsw i32 %2248, %2251 - %2253 = sitofp i32 %2252 to float - %2254 = fneg float %2253 - %2255 = fmul float %2245, %2254 - %2256 = bitcast i32 %102 to float - %2257 = fadd float %2256, %2255 - %2258 = fmul float %2236, %2257 - %2259 = fadd float %2258, 0.000000e+00 - %2260 = bitcast i32 %102 to float - %2261 = bitcast i32 %102 to float - %2262 = fmul float %2260, %2261 - %2263 = fadd float %2262, 0.000000e+00 - %2264 = bitcast i32 %131 to float - %2265 = bitcast i32 %131 to float - %2266 = fmul float %2264, %2265 - %2267 = fadd float %2263, %2266 - %2268 = call float @llvm.sqrt.f32.115(float %2267) - %2269 = bitcast i32 %102 to float - %2270 = fcmp olt float %2269, 0.000000e+00 - %2271 = sext i1 %2270 to i32 - %2272 = bitcast i32 %102 to float - %2273 = fcmp ogt float %2272, 0.000000e+00 - %2274 = zext i1 %2273 to i32 - %2275 = add nsw i32 %2271, %2274 - %2276 = sitofp i32 %2275 to float - %2277 = fneg float %2276 - %2278 = fmul float %2268, %2277 - %2279 = fmul float %2278, 0.000000e+00 - %2280 = bitcast i32 %131 to float - %2281 = fadd float %2280, %2279 - %2282 = bitcast i32 %102 to float - %2283 = bitcast i32 %102 to float - %2284 = fmul float %2282, %2283 - %2285 = fadd float %2284, 0.000000e+00 - %2286 = bitcast i32 %131 to float - %2287 = bitcast i32 %131 to float - %2288 = fmul float %2286, %2287 - %2289 = fadd float %2285, %2288 - %2290 = call float @llvm.sqrt.f32.116(float %2289) - %2291 = bitcast i32 %102 to float - %2292 = fcmp olt float %2291, 0.000000e+00 - %2293 = sext i1 %2292 to i32 - %2294 = bitcast i32 %102 to float - %2295 = fcmp ogt float %2294, 0.000000e+00 - %2296 = zext i1 %2295 to i32 - %2297 = add nsw i32 %2293, %2296 - %2298 = sitofp i32 %2297 to float - %2299 = fneg float %2298 - %2300 = fmul float %2290, %2299 - %2301 = fmul float %2300, 0.000000e+00 - %2302 = bitcast i32 %131 to float - %2303 = fadd float %2302, %2301 - %2304 = fmul float %2281, %2303 - %2305 = fadd float %2259, %2304 - %2306 = call float 
@llvm.sqrt.f32.117(float %2305) - %2307 = fadd float %2306, 0.000000e+00 - %2308 = fdiv float %2215, %2307 - %2309 = fmul float %2194, %2308 - %2310 = fsub float 1.000000e+00, %2309 - %2311 = fmul float %2310, %2071 - %2312 = fadd float %2311, 0.000000e+00 - %2313 = bitcast i32 %102 to float - %2314 = bitcast i32 %102 to float - %2315 = fmul float %2313, %2314 - %2316 = fadd float %2315, 0.000000e+00 - %2317 = bitcast i32 %131 to float - %2318 = bitcast i32 %131 to float - %2319 = fmul float %2317, %2318 - %2320 = fadd float %2316, %2319 - %2321 = call float @llvm.sqrt.f32.118(float %2320) - %2322 = bitcast i32 %102 to float - %2323 = fcmp olt float %2322, 0.000000e+00 - %2324 = sext i1 %2323 to i32 - %2325 = bitcast i32 %102 to float - %2326 = fcmp ogt float %2325, 0.000000e+00 - %2327 = zext i1 %2326 to i32 - %2328 = add nsw i32 %2324, %2327 - %2329 = sitofp i32 %2328 to float - %2330 = fneg float %2329 - %2331 = fmul float %2321, %2330 - %2332 = bitcast i32 %102 to float - %2333 = fadd float %2332, %2331 - %2334 = bitcast i32 %102 to float - %2335 = bitcast i32 %102 to float - %2336 = fmul float %2334, %2335 - %2337 = fadd float %2336, 0.000000e+00 - %2338 = bitcast i32 %131 to float - %2339 = bitcast i32 %131 to float - %2340 = fmul float %2338, %2339 - %2341 = fadd float %2337, %2340 - %2342 = call float @llvm.sqrt.f32.119(float %2341) - %2343 = bitcast i32 %102 to float - %2344 = fcmp olt float %2343, 0.000000e+00 - %2345 = sext i1 %2344 to i32 - %2346 = bitcast i32 %102 to float - %2347 = fcmp ogt float %2346, 0.000000e+00 - %2348 = zext i1 %2347 to i32 - %2349 = add nsw i32 %2345, %2348 - %2350 = sitofp i32 %2349 to float - %2351 = fneg float %2350 - %2352 = fmul float %2342, %2351 - %2353 = bitcast i32 %102 to float - %2354 = fadd float %2353, %2352 - %2355 = bitcast i32 %102 to float - %2356 = bitcast i32 %102 to float - %2357 = fmul float %2355, %2356 - %2358 = fadd float %2357, 0.000000e+00 - %2359 = bitcast i32 %131 to float - %2360 = bitcast i32 %131 to float - %2361 = fmul float %2359, %2360 - %2362 = fadd float %2358, %2361 - %2363 = call float @llvm.sqrt.f32.120(float %2362) - %2364 = bitcast i32 %102 to float - %2365 = fcmp olt float %2364, 0.000000e+00 - %2366 = sext i1 %2365 to i32 - %2367 = bitcast i32 %102 to float - %2368 = fcmp ogt float %2367, 0.000000e+00 - %2369 = zext i1 %2368 to i32 - %2370 = add nsw i32 %2366, %2369 - %2371 = sitofp i32 %2370 to float - %2372 = fneg float %2371 - %2373 = fmul float %2363, %2372 - %2374 = bitcast i32 %102 to float - %2375 = fadd float %2374, %2373 - %2376 = fmul float %2354, %2375 - %2377 = fadd float %2376, 0.000000e+00 - %2378 = bitcast i32 %102 to float - %2379 = bitcast i32 %102 to float - %2380 = fmul float %2378, %2379 - %2381 = fadd float %2380, 0.000000e+00 - %2382 = bitcast i32 %131 to float - %2383 = bitcast i32 %131 to float - %2384 = fmul float %2382, %2383 - %2385 = fadd float %2381, %2384 - %2386 = call float @llvm.sqrt.f32.121(float %2385) - %2387 = bitcast i32 %102 to float - %2388 = fcmp olt float %2387, 0.000000e+00 - %2389 = sext i1 %2388 to i32 - %2390 = bitcast i32 %102 to float - %2391 = fcmp ogt float %2390, 0.000000e+00 - %2392 = zext i1 %2391 to i32 - %2393 = add nsw i32 %2389, %2392 - %2394 = sitofp i32 %2393 to float - %2395 = fneg float %2394 - %2396 = fmul float %2386, %2395 - %2397 = fmul float %2396, 0.000000e+00 - %2398 = bitcast i32 %131 to float - %2399 = fadd float %2398, %2397 - %2400 = bitcast i32 %102 to float - %2401 = bitcast i32 %102 to float - %2402 = fmul float %2400, %2401 - %2403 = 
fadd float %2402, 0.000000e+00 - %2404 = bitcast i32 %131 to float - %2405 = bitcast i32 %131 to float - %2406 = fmul float %2404, %2405 - %2407 = fadd float %2403, %2406 - %2408 = call float @llvm.sqrt.f32.122(float %2407) - %2409 = bitcast i32 %102 to float - %2410 = fcmp olt float %2409, 0.000000e+00 - %2411 = sext i1 %2410 to i32 - %2412 = bitcast i32 %102 to float - %2413 = fcmp ogt float %2412, 0.000000e+00 - %2414 = zext i1 %2413 to i32 - %2415 = add nsw i32 %2411, %2414 - %2416 = sitofp i32 %2415 to float - %2417 = fneg float %2416 - %2418 = fmul float %2408, %2417 - %2419 = fmul float %2418, 0.000000e+00 - %2420 = bitcast i32 %131 to float - %2421 = fadd float %2420, %2419 - %2422 = fmul float %2399, %2421 - %2423 = fadd float %2377, %2422 - %2424 = call float @llvm.sqrt.f32.123(float %2423) - %2425 = fadd float %2424, 0.000000e+00 - %2426 = fdiv float %2333, %2425 - %2427 = fmul float %2426, 2.000000e+00 - %2428 = bitcast i32 %102 to float - %2429 = bitcast i32 %102 to float - %2430 = fmul float %2428, %2429 - %2431 = fadd float %2430, 0.000000e+00 - %2432 = bitcast i32 %131 to float - %2433 = bitcast i32 %131 to float - %2434 = fmul float %2432, %2433 - %2435 = fadd float %2431, %2434 - %2436 = call float @llvm.sqrt.f32.124(float %2435) - %2437 = bitcast i32 %102 to float - %2438 = fcmp olt float %2437, 0.000000e+00 - %2439 = sext i1 %2438 to i32 - %2440 = bitcast i32 %102 to float - %2441 = fcmp ogt float %2440, 0.000000e+00 - %2442 = zext i1 %2441 to i32 - %2443 = add nsw i32 %2439, %2442 - %2444 = sitofp i32 %2443 to float - %2445 = fneg float %2444 - %2446 = fmul float %2436, %2445 - %2447 = fmul float %2446, 0.000000e+00 - %2448 = bitcast i32 %131 to float - %2449 = fadd float %2448, %2447 - %2450 = bitcast i32 %102 to float - %2451 = bitcast i32 %102 to float - %2452 = fmul float %2450, %2451 - %2453 = fadd float %2452, 0.000000e+00 - %2454 = bitcast i32 %131 to float - %2455 = bitcast i32 %131 to float - %2456 = fmul float %2454, %2455 - %2457 = fadd float %2453, %2456 - %2458 = call float @llvm.sqrt.f32.125(float %2457) - %2459 = bitcast i32 %102 to float - %2460 = fcmp olt float %2459, 0.000000e+00 - %2461 = sext i1 %2460 to i32 - %2462 = bitcast i32 %102 to float - %2463 = fcmp ogt float %2462, 0.000000e+00 - %2464 = zext i1 %2463 to i32 - %2465 = add nsw i32 %2461, %2464 - %2466 = sitofp i32 %2465 to float - %2467 = fneg float %2466 - %2468 = fmul float %2458, %2467 - %2469 = bitcast i32 %102 to float - %2470 = fadd float %2469, %2468 - %2471 = bitcast i32 %102 to float - %2472 = bitcast i32 %102 to float - %2473 = fmul float %2471, %2472 - %2474 = fadd float %2473, 0.000000e+00 - %2475 = bitcast i32 %131 to float - %2476 = bitcast i32 %131 to float - %2477 = fmul float %2475, %2476 - %2478 = fadd float %2474, %2477 - %2479 = call float @llvm.sqrt.f32.126(float %2478) - %2480 = bitcast i32 %102 to float - %2481 = fcmp olt float %2480, 0.000000e+00 - %2482 = sext i1 %2481 to i32 - %2483 = bitcast i32 %102 to float - %2484 = fcmp ogt float %2483, 0.000000e+00 - %2485 = zext i1 %2484 to i32 - %2486 = add nsw i32 %2482, %2485 - %2487 = sitofp i32 %2486 to float - %2488 = fneg float %2487 - %2489 = fmul float %2479, %2488 - %2490 = bitcast i32 %102 to float - %2491 = fadd float %2490, %2489 - %2492 = fmul float %2470, %2491 - %2493 = fadd float %2492, 0.000000e+00 - %2494 = bitcast i32 %102 to float - %2495 = bitcast i32 %102 to float - %2496 = fmul float %2494, %2495 - %2497 = fadd float %2496, 0.000000e+00 - %2498 = bitcast i32 %131 to float - %2499 = bitcast i32 %131 to 
float - %2500 = fmul float %2498, %2499 - %2501 = fadd float %2497, %2500 - %2502 = call float @llvm.sqrt.f32.127(float %2501) - %2503 = bitcast i32 %102 to float - %2504 = fcmp olt float %2503, 0.000000e+00 - %2505 = sext i1 %2504 to i32 - %2506 = bitcast i32 %102 to float - %2507 = fcmp ogt float %2506, 0.000000e+00 - %2508 = zext i1 %2507 to i32 - %2509 = add nsw i32 %2505, %2508 - %2510 = sitofp i32 %2509 to float - %2511 = fneg float %2510 - %2512 = fmul float %2502, %2511 - %2513 = fmul float %2512, 0.000000e+00 - %2514 = bitcast i32 %131 to float - %2515 = fadd float %2514, %2513 - %2516 = bitcast i32 %102 to float - %2517 = bitcast i32 %102 to float - %2518 = fmul float %2516, %2517 - %2519 = fadd float %2518, 0.000000e+00 - %2520 = bitcast i32 %131 to float - %2521 = bitcast i32 %131 to float - %2522 = fmul float %2520, %2521 - %2523 = fadd float %2519, %2522 - %2524 = call float @llvm.sqrt.f32.128(float %2523) - %2525 = bitcast i32 %102 to float - %2526 = fcmp olt float %2525, 0.000000e+00 - %2527 = sext i1 %2526 to i32 - %2528 = bitcast i32 %102 to float - %2529 = fcmp ogt float %2528, 0.000000e+00 - %2530 = zext i1 %2529 to i32 - %2531 = add nsw i32 %2527, %2530 - %2532 = sitofp i32 %2531 to float - %2533 = fneg float %2532 - %2534 = fmul float %2524, %2533 - %2535 = fmul float %2534, 0.000000e+00 - %2536 = bitcast i32 %131 to float - %2537 = fadd float %2536, %2535 - %2538 = fmul float %2515, %2537 - %2539 = fadd float %2493, %2538 - %2540 = call float @llvm.sqrt.f32.129(float %2539) - %2541 = fadd float %2540, 0.000000e+00 - %2542 = fdiv float %2449, %2541 - %2543 = fmul float %2427, %2542 - %2544 = fneg float %2543 - %2545 = getelementptr float, float* %0, i32 0 - %2546 = getelementptr inbounds float, float* %2545, i64 3 - %2547 = load float, float* %2546, align 4 - %2548 = fmul float %2544, %2547 - %2549 = fadd float %2312, %2548 - %2550 = insertelement <4 x float> zeroinitializer, float %2549, i32 0 - %2551 = insertelement <4 x float> %2550, float 0.000000e+00, i32 1 - %2552 = insertelement <4 x float> %2551, float 0.000000e+00, i32 2 - %2553 = insertelement <4 x float> %2552, float 0.000000e+00, i32 3 - %2554 = extractelement <4 x float> %2553, i32 0 - %2555 = getelementptr float, float* %2, i32 0 - %2556 = getelementptr inbounds float, float* %2555, i64 1 - store float %2554, float* %2556, align 4 - %2557 = extractelement <4 x float> %2553, i32 1 - %2558 = getelementptr float, float* %2, i32 0 - %2559 = getelementptr inbounds float, float* %2558, i64 2 - store float %2557, float* %2559, align 4 - %2560 = bitcast i32 %102 to float - %2561 = bitcast i32 %102 to float - %2562 = fmul float %2560, %2561 - %2563 = fadd float %2562, 0.000000e+00 - %2564 = bitcast i32 %131 to float - %2565 = bitcast i32 %131 to float - %2566 = fmul float %2564, %2565 - %2567 = fadd float %2563, %2566 - %2568 = call float @llvm.sqrt.f32.130(float %2567) - %2569 = bitcast i32 %102 to float - %2570 = fcmp olt float %2569, 0.000000e+00 - %2571 = sext i1 %2570 to i32 - %2572 = bitcast i32 %102 to float - %2573 = fcmp ogt float %2572, 0.000000e+00 - %2574 = zext i1 %2573 to i32 - %2575 = add nsw i32 %2571, %2574 - %2576 = sitofp i32 %2575 to float - %2577 = fneg float %2576 - %2578 = fmul float %2568, %2577 - %2579 = fmul float %2578, 0.000000e+00 - %2580 = bitcast i32 %131 to float - %2581 = fadd float %2580, %2579 - %2582 = bitcast i32 %102 to float - %2583 = bitcast i32 %102 to float - %2584 = fmul float %2582, %2583 - %2585 = fadd float %2584, 0.000000e+00 - %2586 = bitcast i32 %131 to float - 
%2587 = bitcast i32 %131 to float - %2588 = fmul float %2586, %2587 - %2589 = fadd float %2585, %2588 - %2590 = call float @llvm.sqrt.f32.131(float %2589) - %2591 = bitcast i32 %102 to float - %2592 = fcmp olt float %2591, 0.000000e+00 - %2593 = sext i1 %2592 to i32 - %2594 = bitcast i32 %102 to float - %2595 = fcmp ogt float %2594, 0.000000e+00 - %2596 = zext i1 %2595 to i32 - %2597 = add nsw i32 %2593, %2596 - %2598 = sitofp i32 %2597 to float - %2599 = fneg float %2598 - %2600 = fmul float %2590, %2599 - %2601 = bitcast i32 %102 to float - %2602 = fadd float %2601, %2600 - %2603 = bitcast i32 %102 to float - %2604 = bitcast i32 %102 to float - %2605 = fmul float %2603, %2604 - %2606 = fadd float %2605, 0.000000e+00 - %2607 = bitcast i32 %131 to float - %2608 = bitcast i32 %131 to float - %2609 = fmul float %2607, %2608 - %2610 = fadd float %2606, %2609 - %2611 = call float @llvm.sqrt.f32.132(float %2610) - %2612 = bitcast i32 %102 to float - %2613 = fcmp olt float %2612, 0.000000e+00 - %2614 = sext i1 %2613 to i32 - %2615 = bitcast i32 %102 to float - %2616 = fcmp ogt float %2615, 0.000000e+00 - %2617 = zext i1 %2616 to i32 - %2618 = add nsw i32 %2614, %2617 - %2619 = sitofp i32 %2618 to float - %2620 = fneg float %2619 - %2621 = fmul float %2611, %2620 - %2622 = bitcast i32 %102 to float - %2623 = fadd float %2622, %2621 - %2624 = fmul float %2602, %2623 - %2625 = fadd float %2624, 0.000000e+00 - %2626 = bitcast i32 %102 to float - %2627 = bitcast i32 %102 to float - %2628 = fmul float %2626, %2627 - %2629 = fadd float %2628, 0.000000e+00 - %2630 = bitcast i32 %131 to float - %2631 = bitcast i32 %131 to float - %2632 = fmul float %2630, %2631 - %2633 = fadd float %2629, %2632 - %2634 = call float @llvm.sqrt.f32.133(float %2633) - %2635 = bitcast i32 %102 to float - %2636 = fcmp olt float %2635, 0.000000e+00 - %2637 = sext i1 %2636 to i32 - %2638 = bitcast i32 %102 to float - %2639 = fcmp ogt float %2638, 0.000000e+00 - %2640 = zext i1 %2639 to i32 - %2641 = add nsw i32 %2637, %2640 - %2642 = sitofp i32 %2641 to float - %2643 = fneg float %2642 - %2644 = fmul float %2634, %2643 - %2645 = fmul float %2644, 0.000000e+00 - %2646 = bitcast i32 %131 to float - %2647 = fadd float %2646, %2645 - %2648 = bitcast i32 %102 to float - %2649 = bitcast i32 %102 to float - %2650 = fmul float %2648, %2649 - %2651 = fadd float %2650, 0.000000e+00 - %2652 = bitcast i32 %131 to float - %2653 = bitcast i32 %131 to float - %2654 = fmul float %2652, %2653 - %2655 = fadd float %2651, %2654 - %2656 = call float @llvm.sqrt.f32.134(float %2655) - %2657 = bitcast i32 %102 to float - %2658 = fcmp olt float %2657, 0.000000e+00 - %2659 = sext i1 %2658 to i32 - %2660 = bitcast i32 %102 to float - %2661 = fcmp ogt float %2660, 0.000000e+00 - %2662 = zext i1 %2661 to i32 - %2663 = add nsw i32 %2659, %2662 - %2664 = sitofp i32 %2663 to float - %2665 = fneg float %2664 - %2666 = fmul float %2656, %2665 - %2667 = fmul float %2666, 0.000000e+00 - %2668 = bitcast i32 %131 to float - %2669 = fadd float %2668, %2667 - %2670 = fmul float %2647, %2669 - %2671 = fadd float %2625, %2670 - %2672 = call float @llvm.sqrt.f32.135(float %2671) - %2673 = fadd float %2672, 0.000000e+00 - %2674 = fdiv float %2581, %2673 - %2675 = fmul float %2674, 2.000000e+00 - %2676 = bitcast i32 %102 to float - %2677 = bitcast i32 %102 to float - %2678 = fmul float %2676, %2677 - %2679 = fadd float %2678, 0.000000e+00 - %2680 = bitcast i32 %131 to float - %2681 = bitcast i32 %131 to float - %2682 = fmul float %2680, %2681 - %2683 = fadd float %2679, 
%2682 - %2684 = call float @llvm.sqrt.f32.136(float %2683) - %2685 = bitcast i32 %102 to float - %2686 = fcmp olt float %2685, 0.000000e+00 - %2687 = sext i1 %2686 to i32 - %2688 = bitcast i32 %102 to float - %2689 = fcmp ogt float %2688, 0.000000e+00 - %2690 = zext i1 %2689 to i32 - %2691 = add nsw i32 %2687, %2690 - %2692 = sitofp i32 %2691 to float - %2693 = fneg float %2692 - %2694 = fmul float %2684, %2693 - %2695 = bitcast i32 %102 to float - %2696 = fadd float %2695, %2694 - %2697 = bitcast i32 %102 to float - %2698 = bitcast i32 %102 to float - %2699 = fmul float %2697, %2698 - %2700 = fadd float %2699, 0.000000e+00 - %2701 = bitcast i32 %131 to float - %2702 = bitcast i32 %131 to float - %2703 = fmul float %2701, %2702 - %2704 = fadd float %2700, %2703 - %2705 = call float @llvm.sqrt.f32.137(float %2704) - %2706 = bitcast i32 %102 to float - %2707 = fcmp olt float %2706, 0.000000e+00 - %2708 = sext i1 %2707 to i32 - %2709 = bitcast i32 %102 to float - %2710 = fcmp ogt float %2709, 0.000000e+00 - %2711 = zext i1 %2710 to i32 - %2712 = add nsw i32 %2708, %2711 - %2713 = sitofp i32 %2712 to float - %2714 = fneg float %2713 - %2715 = fmul float %2705, %2714 - %2716 = bitcast i32 %102 to float - %2717 = fadd float %2716, %2715 - %2718 = bitcast i32 %102 to float - %2719 = bitcast i32 %102 to float - %2720 = fmul float %2718, %2719 - %2721 = fadd float %2720, 0.000000e+00 - %2722 = bitcast i32 %131 to float - %2723 = bitcast i32 %131 to float - %2724 = fmul float %2722, %2723 - %2725 = fadd float %2721, %2724 - %2726 = call float @llvm.sqrt.f32.138(float %2725) - %2727 = bitcast i32 %102 to float - %2728 = fcmp olt float %2727, 0.000000e+00 - %2729 = sext i1 %2728 to i32 - %2730 = bitcast i32 %102 to float - %2731 = fcmp ogt float %2730, 0.000000e+00 - %2732 = zext i1 %2731 to i32 - %2733 = add nsw i32 %2729, %2732 - %2734 = sitofp i32 %2733 to float - %2735 = fneg float %2734 - %2736 = fmul float %2726, %2735 - %2737 = bitcast i32 %102 to float - %2738 = fadd float %2737, %2736 - %2739 = fmul float %2717, %2738 - %2740 = fadd float %2739, 0.000000e+00 - %2741 = bitcast i32 %102 to float - %2742 = bitcast i32 %102 to float - %2743 = fmul float %2741, %2742 - %2744 = fadd float %2743, 0.000000e+00 - %2745 = bitcast i32 %131 to float - %2746 = bitcast i32 %131 to float - %2747 = fmul float %2745, %2746 - %2748 = fadd float %2744, %2747 - %2749 = call float @llvm.sqrt.f32.139(float %2748) - %2750 = bitcast i32 %102 to float - %2751 = fcmp olt float %2750, 0.000000e+00 - %2752 = sext i1 %2751 to i32 - %2753 = bitcast i32 %102 to float - %2754 = fcmp ogt float %2753, 0.000000e+00 - %2755 = zext i1 %2754 to i32 - %2756 = add nsw i32 %2752, %2755 - %2757 = sitofp i32 %2756 to float - %2758 = fneg float %2757 - %2759 = fmul float %2749, %2758 - %2760 = fmul float %2759, 0.000000e+00 - %2761 = bitcast i32 %131 to float - %2762 = fadd float %2761, %2760 - %2763 = bitcast i32 %102 to float - %2764 = bitcast i32 %102 to float - %2765 = fmul float %2763, %2764 - %2766 = fadd float %2765, 0.000000e+00 - %2767 = bitcast i32 %131 to float - %2768 = bitcast i32 %131 to float - %2769 = fmul float %2767, %2768 - %2770 = fadd float %2766, %2769 - %2771 = call float @llvm.sqrt.f32.140(float %2770) - %2772 = bitcast i32 %102 to float - %2773 = fcmp olt float %2772, 0.000000e+00 - %2774 = sext i1 %2773 to i32 - %2775 = bitcast i32 %102 to float - %2776 = fcmp ogt float %2775, 0.000000e+00 - %2777 = zext i1 %2776 to i32 - %2778 = add nsw i32 %2774, %2777 - %2779 = sitofp i32 %2778 to float - %2780 = fneg float 
%2779 - %2781 = fmul float %2771, %2780 - %2782 = fmul float %2781, 0.000000e+00 - %2783 = bitcast i32 %131 to float - %2784 = fadd float %2783, %2782 - %2785 = fmul float %2762, %2784 - %2786 = fadd float %2740, %2785 - %2787 = call float @llvm.sqrt.f32.141(float %2786) - %2788 = fadd float %2787, 0.000000e+00 - %2789 = fdiv float %2696, %2788 - %2790 = fmul float %2675, %2789 - %2791 = fneg float %2790 - %2792 = insertelement <4 x float> zeroinitializer, float %2791, i32 0 - %2793 = insertelement <4 x float> %2792, float 0.000000e+00, i32 1 - %2794 = insertelement <4 x float> %2793, float 0.000000e+00, i32 2 - %2795 = insertelement <4 x float> %2794, float 0.000000e+00, i32 3 - %2796 = getelementptr float, float* %0, i32 0 - %2797 = load float, float* %2796, align 4 - %2798 = insertelement <4 x float> zeroinitializer, float %2797, i32 0 - %2799 = insertelement <4 x float> %2798, float 0.000000e+00, i32 1 - %2800 = insertelement <4 x float> %2799, float 0.000000e+00, i32 2 - %2801 = insertelement <4 x float> %2800, float 0.000000e+00, i32 3 - %2802 = call <4 x float> @llvm.fma.f32.142(<4 x float> %2795, <4 x float> %2801, <4 x float> zeroinitializer) - %2803 = extractelement <4 x float> %2802, i32 0 - %2804 = getelementptr float, float* %2, i32 0 - %2805 = getelementptr inbounds float, float* %2804, i64 2 - store float %2803, float* %2805, align 4 - %2806 = bitcast i32 %102 to float - %2807 = bitcast i32 %102 to float - %2808 = fmul float %2806, %2807 - %2809 = fadd float %2808, 0.000000e+00 - %2810 = bitcast i32 %131 to float - %2811 = bitcast i32 %131 to float - %2812 = fmul float %2810, %2811 - %2813 = fadd float %2809, %2812 - %2814 = call float @llvm.sqrt.f32.143(float %2813) - %2815 = bitcast i32 %102 to float - %2816 = fcmp olt float %2815, 0.000000e+00 - %2817 = sext i1 %2816 to i32 - %2818 = bitcast i32 %102 to float - %2819 = fcmp ogt float %2818, 0.000000e+00 - %2820 = zext i1 %2819 to i32 - %2821 = add nsw i32 %2817, %2820 - %2822 = sitofp i32 %2821 to float - %2823 = fneg float %2822 - %2824 = fmul float %2814, %2823 - %2825 = fmul float %2824, 0.000000e+00 - %2826 = bitcast i32 %131 to float - %2827 = fadd float %2826, %2825 - %2828 = bitcast i32 %102 to float - %2829 = bitcast i32 %102 to float - %2830 = fmul float %2828, %2829 - %2831 = fadd float %2830, 0.000000e+00 - %2832 = bitcast i32 %131 to float - %2833 = bitcast i32 %131 to float - %2834 = fmul float %2832, %2833 - %2835 = fadd float %2831, %2834 - %2836 = call float @llvm.sqrt.f32.144(float %2835) - %2837 = bitcast i32 %102 to float - %2838 = fcmp olt float %2837, 0.000000e+00 - %2839 = sext i1 %2838 to i32 - %2840 = bitcast i32 %102 to float - %2841 = fcmp ogt float %2840, 0.000000e+00 - %2842 = zext i1 %2841 to i32 - %2843 = add nsw i32 %2839, %2842 - %2844 = sitofp i32 %2843 to float - %2845 = fneg float %2844 - %2846 = fmul float %2836, %2845 - %2847 = bitcast i32 %102 to float - %2848 = fadd float %2847, %2846 - %2849 = bitcast i32 %102 to float - %2850 = bitcast i32 %102 to float - %2851 = fmul float %2849, %2850 - %2852 = fadd float %2851, 0.000000e+00 - %2853 = bitcast i32 %131 to float - %2854 = bitcast i32 %131 to float - %2855 = fmul float %2853, %2854 - %2856 = fadd float %2852, %2855 - %2857 = call float @llvm.sqrt.f32.145(float %2856) - %2858 = bitcast i32 %102 to float - %2859 = fcmp olt float %2858, 0.000000e+00 - %2860 = sext i1 %2859 to i32 - %2861 = bitcast i32 %102 to float - %2862 = fcmp ogt float %2861, 0.000000e+00 - %2863 = zext i1 %2862 to i32 - %2864 = add nsw i32 %2860, %2863 - %2865 = 
sitofp i32 %2864 to float - %2866 = fneg float %2865 - %2867 = fmul float %2857, %2866 - %2868 = bitcast i32 %102 to float - %2869 = fadd float %2868, %2867 - %2870 = fmul float %2848, %2869 - %2871 = fadd float %2870, 0.000000e+00 - %2872 = bitcast i32 %102 to float - %2873 = bitcast i32 %102 to float - %2874 = fmul float %2872, %2873 - %2875 = fadd float %2874, 0.000000e+00 - %2876 = bitcast i32 %131 to float - %2877 = bitcast i32 %131 to float - %2878 = fmul float %2876, %2877 - %2879 = fadd float %2875, %2878 - %2880 = call float @llvm.sqrt.f32.146(float %2879) - %2881 = bitcast i32 %102 to float - %2882 = fcmp olt float %2881, 0.000000e+00 - %2883 = sext i1 %2882 to i32 - %2884 = bitcast i32 %102 to float - %2885 = fcmp ogt float %2884, 0.000000e+00 - %2886 = zext i1 %2885 to i32 - %2887 = add nsw i32 %2883, %2886 - %2888 = sitofp i32 %2887 to float - %2889 = fneg float %2888 - %2890 = fmul float %2880, %2889 - %2891 = fmul float %2890, 0.000000e+00 - %2892 = bitcast i32 %131 to float - %2893 = fadd float %2892, %2891 - %2894 = bitcast i32 %102 to float - %2895 = bitcast i32 %102 to float - %2896 = fmul float %2894, %2895 - %2897 = fadd float %2896, 0.000000e+00 - %2898 = bitcast i32 %131 to float - %2899 = bitcast i32 %131 to float - %2900 = fmul float %2898, %2899 - %2901 = fadd float %2897, %2900 - %2902 = call float @llvm.sqrt.f32.147(float %2901) - %2903 = bitcast i32 %102 to float - %2904 = fcmp olt float %2903, 0.000000e+00 - %2905 = sext i1 %2904 to i32 - %2906 = bitcast i32 %102 to float - %2907 = fcmp ogt float %2906, 0.000000e+00 - %2908 = zext i1 %2907 to i32 - %2909 = add nsw i32 %2905, %2908 - %2910 = sitofp i32 %2909 to float - %2911 = fneg float %2910 - %2912 = fmul float %2902, %2911 - %2913 = fmul float %2912, 0.000000e+00 - %2914 = bitcast i32 %131 to float - %2915 = fadd float %2914, %2913 - %2916 = fmul float %2893, %2915 - %2917 = fadd float %2871, %2916 - %2918 = call float @llvm.sqrt.f32.148(float %2917) - %2919 = fadd float %2918, 0.000000e+00 - %2920 = fdiv float %2827, %2919 - %2921 = fmul float %2920, 2.000000e+00 - %2922 = bitcast i32 %102 to float - %2923 = bitcast i32 %102 to float - %2924 = fmul float %2922, %2923 - %2925 = fadd float %2924, 0.000000e+00 - %2926 = bitcast i32 %131 to float - %2927 = bitcast i32 %131 to float - %2928 = fmul float %2926, %2927 - %2929 = fadd float %2925, %2928 - %2930 = call float @llvm.sqrt.f32.149(float %2929) - %2931 = bitcast i32 %102 to float - %2932 = fcmp olt float %2931, 0.000000e+00 - %2933 = sext i1 %2932 to i32 - %2934 = bitcast i32 %102 to float - %2935 = fcmp ogt float %2934, 0.000000e+00 - %2936 = zext i1 %2935 to i32 - %2937 = add nsw i32 %2933, %2936 - %2938 = sitofp i32 %2937 to float - %2939 = fneg float %2938 - %2940 = fmul float %2930, %2939 - %2941 = bitcast i32 %102 to float - %2942 = fadd float %2941, %2940 - %2943 = bitcast i32 %102 to float - %2944 = bitcast i32 %102 to float - %2945 = fmul float %2943, %2944 - %2946 = fadd float %2945, 0.000000e+00 - %2947 = bitcast i32 %131 to float - %2948 = bitcast i32 %131 to float - %2949 = fmul float %2947, %2948 - %2950 = fadd float %2946, %2949 - %2951 = call float @llvm.sqrt.f32.150(float %2950) - %2952 = bitcast i32 %102 to float - %2953 = fcmp olt float %2952, 0.000000e+00 - %2954 = sext i1 %2953 to i32 - %2955 = bitcast i32 %102 to float - %2956 = fcmp ogt float %2955, 0.000000e+00 - %2957 = zext i1 %2956 to i32 - %2958 = add nsw i32 %2954, %2957 - %2959 = sitofp i32 %2958 to float - %2960 = fneg float %2959 - %2961 = fmul float %2951, %2960 - %2962 = 
bitcast i32 %102 to float - %2963 = fadd float %2962, %2961 - %2964 = bitcast i32 %102 to float - %2965 = bitcast i32 %102 to float - %2966 = fmul float %2964, %2965 - %2967 = fadd float %2966, 0.000000e+00 - %2968 = bitcast i32 %131 to float - %2969 = bitcast i32 %131 to float - %2970 = fmul float %2968, %2969 - %2971 = fadd float %2967, %2970 - %2972 = call float @llvm.sqrt.f32.151(float %2971) - %2973 = bitcast i32 %102 to float - %2974 = fcmp olt float %2973, 0.000000e+00 - %2975 = sext i1 %2974 to i32 - %2976 = bitcast i32 %102 to float - %2977 = fcmp ogt float %2976, 0.000000e+00 - %2978 = zext i1 %2977 to i32 - %2979 = add nsw i32 %2975, %2978 - %2980 = sitofp i32 %2979 to float - %2981 = fneg float %2980 - %2982 = fmul float %2972, %2981 - %2983 = bitcast i32 %102 to float - %2984 = fadd float %2983, %2982 - %2985 = fmul float %2963, %2984 - %2986 = fadd float %2985, 0.000000e+00 - %2987 = bitcast i32 %102 to float - %2988 = bitcast i32 %102 to float - %2989 = fmul float %2987, %2988 - %2990 = fadd float %2989, 0.000000e+00 - %2991 = bitcast i32 %131 to float - %2992 = bitcast i32 %131 to float - %2993 = fmul float %2991, %2992 - %2994 = fadd float %2990, %2993 - %2995 = call float @llvm.sqrt.f32.152(float %2994) - %2996 = bitcast i32 %102 to float - %2997 = fcmp olt float %2996, 0.000000e+00 - %2998 = sext i1 %2997 to i32 - %2999 = bitcast i32 %102 to float - %3000 = fcmp ogt float %2999, 0.000000e+00 - %3001 = zext i1 %3000 to i32 - %3002 = add nsw i32 %2998, %3001 - %3003 = sitofp i32 %3002 to float - %3004 = fneg float %3003 - %3005 = fmul float %2995, %3004 - %3006 = fmul float %3005, 0.000000e+00 - %3007 = bitcast i32 %131 to float - %3008 = fadd float %3007, %3006 - %3009 = bitcast i32 %102 to float - %3010 = bitcast i32 %102 to float - %3011 = fmul float %3009, %3010 - %3012 = fadd float %3011, 0.000000e+00 - %3013 = bitcast i32 %131 to float - %3014 = bitcast i32 %131 to float - %3015 = fmul float %3013, %3014 - %3016 = fadd float %3012, %3015 - %3017 = call float @llvm.sqrt.f32.153(float %3016) - %3018 = bitcast i32 %102 to float - %3019 = fcmp olt float %3018, 0.000000e+00 - %3020 = sext i1 %3019 to i32 - %3021 = bitcast i32 %102 to float - %3022 = fcmp ogt float %3021, 0.000000e+00 - %3023 = zext i1 %3022 to i32 - %3024 = add nsw i32 %3020, %3023 - %3025 = sitofp i32 %3024 to float - %3026 = fneg float %3025 - %3027 = fmul float %3017, %3026 - %3028 = fmul float %3027, 0.000000e+00 - %3029 = bitcast i32 %131 to float - %3030 = fadd float %3029, %3028 - %3031 = fmul float %3008, %3030 - %3032 = fadd float %2986, %3031 - %3033 = call float @llvm.sqrt.f32.154(float %3032) - %3034 = fadd float %3033, 0.000000e+00 - %3035 = fdiv float %2942, %3034 - %3036 = fmul float %2921, %3035 - %3037 = fneg float %3036 - %3038 = fmul float %3037, %2797 - %3039 = fadd float %3038, 0.000000e+00 - %3040 = bitcast i32 %102 to float - %3041 = bitcast i32 %102 to float - %3042 = fmul float %3040, %3041 - %3043 = fadd float %3042, 0.000000e+00 - %3044 = bitcast i32 %131 to float - %3045 = bitcast i32 %131 to float - %3046 = fmul float %3044, %3045 - %3047 = fadd float %3043, %3046 - %3048 = call float @llvm.sqrt.f32.155(float %3047) - %3049 = bitcast i32 %102 to float - %3050 = fcmp olt float %3049, 0.000000e+00 - %3051 = sext i1 %3050 to i32 - %3052 = bitcast i32 %102 to float - %3053 = fcmp ogt float %3052, 0.000000e+00 - %3054 = zext i1 %3053 to i32 - %3055 = add nsw i32 %3051, %3054 - %3056 = sitofp i32 %3055 to float - %3057 = fneg float %3056 - %3058 = fmul float %3048, %3057 - %3059 = 
fmul float %3058, 0.000000e+00 - %3060 = bitcast i32 %131 to float - %3061 = fadd float %3060, %3059 - %3062 = bitcast i32 %102 to float - %3063 = bitcast i32 %102 to float - %3064 = fmul float %3062, %3063 - %3065 = fadd float %3064, 0.000000e+00 - %3066 = bitcast i32 %131 to float - %3067 = bitcast i32 %131 to float - %3068 = fmul float %3066, %3067 - %3069 = fadd float %3065, %3068 - %3070 = call float @llvm.sqrt.f32.156(float %3069) - %3071 = bitcast i32 %102 to float - %3072 = fcmp olt float %3071, 0.000000e+00 - %3073 = sext i1 %3072 to i32 - %3074 = bitcast i32 %102 to float - %3075 = fcmp ogt float %3074, 0.000000e+00 - %3076 = zext i1 %3075 to i32 - %3077 = add nsw i32 %3073, %3076 - %3078 = sitofp i32 %3077 to float - %3079 = fneg float %3078 - %3080 = fmul float %3070, %3079 - %3081 = bitcast i32 %102 to float - %3082 = fadd float %3081, %3080 - %3083 = bitcast i32 %102 to float - %3084 = bitcast i32 %102 to float - %3085 = fmul float %3083, %3084 - %3086 = fadd float %3085, 0.000000e+00 - %3087 = bitcast i32 %131 to float - %3088 = bitcast i32 %131 to float - %3089 = fmul float %3087, %3088 - %3090 = fadd float %3086, %3089 - %3091 = call float @llvm.sqrt.f32.157(float %3090) - %3092 = bitcast i32 %102 to float - %3093 = fcmp olt float %3092, 0.000000e+00 - %3094 = sext i1 %3093 to i32 - %3095 = bitcast i32 %102 to float - %3096 = fcmp ogt float %3095, 0.000000e+00 - %3097 = zext i1 %3096 to i32 - %3098 = add nsw i32 %3094, %3097 - %3099 = sitofp i32 %3098 to float - %3100 = fneg float %3099 - %3101 = fmul float %3091, %3100 - %3102 = bitcast i32 %102 to float - %3103 = fadd float %3102, %3101 - %3104 = fmul float %3082, %3103 - %3105 = fadd float %3104, 0.000000e+00 - %3106 = bitcast i32 %102 to float - %3107 = bitcast i32 %102 to float - %3108 = fmul float %3106, %3107 - %3109 = fadd float %3108, 0.000000e+00 - %3110 = bitcast i32 %131 to float - %3111 = bitcast i32 %131 to float - %3112 = fmul float %3110, %3111 - %3113 = fadd float %3109, %3112 - %3114 = call float @llvm.sqrt.f32.158(float %3113) - %3115 = bitcast i32 %102 to float - %3116 = fcmp olt float %3115, 0.000000e+00 - %3117 = sext i1 %3116 to i32 - %3118 = bitcast i32 %102 to float - %3119 = fcmp ogt float %3118, 0.000000e+00 - %3120 = zext i1 %3119 to i32 - %3121 = add nsw i32 %3117, %3120 - %3122 = sitofp i32 %3121 to float - %3123 = fneg float %3122 - %3124 = fmul float %3114, %3123 - %3125 = fmul float %3124, 0.000000e+00 - %3126 = bitcast i32 %131 to float - %3127 = fadd float %3126, %3125 - %3128 = bitcast i32 %102 to float - %3129 = bitcast i32 %102 to float - %3130 = fmul float %3128, %3129 - %3131 = fadd float %3130, 0.000000e+00 - %3132 = bitcast i32 %131 to float - %3133 = bitcast i32 %131 to float - %3134 = fmul float %3132, %3133 - %3135 = fadd float %3131, %3134 - %3136 = call float @llvm.sqrt.f32.159(float %3135) - %3137 = bitcast i32 %102 to float - %3138 = fcmp olt float %3137, 0.000000e+00 - %3139 = sext i1 %3138 to i32 - %3140 = bitcast i32 %102 to float - %3141 = fcmp ogt float %3140, 0.000000e+00 - %3142 = zext i1 %3141 to i32 - %3143 = add nsw i32 %3139, %3142 - %3144 = sitofp i32 %3143 to float - %3145 = fneg float %3144 - %3146 = fmul float %3136, %3145 - %3147 = fmul float %3146, 0.000000e+00 - %3148 = bitcast i32 %131 to float - %3149 = fadd float %3148, %3147 - %3150 = fmul float %3127, %3149 - %3151 = fadd float %3105, %3150 - %3152 = call float @llvm.sqrt.f32.160(float %3151) - %3153 = fadd float %3152, 0.000000e+00 - %3154 = fdiv float %3061, %3153 - %3155 = fmul float %3154, 
2.000000e+00 - %3156 = bitcast i32 %102 to float - %3157 = bitcast i32 %102 to float - %3158 = fmul float %3156, %3157 - %3159 = fadd float %3158, 0.000000e+00 - %3160 = bitcast i32 %131 to float - %3161 = bitcast i32 %131 to float - %3162 = fmul float %3160, %3161 - %3163 = fadd float %3159, %3162 - %3164 = call float @llvm.sqrt.f32.161(float %3163) - %3165 = bitcast i32 %102 to float - %3166 = fcmp olt float %3165, 0.000000e+00 - %3167 = sext i1 %3166 to i32 - %3168 = bitcast i32 %102 to float - %3169 = fcmp ogt float %3168, 0.000000e+00 - %3170 = zext i1 %3169 to i32 - %3171 = add nsw i32 %3167, %3170 - %3172 = sitofp i32 %3171 to float - %3173 = fneg float %3172 - %3174 = fmul float %3164, %3173 - %3175 = fmul float %3174, 0.000000e+00 - %3176 = bitcast i32 %131 to float - %3177 = fadd float %3176, %3175 - %3178 = bitcast i32 %102 to float - %3179 = bitcast i32 %102 to float - %3180 = fmul float %3178, %3179 - %3181 = fadd float %3180, 0.000000e+00 - %3182 = bitcast i32 %131 to float - %3183 = bitcast i32 %131 to float - %3184 = fmul float %3182, %3183 - %3185 = fadd float %3181, %3184 - %3186 = call float @llvm.sqrt.f32.162(float %3185) - %3187 = bitcast i32 %102 to float - %3188 = fcmp olt float %3187, 0.000000e+00 - %3189 = sext i1 %3188 to i32 - %3190 = bitcast i32 %102 to float - %3191 = fcmp ogt float %3190, 0.000000e+00 - %3192 = zext i1 %3191 to i32 - %3193 = add nsw i32 %3189, %3192 - %3194 = sitofp i32 %3193 to float - %3195 = fneg float %3194 - %3196 = fmul float %3186, %3195 - %3197 = bitcast i32 %102 to float - %3198 = fadd float %3197, %3196 - %3199 = bitcast i32 %102 to float - %3200 = bitcast i32 %102 to float - %3201 = fmul float %3199, %3200 - %3202 = fadd float %3201, 0.000000e+00 - %3203 = bitcast i32 %131 to float - %3204 = bitcast i32 %131 to float - %3205 = fmul float %3203, %3204 - %3206 = fadd float %3202, %3205 - %3207 = call float @llvm.sqrt.f32.163(float %3206) - %3208 = bitcast i32 %102 to float - %3209 = fcmp olt float %3208, 0.000000e+00 - %3210 = sext i1 %3209 to i32 - %3211 = bitcast i32 %102 to float - %3212 = fcmp ogt float %3211, 0.000000e+00 - %3213 = zext i1 %3212 to i32 - %3214 = add nsw i32 %3210, %3213 - %3215 = sitofp i32 %3214 to float - %3216 = fneg float %3215 - %3217 = fmul float %3207, %3216 - %3218 = bitcast i32 %102 to float - %3219 = fadd float %3218, %3217 - %3220 = fmul float %3198, %3219 - %3221 = fadd float %3220, 0.000000e+00 - %3222 = bitcast i32 %102 to float - %3223 = bitcast i32 %102 to float - %3224 = fmul float %3222, %3223 - %3225 = fadd float %3224, 0.000000e+00 - %3226 = bitcast i32 %131 to float - %3227 = bitcast i32 %131 to float - %3228 = fmul float %3226, %3227 - %3229 = fadd float %3225, %3228 - %3230 = call float @llvm.sqrt.f32.164(float %3229) - %3231 = bitcast i32 %102 to float - %3232 = fcmp olt float %3231, 0.000000e+00 - %3233 = sext i1 %3232 to i32 - %3234 = bitcast i32 %102 to float - %3235 = fcmp ogt float %3234, 0.000000e+00 - %3236 = zext i1 %3235 to i32 - %3237 = add nsw i32 %3233, %3236 - %3238 = sitofp i32 %3237 to float - %3239 = fneg float %3238 - %3240 = fmul float %3230, %3239 - %3241 = fmul float %3240, 0.000000e+00 - %3242 = bitcast i32 %131 to float - %3243 = fadd float %3242, %3241 - %3244 = bitcast i32 %102 to float - %3245 = bitcast i32 %102 to float - %3246 = fmul float %3244, %3245 - %3247 = fadd float %3246, 0.000000e+00 - %3248 = bitcast i32 %131 to float - %3249 = bitcast i32 %131 to float - %3250 = fmul float %3248, %3249 - %3251 = fadd float %3247, %3250 - %3252 = call float 
@llvm.sqrt.f32.165(float %3251) - %3253 = bitcast i32 %102 to float - %3254 = fcmp olt float %3253, 0.000000e+00 - %3255 = sext i1 %3254 to i32 - %3256 = bitcast i32 %102 to float - %3257 = fcmp ogt float %3256, 0.000000e+00 - %3258 = zext i1 %3257 to i32 - %3259 = add nsw i32 %3255, %3258 - %3260 = sitofp i32 %3259 to float - %3261 = fneg float %3260 - %3262 = fmul float %3252, %3261 - %3263 = fmul float %3262, 0.000000e+00 - %3264 = bitcast i32 %131 to float - %3265 = fadd float %3264, %3263 - %3266 = fmul float %3243, %3265 - %3267 = fadd float %3221, %3266 - %3268 = call float @llvm.sqrt.f32.166(float %3267) - %3269 = fadd float %3268, 0.000000e+00 - %3270 = fdiv float %3177, %3269 - %3271 = fmul float %3155, %3270 - %3272 = fsub float 1.000000e+00, %3271 - %3273 = getelementptr float, float* %0, i32 0 - %3274 = getelementptr inbounds float, float* %3273, i64 2 - %3275 = load float, float* %3274, align 4 - %3276 = fmul float %3272, %3275 - %3277 = fadd float %3039, %3276 - %3278 = insertelement <4 x float> zeroinitializer, float %3277, i32 0 - %3279 = insertelement <4 x float> %3278, float 0.000000e+00, i32 1 - %3280 = insertelement <4 x float> %3279, float 0.000000e+00, i32 2 - %3281 = insertelement <4 x float> %3280, float 0.000000e+00, i32 3 - %3282 = extractelement <4 x float> %3281, i32 0 - %3283 = getelementptr float, float* %2, i32 0 - %3284 = getelementptr inbounds float, float* %3283, i64 2 - store float %3282, float* %3284, align 4 - %3285 = extractelement <4 x float> %3281, i32 1 - %3286 = getelementptr float, float* %2, i32 0 - %3287 = getelementptr inbounds float, float* %3286, i64 3 - store float %3285, float* %3287, align 4 - %3288 = bitcast i32 %102 to float - %3289 = bitcast i32 %102 to float - %3290 = fmul float %3288, %3289 - %3291 = fadd float %3290, 0.000000e+00 - %3292 = bitcast i32 %131 to float - %3293 = bitcast i32 %131 to float - %3294 = fmul float %3292, %3293 - %3295 = fadd float %3291, %3294 - %3296 = call float @llvm.sqrt.f32.167(float %3295) - %3297 = bitcast i32 %102 to float - %3298 = fcmp olt float %3297, 0.000000e+00 - %3299 = sext i1 %3298 to i32 - %3300 = bitcast i32 %102 to float - %3301 = fcmp ogt float %3300, 0.000000e+00 - %3302 = zext i1 %3301 to i32 - %3303 = add nsw i32 %3299, %3302 - %3304 = sitofp i32 %3303 to float - %3305 = fneg float %3304 - %3306 = fmul float %3296, %3305 - %3307 = fmul float %3306, 0.000000e+00 - %3308 = bitcast i32 %131 to float - %3309 = fadd float %3308, %3307 - %3310 = bitcast i32 %102 to float - %3311 = bitcast i32 %102 to float - %3312 = fmul float %3310, %3311 - %3313 = fadd float %3312, 0.000000e+00 - %3314 = bitcast i32 %131 to float - %3315 = bitcast i32 %131 to float - %3316 = fmul float %3314, %3315 - %3317 = fadd float %3313, %3316 - %3318 = call float @llvm.sqrt.f32.168(float %3317) - %3319 = bitcast i32 %102 to float - %3320 = fcmp olt float %3319, 0.000000e+00 - %3321 = sext i1 %3320 to i32 - %3322 = bitcast i32 %102 to float - %3323 = fcmp ogt float %3322, 0.000000e+00 - %3324 = zext i1 %3323 to i32 - %3325 = add nsw i32 %3321, %3324 - %3326 = sitofp i32 %3325 to float - %3327 = fneg float %3326 - %3328 = fmul float %3318, %3327 - %3329 = bitcast i32 %102 to float - %3330 = fadd float %3329, %3328 - %3331 = bitcast i32 %102 to float - %3332 = bitcast i32 %102 to float - %3333 = fmul float %3331, %3332 - %3334 = fadd float %3333, 0.000000e+00 - %3335 = bitcast i32 %131 to float - %3336 = bitcast i32 %131 to float - %3337 = fmul float %3335, %3336 - %3338 = fadd float %3334, %3337 - %3339 = call float 
@llvm.sqrt.f32.169(float %3338) - %3340 = bitcast i32 %102 to float - %3341 = fcmp olt float %3340, 0.000000e+00 - %3342 = sext i1 %3341 to i32 - %3343 = bitcast i32 %102 to float - %3344 = fcmp ogt float %3343, 0.000000e+00 - %3345 = zext i1 %3344 to i32 - %3346 = add nsw i32 %3342, %3345 - %3347 = sitofp i32 %3346 to float - %3348 = fneg float %3347 - %3349 = fmul float %3339, %3348 - %3350 = bitcast i32 %102 to float - %3351 = fadd float %3350, %3349 - %3352 = fmul float %3330, %3351 - %3353 = fadd float %3352, 0.000000e+00 - %3354 = bitcast i32 %102 to float - %3355 = bitcast i32 %102 to float - %3356 = fmul float %3354, %3355 - %3357 = fadd float %3356, 0.000000e+00 - %3358 = bitcast i32 %131 to float - %3359 = bitcast i32 %131 to float - %3360 = fmul float %3358, %3359 - %3361 = fadd float %3357, %3360 - %3362 = call float @llvm.sqrt.f32.170(float %3361) - %3363 = bitcast i32 %102 to float - %3364 = fcmp olt float %3363, 0.000000e+00 - %3365 = sext i1 %3364 to i32 - %3366 = bitcast i32 %102 to float - %3367 = fcmp ogt float %3366, 0.000000e+00 - %3368 = zext i1 %3367 to i32 - %3369 = add nsw i32 %3365, %3368 - %3370 = sitofp i32 %3369 to float - %3371 = fneg float %3370 - %3372 = fmul float %3362, %3371 - %3373 = fmul float %3372, 0.000000e+00 - %3374 = bitcast i32 %131 to float - %3375 = fadd float %3374, %3373 - %3376 = bitcast i32 %102 to float - %3377 = bitcast i32 %102 to float - %3378 = fmul float %3376, %3377 - %3379 = fadd float %3378, 0.000000e+00 - %3380 = bitcast i32 %131 to float - %3381 = bitcast i32 %131 to float - %3382 = fmul float %3380, %3381 - %3383 = fadd float %3379, %3382 - %3384 = call float @llvm.sqrt.f32.171(float %3383) - %3385 = bitcast i32 %102 to float - %3386 = fcmp olt float %3385, 0.000000e+00 - %3387 = sext i1 %3386 to i32 - %3388 = bitcast i32 %102 to float - %3389 = fcmp ogt float %3388, 0.000000e+00 - %3390 = zext i1 %3389 to i32 - %3391 = add nsw i32 %3387, %3390 - %3392 = sitofp i32 %3391 to float - %3393 = fneg float %3392 - %3394 = fmul float %3384, %3393 - %3395 = fmul float %3394, 0.000000e+00 - %3396 = bitcast i32 %131 to float - %3397 = fadd float %3396, %3395 - %3398 = fmul float %3375, %3397 - %3399 = fadd float %3353, %3398 - %3400 = call float @llvm.sqrt.f32.172(float %3399) - %3401 = fadd float %3400, 0.000000e+00 - %3402 = fdiv float %3309, %3401 - %3403 = fmul float %3402, 2.000000e+00 - %3404 = bitcast i32 %102 to float - %3405 = bitcast i32 %102 to float - %3406 = fmul float %3404, %3405 - %3407 = fadd float %3406, 0.000000e+00 - %3408 = bitcast i32 %131 to float - %3409 = bitcast i32 %131 to float - %3410 = fmul float %3408, %3409 - %3411 = fadd float %3407, %3410 - %3412 = call float @llvm.sqrt.f32.173(float %3411) - %3413 = bitcast i32 %102 to float - %3414 = fcmp olt float %3413, 0.000000e+00 - %3415 = sext i1 %3414 to i32 - %3416 = bitcast i32 %102 to float - %3417 = fcmp ogt float %3416, 0.000000e+00 - %3418 = zext i1 %3417 to i32 - %3419 = add nsw i32 %3415, %3418 - %3420 = sitofp i32 %3419 to float - %3421 = fneg float %3420 - %3422 = fmul float %3412, %3421 - %3423 = bitcast i32 %102 to float - %3424 = fadd float %3423, %3422 - %3425 = bitcast i32 %102 to float - %3426 = bitcast i32 %102 to float - %3427 = fmul float %3425, %3426 - %3428 = fadd float %3427, 0.000000e+00 - %3429 = bitcast i32 %131 to float - %3430 = bitcast i32 %131 to float - %3431 = fmul float %3429, %3430 - %3432 = fadd float %3428, %3431 - %3433 = call float @llvm.sqrt.f32.174(float %3432) - %3434 = bitcast i32 %102 to float - %3435 = fcmp olt float 
%3434, 0.000000e+00 - %3436 = sext i1 %3435 to i32 - %3437 = bitcast i32 %102 to float - %3438 = fcmp ogt float %3437, 0.000000e+00 - %3439 = zext i1 %3438 to i32 - %3440 = add nsw i32 %3436, %3439 - %3441 = sitofp i32 %3440 to float - %3442 = fneg float %3441 - %3443 = fmul float %3433, %3442 - %3444 = bitcast i32 %102 to float - %3445 = fadd float %3444, %3443 - %3446 = bitcast i32 %102 to float - %3447 = bitcast i32 %102 to float - %3448 = fmul float %3446, %3447 - %3449 = fadd float %3448, 0.000000e+00 - %3450 = bitcast i32 %131 to float - %3451 = bitcast i32 %131 to float - %3452 = fmul float %3450, %3451 - %3453 = fadd float %3449, %3452 - %3454 = call float @llvm.sqrt.f32.175(float %3453) - %3455 = bitcast i32 %102 to float - %3456 = fcmp olt float %3455, 0.000000e+00 - %3457 = sext i1 %3456 to i32 - %3458 = bitcast i32 %102 to float - %3459 = fcmp ogt float %3458, 0.000000e+00 - %3460 = zext i1 %3459 to i32 - %3461 = add nsw i32 %3457, %3460 - %3462 = sitofp i32 %3461 to float - %3463 = fneg float %3462 - %3464 = fmul float %3454, %3463 - %3465 = bitcast i32 %102 to float - %3466 = fadd float %3465, %3464 - %3467 = fmul float %3445, %3466 - %3468 = fadd float %3467, 0.000000e+00 - %3469 = bitcast i32 %102 to float - %3470 = bitcast i32 %102 to float - %3471 = fmul float %3469, %3470 - %3472 = fadd float %3471, 0.000000e+00 - %3473 = bitcast i32 %131 to float - %3474 = bitcast i32 %131 to float - %3475 = fmul float %3473, %3474 - %3476 = fadd float %3472, %3475 - %3477 = call float @llvm.sqrt.f32.176(float %3476) - %3478 = bitcast i32 %102 to float - %3479 = fcmp olt float %3478, 0.000000e+00 - %3480 = sext i1 %3479 to i32 - %3481 = bitcast i32 %102 to float - %3482 = fcmp ogt float %3481, 0.000000e+00 - %3483 = zext i1 %3482 to i32 - %3484 = add nsw i32 %3480, %3483 - %3485 = sitofp i32 %3484 to float - %3486 = fneg float %3485 - %3487 = fmul float %3477, %3486 - %3488 = fmul float %3487, 0.000000e+00 - %3489 = bitcast i32 %131 to float - %3490 = fadd float %3489, %3488 - %3491 = bitcast i32 %102 to float - %3492 = bitcast i32 %102 to float - %3493 = fmul float %3491, %3492 - %3494 = fadd float %3493, 0.000000e+00 - %3495 = bitcast i32 %131 to float - %3496 = bitcast i32 %131 to float - %3497 = fmul float %3495, %3496 - %3498 = fadd float %3494, %3497 - %3499 = call float @llvm.sqrt.f32.177(float %3498) - %3500 = bitcast i32 %102 to float - %3501 = fcmp olt float %3500, 0.000000e+00 - %3502 = sext i1 %3501 to i32 - %3503 = bitcast i32 %102 to float - %3504 = fcmp ogt float %3503, 0.000000e+00 - %3505 = zext i1 %3504 to i32 - %3506 = add nsw i32 %3502, %3505 - %3507 = sitofp i32 %3506 to float - %3508 = fneg float %3507 - %3509 = fmul float %3499, %3508 - %3510 = fmul float %3509, 0.000000e+00 - %3511 = bitcast i32 %131 to float - %3512 = fadd float %3511, %3510 - %3513 = fmul float %3490, %3512 - %3514 = fadd float %3468, %3513 - %3515 = call float @llvm.sqrt.f32.178(float %3514) - %3516 = fadd float %3515, 0.000000e+00 - %3517 = fdiv float %3424, %3516 - %3518 = fmul float %3403, %3517 - %3519 = fneg float %3518 - %3520 = insertelement <4 x float> zeroinitializer, float %3519, i32 0 - %3521 = insertelement <4 x float> %3520, float 0.000000e+00, i32 1 - %3522 = insertelement <4 x float> %3521, float 0.000000e+00, i32 2 - %3523 = insertelement <4 x float> %3522, float 0.000000e+00, i32 3 - %3524 = getelementptr float, float* %0, i32 0 - %3525 = getelementptr inbounds float, float* %3524, i64 1 - %3526 = load float, float* %3525, align 4 - %3527 = insertelement <4 x float> 
zeroinitializer, float %3526, i32 0 - %3528 = insertelement <4 x float> %3527, float 0.000000e+00, i32 1 - %3529 = insertelement <4 x float> %3528, float 0.000000e+00, i32 2 - %3530 = insertelement <4 x float> %3529, float 0.000000e+00, i32 3 - %3531 = call <4 x float> @llvm.fma.f32.179(<4 x float> %3523, <4 x float> %3530, <4 x float> zeroinitializer) - %3532 = extractelement <4 x float> %3531, i32 0 - %3533 = getelementptr float, float* %2, i32 0 - %3534 = getelementptr inbounds float, float* %3533, i64 3 - store float %3532, float* %3534, align 4 - %3535 = bitcast i32 %102 to float - %3536 = bitcast i32 %102 to float - %3537 = fmul float %3535, %3536 - %3538 = fadd float %3537, 0.000000e+00 - %3539 = bitcast i32 %131 to float - %3540 = bitcast i32 %131 to float - %3541 = fmul float %3539, %3540 - %3542 = fadd float %3538, %3541 - %3543 = call float @llvm.sqrt.f32.180(float %3542) - %3544 = bitcast i32 %102 to float - %3545 = fcmp olt float %3544, 0.000000e+00 - %3546 = sext i1 %3545 to i32 - %3547 = bitcast i32 %102 to float - %3548 = fcmp ogt float %3547, 0.000000e+00 - %3549 = zext i1 %3548 to i32 - %3550 = add nsw i32 %3546, %3549 - %3551 = sitofp i32 %3550 to float - %3552 = fneg float %3551 - %3553 = fmul float %3543, %3552 - %3554 = fmul float %3553, 0.000000e+00 - %3555 = bitcast i32 %131 to float - %3556 = fadd float %3555, %3554 - %3557 = bitcast i32 %102 to float - %3558 = bitcast i32 %102 to float - %3559 = fmul float %3557, %3558 - %3560 = fadd float %3559, 0.000000e+00 - %3561 = bitcast i32 %131 to float - %3562 = bitcast i32 %131 to float - %3563 = fmul float %3561, %3562 - %3564 = fadd float %3560, %3563 - %3565 = call float @llvm.sqrt.f32.181(float %3564) - %3566 = bitcast i32 %102 to float - %3567 = fcmp olt float %3566, 0.000000e+00 - %3568 = sext i1 %3567 to i32 - %3569 = bitcast i32 %102 to float - %3570 = fcmp ogt float %3569, 0.000000e+00 - %3571 = zext i1 %3570 to i32 - %3572 = add nsw i32 %3568, %3571 - %3573 = sitofp i32 %3572 to float - %3574 = fneg float %3573 - %3575 = fmul float %3565, %3574 - %3576 = bitcast i32 %102 to float - %3577 = fadd float %3576, %3575 - %3578 = bitcast i32 %102 to float - %3579 = bitcast i32 %102 to float - %3580 = fmul float %3578, %3579 - %3581 = fadd float %3580, 0.000000e+00 - %3582 = bitcast i32 %131 to float - %3583 = bitcast i32 %131 to float - %3584 = fmul float %3582, %3583 - %3585 = fadd float %3581, %3584 - %3586 = call float @llvm.sqrt.f32.182(float %3585) - %3587 = bitcast i32 %102 to float - %3588 = fcmp olt float %3587, 0.000000e+00 - %3589 = sext i1 %3588 to i32 - %3590 = bitcast i32 %102 to float - %3591 = fcmp ogt float %3590, 0.000000e+00 - %3592 = zext i1 %3591 to i32 - %3593 = add nsw i32 %3589, %3592 - %3594 = sitofp i32 %3593 to float - %3595 = fneg float %3594 - %3596 = fmul float %3586, %3595 - %3597 = bitcast i32 %102 to float - %3598 = fadd float %3597, %3596 - %3599 = fmul float %3577, %3598 - %3600 = fadd float %3599, 0.000000e+00 - %3601 = bitcast i32 %102 to float - %3602 = bitcast i32 %102 to float - %3603 = fmul float %3601, %3602 - %3604 = fadd float %3603, 0.000000e+00 - %3605 = bitcast i32 %131 to float - %3606 = bitcast i32 %131 to float - %3607 = fmul float %3605, %3606 - %3608 = fadd float %3604, %3607 - %3609 = call float @llvm.sqrt.f32.183(float %3608) - %3610 = bitcast i32 %102 to float - %3611 = fcmp olt float %3610, 0.000000e+00 - %3612 = sext i1 %3611 to i32 - %3613 = bitcast i32 %102 to float - %3614 = fcmp ogt float %3613, 0.000000e+00 - %3615 = zext i1 %3614 to i32 - %3616 = add nsw 
i32 %3612, %3615 - %3617 = sitofp i32 %3616 to float - %3618 = fneg float %3617 - %3619 = fmul float %3609, %3618 - %3620 = fmul float %3619, 0.000000e+00 - %3621 = bitcast i32 %131 to float - %3622 = fadd float %3621, %3620 - %3623 = bitcast i32 %102 to float - %3624 = bitcast i32 %102 to float - %3625 = fmul float %3623, %3624 - %3626 = fadd float %3625, 0.000000e+00 - %3627 = bitcast i32 %131 to float - %3628 = bitcast i32 %131 to float - %3629 = fmul float %3627, %3628 - %3630 = fadd float %3626, %3629 - %3631 = call float @llvm.sqrt.f32.184(float %3630) - %3632 = bitcast i32 %102 to float - %3633 = fcmp olt float %3632, 0.000000e+00 - %3634 = sext i1 %3633 to i32 - %3635 = bitcast i32 %102 to float - %3636 = fcmp ogt float %3635, 0.000000e+00 - %3637 = zext i1 %3636 to i32 - %3638 = add nsw i32 %3634, %3637 - %3639 = sitofp i32 %3638 to float - %3640 = fneg float %3639 - %3641 = fmul float %3631, %3640 - %3642 = fmul float %3641, 0.000000e+00 - %3643 = bitcast i32 %131 to float - %3644 = fadd float %3643, %3642 - %3645 = fmul float %3622, %3644 - %3646 = fadd float %3600, %3645 - %3647 = call float @llvm.sqrt.f32.185(float %3646) - %3648 = fadd float %3647, 0.000000e+00 - %3649 = fdiv float %3556, %3648 - %3650 = fmul float %3649, 2.000000e+00 - %3651 = bitcast i32 %102 to float - %3652 = bitcast i32 %102 to float - %3653 = fmul float %3651, %3652 - %3654 = fadd float %3653, 0.000000e+00 - %3655 = bitcast i32 %131 to float - %3656 = bitcast i32 %131 to float - %3657 = fmul float %3655, %3656 - %3658 = fadd float %3654, %3657 - %3659 = call float @llvm.sqrt.f32.186(float %3658) - %3660 = bitcast i32 %102 to float - %3661 = fcmp olt float %3660, 0.000000e+00 - %3662 = sext i1 %3661 to i32 - %3663 = bitcast i32 %102 to float - %3664 = fcmp ogt float %3663, 0.000000e+00 - %3665 = zext i1 %3664 to i32 - %3666 = add nsw i32 %3662, %3665 - %3667 = sitofp i32 %3666 to float - %3668 = fneg float %3667 - %3669 = fmul float %3659, %3668 - %3670 = bitcast i32 %102 to float - %3671 = fadd float %3670, %3669 - %3672 = bitcast i32 %102 to float - %3673 = bitcast i32 %102 to float - %3674 = fmul float %3672, %3673 - %3675 = fadd float %3674, 0.000000e+00 - %3676 = bitcast i32 %131 to float - %3677 = bitcast i32 %131 to float - %3678 = fmul float %3676, %3677 - %3679 = fadd float %3675, %3678 - %3680 = call float @llvm.sqrt.f32.187(float %3679) - %3681 = bitcast i32 %102 to float - %3682 = fcmp olt float %3681, 0.000000e+00 - %3683 = sext i1 %3682 to i32 - %3684 = bitcast i32 %102 to float - %3685 = fcmp ogt float %3684, 0.000000e+00 - %3686 = zext i1 %3685 to i32 - %3687 = add nsw i32 %3683, %3686 - %3688 = sitofp i32 %3687 to float - %3689 = fneg float %3688 - %3690 = fmul float %3680, %3689 - %3691 = bitcast i32 %102 to float - %3692 = fadd float %3691, %3690 - %3693 = bitcast i32 %102 to float - %3694 = bitcast i32 %102 to float - %3695 = fmul float %3693, %3694 - %3696 = fadd float %3695, 0.000000e+00 - %3697 = bitcast i32 %131 to float - %3698 = bitcast i32 %131 to float - %3699 = fmul float %3697, %3698 - %3700 = fadd float %3696, %3699 - %3701 = call float @llvm.sqrt.f32.188(float %3700) - %3702 = bitcast i32 %102 to float - %3703 = fcmp olt float %3702, 0.000000e+00 - %3704 = sext i1 %3703 to i32 - %3705 = bitcast i32 %102 to float - %3706 = fcmp ogt float %3705, 0.000000e+00 - %3707 = zext i1 %3706 to i32 - %3708 = add nsw i32 %3704, %3707 - %3709 = sitofp i32 %3708 to float - %3710 = fneg float %3709 - %3711 = fmul float %3701, %3710 - %3712 = bitcast i32 %102 to float - %3713 = fadd float 
%3712, %3711 - %3714 = fmul float %3692, %3713 - %3715 = fadd float %3714, 0.000000e+00 - %3716 = bitcast i32 %102 to float - %3717 = bitcast i32 %102 to float - %3718 = fmul float %3716, %3717 - %3719 = fadd float %3718, 0.000000e+00 - %3720 = bitcast i32 %131 to float - %3721 = bitcast i32 %131 to float - %3722 = fmul float %3720, %3721 - %3723 = fadd float %3719, %3722 - %3724 = call float @llvm.sqrt.f32.189(float %3723) - %3725 = bitcast i32 %102 to float - %3726 = fcmp olt float %3725, 0.000000e+00 - %3727 = sext i1 %3726 to i32 - %3728 = bitcast i32 %102 to float - %3729 = fcmp ogt float %3728, 0.000000e+00 - %3730 = zext i1 %3729 to i32 - %3731 = add nsw i32 %3727, %3730 - %3732 = sitofp i32 %3731 to float - %3733 = fneg float %3732 - %3734 = fmul float %3724, %3733 - %3735 = fmul float %3734, 0.000000e+00 - %3736 = bitcast i32 %131 to float - %3737 = fadd float %3736, %3735 - %3738 = bitcast i32 %102 to float - %3739 = bitcast i32 %102 to float - %3740 = fmul float %3738, %3739 - %3741 = fadd float %3740, 0.000000e+00 - %3742 = bitcast i32 %131 to float - %3743 = bitcast i32 %131 to float - %3744 = fmul float %3742, %3743 - %3745 = fadd float %3741, %3744 - %3746 = call float @llvm.sqrt.f32.190(float %3745) - %3747 = bitcast i32 %102 to float - %3748 = fcmp olt float %3747, 0.000000e+00 - %3749 = sext i1 %3748 to i32 - %3750 = bitcast i32 %102 to float - %3751 = fcmp ogt float %3750, 0.000000e+00 - %3752 = zext i1 %3751 to i32 - %3753 = add nsw i32 %3749, %3752 - %3754 = sitofp i32 %3753 to float - %3755 = fneg float %3754 - %3756 = fmul float %3746, %3755 - %3757 = fmul float %3756, 0.000000e+00 - %3758 = bitcast i32 %131 to float - %3759 = fadd float %3758, %3757 - %3760 = fmul float %3737, %3759 - %3761 = fadd float %3715, %3760 - %3762 = call float @llvm.sqrt.f32.191(float %3761) - %3763 = fadd float %3762, 0.000000e+00 - %3764 = fdiv float %3671, %3763 - %3765 = fmul float %3650, %3764 - %3766 = fneg float %3765 - %3767 = fmul float %3766, %3526 - %3768 = fadd float %3767, 0.000000e+00 - %3769 = bitcast i32 %102 to float - %3770 = bitcast i32 %102 to float - %3771 = fmul float %3769, %3770 - %3772 = fadd float %3771, 0.000000e+00 - %3773 = bitcast i32 %131 to float - %3774 = bitcast i32 %131 to float - %3775 = fmul float %3773, %3774 - %3776 = fadd float %3772, %3775 - %3777 = call float @llvm.sqrt.f32.192(float %3776) - %3778 = bitcast i32 %102 to float - %3779 = fcmp olt float %3778, 0.000000e+00 - %3780 = sext i1 %3779 to i32 - %3781 = bitcast i32 %102 to float - %3782 = fcmp ogt float %3781, 0.000000e+00 - %3783 = zext i1 %3782 to i32 - %3784 = add nsw i32 %3780, %3783 - %3785 = sitofp i32 %3784 to float - %3786 = fneg float %3785 - %3787 = fmul float %3777, %3786 - %3788 = fmul float %3787, 0.000000e+00 - %3789 = bitcast i32 %131 to float - %3790 = fadd float %3789, %3788 - %3791 = bitcast i32 %102 to float - %3792 = bitcast i32 %102 to float - %3793 = fmul float %3791, %3792 - %3794 = fadd float %3793, 0.000000e+00 - %3795 = bitcast i32 %131 to float - %3796 = bitcast i32 %131 to float - %3797 = fmul float %3795, %3796 - %3798 = fadd float %3794, %3797 - %3799 = call float @llvm.sqrt.f32.193(float %3798) - %3800 = bitcast i32 %102 to float - %3801 = fcmp olt float %3800, 0.000000e+00 - %3802 = sext i1 %3801 to i32 - %3803 = bitcast i32 %102 to float - %3804 = fcmp ogt float %3803, 0.000000e+00 - %3805 = zext i1 %3804 to i32 - %3806 = add nsw i32 %3802, %3805 - %3807 = sitofp i32 %3806 to float - %3808 = fneg float %3807 - %3809 = fmul float %3799, %3808 - %3810 = bitcast 
i32 %102 to float - %3811 = fadd float %3810, %3809 - %3812 = bitcast i32 %102 to float - %3813 = bitcast i32 %102 to float - %3814 = fmul float %3812, %3813 - %3815 = fadd float %3814, 0.000000e+00 - %3816 = bitcast i32 %131 to float - %3817 = bitcast i32 %131 to float - %3818 = fmul float %3816, %3817 - %3819 = fadd float %3815, %3818 - %3820 = call float @llvm.sqrt.f32.194(float %3819) - %3821 = bitcast i32 %102 to float - %3822 = fcmp olt float %3821, 0.000000e+00 - %3823 = sext i1 %3822 to i32 - %3824 = bitcast i32 %102 to float - %3825 = fcmp ogt float %3824, 0.000000e+00 - %3826 = zext i1 %3825 to i32 - %3827 = add nsw i32 %3823, %3826 - %3828 = sitofp i32 %3827 to float - %3829 = fneg float %3828 - %3830 = fmul float %3820, %3829 - %3831 = bitcast i32 %102 to float - %3832 = fadd float %3831, %3830 - %3833 = fmul float %3811, %3832 - %3834 = fadd float %3833, 0.000000e+00 - %3835 = bitcast i32 %102 to float - %3836 = bitcast i32 %102 to float - %3837 = fmul float %3835, %3836 - %3838 = fadd float %3837, 0.000000e+00 - %3839 = bitcast i32 %131 to float - %3840 = bitcast i32 %131 to float - %3841 = fmul float %3839, %3840 - %3842 = fadd float %3838, %3841 - %3843 = call float @llvm.sqrt.f32.195(float %3842) - %3844 = bitcast i32 %102 to float - %3845 = fcmp olt float %3844, 0.000000e+00 - %3846 = sext i1 %3845 to i32 - %3847 = bitcast i32 %102 to float - %3848 = fcmp ogt float %3847, 0.000000e+00 - %3849 = zext i1 %3848 to i32 - %3850 = add nsw i32 %3846, %3849 - %3851 = sitofp i32 %3850 to float - %3852 = fneg float %3851 - %3853 = fmul float %3843, %3852 - %3854 = fmul float %3853, 0.000000e+00 - %3855 = bitcast i32 %131 to float - %3856 = fadd float %3855, %3854 - %3857 = bitcast i32 %102 to float - %3858 = bitcast i32 %102 to float - %3859 = fmul float %3857, %3858 - %3860 = fadd float %3859, 0.000000e+00 - %3861 = bitcast i32 %131 to float - %3862 = bitcast i32 %131 to float - %3863 = fmul float %3861, %3862 - %3864 = fadd float %3860, %3863 - %3865 = call float @llvm.sqrt.f32.196(float %3864) - %3866 = bitcast i32 %102 to float - %3867 = fcmp olt float %3866, 0.000000e+00 - %3868 = sext i1 %3867 to i32 - %3869 = bitcast i32 %102 to float - %3870 = fcmp ogt float %3869, 0.000000e+00 - %3871 = zext i1 %3870 to i32 - %3872 = add nsw i32 %3868, %3871 - %3873 = sitofp i32 %3872 to float - %3874 = fneg float %3873 - %3875 = fmul float %3865, %3874 - %3876 = fmul float %3875, 0.000000e+00 - %3877 = bitcast i32 %131 to float - %3878 = fadd float %3877, %3876 - %3879 = fmul float %3856, %3878 - %3880 = fadd float %3834, %3879 - %3881 = call float @llvm.sqrt.f32.197(float %3880) - %3882 = fadd float %3881, 0.000000e+00 - %3883 = fdiv float %3790, %3882 - %3884 = fmul float %3883, 2.000000e+00 - %3885 = bitcast i32 %102 to float - %3886 = bitcast i32 %102 to float - %3887 = fmul float %3885, %3886 - %3888 = fadd float %3887, 0.000000e+00 - %3889 = bitcast i32 %131 to float - %3890 = bitcast i32 %131 to float - %3891 = fmul float %3889, %3890 - %3892 = fadd float %3888, %3891 - %3893 = call float @llvm.sqrt.f32.198(float %3892) - %3894 = bitcast i32 %102 to float - %3895 = fcmp olt float %3894, 0.000000e+00 - %3896 = sext i1 %3895 to i32 - %3897 = bitcast i32 %102 to float - %3898 = fcmp ogt float %3897, 0.000000e+00 - %3899 = zext i1 %3898 to i32 - %3900 = add nsw i32 %3896, %3899 - %3901 = sitofp i32 %3900 to float - %3902 = fneg float %3901 - %3903 = fmul float %3893, %3902 - %3904 = fmul float %3903, 0.000000e+00 - %3905 = bitcast i32 %131 to float - %3906 = fadd float %3905, %3904 - 
%3907 = bitcast i32 %102 to float - %3908 = bitcast i32 %102 to float - %3909 = fmul float %3907, %3908 - %3910 = fadd float %3909, 0.000000e+00 - %3911 = bitcast i32 %131 to float - %3912 = bitcast i32 %131 to float - %3913 = fmul float %3911, %3912 - %3914 = fadd float %3910, %3913 - %3915 = call float @llvm.sqrt.f32.199(float %3914) - %3916 = bitcast i32 %102 to float - %3917 = fcmp olt float %3916, 0.000000e+00 - %3918 = sext i1 %3917 to i32 - %3919 = bitcast i32 %102 to float - %3920 = fcmp ogt float %3919, 0.000000e+00 - %3921 = zext i1 %3920 to i32 - %3922 = add nsw i32 %3918, %3921 - %3923 = sitofp i32 %3922 to float - %3924 = fneg float %3923 - %3925 = fmul float %3915, %3924 - %3926 = bitcast i32 %102 to float - %3927 = fadd float %3926, %3925 - %3928 = bitcast i32 %102 to float - %3929 = bitcast i32 %102 to float - %3930 = fmul float %3928, %3929 - %3931 = fadd float %3930, 0.000000e+00 - %3932 = bitcast i32 %131 to float - %3933 = bitcast i32 %131 to float - %3934 = fmul float %3932, %3933 - %3935 = fadd float %3931, %3934 - %3936 = call float @llvm.sqrt.f32.200(float %3935) - %3937 = bitcast i32 %102 to float - %3938 = fcmp olt float %3937, 0.000000e+00 - %3939 = sext i1 %3938 to i32 - %3940 = bitcast i32 %102 to float - %3941 = fcmp ogt float %3940, 0.000000e+00 - %3942 = zext i1 %3941 to i32 - %3943 = add nsw i32 %3939, %3942 - %3944 = sitofp i32 %3943 to float - %3945 = fneg float %3944 - %3946 = fmul float %3936, %3945 - %3947 = bitcast i32 %102 to float - %3948 = fadd float %3947, %3946 - %3949 = fmul float %3927, %3948 - %3950 = fadd float %3949, 0.000000e+00 - %3951 = bitcast i32 %102 to float - %3952 = bitcast i32 %102 to float - %3953 = fmul float %3951, %3952 - %3954 = fadd float %3953, 0.000000e+00 - %3955 = bitcast i32 %131 to float - %3956 = bitcast i32 %131 to float - %3957 = fmul float %3955, %3956 - %3958 = fadd float %3954, %3957 - %3959 = call float @llvm.sqrt.f32.201(float %3958) - %3960 = bitcast i32 %102 to float - %3961 = fcmp olt float %3960, 0.000000e+00 - %3962 = sext i1 %3961 to i32 - %3963 = bitcast i32 %102 to float - %3964 = fcmp ogt float %3963, 0.000000e+00 - %3965 = zext i1 %3964 to i32 - %3966 = add nsw i32 %3962, %3965 - %3967 = sitofp i32 %3966 to float - %3968 = fneg float %3967 - %3969 = fmul float %3959, %3968 - %3970 = fmul float %3969, 0.000000e+00 - %3971 = bitcast i32 %131 to float - %3972 = fadd float %3971, %3970 - %3973 = bitcast i32 %102 to float - %3974 = bitcast i32 %102 to float - %3975 = fmul float %3973, %3974 - %3976 = fadd float %3975, 0.000000e+00 - %3977 = bitcast i32 %131 to float - %3978 = bitcast i32 %131 to float - %3979 = fmul float %3977, %3978 - %3980 = fadd float %3976, %3979 - %3981 = call float @llvm.sqrt.f32.202(float %3980) - %3982 = bitcast i32 %102 to float - %3983 = fcmp olt float %3982, 0.000000e+00 - %3984 = sext i1 %3983 to i32 - %3985 = bitcast i32 %102 to float - %3986 = fcmp ogt float %3985, 0.000000e+00 - %3987 = zext i1 %3986 to i32 - %3988 = add nsw i32 %3984, %3987 - %3989 = sitofp i32 %3988 to float - %3990 = fneg float %3989 - %3991 = fmul float %3981, %3990 - %3992 = fmul float %3991, 0.000000e+00 - %3993 = bitcast i32 %131 to float - %3994 = fadd float %3993, %3992 - %3995 = fmul float %3972, %3994 - %3996 = fadd float %3950, %3995 - %3997 = call float @llvm.sqrt.f32.203(float %3996) - %3998 = fadd float %3997, 0.000000e+00 - %3999 = fdiv float %3906, %3998 - %4000 = fmul float %3884, %3999 - %4001 = fsub float 1.000000e+00, %4000 - %4002 = getelementptr float, float* %0, i32 0 - %4003 = 
getelementptr inbounds float, float* %4002, i64 3 - %4004 = load float, float* %4003, align 4 - %4005 = fmul float %4001, %4004 - %4006 = fadd float %3768, %4005 - %4007 = insertelement <4 x float> zeroinitializer, float %4006, i32 0 - %4008 = insertelement <4 x float> %4007, float 0.000000e+00, i32 1 - %4009 = insertelement <4 x float> %4008, float 0.000000e+00, i32 2 - %4010 = insertelement <4 x float> %4009, float 0.000000e+00, i32 3 - %4011 = extractelement <4 x float> %4010, i32 0 - %4012 = getelementptr float, float* %2, i32 0 - %4013 = getelementptr inbounds float, float* %4012, i64 3 - store float %4011, float* %4013, align 4 - %4014 = getelementptr float, float* %1, i32 0 - %4015 = getelementptr inbounds float, float* %4014, i64 2 - %4016 = bitcast float* %4015 to i32* - %4017 = load i32, i32* %4016, align 4 - %4018 = bitcast i32 %4017 to float - %4019 = insertelement <4 x float> zeroinitializer, float %4018, i32 0 - %4020 = getelementptr float, float* %1, i32 0 - %4021 = getelementptr inbounds float, float* %4020, i64 1 - %4022 = bitcast float* %4021 to i32* - %4023 = load i32, i32* %4022, align 4 - %4024 = bitcast i32 %4023 to float - %4025 = insertelement <4 x float> %4019, float %4024, i32 1 - %4026 = insertelement <4 x float> %4025, float 0.000000e+00, i32 2 - %4027 = insertelement <4 x float> %4026, float 0.000000e+00, i32 3 - %4028 = extractelement <4 x float> %4027, i32 0 - %4029 = bitcast i32* %95 to float* - %4030 = getelementptr float, float* %1, i32 0 - %4031 = getelementptr inbounds float, float* %4030, i64 1 - %4032 = bitcast float* %4031 to i32* - %4033 = bitcast i32* %4032 to float* - store float %4028, float* %4033, align 4 - %4034 = extractelement <4 x float> %4027, i32 1 - %4035 = bitcast i32* %98 to float* - %4036 = getelementptr float, float* %1, i32 0 - %4037 = getelementptr inbounds float, float* %4036, i64 2 - %4038 = bitcast float* %4037 to i32* - %4039 = bitcast i32* %4038 to float* - store float %4034, float* %4039, align 4 - ret void -} - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { -.preheader13: - %3 = bitcast float* %2 to i8* - %4 = bitcast float* %0 to i8* - %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) - %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #8 - %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 - %8 = bitcast i8* %7 to float* - store float 1.000000e+00, float* %8, align 4 - %9 = getelementptr inbounds i8, i8* %7, i64 8 - %10 = getelementptr inbounds i8, i8* %7, i64 12 - %11 = bitcast i8* %10 to float* - store float 1.000000e+00, float* %11, align 4 - %12 = bitcast float* %1 to i8* - %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) - %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %15 = bitcast i8* %14 to float* - %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %17 = bitcast i8* %16 to float* - %18 = bitcast float* %2 to i32* - %19 = load i32, i32* %18, align 4 - %20 = bitcast i8* %14 to i32* - store i32 %19, i32* %20, align 4 - %21 = bitcast i8* %7 to i32* - %22 = load i32, i32* %21, align 4 - %23 = bitcast i8* %16 to i32* - store i32 %22, i32* %23, align 4 - %24 = getelementptr inbounds float, float* %2, i64 2 - %25 = bitcast float* %24 to i32* - %26 = load i32, i32* %25, align 4 
- %27 = getelementptr inbounds i8, i8* %14, i64 4 - %28 = bitcast i8* %27 to i32* - store i32 %26, i32* %28, align 4 - %29 = bitcast i8* %9 to i32* - %30 = load i32, i32* %29, align 4 - %31 = getelementptr inbounds i8, i8* %16, i64 4 - %32 = bitcast i8* %31 to i32* - store i32 %30, i32* %32, align 4 - %33 = load float, float* %15, align 4 - %34 = call float @no_opt_sgn(float %33) - %35 = fneg float %34 - %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) - %37 = fmul float %36, %35 - %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %39 = bitcast i8* %38 to float* - %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %41 = load float, float* %15, align 4 - %42 = load float, float* %17, align 4 - %43 = fmul float %37, %42 - %44 = fadd float %41, %43 - store float %44, float* %39, align 4 - %45 = bitcast i8* %27 to float* - %46 = load float, float* %45, align 4 - %47 = bitcast i8* %31 to float* - %48 = load float, float* %47, align 4 - %49 = fmul float %37, %48 - %50 = fadd float %46, %49 - %51 = getelementptr inbounds i8, i8* %38, i64 4 - %52 = bitcast i8* %51 to float* - store float %50, float* %52, align 4 - %53 = bitcast i8* %40 to float* - %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) - %55 = fadd float %54, 0x3EE4F8B580000000 - %56 = load float, float* %39, align 4 - %57 = fdiv float %56, %55 - store float %57, float* %53, align 4 - %58 = load float, float* %52, align 4 - %59 = fdiv float %58, %55 - %60 = getelementptr inbounds i8, i8* %40, i64 4 - %61 = bitcast i8* %60 to float* - store float %59, float* %61, align 4 - %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 - %63 = bitcast i8* %62 to float* - %64 = load float, float* %53, align 4 - %65 = fmul float %64, 2.000000e+00 - %66 = fmul float %65, %64 - %67 = fsub float 1.000000e+00, %66 - store float %67, float* %63, align 4 - %68 = load float, float* %53, align 4 - %69 = fmul float %68, 2.000000e+00 - %70 = load float, float* %61, align 4 - %71 = fmul float %69, %70 - %72 = fsub float 0.000000e+00, %71 - %73 = getelementptr inbounds i8, i8* %62, i64 4 - %74 = bitcast i8* %73 to float* - store float %72, float* %74, align 4 - %75 = load float, float* %61, align 4 - %76 = fmul float %75, 2.000000e+00 - %77 = load float, float* %53, align 4 - %78 = fmul float %76, %77 - %79 = fsub float 0.000000e+00, %78 - %80 = getelementptr inbounds i8, i8* %62, i64 8 - %81 = bitcast i8* %80 to float* - store float %79, float* %81, align 4 - %82 = load float, float* %61, align 4 - %83 = fmul float %82, 2.000000e+00 - %84 = fmul float %83, %82 - %85 = fsub float 1.000000e+00, %84 - %86 = getelementptr inbounds i8, i8* %62, i64 12 - %87 = bitcast i8* %86 to float* - store float %85, float* %87, align 4 - %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 - %89 = bitcast i8* %88 to float* - %90 = bitcast i8* %62 to i32* - %91 = load i32, i32* %90, align 4 - %92 = bitcast i8* %88 to i32* - store i32 %91, i32* %92, align 4 - %93 = bitcast i8* %73 to i32* - %94 = load i32, i32* %93, align 4 - %95 = getelementptr inbounds i8, i8* %88, i64 4 - %96 = bitcast i8* %95 to i32* - store i32 %94, i32* %96, align 4 - %97 = bitcast i8* %80 to i32* - %98 = load i32, i32* %97, align 4 - %99 = getelementptr inbounds i8, i8* %88, i64 8 - %100 = bitcast i8* %99 to i32* - store i32 %98, i32* %100, align 4 - %101 = bitcast i8* %86 to i32* - %102 = load i32, i32* %101, align 4 - %103 = getelementptr inbounds i8, i8* %88, i64 12 - %104 = bitcast i8* %103 to i32* - store i32 
%102, i32* %104, align 4 - %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #8 - call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) - call void @free(i8* %14) - call void @free(i8* %16) - call void @free(i8* %38) - call void @free(i8* %40) - call void @free(i8* %62) - call void @free(i8* %88) - call void @no_opt_naive_fixed_transpose(float* %1) - ret void -} - -; Function Attrs: nounwind -declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 - -; Function Attrs: nounwind readnone speculatable willreturn -declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 - -; Function Attrs: allocsize(0,1) -declare i8* @calloc(i64, i64) #5 - -declare void @free(i8*) #6 - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #1 { -.preheader6: - %0 = alloca i64, align 8 - %1 = alloca [4 x float], align 16 - %2 = alloca [4 x float], align 16 - %3 = alloca [4 x float], align 16 - %4 = alloca [4 x float], align 16 - %5 = alloca [4 x float], align 16 - %6 = call i64 @time(i64* null) #8 - store i64 %6, i64* %0, align 8 - %7 = call i64 @time(i64* nonnull %0) #8 - %8 = trunc i64 %7 to i32 - call void @srand(i32 %8) #8 - %9 = call i32 @rand() #8 - %10 = sitofp i32 %9 to float - %11 = fdiv float %10, 0x41747AE140000000 - %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 - store float %11, float* %12, align 16 - %13 = fpext float %11 to double - %14 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %13) #8 - %15 = call i32 @rand() #8 - %16 = sitofp i32 %15 to float - %17 = fdiv float %16, 0x41747AE140000000 - %18 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 - store float %17, float* %18, align 4 - %19 = fpext float %17 to double - %20 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %19) #8 - %21 = call i32 @rand() #8 - %22 = sitofp i32 %21 to float - %23 = fdiv float %22, 0x41747AE140000000 - %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 - store float %23, float* %24, align 8 - %25 = fpext float %23 to double - %26 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %25) #8 - %27 = call i32 @rand() #8 - %28 = sitofp i32 %27 to float - %29 = fdiv float %28, 0x41747AE140000000 - %30 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 - store float %29, float* %30, align 4 - %31 = fpext float %29 to double - %32 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %31) #8 - %33 = bitcast [4 x float]* %2 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %33, i8 0, i64 16, i1 false) - %34 = bitcast [4 x float]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %34, i8 0, i64 16, i1 false) - %35 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %36 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %35, float* nonnull %36) - %37 = bitcast [4 x float]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %37, i8 0, i64 16, i1 false) - %38 = bitcast [4 x float]* %5 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %38, i8 0, i64 16, i1 false) - %39 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - %40 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 - call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %39, float* nonnull %40) - %41 = load float, float* %35, align 16 - %42 = fpext float %41 to double - %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %42) #8 - %44 = load float, float* %39, align 16 - %45 = fpext float %44 to double - %46 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %45) #8 - %47 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 - %48 = load float, float* %47, align 4 - %49 = fpext float %48 to double - %50 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %49) #8 - %51 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 - %52 = load float, float* %51, align 4 - %53 = fpext float %52 to double - %54 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %53) #8 - %55 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 - %56 = load float, float* %55, align 8 - %57 = fpext float %56 to double - %58 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %57) #8 - %59 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 - %60 = load float, float* %59, align 8 - %61 = fpext float %60 to double - %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %61) #8 - %63 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 - %64 = load float, float* %63, align 4 - %65 = fpext float %64 to double - %66 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %65) #8 - %67 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 - %68 = load float, float* %67, align 4 - %69 = fpext float %68 to double - %70 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %69) #8 - %71 = load float, float* %36, align 16 - %72 = fpext float %71 to double - %73 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %72) #8 - %74 = load float, float* %40, align 16 - %75 = fpext float %74 to double - %76 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %75) #8 - %77 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 - %78 = load float, float* %77, align 4 - %79 = fpext float %78 to double - %80 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %79) #8 - %81 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 - %82 = load float, float* %81, align 4 - %83 = fpext float %82 to double - %84 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %83) #8 - %85 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 - %86 = load float, float* %85, align 8 - %87 = fpext float %86 to double - %88 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %87) #8 - %89 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 - %90 = load float, float* %89, align 8 - %91 = fpext float %90 to double - %92 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %91) #8 - %93 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 - %94 = load float, float* %93, align 4 - %95 = fpext float %94 to double - %96 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %95) #8 - %97 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 - %98 = load float, float* %97, align 4 - %99 = fpext float %98 to double - %100 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %99) #8 - ret i32 0 -} - -declare i64 @time(i64*) #6 - -declare void @srand(i32) #6 - -declare i32 @rand() #6 - -declare i32 @printf(i8*, ...) 
#6 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #7 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.1(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.2(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.3(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.4(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.5(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.6(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.7(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.8(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.9(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.10(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.11(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.12(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.13(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.14(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.15(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.16(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.17(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.18(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.19(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.20(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.21(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.22(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.23(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.24(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.25(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.26(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.27(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.28(float) #2 - -; Function 
Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.29(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.30(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.31(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.32(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.33(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.34(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.35(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.36(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.37(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.38(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.39(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.40(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.41(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.42(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.43(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.44(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.45(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.46(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.47(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.48(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.49(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.50(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.51(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.52(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.53(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.54(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.55(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.56(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.57(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.58(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.59(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.60(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.61(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn 
-declare float @llvm.sqrt.f32.62(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.63(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.64(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.65(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.66(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.67(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.68(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.69(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.70(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.71(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.72(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.73(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.74(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.75(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.76(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.77(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.78(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.79(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.80(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.81(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.82(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.83(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.84(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.85(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.86(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.87(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.88(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.89(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.90(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.91(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.92(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.93(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.94(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float 
@llvm.sqrt.f32.95(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.96(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.97(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.98(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.99(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.100(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.101(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.102(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.103(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.104(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.105(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.106(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.107(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.108(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.109(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.110(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.111(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.112(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.113(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.114(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.115(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.116(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.117(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.118(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.119(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.120(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.121(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.122(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.123(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.124(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.125(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.126(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.127(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn 
-declare float @llvm.sqrt.f32.128(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.129(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.130(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.131(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.132(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.133(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.134(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.135(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.136(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.137(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.138(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.139(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.140(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.141(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.142(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.143(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.144(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.145(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.146(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.147(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.148(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.149(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.150(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.151(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.152(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.153(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.154(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.155(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.156(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.157(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.158(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.159(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.160(float) #2 - -; Function Attrs: nounwind readnone 
speculatable willreturn -declare float @llvm.sqrt.f32.161(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.162(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.163(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.164(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.165(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.166(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.167(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.168(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.169(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.170(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.171(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.172(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.173(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.174(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.175(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.176(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.177(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.178(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.179(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.180(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.181(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.182(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.183(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.184(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.185(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.186(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.187(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.188(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.189(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.190(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.191(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.192(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.193(float) #2 - -; Function Attrs: 
nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.194(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.195(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.196(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.197(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.198(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.199(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.200(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.201(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.202(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.203(float) #2 - -attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone speculatable willreturn } -attributes #3 = { argmemonly nounwind willreturn writeonly } -attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" 
"target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { argmemonly nounwind willreturn } -attributes #8 = { nounwind } -attributes #9 = { nounwind allocsize(0,1) } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} -!3 = distinct !{!3, !4} -!4 = !{!"llvm.loop.unroll.disable"} -!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/failed-test/final b/src/dios-egraphs/Diospyros/failed-test/final deleted file mode 100755 index daadd8533ef4771d868093d840ed881038ca0639..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 13628 zcmeHOdvH|M9lje93>ZAS)&g3U)sc3?SRTHLj@Avc8!njTB`F(9xP-8QjL9Y@yDivK zLt$HPm&|0WDASRewswX({*iW&QU~oOAPGt(Rx4EOm@0KHBWk8pjcBFcch1@DBh+>} z)6R5e@8sO`d%w=(KKAC=xpzLj$S_>v45OpSFbsjVro=E3h8K`wT#jadCDM3Wp;X(;#7BG9hDcLuqzyv(_EOqjItAja z`{*$yzWSDEds=#|J^wTnpH~|oo|EsWVFg%LeS55}F}hyU^W$5i?Jd%L;@L+|tYBDH zJo?SX=sK%0+R!5L-0|(v_Be-l5YM-vIO@!KG8Y4AS%He>6gn;dX>E;T8DVHJ*-({lIF2v-PAg*TCnJ@U|_*=&77TAH{d*O=fc9l zwZfJ@uEjneMQ~zmO3`Z-DKZzxEk_o=1543Jw`_Vg zyaD=CvI!c#1icrH`m^HgZL^vh*UnnEsc9W>9$!FX9FI<~tc-hRuX*PE;{%ud@bbks zfaXO*NQQBV8d1WkmX{XI0PH4DA3o$yJE<$u6xGx3Tr~2Sv+LX`EHhUo7Q}P+ruJB* zdFH&Ch^uiegm8QjjrIq1eabb?dKHdB1qu}?RG?6SLInyH`0rGp+O+>7RvZyCd&R@4 z>s(^rfS5TT!4FP|X9iu*ofR|sY7aJuY1?D~|B3vZ)YeZ8@n|p4ix1l=yYGWumspW9 z?ad>mT|InOc!Fm{f_Ge}qqzB)ld^lwWc9GQrSCirGZ_c?7Vk*Rt>?x`#rP1ELa$Nt zglUHc#dn_uJEQoCb{v=E4H5R=s-0Lp%FR@59hE5f~|{erpW z1>XCR{Ht6tZ)q+-W=y%{ExmlYFiYV7yt#G65WWEz7QR8#hI2a(=bq3}oT{^ePpp+` z1_xm)cmk);Ylw5m-1-r$dxCqRy9Y^hQ}>M6wvZl@!EVUxp~l{^xoqiWB4&xKUpfqE z*wpZZ_R=g$z7Kp_ICEAkP1PP0(+rMpVZ8po#w$6$IJvmgUR-R?FU3-wnO>`6nqQ9R zzv7!)&aA<7>E|TpbiN210=mg)SBn*=M0sdf_?{EKm$6_S%;6y{rW2yPmUB6Bz-h*F z7N#TStp|O`ftm|#k_*oy4dS+Idfno1Z?W)Ir$kL3x?vIS1HYyi({>2J+S0RPGPU9d zM0v1V_zuHkm+)cL`~fy=chJptXFS{p9~XhUdWR@av|{cGA1&2xheT(S#GePglc4ho z9!b(+37-^4G*kS=*+EXwF)S;F-pXQ8-nm4Y!M5kFMmW3uG?mPl@*|Oik_mNXXSb7* znFMtqMw47vh9sG7Gpr_NODE4V?CT}?J5(3gZXexU33tNRNg-xK8g$%JkooSW4t2{U zNv2HDc@s~SI%OFrRb6|N2Yxjauo%N+au0YPipR$yrIS&DK9?r5=iky{9Ue1&p zn{$~qzmNkvn)9G^B6B_}!F0|8bmxORM-XgBuVB>(0LEftm&K? 
zCskz5Wtp7Q@nDcWi9OoTW3tC6J5OYpK~-(aBpMci7P7QUNLPvAcB-n-C&eE6X7Ia>V;dR!*M&kNrl0iFc-HY`#K=y7>N0bXRUD35moXnq9FJi%SgYP=SL z+5MmyutE#&5WLm&0;tB7H{6H$h%nqMF*xqXBVfIWpbk-?AKR1Kgylr2rTgH6%mhXr zbc!;z{hSuc_=6WT&JoTr?l125Ltk$3!Ck^v+l{Kz@U?V;4fiPe$PQ;W-ks?L{ERTT z3#ZVIEGMf5bp~<{)^hC8!dMlaC8hm-z6)aNGKK0cTHmgA>d-pV9mW`XTkJL?QKZbM|H){L&E((`7T;}@B{$A{#=bdjs@i+3W&OXMQL_0X-w1Wos zC08L4xM%Zo>lpqm1)t%S6ms&`)R_-i)9%gTUu-eiKN#SkQo7gv?F^lH&um z5lB{)2JCsofn+6mIod@qx!<|(Z`j}}#CK8^TZZAL18_8DCO4;~R~CD(bIS*E8|8go zO_pOadz@e6Mms;AJ-oeic9W9~4IndK!9zWW3iG%I8x!u2lkq(;FdmEw&b?3%INXWC znAo05T+KIkSL|oz0sAl_-R1PWFQb@b$YRk#5Ek4Rx77o1gK6cN0NjL))$r+AQ2dlC z^KsLD#c?ATa7&(2{iJ;=IB2;&!uKkib5D2`w`y2CoO*8I!@y#K>~&iA``I_fu*lWvf8 zoPfASoL6W9LuGbu-sKztV)r`-&^f;(qcb$b0SLk&wrY%!93>K0_L`k{RDj;&f}Jks z+f)tiaPC7-l}qzfPHB}(q>2-g;@h3OwD_B{&vV~79IVsFp)BeytJnAYeP4~gc5=dA(w`Y9=mSl6kdc z@+KAm-L07iG*d|Cb&7e5W~MZAf@FFX^K#AX*35Ij?ZlOe`3a^z^E{-P$0T#AV!p1K zk7;H~GA~oiKF!4JmeKB%Ot)hGTr>A*=JzC%x7tW$yJjBM%&$vksbb!vnNMhDm1Gtx zCVxA`Li`12W|B`JBj&9yiUjVputu?vxma<*;^j+R+;x6>7x44bACTt<<@q6b-X+gJ zlIKU{`N#6yEzdv2*)Xa)ueudL#ogzpSD^3OK;r4Q5Xgqh2)#z=dP2V^bQ_@oLY0J` zA+(IpUP5aKJx(Y}sGHDzgdQUFeL_0`q0c%x?gZk0;@K~KW#aLl-37QKyG?JHXe{+E zj>lT#vD>`Ff*V^S^|8o0Z!US+g@tm->UXRw;ebbWkEvwvx9*$Jv+;Lo9)z)N;CC|@ zeut7JBY6}ca-m~T|4#j??oa9dUEPo9z6ihP(pZ`9C+ps;`x&~wkdFebP=P`P3Kb|+ zpiqHA1qu}?RG?6SLInyHC{&$Ex3OkF`Et*S5}B z+E&`-=SQ~kBYA5rzVpUM?R*bwv`v*7&5`E%)=g9axy_qrrJ=5|iF6<6^>s~6E%n4^ z18ZomYal$Aa9boo*bf-?+9NSyCa_j~s2-DYOMtc2Mc0uN2Hc)T)&Pk$Hq(GQTo(Rc zpxA2OU_~}Y>f^BpzQwO&*fZMKM~#_LK)!q0oIzj>4bED7>Ae@IL)s!?<}$ z`EVL8$7ygD(VOuO=2eTC}zPDHJYbHxvedo;gH++{Q z01QEd_^lcr##=PvOEliC`Rg_A$5#vF^FJu4@7MAVYkUwNbCdt1##37VC5;c`BMS20 z(zsjO`>V$JPK*2x(a;l=M~Om4@hN%uj6D2?Je>byp#GG-ujb*EdHA9{e0d(ux1;J$ z*{jRL`I{k9lmB0X5*YmVluOY*hlY|FpGV`E;P9g)@*DVnSI3M0CKx;4EjiCEHH*~* z&7X=n%*?FJA!nwtCd+kTMb(=5Z0loXvYm|OX8X$K=D6hAwB!j{XSSc5?MrSvITW>_ frFS8<|EQfOz2~HN8@12q9Y*afW41s!PR2g~z+`Np diff --git a/src/dios-egraphs/Diospyros/failed-test/opt.ll b/src/dios-egraphs/Diospyros/failed-test/opt.ll deleted file mode 100644 index 4f8a871a..00000000 --- a/src/dios-egraphs/Diospyros/failed-test/opt.ll +++ /dev/null @@ -1,750 +0,0 @@ -; ModuleID = 'build/clang.ll' -source_filename = "fail-tests/qr-decomp-local-arrays.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 -@.str.1 = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 -@.str.2 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 -@.str.3 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 -@.str.4 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @sgn(float %0) #0 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_sgn(float %0) #1 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @naive_norm(float* %0, i32 %1) #0 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - 
%wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 - %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 = fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32(float) #2 - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_naive_norm(float* %0, i32 %1) #1 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - %wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label 
%._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 - %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 = fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_transpose(float* %0) #0 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - %6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_transpose(float* %0) #1 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - 
%6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { -.preheader: - store float 0.000000e+00, float* %2, align 4 - %3 = load float, float* %0, align 4 - %4 = load float, float* %1, align 4 - %5 = fmul float %3, %4 - %6 = fadd float %5, 0.000000e+00 - store float %6, float* %2, align 4 - %7 = getelementptr inbounds float, float* %0, i64 1 - %8 = load float, float* %7, align 4 - %9 = getelementptr inbounds float, float* %1, i64 2 - %10 = load float, float* %9, align 4 - %11 = fmul float %8, %10 - %12 = fadd float %6, %11 - store float %12, float* %2, align 4 - %13 = getelementptr inbounds float, float* %2, i64 1 - store float 0.000000e+00, float* %13, align 4 - %14 = load float, float* %0, align 4 - %15 = getelementptr inbounds float, float* %1, i64 1 - %16 = load float, float* %15, align 4 - %17 = fmul float %14, %16 - %18 = fadd float %17, 0.000000e+00 - store float %18, float* %13, align 4 - %19 = load float, float* %7, align 4 - %20 = getelementptr inbounds float, float* %1, i64 3 - %21 = load float, float* %20, align 4 - %22 = fmul float %19, %21 - %23 = fadd float %18, %22 - store float %23, float* %13, align 4 - %24 = getelementptr inbounds float, float* %0, i64 2 - %25 = getelementptr inbounds float, float* %2, i64 2 - store float 0.000000e+00, float* %25, align 4 - %26 = load float, float* %24, align 4 - %27 = load float, float* %1, align 4 - %28 = fmul float %26, %27 - %29 = fadd float %28, 0.000000e+00 - store float %29, float* %25, align 4 - %30 = getelementptr inbounds float, float* %0, i64 3 - %31 = load float, float* %30, align 4 - %32 = load float, float* %9, align 4 - %33 = fmul float %31, %32 - %34 = fadd float %29, %33 - store float %34, float* %25, align 4 - %35 = getelementptr inbounds float, float* %2, i64 3 - store float 0.000000e+00, float* %35, align 4 - %36 = load float, float* %24, align 4 - %37 = load float, float* %15, align 4 - %38 = fmul float %36, %37 - %39 = fadd float %38, 0.000000e+00 - store float %39, float* %35, align 4 - %40 = load float, float* %30, align 4 - %41 = load float, float* %20, align 4 - %42 = fmul float %40, %41 - %43 = fadd float %39, %42 - store float %43, float* %35, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { -.preheader: - store float 0.000000e+00, float* %2, align 4 - %3 = load float, float* %0, align 4 - %4 = load float, float* %1, align 4 - %5 = fmul float %3, %4 - %6 = fadd float %5, 0.000000e+00 - store float %6, float* %2, align 4 - %7 = getelementptr inbounds float, float* %0, i64 1 - %8 = load float, float* %7, align 4 - %9 = getelementptr inbounds float, float* %1, i64 2 - %10 = load float, float* %9, align 4 - %11 = fmul float %8, %10 - %12 = fadd float %6, %11 - store float %12, float* %2, align 4 - %13 = getelementptr inbounds float, float* %2, i64 1 - store float 0.000000e+00, float* %13, align 4 - %14 = load float, float* %0, align 4 - %15 = getelementptr inbounds float, float* %1, i64 1 - %16 = load float, float* %15, align 4 - %17 = fmul float %14, %16 - %18 = fadd float %17, 0.000000e+00 - store float %18, float* %13, align 4 - %19 = load float, float* %7, align 4 - %20 = getelementptr inbounds float, float* %1, i64 3 - %21 = load float, float* %20, align 4 - %22 = fmul float %19, %21 - %23 = fadd float %18, %22 - 
store float %23, float* %13, align 4 - %24 = getelementptr inbounds float, float* %0, i64 2 - %25 = getelementptr inbounds float, float* %2, i64 2 - store float 0.000000e+00, float* %25, align 4 - %26 = load float, float* %24, align 4 - %27 = load float, float* %1, align 4 - %28 = fmul float %26, %27 - %29 = fadd float %28, 0.000000e+00 - store float %29, float* %25, align 4 - %30 = getelementptr inbounds float, float* %0, i64 3 - %31 = load float, float* %30, align 4 - %32 = load float, float* %9, align 4 - %33 = fmul float %31, %32 - %34 = fadd float %29, %33 - store float %34, float* %25, align 4 - %35 = getelementptr inbounds float, float* %2, i64 3 - store float 0.000000e+00, float* %35, align 4 - %36 = load float, float* %24, align 4 - %37 = load float, float* %15, align 4 - %38 = fmul float %36, %37 - %39 = fadd float %38, 0.000000e+00 - store float %39, float* %35, align 4 - %40 = load float, float* %30, align 4 - %41 = load float, float* %20, align 4 - %42 = fmul float %40, %41 - %43 = fadd float %39, %42 - store float %43, float* %35, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { -.preheader49: - %3 = bitcast float* %1 to i8* - %4 = alloca [4 x float], align 16 - %5 = bitcast [4 x float]* %4 to i8* - %6 = bitcast float* %0 to i32* - %7 = load i32, i32* %6, align 4 - %8 = bitcast float* %2 to i32* - store i32 %7, i32* %8, align 4 - %9 = getelementptr inbounds float, float* %0, i64 1 - %10 = bitcast float* %9 to i32* - %11 = load i32, i32* %10, align 4 - %12 = getelementptr inbounds float, float* %2, i64 1 - %13 = bitcast float* %12 to i32* - store i32 %11, i32* %13, align 4 - %14 = getelementptr inbounds float, float* %0, i64 2 - %15 = bitcast float* %14 to i32* - %16 = load i32, i32* %15, align 4 - %17 = getelementptr inbounds float, float* %2, i64 2 - %18 = bitcast float* %17 to i32* - store i32 %16, i32* %18, align 4 - %19 = getelementptr inbounds float, float* %0, i64 3 - %20 = bitcast float* %19 to i32* - %21 = load i32, i32* %20, align 4 - %22 = getelementptr inbounds float, float* %2, i64 3 - %23 = bitcast float* %22 to i32* - store i32 %21, i32* %23, align 4 - %24 = bitcast i32 %7 to float - %25 = fcmp ogt float %24, 0.000000e+00 - %26 = zext i1 %25 to i32 - %27 = fcmp olt float %24, 0.000000e+00 - %.neg = sext i1 %27 to i32 - %28 = add nsw i32 %.neg, %26 - %29 = sitofp i32 %28 to float - %30 = fmul float %24, %24 - %31 = fadd float %30, 0.000000e+00 - %32 = bitcast i32 %16 to float - %33 = fmul float %32, %32 - %34 = fadd float %31, %33 - %35 = call float @llvm.sqrt.f32(float %34) #8 - %36 = fneg float %29 - %37 = fmul float %35, %36 - %38 = fadd float %24, %37 - %39 = fmul float %37, 0.000000e+00 - %40 = fadd float %32, %39 - %41 = fmul float %38, %38 - %42 = fadd float %41, 0.000000e+00 - %43 = fmul float %40, %40 - %44 = fadd float %42, %43 - %45 = call float @llvm.sqrt.f32(float %44) #8 - %46 = fadd float %45, 0x3EE4F8B580000000 - %47 = fdiv float %38, %46 - %48 = fdiv float %40, %46 - %49 = fmul float %47, 2.000000e+00 - %50 = fmul float %49, %47 - %51 = fsub float 1.000000e+00, %50 - %52 = fmul float %49, %48 - %53 = fsub float 0.000000e+00, %52 - %54 = fmul float %48, 2.000000e+00 - %55 = fmul float %54, %47 - %56 = fsub float 0.000000e+00, %55 - %57 = fmul float %54, %48 - %58 = fsub float 1.000000e+00, %57 - %59 = bitcast float %51 to i32 - %60 = bitcast [4 x float]* %4 to i32* - store i32 %59, i32* %60, align 16 - %61 = bitcast float %53 to i32 - %62 = 
getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 - %63 = bitcast float* %62 to i32* - store i32 %61, i32* %63, align 4 - %64 = bitcast float %56 to i32 - %65 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 - %66 = bitcast float* %65 to i32* - store i32 %64, i32* %66, align 8 - %67 = bitcast float %58 to i32 - %68 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 - %69 = bitcast float* %68 to i32* - store i32 %67, i32* %69, align 4 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %3, i8* nonnull align 16 dereferenceable(16) %5, i64 16, i1 false) - store float 0.000000e+00, float* %2, align 4 - %70 = load float, float* %0, align 4 - %71 = fmul float %51, %70 - %72 = fadd float %71, 0.000000e+00 - store float %72, float* %2, align 4 - %73 = load float, float* %14, align 4 - %74 = fmul float %53, %73 - %75 = fadd float %72, %74 - store float %75, float* %2, align 4 - store float 0.000000e+00, float* %12, align 4 - %76 = load float, float* %9, align 4 - %77 = fmul float %51, %76 - %78 = fadd float %77, 0.000000e+00 - store float %78, float* %12, align 4 - %79 = load float, float* %19, align 4 - %80 = fmul float %53, %79 - %81 = fadd float %78, %80 - store float %81, float* %12, align 4 - store float 0.000000e+00, float* %17, align 4 - %82 = load float, float* %0, align 4 - %83 = fmul float %56, %82 - %84 = fadd float %83, 0.000000e+00 - store float %84, float* %17, align 4 - %85 = load float, float* %14, align 4 - %86 = fmul float %58, %85 - %87 = fadd float %84, %86 - store float %87, float* %17, align 4 - store float 0.000000e+00, float* %22, align 4 - %88 = load float, float* %9, align 4 - %89 = fmul float %56, %88 - %90 = fadd float %89, 0.000000e+00 - store float %90, float* %22, align 4 - %91 = load float, float* %19, align 4 - %92 = fmul float %58, %91 - %93 = fadd float %90, %92 - store float %93, float* %22, align 4 - %94 = getelementptr inbounds float, float* %1, i64 1 - %95 = bitcast float* %94 to i32* - %96 = load i32, i32* %95, align 4 - %97 = getelementptr inbounds float, float* %1, i64 2 - %98 = bitcast float* %97 to i32* - %99 = load i32, i32* %98, align 4 - store i32 %99, i32* %95, align 4 - store i32 %96, i32* %98, align 4 - ret void -} - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { -.preheader13: - %3 = bitcast float* %2 to i8* - %4 = bitcast float* %0 to i8* - %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) - %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #8 - %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 - %8 = bitcast i8* %7 to float* - store float 1.000000e+00, float* %8, align 4 - %9 = getelementptr inbounds i8, i8* %7, i64 8 - %10 = getelementptr inbounds i8, i8* %7, i64 12 - %11 = bitcast i8* %10 to float* - store float 1.000000e+00, float* %11, align 4 - %12 = bitcast float* %1 to i8* - %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) - %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %15 = bitcast i8* %14 to float* - %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %17 = bitcast i8* %16 to float* - %18 = bitcast float* %2 to i32* - %19 = load i32, i32* %18, align 4 - %20 = bitcast i8* %14 to i32* - store i32 %19, i32* %20, 
align 4 - %21 = bitcast i8* %7 to i32* - %22 = load i32, i32* %21, align 4 - %23 = bitcast i8* %16 to i32* - store i32 %22, i32* %23, align 4 - %24 = getelementptr inbounds float, float* %2, i64 2 - %25 = bitcast float* %24 to i32* - %26 = load i32, i32* %25, align 4 - %27 = getelementptr inbounds i8, i8* %14, i64 4 - %28 = bitcast i8* %27 to i32* - store i32 %26, i32* %28, align 4 - %29 = bitcast i8* %9 to i32* - %30 = load i32, i32* %29, align 4 - %31 = getelementptr inbounds i8, i8* %16, i64 4 - %32 = bitcast i8* %31 to i32* - store i32 %30, i32* %32, align 4 - %33 = load float, float* %15, align 4 - %34 = call float @no_opt_sgn(float %33) - %35 = fneg float %34 - %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) - %37 = fmul float %36, %35 - %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %39 = bitcast i8* %38 to float* - %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #9 - %41 = load float, float* %15, align 4 - %42 = load float, float* %17, align 4 - %43 = fmul float %37, %42 - %44 = fadd float %41, %43 - store float %44, float* %39, align 4 - %45 = bitcast i8* %27 to float* - %46 = load float, float* %45, align 4 - %47 = bitcast i8* %31 to float* - %48 = load float, float* %47, align 4 - %49 = fmul float %37, %48 - %50 = fadd float %46, %49 - %51 = getelementptr inbounds i8, i8* %38, i64 4 - %52 = bitcast i8* %51 to float* - store float %50, float* %52, align 4 - %53 = bitcast i8* %40 to float* - %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) - %55 = fadd float %54, 0x3EE4F8B580000000 - %56 = load float, float* %39, align 4 - %57 = fdiv float %56, %55 - store float %57, float* %53, align 4 - %58 = load float, float* %52, align 4 - %59 = fdiv float %58, %55 - %60 = getelementptr inbounds i8, i8* %40, i64 4 - %61 = bitcast i8* %60 to float* - store float %59, float* %61, align 4 - %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 - %63 = bitcast i8* %62 to float* - %64 = load float, float* %53, align 4 - %65 = fmul float %64, 2.000000e+00 - %66 = fmul float %65, %64 - %67 = fsub float 1.000000e+00, %66 - store float %67, float* %63, align 4 - %68 = load float, float* %53, align 4 - %69 = fmul float %68, 2.000000e+00 - %70 = load float, float* %61, align 4 - %71 = fmul float %69, %70 - %72 = fsub float 0.000000e+00, %71 - %73 = getelementptr inbounds i8, i8* %62, i64 4 - %74 = bitcast i8* %73 to float* - store float %72, float* %74, align 4 - %75 = load float, float* %61, align 4 - %76 = fmul float %75, 2.000000e+00 - %77 = load float, float* %53, align 4 - %78 = fmul float %76, %77 - %79 = fsub float 0.000000e+00, %78 - %80 = getelementptr inbounds i8, i8* %62, i64 8 - %81 = bitcast i8* %80 to float* - store float %79, float* %81, align 4 - %82 = load float, float* %61, align 4 - %83 = fmul float %82, 2.000000e+00 - %84 = fmul float %83, %82 - %85 = fsub float 1.000000e+00, %84 - %86 = getelementptr inbounds i8, i8* %62, i64 12 - %87 = bitcast i8* %86 to float* - store float %85, float* %87, align 4 - %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #9 - %89 = bitcast i8* %88 to float* - %90 = bitcast i8* %62 to i32* - %91 = load i32, i32* %90, align 4 - %92 = bitcast i8* %88 to i32* - store i32 %91, i32* %92, align 4 - %93 = bitcast i8* %73 to i32* - %94 = load i32, i32* %93, align 4 - %95 = getelementptr inbounds i8, i8* %88, i64 4 - %96 = bitcast i8* %95 to i32* - store i32 %94, i32* %96, align 4 - %97 = bitcast i8* %80 to i32* - %98 = load i32, i32* %97, align 4 - %99 = 
getelementptr inbounds i8, i8* %88, i64 8 - %100 = bitcast i8* %99 to i32* - store i32 %98, i32* %100, align 4 - %101 = bitcast i8* %86 to i32* - %102 = load i32, i32* %101, align 4 - %103 = getelementptr inbounds i8, i8* %88, i64 12 - %104 = bitcast i8* %103 to i32* - store i32 %102, i32* %104, align 4 - %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #8 - call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) - call void @free(i8* %14) - call void @free(i8* %16) - call void @free(i8* %38) - call void @free(i8* %40) - call void @free(i8* %62) - call void @free(i8* %88) - call void @no_opt_naive_fixed_transpose(float* %1) - ret void -} - -; Function Attrs: nounwind -declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 - -; Function Attrs: nounwind readnone speculatable willreturn -declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 - -; Function Attrs: allocsize(0,1) -declare i8* @calloc(i64, i64) #5 - -declare void @free(i8*) #6 - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #1 { -.preheader6: - %0 = alloca i64, align 8 - %1 = alloca [4 x float], align 16 - %2 = alloca [4 x float], align 16 - %3 = alloca [4 x float], align 16 - %4 = alloca [4 x float], align 16 - %5 = alloca [4 x float], align 16 - %6 = call i64 @time(i64* null) #8 - store i64 %6, i64* %0, align 8 - %7 = call i64 @time(i64* nonnull %0) #8 - %8 = trunc i64 %7 to i32 - call void @srand(i32 %8) #8 - %9 = call i32 @rand() #8 - %10 = sitofp i32 %9 to float - %11 = fdiv float %10, 0x41747AE140000000 - %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 - store float %11, float* %12, align 16 - %13 = fpext float %11 to double - %14 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %13) #8 - %15 = call i32 @rand() #8 - %16 = sitofp i32 %15 to float - %17 = fdiv float %16, 0x41747AE140000000 - %18 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 - store float %17, float* %18, align 4 - %19 = fpext float %17 to double - %20 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %19) #8 - %21 = call i32 @rand() #8 - %22 = sitofp i32 %21 to float - %23 = fdiv float %22, 0x41747AE140000000 - %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 - store float %23, float* %24, align 8 - %25 = fpext float %23 to double - %26 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %25) #8 - %27 = call i32 @rand() #8 - %28 = sitofp i32 %27 to float - %29 = fdiv float %28, 0x41747AE140000000 - %30 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 - store float %29, float* %30, align 4 - %31 = fpext float %29 to double - %32 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %31) #8 - %33 = bitcast [4 x float]* %2 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %33, i8 0, i64 16, i1 false) - %34 = bitcast [4 x float]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %34, i8 0, i64 16, i1 false) - %35 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %36 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %35, float* nonnull %36) - %37 = bitcast [4 x float]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %37, i8 0, i64 16, i1 false) - %38 = bitcast [4 x float]* %5 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %38, i8 0, i64 16, i1 false) - %39 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - %40 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 - call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %39, float* nonnull %40) - %41 = load float, float* %35, align 16 - %42 = fpext float %41 to double - %43 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %42) #8 - %44 = load float, float* %39, align 16 - %45 = fpext float %44 to double - %46 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %45) #8 - %47 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 - %48 = load float, float* %47, align 4 - %49 = fpext float %48 to double - %50 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %49) #8 - %51 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 - %52 = load float, float* %51, align 4 - %53 = fpext float %52 to double - %54 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %53) #8 - %55 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 - %56 = load float, float* %55, align 8 - %57 = fpext float %56 to double - %58 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %57) #8 - %59 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 - %60 = load float, float* %59, align 8 - %61 = fpext float %60 to double - %62 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %61) #8 - %63 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 - %64 = load float, float* %63, align 4 - %65 = fpext float %64 to double - %66 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), double %65) #8 - %67 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 - %68 = load float, float* %67, align 4 - %69 = fpext float %68 to double - %70 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), double %69) #8 - %71 = load float, float* %36, align 16 - %72 = fpext float %71 to double - %73 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %72) #8 - %74 = load float, float* %40, align 16 - %75 = fpext float %74 to double - %76 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %75) #8 - %77 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 - %78 = load float, float* %77, align 4 - %79 = fpext float %78 to double - %80 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %79) #8 - %81 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 - %82 = load float, float* %81, align 4 - %83 = fpext float %82 to double - %84 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %83) #8 - %85 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 - %86 = load float, float* %85, align 8 - %87 = fpext float %86 to double - %88 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %87) #8 - %89 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 - %90 = load float, float* %89, align 8 - %91 = fpext float %90 to double - %92 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %91) #8 - %93 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 - %94 = load float, float* %93, align 4 - %95 = fpext float %94 to double - %96 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.3, i64 0, i64 0), double %95) #8 - %97 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 - %98 = load float, float* %97, align 4 - %99 = fpext float %98 to double - %100 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0), double %99) #8 - ret i32 0 -} - -declare i64 @time(i64*) #6 - -declare void @srand(i32) #6 - -declare i32 @rand() #6 - -declare i32 @printf(i8*, ...) 
#6 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #7 - -attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone speculatable willreturn } -attributes #3 = { argmemonly nounwind willreturn writeonly } -attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { argmemonly nounwind willreturn } -attributes #8 = { nounwind } -attributes #9 = { nounwind allocsize(0,1) } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} -!3 = distinct !{!3, !4} -!4 = !{!"llvm.loop.unroll.disable"} -!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/diff-aa.txt b/src/dios-egraphs/Diospyros/flaky-outputs/diff-aa.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/diff-clang.txt b/src/dios-egraphs/Diospyros/flaky-outputs/diff-clang.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/diff-dce.txt 
b/src/dios-egraphs/Diospyros/flaky-outputs/diff-dce.txt deleted file mode 100644 index f0b20f07..00000000 --- a/src/dios-egraphs/Diospyros/flaky-outputs/diff-dce.txt +++ /dev/null @@ -1,5416 +0,0 @@ -229,235c229,235 -< %17 = getelementptr float, float* %0, i32 0 -< %18 = load float, float* %17, align 4 -< %19 = insertelement <4 x float> zeroinitializer, float %18, i32 0 -< %20 = insertelement <4 x float> %19, float 1.000000e+00, i32 1 -< %21 = insertelement <4 x float> %20, float 1.000000e+00, i32 2 -< %22 = insertelement <4 x float> %21, float 1.000000e+00, i32 3 -< %23 = insertelement <4 x float> zeroinitializer, float %10, i32 0 ---- -> %17 = insertelement <4 x float> zeroinitializer, float %4, i32 0 -> %18 = insertelement <4 x float> %17, float 1.000000e+00, i32 1 -> %19 = insertelement <4 x float> %18, float 1.000000e+00, i32 2 -> %20 = insertelement <4 x float> %19, float 1.000000e+00, i32 3 -> %21 = getelementptr float, float* %1, i32 0 -> %22 = load float, float* %21, align 4 -> %23 = insertelement <4 x float> zeroinitializer, float %22, i32 0 -239c239 -< %27 = fmul <4 x float> %22, %26 ---- -> %27 = fmul <4 x float> %20, %26 -284,307c284,306 -< %69 = insertelement <4 x float> zeroinitializer, float %56, i32 0 -< %70 = insertelement <4 x float> %69, float 0.000000e+00, i32 1 -< %71 = insertelement <4 x float> %70, float 0.000000e+00, i32 2 -< %72 = insertelement <4 x float> %71, float 0.000000e+00, i32 3 -< %73 = fmul <4 x float> %68, %72 -< %74 = fadd <4 x float> %73, zeroinitializer -< %75 = getelementptr float, float* %0, i32 0 -< %76 = getelementptr inbounds float, float* %75, i64 1 -< %77 = load float, float* %76, align 4 -< %78 = insertelement <4 x float> zeroinitializer, float %77, i32 0 -< %79 = insertelement <4 x float> %78, float 0.000000e+00, i32 1 -< %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 2 -< %81 = insertelement <4 x float> %80, float 0.000000e+00, i32 3 -< %82 = getelementptr float, float* %1, i32 0 -< %83 = getelementptr inbounds float, float* %82, i64 3 -< %84 = load float, float* %83, align 4 -< %85 = insertelement <4 x float> zeroinitializer, float %84, i32 0 -< %86 = insertelement <4 x float> %85, float 0.000000e+00, i32 1 -< %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 2 -< %88 = insertelement <4 x float> %87, float 0.000000e+00, i32 3 -< %89 = call <4 x float> @llvm.fma.v4f32(<4 x float> %81, <4 x float> %88, <4 x float> %74) -< %90 = extractelement <4 x float> %89, i32 0 -< store float %90, float* %64, align 4 -< %91 = extractelement <4 x float> %89, i32 1 ---- -> %69 = load float, float* %55, align 4 -> %70 = insertelement <4 x float> zeroinitializer, float %69, i32 0 -> %71 = insertelement <4 x float> %70, float 0.000000e+00, i32 1 -> %72 = insertelement <4 x float> %71, float 0.000000e+00, i32 2 -> %73 = insertelement <4 x float> %72, float 0.000000e+00, i32 3 -> %74 = fmul <4 x float> %68, %73 -> %75 = fadd <4 x float> %74, zeroinitializer -> %76 = getelementptr float, float* %0, i32 0 -> %77 = getelementptr inbounds float, float* %76, i64 1 -> %78 = load float, float* %77, align 4 -> %79 = insertelement <4 x float> zeroinitializer, float %78, i32 0 -> %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 1 -> %81 = insertelement <4 x float> %80, float 0.000000e+00, i32 2 -> %82 = insertelement <4 x float> %81, float 0.000000e+00, i32 3 -> %83 = getelementptr float, float* %1, i32 0 -> %84 = getelementptr inbounds float, float* %83, i64 3 -> %85 = load float, float* %84, align 4 -> %86 = insertelement <4 x 
float> zeroinitializer, float %85, i32 0 -> %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 1 -> %88 = insertelement <4 x float> %87, float 0.000000e+00, i32 2 -> %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3 -> %90 = call <4 x float> @llvm.fma.v4f32(<4 x float> %82, <4 x float> %89, <4 x float> %75) -> %91 = extractelement <4 x float> %90, i32 0 -309c308 -< %93 = getelementptr inbounds float, float* %92, i64 2 ---- -> %93 = getelementptr inbounds float, float* %92, i64 1 -311,344c310,344 -< %94 = getelementptr float, float* %0, i32 0 -< %95 = getelementptr inbounds float, float* %94, i64 2 -< %96 = load float, float* %95, align 4 -< %97 = insertelement <4 x float> zeroinitializer, float %96, i32 0 -< %98 = insertelement <4 x float> %97, float 0.000000e+00, i32 1 -< %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 2 -< %100 = insertelement <4 x float> %99, float 0.000000e+00, i32 3 -< %101 = getelementptr float, float* %1, i32 0 -< %102 = load float, float* %101, align 4 -< %103 = insertelement <4 x float> zeroinitializer, float %102, i32 0 -< %104 = insertelement <4 x float> %103, float 0.000000e+00, i32 1 -< %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 2 -< %106 = insertelement <4 x float> %105, float 0.000000e+00, i32 3 -< %107 = call <4 x float> @llvm.fma.v4f32(<4 x float> %100, <4 x float> %106, <4 x float> zeroinitializer) -< %108 = extractelement <4 x float> %107, i32 0 -< store float %108, float* %93, align 4 -< %109 = insertelement <4 x float> zeroinitializer, float %96, i32 0 -< %110 = insertelement <4 x float> %109, float 1.000000e+00, i32 1 -< %111 = insertelement <4 x float> %110, float 1.000000e+00, i32 2 -< %112 = insertelement <4 x float> %111, float 1.000000e+00, i32 3 -< %113 = insertelement <4 x float> zeroinitializer, float %102, i32 0 -< %114 = insertelement <4 x float> %113, float 0.000000e+00, i32 1 -< %115 = insertelement <4 x float> %114, float 0.000000e+00, i32 2 -< %116 = insertelement <4 x float> %115, float 0.000000e+00, i32 3 -< %117 = fmul <4 x float> %112, %116 -< %118 = fadd <4 x float> %117, zeroinitializer -< %119 = getelementptr float, float* %0, i32 0 -< %120 = getelementptr inbounds float, float* %119, i64 3 -< %121 = load float, float* %120, align 4 -< %122 = insertelement <4 x float> zeroinitializer, float %121, i32 0 -< %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 1 -< %124 = insertelement <4 x float> %123, float 0.000000e+00, i32 2 -< %125 = insertelement <4 x float> %124, float 0.000000e+00, i32 3 -< %126 = load float, float* %37, align 4 ---- -> %94 = extractelement <4 x float> %90, i32 1 -> %95 = getelementptr float, float* %2, i32 0 -> %96 = getelementptr inbounds float, float* %95, i64 2 -> store float %94, float* %96, align 4 -> %97 = getelementptr float, float* %0, i32 0 -> %98 = getelementptr inbounds float, float* %97, i64 2 -> %99 = load float, float* %98, align 4 -> %100 = insertelement <4 x float> zeroinitializer, float %99, i32 0 -> %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 1 -> %102 = insertelement <4 x float> %101, float 0.000000e+00, i32 2 -> %103 = insertelement <4 x float> %102, float 0.000000e+00, i32 3 -> %104 = getelementptr float, float* %1, i32 0 -> %105 = load float, float* %104, align 4 -> %106 = insertelement <4 x float> zeroinitializer, float %105, i32 0 -> %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 1 -> %108 = insertelement <4 x float> %107, float 0.000000e+00, i32 2 -> %109 = insertelement <4 x float> 
%108, float 0.000000e+00, i32 3 -> %110 = call <4 x float> @llvm.fma.v4f32(<4 x float> %103, <4 x float> %109, <4 x float> zeroinitializer) -> %111 = extractelement <4 x float> %110, i32 0 -> %112 = getelementptr float, float* %2, i32 0 -> %113 = getelementptr inbounds float, float* %112, i64 2 -> store float %111, float* %113, align 4 -> %114 = insertelement <4 x float> zeroinitializer, float %99, i32 0 -> %115 = insertelement <4 x float> %114, float 1.000000e+00, i32 1 -> %116 = insertelement <4 x float> %115, float 1.000000e+00, i32 2 -> %117 = insertelement <4 x float> %116, float 1.000000e+00, i32 3 -> %118 = insertelement <4 x float> zeroinitializer, float %105, i32 0 -> %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 1 -> %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 2 -> %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 3 -> %122 = fmul <4 x float> %117, %121 -> %123 = fadd <4 x float> %122, zeroinitializer -> %124 = getelementptr float, float* %0, i32 0 -> %125 = getelementptr inbounds float, float* %124, i64 3 -> %126 = load float, float* %125, align 4 -349,361c349,361 -< %131 = call <4 x float> @llvm.fma.v4f32(<4 x float> %125, <4 x float> %130, <4 x float> %118) -< %132 = extractelement <4 x float> %131, i32 0 -< store float %132, float* %93, align 4 -< %133 = extractelement <4 x float> %131, i32 1 -< %134 = getelementptr float, float* %2, i32 0 -< %135 = getelementptr inbounds float, float* %134, i64 3 -< store float %133, float* %135, align 4 -< %136 = load float, float* %95, align 4 -< %137 = insertelement <4 x float> zeroinitializer, float %136, i32 0 -< %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 1 -< %139 = insertelement <4 x float> %138, float 0.000000e+00, i32 2 -< %140 = insertelement <4 x float> %139, float 0.000000e+00, i32 3 -< %141 = load float, float* %55, align 4 ---- -> %131 = load float, float* %37, align 4 -> %132 = insertelement <4 x float> zeroinitializer, float %131, i32 0 -> %133 = insertelement <4 x float> %132, float 0.000000e+00, i32 1 -> %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 2 -> %135 = insertelement <4 x float> %134, float 0.000000e+00, i32 3 -> %136 = call <4 x float> @llvm.fma.v4f32(<4 x float> %130, <4 x float> %135, <4 x float> %123) -> %137 = extractelement <4 x float> %136, i32 0 -> store float %137, float* %113, align 4 -> %138 = extractelement <4 x float> %136, i32 1 -> %139 = getelementptr float, float* %2, i32 0 -> %140 = getelementptr inbounds float, float* %139, i64 3 -> store float %138, float* %140, align 4 -> %141 = load float, float* %98, align 4 -366,386c366,386 -< %146 = call <4 x float> @llvm.fma.v4f32(<4 x float> %140, <4 x float> %145, <4 x float> zeroinitializer) -< %147 = extractelement <4 x float> %146, i32 0 -< store float %147, float* %135, align 4 -< %148 = insertelement <4 x float> zeroinitializer, float %136, i32 0 -< %149 = insertelement <4 x float> %148, float 1.000000e+00, i32 1 -< %150 = insertelement <4 x float> %149, float 1.000000e+00, i32 2 -< %151 = insertelement <4 x float> %150, float 1.000000e+00, i32 3 -< %152 = insertelement <4 x float> zeroinitializer, float %141, i32 0 -< %153 = insertelement <4 x float> %152, float 0.000000e+00, i32 1 -< %154 = insertelement <4 x float> %153, float 0.000000e+00, i32 2 -< %155 = insertelement <4 x float> %154, float 0.000000e+00, i32 3 -< %156 = fmul <4 x float> %151, %155 -< %157 = fadd <4 x float> %156, zeroinitializer -< %158 = getelementptr float, float* %0, i32 0 -< %159 = 
getelementptr inbounds float, float* %158, i64 3 -< %160 = load float, float* %159, align 4 -< %161 = insertelement <4 x float> zeroinitializer, float %160, i32 0 -< %162 = insertelement <4 x float> %161, float 0.000000e+00, i32 1 -< %163 = insertelement <4 x float> %162, float 0.000000e+00, i32 2 -< %164 = insertelement <4 x float> %163, float 0.000000e+00, i32 3 -< %165 = load float, float* %83, align 4 ---- -> %146 = load float, float* %55, align 4 -> %147 = insertelement <4 x float> zeroinitializer, float %146, i32 0 -> %148 = insertelement <4 x float> %147, float 0.000000e+00, i32 1 -> %149 = insertelement <4 x float> %148, float 0.000000e+00, i32 2 -> %150 = insertelement <4 x float> %149, float 0.000000e+00, i32 3 -> %151 = call <4 x float> @llvm.fma.v4f32(<4 x float> %145, <4 x float> %150, <4 x float> zeroinitializer) -> %152 = extractelement <4 x float> %151, i32 0 -> store float %152, float* %140, align 4 -> %153 = insertelement <4 x float> zeroinitializer, float %141, i32 0 -> %154 = insertelement <4 x float> %153, float 1.000000e+00, i32 1 -> %155 = insertelement <4 x float> %154, float 1.000000e+00, i32 2 -> %156 = insertelement <4 x float> %155, float 1.000000e+00, i32 3 -> %157 = insertelement <4 x float> zeroinitializer, float %146, i32 0 -> %158 = insertelement <4 x float> %157, float 0.000000e+00, i32 1 -> %159 = insertelement <4 x float> %158, float 0.000000e+00, i32 2 -> %160 = insertelement <4 x float> %159, float 0.000000e+00, i32 3 -> %161 = fmul <4 x float> %156, %160 -> %162 = fadd <4 x float> %161, zeroinitializer -> %163 = getelementptr float, float* %0, i32 0 -> %164 = getelementptr inbounds float, float* %163, i64 3 -> %165 = load float, float* %164, align 4 -391,393c391,398 -< %170 = call <4 x float> @llvm.fma.v4f32(<4 x float> %164, <4 x float> %169, <4 x float> %157) -< %171 = extractelement <4 x float> %170, i32 0 -< store float %171, float* %135, align 4 ---- -> %170 = load float, float* %84, align 4 -> %171 = insertelement <4 x float> zeroinitializer, float %170, i32 0 -> %172 = insertelement <4 x float> %171, float 0.000000e+00, i32 1 -> %173 = insertelement <4 x float> %172, float 0.000000e+00, i32 2 -> %174 = insertelement <4 x float> %173, float 0.000000e+00, i32 3 -> %175 = call <4 x float> @llvm.fma.v4f32(<4 x float> %169, <4 x float> %174, <4 x float> %162) -> %176 = extractelement <4 x float> %175, i32 0 -> store float %176, float* %140, align 4 -516,532c521,537 -< %57 = load i32, i32* %31, align 4 -< %58 = bitcast i32 %57 to float -< %59 = bitcast i32 %57 to float -< %60 = fmul float %58, %59 -< %61 = fadd float %56, %60 -< %62 = call float @llvm.sqrt.f32(float %61) -< %63 = bitcast i32 %52 to float -< %64 = fcmp olt float %63, 0.000000e+00 -< %65 = sext i1 %64 to i32 -< %66 = fcmp ogt float %63, 0.000000e+00 -< %67 = zext i1 %66 to i32 -< %68 = add nsw i32 %65, %67 -< %69 = sitofp i32 %68 to float -< %70 = fneg float %69 -< %71 = fmul float %62, %70 -< %72 = bitcast i32 %52 to float -< %73 = fadd float %72, %71 ---- -> %57 = bitcast i32 %32 to float -> %58 = bitcast i32 %32 to float -> %59 = fmul float %57, %58 -> %60 = fadd float %56, %59 -> %61 = call float @llvm.sqrt.f32(float %60) -> %62 = bitcast i32 %52 to float -> %63 = fcmp olt float %62, 0.000000e+00 -> %64 = sext i1 %63 to i32 -> %65 = fcmp ogt float %62, 0.000000e+00 -> %66 = zext i1 %65 to i32 -> %67 = add nsw i32 %64, %66 -> %68 = sitofp i32 %67 to float -> %69 = fneg float %68 -> %70 = fmul float %61, %69 -> %71 = bitcast i32 %52 to float -> %72 = fadd float %71, %70 -> %73 = 
bitcast i32 %52 to float -534,545c539,550 -< %75 = bitcast i32 %52 to float -< %76 = fmul float %74, %75 -< %77 = fadd float %76, 0.000000e+00 -< %78 = bitcast i32 %57 to float -< %79 = bitcast i32 %57 to float -< %80 = fmul float %78, %79 -< %81 = fadd float %77, %80 -< %82 = call float @llvm.sqrt.f32(float %81) -< %83 = fneg float %69 -< %84 = fmul float %82, %83 -< %85 = bitcast i32 %52 to float -< %86 = fadd float %85, %84 ---- -> %75 = fmul float %73, %74 -> %76 = fadd float %75, 0.000000e+00 -> %77 = bitcast i32 %32 to float -> %78 = bitcast i32 %32 to float -> %79 = fmul float %77, %78 -> %80 = fadd float %76, %79 -> %81 = call float @llvm.sqrt.f32(float %80) -> %82 = fneg float %68 -> %83 = fmul float %81, %82 -> %84 = bitcast i32 %52 to float -> %85 = fadd float %84, %83 -> %86 = bitcast i32 %52 to float -547,560c552,565 -< %88 = bitcast i32 %52 to float -< %89 = fmul float %87, %88 -< %90 = fadd float %89, 0.000000e+00 -< %91 = bitcast i32 %57 to float -< %92 = bitcast i32 %57 to float -< %93 = fmul float %91, %92 -< %94 = fadd float %90, %93 -< %95 = call float @llvm.sqrt.f32(float %94) -< %96 = fneg float %69 -< %97 = fmul float %95, %96 -< %98 = bitcast i32 %52 to float -< %99 = fadd float %98, %97 -< %100 = fmul float %86, %99 -< %101 = fadd float %100, 0.000000e+00 ---- -> %88 = fmul float %86, %87 -> %89 = fadd float %88, 0.000000e+00 -> %90 = bitcast i32 %32 to float -> %91 = bitcast i32 %32 to float -> %92 = fmul float %90, %91 -> %93 = fadd float %89, %92 -> %94 = call float @llvm.sqrt.f32(float %93) -> %95 = fneg float %68 -> %96 = fmul float %94, %95 -> %97 = bitcast i32 %52 to float -> %98 = fadd float %97, %96 -> %99 = fmul float %85, %98 -> %100 = fadd float %99, 0.000000e+00 -> %101 = bitcast i32 %52 to float -562,574c567,579 -< %103 = bitcast i32 %52 to float -< %104 = fmul float %102, %103 -< %105 = fadd float %104, 0.000000e+00 -< %106 = bitcast i32 %57 to float -< %107 = bitcast i32 %57 to float -< %108 = fmul float %106, %107 -< %109 = fadd float %105, %108 -< %110 = call float @llvm.sqrt.f32(float %109) -< %111 = fneg float %69 -< %112 = fmul float %110, %111 -< %113 = fmul float %112, 0.000000e+00 -< %114 = bitcast i32 %57 to float -< %115 = fadd float %114, %113 ---- -> %103 = fmul float %101, %102 -> %104 = fadd float %103, 0.000000e+00 -> %105 = bitcast i32 %32 to float -> %106 = bitcast i32 %32 to float -> %107 = fmul float %105, %106 -> %108 = fadd float %104, %107 -> %109 = call float @llvm.sqrt.f32(float %108) -> %110 = fneg float %68 -> %111 = fmul float %109, %110 -> %112 = fmul float %111, 0.000000e+00 -> %113 = bitcast i32 %32 to float -> %114 = fadd float %113, %112 -> %115 = bitcast i32 %52 to float -576,594c581,599 -< %117 = bitcast i32 %52 to float -< %118 = fmul float %116, %117 -< %119 = fadd float %118, 0.000000e+00 -< %120 = bitcast i32 %57 to float -< %121 = bitcast i32 %57 to float -< %122 = fmul float %120, %121 -< %123 = fadd float %119, %122 -< %124 = call float @llvm.sqrt.f32(float %123) -< %125 = fneg float %69 -< %126 = fmul float %124, %125 -< %127 = fmul float %126, 0.000000e+00 -< %128 = bitcast i32 %57 to float -< %129 = fadd float %128, %127 -< %130 = fmul float %115, %129 -< %131 = fadd float %101, %130 -< %132 = call float @llvm.sqrt.f32(float %131) -< %133 = fadd float %132, 0.000000e+00 -< %134 = fdiv float %73, %133 -< %135 = fmul float %134, 2.000000e+00 ---- -> %117 = fmul float %115, %116 -> %118 = fadd float %117, 0.000000e+00 -> %119 = bitcast i32 %32 to float -> %120 = bitcast i32 %32 to float -> %121 = fmul float 
%119, %120 -> %122 = fadd float %118, %121 -> %123 = call float @llvm.sqrt.f32(float %122) -> %124 = fneg float %68 -> %125 = fmul float %123, %124 -> %126 = fmul float %125, 0.000000e+00 -> %127 = bitcast i32 %32 to float -> %128 = fadd float %127, %126 -> %129 = fmul float %114, %128 -> %130 = fadd float %100, %129 -> %131 = call float @llvm.sqrt.f32(float %130) -> %132 = fadd float %131, 0.000000e+00 -> %133 = fdiv float %72, %132 -> %134 = fmul float %133, 2.000000e+00 -> %135 = bitcast i32 %52 to float -596,607c601,612 -< %137 = bitcast i32 %52 to float -< %138 = fmul float %136, %137 -< %139 = fadd float %138, 0.000000e+00 -< %140 = bitcast i32 %57 to float -< %141 = bitcast i32 %57 to float -< %142 = fmul float %140, %141 -< %143 = fadd float %139, %142 -< %144 = call float @llvm.sqrt.f32(float %143) -< %145 = fneg float %69 -< %146 = fmul float %144, %145 -< %147 = bitcast i32 %52 to float -< %148 = fadd float %147, %146 ---- -> %137 = fmul float %135, %136 -> %138 = fadd float %137, 0.000000e+00 -> %139 = bitcast i32 %32 to float -> %140 = bitcast i32 %32 to float -> %141 = fmul float %139, %140 -> %142 = fadd float %138, %141 -> %143 = call float @llvm.sqrt.f32(float %142) -> %144 = fneg float %68 -> %145 = fmul float %143, %144 -> %146 = bitcast i32 %52 to float -> %147 = fadd float %146, %145 -> %148 = bitcast i32 %52 to float -609,620c614,625 -< %150 = bitcast i32 %52 to float -< %151 = fmul float %149, %150 -< %152 = fadd float %151, 0.000000e+00 -< %153 = bitcast i32 %57 to float -< %154 = bitcast i32 %57 to float -< %155 = fmul float %153, %154 -< %156 = fadd float %152, %155 -< %157 = call float @llvm.sqrt.f32(float %156) -< %158 = fneg float %69 -< %159 = fmul float %157, %158 -< %160 = bitcast i32 %52 to float -< %161 = fadd float %160, %159 ---- -> %150 = fmul float %148, %149 -> %151 = fadd float %150, 0.000000e+00 -> %152 = bitcast i32 %32 to float -> %153 = bitcast i32 %32 to float -> %154 = fmul float %152, %153 -> %155 = fadd float %151, %154 -> %156 = call float @llvm.sqrt.f32(float %155) -> %157 = fneg float %68 -> %158 = fmul float %156, %157 -> %159 = bitcast i32 %52 to float -> %160 = fadd float %159, %158 -> %161 = bitcast i32 %52 to float -622,635c627,640 -< %163 = bitcast i32 %52 to float -< %164 = fmul float %162, %163 -< %165 = fadd float %164, 0.000000e+00 -< %166 = bitcast i32 %57 to float -< %167 = bitcast i32 %57 to float -< %168 = fmul float %166, %167 -< %169 = fadd float %165, %168 -< %170 = call float @llvm.sqrt.f32(float %169) -< %171 = fneg float %69 -< %172 = fmul float %170, %171 -< %173 = bitcast i32 %52 to float -< %174 = fadd float %173, %172 -< %175 = fmul float %161, %174 -< %176 = fadd float %175, 0.000000e+00 ---- -> %163 = fmul float %161, %162 -> %164 = fadd float %163, 0.000000e+00 -> %165 = bitcast i32 %32 to float -> %166 = bitcast i32 %32 to float -> %167 = fmul float %165, %166 -> %168 = fadd float %164, %167 -> %169 = call float @llvm.sqrt.f32(float %168) -> %170 = fneg float %68 -> %171 = fmul float %169, %170 -> %172 = bitcast i32 %52 to float -> %173 = fadd float %172, %171 -> %174 = fmul float %160, %173 -> %175 = fadd float %174, 0.000000e+00 -> %176 = bitcast i32 %52 to float -637,649c642,654 -< %178 = bitcast i32 %52 to float -< %179 = fmul float %177, %178 -< %180 = fadd float %179, 0.000000e+00 -< %181 = bitcast i32 %57 to float -< %182 = bitcast i32 %57 to float -< %183 = fmul float %181, %182 -< %184 = fadd float %180, %183 -< %185 = call float @llvm.sqrt.f32(float %184) -< %186 = fneg float %69 -< %187 = fmul float 
%185, %186 -< %188 = fmul float %187, 0.000000e+00 -< %189 = bitcast i32 %57 to float -< %190 = fadd float %189, %188 ---- -> %178 = fmul float %176, %177 -> %179 = fadd float %178, 0.000000e+00 -> %180 = bitcast i32 %32 to float -> %181 = bitcast i32 %32 to float -> %182 = fmul float %180, %181 -> %183 = fadd float %179, %182 -> %184 = call float @llvm.sqrt.f32(float %183) -> %185 = fneg float %68 -> %186 = fmul float %184, %185 -> %187 = fmul float %186, 0.000000e+00 -> %188 = bitcast i32 %32 to float -> %189 = fadd float %188, %187 -> %190 = bitcast i32 %52 to float -651,670c656,675 -< %192 = bitcast i32 %52 to float -< %193 = fmul float %191, %192 -< %194 = fadd float %193, 0.000000e+00 -< %195 = bitcast i32 %57 to float -< %196 = bitcast i32 %57 to float -< %197 = fmul float %195, %196 -< %198 = fadd float %194, %197 -< %199 = call float @llvm.sqrt.f32(float %198) -< %200 = fneg float %69 -< %201 = fmul float %199, %200 -< %202 = fmul float %201, 0.000000e+00 -< %203 = bitcast i32 %57 to float -< %204 = fadd float %203, %202 -< %205 = fmul float %190, %204 -< %206 = fadd float %176, %205 -< %207 = call float @llvm.sqrt.f32(float %206) -< %208 = fadd float %207, 0.000000e+00 -< %209 = fdiv float %148, %208 -< %210 = fmul float %135, %209 -< %211 = insertelement <4 x float> %49, float %210, i32 1 ---- -> %192 = fmul float %190, %191 -> %193 = fadd float %192, 0.000000e+00 -> %194 = bitcast i32 %32 to float -> %195 = bitcast i32 %32 to float -> %196 = fmul float %194, %195 -> %197 = fadd float %193, %196 -> %198 = call float @llvm.sqrt.f32(float %197) -> %199 = fneg float %68 -> %200 = fmul float %198, %199 -> %201 = fmul float %200, 0.000000e+00 -> %202 = bitcast i32 %32 to float -> %203 = fadd float %202, %201 -> %204 = fmul float %189, %203 -> %205 = fadd float %175, %204 -> %206 = call float @llvm.sqrt.f32(float %205) -> %207 = fadd float %206, 0.000000e+00 -> %208 = fdiv float %147, %207 -> %209 = fmul float %134, %208 -> %210 = insertelement <4 x float> %49, float %209, i32 1 -> %211 = bitcast i32 %52 to float -672,683c677,688 -< %213 = bitcast i32 %52 to float -< %214 = fmul float %212, %213 -< %215 = fadd float %214, 0.000000e+00 -< %216 = bitcast i32 %57 to float -< %217 = bitcast i32 %57 to float -< %218 = fmul float %216, %217 -< %219 = fadd float %215, %218 -< %220 = call float @llvm.sqrt.f32(float %219) -< %221 = fneg float %69 -< %222 = fmul float %220, %221 -< %223 = bitcast i32 %52 to float -< %224 = fadd float %223, %222 ---- -> %213 = fmul float %211, %212 -> %214 = fadd float %213, 0.000000e+00 -> %215 = bitcast i32 %32 to float -> %216 = bitcast i32 %32 to float -> %217 = fmul float %215, %216 -> %218 = fadd float %214, %217 -> %219 = call float @llvm.sqrt.f32(float %218) -> %220 = fneg float %68 -> %221 = fmul float %219, %220 -> %222 = bitcast i32 %52 to float -> %223 = fadd float %222, %221 -> %224 = bitcast i32 %52 to float -685,696c690,701 -< %226 = bitcast i32 %52 to float -< %227 = fmul float %225, %226 -< %228 = fadd float %227, 0.000000e+00 -< %229 = bitcast i32 %57 to float -< %230 = bitcast i32 %57 to float -< %231 = fmul float %229, %230 -< %232 = fadd float %228, %231 -< %233 = call float @llvm.sqrt.f32(float %232) -< %234 = fneg float %69 -< %235 = fmul float %233, %234 -< %236 = bitcast i32 %52 to float -< %237 = fadd float %236, %235 ---- -> %226 = fmul float %224, %225 -> %227 = fadd float %226, 0.000000e+00 -> %228 = bitcast i32 %32 to float -> %229 = bitcast i32 %32 to float -> %230 = fmul float %228, %229 -> %231 = fadd float %227, %230 -> %232 = 
call float @llvm.sqrt.f32(float %231) -> %233 = fneg float %68 -> %234 = fmul float %232, %233 -> %235 = bitcast i32 %52 to float -> %236 = fadd float %235, %234 -> %237 = bitcast i32 %52 to float -698,711c703,716 -< %239 = bitcast i32 %52 to float -< %240 = fmul float %238, %239 -< %241 = fadd float %240, 0.000000e+00 -< %242 = bitcast i32 %57 to float -< %243 = bitcast i32 %57 to float -< %244 = fmul float %242, %243 -< %245 = fadd float %241, %244 -< %246 = call float @llvm.sqrt.f32(float %245) -< %247 = fneg float %69 -< %248 = fmul float %246, %247 -< %249 = bitcast i32 %52 to float -< %250 = fadd float %249, %248 -< %251 = fmul float %237, %250 -< %252 = fadd float %251, 0.000000e+00 ---- -> %239 = fmul float %237, %238 -> %240 = fadd float %239, 0.000000e+00 -> %241 = bitcast i32 %32 to float -> %242 = bitcast i32 %32 to float -> %243 = fmul float %241, %242 -> %244 = fadd float %240, %243 -> %245 = call float @llvm.sqrt.f32(float %244) -> %246 = fneg float %68 -> %247 = fmul float %245, %246 -> %248 = bitcast i32 %52 to float -> %249 = fadd float %248, %247 -> %250 = fmul float %236, %249 -> %251 = fadd float %250, 0.000000e+00 -> %252 = bitcast i32 %52 to float -713,725c718,730 -< %254 = bitcast i32 %52 to float -< %255 = fmul float %253, %254 -< %256 = fadd float %255, 0.000000e+00 -< %257 = bitcast i32 %57 to float -< %258 = bitcast i32 %57 to float -< %259 = fmul float %257, %258 -< %260 = fadd float %256, %259 -< %261 = call float @llvm.sqrt.f32(float %260) -< %262 = fneg float %69 -< %263 = fmul float %261, %262 -< %264 = fmul float %263, 0.000000e+00 -< %265 = bitcast i32 %57 to float -< %266 = fadd float %265, %264 ---- -> %254 = fmul float %252, %253 -> %255 = fadd float %254, 0.000000e+00 -> %256 = bitcast i32 %32 to float -> %257 = bitcast i32 %32 to float -> %258 = fmul float %256, %257 -> %259 = fadd float %255, %258 -> %260 = call float @llvm.sqrt.f32(float %259) -> %261 = fneg float %68 -> %262 = fmul float %260, %261 -> %263 = fmul float %262, 0.000000e+00 -> %264 = bitcast i32 %32 to float -> %265 = fadd float %264, %263 -> %266 = bitcast i32 %52 to float -727,745c732,750 -< %268 = bitcast i32 %52 to float -< %269 = fmul float %267, %268 -< %270 = fadd float %269, 0.000000e+00 -< %271 = bitcast i32 %57 to float -< %272 = bitcast i32 %57 to float -< %273 = fmul float %271, %272 -< %274 = fadd float %270, %273 -< %275 = call float @llvm.sqrt.f32(float %274) -< %276 = fneg float %69 -< %277 = fmul float %275, %276 -< %278 = fmul float %277, 0.000000e+00 -< %279 = bitcast i32 %57 to float -< %280 = fadd float %279, %278 -< %281 = fmul float %266, %280 -< %282 = fadd float %252, %281 -< %283 = call float @llvm.sqrt.f32(float %282) -< %284 = fadd float %283, 0.000000e+00 -< %285 = fdiv float %224, %284 -< %286 = fmul float %285, 2.000000e+00 ---- -> %268 = fmul float %266, %267 -> %269 = fadd float %268, 0.000000e+00 -> %270 = bitcast i32 %32 to float -> %271 = bitcast i32 %32 to float -> %272 = fmul float %270, %271 -> %273 = fadd float %269, %272 -> %274 = call float @llvm.sqrt.f32(float %273) -> %275 = fneg float %68 -> %276 = fmul float %274, %275 -> %277 = fmul float %276, 0.000000e+00 -> %278 = bitcast i32 %32 to float -> %279 = fadd float %278, %277 -> %280 = fmul float %265, %279 -> %281 = fadd float %251, %280 -> %282 = call float @llvm.sqrt.f32(float %281) -> %283 = fadd float %282, 0.000000e+00 -> %284 = fdiv float %223, %283 -> %285 = fmul float %284, 2.000000e+00 -> %286 = bitcast i32 %52 to float -747,759c752,764 -< %288 = bitcast i32 %52 to float -< %289 = 
fmul float %287, %288 -< %290 = fadd float %289, 0.000000e+00 -< %291 = bitcast i32 %57 to float -< %292 = bitcast i32 %57 to float -< %293 = fmul float %291, %292 -< %294 = fadd float %290, %293 -< %295 = call float @llvm.sqrt.f32(float %294) -< %296 = fneg float %69 -< %297 = fmul float %295, %296 -< %298 = fmul float %297, 0.000000e+00 -< %299 = bitcast i32 %57 to float -< %300 = fadd float %299, %298 ---- -> %288 = fmul float %286, %287 -> %289 = fadd float %288, 0.000000e+00 -> %290 = bitcast i32 %32 to float -> %291 = bitcast i32 %32 to float -> %292 = fmul float %290, %291 -> %293 = fadd float %289, %292 -> %294 = call float @llvm.sqrt.f32(float %293) -> %295 = fneg float %68 -> %296 = fmul float %294, %295 -> %297 = fmul float %296, 0.000000e+00 -> %298 = bitcast i32 %32 to float -> %299 = fadd float %298, %297 -> %300 = bitcast i32 %52 to float -761,772c766,777 -< %302 = bitcast i32 %52 to float -< %303 = fmul float %301, %302 -< %304 = fadd float %303, 0.000000e+00 -< %305 = bitcast i32 %57 to float -< %306 = bitcast i32 %57 to float -< %307 = fmul float %305, %306 -< %308 = fadd float %304, %307 -< %309 = call float @llvm.sqrt.f32(float %308) -< %310 = fneg float %69 -< %311 = fmul float %309, %310 -< %312 = bitcast i32 %52 to float -< %313 = fadd float %312, %311 ---- -> %302 = fmul float %300, %301 -> %303 = fadd float %302, 0.000000e+00 -> %304 = bitcast i32 %32 to float -> %305 = bitcast i32 %32 to float -> %306 = fmul float %304, %305 -> %307 = fadd float %303, %306 -> %308 = call float @llvm.sqrt.f32(float %307) -> %309 = fneg float %68 -> %310 = fmul float %308, %309 -> %311 = bitcast i32 %52 to float -> %312 = fadd float %311, %310 -> %313 = bitcast i32 %52 to float -774,787c779,792 -< %315 = bitcast i32 %52 to float -< %316 = fmul float %314, %315 -< %317 = fadd float %316, 0.000000e+00 -< %318 = bitcast i32 %57 to float -< %319 = bitcast i32 %57 to float -< %320 = fmul float %318, %319 -< %321 = fadd float %317, %320 -< %322 = call float @llvm.sqrt.f32(float %321) -< %323 = fneg float %69 -< %324 = fmul float %322, %323 -< %325 = bitcast i32 %52 to float -< %326 = fadd float %325, %324 -< %327 = fmul float %313, %326 -< %328 = fadd float %327, 0.000000e+00 ---- -> %315 = fmul float %313, %314 -> %316 = fadd float %315, 0.000000e+00 -> %317 = bitcast i32 %32 to float -> %318 = bitcast i32 %32 to float -> %319 = fmul float %317, %318 -> %320 = fadd float %316, %319 -> %321 = call float @llvm.sqrt.f32(float %320) -> %322 = fneg float %68 -> %323 = fmul float %321, %322 -> %324 = bitcast i32 %52 to float -> %325 = fadd float %324, %323 -> %326 = fmul float %312, %325 -> %327 = fadd float %326, 0.000000e+00 -> %328 = bitcast i32 %52 to float -789,801c794,806 -< %330 = bitcast i32 %52 to float -< %331 = fmul float %329, %330 -< %332 = fadd float %331, 0.000000e+00 -< %333 = bitcast i32 %57 to float -< %334 = bitcast i32 %57 to float -< %335 = fmul float %333, %334 -< %336 = fadd float %332, %335 -< %337 = call float @llvm.sqrt.f32(float %336) -< %338 = fneg float %69 -< %339 = fmul float %337, %338 -< %340 = fmul float %339, 0.000000e+00 -< %341 = bitcast i32 %57 to float -< %342 = fadd float %341, %340 ---- -> %330 = fmul float %328, %329 -> %331 = fadd float %330, 0.000000e+00 -> %332 = bitcast i32 %32 to float -> %333 = bitcast i32 %32 to float -> %334 = fmul float %332, %333 -> %335 = fadd float %331, %334 -> %336 = call float @llvm.sqrt.f32(float %335) -> %337 = fneg float %68 -> %338 = fmul float %336, %337 -> %339 = fmul float %338, 0.000000e+00 -> %340 = bitcast i32 
%32 to float -> %341 = fadd float %340, %339 -> %342 = bitcast i32 %52 to float -803,822c808,827 -< %344 = bitcast i32 %52 to float -< %345 = fmul float %343, %344 -< %346 = fadd float %345, 0.000000e+00 -< %347 = bitcast i32 %57 to float -< %348 = bitcast i32 %57 to float -< %349 = fmul float %347, %348 -< %350 = fadd float %346, %349 -< %351 = call float @llvm.sqrt.f32(float %350) -< %352 = fneg float %69 -< %353 = fmul float %351, %352 -< %354 = fmul float %353, 0.000000e+00 -< %355 = bitcast i32 %57 to float -< %356 = fadd float %355, %354 -< %357 = fmul float %342, %356 -< %358 = fadd float %328, %357 -< %359 = call float @llvm.sqrt.f32(float %358) -< %360 = fadd float %359, 0.000000e+00 -< %361 = fdiv float %300, %360 -< %362 = fmul float %286, %361 -< %363 = insertelement <4 x float> %211, float %362, i32 2 ---- -> %344 = fmul float %342, %343 -> %345 = fadd float %344, 0.000000e+00 -> %346 = bitcast i32 %32 to float -> %347 = bitcast i32 %32 to float -> %348 = fmul float %346, %347 -> %349 = fadd float %345, %348 -> %350 = call float @llvm.sqrt.f32(float %349) -> %351 = fneg float %68 -> %352 = fmul float %350, %351 -> %353 = fmul float %352, 0.000000e+00 -> %354 = bitcast i32 %32 to float -> %355 = fadd float %354, %353 -> %356 = fmul float %341, %355 -> %357 = fadd float %327, %356 -> %358 = call float @llvm.sqrt.f32(float %357) -> %359 = fadd float %358, 0.000000e+00 -> %360 = fdiv float %299, %359 -> %361 = fmul float %285, %360 -> %362 = insertelement <4 x float> %210, float %361, i32 2 -> %363 = bitcast i32 %52 to float -824,836c829,841 -< %365 = bitcast i32 %52 to float -< %366 = fmul float %364, %365 -< %367 = fadd float %366, 0.000000e+00 -< %368 = bitcast i32 %57 to float -< %369 = bitcast i32 %57 to float -< %370 = fmul float %368, %369 -< %371 = fadd float %367, %370 -< %372 = call float @llvm.sqrt.f32(float %371) -< %373 = fneg float %69 -< %374 = fmul float %372, %373 -< %375 = fmul float %374, 0.000000e+00 -< %376 = bitcast i32 %57 to float -< %377 = fadd float %376, %375 ---- -> %365 = fmul float %363, %364 -> %366 = fadd float %365, 0.000000e+00 -> %367 = bitcast i32 %32 to float -> %368 = bitcast i32 %32 to float -> %369 = fmul float %367, %368 -> %370 = fadd float %366, %369 -> %371 = call float @llvm.sqrt.f32(float %370) -> %372 = fneg float %68 -> %373 = fmul float %371, %372 -> %374 = fmul float %373, 0.000000e+00 -> %375 = bitcast i32 %32 to float -> %376 = fadd float %375, %374 -> %377 = bitcast i32 %52 to float -838,849c843,854 -< %379 = bitcast i32 %52 to float -< %380 = fmul float %378, %379 -< %381 = fadd float %380, 0.000000e+00 -< %382 = bitcast i32 %57 to float -< %383 = bitcast i32 %57 to float -< %384 = fmul float %382, %383 -< %385 = fadd float %381, %384 -< %386 = call float @llvm.sqrt.f32(float %385) -< %387 = fneg float %69 -< %388 = fmul float %386, %387 -< %389 = bitcast i32 %52 to float -< %390 = fadd float %389, %388 ---- -> %379 = fmul float %377, %378 -> %380 = fadd float %379, 0.000000e+00 -> %381 = bitcast i32 %32 to float -> %382 = bitcast i32 %32 to float -> %383 = fmul float %381, %382 -> %384 = fadd float %380, %383 -> %385 = call float @llvm.sqrt.f32(float %384) -> %386 = fneg float %68 -> %387 = fmul float %385, %386 -> %388 = bitcast i32 %52 to float -> %389 = fadd float %388, %387 -> %390 = bitcast i32 %52 to float -851,864c856,869 -< %392 = bitcast i32 %52 to float -< %393 = fmul float %391, %392 -< %394 = fadd float %393, 0.000000e+00 -< %395 = bitcast i32 %57 to float -< %396 = bitcast i32 %57 to float -< %397 = fmul float 
%395, %396 -< %398 = fadd float %394, %397 -< %399 = call float @llvm.sqrt.f32(float %398) -< %400 = fneg float %69 -< %401 = fmul float %399, %400 -< %402 = bitcast i32 %52 to float -< %403 = fadd float %402, %401 -< %404 = fmul float %390, %403 -< %405 = fadd float %404, 0.000000e+00 ---- -> %392 = fmul float %390, %391 -> %393 = fadd float %392, 0.000000e+00 -> %394 = bitcast i32 %32 to float -> %395 = bitcast i32 %32 to float -> %396 = fmul float %394, %395 -> %397 = fadd float %393, %396 -> %398 = call float @llvm.sqrt.f32(float %397) -> %399 = fneg float %68 -> %400 = fmul float %398, %399 -> %401 = bitcast i32 %52 to float -> %402 = fadd float %401, %400 -> %403 = fmul float %389, %402 -> %404 = fadd float %403, 0.000000e+00 -> %405 = bitcast i32 %52 to float -866,878c871,883 -< %407 = bitcast i32 %52 to float -< %408 = fmul float %406, %407 -< %409 = fadd float %408, 0.000000e+00 -< %410 = bitcast i32 %57 to float -< %411 = bitcast i32 %57 to float -< %412 = fmul float %410, %411 -< %413 = fadd float %409, %412 -< %414 = call float @llvm.sqrt.f32(float %413) -< %415 = fneg float %69 -< %416 = fmul float %414, %415 -< %417 = fmul float %416, 0.000000e+00 -< %418 = bitcast i32 %57 to float -< %419 = fadd float %418, %417 ---- -> %407 = fmul float %405, %406 -> %408 = fadd float %407, 0.000000e+00 -> %409 = bitcast i32 %32 to float -> %410 = bitcast i32 %32 to float -> %411 = fmul float %409, %410 -> %412 = fadd float %408, %411 -> %413 = call float @llvm.sqrt.f32(float %412) -> %414 = fneg float %68 -> %415 = fmul float %413, %414 -> %416 = fmul float %415, 0.000000e+00 -> %417 = bitcast i32 %32 to float -> %418 = fadd float %417, %416 -> %419 = bitcast i32 %52 to float -880,898c885,903 -< %421 = bitcast i32 %52 to float -< %422 = fmul float %420, %421 -< %423 = fadd float %422, 0.000000e+00 -< %424 = bitcast i32 %57 to float -< %425 = bitcast i32 %57 to float -< %426 = fmul float %424, %425 -< %427 = fadd float %423, %426 -< %428 = call float @llvm.sqrt.f32(float %427) -< %429 = fneg float %69 -< %430 = fmul float %428, %429 -< %431 = fmul float %430, 0.000000e+00 -< %432 = bitcast i32 %57 to float -< %433 = fadd float %432, %431 -< %434 = fmul float %419, %433 -< %435 = fadd float %405, %434 -< %436 = call float @llvm.sqrt.f32(float %435) -< %437 = fadd float %436, 0.000000e+00 -< %438 = fdiv float %377, %437 -< %439 = fmul float %438, 2.000000e+00 ---- -> %421 = fmul float %419, %420 -> %422 = fadd float %421, 0.000000e+00 -> %423 = bitcast i32 %32 to float -> %424 = bitcast i32 %32 to float -> %425 = fmul float %423, %424 -> %426 = fadd float %422, %425 -> %427 = call float @llvm.sqrt.f32(float %426) -> %428 = fneg float %68 -> %429 = fmul float %427, %428 -> %430 = fmul float %429, 0.000000e+00 -> %431 = bitcast i32 %32 to float -> %432 = fadd float %431, %430 -> %433 = fmul float %418, %432 -> %434 = fadd float %404, %433 -> %435 = call float @llvm.sqrt.f32(float %434) -> %436 = fadd float %435, 0.000000e+00 -> %437 = fdiv float %376, %436 -> %438 = fmul float %437, 2.000000e+00 -> %439 = bitcast i32 %52 to float -900,911c905,916 -< %441 = bitcast i32 %52 to float -< %442 = fmul float %440, %441 -< %443 = fadd float %442, 0.000000e+00 -< %444 = bitcast i32 %57 to float -< %445 = bitcast i32 %57 to float -< %446 = fmul float %444, %445 -< %447 = fadd float %443, %446 -< %448 = call float @llvm.sqrt.f32(float %447) -< %449 = fneg float %69 -< %450 = fmul float %448, %449 -< %451 = bitcast i32 %52 to float -< %452 = fadd float %451, %450 ---- -> %441 = fmul float %439, %440 -> 
%442 = fadd float %441, 0.000000e+00 -> %443 = bitcast i32 %32 to float -> %444 = bitcast i32 %32 to float -> %445 = fmul float %443, %444 -> %446 = fadd float %442, %445 -> %447 = call float @llvm.sqrt.f32(float %446) -> %448 = fneg float %68 -> %449 = fmul float %447, %448 -> %450 = bitcast i32 %52 to float -> %451 = fadd float %450, %449 -> %452 = bitcast i32 %52 to float -913,924c918,929 -< %454 = bitcast i32 %52 to float -< %455 = fmul float %453, %454 -< %456 = fadd float %455, 0.000000e+00 -< %457 = bitcast i32 %57 to float -< %458 = bitcast i32 %57 to float -< %459 = fmul float %457, %458 -< %460 = fadd float %456, %459 -< %461 = call float @llvm.sqrt.f32(float %460) -< %462 = fneg float %69 -< %463 = fmul float %461, %462 -< %464 = bitcast i32 %52 to float -< %465 = fadd float %464, %463 ---- -> %454 = fmul float %452, %453 -> %455 = fadd float %454, 0.000000e+00 -> %456 = bitcast i32 %32 to float -> %457 = bitcast i32 %32 to float -> %458 = fmul float %456, %457 -> %459 = fadd float %455, %458 -> %460 = call float @llvm.sqrt.f32(float %459) -> %461 = fneg float %68 -> %462 = fmul float %460, %461 -> %463 = bitcast i32 %52 to float -> %464 = fadd float %463, %462 -> %465 = bitcast i32 %52 to float -926,939c931,944 -< %467 = bitcast i32 %52 to float -< %468 = fmul float %466, %467 -< %469 = fadd float %468, 0.000000e+00 -< %470 = bitcast i32 %57 to float -< %471 = bitcast i32 %57 to float -< %472 = fmul float %470, %471 -< %473 = fadd float %469, %472 -< %474 = call float @llvm.sqrt.f32(float %473) -< %475 = fneg float %69 -< %476 = fmul float %474, %475 -< %477 = bitcast i32 %52 to float -< %478 = fadd float %477, %476 -< %479 = fmul float %465, %478 -< %480 = fadd float %479, 0.000000e+00 ---- -> %467 = fmul float %465, %466 -> %468 = fadd float %467, 0.000000e+00 -> %469 = bitcast i32 %32 to float -> %470 = bitcast i32 %32 to float -> %471 = fmul float %469, %470 -> %472 = fadd float %468, %471 -> %473 = call float @llvm.sqrt.f32(float %472) -> %474 = fneg float %68 -> %475 = fmul float %473, %474 -> %476 = bitcast i32 %52 to float -> %477 = fadd float %476, %475 -> %478 = fmul float %464, %477 -> %479 = fadd float %478, 0.000000e+00 -> %480 = bitcast i32 %52 to float -941,953c946,958 -< %482 = bitcast i32 %52 to float -< %483 = fmul float %481, %482 -< %484 = fadd float %483, 0.000000e+00 -< %485 = bitcast i32 %57 to float -< %486 = bitcast i32 %57 to float -< %487 = fmul float %485, %486 -< %488 = fadd float %484, %487 -< %489 = call float @llvm.sqrt.f32(float %488) -< %490 = fneg float %69 -< %491 = fmul float %489, %490 -< %492 = fmul float %491, 0.000000e+00 -< %493 = bitcast i32 %57 to float -< %494 = fadd float %493, %492 ---- -> %482 = fmul float %480, %481 -> %483 = fadd float %482, 0.000000e+00 -> %484 = bitcast i32 %32 to float -> %485 = bitcast i32 %32 to float -> %486 = fmul float %484, %485 -> %487 = fadd float %483, %486 -> %488 = call float @llvm.sqrt.f32(float %487) -> %489 = fneg float %68 -> %490 = fmul float %488, %489 -> %491 = fmul float %490, 0.000000e+00 -> %492 = bitcast i32 %32 to float -> %493 = fadd float %492, %491 -> %494 = bitcast i32 %52 to float -955,975c960,980 -< %496 = bitcast i32 %52 to float -< %497 = fmul float %495, %496 -< %498 = fadd float %497, 0.000000e+00 -< %499 = bitcast i32 %57 to float -< %500 = bitcast i32 %57 to float -< %501 = fmul float %499, %500 -< %502 = fadd float %498, %501 -< %503 = call float @llvm.sqrt.f32(float %502) -< %504 = fneg float %69 -< %505 = fmul float %503, %504 -< %506 = fmul float %505, 0.000000e+00 -< 
%507 = bitcast i32 %57 to float -< %508 = fadd float %507, %506 -< %509 = fmul float %494, %508 -< %510 = fadd float %480, %509 -< %511 = call float @llvm.sqrt.f32(float %510) -< %512 = fadd float %511, 0.000000e+00 -< %513 = fdiv float %452, %512 -< %514 = fmul float %439, %513 -< %515 = insertelement <4 x float> %363, float %514, i32 3 -< %516 = fsub <4 x float> , %515 ---- -> %496 = fmul float %494, %495 -> %497 = fadd float %496, 0.000000e+00 -> %498 = bitcast i32 %32 to float -> %499 = bitcast i32 %32 to float -> %500 = fmul float %498, %499 -> %501 = fadd float %497, %500 -> %502 = call float @llvm.sqrt.f32(float %501) -> %503 = fneg float %68 -> %504 = fmul float %502, %503 -> %505 = fmul float %504, 0.000000e+00 -> %506 = bitcast i32 %32 to float -> %507 = fadd float %506, %505 -> %508 = fmul float %493, %507 -> %509 = fadd float %479, %508 -> %510 = call float @llvm.sqrt.f32(float %509) -> %511 = fadd float %510, 0.000000e+00 -> %512 = fdiv float %451, %511 -> %513 = fmul float %438, %512 -> %514 = insertelement <4 x float> %362, float %513, i32 3 -> %515 = fsub <4 x float> , %514 -> %516 = bitcast i32 %52 to float -977,989c982,994 -< %518 = bitcast i32 %52 to float -< %519 = fmul float %517, %518 -< %520 = fadd float %519, 0.000000e+00 -< %521 = bitcast i32 %57 to float -< %522 = bitcast i32 %57 to float -< %523 = fmul float %521, %522 -< %524 = fadd float %520, %523 -< %525 = call float @llvm.sqrt.f32(float %524) -< %526 = fneg float %69 -< %527 = fmul float %525, %526 -< %528 = fmul float %527, 0.000000e+00 -< %529 = bitcast i32 %57 to float -< %530 = fadd float %529, %528 ---- -> %518 = fmul float %516, %517 -> %519 = fadd float %518, 0.000000e+00 -> %520 = bitcast i32 %32 to float -> %521 = bitcast i32 %32 to float -> %522 = fmul float %520, %521 -> %523 = fadd float %519, %522 -> %524 = call float @llvm.sqrt.f32(float %523) -> %525 = fneg float %68 -> %526 = fmul float %524, %525 -> %527 = fmul float %526, 0.000000e+00 -> %528 = bitcast i32 %32 to float -> %529 = fadd float %528, %527 -> %530 = bitcast i32 %52 to float -991,1002c996,1007 -< %532 = bitcast i32 %52 to float -< %533 = fmul float %531, %532 -< %534 = fadd float %533, 0.000000e+00 -< %535 = bitcast i32 %57 to float -< %536 = bitcast i32 %57 to float -< %537 = fmul float %535, %536 -< %538 = fadd float %534, %537 -< %539 = call float @llvm.sqrt.f32(float %538) -< %540 = fneg float %69 -< %541 = fmul float %539, %540 -< %542 = bitcast i32 %52 to float -< %543 = fadd float %542, %541 ---- -> %532 = fmul float %530, %531 -> %533 = fadd float %532, 0.000000e+00 -> %534 = bitcast i32 %32 to float -> %535 = bitcast i32 %32 to float -> %536 = fmul float %534, %535 -> %537 = fadd float %533, %536 -> %538 = call float @llvm.sqrt.f32(float %537) -> %539 = fneg float %68 -> %540 = fmul float %538, %539 -> %541 = bitcast i32 %52 to float -> %542 = fadd float %541, %540 -> %543 = bitcast i32 %52 to float -1004,1017c1009,1022 -< %545 = bitcast i32 %52 to float -< %546 = fmul float %544, %545 -< %547 = fadd float %546, 0.000000e+00 -< %548 = bitcast i32 %57 to float -< %549 = bitcast i32 %57 to float -< %550 = fmul float %548, %549 -< %551 = fadd float %547, %550 -< %552 = call float @llvm.sqrt.f32(float %551) -< %553 = fneg float %69 -< %554 = fmul float %552, %553 -< %555 = bitcast i32 %52 to float -< %556 = fadd float %555, %554 -< %557 = fmul float %543, %556 -< %558 = fadd float %557, 0.000000e+00 ---- -> %545 = fmul float %543, %544 -> %546 = fadd float %545, 0.000000e+00 -> %547 = bitcast i32 %32 to float -> %548 = 
bitcast i32 %32 to float -> %549 = fmul float %547, %548 -> %550 = fadd float %546, %549 -> %551 = call float @llvm.sqrt.f32(float %550) -> %552 = fneg float %68 -> %553 = fmul float %551, %552 -> %554 = bitcast i32 %52 to float -> %555 = fadd float %554, %553 -> %556 = fmul float %542, %555 -> %557 = fadd float %556, 0.000000e+00 -> %558 = bitcast i32 %52 to float -1019,1031c1024,1036 -< %560 = bitcast i32 %52 to float -< %561 = fmul float %559, %560 -< %562 = fadd float %561, 0.000000e+00 -< %563 = bitcast i32 %57 to float -< %564 = bitcast i32 %57 to float -< %565 = fmul float %563, %564 -< %566 = fadd float %562, %565 -< %567 = call float @llvm.sqrt.f32(float %566) -< %568 = fneg float %69 -< %569 = fmul float %567, %568 -< %570 = fmul float %569, 0.000000e+00 -< %571 = bitcast i32 %57 to float -< %572 = fadd float %571, %570 ---- -> %560 = fmul float %558, %559 -> %561 = fadd float %560, 0.000000e+00 -> %562 = bitcast i32 %32 to float -> %563 = bitcast i32 %32 to float -> %564 = fmul float %562, %563 -> %565 = fadd float %561, %564 -> %566 = call float @llvm.sqrt.f32(float %565) -> %567 = fneg float %68 -> %568 = fmul float %566, %567 -> %569 = fmul float %568, 0.000000e+00 -> %570 = bitcast i32 %32 to float -> %571 = fadd float %570, %569 -> %572 = bitcast i32 %52 to float -1033,1051c1038,1056 -< %574 = bitcast i32 %52 to float -< %575 = fmul float %573, %574 -< %576 = fadd float %575, 0.000000e+00 -< %577 = bitcast i32 %57 to float -< %578 = bitcast i32 %57 to float -< %579 = fmul float %577, %578 -< %580 = fadd float %576, %579 -< %581 = call float @llvm.sqrt.f32(float %580) -< %582 = fneg float %69 -< %583 = fmul float %581, %582 -< %584 = fmul float %583, 0.000000e+00 -< %585 = bitcast i32 %57 to float -< %586 = fadd float %585, %584 -< %587 = fmul float %572, %586 -< %588 = fadd float %558, %587 -< %589 = call float @llvm.sqrt.f32(float %588) -< %590 = fadd float %589, 0.000000e+00 -< %591 = fdiv float %530, %590 -< %592 = fmul float %591, 2.000000e+00 ---- -> %574 = fmul float %572, %573 -> %575 = fadd float %574, 0.000000e+00 -> %576 = bitcast i32 %32 to float -> %577 = bitcast i32 %32 to float -> %578 = fmul float %576, %577 -> %579 = fadd float %575, %578 -> %580 = call float @llvm.sqrt.f32(float %579) -> %581 = fneg float %68 -> %582 = fmul float %580, %581 -> %583 = fmul float %582, 0.000000e+00 -> %584 = bitcast i32 %32 to float -> %585 = fadd float %584, %583 -> %586 = fmul float %571, %585 -> %587 = fadd float %557, %586 -> %588 = call float @llvm.sqrt.f32(float %587) -> %589 = fadd float %588, 0.000000e+00 -> %590 = fdiv float %529, %589 -> %591 = fmul float %590, 2.000000e+00 -> %592 = bitcast i32 %52 to float -1053,1065c1058,1070 -< %594 = bitcast i32 %52 to float -< %595 = fmul float %593, %594 -< %596 = fadd float %595, 0.000000e+00 -< %597 = bitcast i32 %57 to float -< %598 = bitcast i32 %57 to float -< %599 = fmul float %597, %598 -< %600 = fadd float %596, %599 -< %601 = call float @llvm.sqrt.f32(float %600) -< %602 = fneg float %69 -< %603 = fmul float %601, %602 -< %604 = fmul float %603, 0.000000e+00 -< %605 = bitcast i32 %57 to float -< %606 = fadd float %605, %604 ---- -> %594 = fmul float %592, %593 -> %595 = fadd float %594, 0.000000e+00 -> %596 = bitcast i32 %32 to float -> %597 = bitcast i32 %32 to float -> %598 = fmul float %596, %597 -> %599 = fadd float %595, %598 -> %600 = call float @llvm.sqrt.f32(float %599) -> %601 = fneg float %68 -> %602 = fmul float %600, %601 -> %603 = fmul float %602, 0.000000e+00 -> %604 = bitcast i32 %32 to float -> %605 = 
fadd float %604, %603 -> %606 = bitcast i32 %52 to float -1067,1078c1072,1083 -< %608 = bitcast i32 %52 to float -< %609 = fmul float %607, %608 -< %610 = fadd float %609, 0.000000e+00 -< %611 = bitcast i32 %57 to float -< %612 = bitcast i32 %57 to float -< %613 = fmul float %611, %612 -< %614 = fadd float %610, %613 -< %615 = call float @llvm.sqrt.f32(float %614) -< %616 = fneg float %69 -< %617 = fmul float %615, %616 -< %618 = bitcast i32 %52 to float -< %619 = fadd float %618, %617 ---- -> %608 = fmul float %606, %607 -> %609 = fadd float %608, 0.000000e+00 -> %610 = bitcast i32 %32 to float -> %611 = bitcast i32 %32 to float -> %612 = fmul float %610, %611 -> %613 = fadd float %609, %612 -> %614 = call float @llvm.sqrt.f32(float %613) -> %615 = fneg float %68 -> %616 = fmul float %614, %615 -> %617 = bitcast i32 %52 to float -> %618 = fadd float %617, %616 -> %619 = bitcast i32 %52 to float -1080,1093c1085,1098 -< %621 = bitcast i32 %52 to float -< %622 = fmul float %620, %621 -< %623 = fadd float %622, 0.000000e+00 -< %624 = bitcast i32 %57 to float -< %625 = bitcast i32 %57 to float -< %626 = fmul float %624, %625 -< %627 = fadd float %623, %626 -< %628 = call float @llvm.sqrt.f32(float %627) -< %629 = fneg float %69 -< %630 = fmul float %628, %629 -< %631 = bitcast i32 %52 to float -< %632 = fadd float %631, %630 -< %633 = fmul float %619, %632 -< %634 = fadd float %633, 0.000000e+00 ---- -> %621 = fmul float %619, %620 -> %622 = fadd float %621, 0.000000e+00 -> %623 = bitcast i32 %32 to float -> %624 = bitcast i32 %32 to float -> %625 = fmul float %623, %624 -> %626 = fadd float %622, %625 -> %627 = call float @llvm.sqrt.f32(float %626) -> %628 = fneg float %68 -> %629 = fmul float %627, %628 -> %630 = bitcast i32 %52 to float -> %631 = fadd float %630, %629 -> %632 = fmul float %618, %631 -> %633 = fadd float %632, 0.000000e+00 -> %634 = bitcast i32 %52 to float -1095,1107c1100,1112 -< %636 = bitcast i32 %52 to float -< %637 = fmul float %635, %636 -< %638 = fadd float %637, 0.000000e+00 -< %639 = bitcast i32 %57 to float -< %640 = bitcast i32 %57 to float -< %641 = fmul float %639, %640 -< %642 = fadd float %638, %641 -< %643 = call float @llvm.sqrt.f32(float %642) -< %644 = fneg float %69 -< %645 = fmul float %643, %644 -< %646 = fmul float %645, 0.000000e+00 -< %647 = bitcast i32 %57 to float -< %648 = fadd float %647, %646 ---- -> %636 = fmul float %634, %635 -> %637 = fadd float %636, 0.000000e+00 -> %638 = bitcast i32 %32 to float -> %639 = bitcast i32 %32 to float -> %640 = fmul float %638, %639 -> %641 = fadd float %637, %640 -> %642 = call float @llvm.sqrt.f32(float %641) -> %643 = fneg float %68 -> %644 = fmul float %642, %643 -> %645 = fmul float %644, 0.000000e+00 -> %646 = bitcast i32 %32 to float -> %647 = fadd float %646, %645 -> %648 = bitcast i32 %52 to float -1109,1162c1114,1147 -< %650 = bitcast i32 %52 to float -< %651 = fmul float %649, %650 -< %652 = fadd float %651, 0.000000e+00 -< %653 = bitcast i32 %57 to float -< %654 = bitcast i32 %57 to float -< %655 = fmul float %653, %654 -< %656 = fadd float %652, %655 -< %657 = call float @llvm.sqrt.f32(float %656) -< %658 = fneg float %69 -< %659 = fmul float %657, %658 -< %660 = fmul float %659, 0.000000e+00 -< %661 = bitcast i32 %57 to float -< %662 = fadd float %661, %660 -< %663 = fmul float %648, %662 -< %664 = fadd float %634, %663 -< %665 = call float @llvm.sqrt.f32(float %664) -< %666 = fadd float %665, 0.000000e+00 -< %667 = fdiv float %606, %666 -< %668 = fmul float %592, %667 -< %669 = fsub float 
1.000000e+00, %668 -< %670 = insertelement <4 x float> zeroinitializer, float %669, i32 0 -< %671 = insertelement <4 x float> %670, float 0.000000e+00, i32 1 -< %672 = insertelement <4 x float> %671, float 0.000000e+00, i32 2 -< %673 = insertelement <4 x float> %672, float 0.000000e+00, i32 3 -< %674 = shufflevector <4 x float> %516, <4 x float> %673, <8 x i32> -< %675 = extractelement <8 x float> %674, i32 0 -< %676 = getelementptr float, float* %2, i32 0 -< %677 = getelementptr inbounds float, float* %676, i64 3 -< %678 = bitcast float* %677 to i32* -< %679 = bitcast i32* %678 to float* -< store float %675, float* %679, align 4 -< %680 = extractelement <8 x float> %674, i32 1 -< %681 = alloca [4 x float], align 16 -< %682 = bitcast [4 x float]* %681 to i32* -< %683 = bitcast i32* %682 to float* -< store float %680, float* %683, align 4 -< %684 = extractelement <8 x float> %674, i32 2 -< %685 = getelementptr inbounds [4 x float], [4 x float]* %681, i64 0, i64 1 -< %686 = bitcast float* %685 to i32* -< %687 = bitcast i32* %686 to float* -< store float %684, float* %687, align 4 -< %688 = extractelement <8 x float> %674, i32 3 -< %689 = getelementptr inbounds [4 x float], [4 x float]* %681, i64 0, i64 2 -< %690 = bitcast float* %689 to i32* -< %691 = bitcast i32* %690 to float* -< store float %688, float* %691, align 4 -< %692 = extractelement <8 x float> %674, i32 4 -< %693 = getelementptr inbounds [4 x float], [4 x float]* %681, i64 0, i64 3 -< %694 = bitcast float* %693 to i32* -< %695 = bitcast i32* %694 to float* -< store float %692, float* %695, align 4 -< %696 = bitcast float* %1 to i8* -< %697 = bitcast [4 x float]* %681 to i8* -< call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %696, i8* nonnull align 16 dereferenceable(16) %697, i64 16, i1 false) ---- -> %650 = fmul float %648, %649 -> %651 = fadd float %650, 0.000000e+00 -> %652 = bitcast i32 %32 to float -> %653 = bitcast i32 %32 to float -> %654 = fmul float %652, %653 -> %655 = fadd float %651, %654 -> %656 = call float @llvm.sqrt.f32(float %655) -> %657 = fneg float %68 -> %658 = fmul float %656, %657 -> %659 = fmul float %658, 0.000000e+00 -> %660 = bitcast i32 %32 to float -> %661 = fadd float %660, %659 -> %662 = fmul float %647, %661 -> %663 = fadd float %633, %662 -> %664 = call float @llvm.sqrt.f32(float %663) -> %665 = fadd float %664, 0.000000e+00 -> %666 = fdiv float %605, %665 -> %667 = fmul float %591, %666 -> %668 = fsub float 1.000000e+00, %667 -> %669 = insertelement <4 x float> zeroinitializer, float %668, i32 0 -> %670 = insertelement <4 x float> %669, float 0.000000e+00, i32 1 -> %671 = insertelement <4 x float> %670, float 0.000000e+00, i32 2 -> %672 = insertelement <4 x float> %671, float 0.000000e+00, i32 3 -> %673 = shufflevector <4 x float> %515, <4 x float> %672, <8 x i32> -> %674 = extractelement <8 x float> %673, i32 0 -> %675 = getelementptr float, float* %2, i32 0 -> %676 = getelementptr inbounds float, float* %675, i64 3 -> %677 = bitcast float* %676 to i32* -> %678 = bitcast i32* %677 to float* -> store float %674, float* %678, align 4 -> %679 = bitcast float* %1 to i8* -> %680 = alloca [4 x float], align 16 -> %681 = bitcast [4 x float]* %680 to i8* -> call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %679, i8* nonnull align 16 dereferenceable(16) %681, i64 16, i1 false) -1164,1168c1149,1169 -< %698 = getelementptr float, float* %0, i32 0 -< %699 = bitcast float* %698 to i32* -< %700 = load i32, i32* %699, align 4 -< %701 = bitcast i32 %700 
to float -< %702 = bitcast i32 %700 to float ---- -> %682 = bitcast i32 %52 to float -> %683 = bitcast i32 %52 to float -> %684 = fmul float %682, %683 -> %685 = fadd float %684, 0.000000e+00 -> %686 = load i32, i32* %31, align 4 -> %687 = bitcast i32 %686 to float -> %688 = bitcast i32 %686 to float -> %689 = fmul float %687, %688 -> %690 = fadd float %685, %689 -> %691 = call float @llvm.sqrt.f32(float %690) -> %692 = sitofp i32 %67 to float -> %693 = fneg float %692 -> %694 = fmul float %691, %693 -> %695 = bitcast i32 %52 to float -> %696 = fadd float %695, %694 -> %697 = bitcast i32 %52 to float -> %698 = bitcast i32 %52 to float -> %699 = fmul float %697, %698 -> %700 = fadd float %699, 0.000000e+00 -> %701 = bitcast i32 %686 to float -> %702 = bitcast i32 %686 to float -1170,1172c1171,1173 -< %704 = fadd float %703, 0.000000e+00 -< %705 = bitcast i32 %57 to float -< %706 = bitcast i32 %57 to float ---- -> %704 = fadd float %700, %703 -> %705 = call float @llvm.sqrt.f32(float %704) -> %706 = fneg float %692 -1174,1207c1175,1208 -< %708 = fadd float %704, %707 -< %709 = call float @llvm.sqrt.f32(float %708) -< %710 = sitofp i32 %68 to float -< %711 = fneg float %710 -< %712 = fmul float %709, %711 -< %713 = bitcast i32 %700 to float -< %714 = fadd float %713, %712 -< %715 = bitcast i32 %700 to float -< %716 = bitcast i32 %700 to float -< %717 = fmul float %715, %716 -< %718 = fadd float %717, 0.000000e+00 -< %719 = bitcast i32 %57 to float -< %720 = bitcast i32 %57 to float -< %721 = fmul float %719, %720 -< %722 = fadd float %718, %721 -< %723 = call float @llvm.sqrt.f32(float %722) -< %724 = fneg float %710 -< %725 = fmul float %723, %724 -< %726 = bitcast i32 %700 to float -< %727 = fadd float %726, %725 -< %728 = bitcast i32 %700 to float -< %729 = bitcast i32 %700 to float -< %730 = fmul float %728, %729 -< %731 = fadd float %730, 0.000000e+00 -< %732 = bitcast i32 %57 to float -< %733 = bitcast i32 %57 to float -< %734 = fmul float %732, %733 -< %735 = fadd float %731, %734 -< %736 = call float @llvm.sqrt.f32(float %735) -< %737 = fneg float %710 -< %738 = fmul float %736, %737 -< %739 = bitcast i32 %700 to float -< %740 = fadd float %739, %738 -< %741 = fmul float %727, %740 ---- -> %708 = bitcast i32 %52 to float -> %709 = fadd float %708, %707 -> %710 = bitcast i32 %52 to float -> %711 = bitcast i32 %52 to float -> %712 = fmul float %710, %711 -> %713 = fadd float %712, 0.000000e+00 -> %714 = bitcast i32 %686 to float -> %715 = bitcast i32 %686 to float -> %716 = fmul float %714, %715 -> %717 = fadd float %713, %716 -> %718 = call float @llvm.sqrt.f32(float %717) -> %719 = fneg float %692 -> %720 = fmul float %718, %719 -> %721 = bitcast i32 %52 to float -> %722 = fadd float %721, %720 -> %723 = fmul float %709, %722 -> %724 = fadd float %723, 0.000000e+00 -> %725 = bitcast i32 %52 to float -> %726 = bitcast i32 %52 to float -> %727 = fmul float %725, %726 -> %728 = fadd float %727, 0.000000e+00 -> %729 = bitcast i32 %686 to float -> %730 = bitcast i32 %686 to float -> %731 = fmul float %729, %730 -> %732 = fadd float %728, %731 -> %733 = call float @llvm.sqrt.f32(float %732) -> %734 = fneg float %692 -> %735 = fmul float %733, %734 -> %736 = fmul float %735, 0.000000e+00 -> %737 = bitcast i32 %686 to float -> %738 = fadd float %737, %736 -> %739 = bitcast i32 %52 to float -> %740 = bitcast i32 %52 to float -> %741 = fmul float %739, %740 -1209,1210c1210,1211 -< %743 = bitcast i32 %700 to float -< %744 = bitcast i32 %700 to float ---- -> %743 = bitcast i32 %686 to float -> 
%744 = bitcast i32 %686 to float -1212,1214c1213,1215 -< %746 = fadd float %745, 0.000000e+00 -< %747 = bitcast i32 %57 to float -< %748 = bitcast i32 %57 to float ---- -> %746 = fadd float %742, %745 -> %747 = call float @llvm.sqrt.f32(float %746) -> %748 = fneg float %692 -1216,1252c1217,1253 -< %750 = fadd float %746, %749 -< %751 = call float @llvm.sqrt.f32(float %750) -< %752 = fneg float %710 -< %753 = fmul float %751, %752 -< %754 = fmul float %753, 0.000000e+00 -< %755 = bitcast i32 %57 to float -< %756 = fadd float %755, %754 -< %757 = bitcast i32 %700 to float -< %758 = bitcast i32 %700 to float -< %759 = fmul float %757, %758 -< %760 = fadd float %759, 0.000000e+00 -< %761 = bitcast i32 %57 to float -< %762 = bitcast i32 %57 to float -< %763 = fmul float %761, %762 -< %764 = fadd float %760, %763 -< %765 = call float @llvm.sqrt.f32(float %764) -< %766 = fneg float %710 -< %767 = fmul float %765, %766 -< %768 = fmul float %767, 0.000000e+00 -< %769 = bitcast i32 %57 to float -< %770 = fadd float %769, %768 -< %771 = fmul float %756, %770 -< %772 = fadd float %742, %771 -< %773 = call float @llvm.sqrt.f32(float %772) -< %774 = fadd float %773, 0.000000e+00 -< %775 = fdiv float %714, %774 -< %776 = fmul float %775, 2.000000e+00 -< %777 = bitcast i32 %700 to float -< %778 = bitcast i32 %700 to float -< %779 = fmul float %777, %778 -< %780 = fadd float %779, 0.000000e+00 -< %781 = bitcast i32 %57 to float -< %782 = bitcast i32 %57 to float -< %783 = fmul float %781, %782 -< %784 = fadd float %780, %783 -< %785 = call float @llvm.sqrt.f32(float %784) -< %786 = fneg float %710 ---- -> %750 = fmul float %749, 0.000000e+00 -> %751 = bitcast i32 %686 to float -> %752 = fadd float %751, %750 -> %753 = fmul float %738, %752 -> %754 = fadd float %724, %753 -> %755 = call float @llvm.sqrt.f32(float %754) -> %756 = fadd float %755, 0.000000e+00 -> %757 = fdiv float %696, %756 -> %758 = fmul float %757, 2.000000e+00 -> %759 = bitcast i32 %52 to float -> %760 = bitcast i32 %52 to float -> %761 = fmul float %759, %760 -> %762 = fadd float %761, 0.000000e+00 -> %763 = bitcast i32 %686 to float -> %764 = bitcast i32 %686 to float -> %765 = fmul float %763, %764 -> %766 = fadd float %762, %765 -> %767 = call float @llvm.sqrt.f32(float %766) -> %768 = fneg float %692 -> %769 = fmul float %767, %768 -> %770 = bitcast i32 %52 to float -> %771 = fadd float %770, %769 -> %772 = bitcast i32 %52 to float -> %773 = bitcast i32 %52 to float -> %774 = fmul float %772, %773 -> %775 = fadd float %774, 0.000000e+00 -> %776 = bitcast i32 %686 to float -> %777 = bitcast i32 %686 to float -> %778 = fmul float %776, %777 -> %779 = fadd float %775, %778 -> %780 = call float @llvm.sqrt.f32(float %779) -> %781 = fneg float %692 -> %782 = fmul float %780, %781 -> %783 = bitcast i32 %52 to float -> %784 = fadd float %783, %782 -> %785 = bitcast i32 %52 to float -> %786 = bitcast i32 %52 to float -1254,1282c1255,1283 -< %788 = bitcast i32 %700 to float -< %789 = fadd float %788, %787 -< %790 = bitcast i32 %700 to float -< %791 = bitcast i32 %700 to float -< %792 = fmul float %790, %791 -< %793 = fadd float %792, 0.000000e+00 -< %794 = bitcast i32 %57 to float -< %795 = bitcast i32 %57 to float -< %796 = fmul float %794, %795 -< %797 = fadd float %793, %796 -< %798 = call float @llvm.sqrt.f32(float %797) -< %799 = fneg float %710 -< %800 = fmul float %798, %799 -< %801 = bitcast i32 %700 to float -< %802 = fadd float %801, %800 -< %803 = bitcast i32 %700 to float -< %804 = bitcast i32 %700 to float -< %805 = fmul float 
%803, %804 -< %806 = fadd float %805, 0.000000e+00 -< %807 = bitcast i32 %57 to float -< %808 = bitcast i32 %57 to float -< %809 = fmul float %807, %808 -< %810 = fadd float %806, %809 -< %811 = call float @llvm.sqrt.f32(float %810) -< %812 = fneg float %710 -< %813 = fmul float %811, %812 -< %814 = bitcast i32 %700 to float -< %815 = fadd float %814, %813 -< %816 = fmul float %802, %815 ---- -> %788 = fadd float %787, 0.000000e+00 -> %789 = bitcast i32 %686 to float -> %790 = bitcast i32 %686 to float -> %791 = fmul float %789, %790 -> %792 = fadd float %788, %791 -> %793 = call float @llvm.sqrt.f32(float %792) -> %794 = fneg float %692 -> %795 = fmul float %793, %794 -> %796 = bitcast i32 %52 to float -> %797 = fadd float %796, %795 -> %798 = fmul float %784, %797 -> %799 = fadd float %798, 0.000000e+00 -> %800 = bitcast i32 %52 to float -> %801 = bitcast i32 %52 to float -> %802 = fmul float %800, %801 -> %803 = fadd float %802, 0.000000e+00 -> %804 = bitcast i32 %686 to float -> %805 = bitcast i32 %686 to float -> %806 = fmul float %804, %805 -> %807 = fadd float %803, %806 -> %808 = call float @llvm.sqrt.f32(float %807) -> %809 = fneg float %692 -> %810 = fmul float %808, %809 -> %811 = fmul float %810, 0.000000e+00 -> %812 = bitcast i32 %686 to float -> %813 = fadd float %812, %811 -> %814 = bitcast i32 %52 to float -> %815 = bitcast i32 %52 to float -> %816 = fmul float %814, %815 -1284,1285c1285,1286 -< %818 = bitcast i32 %700 to float -< %819 = bitcast i32 %700 to float ---- -> %818 = bitcast i32 %686 to float -> %819 = bitcast i32 %686 to float -1287,1289c1288,1290 -< %821 = fadd float %820, 0.000000e+00 -< %822 = bitcast i32 %57 to float -< %823 = bitcast i32 %57 to float ---- -> %821 = fadd float %817, %820 -> %822 = call float @llvm.sqrt.f32(float %821) -> %823 = fneg float %692 -1291,1360c1292,1361 -< %825 = fadd float %821, %824 -< %826 = call float @llvm.sqrt.f32(float %825) -< %827 = fneg float %710 -< %828 = fmul float %826, %827 -< %829 = fmul float %828, 0.000000e+00 -< %830 = bitcast i32 %57 to float -< %831 = fadd float %830, %829 -< %832 = bitcast i32 %700 to float -< %833 = bitcast i32 %700 to float -< %834 = fmul float %832, %833 -< %835 = fadd float %834, 0.000000e+00 -< %836 = bitcast i32 %57 to float -< %837 = bitcast i32 %57 to float -< %838 = fmul float %836, %837 -< %839 = fadd float %835, %838 -< %840 = call float @llvm.sqrt.f32(float %839) -< %841 = fneg float %710 -< %842 = fmul float %840, %841 -< %843 = fmul float %842, 0.000000e+00 -< %844 = bitcast i32 %57 to float -< %845 = fadd float %844, %843 -< %846 = fmul float %831, %845 -< %847 = fadd float %817, %846 -< %848 = call float @llvm.sqrt.f32(float %847) -< %849 = fadd float %848, 0.000000e+00 -< %850 = fdiv float %789, %849 -< %851 = fmul float %776, %850 -< %852 = fsub float 1.000000e+00, %851 -< %853 = insertelement <4 x float> zeroinitializer, float %852, i32 0 -< %854 = insertelement <4 x float> %853, float 0.000000e+00, i32 1 -< %855 = insertelement <4 x float> %854, float 0.000000e+00, i32 2 -< %856 = insertelement <4 x float> %855, float 0.000000e+00, i32 3 -< %857 = getelementptr float, float* %0, i32 0 -< %858 = load float, float* %857, align 4 -< %859 = insertelement <4 x float> zeroinitializer, float %858, i32 0 -< %860 = insertelement <4 x float> %859, float 0.000000e+00, i32 1 -< %861 = insertelement <4 x float> %860, float 0.000000e+00, i32 2 -< %862 = insertelement <4 x float> %861, float 0.000000e+00, i32 3 -< %863 = call <4 x float> @llvm.fma.v4f32(<4 x float> %856, <4 x float> 
%862, <4 x float> zeroinitializer) -< %864 = extractelement <4 x float> %863, i32 0 -< store float %864, float* %2, align 4 -< %865 = load i32, i32* %699, align 4 -< %866 = bitcast i32 %865 to float -< %867 = bitcast i32 %865 to float -< %868 = fmul float %866, %867 -< %869 = fadd float %868, 0.000000e+00 -< %870 = bitcast i32 %57 to float -< %871 = bitcast i32 %57 to float -< %872 = fmul float %870, %871 -< %873 = fadd float %869, %872 -< %874 = call float @llvm.sqrt.f32(float %873) -< %875 = fneg float %710 -< %876 = fmul float %874, %875 -< %877 = bitcast i32 %865 to float -< %878 = fadd float %877, %876 -< %879 = bitcast i32 %865 to float -< %880 = bitcast i32 %865 to float -< %881 = fmul float %879, %880 -< %882 = fadd float %881, 0.000000e+00 -< %883 = bitcast i32 %57 to float -< %884 = bitcast i32 %57 to float -< %885 = fmul float %883, %884 -< %886 = fadd float %882, %885 -< %887 = call float @llvm.sqrt.f32(float %886) -< %888 = fneg float %710 -< %889 = fmul float %887, %888 -< %890 = bitcast i32 %865 to float -< %891 = fadd float %890, %889 -< %892 = bitcast i32 %865 to float -< %893 = bitcast i32 %865 to float ---- -> %825 = fmul float %824, 0.000000e+00 -> %826 = bitcast i32 %686 to float -> %827 = fadd float %826, %825 -> %828 = fmul float %813, %827 -> %829 = fadd float %799, %828 -> %830 = call float @llvm.sqrt.f32(float %829) -> %831 = fadd float %830, 0.000000e+00 -> %832 = fdiv float %771, %831 -> %833 = fmul float %758, %832 -> %834 = fsub float 1.000000e+00, %833 -> %835 = insertelement <4 x float> zeroinitializer, float %834, i32 0 -> %836 = insertelement <4 x float> %835, float 0.000000e+00, i32 1 -> %837 = insertelement <4 x float> %836, float 0.000000e+00, i32 2 -> %838 = insertelement <4 x float> %837, float 0.000000e+00, i32 3 -> %839 = getelementptr float, float* %0, i32 0 -> %840 = load float, float* %839, align 4 -> %841 = insertelement <4 x float> zeroinitializer, float %840, i32 0 -> %842 = insertelement <4 x float> %841, float 0.000000e+00, i32 1 -> %843 = insertelement <4 x float> %842, float 0.000000e+00, i32 2 -> %844 = insertelement <4 x float> %843, float 0.000000e+00, i32 3 -> %845 = call <4 x float> @llvm.fma.v4f32(<4 x float> %838, <4 x float> %844, <4 x float> zeroinitializer) -> %846 = extractelement <4 x float> %845, i32 0 -> store float %846, float* %2, align 4 -> %847 = bitcast i32 %52 to float -> %848 = bitcast i32 %52 to float -> %849 = fmul float %847, %848 -> %850 = fadd float %849, 0.000000e+00 -> %851 = bitcast i32 %686 to float -> %852 = bitcast i32 %686 to float -> %853 = fmul float %851, %852 -> %854 = fadd float %850, %853 -> %855 = call float @llvm.sqrt.f32(float %854) -> %856 = fneg float %692 -> %857 = fmul float %855, %856 -> %858 = bitcast i32 %52 to float -> %859 = fadd float %858, %857 -> %860 = bitcast i32 %52 to float -> %861 = bitcast i32 %52 to float -> %862 = fmul float %860, %861 -> %863 = fadd float %862, 0.000000e+00 -> %864 = bitcast i32 %686 to float -> %865 = bitcast i32 %686 to float -> %866 = fmul float %864, %865 -> %867 = fadd float %863, %866 -> %868 = call float @llvm.sqrt.f32(float %867) -> %869 = fneg float %692 -> %870 = fmul float %868, %869 -> %871 = bitcast i32 %52 to float -> %872 = fadd float %871, %870 -> %873 = bitcast i32 %52 to float -> %874 = bitcast i32 %52 to float -> %875 = fmul float %873, %874 -> %876 = fadd float %875, 0.000000e+00 -> %877 = bitcast i32 %686 to float -> %878 = bitcast i32 %686 to float -> %879 = fmul float %877, %878 -> %880 = fadd float %876, %879 -> %881 = call float 
@llvm.sqrt.f32(float %880) -> %882 = fneg float %692 -> %883 = fmul float %881, %882 -> %884 = bitcast i32 %52 to float -> %885 = fadd float %884, %883 -> %886 = fmul float %872, %885 -> %887 = fadd float %886, 0.000000e+00 -> %888 = bitcast i32 %52 to float -> %889 = bitcast i32 %52 to float -> %890 = fmul float %888, %889 -> %891 = fadd float %890, 0.000000e+00 -> %892 = bitcast i32 %686 to float -> %893 = bitcast i32 %686 to float -1362,1364c1363,1365 -< %895 = fadd float %894, 0.000000e+00 -< %896 = bitcast i32 %57 to float -< %897 = bitcast i32 %57 to float ---- -> %895 = fadd float %891, %894 -> %896 = call float @llvm.sqrt.f32(float %895) -> %897 = fneg float %692 -1366,1400c1367,1401 -< %899 = fadd float %895, %898 -< %900 = call float @llvm.sqrt.f32(float %899) -< %901 = fneg float %710 -< %902 = fmul float %900, %901 -< %903 = bitcast i32 %865 to float -< %904 = fadd float %903, %902 -< %905 = fmul float %891, %904 -< %906 = fadd float %905, 0.000000e+00 -< %907 = bitcast i32 %865 to float -< %908 = bitcast i32 %865 to float -< %909 = fmul float %907, %908 -< %910 = fadd float %909, 0.000000e+00 -< %911 = bitcast i32 %57 to float -< %912 = bitcast i32 %57 to float -< %913 = fmul float %911, %912 -< %914 = fadd float %910, %913 -< %915 = call float @llvm.sqrt.f32(float %914) -< %916 = fneg float %710 -< %917 = fmul float %915, %916 -< %918 = fmul float %917, 0.000000e+00 -< %919 = bitcast i32 %57 to float -< %920 = fadd float %919, %918 -< %921 = bitcast i32 %865 to float -< %922 = bitcast i32 %865 to float -< %923 = fmul float %921, %922 -< %924 = fadd float %923, 0.000000e+00 -< %925 = bitcast i32 %57 to float -< %926 = bitcast i32 %57 to float -< %927 = fmul float %925, %926 -< %928 = fadd float %924, %927 -< %929 = call float @llvm.sqrt.f32(float %928) -< %930 = fneg float %710 -< %931 = fmul float %929, %930 -< %932 = fmul float %931, 0.000000e+00 -< %933 = bitcast i32 %57 to float ---- -> %899 = fmul float %898, 0.000000e+00 -> %900 = bitcast i32 %686 to float -> %901 = fadd float %900, %899 -> %902 = bitcast i32 %52 to float -> %903 = bitcast i32 %52 to float -> %904 = fmul float %902, %903 -> %905 = fadd float %904, 0.000000e+00 -> %906 = bitcast i32 %686 to float -> %907 = bitcast i32 %686 to float -> %908 = fmul float %906, %907 -> %909 = fadd float %905, %908 -> %910 = call float @llvm.sqrt.f32(float %909) -> %911 = fneg float %692 -> %912 = fmul float %910, %911 -> %913 = fmul float %912, 0.000000e+00 -> %914 = bitcast i32 %686 to float -> %915 = fadd float %914, %913 -> %916 = fmul float %901, %915 -> %917 = fadd float %887, %916 -> %918 = call float @llvm.sqrt.f32(float %917) -> %919 = fadd float %918, 0.000000e+00 -> %920 = fdiv float %859, %919 -> %921 = fmul float %920, 2.000000e+00 -> %922 = bitcast i32 %52 to float -> %923 = bitcast i32 %52 to float -> %924 = fmul float %922, %923 -> %925 = fadd float %924, 0.000000e+00 -> %926 = bitcast i32 %686 to float -> %927 = bitcast i32 %686 to float -> %928 = fmul float %926, %927 -> %929 = fadd float %925, %928 -> %930 = call float @llvm.sqrt.f32(float %929) -> %931 = fneg float %692 -> %932 = fmul float %930, %931 -> %933 = bitcast i32 %52 to float -1402,1404c1403,1405 -< %935 = fmul float %920, %934 -< %936 = fadd float %906, %935 -< %937 = call float @llvm.sqrt.f32(float %936) ---- -> %935 = bitcast i32 %52 to float -> %936 = bitcast i32 %52 to float -> %937 = fmul float %935, %936 -1406,1435c1407,1436 -< %939 = fdiv float %878, %938 -< %940 = fmul float %939, 2.000000e+00 -< %941 = bitcast i32 %865 to float -< %942 
= bitcast i32 %865 to float -< %943 = fmul float %941, %942 -< %944 = fadd float %943, 0.000000e+00 -< %945 = bitcast i32 %57 to float -< %946 = bitcast i32 %57 to float -< %947 = fmul float %945, %946 -< %948 = fadd float %944, %947 -< %949 = call float @llvm.sqrt.f32(float %948) -< %950 = fneg float %710 -< %951 = fmul float %949, %950 -< %952 = bitcast i32 %865 to float -< %953 = fadd float %952, %951 -< %954 = bitcast i32 %865 to float -< %955 = bitcast i32 %865 to float -< %956 = fmul float %954, %955 -< %957 = fadd float %956, 0.000000e+00 -< %958 = bitcast i32 %57 to float -< %959 = bitcast i32 %57 to float -< %960 = fmul float %958, %959 -< %961 = fadd float %957, %960 -< %962 = call float @llvm.sqrt.f32(float %961) -< %963 = fneg float %710 -< %964 = fmul float %962, %963 -< %965 = bitcast i32 %865 to float -< %966 = fadd float %965, %964 -< %967 = bitcast i32 %865 to float -< %968 = bitcast i32 %865 to float ---- -> %939 = bitcast i32 %686 to float -> %940 = bitcast i32 %686 to float -> %941 = fmul float %939, %940 -> %942 = fadd float %938, %941 -> %943 = call float @llvm.sqrt.f32(float %942) -> %944 = fneg float %692 -> %945 = fmul float %943, %944 -> %946 = bitcast i32 %52 to float -> %947 = fadd float %946, %945 -> %948 = bitcast i32 %52 to float -> %949 = bitcast i32 %52 to float -> %950 = fmul float %948, %949 -> %951 = fadd float %950, 0.000000e+00 -> %952 = bitcast i32 %686 to float -> %953 = bitcast i32 %686 to float -> %954 = fmul float %952, %953 -> %955 = fadd float %951, %954 -> %956 = call float @llvm.sqrt.f32(float %955) -> %957 = fneg float %692 -> %958 = fmul float %956, %957 -> %959 = bitcast i32 %52 to float -> %960 = fadd float %959, %958 -> %961 = fmul float %947, %960 -> %962 = fadd float %961, 0.000000e+00 -> %963 = bitcast i32 %52 to float -> %964 = bitcast i32 %52 to float -> %965 = fmul float %963, %964 -> %966 = fadd float %965, 0.000000e+00 -> %967 = bitcast i32 %686 to float -> %968 = bitcast i32 %686 to float -1437,1439c1438,1440 -< %970 = fadd float %969, 0.000000e+00 -< %971 = bitcast i32 %57 to float -< %972 = bitcast i32 %57 to float ---- -> %970 = fadd float %966, %969 -> %971 = call float @llvm.sqrt.f32(float %970) -> %972 = fneg float %692 -1441,1465c1442,1466 -< %974 = fadd float %970, %973 -< %975 = call float @llvm.sqrt.f32(float %974) -< %976 = fneg float %710 -< %977 = fmul float %975, %976 -< %978 = bitcast i32 %865 to float -< %979 = fadd float %978, %977 -< %980 = fmul float %966, %979 -< %981 = fadd float %980, 0.000000e+00 -< %982 = bitcast i32 %865 to float -< %983 = bitcast i32 %865 to float -< %984 = fmul float %982, %983 -< %985 = fadd float %984, 0.000000e+00 -< %986 = bitcast i32 %57 to float -< %987 = bitcast i32 %57 to float -< %988 = fmul float %986, %987 -< %989 = fadd float %985, %988 -< %990 = call float @llvm.sqrt.f32(float %989) -< %991 = fneg float %710 -< %992 = fmul float %990, %991 -< %993 = fmul float %992, 0.000000e+00 -< %994 = bitcast i32 %57 to float -< %995 = fadd float %994, %993 -< %996 = bitcast i32 %865 to float -< %997 = bitcast i32 %865 to float -< %998 = fmul float %996, %997 ---- -> %974 = fmul float %973, 0.000000e+00 -> %975 = bitcast i32 %686 to float -> %976 = fadd float %975, %974 -> %977 = bitcast i32 %52 to float -> %978 = bitcast i32 %52 to float -> %979 = fmul float %977, %978 -> %980 = fadd float %979, 0.000000e+00 -> %981 = bitcast i32 %686 to float -> %982 = bitcast i32 %686 to float -> %983 = fmul float %981, %982 -> %984 = fadd float %980, %983 -> %985 = call float @llvm.sqrt.f32(float 
%984) -> %986 = fneg float %692 -> %987 = fmul float %985, %986 -> %988 = fmul float %987, 0.000000e+00 -> %989 = bitcast i32 %686 to float -> %990 = fadd float %989, %988 -> %991 = fmul float %976, %990 -> %992 = fadd float %962, %991 -> %993 = call float @llvm.sqrt.f32(float %992) -> %994 = fadd float %993, 0.000000e+00 -> %995 = fdiv float %934, %994 -> %996 = fmul float %921, %995 -> %997 = fsub float 1.000000e+00, %996 -> %998 = fmul float %997, %840 -1467,1468c1468,1469 -< %1000 = bitcast i32 %57 to float -< %1001 = bitcast i32 %57 to float ---- -> %1000 = bitcast i32 %52 to float -> %1001 = bitcast i32 %52 to float -1470,1472c1471,1473 -< %1003 = fadd float %999, %1002 -< %1004 = call float @llvm.sqrt.f32(float %1003) -< %1005 = fneg float %710 ---- -> %1003 = fadd float %1002, 0.000000e+00 -> %1004 = bitcast i32 %686 to float -> %1005 = bitcast i32 %686 to float -1474,1513c1475,1514 -< %1007 = fmul float %1006, 0.000000e+00 -< %1008 = bitcast i32 %57 to float -< %1009 = fadd float %1008, %1007 -< %1010 = fmul float %995, %1009 -< %1011 = fadd float %981, %1010 -< %1012 = call float @llvm.sqrt.f32(float %1011) -< %1013 = fadd float %1012, 0.000000e+00 -< %1014 = fdiv float %953, %1013 -< %1015 = fmul float %940, %1014 -< %1016 = fsub float 1.000000e+00, %1015 -< %1017 = fmul float %1016, %858 -< %1018 = fadd float %1017, 0.000000e+00 -< %1019 = bitcast i32 %865 to float -< %1020 = bitcast i32 %865 to float -< %1021 = fmul float %1019, %1020 -< %1022 = fadd float %1021, 0.000000e+00 -< %1023 = bitcast i32 %57 to float -< %1024 = bitcast i32 %57 to float -< %1025 = fmul float %1023, %1024 -< %1026 = fadd float %1022, %1025 -< %1027 = call float @llvm.sqrt.f32(float %1026) -< %1028 = fneg float %710 -< %1029 = fmul float %1027, %1028 -< %1030 = bitcast i32 %865 to float -< %1031 = fadd float %1030, %1029 -< %1032 = bitcast i32 %865 to float -< %1033 = bitcast i32 %865 to float -< %1034 = fmul float %1032, %1033 -< %1035 = fadd float %1034, 0.000000e+00 -< %1036 = bitcast i32 %57 to float -< %1037 = bitcast i32 %57 to float -< %1038 = fmul float %1036, %1037 -< %1039 = fadd float %1035, %1038 -< %1040 = call float @llvm.sqrt.f32(float %1039) -< %1041 = fneg float %710 -< %1042 = fmul float %1040, %1041 -< %1043 = bitcast i32 %865 to float -< %1044 = fadd float %1043, %1042 -< %1045 = bitcast i32 %865 to float -< %1046 = bitcast i32 %865 to float ---- -> %1007 = fadd float %1003, %1006 -> %1008 = call float @llvm.sqrt.f32(float %1007) -> %1009 = fneg float %692 -> %1010 = fmul float %1008, %1009 -> %1011 = bitcast i32 %52 to float -> %1012 = fadd float %1011, %1010 -> %1013 = bitcast i32 %52 to float -> %1014 = bitcast i32 %52 to float -> %1015 = fmul float %1013, %1014 -> %1016 = fadd float %1015, 0.000000e+00 -> %1017 = bitcast i32 %686 to float -> %1018 = bitcast i32 %686 to float -> %1019 = fmul float %1017, %1018 -> %1020 = fadd float %1016, %1019 -> %1021 = call float @llvm.sqrt.f32(float %1020) -> %1022 = fneg float %692 -> %1023 = fmul float %1021, %1022 -> %1024 = bitcast i32 %52 to float -> %1025 = fadd float %1024, %1023 -> %1026 = bitcast i32 %52 to float -> %1027 = bitcast i32 %52 to float -> %1028 = fmul float %1026, %1027 -> %1029 = fadd float %1028, 0.000000e+00 -> %1030 = bitcast i32 %686 to float -> %1031 = bitcast i32 %686 to float -> %1032 = fmul float %1030, %1031 -> %1033 = fadd float %1029, %1032 -> %1034 = call float @llvm.sqrt.f32(float %1033) -> %1035 = fneg float %692 -> %1036 = fmul float %1034, %1035 -> %1037 = bitcast i32 %52 to float -> %1038 = fadd float 
%1037, %1036 -> %1039 = fmul float %1025, %1038 -> %1040 = fadd float %1039, 0.000000e+00 -> %1041 = bitcast i32 %52 to float -> %1042 = bitcast i32 %52 to float -> %1043 = fmul float %1041, %1042 -> %1044 = fadd float %1043, 0.000000e+00 -> %1045 = bitcast i32 %686 to float -> %1046 = bitcast i32 %686 to float -1515,1517c1516,1518 -< %1048 = fadd float %1047, 0.000000e+00 -< %1049 = bitcast i32 %57 to float -< %1050 = bitcast i32 %57 to float ---- -> %1048 = fadd float %1044, %1047 -> %1049 = call float @llvm.sqrt.f32(float %1048) -> %1050 = fneg float %692 -1519,1570c1520,1571 -< %1052 = fadd float %1048, %1051 -< %1053 = call float @llvm.sqrt.f32(float %1052) -< %1054 = fneg float %710 -< %1055 = fmul float %1053, %1054 -< %1056 = bitcast i32 %865 to float -< %1057 = fadd float %1056, %1055 -< %1058 = fmul float %1044, %1057 -< %1059 = fadd float %1058, 0.000000e+00 -< %1060 = bitcast i32 %865 to float -< %1061 = bitcast i32 %865 to float -< %1062 = fmul float %1060, %1061 -< %1063 = fadd float %1062, 0.000000e+00 -< %1064 = bitcast i32 %57 to float -< %1065 = bitcast i32 %57 to float -< %1066 = fmul float %1064, %1065 -< %1067 = fadd float %1063, %1066 -< %1068 = call float @llvm.sqrt.f32(float %1067) -< %1069 = fneg float %710 -< %1070 = fmul float %1068, %1069 -< %1071 = fmul float %1070, 0.000000e+00 -< %1072 = bitcast i32 %57 to float -< %1073 = fadd float %1072, %1071 -< %1074 = bitcast i32 %865 to float -< %1075 = bitcast i32 %865 to float -< %1076 = fmul float %1074, %1075 -< %1077 = fadd float %1076, 0.000000e+00 -< %1078 = bitcast i32 %57 to float -< %1079 = bitcast i32 %57 to float -< %1080 = fmul float %1078, %1079 -< %1081 = fadd float %1077, %1080 -< %1082 = call float @llvm.sqrt.f32(float %1081) -< %1083 = fneg float %710 -< %1084 = fmul float %1082, %1083 -< %1085 = fmul float %1084, 0.000000e+00 -< %1086 = bitcast i32 %57 to float -< %1087 = fadd float %1086, %1085 -< %1088 = fmul float %1073, %1087 -< %1089 = fadd float %1059, %1088 -< %1090 = call float @llvm.sqrt.f32(float %1089) -< %1091 = fadd float %1090, 0.000000e+00 -< %1092 = fdiv float %1031, %1091 -< %1093 = fmul float %1092, 2.000000e+00 -< %1094 = bitcast i32 %865 to float -< %1095 = bitcast i32 %865 to float -< %1096 = fmul float %1094, %1095 -< %1097 = fadd float %1096, 0.000000e+00 -< %1098 = bitcast i32 %57 to float -< %1099 = bitcast i32 %57 to float -< %1100 = fmul float %1098, %1099 -< %1101 = fadd float %1097, %1100 -< %1102 = call float @llvm.sqrt.f32(float %1101) -< %1103 = fneg float %710 ---- -> %1052 = fmul float %1051, 0.000000e+00 -> %1053 = bitcast i32 %686 to float -> %1054 = fadd float %1053, %1052 -> %1055 = bitcast i32 %52 to float -> %1056 = bitcast i32 %52 to float -> %1057 = fmul float %1055, %1056 -> %1058 = fadd float %1057, 0.000000e+00 -> %1059 = bitcast i32 %686 to float -> %1060 = bitcast i32 %686 to float -> %1061 = fmul float %1059, %1060 -> %1062 = fadd float %1058, %1061 -> %1063 = call float @llvm.sqrt.f32(float %1062) -> %1064 = fneg float %692 -> %1065 = fmul float %1063, %1064 -> %1066 = fmul float %1065, 0.000000e+00 -> %1067 = bitcast i32 %686 to float -> %1068 = fadd float %1067, %1066 -> %1069 = fmul float %1054, %1068 -> %1070 = fadd float %1040, %1069 -> %1071 = call float @llvm.sqrt.f32(float %1070) -> %1072 = fadd float %1071, 0.000000e+00 -> %1073 = fdiv float %1012, %1072 -> %1074 = fmul float %1073, 2.000000e+00 -> %1075 = bitcast i32 %52 to float -> %1076 = bitcast i32 %52 to float -> %1077 = fmul float %1075, %1076 -> %1078 = fadd float %1077, 0.000000e+00 
-> %1079 = bitcast i32 %686 to float -> %1080 = bitcast i32 %686 to float -> %1081 = fmul float %1079, %1080 -> %1082 = fadd float %1078, %1081 -> %1083 = call float @llvm.sqrt.f32(float %1082) -> %1084 = fneg float %692 -> %1085 = fmul float %1083, %1084 -> %1086 = fmul float %1085, 0.000000e+00 -> %1087 = bitcast i32 %686 to float -> %1088 = fadd float %1087, %1086 -> %1089 = bitcast i32 %52 to float -> %1090 = bitcast i32 %52 to float -> %1091 = fmul float %1089, %1090 -> %1092 = fadd float %1091, 0.000000e+00 -> %1093 = bitcast i32 %686 to float -> %1094 = bitcast i32 %686 to float -> %1095 = fmul float %1093, %1094 -> %1096 = fadd float %1092, %1095 -> %1097 = call float @llvm.sqrt.f32(float %1096) -> %1098 = fneg float %692 -> %1099 = fmul float %1097, %1098 -> %1100 = bitcast i32 %52 to float -> %1101 = fadd float %1100, %1099 -> %1102 = bitcast i32 %52 to float -> %1103 = bitcast i32 %52 to float -1572,1589c1573,1590 -< %1105 = fmul float %1104, 0.000000e+00 -< %1106 = bitcast i32 %57 to float -< %1107 = fadd float %1106, %1105 -< %1108 = bitcast i32 %865 to float -< %1109 = bitcast i32 %865 to float -< %1110 = fmul float %1108, %1109 -< %1111 = fadd float %1110, 0.000000e+00 -< %1112 = bitcast i32 %57 to float -< %1113 = bitcast i32 %57 to float -< %1114 = fmul float %1112, %1113 -< %1115 = fadd float %1111, %1114 -< %1116 = call float @llvm.sqrt.f32(float %1115) -< %1117 = fneg float %710 -< %1118 = fmul float %1116, %1117 -< %1119 = bitcast i32 %865 to float -< %1120 = fadd float %1119, %1118 -< %1121 = bitcast i32 %865 to float -< %1122 = bitcast i32 %865 to float ---- -> %1105 = fadd float %1104, 0.000000e+00 -> %1106 = bitcast i32 %686 to float -> %1107 = bitcast i32 %686 to float -> %1108 = fmul float %1106, %1107 -> %1109 = fadd float %1105, %1108 -> %1110 = call float @llvm.sqrt.f32(float %1109) -> %1111 = fneg float %692 -> %1112 = fmul float %1110, %1111 -> %1113 = bitcast i32 %52 to float -> %1114 = fadd float %1113, %1112 -> %1115 = fmul float %1101, %1114 -> %1116 = fadd float %1115, 0.000000e+00 -> %1117 = bitcast i32 %52 to float -> %1118 = bitcast i32 %52 to float -> %1119 = fmul float %1117, %1118 -> %1120 = fadd float %1119, 0.000000e+00 -> %1121 = bitcast i32 %686 to float -> %1122 = bitcast i32 %686 to float -1591,1593c1592,1594 -< %1124 = fadd float %1123, 0.000000e+00 -< %1125 = bitcast i32 %57 to float -< %1126 = bitcast i32 %57 to float ---- -> %1124 = fadd float %1120, %1123 -> %1125 = call float @llvm.sqrt.f32(float %1124) -> %1126 = fneg float %692 -1595,1652c1596,1653 -< %1128 = fadd float %1124, %1127 -< %1129 = call float @llvm.sqrt.f32(float %1128) -< %1130 = fneg float %710 -< %1131 = fmul float %1129, %1130 -< %1132 = bitcast i32 %865 to float -< %1133 = fadd float %1132, %1131 -< %1134 = fmul float %1120, %1133 -< %1135 = fadd float %1134, 0.000000e+00 -< %1136 = bitcast i32 %865 to float -< %1137 = bitcast i32 %865 to float -< %1138 = fmul float %1136, %1137 -< %1139 = fadd float %1138, 0.000000e+00 -< %1140 = bitcast i32 %57 to float -< %1141 = bitcast i32 %57 to float -< %1142 = fmul float %1140, %1141 -< %1143 = fadd float %1139, %1142 -< %1144 = call float @llvm.sqrt.f32(float %1143) -< %1145 = fneg float %710 -< %1146 = fmul float %1144, %1145 -< %1147 = fmul float %1146, 0.000000e+00 -< %1148 = bitcast i32 %57 to float -< %1149 = fadd float %1148, %1147 -< %1150 = bitcast i32 %865 to float -< %1151 = bitcast i32 %865 to float -< %1152 = fmul float %1150, %1151 -< %1153 = fadd float %1152, 0.000000e+00 -< %1154 = bitcast i32 %57 to float -< 
%1155 = bitcast i32 %57 to float -< %1156 = fmul float %1154, %1155 -< %1157 = fadd float %1153, %1156 -< %1158 = call float @llvm.sqrt.f32(float %1157) -< %1159 = fneg float %710 -< %1160 = fmul float %1158, %1159 -< %1161 = fmul float %1160, 0.000000e+00 -< %1162 = bitcast i32 %57 to float -< %1163 = fadd float %1162, %1161 -< %1164 = fmul float %1149, %1163 -< %1165 = fadd float %1135, %1164 -< %1166 = call float @llvm.sqrt.f32(float %1165) -< %1167 = fadd float %1166, 0.000000e+00 -< %1168 = fdiv float %1107, %1167 -< %1169 = fmul float %1093, %1168 -< %1170 = fneg float %1169 -< %1171 = load float, float* %30, align 4 -< %1172 = fmul float %1170, %1171 -< %1173 = fadd float %1018, %1172 -< %1174 = insertelement <4 x float> zeroinitializer, float %1173, i32 0 -< %1175 = insertelement <4 x float> %1174, float 0.000000e+00, i32 1 -< %1176 = insertelement <4 x float> %1175, float 0.000000e+00, i32 2 -< %1177 = insertelement <4 x float> %1176, float 0.000000e+00, i32 3 -< %1178 = extractelement <4 x float> %1177, i32 0 -< store float %1178, float* %2, align 4 -< %1179 = extractelement <4 x float> %1177, i32 1 -< %1180 = getelementptr float, float* %2, i32 0 -< %1181 = getelementptr inbounds float, float* %1180, i64 1 -< store float %1179, float* %1181, align 4 -< %1182 = bitcast i32 %865 to float -< %1183 = bitcast i32 %865 to float ---- -> %1128 = fmul float %1127, 0.000000e+00 -> %1129 = bitcast i32 %686 to float -> %1130 = fadd float %1129, %1128 -> %1131 = bitcast i32 %52 to float -> %1132 = bitcast i32 %52 to float -> %1133 = fmul float %1131, %1132 -> %1134 = fadd float %1133, 0.000000e+00 -> %1135 = bitcast i32 %686 to float -> %1136 = bitcast i32 %686 to float -> %1137 = fmul float %1135, %1136 -> %1138 = fadd float %1134, %1137 -> %1139 = call float @llvm.sqrt.f32(float %1138) -> %1140 = fneg float %692 -> %1141 = fmul float %1139, %1140 -> %1142 = fmul float %1141, 0.000000e+00 -> %1143 = bitcast i32 %686 to float -> %1144 = fadd float %1143, %1142 -> %1145 = fmul float %1130, %1144 -> %1146 = fadd float %1116, %1145 -> %1147 = call float @llvm.sqrt.f32(float %1146) -> %1148 = fadd float %1147, 0.000000e+00 -> %1149 = fdiv float %1088, %1148 -> %1150 = fmul float %1074, %1149 -> %1151 = fneg float %1150 -> %1152 = getelementptr float, float* %0, i32 0 -> %1153 = getelementptr inbounds float, float* %1152, i64 2 -> %1154 = load float, float* %1153, align 4 -> %1155 = fmul float %1151, %1154 -> %1156 = fadd float %999, %1155 -> %1157 = insertelement <4 x float> zeroinitializer, float %1156, i32 0 -> %1158 = insertelement <4 x float> %1157, float 0.000000e+00, i32 1 -> %1159 = insertelement <4 x float> %1158, float 0.000000e+00, i32 2 -> %1160 = insertelement <4 x float> %1159, float 0.000000e+00, i32 3 -> %1161 = extractelement <4 x float> %1160, i32 0 -> store float %1161, float* %2, align 4 -> %1162 = extractelement <4 x float> %1160, i32 1 -> %1163 = getelementptr float, float* %2, i32 0 -> %1164 = getelementptr inbounds float, float* %1163, i64 1 -> store float %1162, float* %1164, align 4 -> %1165 = bitcast i32 %52 to float -> %1166 = bitcast i32 %52 to float -> %1167 = fmul float %1165, %1166 -> %1168 = fadd float %1167, 0.000000e+00 -> %1169 = bitcast i32 %686 to float -> %1170 = bitcast i32 %686 to float -> %1171 = fmul float %1169, %1170 -> %1172 = fadd float %1168, %1171 -> %1173 = call float @llvm.sqrt.f32(float %1172) -> %1174 = fneg float %692 -> %1175 = fmul float %1173, %1174 -> %1176 = bitcast i32 %52 to float -> %1177 = fadd float %1176, %1175 -> %1178 = bitcast 
i32 %52 to float -> %1179 = bitcast i32 %52 to float -> %1180 = fmul float %1178, %1179 -> %1181 = fadd float %1180, 0.000000e+00 -> %1182 = bitcast i32 %686 to float -> %1183 = bitcast i32 %686 to float -1654,1656c1655,1657 -< %1185 = fadd float %1184, 0.000000e+00 -< %1186 = bitcast i32 %57 to float -< %1187 = bitcast i32 %57 to float ---- -> %1185 = fadd float %1181, %1184 -> %1186 = call float @llvm.sqrt.f32(float %1185) -> %1187 = fneg float %692 -1658,1665c1659,1666 -< %1189 = fadd float %1185, %1188 -< %1190 = call float @llvm.sqrt.f32(float %1189) -< %1191 = fneg float %710 -< %1192 = fmul float %1190, %1191 -< %1193 = bitcast i32 %865 to float -< %1194 = fadd float %1193, %1192 -< %1195 = bitcast i32 %865 to float -< %1196 = bitcast i32 %865 to float ---- -> %1189 = bitcast i32 %52 to float -> %1190 = fadd float %1189, %1188 -> %1191 = bitcast i32 %52 to float -> %1192 = bitcast i32 %52 to float -> %1193 = fmul float %1191, %1192 -> %1194 = fadd float %1193, 0.000000e+00 -> %1195 = bitcast i32 %686 to float -> %1196 = bitcast i32 %686 to float -1667,1669c1668,1670 -< %1198 = fadd float %1197, 0.000000e+00 -< %1199 = bitcast i32 %57 to float -< %1200 = bitcast i32 %57 to float ---- -> %1198 = fadd float %1194, %1197 -> %1199 = call float @llvm.sqrt.f32(float %1198) -> %1200 = fneg float %692 -1671,1727c1672,1728 -< %1202 = fadd float %1198, %1201 -< %1203 = call float @llvm.sqrt.f32(float %1202) -< %1204 = fneg float %710 -< %1205 = fmul float %1203, %1204 -< %1206 = bitcast i32 %865 to float -< %1207 = fadd float %1206, %1205 -< %1208 = bitcast i32 %865 to float -< %1209 = bitcast i32 %865 to float -< %1210 = fmul float %1208, %1209 -< %1211 = fadd float %1210, 0.000000e+00 -< %1212 = bitcast i32 %57 to float -< %1213 = bitcast i32 %57 to float -< %1214 = fmul float %1212, %1213 -< %1215 = fadd float %1211, %1214 -< %1216 = call float @llvm.sqrt.f32(float %1215) -< %1217 = fneg float %710 -< %1218 = fmul float %1216, %1217 -< %1219 = bitcast i32 %865 to float -< %1220 = fadd float %1219, %1218 -< %1221 = fmul float %1207, %1220 -< %1222 = fadd float %1221, 0.000000e+00 -< %1223 = bitcast i32 %865 to float -< %1224 = bitcast i32 %865 to float -< %1225 = fmul float %1223, %1224 -< %1226 = fadd float %1225, 0.000000e+00 -< %1227 = bitcast i32 %57 to float -< %1228 = bitcast i32 %57 to float -< %1229 = fmul float %1227, %1228 -< %1230 = fadd float %1226, %1229 -< %1231 = call float @llvm.sqrt.f32(float %1230) -< %1232 = fneg float %710 -< %1233 = fmul float %1231, %1232 -< %1234 = fmul float %1233, 0.000000e+00 -< %1235 = bitcast i32 %57 to float -< %1236 = fadd float %1235, %1234 -< %1237 = bitcast i32 %865 to float -< %1238 = bitcast i32 %865 to float -< %1239 = fmul float %1237, %1238 -< %1240 = fadd float %1239, 0.000000e+00 -< %1241 = bitcast i32 %57 to float -< %1242 = bitcast i32 %57 to float -< %1243 = fmul float %1241, %1242 -< %1244 = fadd float %1240, %1243 -< %1245 = call float @llvm.sqrt.f32(float %1244) -< %1246 = fneg float %710 -< %1247 = fmul float %1245, %1246 -< %1248 = fmul float %1247, 0.000000e+00 -< %1249 = bitcast i32 %57 to float -< %1250 = fadd float %1249, %1248 -< %1251 = fmul float %1236, %1250 -< %1252 = fadd float %1222, %1251 -< %1253 = call float @llvm.sqrt.f32(float %1252) -< %1254 = fadd float %1253, 0.000000e+00 -< %1255 = fdiv float %1194, %1254 -< %1256 = fmul float %1255, 2.000000e+00 -< %1257 = bitcast i32 %865 to float -< %1258 = bitcast i32 %865 to float ---- -> %1202 = bitcast i32 %52 to float -> %1203 = fadd float %1202, %1201 -> %1204 = 
fmul float %1190, %1203 -> %1205 = fadd float %1204, 0.000000e+00 -> %1206 = bitcast i32 %52 to float -> %1207 = bitcast i32 %52 to float -> %1208 = fmul float %1206, %1207 -> %1209 = fadd float %1208, 0.000000e+00 -> %1210 = bitcast i32 %686 to float -> %1211 = bitcast i32 %686 to float -> %1212 = fmul float %1210, %1211 -> %1213 = fadd float %1209, %1212 -> %1214 = call float @llvm.sqrt.f32(float %1213) -> %1215 = fneg float %692 -> %1216 = fmul float %1214, %1215 -> %1217 = fmul float %1216, 0.000000e+00 -> %1218 = bitcast i32 %686 to float -> %1219 = fadd float %1218, %1217 -> %1220 = bitcast i32 %52 to float -> %1221 = bitcast i32 %52 to float -> %1222 = fmul float %1220, %1221 -> %1223 = fadd float %1222, 0.000000e+00 -> %1224 = bitcast i32 %686 to float -> %1225 = bitcast i32 %686 to float -> %1226 = fmul float %1224, %1225 -> %1227 = fadd float %1223, %1226 -> %1228 = call float @llvm.sqrt.f32(float %1227) -> %1229 = fneg float %692 -> %1230 = fmul float %1228, %1229 -> %1231 = fmul float %1230, 0.000000e+00 -> %1232 = bitcast i32 %686 to float -> %1233 = fadd float %1232, %1231 -> %1234 = fmul float %1219, %1233 -> %1235 = fadd float %1205, %1234 -> %1236 = call float @llvm.sqrt.f32(float %1235) -> %1237 = fadd float %1236, 0.000000e+00 -> %1238 = fdiv float %1177, %1237 -> %1239 = fmul float %1238, 2.000000e+00 -> %1240 = bitcast i32 %52 to float -> %1241 = bitcast i32 %52 to float -> %1242 = fmul float %1240, %1241 -> %1243 = fadd float %1242, 0.000000e+00 -> %1244 = bitcast i32 %686 to float -> %1245 = bitcast i32 %686 to float -> %1246 = fmul float %1244, %1245 -> %1247 = fadd float %1243, %1246 -> %1248 = call float @llvm.sqrt.f32(float %1247) -> %1249 = fneg float %692 -> %1250 = fmul float %1248, %1249 -> %1251 = bitcast i32 %52 to float -> %1252 = fadd float %1251, %1250 -> %1253 = bitcast i32 %52 to float -> %1254 = bitcast i32 %52 to float -> %1255 = fmul float %1253, %1254 -> %1256 = fadd float %1255, 0.000000e+00 -> %1257 = bitcast i32 %686 to float -> %1258 = bitcast i32 %686 to float -1729,1731c1730,1732 -< %1260 = fadd float %1259, 0.000000e+00 -< %1261 = bitcast i32 %57 to float -< %1262 = bitcast i32 %57 to float ---- -> %1260 = fadd float %1256, %1259 -> %1261 = call float @llvm.sqrt.f32(float %1260) -> %1262 = fneg float %692 -1733,1740c1734,1741 -< %1264 = fadd float %1260, %1263 -< %1265 = call float @llvm.sqrt.f32(float %1264) -< %1266 = fneg float %710 -< %1267 = fmul float %1265, %1266 -< %1268 = bitcast i32 %865 to float -< %1269 = fadd float %1268, %1267 -< %1270 = bitcast i32 %865 to float -< %1271 = bitcast i32 %865 to float ---- -> %1264 = bitcast i32 %52 to float -> %1265 = fadd float %1264, %1263 -> %1266 = bitcast i32 %52 to float -> %1267 = bitcast i32 %52 to float -> %1268 = fmul float %1266, %1267 -> %1269 = fadd float %1268, 0.000000e+00 -> %1270 = bitcast i32 %686 to float -> %1271 = bitcast i32 %686 to float -1742,1744c1743,1745 -< %1273 = fadd float %1272, 0.000000e+00 -< %1274 = bitcast i32 %57 to float -< %1275 = bitcast i32 %57 to float ---- -> %1273 = fadd float %1269, %1272 -> %1274 = call float @llvm.sqrt.f32(float %1273) -> %1275 = fneg float %692 -1746,1817c1747,1818 -< %1277 = fadd float %1273, %1276 -< %1278 = call float @llvm.sqrt.f32(float %1277) -< %1279 = fneg float %710 -< %1280 = fmul float %1278, %1279 -< %1281 = bitcast i32 %865 to float -< %1282 = fadd float %1281, %1280 -< %1283 = bitcast i32 %865 to float -< %1284 = bitcast i32 %865 to float -< %1285 = fmul float %1283, %1284 -< %1286 = fadd float %1285, 0.000000e+00 -< 
%1287 = bitcast i32 %57 to float -< %1288 = bitcast i32 %57 to float -< %1289 = fmul float %1287, %1288 -< %1290 = fadd float %1286, %1289 -< %1291 = call float @llvm.sqrt.f32(float %1290) -< %1292 = fneg float %710 -< %1293 = fmul float %1291, %1292 -< %1294 = bitcast i32 %865 to float -< %1295 = fadd float %1294, %1293 -< %1296 = fmul float %1282, %1295 -< %1297 = fadd float %1296, 0.000000e+00 -< %1298 = bitcast i32 %865 to float -< %1299 = bitcast i32 %865 to float -< %1300 = fmul float %1298, %1299 -< %1301 = fadd float %1300, 0.000000e+00 -< %1302 = bitcast i32 %57 to float -< %1303 = bitcast i32 %57 to float -< %1304 = fmul float %1302, %1303 -< %1305 = fadd float %1301, %1304 -< %1306 = call float @llvm.sqrt.f32(float %1305) -< %1307 = fneg float %710 -< %1308 = fmul float %1306, %1307 -< %1309 = fmul float %1308, 0.000000e+00 -< %1310 = bitcast i32 %57 to float -< %1311 = fadd float %1310, %1309 -< %1312 = bitcast i32 %865 to float -< %1313 = bitcast i32 %865 to float -< %1314 = fmul float %1312, %1313 -< %1315 = fadd float %1314, 0.000000e+00 -< %1316 = bitcast i32 %57 to float -< %1317 = bitcast i32 %57 to float -< %1318 = fmul float %1316, %1317 -< %1319 = fadd float %1315, %1318 -< %1320 = call float @llvm.sqrt.f32(float %1319) -< %1321 = fneg float %710 -< %1322 = fmul float %1320, %1321 -< %1323 = fmul float %1322, 0.000000e+00 -< %1324 = bitcast i32 %57 to float -< %1325 = fadd float %1324, %1323 -< %1326 = fmul float %1311, %1325 -< %1327 = fadd float %1297, %1326 -< %1328 = call float @llvm.sqrt.f32(float %1327) -< %1329 = fadd float %1328, 0.000000e+00 -< %1330 = fdiv float %1269, %1329 -< %1331 = fmul float %1256, %1330 -< %1332 = fsub float 1.000000e+00, %1331 -< %1333 = insertelement <4 x float> zeroinitializer, float %1332, i32 0 -< %1334 = insertelement <4 x float> %1333, float 0.000000e+00, i32 1 -< %1335 = insertelement <4 x float> %1334, float 0.000000e+00, i32 2 -< %1336 = insertelement <4 x float> %1335, float 0.000000e+00, i32 3 -< %1337 = getelementptr float, float* %0, i32 0 -< %1338 = getelementptr inbounds float, float* %1337, i64 1 -< %1339 = load float, float* %1338, align 4 -< %1340 = insertelement <4 x float> zeroinitializer, float %1339, i32 0 -< %1341 = insertelement <4 x float> %1340, float 0.000000e+00, i32 1 -< %1342 = insertelement <4 x float> %1341, float 0.000000e+00, i32 2 -< %1343 = insertelement <4 x float> %1342, float 0.000000e+00, i32 3 -< %1344 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1336, <4 x float> %1343, <4 x float> zeroinitializer) -< %1345 = extractelement <4 x float> %1344, i32 0 -< store float %1345, float* %1181, align 4 -< %1346 = bitcast i32 %865 to float -< %1347 = bitcast i32 %865 to float ---- -> %1277 = bitcast i32 %52 to float -> %1278 = fadd float %1277, %1276 -> %1279 = fmul float %1265, %1278 -> %1280 = fadd float %1279, 0.000000e+00 -> %1281 = bitcast i32 %52 to float -> %1282 = bitcast i32 %52 to float -> %1283 = fmul float %1281, %1282 -> %1284 = fadd float %1283, 0.000000e+00 -> %1285 = bitcast i32 %686 to float -> %1286 = bitcast i32 %686 to float -> %1287 = fmul float %1285, %1286 -> %1288 = fadd float %1284, %1287 -> %1289 = call float @llvm.sqrt.f32(float %1288) -> %1290 = fneg float %692 -> %1291 = fmul float %1289, %1290 -> %1292 = fmul float %1291, 0.000000e+00 -> %1293 = bitcast i32 %686 to float -> %1294 = fadd float %1293, %1292 -> %1295 = bitcast i32 %52 to float -> %1296 = bitcast i32 %52 to float -> %1297 = fmul float %1295, %1296 -> %1298 = fadd float %1297, 0.000000e+00 -> %1299 = bitcast i32 
%686 to float -> %1300 = bitcast i32 %686 to float -> %1301 = fmul float %1299, %1300 -> %1302 = fadd float %1298, %1301 -> %1303 = call float @llvm.sqrt.f32(float %1302) -> %1304 = fneg float %692 -> %1305 = fmul float %1303, %1304 -> %1306 = fmul float %1305, 0.000000e+00 -> %1307 = bitcast i32 %686 to float -> %1308 = fadd float %1307, %1306 -> %1309 = fmul float %1294, %1308 -> %1310 = fadd float %1280, %1309 -> %1311 = call float @llvm.sqrt.f32(float %1310) -> %1312 = fadd float %1311, 0.000000e+00 -> %1313 = fdiv float %1252, %1312 -> %1314 = fmul float %1239, %1313 -> %1315 = fsub float 1.000000e+00, %1314 -> %1316 = insertelement <4 x float> zeroinitializer, float %1315, i32 0 -> %1317 = insertelement <4 x float> %1316, float 0.000000e+00, i32 1 -> %1318 = insertelement <4 x float> %1317, float 0.000000e+00, i32 2 -> %1319 = insertelement <4 x float> %1318, float 0.000000e+00, i32 3 -> %1320 = getelementptr float, float* %0, i32 0 -> %1321 = getelementptr inbounds float, float* %1320, i64 1 -> %1322 = load float, float* %1321, align 4 -> %1323 = insertelement <4 x float> zeroinitializer, float %1322, i32 0 -> %1324 = insertelement <4 x float> %1323, float 0.000000e+00, i32 1 -> %1325 = insertelement <4 x float> %1324, float 0.000000e+00, i32 2 -> %1326 = insertelement <4 x float> %1325, float 0.000000e+00, i32 3 -> %1327 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1319, <4 x float> %1326, <4 x float> zeroinitializer) -> %1328 = extractelement <4 x float> %1327, i32 0 -> store float %1328, float* %1164, align 4 -> %1329 = bitcast i32 %52 to float -> %1330 = bitcast i32 %52 to float -> %1331 = fmul float %1329, %1330 -> %1332 = fadd float %1331, 0.000000e+00 -> %1333 = bitcast i32 %686 to float -> %1334 = bitcast i32 %686 to float -> %1335 = fmul float %1333, %1334 -> %1336 = fadd float %1332, %1335 -> %1337 = call float @llvm.sqrt.f32(float %1336) -> %1338 = fneg float %692 -> %1339 = fmul float %1337, %1338 -> %1340 = bitcast i32 %52 to float -> %1341 = fadd float %1340, %1339 -> %1342 = bitcast i32 %52 to float -> %1343 = bitcast i32 %52 to float -> %1344 = fmul float %1342, %1343 -> %1345 = fadd float %1344, 0.000000e+00 -> %1346 = bitcast i32 %686 to float -> %1347 = bitcast i32 %686 to float -1819,1821c1820,1822 -< %1349 = fadd float %1348, 0.000000e+00 -< %1350 = bitcast i32 %57 to float -< %1351 = bitcast i32 %57 to float ---- -> %1349 = fadd float %1345, %1348 -> %1350 = call float @llvm.sqrt.f32(float %1349) -> %1351 = fneg float %692 -1823,1830c1824,1831 -< %1353 = fadd float %1349, %1352 -< %1354 = call float @llvm.sqrt.f32(float %1353) -< %1355 = fneg float %710 -< %1356 = fmul float %1354, %1355 -< %1357 = bitcast i32 %865 to float -< %1358 = fadd float %1357, %1356 -< %1359 = bitcast i32 %865 to float -< %1360 = bitcast i32 %865 to float ---- -> %1353 = bitcast i32 %52 to float -> %1354 = fadd float %1353, %1352 -> %1355 = bitcast i32 %52 to float -> %1356 = bitcast i32 %52 to float -> %1357 = fmul float %1355, %1356 -> %1358 = fadd float %1357, 0.000000e+00 -> %1359 = bitcast i32 %686 to float -> %1360 = bitcast i32 %686 to float -1832,1834c1833,1835 -< %1362 = fadd float %1361, 0.000000e+00 -< %1363 = bitcast i32 %57 to float -< %1364 = bitcast i32 %57 to float ---- -> %1362 = fadd float %1358, %1361 -> %1363 = call float @llvm.sqrt.f32(float %1362) -> %1364 = fneg float %692 -1836,1892c1837,1893 -< %1366 = fadd float %1362, %1365 -< %1367 = call float @llvm.sqrt.f32(float %1366) -< %1368 = fneg float %710 -< %1369 = fmul float %1367, %1368 -< %1370 = bitcast i32 
%865 to float -< %1371 = fadd float %1370, %1369 -< %1372 = bitcast i32 %865 to float -< %1373 = bitcast i32 %865 to float -< %1374 = fmul float %1372, %1373 -< %1375 = fadd float %1374, 0.000000e+00 -< %1376 = bitcast i32 %57 to float -< %1377 = bitcast i32 %57 to float -< %1378 = fmul float %1376, %1377 -< %1379 = fadd float %1375, %1378 -< %1380 = call float @llvm.sqrt.f32(float %1379) -< %1381 = fneg float %710 -< %1382 = fmul float %1380, %1381 -< %1383 = bitcast i32 %865 to float -< %1384 = fadd float %1383, %1382 -< %1385 = fmul float %1371, %1384 -< %1386 = fadd float %1385, 0.000000e+00 -< %1387 = bitcast i32 %865 to float -< %1388 = bitcast i32 %865 to float -< %1389 = fmul float %1387, %1388 -< %1390 = fadd float %1389, 0.000000e+00 -< %1391 = bitcast i32 %57 to float -< %1392 = bitcast i32 %57 to float -< %1393 = fmul float %1391, %1392 -< %1394 = fadd float %1390, %1393 -< %1395 = call float @llvm.sqrt.f32(float %1394) -< %1396 = fneg float %710 -< %1397 = fmul float %1395, %1396 -< %1398 = fmul float %1397, 0.000000e+00 -< %1399 = bitcast i32 %57 to float -< %1400 = fadd float %1399, %1398 -< %1401 = bitcast i32 %865 to float -< %1402 = bitcast i32 %865 to float -< %1403 = fmul float %1401, %1402 -< %1404 = fadd float %1403, 0.000000e+00 -< %1405 = bitcast i32 %57 to float -< %1406 = bitcast i32 %57 to float -< %1407 = fmul float %1405, %1406 -< %1408 = fadd float %1404, %1407 -< %1409 = call float @llvm.sqrt.f32(float %1408) -< %1410 = fneg float %710 -< %1411 = fmul float %1409, %1410 -< %1412 = fmul float %1411, 0.000000e+00 -< %1413 = bitcast i32 %57 to float -< %1414 = fadd float %1413, %1412 -< %1415 = fmul float %1400, %1414 -< %1416 = fadd float %1386, %1415 -< %1417 = call float @llvm.sqrt.f32(float %1416) -< %1418 = fadd float %1417, 0.000000e+00 -< %1419 = fdiv float %1358, %1418 -< %1420 = fmul float %1419, 2.000000e+00 -< %1421 = bitcast i32 %865 to float -< %1422 = bitcast i32 %865 to float ---- -> %1366 = bitcast i32 %52 to float -> %1367 = fadd float %1366, %1365 -> %1368 = fmul float %1354, %1367 -> %1369 = fadd float %1368, 0.000000e+00 -> %1370 = bitcast i32 %52 to float -> %1371 = bitcast i32 %52 to float -> %1372 = fmul float %1370, %1371 -> %1373 = fadd float %1372, 0.000000e+00 -> %1374 = bitcast i32 %686 to float -> %1375 = bitcast i32 %686 to float -> %1376 = fmul float %1374, %1375 -> %1377 = fadd float %1373, %1376 -> %1378 = call float @llvm.sqrt.f32(float %1377) -> %1379 = fneg float %692 -> %1380 = fmul float %1378, %1379 -> %1381 = fmul float %1380, 0.000000e+00 -> %1382 = bitcast i32 %686 to float -> %1383 = fadd float %1382, %1381 -> %1384 = bitcast i32 %52 to float -> %1385 = bitcast i32 %52 to float -> %1386 = fmul float %1384, %1385 -> %1387 = fadd float %1386, 0.000000e+00 -> %1388 = bitcast i32 %686 to float -> %1389 = bitcast i32 %686 to float -> %1390 = fmul float %1388, %1389 -> %1391 = fadd float %1387, %1390 -> %1392 = call float @llvm.sqrt.f32(float %1391) -> %1393 = fneg float %692 -> %1394 = fmul float %1392, %1393 -> %1395 = fmul float %1394, 0.000000e+00 -> %1396 = bitcast i32 %686 to float -> %1397 = fadd float %1396, %1395 -> %1398 = fmul float %1383, %1397 -> %1399 = fadd float %1369, %1398 -> %1400 = call float @llvm.sqrt.f32(float %1399) -> %1401 = fadd float %1400, 0.000000e+00 -> %1402 = fdiv float %1341, %1401 -> %1403 = fmul float %1402, 2.000000e+00 -> %1404 = bitcast i32 %52 to float -> %1405 = bitcast i32 %52 to float -> %1406 = fmul float %1404, %1405 -> %1407 = fadd float %1406, 0.000000e+00 -> %1408 = bitcast i32 
%686 to float -> %1409 = bitcast i32 %686 to float -> %1410 = fmul float %1408, %1409 -> %1411 = fadd float %1407, %1410 -> %1412 = call float @llvm.sqrt.f32(float %1411) -> %1413 = fneg float %692 -> %1414 = fmul float %1412, %1413 -> %1415 = bitcast i32 %52 to float -> %1416 = fadd float %1415, %1414 -> %1417 = bitcast i32 %52 to float -> %1418 = bitcast i32 %52 to float -> %1419 = fmul float %1417, %1418 -> %1420 = fadd float %1419, 0.000000e+00 -> %1421 = bitcast i32 %686 to float -> %1422 = bitcast i32 %686 to float -1894,1896c1895,1897 -< %1424 = fadd float %1423, 0.000000e+00 -< %1425 = bitcast i32 %57 to float -< %1426 = bitcast i32 %57 to float ---- -> %1424 = fadd float %1420, %1423 -> %1425 = call float @llvm.sqrt.f32(float %1424) -> %1426 = fneg float %692 -1898,1905c1899,1906 -< %1428 = fadd float %1424, %1427 -< %1429 = call float @llvm.sqrt.f32(float %1428) -< %1430 = fneg float %710 -< %1431 = fmul float %1429, %1430 -< %1432 = bitcast i32 %865 to float -< %1433 = fadd float %1432, %1431 -< %1434 = bitcast i32 %865 to float -< %1435 = bitcast i32 %865 to float ---- -> %1428 = bitcast i32 %52 to float -> %1429 = fadd float %1428, %1427 -> %1430 = bitcast i32 %52 to float -> %1431 = bitcast i32 %52 to float -> %1432 = fmul float %1430, %1431 -> %1433 = fadd float %1432, 0.000000e+00 -> %1434 = bitcast i32 %686 to float -> %1435 = bitcast i32 %686 to float -1907,1909c1908,1910 -< %1437 = fadd float %1436, 0.000000e+00 -< %1438 = bitcast i32 %57 to float -< %1439 = bitcast i32 %57 to float ---- -> %1437 = fadd float %1433, %1436 -> %1438 = call float @llvm.sqrt.f32(float %1437) -> %1439 = fneg float %692 -1911,1967c1912,1968 -< %1441 = fadd float %1437, %1440 -< %1442 = call float @llvm.sqrt.f32(float %1441) -< %1443 = fneg float %710 -< %1444 = fmul float %1442, %1443 -< %1445 = bitcast i32 %865 to float -< %1446 = fadd float %1445, %1444 -< %1447 = bitcast i32 %865 to float -< %1448 = bitcast i32 %865 to float -< %1449 = fmul float %1447, %1448 -< %1450 = fadd float %1449, 0.000000e+00 -< %1451 = bitcast i32 %57 to float -< %1452 = bitcast i32 %57 to float -< %1453 = fmul float %1451, %1452 -< %1454 = fadd float %1450, %1453 -< %1455 = call float @llvm.sqrt.f32(float %1454) -< %1456 = fneg float %710 -< %1457 = fmul float %1455, %1456 -< %1458 = bitcast i32 %865 to float -< %1459 = fadd float %1458, %1457 -< %1460 = fmul float %1446, %1459 -< %1461 = fadd float %1460, 0.000000e+00 -< %1462 = bitcast i32 %865 to float -< %1463 = bitcast i32 %865 to float -< %1464 = fmul float %1462, %1463 -< %1465 = fadd float %1464, 0.000000e+00 -< %1466 = bitcast i32 %57 to float -< %1467 = bitcast i32 %57 to float -< %1468 = fmul float %1466, %1467 -< %1469 = fadd float %1465, %1468 -< %1470 = call float @llvm.sqrt.f32(float %1469) -< %1471 = fneg float %710 -< %1472 = fmul float %1470, %1471 -< %1473 = fmul float %1472, 0.000000e+00 -< %1474 = bitcast i32 %57 to float -< %1475 = fadd float %1474, %1473 -< %1476 = bitcast i32 %865 to float -< %1477 = bitcast i32 %865 to float -< %1478 = fmul float %1476, %1477 -< %1479 = fadd float %1478, 0.000000e+00 -< %1480 = bitcast i32 %57 to float -< %1481 = bitcast i32 %57 to float -< %1482 = fmul float %1480, %1481 -< %1483 = fadd float %1479, %1482 -< %1484 = call float @llvm.sqrt.f32(float %1483) -< %1485 = fneg float %710 -< %1486 = fmul float %1484, %1485 -< %1487 = fmul float %1486, 0.000000e+00 -< %1488 = bitcast i32 %57 to float -< %1489 = fadd float %1488, %1487 -< %1490 = fmul float %1475, %1489 -< %1491 = fadd float %1461, %1490 -< %1492 = 
call float @llvm.sqrt.f32(float %1491) -< %1493 = fadd float %1492, 0.000000e+00 -< %1494 = fdiv float %1433, %1493 -< %1495 = fmul float %1420, %1494 -< %1496 = fsub float 1.000000e+00, %1495 -< %1497 = fmul float %1496, %1339 ---- -> %1441 = bitcast i32 %52 to float -> %1442 = fadd float %1441, %1440 -> %1443 = fmul float %1429, %1442 -> %1444 = fadd float %1443, 0.000000e+00 -> %1445 = bitcast i32 %52 to float -> %1446 = bitcast i32 %52 to float -> %1447 = fmul float %1445, %1446 -> %1448 = fadd float %1447, 0.000000e+00 -> %1449 = bitcast i32 %686 to float -> %1450 = bitcast i32 %686 to float -> %1451 = fmul float %1449, %1450 -> %1452 = fadd float %1448, %1451 -> %1453 = call float @llvm.sqrt.f32(float %1452) -> %1454 = fneg float %692 -> %1455 = fmul float %1453, %1454 -> %1456 = fmul float %1455, 0.000000e+00 -> %1457 = bitcast i32 %686 to float -> %1458 = fadd float %1457, %1456 -> %1459 = bitcast i32 %52 to float -> %1460 = bitcast i32 %52 to float -> %1461 = fmul float %1459, %1460 -> %1462 = fadd float %1461, 0.000000e+00 -> %1463 = bitcast i32 %686 to float -> %1464 = bitcast i32 %686 to float -> %1465 = fmul float %1463, %1464 -> %1466 = fadd float %1462, %1465 -> %1467 = call float @llvm.sqrt.f32(float %1466) -> %1468 = fneg float %692 -> %1469 = fmul float %1467, %1468 -> %1470 = fmul float %1469, 0.000000e+00 -> %1471 = bitcast i32 %686 to float -> %1472 = fadd float %1471, %1470 -> %1473 = fmul float %1458, %1472 -> %1474 = fadd float %1444, %1473 -> %1475 = call float @llvm.sqrt.f32(float %1474) -> %1476 = fadd float %1475, 0.000000e+00 -> %1477 = fdiv float %1416, %1476 -> %1478 = fmul float %1403, %1477 -> %1479 = fsub float 1.000000e+00, %1478 -> %1480 = fmul float %1479, %1322 -> %1481 = fadd float %1480, 0.000000e+00 -> %1482 = bitcast i32 %52 to float -> %1483 = bitcast i32 %52 to float -> %1484 = fmul float %1482, %1483 -> %1485 = fadd float %1484, 0.000000e+00 -> %1486 = bitcast i32 %686 to float -> %1487 = bitcast i32 %686 to float -> %1488 = fmul float %1486, %1487 -> %1489 = fadd float %1485, %1488 -> %1490 = call float @llvm.sqrt.f32(float %1489) -> %1491 = fneg float %692 -> %1492 = fmul float %1490, %1491 -> %1493 = bitcast i32 %52 to float -> %1494 = fadd float %1493, %1492 -> %1495 = bitcast i32 %52 to float -> %1496 = bitcast i32 %52 to float -> %1497 = fmul float %1495, %1496 -1969,1970c1970,1971 -< %1499 = bitcast i32 %865 to float -< %1500 = bitcast i32 %865 to float ---- -> %1499 = bitcast i32 %686 to float -> %1500 = bitcast i32 %686 to float -1972,1974c1973,1975 -< %1502 = fadd float %1501, 0.000000e+00 -< %1503 = bitcast i32 %57 to float -< %1504 = bitcast i32 %57 to float ---- -> %1502 = fadd float %1498, %1501 -> %1503 = call float @llvm.sqrt.f32(float %1502) -> %1504 = fneg float %692 -1976,1983c1977,1984 -< %1506 = fadd float %1502, %1505 -< %1507 = call float @llvm.sqrt.f32(float %1506) -< %1508 = fneg float %710 -< %1509 = fmul float %1507, %1508 -< %1510 = bitcast i32 %865 to float -< %1511 = fadd float %1510, %1509 -< %1512 = bitcast i32 %865 to float -< %1513 = bitcast i32 %865 to float ---- -> %1506 = bitcast i32 %52 to float -> %1507 = fadd float %1506, %1505 -> %1508 = bitcast i32 %52 to float -> %1509 = bitcast i32 %52 to float -> %1510 = fmul float %1508, %1509 -> %1511 = fadd float %1510, 0.000000e+00 -> %1512 = bitcast i32 %686 to float -> %1513 = bitcast i32 %686 to float -1985,1987c1986,1988 -< %1515 = fadd float %1514, 0.000000e+00 -< %1516 = bitcast i32 %57 to float -< %1517 = bitcast i32 %57 to float ---- -> %1515 = fadd float 
%1511, %1514 -> %1516 = call float @llvm.sqrt.f32(float %1515) -> %1517 = fneg float %692 -1989,2059c1990,2060 -< %1519 = fadd float %1515, %1518 -< %1520 = call float @llvm.sqrt.f32(float %1519) -< %1521 = fneg float %710 -< %1522 = fmul float %1520, %1521 -< %1523 = bitcast i32 %865 to float -< %1524 = fadd float %1523, %1522 -< %1525 = bitcast i32 %865 to float -< %1526 = bitcast i32 %865 to float -< %1527 = fmul float %1525, %1526 -< %1528 = fadd float %1527, 0.000000e+00 -< %1529 = bitcast i32 %57 to float -< %1530 = bitcast i32 %57 to float -< %1531 = fmul float %1529, %1530 -< %1532 = fadd float %1528, %1531 -< %1533 = call float @llvm.sqrt.f32(float %1532) -< %1534 = fneg float %710 -< %1535 = fmul float %1533, %1534 -< %1536 = bitcast i32 %865 to float -< %1537 = fadd float %1536, %1535 -< %1538 = fmul float %1524, %1537 -< %1539 = fadd float %1538, 0.000000e+00 -< %1540 = bitcast i32 %865 to float -< %1541 = bitcast i32 %865 to float -< %1542 = fmul float %1540, %1541 -< %1543 = fadd float %1542, 0.000000e+00 -< %1544 = bitcast i32 %57 to float -< %1545 = bitcast i32 %57 to float -< %1546 = fmul float %1544, %1545 -< %1547 = fadd float %1543, %1546 -< %1548 = call float @llvm.sqrt.f32(float %1547) -< %1549 = fneg float %710 -< %1550 = fmul float %1548, %1549 -< %1551 = fmul float %1550, 0.000000e+00 -< %1552 = bitcast i32 %57 to float -< %1553 = fadd float %1552, %1551 -< %1554 = bitcast i32 %865 to float -< %1555 = bitcast i32 %865 to float -< %1556 = fmul float %1554, %1555 -< %1557 = fadd float %1556, 0.000000e+00 -< %1558 = bitcast i32 %57 to float -< %1559 = bitcast i32 %57 to float -< %1560 = fmul float %1558, %1559 -< %1561 = fadd float %1557, %1560 -< %1562 = call float @llvm.sqrt.f32(float %1561) -< %1563 = fneg float %710 -< %1564 = fmul float %1562, %1563 -< %1565 = fmul float %1564, 0.000000e+00 -< %1566 = bitcast i32 %57 to float -< %1567 = fadd float %1566, %1565 -< %1568 = fmul float %1553, %1567 -< %1569 = fadd float %1539, %1568 -< %1570 = call float @llvm.sqrt.f32(float %1569) -< %1571 = fadd float %1570, 0.000000e+00 -< %1572 = fdiv float %1511, %1571 -< %1573 = fmul float %1572, 2.000000e+00 -< %1574 = bitcast i32 %865 to float -< %1575 = bitcast i32 %865 to float -< %1576 = fmul float %1574, %1575 -< %1577 = fadd float %1576, 0.000000e+00 -< %1578 = bitcast i32 %57 to float -< %1579 = bitcast i32 %57 to float -< %1580 = fmul float %1578, %1579 -< %1581 = fadd float %1577, %1580 -< %1582 = call float @llvm.sqrt.f32(float %1581) -< %1583 = fneg float %710 -< %1584 = fmul float %1582, %1583 -< %1585 = fmul float %1584, 0.000000e+00 -< %1586 = bitcast i32 %57 to float -< %1587 = fadd float %1586, %1585 -< %1588 = bitcast i32 %865 to float -< %1589 = bitcast i32 %865 to float ---- -> %1519 = bitcast i32 %52 to float -> %1520 = fadd float %1519, %1518 -> %1521 = fmul float %1507, %1520 -> %1522 = fadd float %1521, 0.000000e+00 -> %1523 = bitcast i32 %52 to float -> %1524 = bitcast i32 %52 to float -> %1525 = fmul float %1523, %1524 -> %1526 = fadd float %1525, 0.000000e+00 -> %1527 = bitcast i32 %686 to float -> %1528 = bitcast i32 %686 to float -> %1529 = fmul float %1527, %1528 -> %1530 = fadd float %1526, %1529 -> %1531 = call float @llvm.sqrt.f32(float %1530) -> %1532 = fneg float %692 -> %1533 = fmul float %1531, %1532 -> %1534 = fmul float %1533, 0.000000e+00 -> %1535 = bitcast i32 %686 to float -> %1536 = fadd float %1535, %1534 -> %1537 = bitcast i32 %52 to float -> %1538 = bitcast i32 %52 to float -> %1539 = fmul float %1537, %1538 -> %1540 = fadd float 
%1539, 0.000000e+00 -> %1541 = bitcast i32 %686 to float -> %1542 = bitcast i32 %686 to float -> %1543 = fmul float %1541, %1542 -> %1544 = fadd float %1540, %1543 -> %1545 = call float @llvm.sqrt.f32(float %1544) -> %1546 = fneg float %692 -> %1547 = fmul float %1545, %1546 -> %1548 = fmul float %1547, 0.000000e+00 -> %1549 = bitcast i32 %686 to float -> %1550 = fadd float %1549, %1548 -> %1551 = fmul float %1536, %1550 -> %1552 = fadd float %1522, %1551 -> %1553 = call float @llvm.sqrt.f32(float %1552) -> %1554 = fadd float %1553, 0.000000e+00 -> %1555 = fdiv float %1494, %1554 -> %1556 = fmul float %1555, 2.000000e+00 -> %1557 = bitcast i32 %52 to float -> %1558 = bitcast i32 %52 to float -> %1559 = fmul float %1557, %1558 -> %1560 = fadd float %1559, 0.000000e+00 -> %1561 = bitcast i32 %686 to float -> %1562 = bitcast i32 %686 to float -> %1563 = fmul float %1561, %1562 -> %1564 = fadd float %1560, %1563 -> %1565 = call float @llvm.sqrt.f32(float %1564) -> %1566 = fneg float %692 -> %1567 = fmul float %1565, %1566 -> %1568 = fmul float %1567, 0.000000e+00 -> %1569 = bitcast i32 %686 to float -> %1570 = fadd float %1569, %1568 -> %1571 = bitcast i32 %52 to float -> %1572 = bitcast i32 %52 to float -> %1573 = fmul float %1571, %1572 -> %1574 = fadd float %1573, 0.000000e+00 -> %1575 = bitcast i32 %686 to float -> %1576 = bitcast i32 %686 to float -> %1577 = fmul float %1575, %1576 -> %1578 = fadd float %1574, %1577 -> %1579 = call float @llvm.sqrt.f32(float %1578) -> %1580 = fneg float %692 -> %1581 = fmul float %1579, %1580 -> %1582 = bitcast i32 %52 to float -> %1583 = fadd float %1582, %1581 -> %1584 = bitcast i32 %52 to float -> %1585 = bitcast i32 %52 to float -> %1586 = fmul float %1584, %1585 -> %1587 = fadd float %1586, 0.000000e+00 -> %1588 = bitcast i32 %686 to float -> %1589 = bitcast i32 %686 to float -2061,2063c2062,2064 -< %1591 = fadd float %1590, 0.000000e+00 -< %1592 = bitcast i32 %57 to float -< %1593 = bitcast i32 %57 to float ---- -> %1591 = fadd float %1587, %1590 -> %1592 = call float @llvm.sqrt.f32(float %1591) -> %1593 = fneg float %692 -2065,2149c2066,2150 -< %1595 = fadd float %1591, %1594 -< %1596 = call float @llvm.sqrt.f32(float %1595) -< %1597 = fneg float %710 -< %1598 = fmul float %1596, %1597 -< %1599 = bitcast i32 %865 to float -< %1600 = fadd float %1599, %1598 -< %1601 = bitcast i32 %865 to float -< %1602 = bitcast i32 %865 to float -< %1603 = fmul float %1601, %1602 -< %1604 = fadd float %1603, 0.000000e+00 -< %1605 = bitcast i32 %57 to float -< %1606 = bitcast i32 %57 to float -< %1607 = fmul float %1605, %1606 -< %1608 = fadd float %1604, %1607 -< %1609 = call float @llvm.sqrt.f32(float %1608) -< %1610 = fneg float %710 -< %1611 = fmul float %1609, %1610 -< %1612 = bitcast i32 %865 to float -< %1613 = fadd float %1612, %1611 -< %1614 = fmul float %1600, %1613 -< %1615 = fadd float %1614, 0.000000e+00 -< %1616 = bitcast i32 %865 to float -< %1617 = bitcast i32 %865 to float -< %1618 = fmul float %1616, %1617 -< %1619 = fadd float %1618, 0.000000e+00 -< %1620 = bitcast i32 %57 to float -< %1621 = bitcast i32 %57 to float -< %1622 = fmul float %1620, %1621 -< %1623 = fadd float %1619, %1622 -< %1624 = call float @llvm.sqrt.f32(float %1623) -< %1625 = fneg float %710 -< %1626 = fmul float %1624, %1625 -< %1627 = fmul float %1626, 0.000000e+00 -< %1628 = bitcast i32 %57 to float -< %1629 = fadd float %1628, %1627 -< %1630 = bitcast i32 %865 to float -< %1631 = bitcast i32 %865 to float -< %1632 = fmul float %1630, %1631 -< %1633 = fadd float %1632, 
0.000000e+00 -< %1634 = bitcast i32 %57 to float -< %1635 = bitcast i32 %57 to float -< %1636 = fmul float %1634, %1635 -< %1637 = fadd float %1633, %1636 -< %1638 = call float @llvm.sqrt.f32(float %1637) -< %1639 = fneg float %710 -< %1640 = fmul float %1638, %1639 -< %1641 = fmul float %1640, 0.000000e+00 -< %1642 = bitcast i32 %57 to float -< %1643 = fadd float %1642, %1641 -< %1644 = fmul float %1629, %1643 -< %1645 = fadd float %1615, %1644 -< %1646 = call float @llvm.sqrt.f32(float %1645) -< %1647 = fadd float %1646, 0.000000e+00 -< %1648 = fdiv float %1587, %1647 -< %1649 = fmul float %1573, %1648 -< %1650 = fneg float %1649 -< %1651 = getelementptr float, float* %0, i32 0 -< %1652 = getelementptr inbounds float, float* %1651, i64 3 -< %1653 = load float, float* %1652, align 4 -< %1654 = fmul float %1650, %1653 -< %1655 = fadd float %1498, %1654 -< %1656 = insertelement <4 x float> zeroinitializer, float %1655, i32 0 -< %1657 = insertelement <4 x float> %1656, float 0.000000e+00, i32 1 -< %1658 = insertelement <4 x float> %1657, float 0.000000e+00, i32 2 -< %1659 = insertelement <4 x float> %1658, float 0.000000e+00, i32 3 -< %1660 = extractelement <4 x float> %1659, i32 0 -< store float %1660, float* %1181, align 4 -< %1661 = extractelement <4 x float> %1659, i32 1 -< store float %1661, float* %40, align 4 -< %1662 = bitcast i32 %865 to float -< %1663 = bitcast i32 %865 to float -< %1664 = fmul float %1662, %1663 -< %1665 = fadd float %1664, 0.000000e+00 -< %1666 = bitcast i32 %57 to float -< %1667 = bitcast i32 %57 to float -< %1668 = fmul float %1666, %1667 -< %1669 = fadd float %1665, %1668 -< %1670 = call float @llvm.sqrt.f32(float %1669) -< %1671 = fneg float %710 -< %1672 = fmul float %1670, %1671 -< %1673 = fmul float %1672, 0.000000e+00 -< %1674 = bitcast i32 %57 to float -< %1675 = fadd float %1674, %1673 -< %1676 = bitcast i32 %865 to float -< %1677 = bitcast i32 %865 to float ---- -> %1595 = bitcast i32 %52 to float -> %1596 = fadd float %1595, %1594 -> %1597 = fmul float %1583, %1596 -> %1598 = fadd float %1597, 0.000000e+00 -> %1599 = bitcast i32 %52 to float -> %1600 = bitcast i32 %52 to float -> %1601 = fmul float %1599, %1600 -> %1602 = fadd float %1601, 0.000000e+00 -> %1603 = bitcast i32 %686 to float -> %1604 = bitcast i32 %686 to float -> %1605 = fmul float %1603, %1604 -> %1606 = fadd float %1602, %1605 -> %1607 = call float @llvm.sqrt.f32(float %1606) -> %1608 = fneg float %692 -> %1609 = fmul float %1607, %1608 -> %1610 = fmul float %1609, 0.000000e+00 -> %1611 = bitcast i32 %686 to float -> %1612 = fadd float %1611, %1610 -> %1613 = bitcast i32 %52 to float -> %1614 = bitcast i32 %52 to float -> %1615 = fmul float %1613, %1614 -> %1616 = fadd float %1615, 0.000000e+00 -> %1617 = bitcast i32 %686 to float -> %1618 = bitcast i32 %686 to float -> %1619 = fmul float %1617, %1618 -> %1620 = fadd float %1616, %1619 -> %1621 = call float @llvm.sqrt.f32(float %1620) -> %1622 = fneg float %692 -> %1623 = fmul float %1621, %1622 -> %1624 = fmul float %1623, 0.000000e+00 -> %1625 = bitcast i32 %686 to float -> %1626 = fadd float %1625, %1624 -> %1627 = fmul float %1612, %1626 -> %1628 = fadd float %1598, %1627 -> %1629 = call float @llvm.sqrt.f32(float %1628) -> %1630 = fadd float %1629, 0.000000e+00 -> %1631 = fdiv float %1570, %1630 -> %1632 = fmul float %1556, %1631 -> %1633 = fneg float %1632 -> %1634 = load float, float* %44, align 4 -> %1635 = fmul float %1633, %1634 -> %1636 = fadd float %1481, %1635 -> %1637 = insertelement <4 x float> zeroinitializer, float 
%1636, i32 0 -> %1638 = insertelement <4 x float> %1637, float 0.000000e+00, i32 1 -> %1639 = insertelement <4 x float> %1638, float 0.000000e+00, i32 2 -> %1640 = insertelement <4 x float> %1639, float 0.000000e+00, i32 3 -> %1641 = extractelement <4 x float> %1640, i32 0 -> store float %1641, float* %1164, align 4 -> %1642 = extractelement <4 x float> %1640, i32 1 -> %1643 = getelementptr float, float* %2, i32 0 -> %1644 = getelementptr inbounds float, float* %1643, i64 2 -> store float %1642, float* %1644, align 4 -> %1645 = bitcast i32 %52 to float -> %1646 = bitcast i32 %52 to float -> %1647 = fmul float %1645, %1646 -> %1648 = fadd float %1647, 0.000000e+00 -> %1649 = bitcast i32 %686 to float -> %1650 = bitcast i32 %686 to float -> %1651 = fmul float %1649, %1650 -> %1652 = fadd float %1648, %1651 -> %1653 = call float @llvm.sqrt.f32(float %1652) -> %1654 = fneg float %692 -> %1655 = fmul float %1653, %1654 -> %1656 = fmul float %1655, 0.000000e+00 -> %1657 = bitcast i32 %686 to float -> %1658 = fadd float %1657, %1656 -> %1659 = bitcast i32 %52 to float -> %1660 = bitcast i32 %52 to float -> %1661 = fmul float %1659, %1660 -> %1662 = fadd float %1661, 0.000000e+00 -> %1663 = bitcast i32 %686 to float -> %1664 = bitcast i32 %686 to float -> %1665 = fmul float %1663, %1664 -> %1666 = fadd float %1662, %1665 -> %1667 = call float @llvm.sqrt.f32(float %1666) -> %1668 = fneg float %692 -> %1669 = fmul float %1667, %1668 -> %1670 = bitcast i32 %52 to float -> %1671 = fadd float %1670, %1669 -> %1672 = bitcast i32 %52 to float -> %1673 = bitcast i32 %52 to float -> %1674 = fmul float %1672, %1673 -> %1675 = fadd float %1674, 0.000000e+00 -> %1676 = bitcast i32 %686 to float -> %1677 = bitcast i32 %686 to float -2151,2153c2152,2154 -< %1679 = fadd float %1678, 0.000000e+00 -< %1680 = bitcast i32 %57 to float -< %1681 = bitcast i32 %57 to float ---- -> %1679 = fadd float %1675, %1678 -> %1680 = call float @llvm.sqrt.f32(float %1679) -> %1681 = fneg float %692 -2155,2211c2156,2212 -< %1683 = fadd float %1679, %1682 -< %1684 = call float @llvm.sqrt.f32(float %1683) -< %1685 = fneg float %710 -< %1686 = fmul float %1684, %1685 -< %1687 = bitcast i32 %865 to float -< %1688 = fadd float %1687, %1686 -< %1689 = bitcast i32 %865 to float -< %1690 = bitcast i32 %865 to float -< %1691 = fmul float %1689, %1690 -< %1692 = fadd float %1691, 0.000000e+00 -< %1693 = bitcast i32 %57 to float -< %1694 = bitcast i32 %57 to float -< %1695 = fmul float %1693, %1694 -< %1696 = fadd float %1692, %1695 -< %1697 = call float @llvm.sqrt.f32(float %1696) -< %1698 = fneg float %710 -< %1699 = fmul float %1697, %1698 -< %1700 = bitcast i32 %865 to float -< %1701 = fadd float %1700, %1699 -< %1702 = fmul float %1688, %1701 -< %1703 = fadd float %1702, 0.000000e+00 -< %1704 = bitcast i32 %865 to float -< %1705 = bitcast i32 %865 to float -< %1706 = fmul float %1704, %1705 -< %1707 = fadd float %1706, 0.000000e+00 -< %1708 = bitcast i32 %57 to float -< %1709 = bitcast i32 %57 to float -< %1710 = fmul float %1708, %1709 -< %1711 = fadd float %1707, %1710 -< %1712 = call float @llvm.sqrt.f32(float %1711) -< %1713 = fneg float %710 -< %1714 = fmul float %1712, %1713 -< %1715 = fmul float %1714, 0.000000e+00 -< %1716 = bitcast i32 %57 to float -< %1717 = fadd float %1716, %1715 -< %1718 = bitcast i32 %865 to float -< %1719 = bitcast i32 %865 to float -< %1720 = fmul float %1718, %1719 -< %1721 = fadd float %1720, 0.000000e+00 -< %1722 = bitcast i32 %57 to float -< %1723 = bitcast i32 %57 to float -< %1724 = fmul float 
%1722, %1723 -< %1725 = fadd float %1721, %1724 -< %1726 = call float @llvm.sqrt.f32(float %1725) -< %1727 = fneg float %710 -< %1728 = fmul float %1726, %1727 -< %1729 = fmul float %1728, 0.000000e+00 -< %1730 = bitcast i32 %57 to float -< %1731 = fadd float %1730, %1729 -< %1732 = fmul float %1717, %1731 -< %1733 = fadd float %1703, %1732 -< %1734 = call float @llvm.sqrt.f32(float %1733) -< %1735 = fadd float %1734, 0.000000e+00 -< %1736 = fdiv float %1675, %1735 -< %1737 = fmul float %1736, 2.000000e+00 -< %1738 = bitcast i32 %865 to float -< %1739 = bitcast i32 %865 to float ---- -> %1683 = bitcast i32 %52 to float -> %1684 = fadd float %1683, %1682 -> %1685 = fmul float %1671, %1684 -> %1686 = fadd float %1685, 0.000000e+00 -> %1687 = bitcast i32 %52 to float -> %1688 = bitcast i32 %52 to float -> %1689 = fmul float %1687, %1688 -> %1690 = fadd float %1689, 0.000000e+00 -> %1691 = bitcast i32 %686 to float -> %1692 = bitcast i32 %686 to float -> %1693 = fmul float %1691, %1692 -> %1694 = fadd float %1690, %1693 -> %1695 = call float @llvm.sqrt.f32(float %1694) -> %1696 = fneg float %692 -> %1697 = fmul float %1695, %1696 -> %1698 = fmul float %1697, 0.000000e+00 -> %1699 = bitcast i32 %686 to float -> %1700 = fadd float %1699, %1698 -> %1701 = bitcast i32 %52 to float -> %1702 = bitcast i32 %52 to float -> %1703 = fmul float %1701, %1702 -> %1704 = fadd float %1703, 0.000000e+00 -> %1705 = bitcast i32 %686 to float -> %1706 = bitcast i32 %686 to float -> %1707 = fmul float %1705, %1706 -> %1708 = fadd float %1704, %1707 -> %1709 = call float @llvm.sqrt.f32(float %1708) -> %1710 = fneg float %692 -> %1711 = fmul float %1709, %1710 -> %1712 = fmul float %1711, 0.000000e+00 -> %1713 = bitcast i32 %686 to float -> %1714 = fadd float %1713, %1712 -> %1715 = fmul float %1700, %1714 -> %1716 = fadd float %1686, %1715 -> %1717 = call float @llvm.sqrt.f32(float %1716) -> %1718 = fadd float %1717, 0.000000e+00 -> %1719 = fdiv float %1658, %1718 -> %1720 = fmul float %1719, 2.000000e+00 -> %1721 = bitcast i32 %52 to float -> %1722 = bitcast i32 %52 to float -> %1723 = fmul float %1721, %1722 -> %1724 = fadd float %1723, 0.000000e+00 -> %1725 = bitcast i32 %686 to float -> %1726 = bitcast i32 %686 to float -> %1727 = fmul float %1725, %1726 -> %1728 = fadd float %1724, %1727 -> %1729 = call float @llvm.sqrt.f32(float %1728) -> %1730 = fneg float %692 -> %1731 = fmul float %1729, %1730 -> %1732 = bitcast i32 %52 to float -> %1733 = fadd float %1732, %1731 -> %1734 = bitcast i32 %52 to float -> %1735 = bitcast i32 %52 to float -> %1736 = fmul float %1734, %1735 -> %1737 = fadd float %1736, 0.000000e+00 -> %1738 = bitcast i32 %686 to float -> %1739 = bitcast i32 %686 to float -2213,2215c2214,2216 -< %1741 = fadd float %1740, 0.000000e+00 -< %1742 = bitcast i32 %57 to float -< %1743 = bitcast i32 %57 to float ---- -> %1741 = fadd float %1737, %1740 -> %1742 = call float @llvm.sqrt.f32(float %1741) -> %1743 = fneg float %692 -2217,2224c2218,2225 -< %1745 = fadd float %1741, %1744 -< %1746 = call float @llvm.sqrt.f32(float %1745) -< %1747 = fneg float %710 -< %1748 = fmul float %1746, %1747 -< %1749 = bitcast i32 %865 to float -< %1750 = fadd float %1749, %1748 -< %1751 = bitcast i32 %865 to float -< %1752 = bitcast i32 %865 to float ---- -> %1745 = bitcast i32 %52 to float -> %1746 = fadd float %1745, %1744 -> %1747 = bitcast i32 %52 to float -> %1748 = bitcast i32 %52 to float -> %1749 = fmul float %1747, %1748 -> %1750 = fadd float %1749, 0.000000e+00 -> %1751 = bitcast i32 %686 to float -> %1752 = 
bitcast i32 %686 to float -2226,2228c2227,2229 -< %1754 = fadd float %1753, 0.000000e+00 -< %1755 = bitcast i32 %57 to float -< %1756 = bitcast i32 %57 to float ---- -> %1754 = fadd float %1750, %1753 -> %1755 = call float @llvm.sqrt.f32(float %1754) -> %1756 = fneg float %692 -2230,2314c2231,2315 -< %1758 = fadd float %1754, %1757 -< %1759 = call float @llvm.sqrt.f32(float %1758) -< %1760 = fneg float %710 -< %1761 = fmul float %1759, %1760 -< %1762 = bitcast i32 %865 to float -< %1763 = fadd float %1762, %1761 -< %1764 = bitcast i32 %865 to float -< %1765 = bitcast i32 %865 to float -< %1766 = fmul float %1764, %1765 -< %1767 = fadd float %1766, 0.000000e+00 -< %1768 = bitcast i32 %57 to float -< %1769 = bitcast i32 %57 to float -< %1770 = fmul float %1768, %1769 -< %1771 = fadd float %1767, %1770 -< %1772 = call float @llvm.sqrt.f32(float %1771) -< %1773 = fneg float %710 -< %1774 = fmul float %1772, %1773 -< %1775 = bitcast i32 %865 to float -< %1776 = fadd float %1775, %1774 -< %1777 = fmul float %1763, %1776 -< %1778 = fadd float %1777, 0.000000e+00 -< %1779 = bitcast i32 %865 to float -< %1780 = bitcast i32 %865 to float -< %1781 = fmul float %1779, %1780 -< %1782 = fadd float %1781, 0.000000e+00 -< %1783 = bitcast i32 %57 to float -< %1784 = bitcast i32 %57 to float -< %1785 = fmul float %1783, %1784 -< %1786 = fadd float %1782, %1785 -< %1787 = call float @llvm.sqrt.f32(float %1786) -< %1788 = fneg float %710 -< %1789 = fmul float %1787, %1788 -< %1790 = fmul float %1789, 0.000000e+00 -< %1791 = bitcast i32 %57 to float -< %1792 = fadd float %1791, %1790 -< %1793 = bitcast i32 %865 to float -< %1794 = bitcast i32 %865 to float -< %1795 = fmul float %1793, %1794 -< %1796 = fadd float %1795, 0.000000e+00 -< %1797 = bitcast i32 %57 to float -< %1798 = bitcast i32 %57 to float -< %1799 = fmul float %1797, %1798 -< %1800 = fadd float %1796, %1799 -< %1801 = call float @llvm.sqrt.f32(float %1800) -< %1802 = fneg float %710 -< %1803 = fmul float %1801, %1802 -< %1804 = fmul float %1803, 0.000000e+00 -< %1805 = bitcast i32 %57 to float -< %1806 = fadd float %1805, %1804 -< %1807 = fmul float %1792, %1806 -< %1808 = fadd float %1778, %1807 -< %1809 = call float @llvm.sqrt.f32(float %1808) -< %1810 = fadd float %1809, 0.000000e+00 -< %1811 = fdiv float %1750, %1810 -< %1812 = fmul float %1737, %1811 -< %1813 = fneg float %1812 -< %1814 = insertelement <4 x float> zeroinitializer, float %1813, i32 0 -< %1815 = insertelement <4 x float> %1814, float 0.000000e+00, i32 1 -< %1816 = insertelement <4 x float> %1815, float 0.000000e+00, i32 2 -< %1817 = insertelement <4 x float> %1816, float 0.000000e+00, i32 3 -< %1818 = getelementptr float, float* %0, i32 0 -< %1819 = load float, float* %1818, align 4 -< %1820 = insertelement <4 x float> zeroinitializer, float %1819, i32 0 -< %1821 = insertelement <4 x float> %1820, float 0.000000e+00, i32 1 -< %1822 = insertelement <4 x float> %1821, float 0.000000e+00, i32 2 -< %1823 = insertelement <4 x float> %1822, float 0.000000e+00, i32 3 -< %1824 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1817, <4 x float> %1823, <4 x float> zeroinitializer) -< %1825 = extractelement <4 x float> %1824, i32 0 -< store float %1825, float* %40, align 4 -< %1826 = bitcast i32 %865 to float -< %1827 = bitcast i32 %865 to float -< %1828 = fmul float %1826, %1827 -< %1829 = fadd float %1828, 0.000000e+00 -< %1830 = bitcast i32 %57 to float -< %1831 = bitcast i32 %57 to float -< %1832 = fmul float %1830, %1831 -< %1833 = fadd float %1829, %1832 -< %1834 = call float 
@llvm.sqrt.f32(float %1833) -< %1835 = fneg float %710 -< %1836 = fmul float %1834, %1835 -< %1837 = fmul float %1836, 0.000000e+00 -< %1838 = bitcast i32 %57 to float -< %1839 = fadd float %1838, %1837 -< %1840 = bitcast i32 %865 to float -< %1841 = bitcast i32 %865 to float ---- -> %1758 = bitcast i32 %52 to float -> %1759 = fadd float %1758, %1757 -> %1760 = fmul float %1746, %1759 -> %1761 = fadd float %1760, 0.000000e+00 -> %1762 = bitcast i32 %52 to float -> %1763 = bitcast i32 %52 to float -> %1764 = fmul float %1762, %1763 -> %1765 = fadd float %1764, 0.000000e+00 -> %1766 = bitcast i32 %686 to float -> %1767 = bitcast i32 %686 to float -> %1768 = fmul float %1766, %1767 -> %1769 = fadd float %1765, %1768 -> %1770 = call float @llvm.sqrt.f32(float %1769) -> %1771 = fneg float %692 -> %1772 = fmul float %1770, %1771 -> %1773 = fmul float %1772, 0.000000e+00 -> %1774 = bitcast i32 %686 to float -> %1775 = fadd float %1774, %1773 -> %1776 = bitcast i32 %52 to float -> %1777 = bitcast i32 %52 to float -> %1778 = fmul float %1776, %1777 -> %1779 = fadd float %1778, 0.000000e+00 -> %1780 = bitcast i32 %686 to float -> %1781 = bitcast i32 %686 to float -> %1782 = fmul float %1780, %1781 -> %1783 = fadd float %1779, %1782 -> %1784 = call float @llvm.sqrt.f32(float %1783) -> %1785 = fneg float %692 -> %1786 = fmul float %1784, %1785 -> %1787 = fmul float %1786, 0.000000e+00 -> %1788 = bitcast i32 %686 to float -> %1789 = fadd float %1788, %1787 -> %1790 = fmul float %1775, %1789 -> %1791 = fadd float %1761, %1790 -> %1792 = call float @llvm.sqrt.f32(float %1791) -> %1793 = fadd float %1792, 0.000000e+00 -> %1794 = fdiv float %1733, %1793 -> %1795 = fmul float %1720, %1794 -> %1796 = fneg float %1795 -> %1797 = insertelement <4 x float> zeroinitializer, float %1796, i32 0 -> %1798 = insertelement <4 x float> %1797, float 0.000000e+00, i32 1 -> %1799 = insertelement <4 x float> %1798, float 0.000000e+00, i32 2 -> %1800 = insertelement <4 x float> %1799, float 0.000000e+00, i32 3 -> %1801 = getelementptr float, float* %0, i32 0 -> %1802 = load float, float* %1801, align 4 -> %1803 = insertelement <4 x float> zeroinitializer, float %1802, i32 0 -> %1804 = insertelement <4 x float> %1803, float 0.000000e+00, i32 1 -> %1805 = insertelement <4 x float> %1804, float 0.000000e+00, i32 2 -> %1806 = insertelement <4 x float> %1805, float 0.000000e+00, i32 3 -> %1807 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1800, <4 x float> %1806, <4 x float> zeroinitializer) -> %1808 = extractelement <4 x float> %1807, i32 0 -> store float %1808, float* %1644, align 4 -> %1809 = bitcast i32 %52 to float -> %1810 = bitcast i32 %52 to float -> %1811 = fmul float %1809, %1810 -> %1812 = fadd float %1811, 0.000000e+00 -> %1813 = bitcast i32 %686 to float -> %1814 = bitcast i32 %686 to float -> %1815 = fmul float %1813, %1814 -> %1816 = fadd float %1812, %1815 -> %1817 = call float @llvm.sqrt.f32(float %1816) -> %1818 = fneg float %692 -> %1819 = fmul float %1817, %1818 -> %1820 = fmul float %1819, 0.000000e+00 -> %1821 = bitcast i32 %686 to float -> %1822 = fadd float %1821, %1820 -> %1823 = bitcast i32 %52 to float -> %1824 = bitcast i32 %52 to float -> %1825 = fmul float %1823, %1824 -> %1826 = fadd float %1825, 0.000000e+00 -> %1827 = bitcast i32 %686 to float -> %1828 = bitcast i32 %686 to float -> %1829 = fmul float %1827, %1828 -> %1830 = fadd float %1826, %1829 -> %1831 = call float @llvm.sqrt.f32(float %1830) -> %1832 = fneg float %692 -> %1833 = fmul float %1831, %1832 -> %1834 = bitcast i32 %52 to float 
-> %1835 = fadd float %1834, %1833 -> %1836 = bitcast i32 %52 to float -> %1837 = bitcast i32 %52 to float -> %1838 = fmul float %1836, %1837 -> %1839 = fadd float %1838, 0.000000e+00 -> %1840 = bitcast i32 %686 to float -> %1841 = bitcast i32 %686 to float -2316,2318c2317,2319 -< %1843 = fadd float %1842, 0.000000e+00 -< %1844 = bitcast i32 %57 to float -< %1845 = bitcast i32 %57 to float ---- -> %1843 = fadd float %1839, %1842 -> %1844 = call float @llvm.sqrt.f32(float %1843) -> %1845 = fneg float %692 -2320,2376c2321,2377 -< %1847 = fadd float %1843, %1846 -< %1848 = call float @llvm.sqrt.f32(float %1847) -< %1849 = fneg float %710 -< %1850 = fmul float %1848, %1849 -< %1851 = bitcast i32 %865 to float -< %1852 = fadd float %1851, %1850 -< %1853 = bitcast i32 %865 to float -< %1854 = bitcast i32 %865 to float -< %1855 = fmul float %1853, %1854 -< %1856 = fadd float %1855, 0.000000e+00 -< %1857 = bitcast i32 %57 to float -< %1858 = bitcast i32 %57 to float -< %1859 = fmul float %1857, %1858 -< %1860 = fadd float %1856, %1859 -< %1861 = call float @llvm.sqrt.f32(float %1860) -< %1862 = fneg float %710 -< %1863 = fmul float %1861, %1862 -< %1864 = bitcast i32 %865 to float -< %1865 = fadd float %1864, %1863 -< %1866 = fmul float %1852, %1865 -< %1867 = fadd float %1866, 0.000000e+00 -< %1868 = bitcast i32 %865 to float -< %1869 = bitcast i32 %865 to float -< %1870 = fmul float %1868, %1869 -< %1871 = fadd float %1870, 0.000000e+00 -< %1872 = bitcast i32 %57 to float -< %1873 = bitcast i32 %57 to float -< %1874 = fmul float %1872, %1873 -< %1875 = fadd float %1871, %1874 -< %1876 = call float @llvm.sqrt.f32(float %1875) -< %1877 = fneg float %710 -< %1878 = fmul float %1876, %1877 -< %1879 = fmul float %1878, 0.000000e+00 -< %1880 = bitcast i32 %57 to float -< %1881 = fadd float %1880, %1879 -< %1882 = bitcast i32 %865 to float -< %1883 = bitcast i32 %865 to float -< %1884 = fmul float %1882, %1883 -< %1885 = fadd float %1884, 0.000000e+00 -< %1886 = bitcast i32 %57 to float -< %1887 = bitcast i32 %57 to float -< %1888 = fmul float %1886, %1887 -< %1889 = fadd float %1885, %1888 -< %1890 = call float @llvm.sqrt.f32(float %1889) -< %1891 = fneg float %710 -< %1892 = fmul float %1890, %1891 -< %1893 = fmul float %1892, 0.000000e+00 -< %1894 = bitcast i32 %57 to float -< %1895 = fadd float %1894, %1893 -< %1896 = fmul float %1881, %1895 -< %1897 = fadd float %1867, %1896 -< %1898 = call float @llvm.sqrt.f32(float %1897) -< %1899 = fadd float %1898, 0.000000e+00 -< %1900 = fdiv float %1839, %1899 -< %1901 = fmul float %1900, 2.000000e+00 -< %1902 = bitcast i32 %865 to float -< %1903 = bitcast i32 %865 to float ---- -> %1847 = bitcast i32 %52 to float -> %1848 = fadd float %1847, %1846 -> %1849 = fmul float %1835, %1848 -> %1850 = fadd float %1849, 0.000000e+00 -> %1851 = bitcast i32 %52 to float -> %1852 = bitcast i32 %52 to float -> %1853 = fmul float %1851, %1852 -> %1854 = fadd float %1853, 0.000000e+00 -> %1855 = bitcast i32 %686 to float -> %1856 = bitcast i32 %686 to float -> %1857 = fmul float %1855, %1856 -> %1858 = fadd float %1854, %1857 -> %1859 = call float @llvm.sqrt.f32(float %1858) -> %1860 = fneg float %692 -> %1861 = fmul float %1859, %1860 -> %1862 = fmul float %1861, 0.000000e+00 -> %1863 = bitcast i32 %686 to float -> %1864 = fadd float %1863, %1862 -> %1865 = bitcast i32 %52 to float -> %1866 = bitcast i32 %52 to float -> %1867 = fmul float %1865, %1866 -> %1868 = fadd float %1867, 0.000000e+00 -> %1869 = bitcast i32 %686 to float -> %1870 = bitcast i32 %686 to float -> 
%1871 = fmul float %1869, %1870 -> %1872 = fadd float %1868, %1871 -> %1873 = call float @llvm.sqrt.f32(float %1872) -> %1874 = fneg float %692 -> %1875 = fmul float %1873, %1874 -> %1876 = fmul float %1875, 0.000000e+00 -> %1877 = bitcast i32 %686 to float -> %1878 = fadd float %1877, %1876 -> %1879 = fmul float %1864, %1878 -> %1880 = fadd float %1850, %1879 -> %1881 = call float @llvm.sqrt.f32(float %1880) -> %1882 = fadd float %1881, 0.000000e+00 -> %1883 = fdiv float %1822, %1882 -> %1884 = fmul float %1883, 2.000000e+00 -> %1885 = bitcast i32 %52 to float -> %1886 = bitcast i32 %52 to float -> %1887 = fmul float %1885, %1886 -> %1888 = fadd float %1887, 0.000000e+00 -> %1889 = bitcast i32 %686 to float -> %1890 = bitcast i32 %686 to float -> %1891 = fmul float %1889, %1890 -> %1892 = fadd float %1888, %1891 -> %1893 = call float @llvm.sqrt.f32(float %1892) -> %1894 = fneg float %692 -> %1895 = fmul float %1893, %1894 -> %1896 = bitcast i32 %52 to float -> %1897 = fadd float %1896, %1895 -> %1898 = bitcast i32 %52 to float -> %1899 = bitcast i32 %52 to float -> %1900 = fmul float %1898, %1899 -> %1901 = fadd float %1900, 0.000000e+00 -> %1902 = bitcast i32 %686 to float -> %1903 = bitcast i32 %686 to float -2378,2380c2379,2381 -< %1905 = fadd float %1904, 0.000000e+00 -< %1906 = bitcast i32 %57 to float -< %1907 = bitcast i32 %57 to float ---- -> %1905 = fadd float %1901, %1904 -> %1906 = call float @llvm.sqrt.f32(float %1905) -> %1907 = fneg float %692 -2382,2389c2383,2390 -< %1909 = fadd float %1905, %1908 -< %1910 = call float @llvm.sqrt.f32(float %1909) -< %1911 = fneg float %710 -< %1912 = fmul float %1910, %1911 -< %1913 = bitcast i32 %865 to float -< %1914 = fadd float %1913, %1912 -< %1915 = bitcast i32 %865 to float -< %1916 = bitcast i32 %865 to float ---- -> %1909 = bitcast i32 %52 to float -> %1910 = fadd float %1909, %1908 -> %1911 = bitcast i32 %52 to float -> %1912 = bitcast i32 %52 to float -> %1913 = fmul float %1911, %1912 -> %1914 = fadd float %1913, 0.000000e+00 -> %1915 = bitcast i32 %686 to float -> %1916 = bitcast i32 %686 to float -2391,2393c2392,2394 -< %1918 = fadd float %1917, 0.000000e+00 -< %1919 = bitcast i32 %57 to float -< %1920 = bitcast i32 %57 to float ---- -> %1918 = fadd float %1914, %1917 -> %1919 = call float @llvm.sqrt.f32(float %1918) -> %1920 = fneg float %692 -2395,2468c2396,2469 -< %1922 = fadd float %1918, %1921 -< %1923 = call float @llvm.sqrt.f32(float %1922) -< %1924 = fneg float %710 -< %1925 = fmul float %1923, %1924 -< %1926 = bitcast i32 %865 to float -< %1927 = fadd float %1926, %1925 -< %1928 = bitcast i32 %865 to float -< %1929 = bitcast i32 %865 to float -< %1930 = fmul float %1928, %1929 -< %1931 = fadd float %1930, 0.000000e+00 -< %1932 = bitcast i32 %57 to float -< %1933 = bitcast i32 %57 to float -< %1934 = fmul float %1932, %1933 -< %1935 = fadd float %1931, %1934 -< %1936 = call float @llvm.sqrt.f32(float %1935) -< %1937 = fneg float %710 -< %1938 = fmul float %1936, %1937 -< %1939 = bitcast i32 %865 to float -< %1940 = fadd float %1939, %1938 -< %1941 = fmul float %1927, %1940 -< %1942 = fadd float %1941, 0.000000e+00 -< %1943 = bitcast i32 %865 to float -< %1944 = bitcast i32 %865 to float -< %1945 = fmul float %1943, %1944 -< %1946 = fadd float %1945, 0.000000e+00 -< %1947 = bitcast i32 %57 to float -< %1948 = bitcast i32 %57 to float -< %1949 = fmul float %1947, %1948 -< %1950 = fadd float %1946, %1949 -< %1951 = call float @llvm.sqrt.f32(float %1950) -< %1952 = fneg float %710 -< %1953 = fmul float %1951, %1952 -< 
%1954 = fmul float %1953, 0.000000e+00 -< %1955 = bitcast i32 %57 to float -< %1956 = fadd float %1955, %1954 -< %1957 = bitcast i32 %865 to float -< %1958 = bitcast i32 %865 to float -< %1959 = fmul float %1957, %1958 -< %1960 = fadd float %1959, 0.000000e+00 -< %1961 = bitcast i32 %57 to float -< %1962 = bitcast i32 %57 to float -< %1963 = fmul float %1961, %1962 -< %1964 = fadd float %1960, %1963 -< %1965 = call float @llvm.sqrt.f32(float %1964) -< %1966 = fneg float %710 -< %1967 = fmul float %1965, %1966 -< %1968 = fmul float %1967, 0.000000e+00 -< %1969 = bitcast i32 %57 to float -< %1970 = fadd float %1969, %1968 -< %1971 = fmul float %1956, %1970 -< %1972 = fadd float %1942, %1971 -< %1973 = call float @llvm.sqrt.f32(float %1972) -< %1974 = fadd float %1973, 0.000000e+00 -< %1975 = fdiv float %1914, %1974 -< %1976 = fmul float %1901, %1975 -< %1977 = fneg float %1976 -< %1978 = fmul float %1977, %1819 -< %1979 = fadd float %1978, 0.000000e+00 -< %1980 = bitcast i32 %865 to float -< %1981 = bitcast i32 %865 to float -< %1982 = fmul float %1980, %1981 -< %1983 = fadd float %1982, 0.000000e+00 -< %1984 = bitcast i32 %57 to float -< %1985 = bitcast i32 %57 to float -< %1986 = fmul float %1984, %1985 -< %1987 = fadd float %1983, %1986 -< %1988 = call float @llvm.sqrt.f32(float %1987) -< %1989 = fneg float %710 -< %1990 = fmul float %1988, %1989 -< %1991 = fmul float %1990, 0.000000e+00 -< %1992 = bitcast i32 %57 to float -< %1993 = fadd float %1992, %1991 -< %1994 = bitcast i32 %865 to float -< %1995 = bitcast i32 %865 to float ---- -> %1922 = bitcast i32 %52 to float -> %1923 = fadd float %1922, %1921 -> %1924 = fmul float %1910, %1923 -> %1925 = fadd float %1924, 0.000000e+00 -> %1926 = bitcast i32 %52 to float -> %1927 = bitcast i32 %52 to float -> %1928 = fmul float %1926, %1927 -> %1929 = fadd float %1928, 0.000000e+00 -> %1930 = bitcast i32 %686 to float -> %1931 = bitcast i32 %686 to float -> %1932 = fmul float %1930, %1931 -> %1933 = fadd float %1929, %1932 -> %1934 = call float @llvm.sqrt.f32(float %1933) -> %1935 = fneg float %692 -> %1936 = fmul float %1934, %1935 -> %1937 = fmul float %1936, 0.000000e+00 -> %1938 = bitcast i32 %686 to float -> %1939 = fadd float %1938, %1937 -> %1940 = bitcast i32 %52 to float -> %1941 = bitcast i32 %52 to float -> %1942 = fmul float %1940, %1941 -> %1943 = fadd float %1942, 0.000000e+00 -> %1944 = bitcast i32 %686 to float -> %1945 = bitcast i32 %686 to float -> %1946 = fmul float %1944, %1945 -> %1947 = fadd float %1943, %1946 -> %1948 = call float @llvm.sqrt.f32(float %1947) -> %1949 = fneg float %692 -> %1950 = fmul float %1948, %1949 -> %1951 = fmul float %1950, 0.000000e+00 -> %1952 = bitcast i32 %686 to float -> %1953 = fadd float %1952, %1951 -> %1954 = fmul float %1939, %1953 -> %1955 = fadd float %1925, %1954 -> %1956 = call float @llvm.sqrt.f32(float %1955) -> %1957 = fadd float %1956, 0.000000e+00 -> %1958 = fdiv float %1897, %1957 -> %1959 = fmul float %1884, %1958 -> %1960 = fneg float %1959 -> %1961 = fmul float %1960, %1802 -> %1962 = fadd float %1961, 0.000000e+00 -> %1963 = bitcast i32 %52 to float -> %1964 = bitcast i32 %52 to float -> %1965 = fmul float %1963, %1964 -> %1966 = fadd float %1965, 0.000000e+00 -> %1967 = bitcast i32 %686 to float -> %1968 = bitcast i32 %686 to float -> %1969 = fmul float %1967, %1968 -> %1970 = fadd float %1966, %1969 -> %1971 = call float @llvm.sqrt.f32(float %1970) -> %1972 = fneg float %692 -> %1973 = fmul float %1971, %1972 -> %1974 = fmul float %1973, 0.000000e+00 -> %1975 = bitcast i32 
%686 to float -> %1976 = fadd float %1975, %1974 -> %1977 = bitcast i32 %52 to float -> %1978 = bitcast i32 %52 to float -> %1979 = fmul float %1977, %1978 -> %1980 = fadd float %1979, 0.000000e+00 -> %1981 = bitcast i32 %686 to float -> %1982 = bitcast i32 %686 to float -> %1983 = fmul float %1981, %1982 -> %1984 = fadd float %1980, %1983 -> %1985 = call float @llvm.sqrt.f32(float %1984) -> %1986 = fneg float %692 -> %1987 = fmul float %1985, %1986 -> %1988 = bitcast i32 %52 to float -> %1989 = fadd float %1988, %1987 -> %1990 = bitcast i32 %52 to float -> %1991 = bitcast i32 %52 to float -> %1992 = fmul float %1990, %1991 -> %1993 = fadd float %1992, 0.000000e+00 -> %1994 = bitcast i32 %686 to float -> %1995 = bitcast i32 %686 to float -2470,2472c2471,2473 -< %1997 = fadd float %1996, 0.000000e+00 -< %1998 = bitcast i32 %57 to float -< %1999 = bitcast i32 %57 to float ---- -> %1997 = fadd float %1993, %1996 -> %1998 = call float @llvm.sqrt.f32(float %1997) -> %1999 = fneg float %692 -2474,2544c2475,2545 -< %2001 = fadd float %1997, %2000 -< %2002 = call float @llvm.sqrt.f32(float %2001) -< %2003 = fneg float %710 -< %2004 = fmul float %2002, %2003 -< %2005 = bitcast i32 %865 to float -< %2006 = fadd float %2005, %2004 -< %2007 = bitcast i32 %865 to float -< %2008 = bitcast i32 %865 to float -< %2009 = fmul float %2007, %2008 -< %2010 = fadd float %2009, 0.000000e+00 -< %2011 = bitcast i32 %57 to float -< %2012 = bitcast i32 %57 to float -< %2013 = fmul float %2011, %2012 -< %2014 = fadd float %2010, %2013 -< %2015 = call float @llvm.sqrt.f32(float %2014) -< %2016 = fneg float %710 -< %2017 = fmul float %2015, %2016 -< %2018 = bitcast i32 %865 to float -< %2019 = fadd float %2018, %2017 -< %2020 = fmul float %2006, %2019 -< %2021 = fadd float %2020, 0.000000e+00 -< %2022 = bitcast i32 %865 to float -< %2023 = bitcast i32 %865 to float -< %2024 = fmul float %2022, %2023 -< %2025 = fadd float %2024, 0.000000e+00 -< %2026 = bitcast i32 %57 to float -< %2027 = bitcast i32 %57 to float -< %2028 = fmul float %2026, %2027 -< %2029 = fadd float %2025, %2028 -< %2030 = call float @llvm.sqrt.f32(float %2029) -< %2031 = fneg float %710 -< %2032 = fmul float %2030, %2031 -< %2033 = fmul float %2032, 0.000000e+00 -< %2034 = bitcast i32 %57 to float -< %2035 = fadd float %2034, %2033 -< %2036 = bitcast i32 %865 to float -< %2037 = bitcast i32 %865 to float -< %2038 = fmul float %2036, %2037 -< %2039 = fadd float %2038, 0.000000e+00 -< %2040 = bitcast i32 %57 to float -< %2041 = bitcast i32 %57 to float -< %2042 = fmul float %2040, %2041 -< %2043 = fadd float %2039, %2042 -< %2044 = call float @llvm.sqrt.f32(float %2043) -< %2045 = fneg float %710 -< %2046 = fmul float %2044, %2045 -< %2047 = fmul float %2046, 0.000000e+00 -< %2048 = bitcast i32 %57 to float -< %2049 = fadd float %2048, %2047 -< %2050 = fmul float %2035, %2049 -< %2051 = fadd float %2021, %2050 -< %2052 = call float @llvm.sqrt.f32(float %2051) -< %2053 = fadd float %2052, 0.000000e+00 -< %2054 = fdiv float %1993, %2053 -< %2055 = fmul float %2054, 2.000000e+00 -< %2056 = bitcast i32 %865 to float -< %2057 = bitcast i32 %865 to float -< %2058 = fmul float %2056, %2057 -< %2059 = fadd float %2058, 0.000000e+00 -< %2060 = bitcast i32 %57 to float -< %2061 = bitcast i32 %57 to float -< %2062 = fmul float %2060, %2061 -< %2063 = fadd float %2059, %2062 -< %2064 = call float @llvm.sqrt.f32(float %2063) -< %2065 = fneg float %710 -< %2066 = fmul float %2064, %2065 -< %2067 = fmul float %2066, 0.000000e+00 -< %2068 = bitcast i32 %57 to float -< 
%2069 = fadd float %2068, %2067 -< %2070 = bitcast i32 %865 to float -< %2071 = bitcast i32 %865 to float ---- -> %2001 = bitcast i32 %52 to float -> %2002 = fadd float %2001, %2000 -> %2003 = fmul float %1989, %2002 -> %2004 = fadd float %2003, 0.000000e+00 -> %2005 = bitcast i32 %52 to float -> %2006 = bitcast i32 %52 to float -> %2007 = fmul float %2005, %2006 -> %2008 = fadd float %2007, 0.000000e+00 -> %2009 = bitcast i32 %686 to float -> %2010 = bitcast i32 %686 to float -> %2011 = fmul float %2009, %2010 -> %2012 = fadd float %2008, %2011 -> %2013 = call float @llvm.sqrt.f32(float %2012) -> %2014 = fneg float %692 -> %2015 = fmul float %2013, %2014 -> %2016 = fmul float %2015, 0.000000e+00 -> %2017 = bitcast i32 %686 to float -> %2018 = fadd float %2017, %2016 -> %2019 = bitcast i32 %52 to float -> %2020 = bitcast i32 %52 to float -> %2021 = fmul float %2019, %2020 -> %2022 = fadd float %2021, 0.000000e+00 -> %2023 = bitcast i32 %686 to float -> %2024 = bitcast i32 %686 to float -> %2025 = fmul float %2023, %2024 -> %2026 = fadd float %2022, %2025 -> %2027 = call float @llvm.sqrt.f32(float %2026) -> %2028 = fneg float %692 -> %2029 = fmul float %2027, %2028 -> %2030 = fmul float %2029, 0.000000e+00 -> %2031 = bitcast i32 %686 to float -> %2032 = fadd float %2031, %2030 -> %2033 = fmul float %2018, %2032 -> %2034 = fadd float %2004, %2033 -> %2035 = call float @llvm.sqrt.f32(float %2034) -> %2036 = fadd float %2035, 0.000000e+00 -> %2037 = fdiv float %1976, %2036 -> %2038 = fmul float %2037, 2.000000e+00 -> %2039 = bitcast i32 %52 to float -> %2040 = bitcast i32 %52 to float -> %2041 = fmul float %2039, %2040 -> %2042 = fadd float %2041, 0.000000e+00 -> %2043 = bitcast i32 %686 to float -> %2044 = bitcast i32 %686 to float -> %2045 = fmul float %2043, %2044 -> %2046 = fadd float %2042, %2045 -> %2047 = call float @llvm.sqrt.f32(float %2046) -> %2048 = fneg float %692 -> %2049 = fmul float %2047, %2048 -> %2050 = fmul float %2049, 0.000000e+00 -> %2051 = bitcast i32 %686 to float -> %2052 = fadd float %2051, %2050 -> %2053 = bitcast i32 %52 to float -> %2054 = bitcast i32 %52 to float -> %2055 = fmul float %2053, %2054 -> %2056 = fadd float %2055, 0.000000e+00 -> %2057 = bitcast i32 %686 to float -> %2058 = bitcast i32 %686 to float -> %2059 = fmul float %2057, %2058 -> %2060 = fadd float %2056, %2059 -> %2061 = call float @llvm.sqrt.f32(float %2060) -> %2062 = fneg float %692 -> %2063 = fmul float %2061, %2062 -> %2064 = bitcast i32 %52 to float -> %2065 = fadd float %2064, %2063 -> %2066 = bitcast i32 %52 to float -> %2067 = bitcast i32 %52 to float -> %2068 = fmul float %2066, %2067 -> %2069 = fadd float %2068, 0.000000e+00 -> %2070 = bitcast i32 %686 to float -> %2071 = bitcast i32 %686 to float -2546,2548c2547,2549 -< %2073 = fadd float %2072, 0.000000e+00 -< %2074 = bitcast i32 %57 to float -< %2075 = bitcast i32 %57 to float ---- -> %2073 = fadd float %2069, %2072 -> %2074 = call float @llvm.sqrt.f32(float %2073) -> %2075 = fneg float %692 -2550,2634c2551,2635 -< %2077 = fadd float %2073, %2076 -< %2078 = call float @llvm.sqrt.f32(float %2077) -< %2079 = fneg float %710 -< %2080 = fmul float %2078, %2079 -< %2081 = bitcast i32 %865 to float -< %2082 = fadd float %2081, %2080 -< %2083 = bitcast i32 %865 to float -< %2084 = bitcast i32 %865 to float -< %2085 = fmul float %2083, %2084 -< %2086 = fadd float %2085, 0.000000e+00 -< %2087 = bitcast i32 %57 to float -< %2088 = bitcast i32 %57 to float -< %2089 = fmul float %2087, %2088 -< %2090 = fadd float %2086, %2089 -< %2091 = call 
float @llvm.sqrt.f32(float %2090) -< %2092 = fneg float %710 -< %2093 = fmul float %2091, %2092 -< %2094 = bitcast i32 %865 to float -< %2095 = fadd float %2094, %2093 -< %2096 = fmul float %2082, %2095 -< %2097 = fadd float %2096, 0.000000e+00 -< %2098 = bitcast i32 %865 to float -< %2099 = bitcast i32 %865 to float -< %2100 = fmul float %2098, %2099 -< %2101 = fadd float %2100, 0.000000e+00 -< %2102 = bitcast i32 %57 to float -< %2103 = bitcast i32 %57 to float -< %2104 = fmul float %2102, %2103 -< %2105 = fadd float %2101, %2104 -< %2106 = call float @llvm.sqrt.f32(float %2105) -< %2107 = fneg float %710 -< %2108 = fmul float %2106, %2107 -< %2109 = fmul float %2108, 0.000000e+00 -< %2110 = bitcast i32 %57 to float -< %2111 = fadd float %2110, %2109 -< %2112 = bitcast i32 %865 to float -< %2113 = bitcast i32 %865 to float -< %2114 = fmul float %2112, %2113 -< %2115 = fadd float %2114, 0.000000e+00 -< %2116 = bitcast i32 %57 to float -< %2117 = bitcast i32 %57 to float -< %2118 = fmul float %2116, %2117 -< %2119 = fadd float %2115, %2118 -< %2120 = call float @llvm.sqrt.f32(float %2119) -< %2121 = fneg float %710 -< %2122 = fmul float %2120, %2121 -< %2123 = fmul float %2122, 0.000000e+00 -< %2124 = bitcast i32 %57 to float -< %2125 = fadd float %2124, %2123 -< %2126 = fmul float %2111, %2125 -< %2127 = fadd float %2097, %2126 -< %2128 = call float @llvm.sqrt.f32(float %2127) -< %2129 = fadd float %2128, 0.000000e+00 -< %2130 = fdiv float %2069, %2129 -< %2131 = fmul float %2055, %2130 -< %2132 = fsub float 1.000000e+00, %2131 -< %2133 = load float, float* %30, align 4 -< %2134 = fmul float %2132, %2133 -< %2135 = fadd float %1979, %2134 -< %2136 = insertelement <4 x float> zeroinitializer, float %2135, i32 0 -< %2137 = insertelement <4 x float> %2136, float 0.000000e+00, i32 1 -< %2138 = insertelement <4 x float> %2137, float 0.000000e+00, i32 2 -< %2139 = insertelement <4 x float> %2138, float 0.000000e+00, i32 3 -< %2140 = extractelement <4 x float> %2139, i32 0 -< store float %2140, float* %40, align 4 -< %2141 = extractelement <4 x float> %2139, i32 1 -< %2142 = getelementptr float, float* %2, i32 0 -< %2143 = getelementptr inbounds float, float* %2142, i64 3 -< store float %2141, float* %2143, align 4 -< %2144 = bitcast i32 %865 to float -< %2145 = bitcast i32 %865 to float -< %2146 = fmul float %2144, %2145 -< %2147 = fadd float %2146, 0.000000e+00 -< %2148 = bitcast i32 %57 to float -< %2149 = bitcast i32 %57 to float -< %2150 = fmul float %2148, %2149 -< %2151 = fadd float %2147, %2150 -< %2152 = call float @llvm.sqrt.f32(float %2151) -< %2153 = fneg float %710 -< %2154 = fmul float %2152, %2153 -< %2155 = fmul float %2154, 0.000000e+00 -< %2156 = bitcast i32 %57 to float -< %2157 = fadd float %2156, %2155 -< %2158 = bitcast i32 %865 to float -< %2159 = bitcast i32 %865 to float ---- -> %2077 = bitcast i32 %52 to float -> %2078 = fadd float %2077, %2076 -> %2079 = fmul float %2065, %2078 -> %2080 = fadd float %2079, 0.000000e+00 -> %2081 = bitcast i32 %52 to float -> %2082 = bitcast i32 %52 to float -> %2083 = fmul float %2081, %2082 -> %2084 = fadd float %2083, 0.000000e+00 -> %2085 = bitcast i32 %686 to float -> %2086 = bitcast i32 %686 to float -> %2087 = fmul float %2085, %2086 -> %2088 = fadd float %2084, %2087 -> %2089 = call float @llvm.sqrt.f32(float %2088) -> %2090 = fneg float %692 -> %2091 = fmul float %2089, %2090 -> %2092 = fmul float %2091, 0.000000e+00 -> %2093 = bitcast i32 %686 to float -> %2094 = fadd float %2093, %2092 -> %2095 = bitcast i32 %52 to float -> 
%2096 = bitcast i32 %52 to float -> %2097 = fmul float %2095, %2096 -> %2098 = fadd float %2097, 0.000000e+00 -> %2099 = bitcast i32 %686 to float -> %2100 = bitcast i32 %686 to float -> %2101 = fmul float %2099, %2100 -> %2102 = fadd float %2098, %2101 -> %2103 = call float @llvm.sqrt.f32(float %2102) -> %2104 = fneg float %692 -> %2105 = fmul float %2103, %2104 -> %2106 = fmul float %2105, 0.000000e+00 -> %2107 = bitcast i32 %686 to float -> %2108 = fadd float %2107, %2106 -> %2109 = fmul float %2094, %2108 -> %2110 = fadd float %2080, %2109 -> %2111 = call float @llvm.sqrt.f32(float %2110) -> %2112 = fadd float %2111, 0.000000e+00 -> %2113 = fdiv float %2052, %2112 -> %2114 = fmul float %2038, %2113 -> %2115 = fsub float 1.000000e+00, %2114 -> %2116 = load float, float* %1153, align 4 -> %2117 = fmul float %2115, %2116 -> %2118 = fadd float %1962, %2117 -> %2119 = insertelement <4 x float> zeroinitializer, float %2118, i32 0 -> %2120 = insertelement <4 x float> %2119, float 0.000000e+00, i32 1 -> %2121 = insertelement <4 x float> %2120, float 0.000000e+00, i32 2 -> %2122 = insertelement <4 x float> %2121, float 0.000000e+00, i32 3 -> %2123 = extractelement <4 x float> %2122, i32 0 -> store float %2123, float* %1644, align 4 -> %2124 = extractelement <4 x float> %2122, i32 1 -> %2125 = getelementptr float, float* %2, i32 0 -> %2126 = getelementptr inbounds float, float* %2125, i64 3 -> store float %2124, float* %2126, align 4 -> %2127 = bitcast i32 %52 to float -> %2128 = bitcast i32 %52 to float -> %2129 = fmul float %2127, %2128 -> %2130 = fadd float %2129, 0.000000e+00 -> %2131 = bitcast i32 %686 to float -> %2132 = bitcast i32 %686 to float -> %2133 = fmul float %2131, %2132 -> %2134 = fadd float %2130, %2133 -> %2135 = call float @llvm.sqrt.f32(float %2134) -> %2136 = fneg float %692 -> %2137 = fmul float %2135, %2136 -> %2138 = fmul float %2137, 0.000000e+00 -> %2139 = bitcast i32 %686 to float -> %2140 = fadd float %2139, %2138 -> %2141 = bitcast i32 %52 to float -> %2142 = bitcast i32 %52 to float -> %2143 = fmul float %2141, %2142 -> %2144 = fadd float %2143, 0.000000e+00 -> %2145 = bitcast i32 %686 to float -> %2146 = bitcast i32 %686 to float -> %2147 = fmul float %2145, %2146 -> %2148 = fadd float %2144, %2147 -> %2149 = call float @llvm.sqrt.f32(float %2148) -> %2150 = fneg float %692 -> %2151 = fmul float %2149, %2150 -> %2152 = bitcast i32 %52 to float -> %2153 = fadd float %2152, %2151 -> %2154 = bitcast i32 %52 to float -> %2155 = bitcast i32 %52 to float -> %2156 = fmul float %2154, %2155 -> %2157 = fadd float %2156, 0.000000e+00 -> %2158 = bitcast i32 %686 to float -> %2159 = bitcast i32 %686 to float -2636,2638c2637,2639 -< %2161 = fadd float %2160, 0.000000e+00 -< %2162 = bitcast i32 %57 to float -< %2163 = bitcast i32 %57 to float ---- -> %2161 = fadd float %2157, %2160 -> %2162 = call float @llvm.sqrt.f32(float %2161) -> %2163 = fneg float %692 -2640,2696c2641,2697 -< %2165 = fadd float %2161, %2164 -< %2166 = call float @llvm.sqrt.f32(float %2165) -< %2167 = fneg float %710 -< %2168 = fmul float %2166, %2167 -< %2169 = bitcast i32 %865 to float -< %2170 = fadd float %2169, %2168 -< %2171 = bitcast i32 %865 to float -< %2172 = bitcast i32 %865 to float -< %2173 = fmul float %2171, %2172 -< %2174 = fadd float %2173, 0.000000e+00 -< %2175 = bitcast i32 %57 to float -< %2176 = bitcast i32 %57 to float -< %2177 = fmul float %2175, %2176 -< %2178 = fadd float %2174, %2177 -< %2179 = call float @llvm.sqrt.f32(float %2178) -< %2180 = fneg float %710 -< %2181 = fmul float 
%2179, %2180 -< %2182 = bitcast i32 %865 to float -< %2183 = fadd float %2182, %2181 -< %2184 = fmul float %2170, %2183 -< %2185 = fadd float %2184, 0.000000e+00 -< %2186 = bitcast i32 %865 to float -< %2187 = bitcast i32 %865 to float -< %2188 = fmul float %2186, %2187 -< %2189 = fadd float %2188, 0.000000e+00 -< %2190 = bitcast i32 %57 to float -< %2191 = bitcast i32 %57 to float -< %2192 = fmul float %2190, %2191 -< %2193 = fadd float %2189, %2192 -< %2194 = call float @llvm.sqrt.f32(float %2193) -< %2195 = fneg float %710 -< %2196 = fmul float %2194, %2195 -< %2197 = fmul float %2196, 0.000000e+00 -< %2198 = bitcast i32 %57 to float -< %2199 = fadd float %2198, %2197 -< %2200 = bitcast i32 %865 to float -< %2201 = bitcast i32 %865 to float -< %2202 = fmul float %2200, %2201 -< %2203 = fadd float %2202, 0.000000e+00 -< %2204 = bitcast i32 %57 to float -< %2205 = bitcast i32 %57 to float -< %2206 = fmul float %2204, %2205 -< %2207 = fadd float %2203, %2206 -< %2208 = call float @llvm.sqrt.f32(float %2207) -< %2209 = fneg float %710 -< %2210 = fmul float %2208, %2209 -< %2211 = fmul float %2210, 0.000000e+00 -< %2212 = bitcast i32 %57 to float -< %2213 = fadd float %2212, %2211 -< %2214 = fmul float %2199, %2213 -< %2215 = fadd float %2185, %2214 -< %2216 = call float @llvm.sqrt.f32(float %2215) -< %2217 = fadd float %2216, 0.000000e+00 -< %2218 = fdiv float %2157, %2217 -< %2219 = fmul float %2218, 2.000000e+00 -< %2220 = bitcast i32 %865 to float -< %2221 = bitcast i32 %865 to float ---- -> %2165 = bitcast i32 %52 to float -> %2166 = fadd float %2165, %2164 -> %2167 = fmul float %2153, %2166 -> %2168 = fadd float %2167, 0.000000e+00 -> %2169 = bitcast i32 %52 to float -> %2170 = bitcast i32 %52 to float -> %2171 = fmul float %2169, %2170 -> %2172 = fadd float %2171, 0.000000e+00 -> %2173 = bitcast i32 %686 to float -> %2174 = bitcast i32 %686 to float -> %2175 = fmul float %2173, %2174 -> %2176 = fadd float %2172, %2175 -> %2177 = call float @llvm.sqrt.f32(float %2176) -> %2178 = fneg float %692 -> %2179 = fmul float %2177, %2178 -> %2180 = fmul float %2179, 0.000000e+00 -> %2181 = bitcast i32 %686 to float -> %2182 = fadd float %2181, %2180 -> %2183 = bitcast i32 %52 to float -> %2184 = bitcast i32 %52 to float -> %2185 = fmul float %2183, %2184 -> %2186 = fadd float %2185, 0.000000e+00 -> %2187 = bitcast i32 %686 to float -> %2188 = bitcast i32 %686 to float -> %2189 = fmul float %2187, %2188 -> %2190 = fadd float %2186, %2189 -> %2191 = call float @llvm.sqrt.f32(float %2190) -> %2192 = fneg float %692 -> %2193 = fmul float %2191, %2192 -> %2194 = fmul float %2193, 0.000000e+00 -> %2195 = bitcast i32 %686 to float -> %2196 = fadd float %2195, %2194 -> %2197 = fmul float %2182, %2196 -> %2198 = fadd float %2168, %2197 -> %2199 = call float @llvm.sqrt.f32(float %2198) -> %2200 = fadd float %2199, 0.000000e+00 -> %2201 = fdiv float %2140, %2200 -> %2202 = fmul float %2201, 2.000000e+00 -> %2203 = bitcast i32 %52 to float -> %2204 = bitcast i32 %52 to float -> %2205 = fmul float %2203, %2204 -> %2206 = fadd float %2205, 0.000000e+00 -> %2207 = bitcast i32 %686 to float -> %2208 = bitcast i32 %686 to float -> %2209 = fmul float %2207, %2208 -> %2210 = fadd float %2206, %2209 -> %2211 = call float @llvm.sqrt.f32(float %2210) -> %2212 = fneg float %692 -> %2213 = fmul float %2211, %2212 -> %2214 = bitcast i32 %52 to float -> %2215 = fadd float %2214, %2213 -> %2216 = bitcast i32 %52 to float -> %2217 = bitcast i32 %52 to float -> %2218 = fmul float %2216, %2217 -> %2219 = fadd float %2218, 
0.000000e+00 -> %2220 = bitcast i32 %686 to float -> %2221 = bitcast i32 %686 to float -2698,2700c2699,2701 -< %2223 = fadd float %2222, 0.000000e+00 -< %2224 = bitcast i32 %57 to float -< %2225 = bitcast i32 %57 to float ---- -> %2223 = fadd float %2219, %2222 -> %2224 = call float @llvm.sqrt.f32(float %2223) -> %2225 = fneg float %692 -2702,2709c2703,2710 -< %2227 = fadd float %2223, %2226 -< %2228 = call float @llvm.sqrt.f32(float %2227) -< %2229 = fneg float %710 -< %2230 = fmul float %2228, %2229 -< %2231 = bitcast i32 %865 to float -< %2232 = fadd float %2231, %2230 -< %2233 = bitcast i32 %865 to float -< %2234 = bitcast i32 %865 to float ---- -> %2227 = bitcast i32 %52 to float -> %2228 = fadd float %2227, %2226 -> %2229 = bitcast i32 %52 to float -> %2230 = bitcast i32 %52 to float -> %2231 = fmul float %2229, %2230 -> %2232 = fadd float %2231, 0.000000e+00 -> %2233 = bitcast i32 %686 to float -> %2234 = bitcast i32 %686 to float -2711,2713c2712,2714 -< %2236 = fadd float %2235, 0.000000e+00 -< %2237 = bitcast i32 %57 to float -< %2238 = bitcast i32 %57 to float ---- -> %2236 = fadd float %2232, %2235 -> %2237 = call float @llvm.sqrt.f32(float %2236) -> %2238 = fneg float %692 -2715,2798c2716,2799 -< %2240 = fadd float %2236, %2239 -< %2241 = call float @llvm.sqrt.f32(float %2240) -< %2242 = fneg float %710 -< %2243 = fmul float %2241, %2242 -< %2244 = bitcast i32 %865 to float -< %2245 = fadd float %2244, %2243 -< %2246 = bitcast i32 %865 to float -< %2247 = bitcast i32 %865 to float -< %2248 = fmul float %2246, %2247 -< %2249 = fadd float %2248, 0.000000e+00 -< %2250 = bitcast i32 %57 to float -< %2251 = bitcast i32 %57 to float -< %2252 = fmul float %2250, %2251 -< %2253 = fadd float %2249, %2252 -< %2254 = call float @llvm.sqrt.f32(float %2253) -< %2255 = fneg float %710 -< %2256 = fmul float %2254, %2255 -< %2257 = bitcast i32 %865 to float -< %2258 = fadd float %2257, %2256 -< %2259 = fmul float %2245, %2258 -< %2260 = fadd float %2259, 0.000000e+00 -< %2261 = bitcast i32 %865 to float -< %2262 = bitcast i32 %865 to float -< %2263 = fmul float %2261, %2262 -< %2264 = fadd float %2263, 0.000000e+00 -< %2265 = bitcast i32 %57 to float -< %2266 = bitcast i32 %57 to float -< %2267 = fmul float %2265, %2266 -< %2268 = fadd float %2264, %2267 -< %2269 = call float @llvm.sqrt.f32(float %2268) -< %2270 = fneg float %710 -< %2271 = fmul float %2269, %2270 -< %2272 = fmul float %2271, 0.000000e+00 -< %2273 = bitcast i32 %57 to float -< %2274 = fadd float %2273, %2272 -< %2275 = bitcast i32 %865 to float -< %2276 = bitcast i32 %865 to float -< %2277 = fmul float %2275, %2276 -< %2278 = fadd float %2277, 0.000000e+00 -< %2279 = bitcast i32 %57 to float -< %2280 = bitcast i32 %57 to float -< %2281 = fmul float %2279, %2280 -< %2282 = fadd float %2278, %2281 -< %2283 = call float @llvm.sqrt.f32(float %2282) -< %2284 = fneg float %710 -< %2285 = fmul float %2283, %2284 -< %2286 = fmul float %2285, 0.000000e+00 -< %2287 = bitcast i32 %57 to float -< %2288 = fadd float %2287, %2286 -< %2289 = fmul float %2274, %2288 -< %2290 = fadd float %2260, %2289 -< %2291 = call float @llvm.sqrt.f32(float %2290) -< %2292 = fadd float %2291, 0.000000e+00 -< %2293 = fdiv float %2232, %2292 -< %2294 = fmul float %2219, %2293 -< %2295 = fneg float %2294 -< %2296 = insertelement <4 x float> zeroinitializer, float %2295, i32 0 -< %2297 = insertelement <4 x float> %2296, float 0.000000e+00, i32 1 -< %2298 = insertelement <4 x float> %2297, float 0.000000e+00, i32 2 -< %2299 = insertelement <4 x float> %2298, 
float 0.000000e+00, i32 3 -< %2300 = load float, float* %1338, align 4 -< %2301 = insertelement <4 x float> zeroinitializer, float %2300, i32 0 -< %2302 = insertelement <4 x float> %2301, float 0.000000e+00, i32 1 -< %2303 = insertelement <4 x float> %2302, float 0.000000e+00, i32 2 -< %2304 = insertelement <4 x float> %2303, float 0.000000e+00, i32 3 -< %2305 = call <4 x float> @llvm.fma.v4f32(<4 x float> %2299, <4 x float> %2304, <4 x float> zeroinitializer) -< %2306 = extractelement <4 x float> %2305, i32 0 -< store float %2306, float* %2143, align 4 -< %2307 = bitcast i32 %865 to float -< %2308 = bitcast i32 %865 to float -< %2309 = fmul float %2307, %2308 -< %2310 = fadd float %2309, 0.000000e+00 -< %2311 = bitcast i32 %57 to float -< %2312 = bitcast i32 %57 to float -< %2313 = fmul float %2311, %2312 -< %2314 = fadd float %2310, %2313 -< %2315 = call float @llvm.sqrt.f32(float %2314) -< %2316 = fneg float %710 -< %2317 = fmul float %2315, %2316 -< %2318 = fmul float %2317, 0.000000e+00 -< %2319 = bitcast i32 %57 to float -< %2320 = fadd float %2319, %2318 -< %2321 = bitcast i32 %865 to float -< %2322 = bitcast i32 %865 to float ---- -> %2240 = bitcast i32 %52 to float -> %2241 = fadd float %2240, %2239 -> %2242 = fmul float %2228, %2241 -> %2243 = fadd float %2242, 0.000000e+00 -> %2244 = bitcast i32 %52 to float -> %2245 = bitcast i32 %52 to float -> %2246 = fmul float %2244, %2245 -> %2247 = fadd float %2246, 0.000000e+00 -> %2248 = bitcast i32 %686 to float -> %2249 = bitcast i32 %686 to float -> %2250 = fmul float %2248, %2249 -> %2251 = fadd float %2247, %2250 -> %2252 = call float @llvm.sqrt.f32(float %2251) -> %2253 = fneg float %692 -> %2254 = fmul float %2252, %2253 -> %2255 = fmul float %2254, 0.000000e+00 -> %2256 = bitcast i32 %686 to float -> %2257 = fadd float %2256, %2255 -> %2258 = bitcast i32 %52 to float -> %2259 = bitcast i32 %52 to float -> %2260 = fmul float %2258, %2259 -> %2261 = fadd float %2260, 0.000000e+00 -> %2262 = bitcast i32 %686 to float -> %2263 = bitcast i32 %686 to float -> %2264 = fmul float %2262, %2263 -> %2265 = fadd float %2261, %2264 -> %2266 = call float @llvm.sqrt.f32(float %2265) -> %2267 = fneg float %692 -> %2268 = fmul float %2266, %2267 -> %2269 = fmul float %2268, 0.000000e+00 -> %2270 = bitcast i32 %686 to float -> %2271 = fadd float %2270, %2269 -> %2272 = fmul float %2257, %2271 -> %2273 = fadd float %2243, %2272 -> %2274 = call float @llvm.sqrt.f32(float %2273) -> %2275 = fadd float %2274, 0.000000e+00 -> %2276 = fdiv float %2215, %2275 -> %2277 = fmul float %2202, %2276 -> %2278 = fneg float %2277 -> %2279 = insertelement <4 x float> zeroinitializer, float %2278, i32 0 -> %2280 = insertelement <4 x float> %2279, float 0.000000e+00, i32 1 -> %2281 = insertelement <4 x float> %2280, float 0.000000e+00, i32 2 -> %2282 = insertelement <4 x float> %2281, float 0.000000e+00, i32 3 -> %2283 = load float, float* %1321, align 4 -> %2284 = insertelement <4 x float> zeroinitializer, float %2283, i32 0 -> %2285 = insertelement <4 x float> %2284, float 0.000000e+00, i32 1 -> %2286 = insertelement <4 x float> %2285, float 0.000000e+00, i32 2 -> %2287 = insertelement <4 x float> %2286, float 0.000000e+00, i32 3 -> %2288 = call <4 x float> @llvm.fma.v4f32(<4 x float> %2282, <4 x float> %2287, <4 x float> zeroinitializer) -> %2289 = extractelement <4 x float> %2288, i32 0 -> store float %2289, float* %2126, align 4 -> %2290 = bitcast i32 %52 to float -> %2291 = bitcast i32 %52 to float -> %2292 = fmul float %2290, %2291 -> %2293 = fadd float 
%2292, 0.000000e+00 -> %2294 = bitcast i32 %686 to float -> %2295 = bitcast i32 %686 to float -> %2296 = fmul float %2294, %2295 -> %2297 = fadd float %2293, %2296 -> %2298 = call float @llvm.sqrt.f32(float %2297) -> %2299 = fneg float %692 -> %2300 = fmul float %2298, %2299 -> %2301 = fmul float %2300, 0.000000e+00 -> %2302 = bitcast i32 %686 to float -> %2303 = fadd float %2302, %2301 -> %2304 = bitcast i32 %52 to float -> %2305 = bitcast i32 %52 to float -> %2306 = fmul float %2304, %2305 -> %2307 = fadd float %2306, 0.000000e+00 -> %2308 = bitcast i32 %686 to float -> %2309 = bitcast i32 %686 to float -> %2310 = fmul float %2308, %2309 -> %2311 = fadd float %2307, %2310 -> %2312 = call float @llvm.sqrt.f32(float %2311) -> %2313 = fneg float %692 -> %2314 = fmul float %2312, %2313 -> %2315 = bitcast i32 %52 to float -> %2316 = fadd float %2315, %2314 -> %2317 = bitcast i32 %52 to float -> %2318 = bitcast i32 %52 to float -> %2319 = fmul float %2317, %2318 -> %2320 = fadd float %2319, 0.000000e+00 -> %2321 = bitcast i32 %686 to float -> %2322 = bitcast i32 %686 to float -2800,2802c2801,2803 -< %2324 = fadd float %2323, 0.000000e+00 -< %2325 = bitcast i32 %57 to float -< %2326 = bitcast i32 %57 to float ---- -> %2324 = fadd float %2320, %2323 -> %2325 = call float @llvm.sqrt.f32(float %2324) -> %2326 = fneg float %692 -2804,2860c2805,2861 -< %2328 = fadd float %2324, %2327 -< %2329 = call float @llvm.sqrt.f32(float %2328) -< %2330 = fneg float %710 -< %2331 = fmul float %2329, %2330 -< %2332 = bitcast i32 %865 to float -< %2333 = fadd float %2332, %2331 -< %2334 = bitcast i32 %865 to float -< %2335 = bitcast i32 %865 to float -< %2336 = fmul float %2334, %2335 -< %2337 = fadd float %2336, 0.000000e+00 -< %2338 = bitcast i32 %57 to float -< %2339 = bitcast i32 %57 to float -< %2340 = fmul float %2338, %2339 -< %2341 = fadd float %2337, %2340 -< %2342 = call float @llvm.sqrt.f32(float %2341) -< %2343 = fneg float %710 -< %2344 = fmul float %2342, %2343 -< %2345 = bitcast i32 %865 to float -< %2346 = fadd float %2345, %2344 -< %2347 = fmul float %2333, %2346 -< %2348 = fadd float %2347, 0.000000e+00 -< %2349 = bitcast i32 %865 to float -< %2350 = bitcast i32 %865 to float -< %2351 = fmul float %2349, %2350 -< %2352 = fadd float %2351, 0.000000e+00 -< %2353 = bitcast i32 %57 to float -< %2354 = bitcast i32 %57 to float -< %2355 = fmul float %2353, %2354 -< %2356 = fadd float %2352, %2355 -< %2357 = call float @llvm.sqrt.f32(float %2356) -< %2358 = fneg float %710 -< %2359 = fmul float %2357, %2358 -< %2360 = fmul float %2359, 0.000000e+00 -< %2361 = bitcast i32 %57 to float -< %2362 = fadd float %2361, %2360 -< %2363 = bitcast i32 %865 to float -< %2364 = bitcast i32 %865 to float -< %2365 = fmul float %2363, %2364 -< %2366 = fadd float %2365, 0.000000e+00 -< %2367 = bitcast i32 %57 to float -< %2368 = bitcast i32 %57 to float -< %2369 = fmul float %2367, %2368 -< %2370 = fadd float %2366, %2369 -< %2371 = call float @llvm.sqrt.f32(float %2370) -< %2372 = fneg float %710 -< %2373 = fmul float %2371, %2372 -< %2374 = fmul float %2373, 0.000000e+00 -< %2375 = bitcast i32 %57 to float -< %2376 = fadd float %2375, %2374 -< %2377 = fmul float %2362, %2376 -< %2378 = fadd float %2348, %2377 -< %2379 = call float @llvm.sqrt.f32(float %2378) -< %2380 = fadd float %2379, 0.000000e+00 -< %2381 = fdiv float %2320, %2380 -< %2382 = fmul float %2381, 2.000000e+00 -< %2383 = bitcast i32 %865 to float -< %2384 = bitcast i32 %865 to float ---- -> %2328 = bitcast i32 %52 to float -> %2329 = fadd float %2328, 
%2327 -> %2330 = fmul float %2316, %2329 -> %2331 = fadd float %2330, 0.000000e+00 -> %2332 = bitcast i32 %52 to float -> %2333 = bitcast i32 %52 to float -> %2334 = fmul float %2332, %2333 -> %2335 = fadd float %2334, 0.000000e+00 -> %2336 = bitcast i32 %686 to float -> %2337 = bitcast i32 %686 to float -> %2338 = fmul float %2336, %2337 -> %2339 = fadd float %2335, %2338 -> %2340 = call float @llvm.sqrt.f32(float %2339) -> %2341 = fneg float %692 -> %2342 = fmul float %2340, %2341 -> %2343 = fmul float %2342, 0.000000e+00 -> %2344 = bitcast i32 %686 to float -> %2345 = fadd float %2344, %2343 -> %2346 = bitcast i32 %52 to float -> %2347 = bitcast i32 %52 to float -> %2348 = fmul float %2346, %2347 -> %2349 = fadd float %2348, 0.000000e+00 -> %2350 = bitcast i32 %686 to float -> %2351 = bitcast i32 %686 to float -> %2352 = fmul float %2350, %2351 -> %2353 = fadd float %2349, %2352 -> %2354 = call float @llvm.sqrt.f32(float %2353) -> %2355 = fneg float %692 -> %2356 = fmul float %2354, %2355 -> %2357 = fmul float %2356, 0.000000e+00 -> %2358 = bitcast i32 %686 to float -> %2359 = fadd float %2358, %2357 -> %2360 = fmul float %2345, %2359 -> %2361 = fadd float %2331, %2360 -> %2362 = call float @llvm.sqrt.f32(float %2361) -> %2363 = fadd float %2362, 0.000000e+00 -> %2364 = fdiv float %2303, %2363 -> %2365 = fmul float %2364, 2.000000e+00 -> %2366 = bitcast i32 %52 to float -> %2367 = bitcast i32 %52 to float -> %2368 = fmul float %2366, %2367 -> %2369 = fadd float %2368, 0.000000e+00 -> %2370 = bitcast i32 %686 to float -> %2371 = bitcast i32 %686 to float -> %2372 = fmul float %2370, %2371 -> %2373 = fadd float %2369, %2372 -> %2374 = call float @llvm.sqrt.f32(float %2373) -> %2375 = fneg float %692 -> %2376 = fmul float %2374, %2375 -> %2377 = bitcast i32 %52 to float -> %2378 = fadd float %2377, %2376 -> %2379 = bitcast i32 %52 to float -> %2380 = bitcast i32 %52 to float -> %2381 = fmul float %2379, %2380 -> %2382 = fadd float %2381, 0.000000e+00 -> %2383 = bitcast i32 %686 to float -> %2384 = bitcast i32 %686 to float -2862,2864c2863,2865 -< %2386 = fadd float %2385, 0.000000e+00 -< %2387 = bitcast i32 %57 to float -< %2388 = bitcast i32 %57 to float ---- -> %2386 = fadd float %2382, %2385 -> %2387 = call float @llvm.sqrt.f32(float %2386) -> %2388 = fneg float %692 -2866,2873c2867,2874 -< %2390 = fadd float %2386, %2389 -< %2391 = call float @llvm.sqrt.f32(float %2390) -< %2392 = fneg float %710 -< %2393 = fmul float %2391, %2392 -< %2394 = bitcast i32 %865 to float -< %2395 = fadd float %2394, %2393 -< %2396 = bitcast i32 %865 to float -< %2397 = bitcast i32 %865 to float ---- -> %2390 = bitcast i32 %52 to float -> %2391 = fadd float %2390, %2389 -> %2392 = bitcast i32 %52 to float -> %2393 = bitcast i32 %52 to float -> %2394 = fmul float %2392, %2393 -> %2395 = fadd float %2394, 0.000000e+00 -> %2396 = bitcast i32 %686 to float -> %2397 = bitcast i32 %686 to float -2875,2877c2876,2878 -< %2399 = fadd float %2398, 0.000000e+00 -< %2400 = bitcast i32 %57 to float -< %2401 = bitcast i32 %57 to float ---- -> %2399 = fadd float %2395, %2398 -> %2400 = call float @llvm.sqrt.f32(float %2399) -> %2401 = fneg float %692 -2879,2952c2880,2953 -< %2403 = fadd float %2399, %2402 -< %2404 = call float @llvm.sqrt.f32(float %2403) -< %2405 = fneg float %710 -< %2406 = fmul float %2404, %2405 -< %2407 = bitcast i32 %865 to float -< %2408 = fadd float %2407, %2406 -< %2409 = bitcast i32 %865 to float -< %2410 = bitcast i32 %865 to float -< %2411 = fmul float %2409, %2410 -< %2412 = fadd float %2411, 
0.000000e+00 -< %2413 = bitcast i32 %57 to float -< %2414 = bitcast i32 %57 to float -< %2415 = fmul float %2413, %2414 -< %2416 = fadd float %2412, %2415 -< %2417 = call float @llvm.sqrt.f32(float %2416) -< %2418 = fneg float %710 -< %2419 = fmul float %2417, %2418 -< %2420 = bitcast i32 %865 to float -< %2421 = fadd float %2420, %2419 -< %2422 = fmul float %2408, %2421 -< %2423 = fadd float %2422, 0.000000e+00 -< %2424 = bitcast i32 %865 to float -< %2425 = bitcast i32 %865 to float -< %2426 = fmul float %2424, %2425 -< %2427 = fadd float %2426, 0.000000e+00 -< %2428 = bitcast i32 %57 to float -< %2429 = bitcast i32 %57 to float -< %2430 = fmul float %2428, %2429 -< %2431 = fadd float %2427, %2430 -< %2432 = call float @llvm.sqrt.f32(float %2431) -< %2433 = fneg float %710 -< %2434 = fmul float %2432, %2433 -< %2435 = fmul float %2434, 0.000000e+00 -< %2436 = bitcast i32 %57 to float -< %2437 = fadd float %2436, %2435 -< %2438 = bitcast i32 %865 to float -< %2439 = bitcast i32 %865 to float -< %2440 = fmul float %2438, %2439 -< %2441 = fadd float %2440, 0.000000e+00 -< %2442 = bitcast i32 %57 to float -< %2443 = bitcast i32 %57 to float -< %2444 = fmul float %2442, %2443 -< %2445 = fadd float %2441, %2444 -< %2446 = call float @llvm.sqrt.f32(float %2445) -< %2447 = fneg float %710 -< %2448 = fmul float %2446, %2447 -< %2449 = fmul float %2448, 0.000000e+00 -< %2450 = bitcast i32 %57 to float -< %2451 = fadd float %2450, %2449 -< %2452 = fmul float %2437, %2451 -< %2453 = fadd float %2423, %2452 -< %2454 = call float @llvm.sqrt.f32(float %2453) -< %2455 = fadd float %2454, 0.000000e+00 -< %2456 = fdiv float %2395, %2455 -< %2457 = fmul float %2382, %2456 -< %2458 = fneg float %2457 -< %2459 = fmul float %2458, %2300 -< %2460 = fadd float %2459, 0.000000e+00 -< %2461 = bitcast i32 %865 to float -< %2462 = bitcast i32 %865 to float -< %2463 = fmul float %2461, %2462 -< %2464 = fadd float %2463, 0.000000e+00 -< %2465 = bitcast i32 %57 to float -< %2466 = bitcast i32 %57 to float -< %2467 = fmul float %2465, %2466 -< %2468 = fadd float %2464, %2467 -< %2469 = call float @llvm.sqrt.f32(float %2468) -< %2470 = fneg float %710 -< %2471 = fmul float %2469, %2470 -< %2472 = fmul float %2471, 0.000000e+00 -< %2473 = bitcast i32 %57 to float -< %2474 = fadd float %2473, %2472 -< %2475 = bitcast i32 %865 to float -< %2476 = bitcast i32 %865 to float ---- -> %2403 = bitcast i32 %52 to float -> %2404 = fadd float %2403, %2402 -> %2405 = fmul float %2391, %2404 -> %2406 = fadd float %2405, 0.000000e+00 -> %2407 = bitcast i32 %52 to float -> %2408 = bitcast i32 %52 to float -> %2409 = fmul float %2407, %2408 -> %2410 = fadd float %2409, 0.000000e+00 -> %2411 = bitcast i32 %686 to float -> %2412 = bitcast i32 %686 to float -> %2413 = fmul float %2411, %2412 -> %2414 = fadd float %2410, %2413 -> %2415 = call float @llvm.sqrt.f32(float %2414) -> %2416 = fneg float %692 -> %2417 = fmul float %2415, %2416 -> %2418 = fmul float %2417, 0.000000e+00 -> %2419 = bitcast i32 %686 to float -> %2420 = fadd float %2419, %2418 -> %2421 = bitcast i32 %52 to float -> %2422 = bitcast i32 %52 to float -> %2423 = fmul float %2421, %2422 -> %2424 = fadd float %2423, 0.000000e+00 -> %2425 = bitcast i32 %686 to float -> %2426 = bitcast i32 %686 to float -> %2427 = fmul float %2425, %2426 -> %2428 = fadd float %2424, %2427 -> %2429 = call float @llvm.sqrt.f32(float %2428) -> %2430 = fneg float %692 -> %2431 = fmul float %2429, %2430 -> %2432 = fmul float %2431, 0.000000e+00 -> %2433 = bitcast i32 %686 to float -> %2434 = fadd 
float %2433, %2432 -> %2435 = fmul float %2420, %2434 -> %2436 = fadd float %2406, %2435 -> %2437 = call float @llvm.sqrt.f32(float %2436) -> %2438 = fadd float %2437, 0.000000e+00 -> %2439 = fdiv float %2378, %2438 -> %2440 = fmul float %2365, %2439 -> %2441 = fneg float %2440 -> %2442 = fmul float %2441, %2283 -> %2443 = fadd float %2442, 0.000000e+00 -> %2444 = bitcast i32 %52 to float -> %2445 = bitcast i32 %52 to float -> %2446 = fmul float %2444, %2445 -> %2447 = fadd float %2446, 0.000000e+00 -> %2448 = bitcast i32 %686 to float -> %2449 = bitcast i32 %686 to float -> %2450 = fmul float %2448, %2449 -> %2451 = fadd float %2447, %2450 -> %2452 = call float @llvm.sqrt.f32(float %2451) -> %2453 = fneg float %692 -> %2454 = fmul float %2452, %2453 -> %2455 = fmul float %2454, 0.000000e+00 -> %2456 = bitcast i32 %686 to float -> %2457 = fadd float %2456, %2455 -> %2458 = bitcast i32 %52 to float -> %2459 = bitcast i32 %52 to float -> %2460 = fmul float %2458, %2459 -> %2461 = fadd float %2460, 0.000000e+00 -> %2462 = bitcast i32 %686 to float -> %2463 = bitcast i32 %686 to float -> %2464 = fmul float %2462, %2463 -> %2465 = fadd float %2461, %2464 -> %2466 = call float @llvm.sqrt.f32(float %2465) -> %2467 = fneg float %692 -> %2468 = fmul float %2466, %2467 -> %2469 = bitcast i32 %52 to float -> %2470 = fadd float %2469, %2468 -> %2471 = bitcast i32 %52 to float -> %2472 = bitcast i32 %52 to float -> %2473 = fmul float %2471, %2472 -> %2474 = fadd float %2473, 0.000000e+00 -> %2475 = bitcast i32 %686 to float -> %2476 = bitcast i32 %686 to float -2954,2956c2955,2957 -< %2478 = fadd float %2477, 0.000000e+00 -< %2479 = bitcast i32 %57 to float -< %2480 = bitcast i32 %57 to float ---- -> %2478 = fadd float %2474, %2477 -> %2479 = call float @llvm.sqrt.f32(float %2478) -> %2480 = fneg float %692 -2958,3028c2959,3029 -< %2482 = fadd float %2478, %2481 -< %2483 = call float @llvm.sqrt.f32(float %2482) -< %2484 = fneg float %710 -< %2485 = fmul float %2483, %2484 -< %2486 = bitcast i32 %865 to float -< %2487 = fadd float %2486, %2485 -< %2488 = bitcast i32 %865 to float -< %2489 = bitcast i32 %865 to float -< %2490 = fmul float %2488, %2489 -< %2491 = fadd float %2490, 0.000000e+00 -< %2492 = bitcast i32 %57 to float -< %2493 = bitcast i32 %57 to float -< %2494 = fmul float %2492, %2493 -< %2495 = fadd float %2491, %2494 -< %2496 = call float @llvm.sqrt.f32(float %2495) -< %2497 = fneg float %710 -< %2498 = fmul float %2496, %2497 -< %2499 = bitcast i32 %865 to float -< %2500 = fadd float %2499, %2498 -< %2501 = fmul float %2487, %2500 -< %2502 = fadd float %2501, 0.000000e+00 -< %2503 = bitcast i32 %865 to float -< %2504 = bitcast i32 %865 to float -< %2505 = fmul float %2503, %2504 -< %2506 = fadd float %2505, 0.000000e+00 -< %2507 = bitcast i32 %57 to float -< %2508 = bitcast i32 %57 to float -< %2509 = fmul float %2507, %2508 -< %2510 = fadd float %2506, %2509 -< %2511 = call float @llvm.sqrt.f32(float %2510) -< %2512 = fneg float %710 -< %2513 = fmul float %2511, %2512 -< %2514 = fmul float %2513, 0.000000e+00 -< %2515 = bitcast i32 %57 to float -< %2516 = fadd float %2515, %2514 -< %2517 = bitcast i32 %865 to float -< %2518 = bitcast i32 %865 to float -< %2519 = fmul float %2517, %2518 -< %2520 = fadd float %2519, 0.000000e+00 -< %2521 = bitcast i32 %57 to float -< %2522 = bitcast i32 %57 to float -< %2523 = fmul float %2521, %2522 -< %2524 = fadd float %2520, %2523 -< %2525 = call float @llvm.sqrt.f32(float %2524) -< %2526 = fneg float %710 -< %2527 = fmul float %2525, %2526 -< %2528 = 
fmul float %2527, 0.000000e+00 -< %2529 = bitcast i32 %57 to float -< %2530 = fadd float %2529, %2528 -< %2531 = fmul float %2516, %2530 -< %2532 = fadd float %2502, %2531 -< %2533 = call float @llvm.sqrt.f32(float %2532) -< %2534 = fadd float %2533, 0.000000e+00 -< %2535 = fdiv float %2474, %2534 -< %2536 = fmul float %2535, 2.000000e+00 -< %2537 = bitcast i32 %865 to float -< %2538 = bitcast i32 %865 to float -< %2539 = fmul float %2537, %2538 -< %2540 = fadd float %2539, 0.000000e+00 -< %2541 = bitcast i32 %57 to float -< %2542 = bitcast i32 %57 to float -< %2543 = fmul float %2541, %2542 -< %2544 = fadd float %2540, %2543 -< %2545 = call float @llvm.sqrt.f32(float %2544) -< %2546 = fneg float %710 -< %2547 = fmul float %2545, %2546 -< %2548 = fmul float %2547, 0.000000e+00 -< %2549 = bitcast i32 %57 to float -< %2550 = fadd float %2549, %2548 -< %2551 = bitcast i32 %865 to float -< %2552 = bitcast i32 %865 to float ---- -> %2482 = bitcast i32 %52 to float -> %2483 = fadd float %2482, %2481 -> %2484 = fmul float %2470, %2483 -> %2485 = fadd float %2484, 0.000000e+00 -> %2486 = bitcast i32 %52 to float -> %2487 = bitcast i32 %52 to float -> %2488 = fmul float %2486, %2487 -> %2489 = fadd float %2488, 0.000000e+00 -> %2490 = bitcast i32 %686 to float -> %2491 = bitcast i32 %686 to float -> %2492 = fmul float %2490, %2491 -> %2493 = fadd float %2489, %2492 -> %2494 = call float @llvm.sqrt.f32(float %2493) -> %2495 = fneg float %692 -> %2496 = fmul float %2494, %2495 -> %2497 = fmul float %2496, 0.000000e+00 -> %2498 = bitcast i32 %686 to float -> %2499 = fadd float %2498, %2497 -> %2500 = bitcast i32 %52 to float -> %2501 = bitcast i32 %52 to float -> %2502 = fmul float %2500, %2501 -> %2503 = fadd float %2502, 0.000000e+00 -> %2504 = bitcast i32 %686 to float -> %2505 = bitcast i32 %686 to float -> %2506 = fmul float %2504, %2505 -> %2507 = fadd float %2503, %2506 -> %2508 = call float @llvm.sqrt.f32(float %2507) -> %2509 = fneg float %692 -> %2510 = fmul float %2508, %2509 -> %2511 = fmul float %2510, 0.000000e+00 -> %2512 = bitcast i32 %686 to float -> %2513 = fadd float %2512, %2511 -> %2514 = fmul float %2499, %2513 -> %2515 = fadd float %2485, %2514 -> %2516 = call float @llvm.sqrt.f32(float %2515) -> %2517 = fadd float %2516, 0.000000e+00 -> %2518 = fdiv float %2457, %2517 -> %2519 = fmul float %2518, 2.000000e+00 -> %2520 = bitcast i32 %52 to float -> %2521 = bitcast i32 %52 to float -> %2522 = fmul float %2520, %2521 -> %2523 = fadd float %2522, 0.000000e+00 -> %2524 = bitcast i32 %686 to float -> %2525 = bitcast i32 %686 to float -> %2526 = fmul float %2524, %2525 -> %2527 = fadd float %2523, %2526 -> %2528 = call float @llvm.sqrt.f32(float %2527) -> %2529 = fneg float %692 -> %2530 = fmul float %2528, %2529 -> %2531 = fmul float %2530, 0.000000e+00 -> %2532 = bitcast i32 %686 to float -> %2533 = fadd float %2532, %2531 -> %2534 = bitcast i32 %52 to float -> %2535 = bitcast i32 %52 to float -> %2536 = fmul float %2534, %2535 -> %2537 = fadd float %2536, 0.000000e+00 -> %2538 = bitcast i32 %686 to float -> %2539 = bitcast i32 %686 to float -> %2540 = fmul float %2538, %2539 -> %2541 = fadd float %2537, %2540 -> %2542 = call float @llvm.sqrt.f32(float %2541) -> %2543 = fneg float %692 -> %2544 = fmul float %2542, %2543 -> %2545 = bitcast i32 %52 to float -> %2546 = fadd float %2545, %2544 -> %2547 = bitcast i32 %52 to float -> %2548 = bitcast i32 %52 to float -> %2549 = fmul float %2547, %2548 -> %2550 = fadd float %2549, 0.000000e+00 -> %2551 = bitcast i32 %686 to float -> %2552 = 
bitcast i32 %686 to float -3030,3032c3031,3033 -< %2554 = fadd float %2553, 0.000000e+00 -< %2555 = bitcast i32 %57 to float -< %2556 = bitcast i32 %57 to float ---- -> %2554 = fadd float %2550, %2553 -> %2555 = call float @llvm.sqrt.f32(float %2554) -> %2556 = fneg float %692 -3034,3118c3035,3102 -< %2558 = fadd float %2554, %2557 -< %2559 = call float @llvm.sqrt.f32(float %2558) -< %2560 = fneg float %710 -< %2561 = fmul float %2559, %2560 -< %2562 = bitcast i32 %865 to float -< %2563 = fadd float %2562, %2561 -< %2564 = bitcast i32 %865 to float -< %2565 = bitcast i32 %865 to float -< %2566 = fmul float %2564, %2565 -< %2567 = fadd float %2566, 0.000000e+00 -< %2568 = bitcast i32 %57 to float -< %2569 = bitcast i32 %57 to float -< %2570 = fmul float %2568, %2569 -< %2571 = fadd float %2567, %2570 -< %2572 = call float @llvm.sqrt.f32(float %2571) -< %2573 = fneg float %710 -< %2574 = fmul float %2572, %2573 -< %2575 = bitcast i32 %865 to float -< %2576 = fadd float %2575, %2574 -< %2577 = fmul float %2563, %2576 -< %2578 = fadd float %2577, 0.000000e+00 -< %2579 = bitcast i32 %865 to float -< %2580 = bitcast i32 %865 to float -< %2581 = fmul float %2579, %2580 -< %2582 = fadd float %2581, 0.000000e+00 -< %2583 = bitcast i32 %57 to float -< %2584 = bitcast i32 %57 to float -< %2585 = fmul float %2583, %2584 -< %2586 = fadd float %2582, %2585 -< %2587 = call float @llvm.sqrt.f32(float %2586) -< %2588 = fneg float %710 -< %2589 = fmul float %2587, %2588 -< %2590 = fmul float %2589, 0.000000e+00 -< %2591 = bitcast i32 %57 to float -< %2592 = fadd float %2591, %2590 -< %2593 = bitcast i32 %865 to float -< %2594 = bitcast i32 %865 to float -< %2595 = fmul float %2593, %2594 -< %2596 = fadd float %2595, 0.000000e+00 -< %2597 = bitcast i32 %57 to float -< %2598 = bitcast i32 %57 to float -< %2599 = fmul float %2597, %2598 -< %2600 = fadd float %2596, %2599 -< %2601 = call float @llvm.sqrt.f32(float %2600) -< %2602 = fneg float %710 -< %2603 = fmul float %2601, %2602 -< %2604 = fmul float %2603, 0.000000e+00 -< %2605 = bitcast i32 %57 to float -< %2606 = fadd float %2605, %2604 -< %2607 = fmul float %2592, %2606 -< %2608 = fadd float %2578, %2607 -< %2609 = call float @llvm.sqrt.f32(float %2608) -< %2610 = fadd float %2609, 0.000000e+00 -< %2611 = fdiv float %2550, %2610 -< %2612 = fmul float %2536, %2611 -< %2613 = fsub float 1.000000e+00, %2612 -< %2614 = load float, float* %1652, align 4 -< %2615 = fmul float %2613, %2614 -< %2616 = fadd float %2460, %2615 -< %2617 = insertelement <4 x float> zeroinitializer, float %2616, i32 0 -< %2618 = insertelement <4 x float> %2617, float 0.000000e+00, i32 1 -< %2619 = insertelement <4 x float> %2618, float 0.000000e+00, i32 2 -< %2620 = insertelement <4 x float> %2619, float 0.000000e+00, i32 3 -< %2621 = extractelement <4 x float> %2620, i32 0 -< store float %2621, float* %2143, align 4 -< %2622 = getelementptr float, float* %1, i32 0 -< %2623 = getelementptr inbounds float, float* %2622, i64 2 -< %2624 = bitcast float* %2623 to i32* -< %2625 = load i32, i32* %2624, align 4 -< %2626 = bitcast i32 %2625 to float -< %2627 = insertelement <4 x float> zeroinitializer, float %2626, i32 0 -< %2628 = getelementptr float, float* %1, i32 0 -< %2629 = getelementptr inbounds float, float* %2628, i64 1 -< %2630 = bitcast float* %2629 to i32* -< %2631 = load i32, i32* %2630, align 4 -< %2632 = bitcast i32 %2631 to float -< %2633 = insertelement <4 x float> %2627, float %2632, i32 1 -< %2634 = insertelement <4 x float> %2633, float 0.000000e+00, i32 2 -< %2635 = 
insertelement <4 x float> %2634, float 0.000000e+00, i32 3 -< %2636 = extractelement <4 x float> %2635, i32 0 -< %2637 = bitcast i32* %2630 to float* -< store float %2636, float* %2637, align 4 -< %2638 = extractelement <4 x float> %2635, i32 1 -< %2639 = bitcast i32* %2624 to float* -< store float %2638, float* %2639, align 4 ---- -> %2558 = bitcast i32 %52 to float -> %2559 = fadd float %2558, %2557 -> %2560 = fmul float %2546, %2559 -> %2561 = fadd float %2560, 0.000000e+00 -> %2562 = bitcast i32 %52 to float -> %2563 = bitcast i32 %52 to float -> %2564 = fmul float %2562, %2563 -> %2565 = fadd float %2564, 0.000000e+00 -> %2566 = bitcast i32 %686 to float -> %2567 = bitcast i32 %686 to float -> %2568 = fmul float %2566, %2567 -> %2569 = fadd float %2565, %2568 -> %2570 = call float @llvm.sqrt.f32(float %2569) -> %2571 = fneg float %692 -> %2572 = fmul float %2570, %2571 -> %2573 = fmul float %2572, 0.000000e+00 -> %2574 = bitcast i32 %686 to float -> %2575 = fadd float %2574, %2573 -> %2576 = bitcast i32 %52 to float -> %2577 = bitcast i32 %52 to float -> %2578 = fmul float %2576, %2577 -> %2579 = fadd float %2578, 0.000000e+00 -> %2580 = bitcast i32 %686 to float -> %2581 = bitcast i32 %686 to float -> %2582 = fmul float %2580, %2581 -> %2583 = fadd float %2579, %2582 -> %2584 = call float @llvm.sqrt.f32(float %2583) -> %2585 = fneg float %692 -> %2586 = fmul float %2584, %2585 -> %2587 = fmul float %2586, 0.000000e+00 -> %2588 = bitcast i32 %686 to float -> %2589 = fadd float %2588, %2587 -> %2590 = fmul float %2575, %2589 -> %2591 = fadd float %2561, %2590 -> %2592 = call float @llvm.sqrt.f32(float %2591) -> %2593 = fadd float %2592, 0.000000e+00 -> %2594 = fdiv float %2533, %2593 -> %2595 = fmul float %2519, %2594 -> %2596 = fsub float 1.000000e+00, %2595 -> %2597 = load float, float* %44, align 4 -> %2598 = fmul float %2596, %2597 -> %2599 = fadd float %2443, %2598 -> %2600 = insertelement <4 x float> zeroinitializer, float %2599, i32 0 -> %2601 = insertelement <4 x float> %2600, float 0.000000e+00, i32 1 -> %2602 = insertelement <4 x float> %2601, float 0.000000e+00, i32 2 -> %2603 = insertelement <4 x float> %2602, float 0.000000e+00, i32 3 -> %2604 = extractelement <4 x float> %2603, i32 0 -> store float %2604, float* %2126, align 4 -> %2605 = getelementptr float, float* %1, i32 0 -> %2606 = getelementptr inbounds float, float* %2605, i64 2 -> %2607 = bitcast float* %2606 to i32* -> %2608 = load i32, i32* %2607, align 4 -> %2609 = bitcast i32 %2608 to float -> %2610 = insertelement <4 x float> zeroinitializer, float %2609, i32 0 -> %2611 = getelementptr float, float* %1, i32 0 -> %2612 = getelementptr inbounds float, float* %2611, i64 1 -> %2613 = bitcast float* %2612 to i32* -> %2614 = load i32, i32* %2613, align 4 -> %2615 = bitcast i32 %2614 to float -> %2616 = insertelement <4 x float> %2610, float %2615, i32 1 -> %2617 = insertelement <4 x float> %2616, float 0.000000e+00, i32 2 -> %2618 = insertelement <4 x float> %2617, float 0.000000e+00, i32 3 -> %2619 = extractelement <4 x float> %2618, i32 0 -> %2620 = bitcast i32* %2613 to float* -> store float %2619, float* %2620, align 4 -> %2621 = extractelement <4 x float> %2618, i32 1 -> %2622 = bitcast i32* %2607 to float* -> store float %2621, float* %2622, align 4 diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/diff-diospyros.txt b/src/dios-egraphs/Diospyros/flaky-outputs/diff-diospyros.txt deleted file mode 100644 index 29bb2e62..00000000 --- a/src/dios-egraphs/Diospyros/flaky-outputs/diff-diospyros.txt +++ /dev/null 
+++ /dev/null
@@ -1,5450 +0,0 @@ -270,276c270,276 -< %58 = getelementptr float, float* %0, i32 0 -< %59 = load float, float* %58, align 4 -< %60 = insertelement <4 x float> zeroinitializer, float %59, i32 0 -< %61 = insertelement <4 x float> %60, float 1.000000e+00, i32 1 -< %62 = insertelement <4 x float> %61, float 1.000000e+00, i32 2 -< %63 = insertelement <4 x float> %62, float 1.000000e+00, i32 3 -< %64 = insertelement <4 x float> zeroinitializer, float %51, i32 0 ---- -> %58 = insertelement <4 x float> zeroinitializer, float %45, i32 0 -> %59 = insertelement <4 x float> %58, float 1.000000e+00, i32 1 -> %60 = insertelement <4 x float> %59, float 1.000000e+00, i32 2 -> %61 = insertelement <4 x float> %60, float 1.000000e+00, i32 3 -> %62 = getelementptr float, float* %1, i32 0 -> %63 = load float, float* %62, align 4 -> %64 = insertelement <4 x float> zeroinitializer, float %63, i32 0 -280c280 -< %68 = fmul <4 x float> %63, %67 ---- -> %68 = fmul <4 x float> %61, %67 -325,348c325,347 -< %110 = insertelement <4 x float> zeroinitializer, float %97, i32 0 -< %111 = insertelement <4 x float> %110, float 0.000000e+00, i32 1 -< %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 2 -< %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 3 -< %114 = fmul <4 x float> %109, %113 -< %115 = fadd <4 x float> %114, zeroinitializer -< %116 = getelementptr float, float* %0, i32 0 -< %117 = getelementptr inbounds float, float* %116, i64 1 -< %118 = load float, float* %117, align 4 -< %119 = insertelement <4 x float> zeroinitializer, float %118, i32 0 -< %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 1 -< %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 2 -< %122 = insertelement <4 x float> %121, float 0.000000e+00, i32 3 -< %123 = getelementptr float, float* %1, i32 0 -< %124 = getelementptr inbounds float, float* %123, i64 3 -< %125 = load float, float* %124, align 4 -< %126 = insertelement <4 x float> zeroinitializer, float %125, i32 0 -< %127 = insertelement <4 x float> %126, float 0.000000e+00, i32 1 -< %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 2 -< %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 3 -< %130 = call <4 x float> @llvm.fma.f32.3(<4 x float> %122, <4 x float> %129, <4 x float> %115) -< %131 = extractelement <4 x float> %130, i32 0 -< store float %131, float* %105, align 4 -< %132 = extractelement <4 x float> %130, i32 1 ---- -> %110 = load float, float* %96, align 4 -> %111 = insertelement <4 x float> zeroinitializer, float %110, i32 0 -> %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 1 -> %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 2 -> %114 = insertelement <4 x float> %113, float 0.000000e+00, i32 3 -> %115 = fmul <4 x float> %109, %114 -> %116 = fadd <4 x float> %115, zeroinitializer -> %117 = getelementptr float, float* %0, i32 0 -> %118 = getelementptr inbounds float, float* %117, i64 1 -> %119 = load float, float* %118, align 4 -> %120 = insertelement <4 x float> zeroinitializer, float %119, i32 0 -> %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 1 -> %122 = insertelement <4 x float> %121, float 0.000000e+00, i32 2 -> %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3 -> %124 = getelementptr float, float* %1, i32 0 -> %125 = getelementptr inbounds float, float* %124, i64 3 -> %126 = load float, float* %125, align 4 -> %127 = insertelement <4 x float> zeroinitializer, float %126, i32 0 -> %128 = insertelement <4 x float> %127, float 0.000000e+00, 
i32 1 -> %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 2 -> %130 = insertelement <4 x float> %129, float 0.000000e+00, i32 3 -> %131 = call <4 x float> @llvm.fma.f32.3(<4 x float> %123, <4 x float> %130, <4 x float> %116) -> %132 = extractelement <4 x float> %131, i32 0 -350c349 -< %134 = getelementptr inbounds float, float* %133, i64 2 ---- -> %134 = getelementptr inbounds float, float* %133, i64 1 -352,385c351,385 -< %135 = getelementptr float, float* %0, i32 0 -< %136 = getelementptr inbounds float, float* %135, i64 2 -< %137 = load float, float* %136, align 4 -< %138 = insertelement <4 x float> zeroinitializer, float %137, i32 0 -< %139 = insertelement <4 x float> %138, float 0.000000e+00, i32 1 -< %140 = insertelement <4 x float> %139, float 0.000000e+00, i32 2 -< %141 = insertelement <4 x float> %140, float 0.000000e+00, i32 3 -< %142 = getelementptr float, float* %1, i32 0 -< %143 = load float, float* %142, align 4 -< %144 = insertelement <4 x float> zeroinitializer, float %143, i32 0 -< %145 = insertelement <4 x float> %144, float 0.000000e+00, i32 1 -< %146 = insertelement <4 x float> %145, float 0.000000e+00, i32 2 -< %147 = insertelement <4 x float> %146, float 0.000000e+00, i32 3 -< %148 = call <4 x float> @llvm.fma.f32.4(<4 x float> %141, <4 x float> %147, <4 x float> zeroinitializer) -< %149 = extractelement <4 x float> %148, i32 0 -< store float %149, float* %134, align 4 -< %150 = insertelement <4 x float> zeroinitializer, float %137, i32 0 -< %151 = insertelement <4 x float> %150, float 1.000000e+00, i32 1 -< %152 = insertelement <4 x float> %151, float 1.000000e+00, i32 2 -< %153 = insertelement <4 x float> %152, float 1.000000e+00, i32 3 -< %154 = insertelement <4 x float> zeroinitializer, float %143, i32 0 -< %155 = insertelement <4 x float> %154, float 0.000000e+00, i32 1 -< %156 = insertelement <4 x float> %155, float 0.000000e+00, i32 2 -< %157 = insertelement <4 x float> %156, float 0.000000e+00, i32 3 -< %158 = fmul <4 x float> %153, %157 -< %159 = fadd <4 x float> %158, zeroinitializer -< %160 = getelementptr float, float* %0, i32 0 -< %161 = getelementptr inbounds float, float* %160, i64 3 -< %162 = load float, float* %161, align 4 -< %163 = insertelement <4 x float> zeroinitializer, float %162, i32 0 -< %164 = insertelement <4 x float> %163, float 0.000000e+00, i32 1 -< %165 = insertelement <4 x float> %164, float 0.000000e+00, i32 2 -< %166 = insertelement <4 x float> %165, float 0.000000e+00, i32 3 -< %167 = load float, float* %78, align 4 ---- -> %135 = extractelement <4 x float> %131, i32 1 -> %136 = getelementptr float, float* %2, i32 0 -> %137 = getelementptr inbounds float, float* %136, i64 2 -> store float %135, float* %137, align 4 -> %138 = getelementptr float, float* %0, i32 0 -> %139 = getelementptr inbounds float, float* %138, i64 2 -> %140 = load float, float* %139, align 4 -> %141 = insertelement <4 x float> zeroinitializer, float %140, i32 0 -> %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 1 -> %143 = insertelement <4 x float> %142, float 0.000000e+00, i32 2 -> %144 = insertelement <4 x float> %143, float 0.000000e+00, i32 3 -> %145 = getelementptr float, float* %1, i32 0 -> %146 = load float, float* %145, align 4 -> %147 = insertelement <4 x float> zeroinitializer, float %146, i32 0 -> %148 = insertelement <4 x float> %147, float 0.000000e+00, i32 1 -> %149 = insertelement <4 x float> %148, float 0.000000e+00, i32 2 -> %150 = insertelement <4 x float> %149, float 0.000000e+00, i32 3 -> %151 = call <4 x float> 
@llvm.fma.f32.4(<4 x float> %144, <4 x float> %150, <4 x float> zeroinitializer) -> %152 = extractelement <4 x float> %151, i32 0 -> %153 = getelementptr float, float* %2, i32 0 -> %154 = getelementptr inbounds float, float* %153, i64 2 -> store float %152, float* %154, align 4 -> %155 = insertelement <4 x float> zeroinitializer, float %140, i32 0 -> %156 = insertelement <4 x float> %155, float 1.000000e+00, i32 1 -> %157 = insertelement <4 x float> %156, float 1.000000e+00, i32 2 -> %158 = insertelement <4 x float> %157, float 1.000000e+00, i32 3 -> %159 = insertelement <4 x float> zeroinitializer, float %146, i32 0 -> %160 = insertelement <4 x float> %159, float 0.000000e+00, i32 1 -> %161 = insertelement <4 x float> %160, float 0.000000e+00, i32 2 -> %162 = insertelement <4 x float> %161, float 0.000000e+00, i32 3 -> %163 = fmul <4 x float> %158, %162 -> %164 = fadd <4 x float> %163, zeroinitializer -> %165 = getelementptr float, float* %0, i32 0 -> %166 = getelementptr inbounds float, float* %165, i64 3 -> %167 = load float, float* %166, align 4 -390,402c390,402 -< %172 = call <4 x float> @llvm.fma.f32.5(<4 x float> %166, <4 x float> %171, <4 x float> %159) -< %173 = extractelement <4 x float> %172, i32 0 -< store float %173, float* %134, align 4 -< %174 = extractelement <4 x float> %172, i32 1 -< %175 = getelementptr float, float* %2, i32 0 -< %176 = getelementptr inbounds float, float* %175, i64 3 -< store float %174, float* %176, align 4 -< %177 = load float, float* %136, align 4 -< %178 = insertelement <4 x float> zeroinitializer, float %177, i32 0 -< %179 = insertelement <4 x float> %178, float 0.000000e+00, i32 1 -< %180 = insertelement <4 x float> %179, float 0.000000e+00, i32 2 -< %181 = insertelement <4 x float> %180, float 0.000000e+00, i32 3 -< %182 = load float, float* %96, align 4 ---- -> %172 = load float, float* %78, align 4 -> %173 = insertelement <4 x float> zeroinitializer, float %172, i32 0 -> %174 = insertelement <4 x float> %173, float 0.000000e+00, i32 1 -> %175 = insertelement <4 x float> %174, float 0.000000e+00, i32 2 -> %176 = insertelement <4 x float> %175, float 0.000000e+00, i32 3 -> %177 = call <4 x float> @llvm.fma.f32.5(<4 x float> %171, <4 x float> %176, <4 x float> %164) -> %178 = extractelement <4 x float> %177, i32 0 -> store float %178, float* %154, align 4 -> %179 = extractelement <4 x float> %177, i32 1 -> %180 = getelementptr float, float* %2, i32 0 -> %181 = getelementptr inbounds float, float* %180, i64 3 -> store float %179, float* %181, align 4 -> %182 = load float, float* %139, align 4 -407,427c407,427 -< %187 = call <4 x float> @llvm.fma.f32.6(<4 x float> %181, <4 x float> %186, <4 x float> zeroinitializer) -< %188 = extractelement <4 x float> %187, i32 0 -< store float %188, float* %176, align 4 -< %189 = insertelement <4 x float> zeroinitializer, float %177, i32 0 -< %190 = insertelement <4 x float> %189, float 1.000000e+00, i32 1 -< %191 = insertelement <4 x float> %190, float 1.000000e+00, i32 2 -< %192 = insertelement <4 x float> %191, float 1.000000e+00, i32 3 -< %193 = insertelement <4 x float> zeroinitializer, float %182, i32 0 -< %194 = insertelement <4 x float> %193, float 0.000000e+00, i32 1 -< %195 = insertelement <4 x float> %194, float 0.000000e+00, i32 2 -< %196 = insertelement <4 x float> %195, float 0.000000e+00, i32 3 -< %197 = fmul <4 x float> %192, %196 -< %198 = fadd <4 x float> %197, zeroinitializer -< %199 = getelementptr float, float* %0, i32 0 -< %200 = getelementptr inbounds float, float* %199, i64 3 -< %201 = load 
float, float* %200, align 4 -< %202 = insertelement <4 x float> zeroinitializer, float %201, i32 0 -< %203 = insertelement <4 x float> %202, float 0.000000e+00, i32 1 -< %204 = insertelement <4 x float> %203, float 0.000000e+00, i32 2 -< %205 = insertelement <4 x float> %204, float 0.000000e+00, i32 3 -< %206 = load float, float* %124, align 4 ---- -> %187 = load float, float* %96, align 4 -> %188 = insertelement <4 x float> zeroinitializer, float %187, i32 0 -> %189 = insertelement <4 x float> %188, float 0.000000e+00, i32 1 -> %190 = insertelement <4 x float> %189, float 0.000000e+00, i32 2 -> %191 = insertelement <4 x float> %190, float 0.000000e+00, i32 3 -> %192 = call <4 x float> @llvm.fma.f32.6(<4 x float> %186, <4 x float> %191, <4 x float> zeroinitializer) -> %193 = extractelement <4 x float> %192, i32 0 -> store float %193, float* %181, align 4 -> %194 = insertelement <4 x float> zeroinitializer, float %182, i32 0 -> %195 = insertelement <4 x float> %194, float 1.000000e+00, i32 1 -> %196 = insertelement <4 x float> %195, float 1.000000e+00, i32 2 -> %197 = insertelement <4 x float> %196, float 1.000000e+00, i32 3 -> %198 = insertelement <4 x float> zeroinitializer, float %187, i32 0 -> %199 = insertelement <4 x float> %198, float 0.000000e+00, i32 1 -> %200 = insertelement <4 x float> %199, float 0.000000e+00, i32 2 -> %201 = insertelement <4 x float> %200, float 0.000000e+00, i32 3 -> %202 = fmul <4 x float> %197, %201 -> %203 = fadd <4 x float> %202, zeroinitializer -> %204 = getelementptr float, float* %0, i32 0 -> %205 = getelementptr inbounds float, float* %204, i64 3 -> %206 = load float, float* %205, align 4 -432,434c432,439 -< %211 = call <4 x float> @llvm.fma.f32.7(<4 x float> %205, <4 x float> %210, <4 x float> %198) -< %212 = extractelement <4 x float> %211, i32 0 -< store float %212, float* %176, align 4 ---- -> %211 = load float, float* %125, align 4 -> %212 = insertelement <4 x float> zeroinitializer, float %211, i32 0 -> %213 = insertelement <4 x float> %212, float 0.000000e+00, i32 1 -> %214 = insertelement <4 x float> %213, float 0.000000e+00, i32 2 -> %215 = insertelement <4 x float> %214, float 0.000000e+00, i32 3 -> %216 = call <4 x float> @llvm.fma.f32.7(<4 x float> %210, <4 x float> %215, <4 x float> %203) -> %217 = extractelement <4 x float> %216, i32 0 -> store float %217, float* %181, align 4 -658,674c663,679 -< %157 = load i32, i32* %130, align 4 -< %158 = bitcast i32 %157 to float -< %159 = bitcast i32 %157 to float -< %160 = fmul float %158, %159 -< %161 = fadd float %156, %160 -< %162 = call float @llvm.sqrt.f32.8(float %161) -< %163 = bitcast i32 %152 to float -< %164 = fcmp olt float %163, 0.000000e+00 -< %165 = sext i1 %164 to i32 -< %166 = fcmp ogt float %163, 0.000000e+00 -< %167 = zext i1 %166 to i32 -< %168 = add nsw i32 %165, %167 -< %169 = sitofp i32 %168 to float -< %170 = fneg float %169 -< %171 = fmul float %162, %170 -< %172 = bitcast i32 %152 to float -< %173 = fadd float %172, %171 ---- -> %157 = bitcast i32 %131 to float -> %158 = bitcast i32 %131 to float -> %159 = fmul float %157, %158 -> %160 = fadd float %156, %159 -> %161 = call float @llvm.sqrt.f32.8(float %160) -> %162 = bitcast i32 %152 to float -> %163 = fcmp olt float %162, 0.000000e+00 -> %164 = sext i1 %163 to i32 -> %165 = fcmp ogt float %162, 0.000000e+00 -> %166 = zext i1 %165 to i32 -> %167 = add nsw i32 %164, %166 -> %168 = sitofp i32 %167 to float -> %169 = fneg float %168 -> %170 = fmul float %161, %169 -> %171 = bitcast i32 %152 to float -> %172 = fadd float %171, 
%170 -> %173 = bitcast i32 %152 to float -676,687c681,692 -< %175 = bitcast i32 %152 to float -< %176 = fmul float %174, %175 -< %177 = fadd float %176, 0.000000e+00 -< %178 = bitcast i32 %157 to float -< %179 = bitcast i32 %157 to float -< %180 = fmul float %178, %179 -< %181 = fadd float %177, %180 -< %182 = call float @llvm.sqrt.f32.9(float %181) -< %183 = fneg float %169 -< %184 = fmul float %182, %183 -< %185 = bitcast i32 %152 to float -< %186 = fadd float %185, %184 ---- -> %175 = fmul float %173, %174 -> %176 = fadd float %175, 0.000000e+00 -> %177 = bitcast i32 %131 to float -> %178 = bitcast i32 %131 to float -> %179 = fmul float %177, %178 -> %180 = fadd float %176, %179 -> %181 = call float @llvm.sqrt.f32.9(float %180) -> %182 = fneg float %168 -> %183 = fmul float %181, %182 -> %184 = bitcast i32 %152 to float -> %185 = fadd float %184, %183 -> %186 = bitcast i32 %152 to float -689,702c694,707 -< %188 = bitcast i32 %152 to float -< %189 = fmul float %187, %188 -< %190 = fadd float %189, 0.000000e+00 -< %191 = bitcast i32 %157 to float -< %192 = bitcast i32 %157 to float -< %193 = fmul float %191, %192 -< %194 = fadd float %190, %193 -< %195 = call float @llvm.sqrt.f32.10(float %194) -< %196 = fneg float %169 -< %197 = fmul float %195, %196 -< %198 = bitcast i32 %152 to float -< %199 = fadd float %198, %197 -< %200 = fmul float %186, %199 -< %201 = fadd float %200, 0.000000e+00 ---- -> %188 = fmul float %186, %187 -> %189 = fadd float %188, 0.000000e+00 -> %190 = bitcast i32 %131 to float -> %191 = bitcast i32 %131 to float -> %192 = fmul float %190, %191 -> %193 = fadd float %189, %192 -> %194 = call float @llvm.sqrt.f32.10(float %193) -> %195 = fneg float %168 -> %196 = fmul float %194, %195 -> %197 = bitcast i32 %152 to float -> %198 = fadd float %197, %196 -> %199 = fmul float %185, %198 -> %200 = fadd float %199, 0.000000e+00 -> %201 = bitcast i32 %152 to float -704,716c709,721 -< %203 = bitcast i32 %152 to float -< %204 = fmul float %202, %203 -< %205 = fadd float %204, 0.000000e+00 -< %206 = bitcast i32 %157 to float -< %207 = bitcast i32 %157 to float -< %208 = fmul float %206, %207 -< %209 = fadd float %205, %208 -< %210 = call float @llvm.sqrt.f32.11(float %209) -< %211 = fneg float %169 -< %212 = fmul float %210, %211 -< %213 = fmul float %212, 0.000000e+00 -< %214 = bitcast i32 %157 to float -< %215 = fadd float %214, %213 ---- -> %203 = fmul float %201, %202 -> %204 = fadd float %203, 0.000000e+00 -> %205 = bitcast i32 %131 to float -> %206 = bitcast i32 %131 to float -> %207 = fmul float %205, %206 -> %208 = fadd float %204, %207 -> %209 = call float @llvm.sqrt.f32.11(float %208) -> %210 = fneg float %168 -> %211 = fmul float %209, %210 -> %212 = fmul float %211, 0.000000e+00 -> %213 = bitcast i32 %131 to float -> %214 = fadd float %213, %212 -> %215 = bitcast i32 %152 to float -718,736c723,741 -< %217 = bitcast i32 %152 to float -< %218 = fmul float %216, %217 -< %219 = fadd float %218, 0.000000e+00 -< %220 = bitcast i32 %157 to float -< %221 = bitcast i32 %157 to float -< %222 = fmul float %220, %221 -< %223 = fadd float %219, %222 -< %224 = call float @llvm.sqrt.f32.12(float %223) -< %225 = fneg float %169 -< %226 = fmul float %224, %225 -< %227 = fmul float %226, 0.000000e+00 -< %228 = bitcast i32 %157 to float -< %229 = fadd float %228, %227 -< %230 = fmul float %215, %229 -< %231 = fadd float %201, %230 -< %232 = call float @llvm.sqrt.f32.13(float %231) -< %233 = fadd float %232, 0.000000e+00 -< %234 = fdiv float %173, %233 -< %235 = fmul float %234, 
2.000000e+00 ---- -> %217 = fmul float %215, %216 -> %218 = fadd float %217, 0.000000e+00 -> %219 = bitcast i32 %131 to float -> %220 = bitcast i32 %131 to float -> %221 = fmul float %219, %220 -> %222 = fadd float %218, %221 -> %223 = call float @llvm.sqrt.f32.12(float %222) -> %224 = fneg float %168 -> %225 = fmul float %223, %224 -> %226 = fmul float %225, 0.000000e+00 -> %227 = bitcast i32 %131 to float -> %228 = fadd float %227, %226 -> %229 = fmul float %214, %228 -> %230 = fadd float %200, %229 -> %231 = call float @llvm.sqrt.f32.13(float %230) -> %232 = fadd float %231, 0.000000e+00 -> %233 = fdiv float %172, %232 -> %234 = fmul float %233, 2.000000e+00 -> %235 = bitcast i32 %152 to float -738,749c743,754 -< %237 = bitcast i32 %152 to float -< %238 = fmul float %236, %237 -< %239 = fadd float %238, 0.000000e+00 -< %240 = bitcast i32 %157 to float -< %241 = bitcast i32 %157 to float -< %242 = fmul float %240, %241 -< %243 = fadd float %239, %242 -< %244 = call float @llvm.sqrt.f32.14(float %243) -< %245 = fneg float %169 -< %246 = fmul float %244, %245 -< %247 = bitcast i32 %152 to float -< %248 = fadd float %247, %246 ---- -> %237 = fmul float %235, %236 -> %238 = fadd float %237, 0.000000e+00 -> %239 = bitcast i32 %131 to float -> %240 = bitcast i32 %131 to float -> %241 = fmul float %239, %240 -> %242 = fadd float %238, %241 -> %243 = call float @llvm.sqrt.f32.14(float %242) -> %244 = fneg float %168 -> %245 = fmul float %243, %244 -> %246 = bitcast i32 %152 to float -> %247 = fadd float %246, %245 -> %248 = bitcast i32 %152 to float -751,762c756,767 -< %250 = bitcast i32 %152 to float -< %251 = fmul float %249, %250 -< %252 = fadd float %251, 0.000000e+00 -< %253 = bitcast i32 %157 to float -< %254 = bitcast i32 %157 to float -< %255 = fmul float %253, %254 -< %256 = fadd float %252, %255 -< %257 = call float @llvm.sqrt.f32.15(float %256) -< %258 = fneg float %169 -< %259 = fmul float %257, %258 -< %260 = bitcast i32 %152 to float -< %261 = fadd float %260, %259 ---- -> %250 = fmul float %248, %249 -> %251 = fadd float %250, 0.000000e+00 -> %252 = bitcast i32 %131 to float -> %253 = bitcast i32 %131 to float -> %254 = fmul float %252, %253 -> %255 = fadd float %251, %254 -> %256 = call float @llvm.sqrt.f32.15(float %255) -> %257 = fneg float %168 -> %258 = fmul float %256, %257 -> %259 = bitcast i32 %152 to float -> %260 = fadd float %259, %258 -> %261 = bitcast i32 %152 to float -764,777c769,782 -< %263 = bitcast i32 %152 to float -< %264 = fmul float %262, %263 -< %265 = fadd float %264, 0.000000e+00 -< %266 = bitcast i32 %157 to float -< %267 = bitcast i32 %157 to float -< %268 = fmul float %266, %267 -< %269 = fadd float %265, %268 -< %270 = call float @llvm.sqrt.f32.16(float %269) -< %271 = fneg float %169 -< %272 = fmul float %270, %271 -< %273 = bitcast i32 %152 to float -< %274 = fadd float %273, %272 -< %275 = fmul float %261, %274 -< %276 = fadd float %275, 0.000000e+00 ---- -> %263 = fmul float %261, %262 -> %264 = fadd float %263, 0.000000e+00 -> %265 = bitcast i32 %131 to float -> %266 = bitcast i32 %131 to float -> %267 = fmul float %265, %266 -> %268 = fadd float %264, %267 -> %269 = call float @llvm.sqrt.f32.16(float %268) -> %270 = fneg float %168 -> %271 = fmul float %269, %270 -> %272 = bitcast i32 %152 to float -> %273 = fadd float %272, %271 -> %274 = fmul float %260, %273 -> %275 = fadd float %274, 0.000000e+00 -> %276 = bitcast i32 %152 to float -779,791c784,796 -< %278 = bitcast i32 %152 to float -< %279 = fmul float %277, %278 -< %280 = fadd float %279, 
0.000000e+00 -< %281 = bitcast i32 %157 to float -< %282 = bitcast i32 %157 to float -< %283 = fmul float %281, %282 -< %284 = fadd float %280, %283 -< %285 = call float @llvm.sqrt.f32.17(float %284) -< %286 = fneg float %169 -< %287 = fmul float %285, %286 -< %288 = fmul float %287, 0.000000e+00 -< %289 = bitcast i32 %157 to float -< %290 = fadd float %289, %288 ---- -> %278 = fmul float %276, %277 -> %279 = fadd float %278, 0.000000e+00 -> %280 = bitcast i32 %131 to float -> %281 = bitcast i32 %131 to float -> %282 = fmul float %280, %281 -> %283 = fadd float %279, %282 -> %284 = call float @llvm.sqrt.f32.17(float %283) -> %285 = fneg float %168 -> %286 = fmul float %284, %285 -> %287 = fmul float %286, 0.000000e+00 -> %288 = bitcast i32 %131 to float -> %289 = fadd float %288, %287 -> %290 = bitcast i32 %152 to float -793,812c798,817 -< %292 = bitcast i32 %152 to float -< %293 = fmul float %291, %292 -< %294 = fadd float %293, 0.000000e+00 -< %295 = bitcast i32 %157 to float -< %296 = bitcast i32 %157 to float -< %297 = fmul float %295, %296 -< %298 = fadd float %294, %297 -< %299 = call float @llvm.sqrt.f32.18(float %298) -< %300 = fneg float %169 -< %301 = fmul float %299, %300 -< %302 = fmul float %301, 0.000000e+00 -< %303 = bitcast i32 %157 to float -< %304 = fadd float %303, %302 -< %305 = fmul float %290, %304 -< %306 = fadd float %276, %305 -< %307 = call float @llvm.sqrt.f32.19(float %306) -< %308 = fadd float %307, 0.000000e+00 -< %309 = fdiv float %248, %308 -< %310 = fmul float %235, %309 -< %311 = insertelement <4 x float> %149, float %310, i32 1 ---- -> %292 = fmul float %290, %291 -> %293 = fadd float %292, 0.000000e+00 -> %294 = bitcast i32 %131 to float -> %295 = bitcast i32 %131 to float -> %296 = fmul float %294, %295 -> %297 = fadd float %293, %296 -> %298 = call float @llvm.sqrt.f32.18(float %297) -> %299 = fneg float %168 -> %300 = fmul float %298, %299 -> %301 = fmul float %300, 0.000000e+00 -> %302 = bitcast i32 %131 to float -> %303 = fadd float %302, %301 -> %304 = fmul float %289, %303 -> %305 = fadd float %275, %304 -> %306 = call float @llvm.sqrt.f32.19(float %305) -> %307 = fadd float %306, 0.000000e+00 -> %308 = fdiv float %247, %307 -> %309 = fmul float %234, %308 -> %310 = insertelement <4 x float> %149, float %309, i32 1 -> %311 = bitcast i32 %152 to float -814,825c819,830 -< %313 = bitcast i32 %152 to float -< %314 = fmul float %312, %313 -< %315 = fadd float %314, 0.000000e+00 -< %316 = bitcast i32 %157 to float -< %317 = bitcast i32 %157 to float -< %318 = fmul float %316, %317 -< %319 = fadd float %315, %318 -< %320 = call float @llvm.sqrt.f32.20(float %319) -< %321 = fneg float %169 -< %322 = fmul float %320, %321 -< %323 = bitcast i32 %152 to float -< %324 = fadd float %323, %322 ---- -> %313 = fmul float %311, %312 -> %314 = fadd float %313, 0.000000e+00 -> %315 = bitcast i32 %131 to float -> %316 = bitcast i32 %131 to float -> %317 = fmul float %315, %316 -> %318 = fadd float %314, %317 -> %319 = call float @llvm.sqrt.f32.20(float %318) -> %320 = fneg float %168 -> %321 = fmul float %319, %320 -> %322 = bitcast i32 %152 to float -> %323 = fadd float %322, %321 -> %324 = bitcast i32 %152 to float -827,838c832,843 -< %326 = bitcast i32 %152 to float -< %327 = fmul float %325, %326 -< %328 = fadd float %327, 0.000000e+00 -< %329 = bitcast i32 %157 to float -< %330 = bitcast i32 %157 to float -< %331 = fmul float %329, %330 -< %332 = fadd float %328, %331 -< %333 = call float @llvm.sqrt.f32.21(float %332) -< %334 = fneg float %169 -< %335 = fmul 
float %333, %334 -< %336 = bitcast i32 %152 to float -< %337 = fadd float %336, %335 ---- -> %326 = fmul float %324, %325 -> %327 = fadd float %326, 0.000000e+00 -> %328 = bitcast i32 %131 to float -> %329 = bitcast i32 %131 to float -> %330 = fmul float %328, %329 -> %331 = fadd float %327, %330 -> %332 = call float @llvm.sqrt.f32.21(float %331) -> %333 = fneg float %168 -> %334 = fmul float %332, %333 -> %335 = bitcast i32 %152 to float -> %336 = fadd float %335, %334 -> %337 = bitcast i32 %152 to float -840,853c845,858 -< %339 = bitcast i32 %152 to float -< %340 = fmul float %338, %339 -< %341 = fadd float %340, 0.000000e+00 -< %342 = bitcast i32 %157 to float -< %343 = bitcast i32 %157 to float -< %344 = fmul float %342, %343 -< %345 = fadd float %341, %344 -< %346 = call float @llvm.sqrt.f32.22(float %345) -< %347 = fneg float %169 -< %348 = fmul float %346, %347 -< %349 = bitcast i32 %152 to float -< %350 = fadd float %349, %348 -< %351 = fmul float %337, %350 -< %352 = fadd float %351, 0.000000e+00 ---- -> %339 = fmul float %337, %338 -> %340 = fadd float %339, 0.000000e+00 -> %341 = bitcast i32 %131 to float -> %342 = bitcast i32 %131 to float -> %343 = fmul float %341, %342 -> %344 = fadd float %340, %343 -> %345 = call float @llvm.sqrt.f32.22(float %344) -> %346 = fneg float %168 -> %347 = fmul float %345, %346 -> %348 = bitcast i32 %152 to float -> %349 = fadd float %348, %347 -> %350 = fmul float %336, %349 -> %351 = fadd float %350, 0.000000e+00 -> %352 = bitcast i32 %152 to float -855,867c860,872 -< %354 = bitcast i32 %152 to float -< %355 = fmul float %353, %354 -< %356 = fadd float %355, 0.000000e+00 -< %357 = bitcast i32 %157 to float -< %358 = bitcast i32 %157 to float -< %359 = fmul float %357, %358 -< %360 = fadd float %356, %359 -< %361 = call float @llvm.sqrt.f32.23(float %360) -< %362 = fneg float %169 -< %363 = fmul float %361, %362 -< %364 = fmul float %363, 0.000000e+00 -< %365 = bitcast i32 %157 to float -< %366 = fadd float %365, %364 ---- -> %354 = fmul float %352, %353 -> %355 = fadd float %354, 0.000000e+00 -> %356 = bitcast i32 %131 to float -> %357 = bitcast i32 %131 to float -> %358 = fmul float %356, %357 -> %359 = fadd float %355, %358 -> %360 = call float @llvm.sqrt.f32.23(float %359) -> %361 = fneg float %168 -> %362 = fmul float %360, %361 -> %363 = fmul float %362, 0.000000e+00 -> %364 = bitcast i32 %131 to float -> %365 = fadd float %364, %363 -> %366 = bitcast i32 %152 to float -869,887c874,892 -< %368 = bitcast i32 %152 to float -< %369 = fmul float %367, %368 -< %370 = fadd float %369, 0.000000e+00 -< %371 = bitcast i32 %157 to float -< %372 = bitcast i32 %157 to float -< %373 = fmul float %371, %372 -< %374 = fadd float %370, %373 -< %375 = call float @llvm.sqrt.f32.24(float %374) -< %376 = fneg float %169 -< %377 = fmul float %375, %376 -< %378 = fmul float %377, 0.000000e+00 -< %379 = bitcast i32 %157 to float -< %380 = fadd float %379, %378 -< %381 = fmul float %366, %380 -< %382 = fadd float %352, %381 -< %383 = call float @llvm.sqrt.f32.25(float %382) -< %384 = fadd float %383, 0.000000e+00 -< %385 = fdiv float %324, %384 -< %386 = fmul float %385, 2.000000e+00 ---- -> %368 = fmul float %366, %367 -> %369 = fadd float %368, 0.000000e+00 -> %370 = bitcast i32 %131 to float -> %371 = bitcast i32 %131 to float -> %372 = fmul float %370, %371 -> %373 = fadd float %369, %372 -> %374 = call float @llvm.sqrt.f32.24(float %373) -> %375 = fneg float %168 -> %376 = fmul float %374, %375 -> %377 = fmul float %376, 0.000000e+00 -> %378 = bitcast i32 
%131 to float -> %379 = fadd float %378, %377 -> %380 = fmul float %365, %379 -> %381 = fadd float %351, %380 -> %382 = call float @llvm.sqrt.f32.25(float %381) -> %383 = fadd float %382, 0.000000e+00 -> %384 = fdiv float %323, %383 -> %385 = fmul float %384, 2.000000e+00 -> %386 = bitcast i32 %152 to float -889,901c894,906 -< %388 = bitcast i32 %152 to float -< %389 = fmul float %387, %388 -< %390 = fadd float %389, 0.000000e+00 -< %391 = bitcast i32 %157 to float -< %392 = bitcast i32 %157 to float -< %393 = fmul float %391, %392 -< %394 = fadd float %390, %393 -< %395 = call float @llvm.sqrt.f32.26(float %394) -< %396 = fneg float %169 -< %397 = fmul float %395, %396 -< %398 = fmul float %397, 0.000000e+00 -< %399 = bitcast i32 %157 to float -< %400 = fadd float %399, %398 ---- -> %388 = fmul float %386, %387 -> %389 = fadd float %388, 0.000000e+00 -> %390 = bitcast i32 %131 to float -> %391 = bitcast i32 %131 to float -> %392 = fmul float %390, %391 -> %393 = fadd float %389, %392 -> %394 = call float @llvm.sqrt.f32.26(float %393) -> %395 = fneg float %168 -> %396 = fmul float %394, %395 -> %397 = fmul float %396, 0.000000e+00 -> %398 = bitcast i32 %131 to float -> %399 = fadd float %398, %397 -> %400 = bitcast i32 %152 to float -903,914c908,919 -< %402 = bitcast i32 %152 to float -< %403 = fmul float %401, %402 -< %404 = fadd float %403, 0.000000e+00 -< %405 = bitcast i32 %157 to float -< %406 = bitcast i32 %157 to float -< %407 = fmul float %405, %406 -< %408 = fadd float %404, %407 -< %409 = call float @llvm.sqrt.f32.27(float %408) -< %410 = fneg float %169 -< %411 = fmul float %409, %410 -< %412 = bitcast i32 %152 to float -< %413 = fadd float %412, %411 ---- -> %402 = fmul float %400, %401 -> %403 = fadd float %402, 0.000000e+00 -> %404 = bitcast i32 %131 to float -> %405 = bitcast i32 %131 to float -> %406 = fmul float %404, %405 -> %407 = fadd float %403, %406 -> %408 = call float @llvm.sqrt.f32.27(float %407) -> %409 = fneg float %168 -> %410 = fmul float %408, %409 -> %411 = bitcast i32 %152 to float -> %412 = fadd float %411, %410 -> %413 = bitcast i32 %152 to float -916,929c921,934 -< %415 = bitcast i32 %152 to float -< %416 = fmul float %414, %415 -< %417 = fadd float %416, 0.000000e+00 -< %418 = bitcast i32 %157 to float -< %419 = bitcast i32 %157 to float -< %420 = fmul float %418, %419 -< %421 = fadd float %417, %420 -< %422 = call float @llvm.sqrt.f32.28(float %421) -< %423 = fneg float %169 -< %424 = fmul float %422, %423 -< %425 = bitcast i32 %152 to float -< %426 = fadd float %425, %424 -< %427 = fmul float %413, %426 -< %428 = fadd float %427, 0.000000e+00 ---- -> %415 = fmul float %413, %414 -> %416 = fadd float %415, 0.000000e+00 -> %417 = bitcast i32 %131 to float -> %418 = bitcast i32 %131 to float -> %419 = fmul float %417, %418 -> %420 = fadd float %416, %419 -> %421 = call float @llvm.sqrt.f32.28(float %420) -> %422 = fneg float %168 -> %423 = fmul float %421, %422 -> %424 = bitcast i32 %152 to float -> %425 = fadd float %424, %423 -> %426 = fmul float %412, %425 -> %427 = fadd float %426, 0.000000e+00 -> %428 = bitcast i32 %152 to float -931,943c936,948 -< %430 = bitcast i32 %152 to float -< %431 = fmul float %429, %430 -< %432 = fadd float %431, 0.000000e+00 -< %433 = bitcast i32 %157 to float -< %434 = bitcast i32 %157 to float -< %435 = fmul float %433, %434 -< %436 = fadd float %432, %435 -< %437 = call float @llvm.sqrt.f32.29(float %436) -< %438 = fneg float %169 -< %439 = fmul float %437, %438 -< %440 = fmul float %439, 0.000000e+00 -< %441 = bitcast 
i32 %157 to float -< %442 = fadd float %441, %440 ---- -> %430 = fmul float %428, %429 -> %431 = fadd float %430, 0.000000e+00 -> %432 = bitcast i32 %131 to float -> %433 = bitcast i32 %131 to float -> %434 = fmul float %432, %433 -> %435 = fadd float %431, %434 -> %436 = call float @llvm.sqrt.f32.29(float %435) -> %437 = fneg float %168 -> %438 = fmul float %436, %437 -> %439 = fmul float %438, 0.000000e+00 -> %440 = bitcast i32 %131 to float -> %441 = fadd float %440, %439 -> %442 = bitcast i32 %152 to float -945,964c950,969 -< %444 = bitcast i32 %152 to float -< %445 = fmul float %443, %444 -< %446 = fadd float %445, 0.000000e+00 -< %447 = bitcast i32 %157 to float -< %448 = bitcast i32 %157 to float -< %449 = fmul float %447, %448 -< %450 = fadd float %446, %449 -< %451 = call float @llvm.sqrt.f32.30(float %450) -< %452 = fneg float %169 -< %453 = fmul float %451, %452 -< %454 = fmul float %453, 0.000000e+00 -< %455 = bitcast i32 %157 to float -< %456 = fadd float %455, %454 -< %457 = fmul float %442, %456 -< %458 = fadd float %428, %457 -< %459 = call float @llvm.sqrt.f32.31(float %458) -< %460 = fadd float %459, 0.000000e+00 -< %461 = fdiv float %400, %460 -< %462 = fmul float %386, %461 -< %463 = insertelement <4 x float> %311, float %462, i32 2 ---- -> %444 = fmul float %442, %443 -> %445 = fadd float %444, 0.000000e+00 -> %446 = bitcast i32 %131 to float -> %447 = bitcast i32 %131 to float -> %448 = fmul float %446, %447 -> %449 = fadd float %445, %448 -> %450 = call float @llvm.sqrt.f32.30(float %449) -> %451 = fneg float %168 -> %452 = fmul float %450, %451 -> %453 = fmul float %452, 0.000000e+00 -> %454 = bitcast i32 %131 to float -> %455 = fadd float %454, %453 -> %456 = fmul float %441, %455 -> %457 = fadd float %427, %456 -> %458 = call float @llvm.sqrt.f32.31(float %457) -> %459 = fadd float %458, 0.000000e+00 -> %460 = fdiv float %399, %459 -> %461 = fmul float %385, %460 -> %462 = insertelement <4 x float> %310, float %461, i32 2 -> %463 = bitcast i32 %152 to float -966,978c971,983 -< %465 = bitcast i32 %152 to float -< %466 = fmul float %464, %465 -< %467 = fadd float %466, 0.000000e+00 -< %468 = bitcast i32 %157 to float -< %469 = bitcast i32 %157 to float -< %470 = fmul float %468, %469 -< %471 = fadd float %467, %470 -< %472 = call float @llvm.sqrt.f32.32(float %471) -< %473 = fneg float %169 -< %474 = fmul float %472, %473 -< %475 = fmul float %474, 0.000000e+00 -< %476 = bitcast i32 %157 to float -< %477 = fadd float %476, %475 ---- -> %465 = fmul float %463, %464 -> %466 = fadd float %465, 0.000000e+00 -> %467 = bitcast i32 %131 to float -> %468 = bitcast i32 %131 to float -> %469 = fmul float %467, %468 -> %470 = fadd float %466, %469 -> %471 = call float @llvm.sqrt.f32.32(float %470) -> %472 = fneg float %168 -> %473 = fmul float %471, %472 -> %474 = fmul float %473, 0.000000e+00 -> %475 = bitcast i32 %131 to float -> %476 = fadd float %475, %474 -> %477 = bitcast i32 %152 to float -980,991c985,996 -< %479 = bitcast i32 %152 to float -< %480 = fmul float %478, %479 -< %481 = fadd float %480, 0.000000e+00 -< %482 = bitcast i32 %157 to float -< %483 = bitcast i32 %157 to float -< %484 = fmul float %482, %483 -< %485 = fadd float %481, %484 -< %486 = call float @llvm.sqrt.f32.33(float %485) -< %487 = fneg float %169 -< %488 = fmul float %486, %487 -< %489 = bitcast i32 %152 to float -< %490 = fadd float %489, %488 ---- -> %479 = fmul float %477, %478 -> %480 = fadd float %479, 0.000000e+00 -> %481 = bitcast i32 %131 to float -> %482 = bitcast i32 %131 to float -> 
%483 = fmul float %481, %482 -> %484 = fadd float %480, %483 -> %485 = call float @llvm.sqrt.f32.33(float %484) -> %486 = fneg float %168 -> %487 = fmul float %485, %486 -> %488 = bitcast i32 %152 to float -> %489 = fadd float %488, %487 -> %490 = bitcast i32 %152 to float -993,1006c998,1011 -< %492 = bitcast i32 %152 to float -< %493 = fmul float %491, %492 -< %494 = fadd float %493, 0.000000e+00 -< %495 = bitcast i32 %157 to float -< %496 = bitcast i32 %157 to float -< %497 = fmul float %495, %496 -< %498 = fadd float %494, %497 -< %499 = call float @llvm.sqrt.f32.34(float %498) -< %500 = fneg float %169 -< %501 = fmul float %499, %500 -< %502 = bitcast i32 %152 to float -< %503 = fadd float %502, %501 -< %504 = fmul float %490, %503 -< %505 = fadd float %504, 0.000000e+00 ---- -> %492 = fmul float %490, %491 -> %493 = fadd float %492, 0.000000e+00 -> %494 = bitcast i32 %131 to float -> %495 = bitcast i32 %131 to float -> %496 = fmul float %494, %495 -> %497 = fadd float %493, %496 -> %498 = call float @llvm.sqrt.f32.34(float %497) -> %499 = fneg float %168 -> %500 = fmul float %498, %499 -> %501 = bitcast i32 %152 to float -> %502 = fadd float %501, %500 -> %503 = fmul float %489, %502 -> %504 = fadd float %503, 0.000000e+00 -> %505 = bitcast i32 %152 to float -1008,1020c1013,1025 -< %507 = bitcast i32 %152 to float -< %508 = fmul float %506, %507 -< %509 = fadd float %508, 0.000000e+00 -< %510 = bitcast i32 %157 to float -< %511 = bitcast i32 %157 to float -< %512 = fmul float %510, %511 -< %513 = fadd float %509, %512 -< %514 = call float @llvm.sqrt.f32.35(float %513) -< %515 = fneg float %169 -< %516 = fmul float %514, %515 -< %517 = fmul float %516, 0.000000e+00 -< %518 = bitcast i32 %157 to float -< %519 = fadd float %518, %517 ---- -> %507 = fmul float %505, %506 -> %508 = fadd float %507, 0.000000e+00 -> %509 = bitcast i32 %131 to float -> %510 = bitcast i32 %131 to float -> %511 = fmul float %509, %510 -> %512 = fadd float %508, %511 -> %513 = call float @llvm.sqrt.f32.35(float %512) -> %514 = fneg float %168 -> %515 = fmul float %513, %514 -> %516 = fmul float %515, 0.000000e+00 -> %517 = bitcast i32 %131 to float -> %518 = fadd float %517, %516 -> %519 = bitcast i32 %152 to float -1022,1040c1027,1045 -< %521 = bitcast i32 %152 to float -< %522 = fmul float %520, %521 -< %523 = fadd float %522, 0.000000e+00 -< %524 = bitcast i32 %157 to float -< %525 = bitcast i32 %157 to float -< %526 = fmul float %524, %525 -< %527 = fadd float %523, %526 -< %528 = call float @llvm.sqrt.f32.36(float %527) -< %529 = fneg float %169 -< %530 = fmul float %528, %529 -< %531 = fmul float %530, 0.000000e+00 -< %532 = bitcast i32 %157 to float -< %533 = fadd float %532, %531 -< %534 = fmul float %519, %533 -< %535 = fadd float %505, %534 -< %536 = call float @llvm.sqrt.f32.37(float %535) -< %537 = fadd float %536, 0.000000e+00 -< %538 = fdiv float %477, %537 -< %539 = fmul float %538, 2.000000e+00 ---- -> %521 = fmul float %519, %520 -> %522 = fadd float %521, 0.000000e+00 -> %523 = bitcast i32 %131 to float -> %524 = bitcast i32 %131 to float -> %525 = fmul float %523, %524 -> %526 = fadd float %522, %525 -> %527 = call float @llvm.sqrt.f32.36(float %526) -> %528 = fneg float %168 -> %529 = fmul float %527, %528 -> %530 = fmul float %529, 0.000000e+00 -> %531 = bitcast i32 %131 to float -> %532 = fadd float %531, %530 -> %533 = fmul float %518, %532 -> %534 = fadd float %504, %533 -> %535 = call float @llvm.sqrt.f32.37(float %534) -> %536 = fadd float %535, 0.000000e+00 -> %537 = fdiv float %476, 
%536 -> %538 = fmul float %537, 2.000000e+00 -> %539 = bitcast i32 %152 to float -1042,1053c1047,1058 -< %541 = bitcast i32 %152 to float -< %542 = fmul float %540, %541 -< %543 = fadd float %542, 0.000000e+00 -< %544 = bitcast i32 %157 to float -< %545 = bitcast i32 %157 to float -< %546 = fmul float %544, %545 -< %547 = fadd float %543, %546 -< %548 = call float @llvm.sqrt.f32.38(float %547) -< %549 = fneg float %169 -< %550 = fmul float %548, %549 -< %551 = bitcast i32 %152 to float -< %552 = fadd float %551, %550 ---- -> %541 = fmul float %539, %540 -> %542 = fadd float %541, 0.000000e+00 -> %543 = bitcast i32 %131 to float -> %544 = bitcast i32 %131 to float -> %545 = fmul float %543, %544 -> %546 = fadd float %542, %545 -> %547 = call float @llvm.sqrt.f32.38(float %546) -> %548 = fneg float %168 -> %549 = fmul float %547, %548 -> %550 = bitcast i32 %152 to float -> %551 = fadd float %550, %549 -> %552 = bitcast i32 %152 to float -1055,1066c1060,1071 -< %554 = bitcast i32 %152 to float -< %555 = fmul float %553, %554 -< %556 = fadd float %555, 0.000000e+00 -< %557 = bitcast i32 %157 to float -< %558 = bitcast i32 %157 to float -< %559 = fmul float %557, %558 -< %560 = fadd float %556, %559 -< %561 = call float @llvm.sqrt.f32.39(float %560) -< %562 = fneg float %169 -< %563 = fmul float %561, %562 -< %564 = bitcast i32 %152 to float -< %565 = fadd float %564, %563 ---- -> %554 = fmul float %552, %553 -> %555 = fadd float %554, 0.000000e+00 -> %556 = bitcast i32 %131 to float -> %557 = bitcast i32 %131 to float -> %558 = fmul float %556, %557 -> %559 = fadd float %555, %558 -> %560 = call float @llvm.sqrt.f32.39(float %559) -> %561 = fneg float %168 -> %562 = fmul float %560, %561 -> %563 = bitcast i32 %152 to float -> %564 = fadd float %563, %562 -> %565 = bitcast i32 %152 to float -1068,1081c1073,1086 -< %567 = bitcast i32 %152 to float -< %568 = fmul float %566, %567 -< %569 = fadd float %568, 0.000000e+00 -< %570 = bitcast i32 %157 to float -< %571 = bitcast i32 %157 to float -< %572 = fmul float %570, %571 -< %573 = fadd float %569, %572 -< %574 = call float @llvm.sqrt.f32.40(float %573) -< %575 = fneg float %169 -< %576 = fmul float %574, %575 -< %577 = bitcast i32 %152 to float -< %578 = fadd float %577, %576 -< %579 = fmul float %565, %578 -< %580 = fadd float %579, 0.000000e+00 ---- -> %567 = fmul float %565, %566 -> %568 = fadd float %567, 0.000000e+00 -> %569 = bitcast i32 %131 to float -> %570 = bitcast i32 %131 to float -> %571 = fmul float %569, %570 -> %572 = fadd float %568, %571 -> %573 = call float @llvm.sqrt.f32.40(float %572) -> %574 = fneg float %168 -> %575 = fmul float %573, %574 -> %576 = bitcast i32 %152 to float -> %577 = fadd float %576, %575 -> %578 = fmul float %564, %577 -> %579 = fadd float %578, 0.000000e+00 -> %580 = bitcast i32 %152 to float -1083,1095c1088,1100 -< %582 = bitcast i32 %152 to float -< %583 = fmul float %581, %582 -< %584 = fadd float %583, 0.000000e+00 -< %585 = bitcast i32 %157 to float -< %586 = bitcast i32 %157 to float -< %587 = fmul float %585, %586 -< %588 = fadd float %584, %587 -< %589 = call float @llvm.sqrt.f32.41(float %588) -< %590 = fneg float %169 -< %591 = fmul float %589, %590 -< %592 = fmul float %591, 0.000000e+00 -< %593 = bitcast i32 %157 to float -< %594 = fadd float %593, %592 ---- -> %582 = fmul float %580, %581 -> %583 = fadd float %582, 0.000000e+00 -> %584 = bitcast i32 %131 to float -> %585 = bitcast i32 %131 to float -> %586 = fmul float %584, %585 -> %587 = fadd float %583, %586 -> %588 = call float 
@llvm.sqrt.f32.41(float %587) -> %589 = fneg float %168 -> %590 = fmul float %588, %589 -> %591 = fmul float %590, 0.000000e+00 -> %592 = bitcast i32 %131 to float -> %593 = fadd float %592, %591 -> %594 = bitcast i32 %152 to float -1097,1117c1102,1122 -< %596 = bitcast i32 %152 to float -< %597 = fmul float %595, %596 -< %598 = fadd float %597, 0.000000e+00 -< %599 = bitcast i32 %157 to float -< %600 = bitcast i32 %157 to float -< %601 = fmul float %599, %600 -< %602 = fadd float %598, %601 -< %603 = call float @llvm.sqrt.f32.42(float %602) -< %604 = fneg float %169 -< %605 = fmul float %603, %604 -< %606 = fmul float %605, 0.000000e+00 -< %607 = bitcast i32 %157 to float -< %608 = fadd float %607, %606 -< %609 = fmul float %594, %608 -< %610 = fadd float %580, %609 -< %611 = call float @llvm.sqrt.f32.43(float %610) -< %612 = fadd float %611, 0.000000e+00 -< %613 = fdiv float %552, %612 -< %614 = fmul float %539, %613 -< %615 = insertelement <4 x float> %463, float %614, i32 3 -< %616 = fsub <4 x float> , %615 ---- -> %596 = fmul float %594, %595 -> %597 = fadd float %596, 0.000000e+00 -> %598 = bitcast i32 %131 to float -> %599 = bitcast i32 %131 to float -> %600 = fmul float %598, %599 -> %601 = fadd float %597, %600 -> %602 = call float @llvm.sqrt.f32.42(float %601) -> %603 = fneg float %168 -> %604 = fmul float %602, %603 -> %605 = fmul float %604, 0.000000e+00 -> %606 = bitcast i32 %131 to float -> %607 = fadd float %606, %605 -> %608 = fmul float %593, %607 -> %609 = fadd float %579, %608 -> %610 = call float @llvm.sqrt.f32.43(float %609) -> %611 = fadd float %610, 0.000000e+00 -> %612 = fdiv float %551, %611 -> %613 = fmul float %538, %612 -> %614 = insertelement <4 x float> %462, float %613, i32 3 -> %615 = fsub <4 x float> , %614 -> %616 = bitcast i32 %152 to float -1119,1131c1124,1136 -< %618 = bitcast i32 %152 to float -< %619 = fmul float %617, %618 -< %620 = fadd float %619, 0.000000e+00 -< %621 = bitcast i32 %157 to float -< %622 = bitcast i32 %157 to float -< %623 = fmul float %621, %622 -< %624 = fadd float %620, %623 -< %625 = call float @llvm.sqrt.f32.44(float %624) -< %626 = fneg float %169 -< %627 = fmul float %625, %626 -< %628 = fmul float %627, 0.000000e+00 -< %629 = bitcast i32 %157 to float -< %630 = fadd float %629, %628 ---- -> %618 = fmul float %616, %617 -> %619 = fadd float %618, 0.000000e+00 -> %620 = bitcast i32 %131 to float -> %621 = bitcast i32 %131 to float -> %622 = fmul float %620, %621 -> %623 = fadd float %619, %622 -> %624 = call float @llvm.sqrt.f32.44(float %623) -> %625 = fneg float %168 -> %626 = fmul float %624, %625 -> %627 = fmul float %626, 0.000000e+00 -> %628 = bitcast i32 %131 to float -> %629 = fadd float %628, %627 -> %630 = bitcast i32 %152 to float -1133,1144c1138,1149 -< %632 = bitcast i32 %152 to float -< %633 = fmul float %631, %632 -< %634 = fadd float %633, 0.000000e+00 -< %635 = bitcast i32 %157 to float -< %636 = bitcast i32 %157 to float -< %637 = fmul float %635, %636 -< %638 = fadd float %634, %637 -< %639 = call float @llvm.sqrt.f32.45(float %638) -< %640 = fneg float %169 -< %641 = fmul float %639, %640 -< %642 = bitcast i32 %152 to float -< %643 = fadd float %642, %641 ---- -> %632 = fmul float %630, %631 -> %633 = fadd float %632, 0.000000e+00 -> %634 = bitcast i32 %131 to float -> %635 = bitcast i32 %131 to float -> %636 = fmul float %634, %635 -> %637 = fadd float %633, %636 -> %638 = call float @llvm.sqrt.f32.45(float %637) -> %639 = fneg float %168 -> %640 = fmul float %638, %639 -> %641 = bitcast i32 %152 to float 
-> %642 = fadd float %641, %640 -> %643 = bitcast i32 %152 to float -1146,1159c1151,1164 -< %645 = bitcast i32 %152 to float -< %646 = fmul float %644, %645 -< %647 = fadd float %646, 0.000000e+00 -< %648 = bitcast i32 %157 to float -< %649 = bitcast i32 %157 to float -< %650 = fmul float %648, %649 -< %651 = fadd float %647, %650 -< %652 = call float @llvm.sqrt.f32.46(float %651) -< %653 = fneg float %169 -< %654 = fmul float %652, %653 -< %655 = bitcast i32 %152 to float -< %656 = fadd float %655, %654 -< %657 = fmul float %643, %656 -< %658 = fadd float %657, 0.000000e+00 ---- -> %645 = fmul float %643, %644 -> %646 = fadd float %645, 0.000000e+00 -> %647 = bitcast i32 %131 to float -> %648 = bitcast i32 %131 to float -> %649 = fmul float %647, %648 -> %650 = fadd float %646, %649 -> %651 = call float @llvm.sqrt.f32.46(float %650) -> %652 = fneg float %168 -> %653 = fmul float %651, %652 -> %654 = bitcast i32 %152 to float -> %655 = fadd float %654, %653 -> %656 = fmul float %642, %655 -> %657 = fadd float %656, 0.000000e+00 -> %658 = bitcast i32 %152 to float -1161,1173c1166,1178 -< %660 = bitcast i32 %152 to float -< %661 = fmul float %659, %660 -< %662 = fadd float %661, 0.000000e+00 -< %663 = bitcast i32 %157 to float -< %664 = bitcast i32 %157 to float -< %665 = fmul float %663, %664 -< %666 = fadd float %662, %665 -< %667 = call float @llvm.sqrt.f32.47(float %666) -< %668 = fneg float %169 -< %669 = fmul float %667, %668 -< %670 = fmul float %669, 0.000000e+00 -< %671 = bitcast i32 %157 to float -< %672 = fadd float %671, %670 ---- -> %660 = fmul float %658, %659 -> %661 = fadd float %660, 0.000000e+00 -> %662 = bitcast i32 %131 to float -> %663 = bitcast i32 %131 to float -> %664 = fmul float %662, %663 -> %665 = fadd float %661, %664 -> %666 = call float @llvm.sqrt.f32.47(float %665) -> %667 = fneg float %168 -> %668 = fmul float %666, %667 -> %669 = fmul float %668, 0.000000e+00 -> %670 = bitcast i32 %131 to float -> %671 = fadd float %670, %669 -> %672 = bitcast i32 %152 to float -1175,1193c1180,1198 -< %674 = bitcast i32 %152 to float -< %675 = fmul float %673, %674 -< %676 = fadd float %675, 0.000000e+00 -< %677 = bitcast i32 %157 to float -< %678 = bitcast i32 %157 to float -< %679 = fmul float %677, %678 -< %680 = fadd float %676, %679 -< %681 = call float @llvm.sqrt.f32.48(float %680) -< %682 = fneg float %169 -< %683 = fmul float %681, %682 -< %684 = fmul float %683, 0.000000e+00 -< %685 = bitcast i32 %157 to float -< %686 = fadd float %685, %684 -< %687 = fmul float %672, %686 -< %688 = fadd float %658, %687 -< %689 = call float @llvm.sqrt.f32.49(float %688) -< %690 = fadd float %689, 0.000000e+00 -< %691 = fdiv float %630, %690 -< %692 = fmul float %691, 2.000000e+00 ---- -> %674 = fmul float %672, %673 -> %675 = fadd float %674, 0.000000e+00 -> %676 = bitcast i32 %131 to float -> %677 = bitcast i32 %131 to float -> %678 = fmul float %676, %677 -> %679 = fadd float %675, %678 -> %680 = call float @llvm.sqrt.f32.48(float %679) -> %681 = fneg float %168 -> %682 = fmul float %680, %681 -> %683 = fmul float %682, 0.000000e+00 -> %684 = bitcast i32 %131 to float -> %685 = fadd float %684, %683 -> %686 = fmul float %671, %685 -> %687 = fadd float %657, %686 -> %688 = call float @llvm.sqrt.f32.49(float %687) -> %689 = fadd float %688, 0.000000e+00 -> %690 = fdiv float %629, %689 -> %691 = fmul float %690, 2.000000e+00 -> %692 = bitcast i32 %152 to float -1195,1207c1200,1212 -< %694 = bitcast i32 %152 to float -< %695 = fmul float %693, %694 -< %696 = fadd float %695, 
0.000000e+00 -< %697 = bitcast i32 %157 to float -< %698 = bitcast i32 %157 to float -< %699 = fmul float %697, %698 -< %700 = fadd float %696, %699 -< %701 = call float @llvm.sqrt.f32.50(float %700) -< %702 = fneg float %169 -< %703 = fmul float %701, %702 -< %704 = fmul float %703, 0.000000e+00 -< %705 = bitcast i32 %157 to float -< %706 = fadd float %705, %704 ---- -> %694 = fmul float %692, %693 -> %695 = fadd float %694, 0.000000e+00 -> %696 = bitcast i32 %131 to float -> %697 = bitcast i32 %131 to float -> %698 = fmul float %696, %697 -> %699 = fadd float %695, %698 -> %700 = call float @llvm.sqrt.f32.50(float %699) -> %701 = fneg float %168 -> %702 = fmul float %700, %701 -> %703 = fmul float %702, 0.000000e+00 -> %704 = bitcast i32 %131 to float -> %705 = fadd float %704, %703 -> %706 = bitcast i32 %152 to float -1209,1220c1214,1225 -< %708 = bitcast i32 %152 to float -< %709 = fmul float %707, %708 -< %710 = fadd float %709, 0.000000e+00 -< %711 = bitcast i32 %157 to float -< %712 = bitcast i32 %157 to float -< %713 = fmul float %711, %712 -< %714 = fadd float %710, %713 -< %715 = call float @llvm.sqrt.f32.51(float %714) -< %716 = fneg float %169 -< %717 = fmul float %715, %716 -< %718 = bitcast i32 %152 to float -< %719 = fadd float %718, %717 ---- -> %708 = fmul float %706, %707 -> %709 = fadd float %708, 0.000000e+00 -> %710 = bitcast i32 %131 to float -> %711 = bitcast i32 %131 to float -> %712 = fmul float %710, %711 -> %713 = fadd float %709, %712 -> %714 = call float @llvm.sqrt.f32.51(float %713) -> %715 = fneg float %168 -> %716 = fmul float %714, %715 -> %717 = bitcast i32 %152 to float -> %718 = fadd float %717, %716 -> %719 = bitcast i32 %152 to float -1222,1235c1227,1240 -< %721 = bitcast i32 %152 to float -< %722 = fmul float %720, %721 -< %723 = fadd float %722, 0.000000e+00 -< %724 = bitcast i32 %157 to float -< %725 = bitcast i32 %157 to float -< %726 = fmul float %724, %725 -< %727 = fadd float %723, %726 -< %728 = call float @llvm.sqrt.f32.52(float %727) -< %729 = fneg float %169 -< %730 = fmul float %728, %729 -< %731 = bitcast i32 %152 to float -< %732 = fadd float %731, %730 -< %733 = fmul float %719, %732 -< %734 = fadd float %733, 0.000000e+00 ---- -> %721 = fmul float %719, %720 -> %722 = fadd float %721, 0.000000e+00 -> %723 = bitcast i32 %131 to float -> %724 = bitcast i32 %131 to float -> %725 = fmul float %723, %724 -> %726 = fadd float %722, %725 -> %727 = call float @llvm.sqrt.f32.52(float %726) -> %728 = fneg float %168 -> %729 = fmul float %727, %728 -> %730 = bitcast i32 %152 to float -> %731 = fadd float %730, %729 -> %732 = fmul float %718, %731 -> %733 = fadd float %732, 0.000000e+00 -> %734 = bitcast i32 %152 to float -1237,1249c1242,1254 -< %736 = bitcast i32 %152 to float -< %737 = fmul float %735, %736 -< %738 = fadd float %737, 0.000000e+00 -< %739 = bitcast i32 %157 to float -< %740 = bitcast i32 %157 to float -< %741 = fmul float %739, %740 -< %742 = fadd float %738, %741 -< %743 = call float @llvm.sqrt.f32.53(float %742) -< %744 = fneg float %169 -< %745 = fmul float %743, %744 -< %746 = fmul float %745, 0.000000e+00 -< %747 = bitcast i32 %157 to float -< %748 = fadd float %747, %746 ---- -> %736 = fmul float %734, %735 -> %737 = fadd float %736, 0.000000e+00 -> %738 = bitcast i32 %131 to float -> %739 = bitcast i32 %131 to float -> %740 = fmul float %738, %739 -> %741 = fadd float %737, %740 -> %742 = call float @llvm.sqrt.f32.53(float %741) -> %743 = fneg float %168 -> %744 = fmul float %742, %743 -> %745 = fmul float %744, 
0.000000e+00 -> %746 = bitcast i32 %131 to float -> %747 = fadd float %746, %745 -> %748 = bitcast i32 %152 to float -1251,1309c1256,1314 -< %750 = bitcast i32 %152 to float -< %751 = fmul float %749, %750 -< %752 = fadd float %751, 0.000000e+00 -< %753 = bitcast i32 %157 to float -< %754 = bitcast i32 %157 to float -< %755 = fmul float %753, %754 -< %756 = fadd float %752, %755 -< %757 = call float @llvm.sqrt.f32.54(float %756) -< %758 = fneg float %169 -< %759 = fmul float %757, %758 -< %760 = fmul float %759, 0.000000e+00 -< %761 = bitcast i32 %157 to float -< %762 = fadd float %761, %760 -< %763 = fmul float %748, %762 -< %764 = fadd float %734, %763 -< %765 = call float @llvm.sqrt.f32.55(float %764) -< %766 = fadd float %765, 0.000000e+00 -< %767 = fdiv float %706, %766 -< %768 = fmul float %692, %767 -< %769 = fsub float 1.000000e+00, %768 -< %770 = insertelement <4 x float> zeroinitializer, float %769, i32 0 -< %771 = insertelement <4 x float> %770, float 0.000000e+00, i32 1 -< %772 = insertelement <4 x float> %771, float 0.000000e+00, i32 2 -< %773 = insertelement <4 x float> %772, float 0.000000e+00, i32 3 -< %774 = shufflevector <4 x float> %616, <4 x float> %773, <8 x i32> -< %775 = extractelement <8 x float> %774, i32 0 -< %776 = bitcast i32* %23 to float* -< %777 = getelementptr float, float* %2, i32 0 -< %778 = getelementptr inbounds float, float* %777, i64 3 -< %779 = bitcast float* %778 to i32* -< %780 = bitcast i32* %779 to float* -< store float %775, float* %780, align 4 -< %781 = extractelement <8 x float> %774, i32 1 -< %782 = bitcast i32* %60 to float* -< %783 = alloca [4 x float], align 16 -< %784 = bitcast [4 x float]* %783 to i32* -< %785 = bitcast i32* %784 to float* -< store float %781, float* %785, align 4 -< %786 = extractelement <8 x float> %774, i32 2 -< %787 = bitcast i32* %63 to float* -< %788 = getelementptr inbounds [4 x float], [4 x float]* %783, i64 0, i64 1 -< %789 = bitcast float* %788 to i32* -< %790 = bitcast i32* %789 to float* -< store float %786, float* %790, align 4 -< %791 = extractelement <8 x float> %774, i32 3 -< %792 = bitcast i32* %66 to float* -< %793 = getelementptr inbounds [4 x float], [4 x float]* %783, i64 0, i64 2 -< %794 = bitcast float* %793 to i32* -< %795 = bitcast i32* %794 to float* -< store float %791, float* %795, align 4 -< %796 = extractelement <8 x float> %774, i32 4 -< %797 = bitcast i32* %69 to float* -< %798 = getelementptr inbounds [4 x float], [4 x float]* %783, i64 0, i64 3 -< %799 = bitcast float* %798 to i32* -< %800 = bitcast i32* %799 to float* -< store float %796, float* %800, align 4 -< %801 = bitcast float* %1 to i8* -< %802 = bitcast [4 x float]* %783 to i8* -< call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %801, i8* nonnull align 16 dereferenceable(16) %802, i64 16, i1 false) ---- -> %750 = fmul float %748, %749 -> %751 = fadd float %750, 0.000000e+00 -> %752 = bitcast i32 %131 to float -> %753 = bitcast i32 %131 to float -> %754 = fmul float %752, %753 -> %755 = fadd float %751, %754 -> %756 = call float @llvm.sqrt.f32.54(float %755) -> %757 = fneg float %168 -> %758 = fmul float %756, %757 -> %759 = fmul float %758, 0.000000e+00 -> %760 = bitcast i32 %131 to float -> %761 = fadd float %760, %759 -> %762 = fmul float %747, %761 -> %763 = fadd float %733, %762 -> %764 = call float @llvm.sqrt.f32.55(float %763) -> %765 = fadd float %764, 0.000000e+00 -> %766 = fdiv float %705, %765 -> %767 = fmul float %691, %766 -> %768 = fsub float 1.000000e+00, %767 -> %769 = insertelement <4 
x float> zeroinitializer, float %768, i32 0 -> %770 = insertelement <4 x float> %769, float 0.000000e+00, i32 1 -> %771 = insertelement <4 x float> %770, float 0.000000e+00, i32 2 -> %772 = insertelement <4 x float> %771, float 0.000000e+00, i32 3 -> %773 = shufflevector <4 x float> %615, <4 x float> %772, <8 x i32> -> %774 = extractelement <8 x float> %773, i32 0 -> %775 = bitcast i32* %23 to float* -> %776 = getelementptr float, float* %2, i32 0 -> %777 = getelementptr inbounds float, float* %776, i64 3 -> %778 = bitcast float* %777 to i32* -> %779 = bitcast i32* %778 to float* -> store float %774, float* %779, align 4 -> %780 = extractelement <8 x float> %773, i32 1 -> %781 = bitcast i32* %60 to float* -> %782 = alloca [4 x float], align 16 -> %783 = bitcast [4 x float]* %782 to i32* -> %784 = bitcast i32* %783 to float* -> store float %780, float* %784, align 4 -> %785 = extractelement <8 x float> %773, i32 2 -> %786 = bitcast i32* %63 to float* -> %787 = getelementptr inbounds [4 x float], [4 x float]* %782, i64 0, i64 1 -> %788 = bitcast float* %787 to i32* -> %789 = bitcast i32* %788 to float* -> store float %785, float* %789, align 4 -> %790 = extractelement <8 x float> %773, i32 3 -> %791 = bitcast i32* %66 to float* -> %792 = getelementptr inbounds [4 x float], [4 x float]* %782, i64 0, i64 2 -> %793 = bitcast float* %792 to i32* -> %794 = bitcast i32* %793 to float* -> store float %790, float* %794, align 4 -> %795 = extractelement <8 x float> %773, i32 4 -> %796 = bitcast i32* %69 to float* -> %797 = getelementptr inbounds [4 x float], [4 x float]* %782, i64 0, i64 3 -> %798 = bitcast float* %797 to i32* -> %799 = bitcast i32* %798 to float* -> store float %795, float* %799, align 4 -> %800 = bitcast float* %1 to i8* -> %801 = alloca [4 x float], align 16 -> %802 = bitcast [4 x float]* %801 to i8* -> call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %800, i8* nonnull align 16 dereferenceable(16) %802, i64 16, i1 false) -1311,3267c1316,3271 -< %803 = getelementptr float, float* %0, i32 0 -< %804 = bitcast float* %803 to i32* -< %805 = load i32, i32* %804, align 4 -< %806 = bitcast i32 %805 to float -< %807 = bitcast i32 %805 to float -< %808 = fmul float %806, %807 -< %809 = fadd float %808, 0.000000e+00 -< %810 = bitcast i32 %157 to float -< %811 = bitcast i32 %157 to float -< %812 = fmul float %810, %811 -< %813 = fadd float %809, %812 -< %814 = call float @llvm.sqrt.f32.56(float %813) -< %815 = sitofp i32 %168 to float -< %816 = fneg float %815 -< %817 = fmul float %814, %816 -< %818 = bitcast i32 %805 to float -< %819 = fadd float %818, %817 -< %820 = bitcast i32 %805 to float -< %821 = bitcast i32 %805 to float -< %822 = fmul float %820, %821 -< %823 = fadd float %822, 0.000000e+00 -< %824 = bitcast i32 %157 to float -< %825 = bitcast i32 %157 to float -< %826 = fmul float %824, %825 -< %827 = fadd float %823, %826 -< %828 = call float @llvm.sqrt.f32.57(float %827) -< %829 = fneg float %815 -< %830 = fmul float %828, %829 -< %831 = bitcast i32 %805 to float -< %832 = fadd float %831, %830 -< %833 = bitcast i32 %805 to float -< %834 = bitcast i32 %805 to float -< %835 = fmul float %833, %834 -< %836 = fadd float %835, 0.000000e+00 -< %837 = bitcast i32 %157 to float -< %838 = bitcast i32 %157 to float -< %839 = fmul float %837, %838 -< %840 = fadd float %836, %839 -< %841 = call float @llvm.sqrt.f32.58(float %840) -< %842 = fneg float %815 -< %843 = fmul float %841, %842 -< %844 = bitcast i32 %805 to float -< %845 = fadd float %844, %843 -< %846 = 
fmul float %832, %845 -< %847 = fadd float %846, 0.000000e+00 -< %848 = bitcast i32 %805 to float -< %849 = bitcast i32 %805 to float -< %850 = fmul float %848, %849 -< %851 = fadd float %850, 0.000000e+00 -< %852 = bitcast i32 %157 to float -< %853 = bitcast i32 %157 to float -< %854 = fmul float %852, %853 -< %855 = fadd float %851, %854 -< %856 = call float @llvm.sqrt.f32.59(float %855) -< %857 = fneg float %815 -< %858 = fmul float %856, %857 -< %859 = fmul float %858, 0.000000e+00 -< %860 = bitcast i32 %157 to float -< %861 = fadd float %860, %859 -< %862 = bitcast i32 %805 to float -< %863 = bitcast i32 %805 to float -< %864 = fmul float %862, %863 -< %865 = fadd float %864, 0.000000e+00 -< %866 = bitcast i32 %157 to float -< %867 = bitcast i32 %157 to float -< %868 = fmul float %866, %867 -< %869 = fadd float %865, %868 -< %870 = call float @llvm.sqrt.f32.60(float %869) -< %871 = fneg float %815 -< %872 = fmul float %870, %871 -< %873 = fmul float %872, 0.000000e+00 -< %874 = bitcast i32 %157 to float -< %875 = fadd float %874, %873 -< %876 = fmul float %861, %875 -< %877 = fadd float %847, %876 -< %878 = call float @llvm.sqrt.f32.61(float %877) -< %879 = fadd float %878, 0.000000e+00 -< %880 = fdiv float %819, %879 -< %881 = fmul float %880, 2.000000e+00 -< %882 = bitcast i32 %805 to float -< %883 = bitcast i32 %805 to float -< %884 = fmul float %882, %883 -< %885 = fadd float %884, 0.000000e+00 -< %886 = bitcast i32 %157 to float -< %887 = bitcast i32 %157 to float -< %888 = fmul float %886, %887 -< %889 = fadd float %885, %888 -< %890 = call float @llvm.sqrt.f32.62(float %889) -< %891 = fneg float %815 -< %892 = fmul float %890, %891 -< %893 = bitcast i32 %805 to float -< %894 = fadd float %893, %892 -< %895 = bitcast i32 %805 to float -< %896 = bitcast i32 %805 to float -< %897 = fmul float %895, %896 -< %898 = fadd float %897, 0.000000e+00 -< %899 = bitcast i32 %157 to float -< %900 = bitcast i32 %157 to float -< %901 = fmul float %899, %900 -< %902 = fadd float %898, %901 -< %903 = call float @llvm.sqrt.f32.63(float %902) -< %904 = fneg float %815 -< %905 = fmul float %903, %904 -< %906 = bitcast i32 %805 to float -< %907 = fadd float %906, %905 -< %908 = bitcast i32 %805 to float -< %909 = bitcast i32 %805 to float -< %910 = fmul float %908, %909 -< %911 = fadd float %910, 0.000000e+00 -< %912 = bitcast i32 %157 to float -< %913 = bitcast i32 %157 to float -< %914 = fmul float %912, %913 -< %915 = fadd float %911, %914 -< %916 = call float @llvm.sqrt.f32.64(float %915) -< %917 = fneg float %815 -< %918 = fmul float %916, %917 -< %919 = bitcast i32 %805 to float -< %920 = fadd float %919, %918 -< %921 = fmul float %907, %920 -< %922 = fadd float %921, 0.000000e+00 -< %923 = bitcast i32 %805 to float -< %924 = bitcast i32 %805 to float -< %925 = fmul float %923, %924 -< %926 = fadd float %925, 0.000000e+00 -< %927 = bitcast i32 %157 to float -< %928 = bitcast i32 %157 to float -< %929 = fmul float %927, %928 -< %930 = fadd float %926, %929 -< %931 = call float @llvm.sqrt.f32.65(float %930) -< %932 = fneg float %815 -< %933 = fmul float %931, %932 -< %934 = fmul float %933, 0.000000e+00 -< %935 = bitcast i32 %157 to float -< %936 = fadd float %935, %934 -< %937 = bitcast i32 %805 to float -< %938 = bitcast i32 %805 to float -< %939 = fmul float %937, %938 -< %940 = fadd float %939, 0.000000e+00 -< %941 = bitcast i32 %157 to float -< %942 = bitcast i32 %157 to float -< %943 = fmul float %941, %942 -< %944 = fadd float %940, %943 -< %945 = call float @llvm.sqrt.f32.66(float %944) 
-< %946 = fneg float %815 -< %947 = fmul float %945, %946 -< %948 = fmul float %947, 0.000000e+00 -< %949 = bitcast i32 %157 to float -< %950 = fadd float %949, %948 -< %951 = fmul float %936, %950 -< %952 = fadd float %922, %951 -< %953 = call float @llvm.sqrt.f32.67(float %952) -< %954 = fadd float %953, 0.000000e+00 -< %955 = fdiv float %894, %954 -< %956 = fmul float %881, %955 -< %957 = fsub float 1.000000e+00, %956 -< %958 = insertelement <4 x float> zeroinitializer, float %957, i32 0 -< %959 = insertelement <4 x float> %958, float 0.000000e+00, i32 1 -< %960 = insertelement <4 x float> %959, float 0.000000e+00, i32 2 -< %961 = insertelement <4 x float> %960, float 0.000000e+00, i32 3 -< %962 = getelementptr float, float* %0, i32 0 -< %963 = load float, float* %962, align 4 -< %964 = insertelement <4 x float> zeroinitializer, float %963, i32 0 -< %965 = insertelement <4 x float> %964, float 0.000000e+00, i32 1 -< %966 = insertelement <4 x float> %965, float 0.000000e+00, i32 2 -< %967 = insertelement <4 x float> %966, float 0.000000e+00, i32 3 -< %968 = call <4 x float> @llvm.fma.f32.68(<4 x float> %961, <4 x float> %967, <4 x float> zeroinitializer) -< %969 = extractelement <4 x float> %968, i32 0 -< store float %969, float* %2, align 4 -< %970 = load i32, i32* %804, align 4 -< %971 = bitcast i32 %970 to float -< %972 = bitcast i32 %970 to float -< %973 = fmul float %971, %972 -< %974 = fadd float %973, 0.000000e+00 -< %975 = bitcast i32 %157 to float -< %976 = bitcast i32 %157 to float -< %977 = fmul float %975, %976 -< %978 = fadd float %974, %977 -< %979 = call float @llvm.sqrt.f32.69(float %978) -< %980 = fneg float %815 -< %981 = fmul float %979, %980 -< %982 = bitcast i32 %970 to float -< %983 = fadd float %982, %981 -< %984 = bitcast i32 %970 to float -< %985 = bitcast i32 %970 to float -< %986 = fmul float %984, %985 -< %987 = fadd float %986, 0.000000e+00 -< %988 = bitcast i32 %157 to float -< %989 = bitcast i32 %157 to float -< %990 = fmul float %988, %989 -< %991 = fadd float %987, %990 -< %992 = call float @llvm.sqrt.f32.70(float %991) -< %993 = fneg float %815 -< %994 = fmul float %992, %993 -< %995 = bitcast i32 %970 to float -< %996 = fadd float %995, %994 -< %997 = bitcast i32 %970 to float -< %998 = bitcast i32 %970 to float -< %999 = fmul float %997, %998 -< %1000 = fadd float %999, 0.000000e+00 -< %1001 = bitcast i32 %157 to float -< %1002 = bitcast i32 %157 to float -< %1003 = fmul float %1001, %1002 -< %1004 = fadd float %1000, %1003 -< %1005 = call float @llvm.sqrt.f32.71(float %1004) -< %1006 = fneg float %815 -< %1007 = fmul float %1005, %1006 -< %1008 = bitcast i32 %970 to float -< %1009 = fadd float %1008, %1007 -< %1010 = fmul float %996, %1009 -< %1011 = fadd float %1010, 0.000000e+00 -< %1012 = bitcast i32 %970 to float -< %1013 = bitcast i32 %970 to float -< %1014 = fmul float %1012, %1013 -< %1015 = fadd float %1014, 0.000000e+00 -< %1016 = bitcast i32 %157 to float -< %1017 = bitcast i32 %157 to float -< %1018 = fmul float %1016, %1017 -< %1019 = fadd float %1015, %1018 -< %1020 = call float @llvm.sqrt.f32.72(float %1019) -< %1021 = fneg float %815 -< %1022 = fmul float %1020, %1021 -< %1023 = fmul float %1022, 0.000000e+00 -< %1024 = bitcast i32 %157 to float -< %1025 = fadd float %1024, %1023 -< %1026 = bitcast i32 %970 to float -< %1027 = bitcast i32 %970 to float -< %1028 = fmul float %1026, %1027 -< %1029 = fadd float %1028, 0.000000e+00 -< %1030 = bitcast i32 %157 to float -< %1031 = bitcast i32 %157 to float -< %1032 = fmul float %1030, %1031 
-< %1033 = fadd float %1029, %1032 -< %1034 = call float @llvm.sqrt.f32.73(float %1033) -< %1035 = fneg float %815 -< %1036 = fmul float %1034, %1035 -< %1037 = fmul float %1036, 0.000000e+00 -< %1038 = bitcast i32 %157 to float -< %1039 = fadd float %1038, %1037 -< %1040 = fmul float %1025, %1039 -< %1041 = fadd float %1011, %1040 -< %1042 = call float @llvm.sqrt.f32.74(float %1041) -< %1043 = fadd float %1042, 0.000000e+00 -< %1044 = fdiv float %983, %1043 -< %1045 = fmul float %1044, 2.000000e+00 -< %1046 = bitcast i32 %970 to float -< %1047 = bitcast i32 %970 to float -< %1048 = fmul float %1046, %1047 -< %1049 = fadd float %1048, 0.000000e+00 -< %1050 = bitcast i32 %157 to float -< %1051 = bitcast i32 %157 to float -< %1052 = fmul float %1050, %1051 -< %1053 = fadd float %1049, %1052 -< %1054 = call float @llvm.sqrt.f32.75(float %1053) -< %1055 = fneg float %815 -< %1056 = fmul float %1054, %1055 -< %1057 = bitcast i32 %970 to float -< %1058 = fadd float %1057, %1056 -< %1059 = bitcast i32 %970 to float -< %1060 = bitcast i32 %970 to float -< %1061 = fmul float %1059, %1060 -< %1062 = fadd float %1061, 0.000000e+00 -< %1063 = bitcast i32 %157 to float -< %1064 = bitcast i32 %157 to float -< %1065 = fmul float %1063, %1064 -< %1066 = fadd float %1062, %1065 -< %1067 = call float @llvm.sqrt.f32.76(float %1066) -< %1068 = fneg float %815 -< %1069 = fmul float %1067, %1068 -< %1070 = bitcast i32 %970 to float -< %1071 = fadd float %1070, %1069 -< %1072 = bitcast i32 %970 to float -< %1073 = bitcast i32 %970 to float -< %1074 = fmul float %1072, %1073 -< %1075 = fadd float %1074, 0.000000e+00 -< %1076 = bitcast i32 %157 to float -< %1077 = bitcast i32 %157 to float -< %1078 = fmul float %1076, %1077 -< %1079 = fadd float %1075, %1078 -< %1080 = call float @llvm.sqrt.f32.77(float %1079) -< %1081 = fneg float %815 -< %1082 = fmul float %1080, %1081 -< %1083 = bitcast i32 %970 to float -< %1084 = fadd float %1083, %1082 -< %1085 = fmul float %1071, %1084 -< %1086 = fadd float %1085, 0.000000e+00 -< %1087 = bitcast i32 %970 to float -< %1088 = bitcast i32 %970 to float -< %1089 = fmul float %1087, %1088 -< %1090 = fadd float %1089, 0.000000e+00 -< %1091 = bitcast i32 %157 to float -< %1092 = bitcast i32 %157 to float -< %1093 = fmul float %1091, %1092 -< %1094 = fadd float %1090, %1093 -< %1095 = call float @llvm.sqrt.f32.78(float %1094) -< %1096 = fneg float %815 -< %1097 = fmul float %1095, %1096 -< %1098 = fmul float %1097, 0.000000e+00 -< %1099 = bitcast i32 %157 to float -< %1100 = fadd float %1099, %1098 -< %1101 = bitcast i32 %970 to float -< %1102 = bitcast i32 %970 to float -< %1103 = fmul float %1101, %1102 -< %1104 = fadd float %1103, 0.000000e+00 -< %1105 = bitcast i32 %157 to float -< %1106 = bitcast i32 %157 to float -< %1107 = fmul float %1105, %1106 -< %1108 = fadd float %1104, %1107 -< %1109 = call float @llvm.sqrt.f32.79(float %1108) -< %1110 = fneg float %815 -< %1111 = fmul float %1109, %1110 -< %1112 = fmul float %1111, 0.000000e+00 -< %1113 = bitcast i32 %157 to float -< %1114 = fadd float %1113, %1112 -< %1115 = fmul float %1100, %1114 -< %1116 = fadd float %1086, %1115 -< %1117 = call float @llvm.sqrt.f32.80(float %1116) -< %1118 = fadd float %1117, 0.000000e+00 -< %1119 = fdiv float %1058, %1118 -< %1120 = fmul float %1045, %1119 -< %1121 = fsub float 1.000000e+00, %1120 -< %1122 = fmul float %1121, %963 -< %1123 = fadd float %1122, 0.000000e+00 -< %1124 = bitcast i32 %970 to float -< %1125 = bitcast i32 %970 to float -< %1126 = fmul float %1124, %1125 -< %1127 = fadd 
float %1126, 0.000000e+00 -< %1128 = bitcast i32 %157 to float -< %1129 = bitcast i32 %157 to float -< %1130 = fmul float %1128, %1129 -< %1131 = fadd float %1127, %1130 -< %1132 = call float @llvm.sqrt.f32.81(float %1131) -< %1133 = fneg float %815 -< %1134 = fmul float %1132, %1133 -< %1135 = bitcast i32 %970 to float -< %1136 = fadd float %1135, %1134 -< %1137 = bitcast i32 %970 to float -< %1138 = bitcast i32 %970 to float -< %1139 = fmul float %1137, %1138 -< %1140 = fadd float %1139, 0.000000e+00 -< %1141 = bitcast i32 %157 to float -< %1142 = bitcast i32 %157 to float -< %1143 = fmul float %1141, %1142 -< %1144 = fadd float %1140, %1143 -< %1145 = call float @llvm.sqrt.f32.82(float %1144) -< %1146 = fneg float %815 -< %1147 = fmul float %1145, %1146 -< %1148 = bitcast i32 %970 to float -< %1149 = fadd float %1148, %1147 -< %1150 = bitcast i32 %970 to float -< %1151 = bitcast i32 %970 to float -< %1152 = fmul float %1150, %1151 -< %1153 = fadd float %1152, 0.000000e+00 -< %1154 = bitcast i32 %157 to float -< %1155 = bitcast i32 %157 to float -< %1156 = fmul float %1154, %1155 -< %1157 = fadd float %1153, %1156 -< %1158 = call float @llvm.sqrt.f32.83(float %1157) -< %1159 = fneg float %815 -< %1160 = fmul float %1158, %1159 -< %1161 = bitcast i32 %970 to float -< %1162 = fadd float %1161, %1160 -< %1163 = fmul float %1149, %1162 -< %1164 = fadd float %1163, 0.000000e+00 -< %1165 = bitcast i32 %970 to float -< %1166 = bitcast i32 %970 to float -< %1167 = fmul float %1165, %1166 -< %1168 = fadd float %1167, 0.000000e+00 -< %1169 = bitcast i32 %157 to float -< %1170 = bitcast i32 %157 to float -< %1171 = fmul float %1169, %1170 -< %1172 = fadd float %1168, %1171 -< %1173 = call float @llvm.sqrt.f32.84(float %1172) -< %1174 = fneg float %815 -< %1175 = fmul float %1173, %1174 -< %1176 = fmul float %1175, 0.000000e+00 -< %1177 = bitcast i32 %157 to float -< %1178 = fadd float %1177, %1176 -< %1179 = bitcast i32 %970 to float -< %1180 = bitcast i32 %970 to float -< %1181 = fmul float %1179, %1180 -< %1182 = fadd float %1181, 0.000000e+00 -< %1183 = bitcast i32 %157 to float -< %1184 = bitcast i32 %157 to float -< %1185 = fmul float %1183, %1184 -< %1186 = fadd float %1182, %1185 -< %1187 = call float @llvm.sqrt.f32.85(float %1186) -< %1188 = fneg float %815 -< %1189 = fmul float %1187, %1188 -< %1190 = fmul float %1189, 0.000000e+00 -< %1191 = bitcast i32 %157 to float -< %1192 = fadd float %1191, %1190 -< %1193 = fmul float %1178, %1192 -< %1194 = fadd float %1164, %1193 -< %1195 = call float @llvm.sqrt.f32.86(float %1194) -< %1196 = fadd float %1195, 0.000000e+00 -< %1197 = fdiv float %1136, %1196 -< %1198 = fmul float %1197, 2.000000e+00 -< %1199 = bitcast i32 %970 to float -< %1200 = bitcast i32 %970 to float -< %1201 = fmul float %1199, %1200 -< %1202 = fadd float %1201, 0.000000e+00 -< %1203 = bitcast i32 %157 to float -< %1204 = bitcast i32 %157 to float -< %1205 = fmul float %1203, %1204 -< %1206 = fadd float %1202, %1205 -< %1207 = call float @llvm.sqrt.f32.87(float %1206) -< %1208 = fneg float %815 -< %1209 = fmul float %1207, %1208 -< %1210 = fmul float %1209, 0.000000e+00 -< %1211 = bitcast i32 %157 to float -< %1212 = fadd float %1211, %1210 -< %1213 = bitcast i32 %970 to float -< %1214 = bitcast i32 %970 to float -< %1215 = fmul float %1213, %1214 -< %1216 = fadd float %1215, 0.000000e+00 -< %1217 = bitcast i32 %157 to float -< %1218 = bitcast i32 %157 to float -< %1219 = fmul float %1217, %1218 -< %1220 = fadd float %1216, %1219 -< %1221 = call float @llvm.sqrt.f32.88(float 
%1220) -< %1222 = fneg float %815 -< %1223 = fmul float %1221, %1222 -< %1224 = bitcast i32 %970 to float -< %1225 = fadd float %1224, %1223 -< %1226 = bitcast i32 %970 to float -< %1227 = bitcast i32 %970 to float -< %1228 = fmul float %1226, %1227 -< %1229 = fadd float %1228, 0.000000e+00 -< %1230 = bitcast i32 %157 to float -< %1231 = bitcast i32 %157 to float -< %1232 = fmul float %1230, %1231 -< %1233 = fadd float %1229, %1232 -< %1234 = call float @llvm.sqrt.f32.89(float %1233) -< %1235 = fneg float %815 -< %1236 = fmul float %1234, %1235 -< %1237 = bitcast i32 %970 to float -< %1238 = fadd float %1237, %1236 -< %1239 = fmul float %1225, %1238 -< %1240 = fadd float %1239, 0.000000e+00 -< %1241 = bitcast i32 %970 to float -< %1242 = bitcast i32 %970 to float -< %1243 = fmul float %1241, %1242 -< %1244 = fadd float %1243, 0.000000e+00 -< %1245 = bitcast i32 %157 to float -< %1246 = bitcast i32 %157 to float -< %1247 = fmul float %1245, %1246 -< %1248 = fadd float %1244, %1247 -< %1249 = call float @llvm.sqrt.f32.90(float %1248) -< %1250 = fneg float %815 -< %1251 = fmul float %1249, %1250 -< %1252 = fmul float %1251, 0.000000e+00 -< %1253 = bitcast i32 %157 to float -< %1254 = fadd float %1253, %1252 -< %1255 = bitcast i32 %970 to float -< %1256 = bitcast i32 %970 to float -< %1257 = fmul float %1255, %1256 -< %1258 = fadd float %1257, 0.000000e+00 -< %1259 = bitcast i32 %157 to float -< %1260 = bitcast i32 %157 to float -< %1261 = fmul float %1259, %1260 -< %1262 = fadd float %1258, %1261 -< %1263 = call float @llvm.sqrt.f32.91(float %1262) -< %1264 = fneg float %815 -< %1265 = fmul float %1263, %1264 -< %1266 = fmul float %1265, 0.000000e+00 -< %1267 = bitcast i32 %157 to float -< %1268 = fadd float %1267, %1266 -< %1269 = fmul float %1254, %1268 -< %1270 = fadd float %1240, %1269 -< %1271 = call float @llvm.sqrt.f32.92(float %1270) -< %1272 = fadd float %1271, 0.000000e+00 -< %1273 = fdiv float %1212, %1272 -< %1274 = fmul float %1198, %1273 -< %1275 = fneg float %1274 -< %1276 = load float, float* %129, align 4 -< %1277 = fmul float %1275, %1276 -< %1278 = fadd float %1123, %1277 -< %1279 = insertelement <4 x float> zeroinitializer, float %1278, i32 0 -< %1280 = insertelement <4 x float> %1279, float 0.000000e+00, i32 1 -< %1281 = insertelement <4 x float> %1280, float 0.000000e+00, i32 2 -< %1282 = insertelement <4 x float> %1281, float 0.000000e+00, i32 3 -< %1283 = extractelement <4 x float> %1282, i32 0 -< store float %1283, float* %2, align 4 -< %1284 = extractelement <4 x float> %1282, i32 1 -< %1285 = getelementptr float, float* %2, i32 0 -< %1286 = getelementptr inbounds float, float* %1285, i64 1 -< store float %1284, float* %1286, align 4 -< %1287 = bitcast i32 %970 to float -< %1288 = bitcast i32 %970 to float -< %1289 = fmul float %1287, %1288 -< %1290 = fadd float %1289, 0.000000e+00 -< %1291 = bitcast i32 %157 to float -< %1292 = bitcast i32 %157 to float -< %1293 = fmul float %1291, %1292 -< %1294 = fadd float %1290, %1293 -< %1295 = call float @llvm.sqrt.f32.93(float %1294) -< %1296 = fneg float %815 -< %1297 = fmul float %1295, %1296 -< %1298 = bitcast i32 %970 to float -< %1299 = fadd float %1298, %1297 -< %1300 = bitcast i32 %970 to float -< %1301 = bitcast i32 %970 to float -< %1302 = fmul float %1300, %1301 -< %1303 = fadd float %1302, 0.000000e+00 -< %1304 = bitcast i32 %157 to float -< %1305 = bitcast i32 %157 to float -< %1306 = fmul float %1304, %1305 -< %1307 = fadd float %1303, %1306 -< %1308 = call float @llvm.sqrt.f32.94(float %1307) -< %1309 = fneg 
float %815 -< %1310 = fmul float %1308, %1309 -< %1311 = bitcast i32 %970 to float -< %1312 = fadd float %1311, %1310 -< %1313 = bitcast i32 %970 to float -< %1314 = bitcast i32 %970 to float -< %1315 = fmul float %1313, %1314 -< %1316 = fadd float %1315, 0.000000e+00 -< %1317 = bitcast i32 %157 to float -< %1318 = bitcast i32 %157 to float -< %1319 = fmul float %1317, %1318 -< %1320 = fadd float %1316, %1319 -< %1321 = call float @llvm.sqrt.f32.95(float %1320) -< %1322 = fneg float %815 -< %1323 = fmul float %1321, %1322 -< %1324 = bitcast i32 %970 to float -< %1325 = fadd float %1324, %1323 -< %1326 = fmul float %1312, %1325 -< %1327 = fadd float %1326, 0.000000e+00 -< %1328 = bitcast i32 %970 to float -< %1329 = bitcast i32 %970 to float -< %1330 = fmul float %1328, %1329 -< %1331 = fadd float %1330, 0.000000e+00 -< %1332 = bitcast i32 %157 to float -< %1333 = bitcast i32 %157 to float -< %1334 = fmul float %1332, %1333 -< %1335 = fadd float %1331, %1334 -< %1336 = call float @llvm.sqrt.f32.96(float %1335) -< %1337 = fneg float %815 -< %1338 = fmul float %1336, %1337 -< %1339 = fmul float %1338, 0.000000e+00 -< %1340 = bitcast i32 %157 to float -< %1341 = fadd float %1340, %1339 -< %1342 = bitcast i32 %970 to float -< %1343 = bitcast i32 %970 to float -< %1344 = fmul float %1342, %1343 -< %1345 = fadd float %1344, 0.000000e+00 -< %1346 = bitcast i32 %157 to float -< %1347 = bitcast i32 %157 to float -< %1348 = fmul float %1346, %1347 -< %1349 = fadd float %1345, %1348 -< %1350 = call float @llvm.sqrt.f32.97(float %1349) -< %1351 = fneg float %815 -< %1352 = fmul float %1350, %1351 -< %1353 = fmul float %1352, 0.000000e+00 -< %1354 = bitcast i32 %157 to float -< %1355 = fadd float %1354, %1353 -< %1356 = fmul float %1341, %1355 -< %1357 = fadd float %1327, %1356 -< %1358 = call float @llvm.sqrt.f32.98(float %1357) -< %1359 = fadd float %1358, 0.000000e+00 -< %1360 = fdiv float %1299, %1359 -< %1361 = fmul float %1360, 2.000000e+00 -< %1362 = bitcast i32 %970 to float -< %1363 = bitcast i32 %970 to float -< %1364 = fmul float %1362, %1363 -< %1365 = fadd float %1364, 0.000000e+00 -< %1366 = bitcast i32 %157 to float -< %1367 = bitcast i32 %157 to float -< %1368 = fmul float %1366, %1367 -< %1369 = fadd float %1365, %1368 -< %1370 = call float @llvm.sqrt.f32.99(float %1369) -< %1371 = fneg float %815 -< %1372 = fmul float %1370, %1371 -< %1373 = bitcast i32 %970 to float -< %1374 = fadd float %1373, %1372 -< %1375 = bitcast i32 %970 to float -< %1376 = bitcast i32 %970 to float -< %1377 = fmul float %1375, %1376 -< %1378 = fadd float %1377, 0.000000e+00 -< %1379 = bitcast i32 %157 to float -< %1380 = bitcast i32 %157 to float -< %1381 = fmul float %1379, %1380 -< %1382 = fadd float %1378, %1381 -< %1383 = call float @llvm.sqrt.f32.100(float %1382) -< %1384 = fneg float %815 -< %1385 = fmul float %1383, %1384 -< %1386 = bitcast i32 %970 to float -< %1387 = fadd float %1386, %1385 -< %1388 = bitcast i32 %970 to float -< %1389 = bitcast i32 %970 to float -< %1390 = fmul float %1388, %1389 -< %1391 = fadd float %1390, 0.000000e+00 -< %1392 = bitcast i32 %157 to float -< %1393 = bitcast i32 %157 to float -< %1394 = fmul float %1392, %1393 -< %1395 = fadd float %1391, %1394 -< %1396 = call float @llvm.sqrt.f32.101(float %1395) -< %1397 = fneg float %815 -< %1398 = fmul float %1396, %1397 -< %1399 = bitcast i32 %970 to float -< %1400 = fadd float %1399, %1398 -< %1401 = fmul float %1387, %1400 -< %1402 = fadd float %1401, 0.000000e+00 -< %1403 = bitcast i32 %970 to float -< %1404 = bitcast i32 
%970 to float -< %1405 = fmul float %1403, %1404 -< %1406 = fadd float %1405, 0.000000e+00 -< %1407 = bitcast i32 %157 to float -< %1408 = bitcast i32 %157 to float -< %1409 = fmul float %1407, %1408 -< %1410 = fadd float %1406, %1409 -< %1411 = call float @llvm.sqrt.f32.102(float %1410) -< %1412 = fneg float %815 -< %1413 = fmul float %1411, %1412 -< %1414 = fmul float %1413, 0.000000e+00 -< %1415 = bitcast i32 %157 to float -< %1416 = fadd float %1415, %1414 -< %1417 = bitcast i32 %970 to float -< %1418 = bitcast i32 %970 to float -< %1419 = fmul float %1417, %1418 -< %1420 = fadd float %1419, 0.000000e+00 -< %1421 = bitcast i32 %157 to float -< %1422 = bitcast i32 %157 to float -< %1423 = fmul float %1421, %1422 -< %1424 = fadd float %1420, %1423 -< %1425 = call float @llvm.sqrt.f32.103(float %1424) -< %1426 = fneg float %815 -< %1427 = fmul float %1425, %1426 -< %1428 = fmul float %1427, 0.000000e+00 -< %1429 = bitcast i32 %157 to float -< %1430 = fadd float %1429, %1428 -< %1431 = fmul float %1416, %1430 -< %1432 = fadd float %1402, %1431 -< %1433 = call float @llvm.sqrt.f32.104(float %1432) -< %1434 = fadd float %1433, 0.000000e+00 -< %1435 = fdiv float %1374, %1434 -< %1436 = fmul float %1361, %1435 -< %1437 = fsub float 1.000000e+00, %1436 -< %1438 = insertelement <4 x float> zeroinitializer, float %1437, i32 0 -< %1439 = insertelement <4 x float> %1438, float 0.000000e+00, i32 1 -< %1440 = insertelement <4 x float> %1439, float 0.000000e+00, i32 2 -< %1441 = insertelement <4 x float> %1440, float 0.000000e+00, i32 3 -< %1442 = getelementptr float, float* %0, i32 0 -< %1443 = getelementptr inbounds float, float* %1442, i64 1 -< %1444 = load float, float* %1443, align 4 -< %1445 = insertelement <4 x float> zeroinitializer, float %1444, i32 0 -< %1446 = insertelement <4 x float> %1445, float 0.000000e+00, i32 1 -< %1447 = insertelement <4 x float> %1446, float 0.000000e+00, i32 2 -< %1448 = insertelement <4 x float> %1447, float 0.000000e+00, i32 3 -< %1449 = call <4 x float> @llvm.fma.f32.105(<4 x float> %1441, <4 x float> %1448, <4 x float> zeroinitializer) -< %1450 = extractelement <4 x float> %1449, i32 0 -< store float %1450, float* %1286, align 4 -< %1451 = bitcast i32 %970 to float -< %1452 = bitcast i32 %970 to float -< %1453 = fmul float %1451, %1452 -< %1454 = fadd float %1453, 0.000000e+00 -< %1455 = bitcast i32 %157 to float -< %1456 = bitcast i32 %157 to float -< %1457 = fmul float %1455, %1456 -< %1458 = fadd float %1454, %1457 -< %1459 = call float @llvm.sqrt.f32.106(float %1458) -< %1460 = fneg float %815 -< %1461 = fmul float %1459, %1460 -< %1462 = bitcast i32 %970 to float -< %1463 = fadd float %1462, %1461 -< %1464 = bitcast i32 %970 to float -< %1465 = bitcast i32 %970 to float -< %1466 = fmul float %1464, %1465 -< %1467 = fadd float %1466, 0.000000e+00 -< %1468 = bitcast i32 %157 to float -< %1469 = bitcast i32 %157 to float -< %1470 = fmul float %1468, %1469 -< %1471 = fadd float %1467, %1470 -< %1472 = call float @llvm.sqrt.f32.107(float %1471) -< %1473 = fneg float %815 -< %1474 = fmul float %1472, %1473 -< %1475 = bitcast i32 %970 to float -< %1476 = fadd float %1475, %1474 -< %1477 = bitcast i32 %970 to float -< %1478 = bitcast i32 %970 to float -< %1479 = fmul float %1477, %1478 -< %1480 = fadd float %1479, 0.000000e+00 -< %1481 = bitcast i32 %157 to float -< %1482 = bitcast i32 %157 to float -< %1483 = fmul float %1481, %1482 -< %1484 = fadd float %1480, %1483 -< %1485 = call float @llvm.sqrt.f32.108(float %1484) -< %1486 = fneg float %815 -< %1487 = fmul 
float %1485, %1486 -< %1488 = bitcast i32 %970 to float -< %1489 = fadd float %1488, %1487 -< %1490 = fmul float %1476, %1489 -< %1491 = fadd float %1490, 0.000000e+00 -< %1492 = bitcast i32 %970 to float -< %1493 = bitcast i32 %970 to float -< %1494 = fmul float %1492, %1493 -< %1495 = fadd float %1494, 0.000000e+00 -< %1496 = bitcast i32 %157 to float -< %1497 = bitcast i32 %157 to float -< %1498 = fmul float %1496, %1497 -< %1499 = fadd float %1495, %1498 -< %1500 = call float @llvm.sqrt.f32.109(float %1499) -< %1501 = fneg float %815 -< %1502 = fmul float %1500, %1501 -< %1503 = fmul float %1502, 0.000000e+00 -< %1504 = bitcast i32 %157 to float -< %1505 = fadd float %1504, %1503 -< %1506 = bitcast i32 %970 to float -< %1507 = bitcast i32 %970 to float -< %1508 = fmul float %1506, %1507 -< %1509 = fadd float %1508, 0.000000e+00 -< %1510 = bitcast i32 %157 to float -< %1511 = bitcast i32 %157 to float -< %1512 = fmul float %1510, %1511 -< %1513 = fadd float %1509, %1512 -< %1514 = call float @llvm.sqrt.f32.110(float %1513) -< %1515 = fneg float %815 -< %1516 = fmul float %1514, %1515 -< %1517 = fmul float %1516, 0.000000e+00 -< %1518 = bitcast i32 %157 to float -< %1519 = fadd float %1518, %1517 -< %1520 = fmul float %1505, %1519 -< %1521 = fadd float %1491, %1520 -< %1522 = call float @llvm.sqrt.f32.111(float %1521) -< %1523 = fadd float %1522, 0.000000e+00 -< %1524 = fdiv float %1463, %1523 -< %1525 = fmul float %1524, 2.000000e+00 -< %1526 = bitcast i32 %970 to float -< %1527 = bitcast i32 %970 to float -< %1528 = fmul float %1526, %1527 -< %1529 = fadd float %1528, 0.000000e+00 -< %1530 = bitcast i32 %157 to float -< %1531 = bitcast i32 %157 to float -< %1532 = fmul float %1530, %1531 -< %1533 = fadd float %1529, %1532 -< %1534 = call float @llvm.sqrt.f32.112(float %1533) -< %1535 = fneg float %815 -< %1536 = fmul float %1534, %1535 -< %1537 = bitcast i32 %970 to float -< %1538 = fadd float %1537, %1536 -< %1539 = bitcast i32 %970 to float -< %1540 = bitcast i32 %970 to float -< %1541 = fmul float %1539, %1540 -< %1542 = fadd float %1541, 0.000000e+00 -< %1543 = bitcast i32 %157 to float -< %1544 = bitcast i32 %157 to float -< %1545 = fmul float %1543, %1544 -< %1546 = fadd float %1542, %1545 -< %1547 = call float @llvm.sqrt.f32.113(float %1546) -< %1548 = fneg float %815 -< %1549 = fmul float %1547, %1548 -< %1550 = bitcast i32 %970 to float -< %1551 = fadd float %1550, %1549 -< %1552 = bitcast i32 %970 to float -< %1553 = bitcast i32 %970 to float -< %1554 = fmul float %1552, %1553 -< %1555 = fadd float %1554, 0.000000e+00 -< %1556 = bitcast i32 %157 to float -< %1557 = bitcast i32 %157 to float -< %1558 = fmul float %1556, %1557 -< %1559 = fadd float %1555, %1558 -< %1560 = call float @llvm.sqrt.f32.114(float %1559) -< %1561 = fneg float %815 -< %1562 = fmul float %1560, %1561 -< %1563 = bitcast i32 %970 to float -< %1564 = fadd float %1563, %1562 -< %1565 = fmul float %1551, %1564 -< %1566 = fadd float %1565, 0.000000e+00 -< %1567 = bitcast i32 %970 to float -< %1568 = bitcast i32 %970 to float -< %1569 = fmul float %1567, %1568 -< %1570 = fadd float %1569, 0.000000e+00 -< %1571 = bitcast i32 %157 to float -< %1572 = bitcast i32 %157 to float -< %1573 = fmul float %1571, %1572 -< %1574 = fadd float %1570, %1573 -< %1575 = call float @llvm.sqrt.f32.115(float %1574) -< %1576 = fneg float %815 -< %1577 = fmul float %1575, %1576 -< %1578 = fmul float %1577, 0.000000e+00 -< %1579 = bitcast i32 %157 to float -< %1580 = fadd float %1579, %1578 -< %1581 = bitcast i32 %970 to float -< 
%1582 = bitcast i32 %970 to float -< %1583 = fmul float %1581, %1582 -< %1584 = fadd float %1583, 0.000000e+00 -< %1585 = bitcast i32 %157 to float -< %1586 = bitcast i32 %157 to float -< %1587 = fmul float %1585, %1586 -< %1588 = fadd float %1584, %1587 -< %1589 = call float @llvm.sqrt.f32.116(float %1588) -< %1590 = fneg float %815 -< %1591 = fmul float %1589, %1590 -< %1592 = fmul float %1591, 0.000000e+00 -< %1593 = bitcast i32 %157 to float -< %1594 = fadd float %1593, %1592 -< %1595 = fmul float %1580, %1594 -< %1596 = fadd float %1566, %1595 -< %1597 = call float @llvm.sqrt.f32.117(float %1596) -< %1598 = fadd float %1597, 0.000000e+00 -< %1599 = fdiv float %1538, %1598 -< %1600 = fmul float %1525, %1599 -< %1601 = fsub float 1.000000e+00, %1600 -< %1602 = fmul float %1601, %1444 -< %1603 = fadd float %1602, 0.000000e+00 -< %1604 = bitcast i32 %970 to float -< %1605 = bitcast i32 %970 to float -< %1606 = fmul float %1604, %1605 -< %1607 = fadd float %1606, 0.000000e+00 -< %1608 = bitcast i32 %157 to float -< %1609 = bitcast i32 %157 to float -< %1610 = fmul float %1608, %1609 -< %1611 = fadd float %1607, %1610 -< %1612 = call float @llvm.sqrt.f32.118(float %1611) -< %1613 = fneg float %815 -< %1614 = fmul float %1612, %1613 -< %1615 = bitcast i32 %970 to float -< %1616 = fadd float %1615, %1614 -< %1617 = bitcast i32 %970 to float -< %1618 = bitcast i32 %970 to float -< %1619 = fmul float %1617, %1618 -< %1620 = fadd float %1619, 0.000000e+00 -< %1621 = bitcast i32 %157 to float -< %1622 = bitcast i32 %157 to float -< %1623 = fmul float %1621, %1622 -< %1624 = fadd float %1620, %1623 -< %1625 = call float @llvm.sqrt.f32.119(float %1624) -< %1626 = fneg float %815 -< %1627 = fmul float %1625, %1626 -< %1628 = bitcast i32 %970 to float -< %1629 = fadd float %1628, %1627 -< %1630 = bitcast i32 %970 to float -< %1631 = bitcast i32 %970 to float -< %1632 = fmul float %1630, %1631 -< %1633 = fadd float %1632, 0.000000e+00 -< %1634 = bitcast i32 %157 to float -< %1635 = bitcast i32 %157 to float -< %1636 = fmul float %1634, %1635 -< %1637 = fadd float %1633, %1636 -< %1638 = call float @llvm.sqrt.f32.120(float %1637) -< %1639 = fneg float %815 -< %1640 = fmul float %1638, %1639 -< %1641 = bitcast i32 %970 to float -< %1642 = fadd float %1641, %1640 -< %1643 = fmul float %1629, %1642 -< %1644 = fadd float %1643, 0.000000e+00 -< %1645 = bitcast i32 %970 to float -< %1646 = bitcast i32 %970 to float -< %1647 = fmul float %1645, %1646 -< %1648 = fadd float %1647, 0.000000e+00 -< %1649 = bitcast i32 %157 to float -< %1650 = bitcast i32 %157 to float -< %1651 = fmul float %1649, %1650 -< %1652 = fadd float %1648, %1651 -< %1653 = call float @llvm.sqrt.f32.121(float %1652) -< %1654 = fneg float %815 -< %1655 = fmul float %1653, %1654 -< %1656 = fmul float %1655, 0.000000e+00 -< %1657 = bitcast i32 %157 to float -< %1658 = fadd float %1657, %1656 -< %1659 = bitcast i32 %970 to float -< %1660 = bitcast i32 %970 to float -< %1661 = fmul float %1659, %1660 -< %1662 = fadd float %1661, 0.000000e+00 -< %1663 = bitcast i32 %157 to float -< %1664 = bitcast i32 %157 to float -< %1665 = fmul float %1663, %1664 -< %1666 = fadd float %1662, %1665 -< %1667 = call float @llvm.sqrt.f32.122(float %1666) -< %1668 = fneg float %815 -< %1669 = fmul float %1667, %1668 -< %1670 = fmul float %1669, 0.000000e+00 -< %1671 = bitcast i32 %157 to float -< %1672 = fadd float %1671, %1670 -< %1673 = fmul float %1658, %1672 -< %1674 = fadd float %1644, %1673 -< %1675 = call float @llvm.sqrt.f32.123(float %1674) -< %1676 = 
fadd float %1675, 0.000000e+00 -< %1677 = fdiv float %1616, %1676 -< %1678 = fmul float %1677, 2.000000e+00 -< %1679 = bitcast i32 %970 to float -< %1680 = bitcast i32 %970 to float -< %1681 = fmul float %1679, %1680 -< %1682 = fadd float %1681, 0.000000e+00 -< %1683 = bitcast i32 %157 to float -< %1684 = bitcast i32 %157 to float -< %1685 = fmul float %1683, %1684 -< %1686 = fadd float %1682, %1685 -< %1687 = call float @llvm.sqrt.f32.124(float %1686) -< %1688 = fneg float %815 -< %1689 = fmul float %1687, %1688 -< %1690 = fmul float %1689, 0.000000e+00 -< %1691 = bitcast i32 %157 to float -< %1692 = fadd float %1691, %1690 -< %1693 = bitcast i32 %970 to float -< %1694 = bitcast i32 %970 to float -< %1695 = fmul float %1693, %1694 -< %1696 = fadd float %1695, 0.000000e+00 -< %1697 = bitcast i32 %157 to float -< %1698 = bitcast i32 %157 to float -< %1699 = fmul float %1697, %1698 -< %1700 = fadd float %1696, %1699 -< %1701 = call float @llvm.sqrt.f32.125(float %1700) -< %1702 = fneg float %815 -< %1703 = fmul float %1701, %1702 -< %1704 = bitcast i32 %970 to float -< %1705 = fadd float %1704, %1703 -< %1706 = bitcast i32 %970 to float -< %1707 = bitcast i32 %970 to float -< %1708 = fmul float %1706, %1707 -< %1709 = fadd float %1708, 0.000000e+00 -< %1710 = bitcast i32 %157 to float -< %1711 = bitcast i32 %157 to float -< %1712 = fmul float %1710, %1711 -< %1713 = fadd float %1709, %1712 -< %1714 = call float @llvm.sqrt.f32.126(float %1713) -< %1715 = fneg float %815 -< %1716 = fmul float %1714, %1715 -< %1717 = bitcast i32 %970 to float -< %1718 = fadd float %1717, %1716 -< %1719 = fmul float %1705, %1718 -< %1720 = fadd float %1719, 0.000000e+00 -< %1721 = bitcast i32 %970 to float -< %1722 = bitcast i32 %970 to float -< %1723 = fmul float %1721, %1722 -< %1724 = fadd float %1723, 0.000000e+00 -< %1725 = bitcast i32 %157 to float -< %1726 = bitcast i32 %157 to float -< %1727 = fmul float %1725, %1726 -< %1728 = fadd float %1724, %1727 -< %1729 = call float @llvm.sqrt.f32.127(float %1728) -< %1730 = fneg float %815 -< %1731 = fmul float %1729, %1730 -< %1732 = fmul float %1731, 0.000000e+00 -< %1733 = bitcast i32 %157 to float -< %1734 = fadd float %1733, %1732 -< %1735 = bitcast i32 %970 to float -< %1736 = bitcast i32 %970 to float -< %1737 = fmul float %1735, %1736 -< %1738 = fadd float %1737, 0.000000e+00 -< %1739 = bitcast i32 %157 to float -< %1740 = bitcast i32 %157 to float -< %1741 = fmul float %1739, %1740 -< %1742 = fadd float %1738, %1741 -< %1743 = call float @llvm.sqrt.f32.128(float %1742) -< %1744 = fneg float %815 -< %1745 = fmul float %1743, %1744 -< %1746 = fmul float %1745, 0.000000e+00 -< %1747 = bitcast i32 %157 to float -< %1748 = fadd float %1747, %1746 -< %1749 = fmul float %1734, %1748 -< %1750 = fadd float %1720, %1749 -< %1751 = call float @llvm.sqrt.f32.129(float %1750) -< %1752 = fadd float %1751, 0.000000e+00 -< %1753 = fdiv float %1692, %1752 -< %1754 = fmul float %1678, %1753 -< %1755 = fneg float %1754 -< %1756 = getelementptr float, float* %0, i32 0 -< %1757 = getelementptr inbounds float, float* %1756, i64 3 -< %1758 = load float, float* %1757, align 4 -< %1759 = fmul float %1755, %1758 -< %1760 = fadd float %1603, %1759 -< %1761 = insertelement <4 x float> zeroinitializer, float %1760, i32 0 -< %1762 = insertelement <4 x float> %1761, float 0.000000e+00, i32 1 -< %1763 = insertelement <4 x float> %1762, float 0.000000e+00, i32 2 -< %1764 = insertelement <4 x float> %1763, float 0.000000e+00, i32 3 -< %1765 = extractelement <4 x float> %1764, i32 0 -< 
store float %1765, float* %1286, align 4 -< %1766 = extractelement <4 x float> %1764, i32 1 -< store float %1766, float* %140, align 4 -< %1767 = bitcast i32 %970 to float -< %1768 = bitcast i32 %970 to float -< %1769 = fmul float %1767, %1768 -< %1770 = fadd float %1769, 0.000000e+00 -< %1771 = bitcast i32 %157 to float -< %1772 = bitcast i32 %157 to float -< %1773 = fmul float %1771, %1772 -< %1774 = fadd float %1770, %1773 -< %1775 = call float @llvm.sqrt.f32.130(float %1774) -< %1776 = fneg float %815 -< %1777 = fmul float %1775, %1776 -< %1778 = fmul float %1777, 0.000000e+00 -< %1779 = bitcast i32 %157 to float -< %1780 = fadd float %1779, %1778 -< %1781 = bitcast i32 %970 to float -< %1782 = bitcast i32 %970 to float -< %1783 = fmul float %1781, %1782 -< %1784 = fadd float %1783, 0.000000e+00 -< %1785 = bitcast i32 %157 to float -< %1786 = bitcast i32 %157 to float -< %1787 = fmul float %1785, %1786 -< %1788 = fadd float %1784, %1787 -< %1789 = call float @llvm.sqrt.f32.131(float %1788) -< %1790 = fneg float %815 -< %1791 = fmul float %1789, %1790 -< %1792 = bitcast i32 %970 to float -< %1793 = fadd float %1792, %1791 -< %1794 = bitcast i32 %970 to float -< %1795 = bitcast i32 %970 to float -< %1796 = fmul float %1794, %1795 -< %1797 = fadd float %1796, 0.000000e+00 -< %1798 = bitcast i32 %157 to float -< %1799 = bitcast i32 %157 to float -< %1800 = fmul float %1798, %1799 -< %1801 = fadd float %1797, %1800 -< %1802 = call float @llvm.sqrt.f32.132(float %1801) -< %1803 = fneg float %815 -< %1804 = fmul float %1802, %1803 -< %1805 = bitcast i32 %970 to float -< %1806 = fadd float %1805, %1804 -< %1807 = fmul float %1793, %1806 -< %1808 = fadd float %1807, 0.000000e+00 -< %1809 = bitcast i32 %970 to float -< %1810 = bitcast i32 %970 to float -< %1811 = fmul float %1809, %1810 -< %1812 = fadd float %1811, 0.000000e+00 -< %1813 = bitcast i32 %157 to float -< %1814 = bitcast i32 %157 to float -< %1815 = fmul float %1813, %1814 -< %1816 = fadd float %1812, %1815 -< %1817 = call float @llvm.sqrt.f32.133(float %1816) -< %1818 = fneg float %815 -< %1819 = fmul float %1817, %1818 -< %1820 = fmul float %1819, 0.000000e+00 -< %1821 = bitcast i32 %157 to float -< %1822 = fadd float %1821, %1820 -< %1823 = bitcast i32 %970 to float -< %1824 = bitcast i32 %970 to float -< %1825 = fmul float %1823, %1824 -< %1826 = fadd float %1825, 0.000000e+00 -< %1827 = bitcast i32 %157 to float -< %1828 = bitcast i32 %157 to float -< %1829 = fmul float %1827, %1828 -< %1830 = fadd float %1826, %1829 -< %1831 = call float @llvm.sqrt.f32.134(float %1830) -< %1832 = fneg float %815 -< %1833 = fmul float %1831, %1832 -< %1834 = fmul float %1833, 0.000000e+00 -< %1835 = bitcast i32 %157 to float -< %1836 = fadd float %1835, %1834 -< %1837 = fmul float %1822, %1836 -< %1838 = fadd float %1808, %1837 -< %1839 = call float @llvm.sqrt.f32.135(float %1838) -< %1840 = fadd float %1839, 0.000000e+00 -< %1841 = fdiv float %1780, %1840 -< %1842 = fmul float %1841, 2.000000e+00 -< %1843 = bitcast i32 %970 to float -< %1844 = bitcast i32 %970 to float -< %1845 = fmul float %1843, %1844 -< %1846 = fadd float %1845, 0.000000e+00 -< %1847 = bitcast i32 %157 to float -< %1848 = bitcast i32 %157 to float -< %1849 = fmul float %1847, %1848 -< %1850 = fadd float %1846, %1849 -< %1851 = call float @llvm.sqrt.f32.136(float %1850) -< %1852 = fneg float %815 -< %1853 = fmul float %1851, %1852 -< %1854 = bitcast i32 %970 to float -< %1855 = fadd float %1854, %1853 -< %1856 = bitcast i32 %970 to float -< %1857 = bitcast i32 %970 to float 
-< %1858 = fmul float %1856, %1857 -< %1859 = fadd float %1858, 0.000000e+00 -< %1860 = bitcast i32 %157 to float -< %1861 = bitcast i32 %157 to float -< %1862 = fmul float %1860, %1861 -< %1863 = fadd float %1859, %1862 -< %1864 = call float @llvm.sqrt.f32.137(float %1863) -< %1865 = fneg float %815 -< %1866 = fmul float %1864, %1865 -< %1867 = bitcast i32 %970 to float -< %1868 = fadd float %1867, %1866 -< %1869 = bitcast i32 %970 to float -< %1870 = bitcast i32 %970 to float -< %1871 = fmul float %1869, %1870 -< %1872 = fadd float %1871, 0.000000e+00 -< %1873 = bitcast i32 %157 to float -< %1874 = bitcast i32 %157 to float -< %1875 = fmul float %1873, %1874 -< %1876 = fadd float %1872, %1875 -< %1877 = call float @llvm.sqrt.f32.138(float %1876) -< %1878 = fneg float %815 -< %1879 = fmul float %1877, %1878 -< %1880 = bitcast i32 %970 to float -< %1881 = fadd float %1880, %1879 -< %1882 = fmul float %1868, %1881 -< %1883 = fadd float %1882, 0.000000e+00 -< %1884 = bitcast i32 %970 to float -< %1885 = bitcast i32 %970 to float -< %1886 = fmul float %1884, %1885 -< %1887 = fadd float %1886, 0.000000e+00 -< %1888 = bitcast i32 %157 to float -< %1889 = bitcast i32 %157 to float -< %1890 = fmul float %1888, %1889 -< %1891 = fadd float %1887, %1890 -< %1892 = call float @llvm.sqrt.f32.139(float %1891) -< %1893 = fneg float %815 -< %1894 = fmul float %1892, %1893 -< %1895 = fmul float %1894, 0.000000e+00 -< %1896 = bitcast i32 %157 to float -< %1897 = fadd float %1896, %1895 -< %1898 = bitcast i32 %970 to float -< %1899 = bitcast i32 %970 to float -< %1900 = fmul float %1898, %1899 -< %1901 = fadd float %1900, 0.000000e+00 -< %1902 = bitcast i32 %157 to float -< %1903 = bitcast i32 %157 to float -< %1904 = fmul float %1902, %1903 -< %1905 = fadd float %1901, %1904 -< %1906 = call float @llvm.sqrt.f32.140(float %1905) -< %1907 = fneg float %815 -< %1908 = fmul float %1906, %1907 -< %1909 = fmul float %1908, 0.000000e+00 -< %1910 = bitcast i32 %157 to float -< %1911 = fadd float %1910, %1909 -< %1912 = fmul float %1897, %1911 -< %1913 = fadd float %1883, %1912 -< %1914 = call float @llvm.sqrt.f32.141(float %1913) -< %1915 = fadd float %1914, 0.000000e+00 -< %1916 = fdiv float %1855, %1915 -< %1917 = fmul float %1842, %1916 -< %1918 = fneg float %1917 -< %1919 = insertelement <4 x float> zeroinitializer, float %1918, i32 0 -< %1920 = insertelement <4 x float> %1919, float 0.000000e+00, i32 1 -< %1921 = insertelement <4 x float> %1920, float 0.000000e+00, i32 2 -< %1922 = insertelement <4 x float> %1921, float 0.000000e+00, i32 3 -< %1923 = getelementptr float, float* %0, i32 0 -< %1924 = load float, float* %1923, align 4 -< %1925 = insertelement <4 x float> zeroinitializer, float %1924, i32 0 -< %1926 = insertelement <4 x float> %1925, float 0.000000e+00, i32 1 -< %1927 = insertelement <4 x float> %1926, float 0.000000e+00, i32 2 -< %1928 = insertelement <4 x float> %1927, float 0.000000e+00, i32 3 -< %1929 = call <4 x float> @llvm.fma.f32.142(<4 x float> %1922, <4 x float> %1928, <4 x float> zeroinitializer) -< %1930 = extractelement <4 x float> %1929, i32 0 -< store float %1930, float* %140, align 4 -< %1931 = bitcast i32 %970 to float -< %1932 = bitcast i32 %970 to float -< %1933 = fmul float %1931, %1932 -< %1934 = fadd float %1933, 0.000000e+00 -< %1935 = bitcast i32 %157 to float -< %1936 = bitcast i32 %157 to float -< %1937 = fmul float %1935, %1936 -< %1938 = fadd float %1934, %1937 -< %1939 = call float @llvm.sqrt.f32.143(float %1938) -< %1940 = fneg float %815 -< %1941 = fmul float %1939, 
%1940 -< %1942 = fmul float %1941, 0.000000e+00 -< %1943 = bitcast i32 %157 to float -< %1944 = fadd float %1943, %1942 -< %1945 = bitcast i32 %970 to float -< %1946 = bitcast i32 %970 to float -< %1947 = fmul float %1945, %1946 -< %1948 = fadd float %1947, 0.000000e+00 -< %1949 = bitcast i32 %157 to float -< %1950 = bitcast i32 %157 to float -< %1951 = fmul float %1949, %1950 -< %1952 = fadd float %1948, %1951 -< %1953 = call float @llvm.sqrt.f32.144(float %1952) -< %1954 = fneg float %815 -< %1955 = fmul float %1953, %1954 -< %1956 = bitcast i32 %970 to float -< %1957 = fadd float %1956, %1955 -< %1958 = bitcast i32 %970 to float -< %1959 = bitcast i32 %970 to float -< %1960 = fmul float %1958, %1959 -< %1961 = fadd float %1960, 0.000000e+00 -< %1962 = bitcast i32 %157 to float -< %1963 = bitcast i32 %157 to float -< %1964 = fmul float %1962, %1963 -< %1965 = fadd float %1961, %1964 -< %1966 = call float @llvm.sqrt.f32.145(float %1965) -< %1967 = fneg float %815 -< %1968 = fmul float %1966, %1967 -< %1969 = bitcast i32 %970 to float -< %1970 = fadd float %1969, %1968 -< %1971 = fmul float %1957, %1970 -< %1972 = fadd float %1971, 0.000000e+00 -< %1973 = bitcast i32 %970 to float -< %1974 = bitcast i32 %970 to float -< %1975 = fmul float %1973, %1974 -< %1976 = fadd float %1975, 0.000000e+00 -< %1977 = bitcast i32 %157 to float -< %1978 = bitcast i32 %157 to float -< %1979 = fmul float %1977, %1978 -< %1980 = fadd float %1976, %1979 -< %1981 = call float @llvm.sqrt.f32.146(float %1980) -< %1982 = fneg float %815 -< %1983 = fmul float %1981, %1982 -< %1984 = fmul float %1983, 0.000000e+00 -< %1985 = bitcast i32 %157 to float -< %1986 = fadd float %1985, %1984 -< %1987 = bitcast i32 %970 to float -< %1988 = bitcast i32 %970 to float -< %1989 = fmul float %1987, %1988 -< %1990 = fadd float %1989, 0.000000e+00 -< %1991 = bitcast i32 %157 to float -< %1992 = bitcast i32 %157 to float -< %1993 = fmul float %1991, %1992 -< %1994 = fadd float %1990, %1993 -< %1995 = call float @llvm.sqrt.f32.147(float %1994) -< %1996 = fneg float %815 -< %1997 = fmul float %1995, %1996 -< %1998 = fmul float %1997, 0.000000e+00 -< %1999 = bitcast i32 %157 to float -< %2000 = fadd float %1999, %1998 -< %2001 = fmul float %1986, %2000 -< %2002 = fadd float %1972, %2001 -< %2003 = call float @llvm.sqrt.f32.148(float %2002) -< %2004 = fadd float %2003, 0.000000e+00 -< %2005 = fdiv float %1944, %2004 -< %2006 = fmul float %2005, 2.000000e+00 -< %2007 = bitcast i32 %970 to float -< %2008 = bitcast i32 %970 to float -< %2009 = fmul float %2007, %2008 -< %2010 = fadd float %2009, 0.000000e+00 -< %2011 = bitcast i32 %157 to float -< %2012 = bitcast i32 %157 to float -< %2013 = fmul float %2011, %2012 -< %2014 = fadd float %2010, %2013 -< %2015 = call float @llvm.sqrt.f32.149(float %2014) -< %2016 = fneg float %815 -< %2017 = fmul float %2015, %2016 -< %2018 = bitcast i32 %970 to float -< %2019 = fadd float %2018, %2017 -< %2020 = bitcast i32 %970 to float -< %2021 = bitcast i32 %970 to float -< %2022 = fmul float %2020, %2021 -< %2023 = fadd float %2022, 0.000000e+00 -< %2024 = bitcast i32 %157 to float -< %2025 = bitcast i32 %157 to float -< %2026 = fmul float %2024, %2025 -< %2027 = fadd float %2023, %2026 -< %2028 = call float @llvm.sqrt.f32.150(float %2027) -< %2029 = fneg float %815 -< %2030 = fmul float %2028, %2029 -< %2031 = bitcast i32 %970 to float -< %2032 = fadd float %2031, %2030 -< %2033 = bitcast i32 %970 to float -< %2034 = bitcast i32 %970 to float -< %2035 = fmul float %2033, %2034 -< %2036 = fadd float 
%2035, 0.000000e+00 -< %2037 = bitcast i32 %157 to float -< %2038 = bitcast i32 %157 to float -< %2039 = fmul float %2037, %2038 -< %2040 = fadd float %2036, %2039 -< %2041 = call float @llvm.sqrt.f32.151(float %2040) -< %2042 = fneg float %815 -< %2043 = fmul float %2041, %2042 -< %2044 = bitcast i32 %970 to float -< %2045 = fadd float %2044, %2043 -< %2046 = fmul float %2032, %2045 -< %2047 = fadd float %2046, 0.000000e+00 -< %2048 = bitcast i32 %970 to float -< %2049 = bitcast i32 %970 to float -< %2050 = fmul float %2048, %2049 -< %2051 = fadd float %2050, 0.000000e+00 -< %2052 = bitcast i32 %157 to float -< %2053 = bitcast i32 %157 to float -< %2054 = fmul float %2052, %2053 -< %2055 = fadd float %2051, %2054 -< %2056 = call float @llvm.sqrt.f32.152(float %2055) -< %2057 = fneg float %815 -< %2058 = fmul float %2056, %2057 -< %2059 = fmul float %2058, 0.000000e+00 -< %2060 = bitcast i32 %157 to float -< %2061 = fadd float %2060, %2059 -< %2062 = bitcast i32 %970 to float -< %2063 = bitcast i32 %970 to float -< %2064 = fmul float %2062, %2063 -< %2065 = fadd float %2064, 0.000000e+00 -< %2066 = bitcast i32 %157 to float -< %2067 = bitcast i32 %157 to float -< %2068 = fmul float %2066, %2067 -< %2069 = fadd float %2065, %2068 -< %2070 = call float @llvm.sqrt.f32.153(float %2069) -< %2071 = fneg float %815 -< %2072 = fmul float %2070, %2071 -< %2073 = fmul float %2072, 0.000000e+00 -< %2074 = bitcast i32 %157 to float -< %2075 = fadd float %2074, %2073 -< %2076 = fmul float %2061, %2075 -< %2077 = fadd float %2047, %2076 -< %2078 = call float @llvm.sqrt.f32.154(float %2077) -< %2079 = fadd float %2078, 0.000000e+00 -< %2080 = fdiv float %2019, %2079 -< %2081 = fmul float %2006, %2080 -< %2082 = fneg float %2081 -< %2083 = fmul float %2082, %1924 -< %2084 = fadd float %2083, 0.000000e+00 -< %2085 = bitcast i32 %970 to float -< %2086 = bitcast i32 %970 to float -< %2087 = fmul float %2085, %2086 -< %2088 = fadd float %2087, 0.000000e+00 -< %2089 = bitcast i32 %157 to float -< %2090 = bitcast i32 %157 to float -< %2091 = fmul float %2089, %2090 -< %2092 = fadd float %2088, %2091 -< %2093 = call float @llvm.sqrt.f32.155(float %2092) -< %2094 = fneg float %815 -< %2095 = fmul float %2093, %2094 -< %2096 = fmul float %2095, 0.000000e+00 -< %2097 = bitcast i32 %157 to float -< %2098 = fadd float %2097, %2096 -< %2099 = bitcast i32 %970 to float -< %2100 = bitcast i32 %970 to float -< %2101 = fmul float %2099, %2100 -< %2102 = fadd float %2101, 0.000000e+00 -< %2103 = bitcast i32 %157 to float -< %2104 = bitcast i32 %157 to float -< %2105 = fmul float %2103, %2104 -< %2106 = fadd float %2102, %2105 -< %2107 = call float @llvm.sqrt.f32.156(float %2106) -< %2108 = fneg float %815 -< %2109 = fmul float %2107, %2108 -< %2110 = bitcast i32 %970 to float -< %2111 = fadd float %2110, %2109 -< %2112 = bitcast i32 %970 to float -< %2113 = bitcast i32 %970 to float -< %2114 = fmul float %2112, %2113 -< %2115 = fadd float %2114, 0.000000e+00 -< %2116 = bitcast i32 %157 to float -< %2117 = bitcast i32 %157 to float -< %2118 = fmul float %2116, %2117 -< %2119 = fadd float %2115, %2118 -< %2120 = call float @llvm.sqrt.f32.157(float %2119) -< %2121 = fneg float %815 -< %2122 = fmul float %2120, %2121 -< %2123 = bitcast i32 %970 to float -< %2124 = fadd float %2123, %2122 -< %2125 = fmul float %2111, %2124 -< %2126 = fadd float %2125, 0.000000e+00 -< %2127 = bitcast i32 %970 to float -< %2128 = bitcast i32 %970 to float -< %2129 = fmul float %2127, %2128 -< %2130 = fadd float %2129, 0.000000e+00 -< %2131 = 
bitcast i32 %157 to float -< %2132 = bitcast i32 %157 to float -< %2133 = fmul float %2131, %2132 -< %2134 = fadd float %2130, %2133 -< %2135 = call float @llvm.sqrt.f32.158(float %2134) -< %2136 = fneg float %815 -< %2137 = fmul float %2135, %2136 -< %2138 = fmul float %2137, 0.000000e+00 -< %2139 = bitcast i32 %157 to float -< %2140 = fadd float %2139, %2138 -< %2141 = bitcast i32 %970 to float -< %2142 = bitcast i32 %970 to float -< %2143 = fmul float %2141, %2142 -< %2144 = fadd float %2143, 0.000000e+00 -< %2145 = bitcast i32 %157 to float -< %2146 = bitcast i32 %157 to float -< %2147 = fmul float %2145, %2146 -< %2148 = fadd float %2144, %2147 -< %2149 = call float @llvm.sqrt.f32.159(float %2148) -< %2150 = fneg float %815 -< %2151 = fmul float %2149, %2150 -< %2152 = fmul float %2151, 0.000000e+00 -< %2153 = bitcast i32 %157 to float -< %2154 = fadd float %2153, %2152 -< %2155 = fmul float %2140, %2154 -< %2156 = fadd float %2126, %2155 -< %2157 = call float @llvm.sqrt.f32.160(float %2156) -< %2158 = fadd float %2157, 0.000000e+00 -< %2159 = fdiv float %2098, %2158 -< %2160 = fmul float %2159, 2.000000e+00 -< %2161 = bitcast i32 %970 to float -< %2162 = bitcast i32 %970 to float -< %2163 = fmul float %2161, %2162 -< %2164 = fadd float %2163, 0.000000e+00 -< %2165 = bitcast i32 %157 to float -< %2166 = bitcast i32 %157 to float -< %2167 = fmul float %2165, %2166 -< %2168 = fadd float %2164, %2167 -< %2169 = call float @llvm.sqrt.f32.161(float %2168) -< %2170 = fneg float %815 -< %2171 = fmul float %2169, %2170 -< %2172 = fmul float %2171, 0.000000e+00 -< %2173 = bitcast i32 %157 to float -< %2174 = fadd float %2173, %2172 -< %2175 = bitcast i32 %970 to float -< %2176 = bitcast i32 %970 to float -< %2177 = fmul float %2175, %2176 -< %2178 = fadd float %2177, 0.000000e+00 -< %2179 = bitcast i32 %157 to float -< %2180 = bitcast i32 %157 to float -< %2181 = fmul float %2179, %2180 -< %2182 = fadd float %2178, %2181 -< %2183 = call float @llvm.sqrt.f32.162(float %2182) -< %2184 = fneg float %815 -< %2185 = fmul float %2183, %2184 -< %2186 = bitcast i32 %970 to float -< %2187 = fadd float %2186, %2185 -< %2188 = bitcast i32 %970 to float -< %2189 = bitcast i32 %970 to float -< %2190 = fmul float %2188, %2189 -< %2191 = fadd float %2190, 0.000000e+00 -< %2192 = bitcast i32 %157 to float -< %2193 = bitcast i32 %157 to float -< %2194 = fmul float %2192, %2193 -< %2195 = fadd float %2191, %2194 -< %2196 = call float @llvm.sqrt.f32.163(float %2195) -< %2197 = fneg float %815 -< %2198 = fmul float %2196, %2197 -< %2199 = bitcast i32 %970 to float -< %2200 = fadd float %2199, %2198 -< %2201 = fmul float %2187, %2200 -< %2202 = fadd float %2201, 0.000000e+00 -< %2203 = bitcast i32 %970 to float -< %2204 = bitcast i32 %970 to float -< %2205 = fmul float %2203, %2204 -< %2206 = fadd float %2205, 0.000000e+00 -< %2207 = bitcast i32 %157 to float -< %2208 = bitcast i32 %157 to float -< %2209 = fmul float %2207, %2208 -< %2210 = fadd float %2206, %2209 -< %2211 = call float @llvm.sqrt.f32.164(float %2210) -< %2212 = fneg float %815 -< %2213 = fmul float %2211, %2212 -< %2214 = fmul float %2213, 0.000000e+00 -< %2215 = bitcast i32 %157 to float -< %2216 = fadd float %2215, %2214 -< %2217 = bitcast i32 %970 to float -< %2218 = bitcast i32 %970 to float -< %2219 = fmul float %2217, %2218 -< %2220 = fadd float %2219, 0.000000e+00 -< %2221 = bitcast i32 %157 to float -< %2222 = bitcast i32 %157 to float -< %2223 = fmul float %2221, %2222 -< %2224 = fadd float %2220, %2223 -< %2225 = call float 
@llvm.sqrt.f32.165(float %2224) -< %2226 = fneg float %815 -< %2227 = fmul float %2225, %2226 -< %2228 = fmul float %2227, 0.000000e+00 -< %2229 = bitcast i32 %157 to float -< %2230 = fadd float %2229, %2228 -< %2231 = fmul float %2216, %2230 -< %2232 = fadd float %2202, %2231 -< %2233 = call float @llvm.sqrt.f32.166(float %2232) -< %2234 = fadd float %2233, 0.000000e+00 -< %2235 = fdiv float %2174, %2234 -< %2236 = fmul float %2160, %2235 -< %2237 = fsub float 1.000000e+00, %2236 -< %2238 = load float, float* %129, align 4 -< %2239 = fmul float %2237, %2238 -< %2240 = fadd float %2084, %2239 -< %2241 = insertelement <4 x float> zeroinitializer, float %2240, i32 0 -< %2242 = insertelement <4 x float> %2241, float 0.000000e+00, i32 1 -< %2243 = insertelement <4 x float> %2242, float 0.000000e+00, i32 2 -< %2244 = insertelement <4 x float> %2243, float 0.000000e+00, i32 3 -< %2245 = extractelement <4 x float> %2244, i32 0 -< store float %2245, float* %140, align 4 -< %2246 = extractelement <4 x float> %2244, i32 1 -< %2247 = getelementptr float, float* %2, i32 0 -< %2248 = getelementptr inbounds float, float* %2247, i64 3 -< store float %2246, float* %2248, align 4 -< %2249 = bitcast i32 %970 to float -< %2250 = bitcast i32 %970 to float -< %2251 = fmul float %2249, %2250 -< %2252 = fadd float %2251, 0.000000e+00 -< %2253 = bitcast i32 %157 to float -< %2254 = bitcast i32 %157 to float -< %2255 = fmul float %2253, %2254 -< %2256 = fadd float %2252, %2255 -< %2257 = call float @llvm.sqrt.f32.167(float %2256) -< %2258 = fneg float %815 -< %2259 = fmul float %2257, %2258 -< %2260 = fmul float %2259, 0.000000e+00 -< %2261 = bitcast i32 %157 to float -< %2262 = fadd float %2261, %2260 -< %2263 = bitcast i32 %970 to float -< %2264 = bitcast i32 %970 to float -< %2265 = fmul float %2263, %2264 -< %2266 = fadd float %2265, 0.000000e+00 -< %2267 = bitcast i32 %157 to float -< %2268 = bitcast i32 %157 to float -< %2269 = fmul float %2267, %2268 -< %2270 = fadd float %2266, %2269 -< %2271 = call float @llvm.sqrt.f32.168(float %2270) -< %2272 = fneg float %815 -< %2273 = fmul float %2271, %2272 -< %2274 = bitcast i32 %970 to float -< %2275 = fadd float %2274, %2273 -< %2276 = bitcast i32 %970 to float -< %2277 = bitcast i32 %970 to float -< %2278 = fmul float %2276, %2277 -< %2279 = fadd float %2278, 0.000000e+00 -< %2280 = bitcast i32 %157 to float -< %2281 = bitcast i32 %157 to float -< %2282 = fmul float %2280, %2281 -< %2283 = fadd float %2279, %2282 -< %2284 = call float @llvm.sqrt.f32.169(float %2283) -< %2285 = fneg float %815 -< %2286 = fmul float %2284, %2285 -< %2287 = bitcast i32 %970 to float -< %2288 = fadd float %2287, %2286 -< %2289 = fmul float %2275, %2288 -< %2290 = fadd float %2289, 0.000000e+00 -< %2291 = bitcast i32 %970 to float -< %2292 = bitcast i32 %970 to float -< %2293 = fmul float %2291, %2292 -< %2294 = fadd float %2293, 0.000000e+00 -< %2295 = bitcast i32 %157 to float -< %2296 = bitcast i32 %157 to float -< %2297 = fmul float %2295, %2296 -< %2298 = fadd float %2294, %2297 -< %2299 = call float @llvm.sqrt.f32.170(float %2298) -< %2300 = fneg float %815 -< %2301 = fmul float %2299, %2300 -< %2302 = fmul float %2301, 0.000000e+00 -< %2303 = bitcast i32 %157 to float -< %2304 = fadd float %2303, %2302 -< %2305 = bitcast i32 %970 to float -< %2306 = bitcast i32 %970 to float -< %2307 = fmul float %2305, %2306 -< %2308 = fadd float %2307, 0.000000e+00 -< %2309 = bitcast i32 %157 to float -< %2310 = bitcast i32 %157 to float -< %2311 = fmul float %2309, %2310 -< %2312 = fadd 
float %2308, %2311 -< %2313 = call float @llvm.sqrt.f32.171(float %2312) -< %2314 = fneg float %815 -< %2315 = fmul float %2313, %2314 -< %2316 = fmul float %2315, 0.000000e+00 -< %2317 = bitcast i32 %157 to float -< %2318 = fadd float %2317, %2316 -< %2319 = fmul float %2304, %2318 -< %2320 = fadd float %2290, %2319 -< %2321 = call float @llvm.sqrt.f32.172(float %2320) -< %2322 = fadd float %2321, 0.000000e+00 -< %2323 = fdiv float %2262, %2322 -< %2324 = fmul float %2323, 2.000000e+00 -< %2325 = bitcast i32 %970 to float -< %2326 = bitcast i32 %970 to float -< %2327 = fmul float %2325, %2326 -< %2328 = fadd float %2327, 0.000000e+00 -< %2329 = bitcast i32 %157 to float -< %2330 = bitcast i32 %157 to float -< %2331 = fmul float %2329, %2330 -< %2332 = fadd float %2328, %2331 -< %2333 = call float @llvm.sqrt.f32.173(float %2332) -< %2334 = fneg float %815 -< %2335 = fmul float %2333, %2334 -< %2336 = bitcast i32 %970 to float -< %2337 = fadd float %2336, %2335 -< %2338 = bitcast i32 %970 to float -< %2339 = bitcast i32 %970 to float -< %2340 = fmul float %2338, %2339 -< %2341 = fadd float %2340, 0.000000e+00 -< %2342 = bitcast i32 %157 to float -< %2343 = bitcast i32 %157 to float -< %2344 = fmul float %2342, %2343 -< %2345 = fadd float %2341, %2344 -< %2346 = call float @llvm.sqrt.f32.174(float %2345) -< %2347 = fneg float %815 -< %2348 = fmul float %2346, %2347 -< %2349 = bitcast i32 %970 to float -< %2350 = fadd float %2349, %2348 -< %2351 = bitcast i32 %970 to float -< %2352 = bitcast i32 %970 to float -< %2353 = fmul float %2351, %2352 -< %2354 = fadd float %2353, 0.000000e+00 -< %2355 = bitcast i32 %157 to float -< %2356 = bitcast i32 %157 to float -< %2357 = fmul float %2355, %2356 -< %2358 = fadd float %2354, %2357 -< %2359 = call float @llvm.sqrt.f32.175(float %2358) -< %2360 = fneg float %815 -< %2361 = fmul float %2359, %2360 -< %2362 = bitcast i32 %970 to float -< %2363 = fadd float %2362, %2361 -< %2364 = fmul float %2350, %2363 -< %2365 = fadd float %2364, 0.000000e+00 -< %2366 = bitcast i32 %970 to float -< %2367 = bitcast i32 %970 to float -< %2368 = fmul float %2366, %2367 -< %2369 = fadd float %2368, 0.000000e+00 -< %2370 = bitcast i32 %157 to float -< %2371 = bitcast i32 %157 to float -< %2372 = fmul float %2370, %2371 -< %2373 = fadd float %2369, %2372 -< %2374 = call float @llvm.sqrt.f32.176(float %2373) -< %2375 = fneg float %815 -< %2376 = fmul float %2374, %2375 -< %2377 = fmul float %2376, 0.000000e+00 -< %2378 = bitcast i32 %157 to float -< %2379 = fadd float %2378, %2377 -< %2380 = bitcast i32 %970 to float -< %2381 = bitcast i32 %970 to float -< %2382 = fmul float %2380, %2381 -< %2383 = fadd float %2382, 0.000000e+00 -< %2384 = bitcast i32 %157 to float -< %2385 = bitcast i32 %157 to float -< %2386 = fmul float %2384, %2385 -< %2387 = fadd float %2383, %2386 -< %2388 = call float @llvm.sqrt.f32.177(float %2387) -< %2389 = fneg float %815 -< %2390 = fmul float %2388, %2389 -< %2391 = fmul float %2390, 0.000000e+00 -< %2392 = bitcast i32 %157 to float -< %2393 = fadd float %2392, %2391 -< %2394 = fmul float %2379, %2393 -< %2395 = fadd float %2365, %2394 -< %2396 = call float @llvm.sqrt.f32.178(float %2395) -< %2397 = fadd float %2396, 0.000000e+00 -< %2398 = fdiv float %2337, %2397 -< %2399 = fmul float %2324, %2398 -< %2400 = fneg float %2399 -< %2401 = insertelement <4 x float> zeroinitializer, float %2400, i32 0 -< %2402 = insertelement <4 x float> %2401, float 0.000000e+00, i32 1 -< %2403 = insertelement <4 x float> %2402, float 0.000000e+00, i32 2 -< %2404 
= insertelement <4 x float> %2403, float 0.000000e+00, i32 3 -< %2405 = load float, float* %1443, align 4 -< %2406 = insertelement <4 x float> zeroinitializer, float %2405, i32 0 -< %2407 = insertelement <4 x float> %2406, float 0.000000e+00, i32 1 -< %2408 = insertelement <4 x float> %2407, float 0.000000e+00, i32 2 -< %2409 = insertelement <4 x float> %2408, float 0.000000e+00, i32 3 -< %2410 = call <4 x float> @llvm.fma.f32.179(<4 x float> %2404, <4 x float> %2409, <4 x float> zeroinitializer) -< %2411 = extractelement <4 x float> %2410, i32 0 -< store float %2411, float* %2248, align 4 -< %2412 = bitcast i32 %970 to float -< %2413 = bitcast i32 %970 to float -< %2414 = fmul float %2412, %2413 -< %2415 = fadd float %2414, 0.000000e+00 -< %2416 = bitcast i32 %157 to float -< %2417 = bitcast i32 %157 to float -< %2418 = fmul float %2416, %2417 -< %2419 = fadd float %2415, %2418 -< %2420 = call float @llvm.sqrt.f32.180(float %2419) -< %2421 = fneg float %815 -< %2422 = fmul float %2420, %2421 -< %2423 = fmul float %2422, 0.000000e+00 -< %2424 = bitcast i32 %157 to float -< %2425 = fadd float %2424, %2423 -< %2426 = bitcast i32 %970 to float -< %2427 = bitcast i32 %970 to float -< %2428 = fmul float %2426, %2427 -< %2429 = fadd float %2428, 0.000000e+00 -< %2430 = bitcast i32 %157 to float -< %2431 = bitcast i32 %157 to float -< %2432 = fmul float %2430, %2431 -< %2433 = fadd float %2429, %2432 -< %2434 = call float @llvm.sqrt.f32.181(float %2433) -< %2435 = fneg float %815 -< %2436 = fmul float %2434, %2435 -< %2437 = bitcast i32 %970 to float -< %2438 = fadd float %2437, %2436 -< %2439 = bitcast i32 %970 to float -< %2440 = bitcast i32 %970 to float -< %2441 = fmul float %2439, %2440 -< %2442 = fadd float %2441, 0.000000e+00 -< %2443 = bitcast i32 %157 to float -< %2444 = bitcast i32 %157 to float -< %2445 = fmul float %2443, %2444 -< %2446 = fadd float %2442, %2445 -< %2447 = call float @llvm.sqrt.f32.182(float %2446) -< %2448 = fneg float %815 -< %2449 = fmul float %2447, %2448 -< %2450 = bitcast i32 %970 to float -< %2451 = fadd float %2450, %2449 -< %2452 = fmul float %2438, %2451 -< %2453 = fadd float %2452, 0.000000e+00 -< %2454 = bitcast i32 %970 to float -< %2455 = bitcast i32 %970 to float -< %2456 = fmul float %2454, %2455 -< %2457 = fadd float %2456, 0.000000e+00 -< %2458 = bitcast i32 %157 to float -< %2459 = bitcast i32 %157 to float -< %2460 = fmul float %2458, %2459 -< %2461 = fadd float %2457, %2460 -< %2462 = call float @llvm.sqrt.f32.183(float %2461) -< %2463 = fneg float %815 -< %2464 = fmul float %2462, %2463 -< %2465 = fmul float %2464, 0.000000e+00 -< %2466 = bitcast i32 %157 to float -< %2467 = fadd float %2466, %2465 -< %2468 = bitcast i32 %970 to float -< %2469 = bitcast i32 %970 to float -< %2470 = fmul float %2468, %2469 -< %2471 = fadd float %2470, 0.000000e+00 -< %2472 = bitcast i32 %157 to float -< %2473 = bitcast i32 %157 to float -< %2474 = fmul float %2472, %2473 -< %2475 = fadd float %2471, %2474 -< %2476 = call float @llvm.sqrt.f32.184(float %2475) -< %2477 = fneg float %815 -< %2478 = fmul float %2476, %2477 -< %2479 = fmul float %2478, 0.000000e+00 -< %2480 = bitcast i32 %157 to float -< %2481 = fadd float %2480, %2479 -< %2482 = fmul float %2467, %2481 -< %2483 = fadd float %2453, %2482 -< %2484 = call float @llvm.sqrt.f32.185(float %2483) -< %2485 = fadd float %2484, 0.000000e+00 -< %2486 = fdiv float %2425, %2485 -< %2487 = fmul float %2486, 2.000000e+00 -< %2488 = bitcast i32 %970 to float -< %2489 = bitcast i32 %970 to float -< %2490 = fmul float 
%2488, %2489 -< %2491 = fadd float %2490, 0.000000e+00 -< %2492 = bitcast i32 %157 to float -< %2493 = bitcast i32 %157 to float -< %2494 = fmul float %2492, %2493 -< %2495 = fadd float %2491, %2494 -< %2496 = call float @llvm.sqrt.f32.186(float %2495) -< %2497 = fneg float %815 -< %2498 = fmul float %2496, %2497 -< %2499 = bitcast i32 %970 to float -< %2500 = fadd float %2499, %2498 -< %2501 = bitcast i32 %970 to float -< %2502 = bitcast i32 %970 to float -< %2503 = fmul float %2501, %2502 -< %2504 = fadd float %2503, 0.000000e+00 -< %2505 = bitcast i32 %157 to float -< %2506 = bitcast i32 %157 to float -< %2507 = fmul float %2505, %2506 -< %2508 = fadd float %2504, %2507 -< %2509 = call float @llvm.sqrt.f32.187(float %2508) -< %2510 = fneg float %815 -< %2511 = fmul float %2509, %2510 -< %2512 = bitcast i32 %970 to float -< %2513 = fadd float %2512, %2511 -< %2514 = bitcast i32 %970 to float -< %2515 = bitcast i32 %970 to float -< %2516 = fmul float %2514, %2515 -< %2517 = fadd float %2516, 0.000000e+00 -< %2518 = bitcast i32 %157 to float -< %2519 = bitcast i32 %157 to float -< %2520 = fmul float %2518, %2519 -< %2521 = fadd float %2517, %2520 -< %2522 = call float @llvm.sqrt.f32.188(float %2521) -< %2523 = fneg float %815 -< %2524 = fmul float %2522, %2523 -< %2525 = bitcast i32 %970 to float -< %2526 = fadd float %2525, %2524 -< %2527 = fmul float %2513, %2526 -< %2528 = fadd float %2527, 0.000000e+00 -< %2529 = bitcast i32 %970 to float -< %2530 = bitcast i32 %970 to float -< %2531 = fmul float %2529, %2530 -< %2532 = fadd float %2531, 0.000000e+00 -< %2533 = bitcast i32 %157 to float -< %2534 = bitcast i32 %157 to float -< %2535 = fmul float %2533, %2534 -< %2536 = fadd float %2532, %2535 -< %2537 = call float @llvm.sqrt.f32.189(float %2536) -< %2538 = fneg float %815 -< %2539 = fmul float %2537, %2538 -< %2540 = fmul float %2539, 0.000000e+00 -< %2541 = bitcast i32 %157 to float -< %2542 = fadd float %2541, %2540 -< %2543 = bitcast i32 %970 to float -< %2544 = bitcast i32 %970 to float -< %2545 = fmul float %2543, %2544 -< %2546 = fadd float %2545, 0.000000e+00 -< %2547 = bitcast i32 %157 to float -< %2548 = bitcast i32 %157 to float -< %2549 = fmul float %2547, %2548 -< %2550 = fadd float %2546, %2549 -< %2551 = call float @llvm.sqrt.f32.190(float %2550) -< %2552 = fneg float %815 -< %2553 = fmul float %2551, %2552 -< %2554 = fmul float %2553, 0.000000e+00 -< %2555 = bitcast i32 %157 to float -< %2556 = fadd float %2555, %2554 -< %2557 = fmul float %2542, %2556 -< %2558 = fadd float %2528, %2557 -< %2559 = call float @llvm.sqrt.f32.191(float %2558) -< %2560 = fadd float %2559, 0.000000e+00 -< %2561 = fdiv float %2500, %2560 -< %2562 = fmul float %2487, %2561 -< %2563 = fneg float %2562 -< %2564 = fmul float %2563, %2405 -< %2565 = fadd float %2564, 0.000000e+00 -< %2566 = bitcast i32 %970 to float -< %2567 = bitcast i32 %970 to float -< %2568 = fmul float %2566, %2567 -< %2569 = fadd float %2568, 0.000000e+00 -< %2570 = bitcast i32 %157 to float -< %2571 = bitcast i32 %157 to float -< %2572 = fmul float %2570, %2571 -< %2573 = fadd float %2569, %2572 -< %2574 = call float @llvm.sqrt.f32.192(float %2573) -< %2575 = fneg float %815 -< %2576 = fmul float %2574, %2575 -< %2577 = fmul float %2576, 0.000000e+00 -< %2578 = bitcast i32 %157 to float -< %2579 = fadd float %2578, %2577 -< %2580 = bitcast i32 %970 to float -< %2581 = bitcast i32 %970 to float -< %2582 = fmul float %2580, %2581 -< %2583 = fadd float %2582, 0.000000e+00 -< %2584 = bitcast i32 %157 to float -< %2585 = bitcast 
i32 %157 to float -< %2586 = fmul float %2584, %2585 -< %2587 = fadd float %2583, %2586 -< %2588 = call float @llvm.sqrt.f32.193(float %2587) -< %2589 = fneg float %815 -< %2590 = fmul float %2588, %2589 -< %2591 = bitcast i32 %970 to float -< %2592 = fadd float %2591, %2590 -< %2593 = bitcast i32 %970 to float -< %2594 = bitcast i32 %970 to float -< %2595 = fmul float %2593, %2594 -< %2596 = fadd float %2595, 0.000000e+00 -< %2597 = bitcast i32 %157 to float -< %2598 = bitcast i32 %157 to float -< %2599 = fmul float %2597, %2598 -< %2600 = fadd float %2596, %2599 -< %2601 = call float @llvm.sqrt.f32.194(float %2600) -< %2602 = fneg float %815 -< %2603 = fmul float %2601, %2602 -< %2604 = bitcast i32 %970 to float -< %2605 = fadd float %2604, %2603 -< %2606 = fmul float %2592, %2605 -< %2607 = fadd float %2606, 0.000000e+00 -< %2608 = bitcast i32 %970 to float -< %2609 = bitcast i32 %970 to float -< %2610 = fmul float %2608, %2609 -< %2611 = fadd float %2610, 0.000000e+00 -< %2612 = bitcast i32 %157 to float -< %2613 = bitcast i32 %157 to float -< %2614 = fmul float %2612, %2613 -< %2615 = fadd float %2611, %2614 -< %2616 = call float @llvm.sqrt.f32.195(float %2615) -< %2617 = fneg float %815 -< %2618 = fmul float %2616, %2617 -< %2619 = fmul float %2618, 0.000000e+00 -< %2620 = bitcast i32 %157 to float -< %2621 = fadd float %2620, %2619 -< %2622 = bitcast i32 %970 to float -< %2623 = bitcast i32 %970 to float -< %2624 = fmul float %2622, %2623 -< %2625 = fadd float %2624, 0.000000e+00 -< %2626 = bitcast i32 %157 to float -< %2627 = bitcast i32 %157 to float -< %2628 = fmul float %2626, %2627 -< %2629 = fadd float %2625, %2628 -< %2630 = call float @llvm.sqrt.f32.196(float %2629) -< %2631 = fneg float %815 -< %2632 = fmul float %2630, %2631 -< %2633 = fmul float %2632, 0.000000e+00 -< %2634 = bitcast i32 %157 to float -< %2635 = fadd float %2634, %2633 -< %2636 = fmul float %2621, %2635 -< %2637 = fadd float %2607, %2636 -< %2638 = call float @llvm.sqrt.f32.197(float %2637) -< %2639 = fadd float %2638, 0.000000e+00 -< %2640 = fdiv float %2579, %2639 -< %2641 = fmul float %2640, 2.000000e+00 -< %2642 = bitcast i32 %970 to float -< %2643 = bitcast i32 %970 to float -< %2644 = fmul float %2642, %2643 -< %2645 = fadd float %2644, 0.000000e+00 -< %2646 = bitcast i32 %157 to float -< %2647 = bitcast i32 %157 to float -< %2648 = fmul float %2646, %2647 -< %2649 = fadd float %2645, %2648 -< %2650 = call float @llvm.sqrt.f32.198(float %2649) -< %2651 = fneg float %815 -< %2652 = fmul float %2650, %2651 -< %2653 = fmul float %2652, 0.000000e+00 -< %2654 = bitcast i32 %157 to float -< %2655 = fadd float %2654, %2653 -< %2656 = bitcast i32 %970 to float -< %2657 = bitcast i32 %970 to float -< %2658 = fmul float %2656, %2657 -< %2659 = fadd float %2658, 0.000000e+00 -< %2660 = bitcast i32 %157 to float -< %2661 = bitcast i32 %157 to float -< %2662 = fmul float %2660, %2661 -< %2663 = fadd float %2659, %2662 -< %2664 = call float @llvm.sqrt.f32.199(float %2663) -< %2665 = fneg float %815 -< %2666 = fmul float %2664, %2665 -< %2667 = bitcast i32 %970 to float -< %2668 = fadd float %2667, %2666 -< %2669 = bitcast i32 %970 to float -< %2670 = bitcast i32 %970 to float -< %2671 = fmul float %2669, %2670 -< %2672 = fadd float %2671, 0.000000e+00 -< %2673 = bitcast i32 %157 to float -< %2674 = bitcast i32 %157 to float -< %2675 = fmul float %2673, %2674 -< %2676 = fadd float %2672, %2675 -< %2677 = call float @llvm.sqrt.f32.200(float %2676) -< %2678 = fneg float %815 -< %2679 = fmul float %2677, %2678 -< 
%2680 = bitcast i32 %970 to float -< %2681 = fadd float %2680, %2679 -< %2682 = fmul float %2668, %2681 -< %2683 = fadd float %2682, 0.000000e+00 -< %2684 = bitcast i32 %970 to float -< %2685 = bitcast i32 %970 to float -< %2686 = fmul float %2684, %2685 -< %2687 = fadd float %2686, 0.000000e+00 -< %2688 = bitcast i32 %157 to float -< %2689 = bitcast i32 %157 to float -< %2690 = fmul float %2688, %2689 -< %2691 = fadd float %2687, %2690 -< %2692 = call float @llvm.sqrt.f32.201(float %2691) -< %2693 = fneg float %815 -< %2694 = fmul float %2692, %2693 -< %2695 = fmul float %2694, 0.000000e+00 -< %2696 = bitcast i32 %157 to float -< %2697 = fadd float %2696, %2695 -< %2698 = bitcast i32 %970 to float -< %2699 = bitcast i32 %970 to float -< %2700 = fmul float %2698, %2699 -< %2701 = fadd float %2700, 0.000000e+00 -< %2702 = bitcast i32 %157 to float -< %2703 = bitcast i32 %157 to float -< %2704 = fmul float %2702, %2703 -< %2705 = fadd float %2701, %2704 -< %2706 = call float @llvm.sqrt.f32.202(float %2705) -< %2707 = fneg float %815 -< %2708 = fmul float %2706, %2707 -< %2709 = fmul float %2708, 0.000000e+00 -< %2710 = bitcast i32 %157 to float -< %2711 = fadd float %2710, %2709 -< %2712 = fmul float %2697, %2711 -< %2713 = fadd float %2683, %2712 -< %2714 = call float @llvm.sqrt.f32.203(float %2713) -< %2715 = fadd float %2714, 0.000000e+00 -< %2716 = fdiv float %2655, %2715 -< %2717 = fmul float %2641, %2716 -< %2718 = fsub float 1.000000e+00, %2717 -< %2719 = load float, float* %1757, align 4 -< %2720 = fmul float %2718, %2719 -< %2721 = fadd float %2565, %2720 -< %2722 = insertelement <4 x float> zeroinitializer, float %2721, i32 0 -< %2723 = insertelement <4 x float> %2722, float 0.000000e+00, i32 1 -< %2724 = insertelement <4 x float> %2723, float 0.000000e+00, i32 2 -< %2725 = insertelement <4 x float> %2724, float 0.000000e+00, i32 3 -< %2726 = extractelement <4 x float> %2725, i32 0 -< store float %2726, float* %2248, align 4 -< %2727 = getelementptr float, float* %1, i32 0 -< %2728 = getelementptr inbounds float, float* %2727, i64 2 -< %2729 = bitcast float* %2728 to i32* -< %2730 = load i32, i32* %2729, align 4 -< %2731 = bitcast i32 %2730 to float -< %2732 = insertelement <4 x float> zeroinitializer, float %2731, i32 0 -< %2733 = getelementptr float, float* %1, i32 0 -< %2734 = getelementptr inbounds float, float* %2733, i64 1 -< %2735 = bitcast float* %2734 to i32* -< %2736 = load i32, i32* %2735, align 4 -< %2737 = bitcast i32 %2736 to float -< %2738 = insertelement <4 x float> %2732, float %2737, i32 1 -< %2739 = insertelement <4 x float> %2738, float 0.000000e+00, i32 2 -< %2740 = insertelement <4 x float> %2739, float 0.000000e+00, i32 3 -< %2741 = extractelement <4 x float> %2740, i32 0 -< %2742 = bitcast i32* %95 to float* -< %2743 = bitcast i32* %2735 to float* -< store float %2741, float* %2743, align 4 -< %2744 = extractelement <4 x float> %2740, i32 1 -< %2745 = bitcast i32* %98 to float* -< %2746 = bitcast i32* %2729 to float* -< store float %2744, float* %2746, align 4 ---- -> %803 = bitcast i32 %152 to float -> %804 = bitcast i32 %152 to float -> %805 = fmul float %803, %804 -> %806 = fadd float %805, 0.000000e+00 -> %807 = load i32, i32* %130, align 4 -> %808 = bitcast i32 %807 to float -> %809 = bitcast i32 %807 to float -> %810 = fmul float %808, %809 -> %811 = fadd float %806, %810 -> %812 = call float @llvm.sqrt.f32.56(float %811) -> %813 = sitofp i32 %167 to float -> %814 = fneg float %813 -> %815 = fmul float %812, %814 -> %816 = bitcast i32 %152 to float -> 
%817 = fadd float %816, %815 -> %818 = bitcast i32 %152 to float -> %819 = bitcast i32 %152 to float -> %820 = fmul float %818, %819 -> %821 = fadd float %820, 0.000000e+00 -> %822 = bitcast i32 %807 to float -> %823 = bitcast i32 %807 to float -> %824 = fmul float %822, %823 -> %825 = fadd float %821, %824 -> %826 = call float @llvm.sqrt.f32.57(float %825) -> %827 = fneg float %813 -> %828 = fmul float %826, %827 -> %829 = bitcast i32 %152 to float -> %830 = fadd float %829, %828 -> %831 = bitcast i32 %152 to float -> %832 = bitcast i32 %152 to float -> %833 = fmul float %831, %832 -> %834 = fadd float %833, 0.000000e+00 -> %835 = bitcast i32 %807 to float -> %836 = bitcast i32 %807 to float -> %837 = fmul float %835, %836 -> %838 = fadd float %834, %837 -> %839 = call float @llvm.sqrt.f32.58(float %838) -> %840 = fneg float %813 -> %841 = fmul float %839, %840 -> %842 = bitcast i32 %152 to float -> %843 = fadd float %842, %841 -> %844 = fmul float %830, %843 -> %845 = fadd float %844, 0.000000e+00 -> %846 = bitcast i32 %152 to float -> %847 = bitcast i32 %152 to float -> %848 = fmul float %846, %847 -> %849 = fadd float %848, 0.000000e+00 -> %850 = bitcast i32 %807 to float -> %851 = bitcast i32 %807 to float -> %852 = fmul float %850, %851 -> %853 = fadd float %849, %852 -> %854 = call float @llvm.sqrt.f32.59(float %853) -> %855 = fneg float %813 -> %856 = fmul float %854, %855 -> %857 = fmul float %856, 0.000000e+00 -> %858 = bitcast i32 %807 to float -> %859 = fadd float %858, %857 -> %860 = bitcast i32 %152 to float -> %861 = bitcast i32 %152 to float -> %862 = fmul float %860, %861 -> %863 = fadd float %862, 0.000000e+00 -> %864 = bitcast i32 %807 to float -> %865 = bitcast i32 %807 to float -> %866 = fmul float %864, %865 -> %867 = fadd float %863, %866 -> %868 = call float @llvm.sqrt.f32.60(float %867) -> %869 = fneg float %813 -> %870 = fmul float %868, %869 -> %871 = fmul float %870, 0.000000e+00 -> %872 = bitcast i32 %807 to float -> %873 = fadd float %872, %871 -> %874 = fmul float %859, %873 -> %875 = fadd float %845, %874 -> %876 = call float @llvm.sqrt.f32.61(float %875) -> %877 = fadd float %876, 0.000000e+00 -> %878 = fdiv float %817, %877 -> %879 = fmul float %878, 2.000000e+00 -> %880 = bitcast i32 %152 to float -> %881 = bitcast i32 %152 to float -> %882 = fmul float %880, %881 -> %883 = fadd float %882, 0.000000e+00 -> %884 = bitcast i32 %807 to float -> %885 = bitcast i32 %807 to float -> %886 = fmul float %884, %885 -> %887 = fadd float %883, %886 -> %888 = call float @llvm.sqrt.f32.62(float %887) -> %889 = fneg float %813 -> %890 = fmul float %888, %889 -> %891 = bitcast i32 %152 to float -> %892 = fadd float %891, %890 -> %893 = bitcast i32 %152 to float -> %894 = bitcast i32 %152 to float -> %895 = fmul float %893, %894 -> %896 = fadd float %895, 0.000000e+00 -> %897 = bitcast i32 %807 to float -> %898 = bitcast i32 %807 to float -> %899 = fmul float %897, %898 -> %900 = fadd float %896, %899 -> %901 = call float @llvm.sqrt.f32.63(float %900) -> %902 = fneg float %813 -> %903 = fmul float %901, %902 -> %904 = bitcast i32 %152 to float -> %905 = fadd float %904, %903 -> %906 = bitcast i32 %152 to float -> %907 = bitcast i32 %152 to float -> %908 = fmul float %906, %907 -> %909 = fadd float %908, 0.000000e+00 -> %910 = bitcast i32 %807 to float -> %911 = bitcast i32 %807 to float -> %912 = fmul float %910, %911 -> %913 = fadd float %909, %912 -> %914 = call float @llvm.sqrt.f32.64(float %913) -> %915 = fneg float %813 -> %916 = fmul float %914, %915 -> %917 = 
bitcast i32 %152 to float -> %918 = fadd float %917, %916 -> %919 = fmul float %905, %918 -> %920 = fadd float %919, 0.000000e+00 -> %921 = bitcast i32 %152 to float -> %922 = bitcast i32 %152 to float -> %923 = fmul float %921, %922 -> %924 = fadd float %923, 0.000000e+00 -> %925 = bitcast i32 %807 to float -> %926 = bitcast i32 %807 to float -> %927 = fmul float %925, %926 -> %928 = fadd float %924, %927 -> %929 = call float @llvm.sqrt.f32.65(float %928) -> %930 = fneg float %813 -> %931 = fmul float %929, %930 -> %932 = fmul float %931, 0.000000e+00 -> %933 = bitcast i32 %807 to float -> %934 = fadd float %933, %932 -> %935 = bitcast i32 %152 to float -> %936 = bitcast i32 %152 to float -> %937 = fmul float %935, %936 -> %938 = fadd float %937, 0.000000e+00 -> %939 = bitcast i32 %807 to float -> %940 = bitcast i32 %807 to float -> %941 = fmul float %939, %940 -> %942 = fadd float %938, %941 -> %943 = call float @llvm.sqrt.f32.66(float %942) -> %944 = fneg float %813 -> %945 = fmul float %943, %944 -> %946 = fmul float %945, 0.000000e+00 -> %947 = bitcast i32 %807 to float -> %948 = fadd float %947, %946 -> %949 = fmul float %934, %948 -> %950 = fadd float %920, %949 -> %951 = call float @llvm.sqrt.f32.67(float %950) -> %952 = fadd float %951, 0.000000e+00 -> %953 = fdiv float %892, %952 -> %954 = fmul float %879, %953 -> %955 = fsub float 1.000000e+00, %954 -> %956 = insertelement <4 x float> zeroinitializer, float %955, i32 0 -> %957 = insertelement <4 x float> %956, float 0.000000e+00, i32 1 -> %958 = insertelement <4 x float> %957, float 0.000000e+00, i32 2 -> %959 = insertelement <4 x float> %958, float 0.000000e+00, i32 3 -> %960 = getelementptr float, float* %0, i32 0 -> %961 = load float, float* %960, align 4 -> %962 = insertelement <4 x float> zeroinitializer, float %961, i32 0 -> %963 = insertelement <4 x float> %962, float 0.000000e+00, i32 1 -> %964 = insertelement <4 x float> %963, float 0.000000e+00, i32 2 -> %965 = insertelement <4 x float> %964, float 0.000000e+00, i32 3 -> %966 = call <4 x float> @llvm.fma.f32.68(<4 x float> %959, <4 x float> %965, <4 x float> zeroinitializer) -> %967 = extractelement <4 x float> %966, i32 0 -> store float %967, float* %2, align 4 -> %968 = bitcast i32 %152 to float -> %969 = bitcast i32 %152 to float -> %970 = fmul float %968, %969 -> %971 = fadd float %970, 0.000000e+00 -> %972 = bitcast i32 %807 to float -> %973 = bitcast i32 %807 to float -> %974 = fmul float %972, %973 -> %975 = fadd float %971, %974 -> %976 = call float @llvm.sqrt.f32.69(float %975) -> %977 = fneg float %813 -> %978 = fmul float %976, %977 -> %979 = bitcast i32 %152 to float -> %980 = fadd float %979, %978 -> %981 = bitcast i32 %152 to float -> %982 = bitcast i32 %152 to float -> %983 = fmul float %981, %982 -> %984 = fadd float %983, 0.000000e+00 -> %985 = bitcast i32 %807 to float -> %986 = bitcast i32 %807 to float -> %987 = fmul float %985, %986 -> %988 = fadd float %984, %987 -> %989 = call float @llvm.sqrt.f32.70(float %988) -> %990 = fneg float %813 -> %991 = fmul float %989, %990 -> %992 = bitcast i32 %152 to float -> %993 = fadd float %992, %991 -> %994 = bitcast i32 %152 to float -> %995 = bitcast i32 %152 to float -> %996 = fmul float %994, %995 -> %997 = fadd float %996, 0.000000e+00 -> %998 = bitcast i32 %807 to float -> %999 = bitcast i32 %807 to float -> %1000 = fmul float %998, %999 -> %1001 = fadd float %997, %1000 -> %1002 = call float @llvm.sqrt.f32.71(float %1001) -> %1003 = fneg float %813 -> %1004 = fmul float %1002, %1003 -> %1005 = bitcast 
i32 %152 to float -> %1006 = fadd float %1005, %1004 -> %1007 = fmul float %993, %1006 -> %1008 = fadd float %1007, 0.000000e+00 -> %1009 = bitcast i32 %152 to float -> %1010 = bitcast i32 %152 to float -> %1011 = fmul float %1009, %1010 -> %1012 = fadd float %1011, 0.000000e+00 -> %1013 = bitcast i32 %807 to float -> %1014 = bitcast i32 %807 to float -> %1015 = fmul float %1013, %1014 -> %1016 = fadd float %1012, %1015 -> %1017 = call float @llvm.sqrt.f32.72(float %1016) -> %1018 = fneg float %813 -> %1019 = fmul float %1017, %1018 -> %1020 = fmul float %1019, 0.000000e+00 -> %1021 = bitcast i32 %807 to float -> %1022 = fadd float %1021, %1020 -> %1023 = bitcast i32 %152 to float -> %1024 = bitcast i32 %152 to float -> %1025 = fmul float %1023, %1024 -> %1026 = fadd float %1025, 0.000000e+00 -> %1027 = bitcast i32 %807 to float -> %1028 = bitcast i32 %807 to float -> %1029 = fmul float %1027, %1028 -> %1030 = fadd float %1026, %1029 -> %1031 = call float @llvm.sqrt.f32.73(float %1030) -> %1032 = fneg float %813 -> %1033 = fmul float %1031, %1032 -> %1034 = fmul float %1033, 0.000000e+00 -> %1035 = bitcast i32 %807 to float -> %1036 = fadd float %1035, %1034 -> %1037 = fmul float %1022, %1036 -> %1038 = fadd float %1008, %1037 -> %1039 = call float @llvm.sqrt.f32.74(float %1038) -> %1040 = fadd float %1039, 0.000000e+00 -> %1041 = fdiv float %980, %1040 -> %1042 = fmul float %1041, 2.000000e+00 -> %1043 = bitcast i32 %152 to float -> %1044 = bitcast i32 %152 to float -> %1045 = fmul float %1043, %1044 -> %1046 = fadd float %1045, 0.000000e+00 -> %1047 = bitcast i32 %807 to float -> %1048 = bitcast i32 %807 to float -> %1049 = fmul float %1047, %1048 -> %1050 = fadd float %1046, %1049 -> %1051 = call float @llvm.sqrt.f32.75(float %1050) -> %1052 = fneg float %813 -> %1053 = fmul float %1051, %1052 -> %1054 = bitcast i32 %152 to float -> %1055 = fadd float %1054, %1053 -> %1056 = bitcast i32 %152 to float -> %1057 = bitcast i32 %152 to float -> %1058 = fmul float %1056, %1057 -> %1059 = fadd float %1058, 0.000000e+00 -> %1060 = bitcast i32 %807 to float -> %1061 = bitcast i32 %807 to float -> %1062 = fmul float %1060, %1061 -> %1063 = fadd float %1059, %1062 -> %1064 = call float @llvm.sqrt.f32.76(float %1063) -> %1065 = fneg float %813 -> %1066 = fmul float %1064, %1065 -> %1067 = bitcast i32 %152 to float -> %1068 = fadd float %1067, %1066 -> %1069 = bitcast i32 %152 to float -> %1070 = bitcast i32 %152 to float -> %1071 = fmul float %1069, %1070 -> %1072 = fadd float %1071, 0.000000e+00 -> %1073 = bitcast i32 %807 to float -> %1074 = bitcast i32 %807 to float -> %1075 = fmul float %1073, %1074 -> %1076 = fadd float %1072, %1075 -> %1077 = call float @llvm.sqrt.f32.77(float %1076) -> %1078 = fneg float %813 -> %1079 = fmul float %1077, %1078 -> %1080 = bitcast i32 %152 to float -> %1081 = fadd float %1080, %1079 -> %1082 = fmul float %1068, %1081 -> %1083 = fadd float %1082, 0.000000e+00 -> %1084 = bitcast i32 %152 to float -> %1085 = bitcast i32 %152 to float -> %1086 = fmul float %1084, %1085 -> %1087 = fadd float %1086, 0.000000e+00 -> %1088 = bitcast i32 %807 to float -> %1089 = bitcast i32 %807 to float -> %1090 = fmul float %1088, %1089 -> %1091 = fadd float %1087, %1090 -> %1092 = call float @llvm.sqrt.f32.78(float %1091) -> %1093 = fneg float %813 -> %1094 = fmul float %1092, %1093 -> %1095 = fmul float %1094, 0.000000e+00 -> %1096 = bitcast i32 %807 to float -> %1097 = fadd float %1096, %1095 -> %1098 = bitcast i32 %152 to float -> %1099 = bitcast i32 %152 to float -> %1100 = fmul 
float %1098, %1099 -> %1101 = fadd float %1100, 0.000000e+00 -> %1102 = bitcast i32 %807 to float -> %1103 = bitcast i32 %807 to float -> %1104 = fmul float %1102, %1103 -> %1105 = fadd float %1101, %1104 -> %1106 = call float @llvm.sqrt.f32.79(float %1105) -> %1107 = fneg float %813 -> %1108 = fmul float %1106, %1107 -> %1109 = fmul float %1108, 0.000000e+00 -> %1110 = bitcast i32 %807 to float -> %1111 = fadd float %1110, %1109 -> %1112 = fmul float %1097, %1111 -> %1113 = fadd float %1083, %1112 -> %1114 = call float @llvm.sqrt.f32.80(float %1113) -> %1115 = fadd float %1114, 0.000000e+00 -> %1116 = fdiv float %1055, %1115 -> %1117 = fmul float %1042, %1116 -> %1118 = fsub float 1.000000e+00, %1117 -> %1119 = fmul float %1118, %961 -> %1120 = fadd float %1119, 0.000000e+00 -> %1121 = bitcast i32 %152 to float -> %1122 = bitcast i32 %152 to float -> %1123 = fmul float %1121, %1122 -> %1124 = fadd float %1123, 0.000000e+00 -> %1125 = bitcast i32 %807 to float -> %1126 = bitcast i32 %807 to float -> %1127 = fmul float %1125, %1126 -> %1128 = fadd float %1124, %1127 -> %1129 = call float @llvm.sqrt.f32.81(float %1128) -> %1130 = fneg float %813 -> %1131 = fmul float %1129, %1130 -> %1132 = bitcast i32 %152 to float -> %1133 = fadd float %1132, %1131 -> %1134 = bitcast i32 %152 to float -> %1135 = bitcast i32 %152 to float -> %1136 = fmul float %1134, %1135 -> %1137 = fadd float %1136, 0.000000e+00 -> %1138 = bitcast i32 %807 to float -> %1139 = bitcast i32 %807 to float -> %1140 = fmul float %1138, %1139 -> %1141 = fadd float %1137, %1140 -> %1142 = call float @llvm.sqrt.f32.82(float %1141) -> %1143 = fneg float %813 -> %1144 = fmul float %1142, %1143 -> %1145 = bitcast i32 %152 to float -> %1146 = fadd float %1145, %1144 -> %1147 = bitcast i32 %152 to float -> %1148 = bitcast i32 %152 to float -> %1149 = fmul float %1147, %1148 -> %1150 = fadd float %1149, 0.000000e+00 -> %1151 = bitcast i32 %807 to float -> %1152 = bitcast i32 %807 to float -> %1153 = fmul float %1151, %1152 -> %1154 = fadd float %1150, %1153 -> %1155 = call float @llvm.sqrt.f32.83(float %1154) -> %1156 = fneg float %813 -> %1157 = fmul float %1155, %1156 -> %1158 = bitcast i32 %152 to float -> %1159 = fadd float %1158, %1157 -> %1160 = fmul float %1146, %1159 -> %1161 = fadd float %1160, 0.000000e+00 -> %1162 = bitcast i32 %152 to float -> %1163 = bitcast i32 %152 to float -> %1164 = fmul float %1162, %1163 -> %1165 = fadd float %1164, 0.000000e+00 -> %1166 = bitcast i32 %807 to float -> %1167 = bitcast i32 %807 to float -> %1168 = fmul float %1166, %1167 -> %1169 = fadd float %1165, %1168 -> %1170 = call float @llvm.sqrt.f32.84(float %1169) -> %1171 = fneg float %813 -> %1172 = fmul float %1170, %1171 -> %1173 = fmul float %1172, 0.000000e+00 -> %1174 = bitcast i32 %807 to float -> %1175 = fadd float %1174, %1173 -> %1176 = bitcast i32 %152 to float -> %1177 = bitcast i32 %152 to float -> %1178 = fmul float %1176, %1177 -> %1179 = fadd float %1178, 0.000000e+00 -> %1180 = bitcast i32 %807 to float -> %1181 = bitcast i32 %807 to float -> %1182 = fmul float %1180, %1181 -> %1183 = fadd float %1179, %1182 -> %1184 = call float @llvm.sqrt.f32.85(float %1183) -> %1185 = fneg float %813 -> %1186 = fmul float %1184, %1185 -> %1187 = fmul float %1186, 0.000000e+00 -> %1188 = bitcast i32 %807 to float -> %1189 = fadd float %1188, %1187 -> %1190 = fmul float %1175, %1189 -> %1191 = fadd float %1161, %1190 -> %1192 = call float @llvm.sqrt.f32.86(float %1191) -> %1193 = fadd float %1192, 0.000000e+00 -> %1194 = fdiv float %1133, 
%1193 -> %1195 = fmul float %1194, 2.000000e+00 -> %1196 = bitcast i32 %152 to float -> %1197 = bitcast i32 %152 to float -> %1198 = fmul float %1196, %1197 -> %1199 = fadd float %1198, 0.000000e+00 -> %1200 = bitcast i32 %807 to float -> %1201 = bitcast i32 %807 to float -> %1202 = fmul float %1200, %1201 -> %1203 = fadd float %1199, %1202 -> %1204 = call float @llvm.sqrt.f32.87(float %1203) -> %1205 = fneg float %813 -> %1206 = fmul float %1204, %1205 -> %1207 = fmul float %1206, 0.000000e+00 -> %1208 = bitcast i32 %807 to float -> %1209 = fadd float %1208, %1207 -> %1210 = bitcast i32 %152 to float -> %1211 = bitcast i32 %152 to float -> %1212 = fmul float %1210, %1211 -> %1213 = fadd float %1212, 0.000000e+00 -> %1214 = bitcast i32 %807 to float -> %1215 = bitcast i32 %807 to float -> %1216 = fmul float %1214, %1215 -> %1217 = fadd float %1213, %1216 -> %1218 = call float @llvm.sqrt.f32.88(float %1217) -> %1219 = fneg float %813 -> %1220 = fmul float %1218, %1219 -> %1221 = bitcast i32 %152 to float -> %1222 = fadd float %1221, %1220 -> %1223 = bitcast i32 %152 to float -> %1224 = bitcast i32 %152 to float -> %1225 = fmul float %1223, %1224 -> %1226 = fadd float %1225, 0.000000e+00 -> %1227 = bitcast i32 %807 to float -> %1228 = bitcast i32 %807 to float -> %1229 = fmul float %1227, %1228 -> %1230 = fadd float %1226, %1229 -> %1231 = call float @llvm.sqrt.f32.89(float %1230) -> %1232 = fneg float %813 -> %1233 = fmul float %1231, %1232 -> %1234 = bitcast i32 %152 to float -> %1235 = fadd float %1234, %1233 -> %1236 = fmul float %1222, %1235 -> %1237 = fadd float %1236, 0.000000e+00 -> %1238 = bitcast i32 %152 to float -> %1239 = bitcast i32 %152 to float -> %1240 = fmul float %1238, %1239 -> %1241 = fadd float %1240, 0.000000e+00 -> %1242 = bitcast i32 %807 to float -> %1243 = bitcast i32 %807 to float -> %1244 = fmul float %1242, %1243 -> %1245 = fadd float %1241, %1244 -> %1246 = call float @llvm.sqrt.f32.90(float %1245) -> %1247 = fneg float %813 -> %1248 = fmul float %1246, %1247 -> %1249 = fmul float %1248, 0.000000e+00 -> %1250 = bitcast i32 %807 to float -> %1251 = fadd float %1250, %1249 -> %1252 = bitcast i32 %152 to float -> %1253 = bitcast i32 %152 to float -> %1254 = fmul float %1252, %1253 -> %1255 = fadd float %1254, 0.000000e+00 -> %1256 = bitcast i32 %807 to float -> %1257 = bitcast i32 %807 to float -> %1258 = fmul float %1256, %1257 -> %1259 = fadd float %1255, %1258 -> %1260 = call float @llvm.sqrt.f32.91(float %1259) -> %1261 = fneg float %813 -> %1262 = fmul float %1260, %1261 -> %1263 = fmul float %1262, 0.000000e+00 -> %1264 = bitcast i32 %807 to float -> %1265 = fadd float %1264, %1263 -> %1266 = fmul float %1251, %1265 -> %1267 = fadd float %1237, %1266 -> %1268 = call float @llvm.sqrt.f32.92(float %1267) -> %1269 = fadd float %1268, 0.000000e+00 -> %1270 = fdiv float %1209, %1269 -> %1271 = fmul float %1195, %1270 -> %1272 = fneg float %1271 -> %1273 = getelementptr float, float* %0, i32 0 -> %1274 = getelementptr inbounds float, float* %1273, i64 2 -> %1275 = load float, float* %1274, align 4 -> %1276 = fmul float %1272, %1275 -> %1277 = fadd float %1120, %1276 -> %1278 = insertelement <4 x float> zeroinitializer, float %1277, i32 0 -> %1279 = insertelement <4 x float> %1278, float 0.000000e+00, i32 1 -> %1280 = insertelement <4 x float> %1279, float 0.000000e+00, i32 2 -> %1281 = insertelement <4 x float> %1280, float 0.000000e+00, i32 3 -> %1282 = extractelement <4 x float> %1281, i32 0 -> store float %1282, float* %2, align 4 -> %1283 = extractelement <4 x 
float> %1281, i32 1 -> %1284 = getelementptr float, float* %2, i32 0 -> %1285 = getelementptr inbounds float, float* %1284, i64 1 -> store float %1283, float* %1285, align 4 -> %1286 = bitcast i32 %152 to float -> %1287 = bitcast i32 %152 to float -> %1288 = fmul float %1286, %1287 -> %1289 = fadd float %1288, 0.000000e+00 -> %1290 = bitcast i32 %807 to float -> %1291 = bitcast i32 %807 to float -> %1292 = fmul float %1290, %1291 -> %1293 = fadd float %1289, %1292 -> %1294 = call float @llvm.sqrt.f32.93(float %1293) -> %1295 = fneg float %813 -> %1296 = fmul float %1294, %1295 -> %1297 = bitcast i32 %152 to float -> %1298 = fadd float %1297, %1296 -> %1299 = bitcast i32 %152 to float -> %1300 = bitcast i32 %152 to float -> %1301 = fmul float %1299, %1300 -> %1302 = fadd float %1301, 0.000000e+00 -> %1303 = bitcast i32 %807 to float -> %1304 = bitcast i32 %807 to float -> %1305 = fmul float %1303, %1304 -> %1306 = fadd float %1302, %1305 -> %1307 = call float @llvm.sqrt.f32.94(float %1306) -> %1308 = fneg float %813 -> %1309 = fmul float %1307, %1308 -> %1310 = bitcast i32 %152 to float -> %1311 = fadd float %1310, %1309 -> %1312 = bitcast i32 %152 to float -> %1313 = bitcast i32 %152 to float -> %1314 = fmul float %1312, %1313 -> %1315 = fadd float %1314, 0.000000e+00 -> %1316 = bitcast i32 %807 to float -> %1317 = bitcast i32 %807 to float -> %1318 = fmul float %1316, %1317 -> %1319 = fadd float %1315, %1318 -> %1320 = call float @llvm.sqrt.f32.95(float %1319) -> %1321 = fneg float %813 -> %1322 = fmul float %1320, %1321 -> %1323 = bitcast i32 %152 to float -> %1324 = fadd float %1323, %1322 -> %1325 = fmul float %1311, %1324 -> %1326 = fadd float %1325, 0.000000e+00 -> %1327 = bitcast i32 %152 to float -> %1328 = bitcast i32 %152 to float -> %1329 = fmul float %1327, %1328 -> %1330 = fadd float %1329, 0.000000e+00 -> %1331 = bitcast i32 %807 to float -> %1332 = bitcast i32 %807 to float -> %1333 = fmul float %1331, %1332 -> %1334 = fadd float %1330, %1333 -> %1335 = call float @llvm.sqrt.f32.96(float %1334) -> %1336 = fneg float %813 -> %1337 = fmul float %1335, %1336 -> %1338 = fmul float %1337, 0.000000e+00 -> %1339 = bitcast i32 %807 to float -> %1340 = fadd float %1339, %1338 -> %1341 = bitcast i32 %152 to float -> %1342 = bitcast i32 %152 to float -> %1343 = fmul float %1341, %1342 -> %1344 = fadd float %1343, 0.000000e+00 -> %1345 = bitcast i32 %807 to float -> %1346 = bitcast i32 %807 to float -> %1347 = fmul float %1345, %1346 -> %1348 = fadd float %1344, %1347 -> %1349 = call float @llvm.sqrt.f32.97(float %1348) -> %1350 = fneg float %813 -> %1351 = fmul float %1349, %1350 -> %1352 = fmul float %1351, 0.000000e+00 -> %1353 = bitcast i32 %807 to float -> %1354 = fadd float %1353, %1352 -> %1355 = fmul float %1340, %1354 -> %1356 = fadd float %1326, %1355 -> %1357 = call float @llvm.sqrt.f32.98(float %1356) -> %1358 = fadd float %1357, 0.000000e+00 -> %1359 = fdiv float %1298, %1358 -> %1360 = fmul float %1359, 2.000000e+00 -> %1361 = bitcast i32 %152 to float -> %1362 = bitcast i32 %152 to float -> %1363 = fmul float %1361, %1362 -> %1364 = fadd float %1363, 0.000000e+00 -> %1365 = bitcast i32 %807 to float -> %1366 = bitcast i32 %807 to float -> %1367 = fmul float %1365, %1366 -> %1368 = fadd float %1364, %1367 -> %1369 = call float @llvm.sqrt.f32.99(float %1368) -> %1370 = fneg float %813 -> %1371 = fmul float %1369, %1370 -> %1372 = bitcast i32 %152 to float -> %1373 = fadd float %1372, %1371 -> %1374 = bitcast i32 %152 to float -> %1375 = bitcast i32 %152 to float -> %1376 = 
fmul float %1374, %1375 -> %1377 = fadd float %1376, 0.000000e+00 -> %1378 = bitcast i32 %807 to float -> %1379 = bitcast i32 %807 to float -> %1380 = fmul float %1378, %1379 -> %1381 = fadd float %1377, %1380 -> %1382 = call float @llvm.sqrt.f32.100(float %1381) -> %1383 = fneg float %813 -> %1384 = fmul float %1382, %1383 -> %1385 = bitcast i32 %152 to float -> %1386 = fadd float %1385, %1384 -> %1387 = bitcast i32 %152 to float -> %1388 = bitcast i32 %152 to float -> %1389 = fmul float %1387, %1388 -> %1390 = fadd float %1389, 0.000000e+00 -> %1391 = bitcast i32 %807 to float -> %1392 = bitcast i32 %807 to float -> %1393 = fmul float %1391, %1392 -> %1394 = fadd float %1390, %1393 -> %1395 = call float @llvm.sqrt.f32.101(float %1394) -> %1396 = fneg float %813 -> %1397 = fmul float %1395, %1396 -> %1398 = bitcast i32 %152 to float -> %1399 = fadd float %1398, %1397 -> %1400 = fmul float %1386, %1399 -> %1401 = fadd float %1400, 0.000000e+00 -> %1402 = bitcast i32 %152 to float -> %1403 = bitcast i32 %152 to float -> %1404 = fmul float %1402, %1403 -> %1405 = fadd float %1404, 0.000000e+00 -> %1406 = bitcast i32 %807 to float -> %1407 = bitcast i32 %807 to float -> %1408 = fmul float %1406, %1407 -> %1409 = fadd float %1405, %1408 -> %1410 = call float @llvm.sqrt.f32.102(float %1409) -> %1411 = fneg float %813 -> %1412 = fmul float %1410, %1411 -> %1413 = fmul float %1412, 0.000000e+00 -> %1414 = bitcast i32 %807 to float -> %1415 = fadd float %1414, %1413 -> %1416 = bitcast i32 %152 to float -> %1417 = bitcast i32 %152 to float -> %1418 = fmul float %1416, %1417 -> %1419 = fadd float %1418, 0.000000e+00 -> %1420 = bitcast i32 %807 to float -> %1421 = bitcast i32 %807 to float -> %1422 = fmul float %1420, %1421 -> %1423 = fadd float %1419, %1422 -> %1424 = call float @llvm.sqrt.f32.103(float %1423) -> %1425 = fneg float %813 -> %1426 = fmul float %1424, %1425 -> %1427 = fmul float %1426, 0.000000e+00 -> %1428 = bitcast i32 %807 to float -> %1429 = fadd float %1428, %1427 -> %1430 = fmul float %1415, %1429 -> %1431 = fadd float %1401, %1430 -> %1432 = call float @llvm.sqrt.f32.104(float %1431) -> %1433 = fadd float %1432, 0.000000e+00 -> %1434 = fdiv float %1373, %1433 -> %1435 = fmul float %1360, %1434 -> %1436 = fsub float 1.000000e+00, %1435 -> %1437 = insertelement <4 x float> zeroinitializer, float %1436, i32 0 -> %1438 = insertelement <4 x float> %1437, float 0.000000e+00, i32 1 -> %1439 = insertelement <4 x float> %1438, float 0.000000e+00, i32 2 -> %1440 = insertelement <4 x float> %1439, float 0.000000e+00, i32 3 -> %1441 = getelementptr float, float* %0, i32 0 -> %1442 = getelementptr inbounds float, float* %1441, i64 1 -> %1443 = load float, float* %1442, align 4 -> %1444 = insertelement <4 x float> zeroinitializer, float %1443, i32 0 -> %1445 = insertelement <4 x float> %1444, float 0.000000e+00, i32 1 -> %1446 = insertelement <4 x float> %1445, float 0.000000e+00, i32 2 -> %1447 = insertelement <4 x float> %1446, float 0.000000e+00, i32 3 -> %1448 = call <4 x float> @llvm.fma.f32.105(<4 x float> %1440, <4 x float> %1447, <4 x float> zeroinitializer) -> %1449 = extractelement <4 x float> %1448, i32 0 -> store float %1449, float* %1285, align 4 -> %1450 = bitcast i32 %152 to float -> %1451 = bitcast i32 %152 to float -> %1452 = fmul float %1450, %1451 -> %1453 = fadd float %1452, 0.000000e+00 -> %1454 = bitcast i32 %807 to float -> %1455 = bitcast i32 %807 to float -> %1456 = fmul float %1454, %1455 -> %1457 = fadd float %1453, %1456 -> %1458 = call float 
@llvm.sqrt.f32.106(float %1457) -> %1459 = fneg float %813 -> %1460 = fmul float %1458, %1459 -> %1461 = bitcast i32 %152 to float -> %1462 = fadd float %1461, %1460 -> %1463 = bitcast i32 %152 to float -> %1464 = bitcast i32 %152 to float -> %1465 = fmul float %1463, %1464 -> %1466 = fadd float %1465, 0.000000e+00 -> %1467 = bitcast i32 %807 to float -> %1468 = bitcast i32 %807 to float -> %1469 = fmul float %1467, %1468 -> %1470 = fadd float %1466, %1469 -> %1471 = call float @llvm.sqrt.f32.107(float %1470) -> %1472 = fneg float %813 -> %1473 = fmul float %1471, %1472 -> %1474 = bitcast i32 %152 to float -> %1475 = fadd float %1474, %1473 -> %1476 = bitcast i32 %152 to float -> %1477 = bitcast i32 %152 to float -> %1478 = fmul float %1476, %1477 -> %1479 = fadd float %1478, 0.000000e+00 -> %1480 = bitcast i32 %807 to float -> %1481 = bitcast i32 %807 to float -> %1482 = fmul float %1480, %1481 -> %1483 = fadd float %1479, %1482 -> %1484 = call float @llvm.sqrt.f32.108(float %1483) -> %1485 = fneg float %813 -> %1486 = fmul float %1484, %1485 -> %1487 = bitcast i32 %152 to float -> %1488 = fadd float %1487, %1486 -> %1489 = fmul float %1475, %1488 -> %1490 = fadd float %1489, 0.000000e+00 -> %1491 = bitcast i32 %152 to float -> %1492 = bitcast i32 %152 to float -> %1493 = fmul float %1491, %1492 -> %1494 = fadd float %1493, 0.000000e+00 -> %1495 = bitcast i32 %807 to float -> %1496 = bitcast i32 %807 to float -> %1497 = fmul float %1495, %1496 -> %1498 = fadd float %1494, %1497 -> %1499 = call float @llvm.sqrt.f32.109(float %1498) -> %1500 = fneg float %813 -> %1501 = fmul float %1499, %1500 -> %1502 = fmul float %1501, 0.000000e+00 -> %1503 = bitcast i32 %807 to float -> %1504 = fadd float %1503, %1502 -> %1505 = bitcast i32 %152 to float -> %1506 = bitcast i32 %152 to float -> %1507 = fmul float %1505, %1506 -> %1508 = fadd float %1507, 0.000000e+00 -> %1509 = bitcast i32 %807 to float -> %1510 = bitcast i32 %807 to float -> %1511 = fmul float %1509, %1510 -> %1512 = fadd float %1508, %1511 -> %1513 = call float @llvm.sqrt.f32.110(float %1512) -> %1514 = fneg float %813 -> %1515 = fmul float %1513, %1514 -> %1516 = fmul float %1515, 0.000000e+00 -> %1517 = bitcast i32 %807 to float -> %1518 = fadd float %1517, %1516 -> %1519 = fmul float %1504, %1518 -> %1520 = fadd float %1490, %1519 -> %1521 = call float @llvm.sqrt.f32.111(float %1520) -> %1522 = fadd float %1521, 0.000000e+00 -> %1523 = fdiv float %1462, %1522 -> %1524 = fmul float %1523, 2.000000e+00 -> %1525 = bitcast i32 %152 to float -> %1526 = bitcast i32 %152 to float -> %1527 = fmul float %1525, %1526 -> %1528 = fadd float %1527, 0.000000e+00 -> %1529 = bitcast i32 %807 to float -> %1530 = bitcast i32 %807 to float -> %1531 = fmul float %1529, %1530 -> %1532 = fadd float %1528, %1531 -> %1533 = call float @llvm.sqrt.f32.112(float %1532) -> %1534 = fneg float %813 -> %1535 = fmul float %1533, %1534 -> %1536 = bitcast i32 %152 to float -> %1537 = fadd float %1536, %1535 -> %1538 = bitcast i32 %152 to float -> %1539 = bitcast i32 %152 to float -> %1540 = fmul float %1538, %1539 -> %1541 = fadd float %1540, 0.000000e+00 -> %1542 = bitcast i32 %807 to float -> %1543 = bitcast i32 %807 to float -> %1544 = fmul float %1542, %1543 -> %1545 = fadd float %1541, %1544 -> %1546 = call float @llvm.sqrt.f32.113(float %1545) -> %1547 = fneg float %813 -> %1548 = fmul float %1546, %1547 -> %1549 = bitcast i32 %152 to float -> %1550 = fadd float %1549, %1548 -> %1551 = bitcast i32 %152 to float -> %1552 = bitcast i32 %152 to float -> %1553 = 
fmul float %1551, %1552 -> %1554 = fadd float %1553, 0.000000e+00 -> %1555 = bitcast i32 %807 to float -> %1556 = bitcast i32 %807 to float -> %1557 = fmul float %1555, %1556 -> %1558 = fadd float %1554, %1557 -> %1559 = call float @llvm.sqrt.f32.114(float %1558) -> %1560 = fneg float %813 -> %1561 = fmul float %1559, %1560 -> %1562 = bitcast i32 %152 to float -> %1563 = fadd float %1562, %1561 -> %1564 = fmul float %1550, %1563 -> %1565 = fadd float %1564, 0.000000e+00 -> %1566 = bitcast i32 %152 to float -> %1567 = bitcast i32 %152 to float -> %1568 = fmul float %1566, %1567 -> %1569 = fadd float %1568, 0.000000e+00 -> %1570 = bitcast i32 %807 to float -> %1571 = bitcast i32 %807 to float -> %1572 = fmul float %1570, %1571 -> %1573 = fadd float %1569, %1572 -> %1574 = call float @llvm.sqrt.f32.115(float %1573) -> %1575 = fneg float %813 -> %1576 = fmul float %1574, %1575 -> %1577 = fmul float %1576, 0.000000e+00 -> %1578 = bitcast i32 %807 to float -> %1579 = fadd float %1578, %1577 -> %1580 = bitcast i32 %152 to float -> %1581 = bitcast i32 %152 to float -> %1582 = fmul float %1580, %1581 -> %1583 = fadd float %1582, 0.000000e+00 -> %1584 = bitcast i32 %807 to float -> %1585 = bitcast i32 %807 to float -> %1586 = fmul float %1584, %1585 -> %1587 = fadd float %1583, %1586 -> %1588 = call float @llvm.sqrt.f32.116(float %1587) -> %1589 = fneg float %813 -> %1590 = fmul float %1588, %1589 -> %1591 = fmul float %1590, 0.000000e+00 -> %1592 = bitcast i32 %807 to float -> %1593 = fadd float %1592, %1591 -> %1594 = fmul float %1579, %1593 -> %1595 = fadd float %1565, %1594 -> %1596 = call float @llvm.sqrt.f32.117(float %1595) -> %1597 = fadd float %1596, 0.000000e+00 -> %1598 = fdiv float %1537, %1597 -> %1599 = fmul float %1524, %1598 -> %1600 = fsub float 1.000000e+00, %1599 -> %1601 = fmul float %1600, %1443 -> %1602 = fadd float %1601, 0.000000e+00 -> %1603 = bitcast i32 %152 to float -> %1604 = bitcast i32 %152 to float -> %1605 = fmul float %1603, %1604 -> %1606 = fadd float %1605, 0.000000e+00 -> %1607 = bitcast i32 %807 to float -> %1608 = bitcast i32 %807 to float -> %1609 = fmul float %1607, %1608 -> %1610 = fadd float %1606, %1609 -> %1611 = call float @llvm.sqrt.f32.118(float %1610) -> %1612 = fneg float %813 -> %1613 = fmul float %1611, %1612 -> %1614 = bitcast i32 %152 to float -> %1615 = fadd float %1614, %1613 -> %1616 = bitcast i32 %152 to float -> %1617 = bitcast i32 %152 to float -> %1618 = fmul float %1616, %1617 -> %1619 = fadd float %1618, 0.000000e+00 -> %1620 = bitcast i32 %807 to float -> %1621 = bitcast i32 %807 to float -> %1622 = fmul float %1620, %1621 -> %1623 = fadd float %1619, %1622 -> %1624 = call float @llvm.sqrt.f32.119(float %1623) -> %1625 = fneg float %813 -> %1626 = fmul float %1624, %1625 -> %1627 = bitcast i32 %152 to float -> %1628 = fadd float %1627, %1626 -> %1629 = bitcast i32 %152 to float -> %1630 = bitcast i32 %152 to float -> %1631 = fmul float %1629, %1630 -> %1632 = fadd float %1631, 0.000000e+00 -> %1633 = bitcast i32 %807 to float -> %1634 = bitcast i32 %807 to float -> %1635 = fmul float %1633, %1634 -> %1636 = fadd float %1632, %1635 -> %1637 = call float @llvm.sqrt.f32.120(float %1636) -> %1638 = fneg float %813 -> %1639 = fmul float %1637, %1638 -> %1640 = bitcast i32 %152 to float -> %1641 = fadd float %1640, %1639 -> %1642 = fmul float %1628, %1641 -> %1643 = fadd float %1642, 0.000000e+00 -> %1644 = bitcast i32 %152 to float -> %1645 = bitcast i32 %152 to float -> %1646 = fmul float %1644, %1645 -> %1647 = fadd float %1646, 
0.000000e+00 -> %1648 = bitcast i32 %807 to float -> %1649 = bitcast i32 %807 to float -> %1650 = fmul float %1648, %1649 -> %1651 = fadd float %1647, %1650 -> %1652 = call float @llvm.sqrt.f32.121(float %1651) -> %1653 = fneg float %813 -> %1654 = fmul float %1652, %1653 -> %1655 = fmul float %1654, 0.000000e+00 -> %1656 = bitcast i32 %807 to float -> %1657 = fadd float %1656, %1655 -> %1658 = bitcast i32 %152 to float -> %1659 = bitcast i32 %152 to float -> %1660 = fmul float %1658, %1659 -> %1661 = fadd float %1660, 0.000000e+00 -> %1662 = bitcast i32 %807 to float -> %1663 = bitcast i32 %807 to float -> %1664 = fmul float %1662, %1663 -> %1665 = fadd float %1661, %1664 -> %1666 = call float @llvm.sqrt.f32.122(float %1665) -> %1667 = fneg float %813 -> %1668 = fmul float %1666, %1667 -> %1669 = fmul float %1668, 0.000000e+00 -> %1670 = bitcast i32 %807 to float -> %1671 = fadd float %1670, %1669 -> %1672 = fmul float %1657, %1671 -> %1673 = fadd float %1643, %1672 -> %1674 = call float @llvm.sqrt.f32.123(float %1673) -> %1675 = fadd float %1674, 0.000000e+00 -> %1676 = fdiv float %1615, %1675 -> %1677 = fmul float %1676, 2.000000e+00 -> %1678 = bitcast i32 %152 to float -> %1679 = bitcast i32 %152 to float -> %1680 = fmul float %1678, %1679 -> %1681 = fadd float %1680, 0.000000e+00 -> %1682 = bitcast i32 %807 to float -> %1683 = bitcast i32 %807 to float -> %1684 = fmul float %1682, %1683 -> %1685 = fadd float %1681, %1684 -> %1686 = call float @llvm.sqrt.f32.124(float %1685) -> %1687 = fneg float %813 -> %1688 = fmul float %1686, %1687 -> %1689 = fmul float %1688, 0.000000e+00 -> %1690 = bitcast i32 %807 to float -> %1691 = fadd float %1690, %1689 -> %1692 = bitcast i32 %152 to float -> %1693 = bitcast i32 %152 to float -> %1694 = fmul float %1692, %1693 -> %1695 = fadd float %1694, 0.000000e+00 -> %1696 = bitcast i32 %807 to float -> %1697 = bitcast i32 %807 to float -> %1698 = fmul float %1696, %1697 -> %1699 = fadd float %1695, %1698 -> %1700 = call float @llvm.sqrt.f32.125(float %1699) -> %1701 = fneg float %813 -> %1702 = fmul float %1700, %1701 -> %1703 = bitcast i32 %152 to float -> %1704 = fadd float %1703, %1702 -> %1705 = bitcast i32 %152 to float -> %1706 = bitcast i32 %152 to float -> %1707 = fmul float %1705, %1706 -> %1708 = fadd float %1707, 0.000000e+00 -> %1709 = bitcast i32 %807 to float -> %1710 = bitcast i32 %807 to float -> %1711 = fmul float %1709, %1710 -> %1712 = fadd float %1708, %1711 -> %1713 = call float @llvm.sqrt.f32.126(float %1712) -> %1714 = fneg float %813 -> %1715 = fmul float %1713, %1714 -> %1716 = bitcast i32 %152 to float -> %1717 = fadd float %1716, %1715 -> %1718 = fmul float %1704, %1717 -> %1719 = fadd float %1718, 0.000000e+00 -> %1720 = bitcast i32 %152 to float -> %1721 = bitcast i32 %152 to float -> %1722 = fmul float %1720, %1721 -> %1723 = fadd float %1722, 0.000000e+00 -> %1724 = bitcast i32 %807 to float -> %1725 = bitcast i32 %807 to float -> %1726 = fmul float %1724, %1725 -> %1727 = fadd float %1723, %1726 -> %1728 = call float @llvm.sqrt.f32.127(float %1727) -> %1729 = fneg float %813 -> %1730 = fmul float %1728, %1729 -> %1731 = fmul float %1730, 0.000000e+00 -> %1732 = bitcast i32 %807 to float -> %1733 = fadd float %1732, %1731 -> %1734 = bitcast i32 %152 to float -> %1735 = bitcast i32 %152 to float -> %1736 = fmul float %1734, %1735 -> %1737 = fadd float %1736, 0.000000e+00 -> %1738 = bitcast i32 %807 to float -> %1739 = bitcast i32 %807 to float -> %1740 = fmul float %1738, %1739 -> %1741 = fadd float %1737, %1740 -> %1742 = 
call float @llvm.sqrt.f32.128(float %1741) -> %1743 = fneg float %813 -> %1744 = fmul float %1742, %1743 -> %1745 = fmul float %1744, 0.000000e+00 -> %1746 = bitcast i32 %807 to float -> %1747 = fadd float %1746, %1745 -> %1748 = fmul float %1733, %1747 -> %1749 = fadd float %1719, %1748 -> %1750 = call float @llvm.sqrt.f32.129(float %1749) -> %1751 = fadd float %1750, 0.000000e+00 -> %1752 = fdiv float %1691, %1751 -> %1753 = fmul float %1677, %1752 -> %1754 = fneg float %1753 -> %1755 = load float, float* %144, align 4 -> %1756 = fmul float %1754, %1755 -> %1757 = fadd float %1602, %1756 -> %1758 = insertelement <4 x float> zeroinitializer, float %1757, i32 0 -> %1759 = insertelement <4 x float> %1758, float 0.000000e+00, i32 1 -> %1760 = insertelement <4 x float> %1759, float 0.000000e+00, i32 2 -> %1761 = insertelement <4 x float> %1760, float 0.000000e+00, i32 3 -> %1762 = extractelement <4 x float> %1761, i32 0 -> store float %1762, float* %1285, align 4 -> %1763 = extractelement <4 x float> %1761, i32 1 -> %1764 = getelementptr float, float* %2, i32 0 -> %1765 = getelementptr inbounds float, float* %1764, i64 2 -> store float %1763, float* %1765, align 4 -> %1766 = bitcast i32 %152 to float -> %1767 = bitcast i32 %152 to float -> %1768 = fmul float %1766, %1767 -> %1769 = fadd float %1768, 0.000000e+00 -> %1770 = bitcast i32 %807 to float -> %1771 = bitcast i32 %807 to float -> %1772 = fmul float %1770, %1771 -> %1773 = fadd float %1769, %1772 -> %1774 = call float @llvm.sqrt.f32.130(float %1773) -> %1775 = fneg float %813 -> %1776 = fmul float %1774, %1775 -> %1777 = fmul float %1776, 0.000000e+00 -> %1778 = bitcast i32 %807 to float -> %1779 = fadd float %1778, %1777 -> %1780 = bitcast i32 %152 to float -> %1781 = bitcast i32 %152 to float -> %1782 = fmul float %1780, %1781 -> %1783 = fadd float %1782, 0.000000e+00 -> %1784 = bitcast i32 %807 to float -> %1785 = bitcast i32 %807 to float -> %1786 = fmul float %1784, %1785 -> %1787 = fadd float %1783, %1786 -> %1788 = call float @llvm.sqrt.f32.131(float %1787) -> %1789 = fneg float %813 -> %1790 = fmul float %1788, %1789 -> %1791 = bitcast i32 %152 to float -> %1792 = fadd float %1791, %1790 -> %1793 = bitcast i32 %152 to float -> %1794 = bitcast i32 %152 to float -> %1795 = fmul float %1793, %1794 -> %1796 = fadd float %1795, 0.000000e+00 -> %1797 = bitcast i32 %807 to float -> %1798 = bitcast i32 %807 to float -> %1799 = fmul float %1797, %1798 -> %1800 = fadd float %1796, %1799 -> %1801 = call float @llvm.sqrt.f32.132(float %1800) -> %1802 = fneg float %813 -> %1803 = fmul float %1801, %1802 -> %1804 = bitcast i32 %152 to float -> %1805 = fadd float %1804, %1803 -> %1806 = fmul float %1792, %1805 -> %1807 = fadd float %1806, 0.000000e+00 -> %1808 = bitcast i32 %152 to float -> %1809 = bitcast i32 %152 to float -> %1810 = fmul float %1808, %1809 -> %1811 = fadd float %1810, 0.000000e+00 -> %1812 = bitcast i32 %807 to float -> %1813 = bitcast i32 %807 to float -> %1814 = fmul float %1812, %1813 -> %1815 = fadd float %1811, %1814 -> %1816 = call float @llvm.sqrt.f32.133(float %1815) -> %1817 = fneg float %813 -> %1818 = fmul float %1816, %1817 -> %1819 = fmul float %1818, 0.000000e+00 -> %1820 = bitcast i32 %807 to float -> %1821 = fadd float %1820, %1819 -> %1822 = bitcast i32 %152 to float -> %1823 = bitcast i32 %152 to float -> %1824 = fmul float %1822, %1823 -> %1825 = fadd float %1824, 0.000000e+00 -> %1826 = bitcast i32 %807 to float -> %1827 = bitcast i32 %807 to float -> %1828 = fmul float %1826, %1827 -> %1829 = fadd float 
%1825, %1828 -> %1830 = call float @llvm.sqrt.f32.134(float %1829) -> %1831 = fneg float %813 -> %1832 = fmul float %1830, %1831 -> %1833 = fmul float %1832, 0.000000e+00 -> %1834 = bitcast i32 %807 to float -> %1835 = fadd float %1834, %1833 -> %1836 = fmul float %1821, %1835 -> %1837 = fadd float %1807, %1836 -> %1838 = call float @llvm.sqrt.f32.135(float %1837) -> %1839 = fadd float %1838, 0.000000e+00 -> %1840 = fdiv float %1779, %1839 -> %1841 = fmul float %1840, 2.000000e+00 -> %1842 = bitcast i32 %152 to float -> %1843 = bitcast i32 %152 to float -> %1844 = fmul float %1842, %1843 -> %1845 = fadd float %1844, 0.000000e+00 -> %1846 = bitcast i32 %807 to float -> %1847 = bitcast i32 %807 to float -> %1848 = fmul float %1846, %1847 -> %1849 = fadd float %1845, %1848 -> %1850 = call float @llvm.sqrt.f32.136(float %1849) -> %1851 = fneg float %813 -> %1852 = fmul float %1850, %1851 -> %1853 = bitcast i32 %152 to float -> %1854 = fadd float %1853, %1852 -> %1855 = bitcast i32 %152 to float -> %1856 = bitcast i32 %152 to float -> %1857 = fmul float %1855, %1856 -> %1858 = fadd float %1857, 0.000000e+00 -> %1859 = bitcast i32 %807 to float -> %1860 = bitcast i32 %807 to float -> %1861 = fmul float %1859, %1860 -> %1862 = fadd float %1858, %1861 -> %1863 = call float @llvm.sqrt.f32.137(float %1862) -> %1864 = fneg float %813 -> %1865 = fmul float %1863, %1864 -> %1866 = bitcast i32 %152 to float -> %1867 = fadd float %1866, %1865 -> %1868 = bitcast i32 %152 to float -> %1869 = bitcast i32 %152 to float -> %1870 = fmul float %1868, %1869 -> %1871 = fadd float %1870, 0.000000e+00 -> %1872 = bitcast i32 %807 to float -> %1873 = bitcast i32 %807 to float -> %1874 = fmul float %1872, %1873 -> %1875 = fadd float %1871, %1874 -> %1876 = call float @llvm.sqrt.f32.138(float %1875) -> %1877 = fneg float %813 -> %1878 = fmul float %1876, %1877 -> %1879 = bitcast i32 %152 to float -> %1880 = fadd float %1879, %1878 -> %1881 = fmul float %1867, %1880 -> %1882 = fadd float %1881, 0.000000e+00 -> %1883 = bitcast i32 %152 to float -> %1884 = bitcast i32 %152 to float -> %1885 = fmul float %1883, %1884 -> %1886 = fadd float %1885, 0.000000e+00 -> %1887 = bitcast i32 %807 to float -> %1888 = bitcast i32 %807 to float -> %1889 = fmul float %1887, %1888 -> %1890 = fadd float %1886, %1889 -> %1891 = call float @llvm.sqrt.f32.139(float %1890) -> %1892 = fneg float %813 -> %1893 = fmul float %1891, %1892 -> %1894 = fmul float %1893, 0.000000e+00 -> %1895 = bitcast i32 %807 to float -> %1896 = fadd float %1895, %1894 -> %1897 = bitcast i32 %152 to float -> %1898 = bitcast i32 %152 to float -> %1899 = fmul float %1897, %1898 -> %1900 = fadd float %1899, 0.000000e+00 -> %1901 = bitcast i32 %807 to float -> %1902 = bitcast i32 %807 to float -> %1903 = fmul float %1901, %1902 -> %1904 = fadd float %1900, %1903 -> %1905 = call float @llvm.sqrt.f32.140(float %1904) -> %1906 = fneg float %813 -> %1907 = fmul float %1905, %1906 -> %1908 = fmul float %1907, 0.000000e+00 -> %1909 = bitcast i32 %807 to float -> %1910 = fadd float %1909, %1908 -> %1911 = fmul float %1896, %1910 -> %1912 = fadd float %1882, %1911 -> %1913 = call float @llvm.sqrt.f32.141(float %1912) -> %1914 = fadd float %1913, 0.000000e+00 -> %1915 = fdiv float %1854, %1914 -> %1916 = fmul float %1841, %1915 -> %1917 = fneg float %1916 -> %1918 = insertelement <4 x float> zeroinitializer, float %1917, i32 0 -> %1919 = insertelement <4 x float> %1918, float 0.000000e+00, i32 1 -> %1920 = insertelement <4 x float> %1919, float 0.000000e+00, i32 2 -> %1921 = 
insertelement <4 x float> %1920, float 0.000000e+00, i32 3 -> %1922 = getelementptr float, float* %0, i32 0 -> %1923 = load float, float* %1922, align 4 -> %1924 = insertelement <4 x float> zeroinitializer, float %1923, i32 0 -> %1925 = insertelement <4 x float> %1924, float 0.000000e+00, i32 1 -> %1926 = insertelement <4 x float> %1925, float 0.000000e+00, i32 2 -> %1927 = insertelement <4 x float> %1926, float 0.000000e+00, i32 3 -> %1928 = call <4 x float> @llvm.fma.f32.142(<4 x float> %1921, <4 x float> %1927, <4 x float> zeroinitializer) -> %1929 = extractelement <4 x float> %1928, i32 0 -> store float %1929, float* %1765, align 4 -> %1930 = bitcast i32 %152 to float -> %1931 = bitcast i32 %152 to float -> %1932 = fmul float %1930, %1931 -> %1933 = fadd float %1932, 0.000000e+00 -> %1934 = bitcast i32 %807 to float -> %1935 = bitcast i32 %807 to float -> %1936 = fmul float %1934, %1935 -> %1937 = fadd float %1933, %1936 -> %1938 = call float @llvm.sqrt.f32.143(float %1937) -> %1939 = fneg float %813 -> %1940 = fmul float %1938, %1939 -> %1941 = fmul float %1940, 0.000000e+00 -> %1942 = bitcast i32 %807 to float -> %1943 = fadd float %1942, %1941 -> %1944 = bitcast i32 %152 to float -> %1945 = bitcast i32 %152 to float -> %1946 = fmul float %1944, %1945 -> %1947 = fadd float %1946, 0.000000e+00 -> %1948 = bitcast i32 %807 to float -> %1949 = bitcast i32 %807 to float -> %1950 = fmul float %1948, %1949 -> %1951 = fadd float %1947, %1950 -> %1952 = call float @llvm.sqrt.f32.144(float %1951) -> %1953 = fneg float %813 -> %1954 = fmul float %1952, %1953 -> %1955 = bitcast i32 %152 to float -> %1956 = fadd float %1955, %1954 -> %1957 = bitcast i32 %152 to float -> %1958 = bitcast i32 %152 to float -> %1959 = fmul float %1957, %1958 -> %1960 = fadd float %1959, 0.000000e+00 -> %1961 = bitcast i32 %807 to float -> %1962 = bitcast i32 %807 to float -> %1963 = fmul float %1961, %1962 -> %1964 = fadd float %1960, %1963 -> %1965 = call float @llvm.sqrt.f32.145(float %1964) -> %1966 = fneg float %813 -> %1967 = fmul float %1965, %1966 -> %1968 = bitcast i32 %152 to float -> %1969 = fadd float %1968, %1967 -> %1970 = fmul float %1956, %1969 -> %1971 = fadd float %1970, 0.000000e+00 -> %1972 = bitcast i32 %152 to float -> %1973 = bitcast i32 %152 to float -> %1974 = fmul float %1972, %1973 -> %1975 = fadd float %1974, 0.000000e+00 -> %1976 = bitcast i32 %807 to float -> %1977 = bitcast i32 %807 to float -> %1978 = fmul float %1976, %1977 -> %1979 = fadd float %1975, %1978 -> %1980 = call float @llvm.sqrt.f32.146(float %1979) -> %1981 = fneg float %813 -> %1982 = fmul float %1980, %1981 -> %1983 = fmul float %1982, 0.000000e+00 -> %1984 = bitcast i32 %807 to float -> %1985 = fadd float %1984, %1983 -> %1986 = bitcast i32 %152 to float -> %1987 = bitcast i32 %152 to float -> %1988 = fmul float %1986, %1987 -> %1989 = fadd float %1988, 0.000000e+00 -> %1990 = bitcast i32 %807 to float -> %1991 = bitcast i32 %807 to float -> %1992 = fmul float %1990, %1991 -> %1993 = fadd float %1989, %1992 -> %1994 = call float @llvm.sqrt.f32.147(float %1993) -> %1995 = fneg float %813 -> %1996 = fmul float %1994, %1995 -> %1997 = fmul float %1996, 0.000000e+00 -> %1998 = bitcast i32 %807 to float -> %1999 = fadd float %1998, %1997 -> %2000 = fmul float %1985, %1999 -> %2001 = fadd float %1971, %2000 -> %2002 = call float @llvm.sqrt.f32.148(float %2001) -> %2003 = fadd float %2002, 0.000000e+00 -> %2004 = fdiv float %1943, %2003 -> %2005 = fmul float %2004, 2.000000e+00 -> %2006 = bitcast i32 %152 to float -> %2007 = 
bitcast i32 %152 to float -> %2008 = fmul float %2006, %2007 -> %2009 = fadd float %2008, 0.000000e+00 -> %2010 = bitcast i32 %807 to float -> %2011 = bitcast i32 %807 to float -> %2012 = fmul float %2010, %2011 -> %2013 = fadd float %2009, %2012 -> %2014 = call float @llvm.sqrt.f32.149(float %2013) -> %2015 = fneg float %813 -> %2016 = fmul float %2014, %2015 -> %2017 = bitcast i32 %152 to float -> %2018 = fadd float %2017, %2016 -> %2019 = bitcast i32 %152 to float -> %2020 = bitcast i32 %152 to float -> %2021 = fmul float %2019, %2020 -> %2022 = fadd float %2021, 0.000000e+00 -> %2023 = bitcast i32 %807 to float -> %2024 = bitcast i32 %807 to float -> %2025 = fmul float %2023, %2024 -> %2026 = fadd float %2022, %2025 -> %2027 = call float @llvm.sqrt.f32.150(float %2026) -> %2028 = fneg float %813 -> %2029 = fmul float %2027, %2028 -> %2030 = bitcast i32 %152 to float -> %2031 = fadd float %2030, %2029 -> %2032 = bitcast i32 %152 to float -> %2033 = bitcast i32 %152 to float -> %2034 = fmul float %2032, %2033 -> %2035 = fadd float %2034, 0.000000e+00 -> %2036 = bitcast i32 %807 to float -> %2037 = bitcast i32 %807 to float -> %2038 = fmul float %2036, %2037 -> %2039 = fadd float %2035, %2038 -> %2040 = call float @llvm.sqrt.f32.151(float %2039) -> %2041 = fneg float %813 -> %2042 = fmul float %2040, %2041 -> %2043 = bitcast i32 %152 to float -> %2044 = fadd float %2043, %2042 -> %2045 = fmul float %2031, %2044 -> %2046 = fadd float %2045, 0.000000e+00 -> %2047 = bitcast i32 %152 to float -> %2048 = bitcast i32 %152 to float -> %2049 = fmul float %2047, %2048 -> %2050 = fadd float %2049, 0.000000e+00 -> %2051 = bitcast i32 %807 to float -> %2052 = bitcast i32 %807 to float -> %2053 = fmul float %2051, %2052 -> %2054 = fadd float %2050, %2053 -> %2055 = call float @llvm.sqrt.f32.152(float %2054) -> %2056 = fneg float %813 -> %2057 = fmul float %2055, %2056 -> %2058 = fmul float %2057, 0.000000e+00 -> %2059 = bitcast i32 %807 to float -> %2060 = fadd float %2059, %2058 -> %2061 = bitcast i32 %152 to float -> %2062 = bitcast i32 %152 to float -> %2063 = fmul float %2061, %2062 -> %2064 = fadd float %2063, 0.000000e+00 -> %2065 = bitcast i32 %807 to float -> %2066 = bitcast i32 %807 to float -> %2067 = fmul float %2065, %2066 -> %2068 = fadd float %2064, %2067 -> %2069 = call float @llvm.sqrt.f32.153(float %2068) -> %2070 = fneg float %813 -> %2071 = fmul float %2069, %2070 -> %2072 = fmul float %2071, 0.000000e+00 -> %2073 = bitcast i32 %807 to float -> %2074 = fadd float %2073, %2072 -> %2075 = fmul float %2060, %2074 -> %2076 = fadd float %2046, %2075 -> %2077 = call float @llvm.sqrt.f32.154(float %2076) -> %2078 = fadd float %2077, 0.000000e+00 -> %2079 = fdiv float %2018, %2078 -> %2080 = fmul float %2005, %2079 -> %2081 = fneg float %2080 -> %2082 = fmul float %2081, %1923 -> %2083 = fadd float %2082, 0.000000e+00 -> %2084 = bitcast i32 %152 to float -> %2085 = bitcast i32 %152 to float -> %2086 = fmul float %2084, %2085 -> %2087 = fadd float %2086, 0.000000e+00 -> %2088 = bitcast i32 %807 to float -> %2089 = bitcast i32 %807 to float -> %2090 = fmul float %2088, %2089 -> %2091 = fadd float %2087, %2090 -> %2092 = call float @llvm.sqrt.f32.155(float %2091) -> %2093 = fneg float %813 -> %2094 = fmul float %2092, %2093 -> %2095 = fmul float %2094, 0.000000e+00 -> %2096 = bitcast i32 %807 to float -> %2097 = fadd float %2096, %2095 -> %2098 = bitcast i32 %152 to float -> %2099 = bitcast i32 %152 to float -> %2100 = fmul float %2098, %2099 -> %2101 = fadd float %2100, 0.000000e+00 -> %2102 
= bitcast i32 %807 to float -> %2103 = bitcast i32 %807 to float -> %2104 = fmul float %2102, %2103 -> %2105 = fadd float %2101, %2104 -> %2106 = call float @llvm.sqrt.f32.156(float %2105) -> %2107 = fneg float %813 -> %2108 = fmul float %2106, %2107 -> %2109 = bitcast i32 %152 to float -> %2110 = fadd float %2109, %2108 -> %2111 = bitcast i32 %152 to float -> %2112 = bitcast i32 %152 to float -> %2113 = fmul float %2111, %2112 -> %2114 = fadd float %2113, 0.000000e+00 -> %2115 = bitcast i32 %807 to float -> %2116 = bitcast i32 %807 to float -> %2117 = fmul float %2115, %2116 -> %2118 = fadd float %2114, %2117 -> %2119 = call float @llvm.sqrt.f32.157(float %2118) -> %2120 = fneg float %813 -> %2121 = fmul float %2119, %2120 -> %2122 = bitcast i32 %152 to float -> %2123 = fadd float %2122, %2121 -> %2124 = fmul float %2110, %2123 -> %2125 = fadd float %2124, 0.000000e+00 -> %2126 = bitcast i32 %152 to float -> %2127 = bitcast i32 %152 to float -> %2128 = fmul float %2126, %2127 -> %2129 = fadd float %2128, 0.000000e+00 -> %2130 = bitcast i32 %807 to float -> %2131 = bitcast i32 %807 to float -> %2132 = fmul float %2130, %2131 -> %2133 = fadd float %2129, %2132 -> %2134 = call float @llvm.sqrt.f32.158(float %2133) -> %2135 = fneg float %813 -> %2136 = fmul float %2134, %2135 -> %2137 = fmul float %2136, 0.000000e+00 -> %2138 = bitcast i32 %807 to float -> %2139 = fadd float %2138, %2137 -> %2140 = bitcast i32 %152 to float -> %2141 = bitcast i32 %152 to float -> %2142 = fmul float %2140, %2141 -> %2143 = fadd float %2142, 0.000000e+00 -> %2144 = bitcast i32 %807 to float -> %2145 = bitcast i32 %807 to float -> %2146 = fmul float %2144, %2145 -> %2147 = fadd float %2143, %2146 -> %2148 = call float @llvm.sqrt.f32.159(float %2147) -> %2149 = fneg float %813 -> %2150 = fmul float %2148, %2149 -> %2151 = fmul float %2150, 0.000000e+00 -> %2152 = bitcast i32 %807 to float -> %2153 = fadd float %2152, %2151 -> %2154 = fmul float %2139, %2153 -> %2155 = fadd float %2125, %2154 -> %2156 = call float @llvm.sqrt.f32.160(float %2155) -> %2157 = fadd float %2156, 0.000000e+00 -> %2158 = fdiv float %2097, %2157 -> %2159 = fmul float %2158, 2.000000e+00 -> %2160 = bitcast i32 %152 to float -> %2161 = bitcast i32 %152 to float -> %2162 = fmul float %2160, %2161 -> %2163 = fadd float %2162, 0.000000e+00 -> %2164 = bitcast i32 %807 to float -> %2165 = bitcast i32 %807 to float -> %2166 = fmul float %2164, %2165 -> %2167 = fadd float %2163, %2166 -> %2168 = call float @llvm.sqrt.f32.161(float %2167) -> %2169 = fneg float %813 -> %2170 = fmul float %2168, %2169 -> %2171 = fmul float %2170, 0.000000e+00 -> %2172 = bitcast i32 %807 to float -> %2173 = fadd float %2172, %2171 -> %2174 = bitcast i32 %152 to float -> %2175 = bitcast i32 %152 to float -> %2176 = fmul float %2174, %2175 -> %2177 = fadd float %2176, 0.000000e+00 -> %2178 = bitcast i32 %807 to float -> %2179 = bitcast i32 %807 to float -> %2180 = fmul float %2178, %2179 -> %2181 = fadd float %2177, %2180 -> %2182 = call float @llvm.sqrt.f32.162(float %2181) -> %2183 = fneg float %813 -> %2184 = fmul float %2182, %2183 -> %2185 = bitcast i32 %152 to float -> %2186 = fadd float %2185, %2184 -> %2187 = bitcast i32 %152 to float -> %2188 = bitcast i32 %152 to float -> %2189 = fmul float %2187, %2188 -> %2190 = fadd float %2189, 0.000000e+00 -> %2191 = bitcast i32 %807 to float -> %2192 = bitcast i32 %807 to float -> %2193 = fmul float %2191, %2192 -> %2194 = fadd float %2190, %2193 -> %2195 = call float @llvm.sqrt.f32.163(float %2194) -> %2196 = fneg float 
%813 -> %2197 = fmul float %2195, %2196 -> %2198 = bitcast i32 %152 to float -> %2199 = fadd float %2198, %2197 -> %2200 = fmul float %2186, %2199 -> %2201 = fadd float %2200, 0.000000e+00 -> %2202 = bitcast i32 %152 to float -> %2203 = bitcast i32 %152 to float -> %2204 = fmul float %2202, %2203 -> %2205 = fadd float %2204, 0.000000e+00 -> %2206 = bitcast i32 %807 to float -> %2207 = bitcast i32 %807 to float -> %2208 = fmul float %2206, %2207 -> %2209 = fadd float %2205, %2208 -> %2210 = call float @llvm.sqrt.f32.164(float %2209) -> %2211 = fneg float %813 -> %2212 = fmul float %2210, %2211 -> %2213 = fmul float %2212, 0.000000e+00 -> %2214 = bitcast i32 %807 to float -> %2215 = fadd float %2214, %2213 -> %2216 = bitcast i32 %152 to float -> %2217 = bitcast i32 %152 to float -> %2218 = fmul float %2216, %2217 -> %2219 = fadd float %2218, 0.000000e+00 -> %2220 = bitcast i32 %807 to float -> %2221 = bitcast i32 %807 to float -> %2222 = fmul float %2220, %2221 -> %2223 = fadd float %2219, %2222 -> %2224 = call float @llvm.sqrt.f32.165(float %2223) -> %2225 = fneg float %813 -> %2226 = fmul float %2224, %2225 -> %2227 = fmul float %2226, 0.000000e+00 -> %2228 = bitcast i32 %807 to float -> %2229 = fadd float %2228, %2227 -> %2230 = fmul float %2215, %2229 -> %2231 = fadd float %2201, %2230 -> %2232 = call float @llvm.sqrt.f32.166(float %2231) -> %2233 = fadd float %2232, 0.000000e+00 -> %2234 = fdiv float %2173, %2233 -> %2235 = fmul float %2159, %2234 -> %2236 = fsub float 1.000000e+00, %2235 -> %2237 = load float, float* %1274, align 4 -> %2238 = fmul float %2236, %2237 -> %2239 = fadd float %2083, %2238 -> %2240 = insertelement <4 x float> zeroinitializer, float %2239, i32 0 -> %2241 = insertelement <4 x float> %2240, float 0.000000e+00, i32 1 -> %2242 = insertelement <4 x float> %2241, float 0.000000e+00, i32 2 -> %2243 = insertelement <4 x float> %2242, float 0.000000e+00, i32 3 -> %2244 = extractelement <4 x float> %2243, i32 0 -> store float %2244, float* %1765, align 4 -> %2245 = extractelement <4 x float> %2243, i32 1 -> %2246 = getelementptr float, float* %2, i32 0 -> %2247 = getelementptr inbounds float, float* %2246, i64 3 -> store float %2245, float* %2247, align 4 -> %2248 = bitcast i32 %152 to float -> %2249 = bitcast i32 %152 to float -> %2250 = fmul float %2248, %2249 -> %2251 = fadd float %2250, 0.000000e+00 -> %2252 = bitcast i32 %807 to float -> %2253 = bitcast i32 %807 to float -> %2254 = fmul float %2252, %2253 -> %2255 = fadd float %2251, %2254 -> %2256 = call float @llvm.sqrt.f32.167(float %2255) -> %2257 = fneg float %813 -> %2258 = fmul float %2256, %2257 -> %2259 = fmul float %2258, 0.000000e+00 -> %2260 = bitcast i32 %807 to float -> %2261 = fadd float %2260, %2259 -> %2262 = bitcast i32 %152 to float -> %2263 = bitcast i32 %152 to float -> %2264 = fmul float %2262, %2263 -> %2265 = fadd float %2264, 0.000000e+00 -> %2266 = bitcast i32 %807 to float -> %2267 = bitcast i32 %807 to float -> %2268 = fmul float %2266, %2267 -> %2269 = fadd float %2265, %2268 -> %2270 = call float @llvm.sqrt.f32.168(float %2269) -> %2271 = fneg float %813 -> %2272 = fmul float %2270, %2271 -> %2273 = bitcast i32 %152 to float -> %2274 = fadd float %2273, %2272 -> %2275 = bitcast i32 %152 to float -> %2276 = bitcast i32 %152 to float -> %2277 = fmul float %2275, %2276 -> %2278 = fadd float %2277, 0.000000e+00 -> %2279 = bitcast i32 %807 to float -> %2280 = bitcast i32 %807 to float -> %2281 = fmul float %2279, %2280 -> %2282 = fadd float %2278, %2281 -> %2283 = call float 
@llvm.sqrt.f32.169(float %2282) -> %2284 = fneg float %813 -> %2285 = fmul float %2283, %2284 -> %2286 = bitcast i32 %152 to float -> %2287 = fadd float %2286, %2285 -> %2288 = fmul float %2274, %2287 -> %2289 = fadd float %2288, 0.000000e+00 -> %2290 = bitcast i32 %152 to float -> %2291 = bitcast i32 %152 to float -> %2292 = fmul float %2290, %2291 -> %2293 = fadd float %2292, 0.000000e+00 -> %2294 = bitcast i32 %807 to float -> %2295 = bitcast i32 %807 to float -> %2296 = fmul float %2294, %2295 -> %2297 = fadd float %2293, %2296 -> %2298 = call float @llvm.sqrt.f32.170(float %2297) -> %2299 = fneg float %813 -> %2300 = fmul float %2298, %2299 -> %2301 = fmul float %2300, 0.000000e+00 -> %2302 = bitcast i32 %807 to float -> %2303 = fadd float %2302, %2301 -> %2304 = bitcast i32 %152 to float -> %2305 = bitcast i32 %152 to float -> %2306 = fmul float %2304, %2305 -> %2307 = fadd float %2306, 0.000000e+00 -> %2308 = bitcast i32 %807 to float -> %2309 = bitcast i32 %807 to float -> %2310 = fmul float %2308, %2309 -> %2311 = fadd float %2307, %2310 -> %2312 = call float @llvm.sqrt.f32.171(float %2311) -> %2313 = fneg float %813 -> %2314 = fmul float %2312, %2313 -> %2315 = fmul float %2314, 0.000000e+00 -> %2316 = bitcast i32 %807 to float -> %2317 = fadd float %2316, %2315 -> %2318 = fmul float %2303, %2317 -> %2319 = fadd float %2289, %2318 -> %2320 = call float @llvm.sqrt.f32.172(float %2319) -> %2321 = fadd float %2320, 0.000000e+00 -> %2322 = fdiv float %2261, %2321 -> %2323 = fmul float %2322, 2.000000e+00 -> %2324 = bitcast i32 %152 to float -> %2325 = bitcast i32 %152 to float -> %2326 = fmul float %2324, %2325 -> %2327 = fadd float %2326, 0.000000e+00 -> %2328 = bitcast i32 %807 to float -> %2329 = bitcast i32 %807 to float -> %2330 = fmul float %2328, %2329 -> %2331 = fadd float %2327, %2330 -> %2332 = call float @llvm.sqrt.f32.173(float %2331) -> %2333 = fneg float %813 -> %2334 = fmul float %2332, %2333 -> %2335 = bitcast i32 %152 to float -> %2336 = fadd float %2335, %2334 -> %2337 = bitcast i32 %152 to float -> %2338 = bitcast i32 %152 to float -> %2339 = fmul float %2337, %2338 -> %2340 = fadd float %2339, 0.000000e+00 -> %2341 = bitcast i32 %807 to float -> %2342 = bitcast i32 %807 to float -> %2343 = fmul float %2341, %2342 -> %2344 = fadd float %2340, %2343 -> %2345 = call float @llvm.sqrt.f32.174(float %2344) -> %2346 = fneg float %813 -> %2347 = fmul float %2345, %2346 -> %2348 = bitcast i32 %152 to float -> %2349 = fadd float %2348, %2347 -> %2350 = bitcast i32 %152 to float -> %2351 = bitcast i32 %152 to float -> %2352 = fmul float %2350, %2351 -> %2353 = fadd float %2352, 0.000000e+00 -> %2354 = bitcast i32 %807 to float -> %2355 = bitcast i32 %807 to float -> %2356 = fmul float %2354, %2355 -> %2357 = fadd float %2353, %2356 -> %2358 = call float @llvm.sqrt.f32.175(float %2357) -> %2359 = fneg float %813 -> %2360 = fmul float %2358, %2359 -> %2361 = bitcast i32 %152 to float -> %2362 = fadd float %2361, %2360 -> %2363 = fmul float %2349, %2362 -> %2364 = fadd float %2363, 0.000000e+00 -> %2365 = bitcast i32 %152 to float -> %2366 = bitcast i32 %152 to float -> %2367 = fmul float %2365, %2366 -> %2368 = fadd float %2367, 0.000000e+00 -> %2369 = bitcast i32 %807 to float -> %2370 = bitcast i32 %807 to float -> %2371 = fmul float %2369, %2370 -> %2372 = fadd float %2368, %2371 -> %2373 = call float @llvm.sqrt.f32.176(float %2372) -> %2374 = fneg float %813 -> %2375 = fmul float %2373, %2374 -> %2376 = fmul float %2375, 0.000000e+00 -> %2377 = bitcast i32 %807 to float -> 
%2378 = fadd float %2377, %2376 -> %2379 = bitcast i32 %152 to float -> %2380 = bitcast i32 %152 to float -> %2381 = fmul float %2379, %2380 -> %2382 = fadd float %2381, 0.000000e+00 -> %2383 = bitcast i32 %807 to float -> %2384 = bitcast i32 %807 to float -> %2385 = fmul float %2383, %2384 -> %2386 = fadd float %2382, %2385 -> %2387 = call float @llvm.sqrt.f32.177(float %2386) -> %2388 = fneg float %813 -> %2389 = fmul float %2387, %2388 -> %2390 = fmul float %2389, 0.000000e+00 -> %2391 = bitcast i32 %807 to float -> %2392 = fadd float %2391, %2390 -> %2393 = fmul float %2378, %2392 -> %2394 = fadd float %2364, %2393 -> %2395 = call float @llvm.sqrt.f32.178(float %2394) -> %2396 = fadd float %2395, 0.000000e+00 -> %2397 = fdiv float %2336, %2396 -> %2398 = fmul float %2323, %2397 -> %2399 = fneg float %2398 -> %2400 = insertelement <4 x float> zeroinitializer, float %2399, i32 0 -> %2401 = insertelement <4 x float> %2400, float 0.000000e+00, i32 1 -> %2402 = insertelement <4 x float> %2401, float 0.000000e+00, i32 2 -> %2403 = insertelement <4 x float> %2402, float 0.000000e+00, i32 3 -> %2404 = load float, float* %1442, align 4 -> %2405 = insertelement <4 x float> zeroinitializer, float %2404, i32 0 -> %2406 = insertelement <4 x float> %2405, float 0.000000e+00, i32 1 -> %2407 = insertelement <4 x float> %2406, float 0.000000e+00, i32 2 -> %2408 = insertelement <4 x float> %2407, float 0.000000e+00, i32 3 -> %2409 = call <4 x float> @llvm.fma.f32.179(<4 x float> %2403, <4 x float> %2408, <4 x float> zeroinitializer) -> %2410 = extractelement <4 x float> %2409, i32 0 -> store float %2410, float* %2247, align 4 -> %2411 = bitcast i32 %152 to float -> %2412 = bitcast i32 %152 to float -> %2413 = fmul float %2411, %2412 -> %2414 = fadd float %2413, 0.000000e+00 -> %2415 = bitcast i32 %807 to float -> %2416 = bitcast i32 %807 to float -> %2417 = fmul float %2415, %2416 -> %2418 = fadd float %2414, %2417 -> %2419 = call float @llvm.sqrt.f32.180(float %2418) -> %2420 = fneg float %813 -> %2421 = fmul float %2419, %2420 -> %2422 = fmul float %2421, 0.000000e+00 -> %2423 = bitcast i32 %807 to float -> %2424 = fadd float %2423, %2422 -> %2425 = bitcast i32 %152 to float -> %2426 = bitcast i32 %152 to float -> %2427 = fmul float %2425, %2426 -> %2428 = fadd float %2427, 0.000000e+00 -> %2429 = bitcast i32 %807 to float -> %2430 = bitcast i32 %807 to float -> %2431 = fmul float %2429, %2430 -> %2432 = fadd float %2428, %2431 -> %2433 = call float @llvm.sqrt.f32.181(float %2432) -> %2434 = fneg float %813 -> %2435 = fmul float %2433, %2434 -> %2436 = bitcast i32 %152 to float -> %2437 = fadd float %2436, %2435 -> %2438 = bitcast i32 %152 to float -> %2439 = bitcast i32 %152 to float -> %2440 = fmul float %2438, %2439 -> %2441 = fadd float %2440, 0.000000e+00 -> %2442 = bitcast i32 %807 to float -> %2443 = bitcast i32 %807 to float -> %2444 = fmul float %2442, %2443 -> %2445 = fadd float %2441, %2444 -> %2446 = call float @llvm.sqrt.f32.182(float %2445) -> %2447 = fneg float %813 -> %2448 = fmul float %2446, %2447 -> %2449 = bitcast i32 %152 to float -> %2450 = fadd float %2449, %2448 -> %2451 = fmul float %2437, %2450 -> %2452 = fadd float %2451, 0.000000e+00 -> %2453 = bitcast i32 %152 to float -> %2454 = bitcast i32 %152 to float -> %2455 = fmul float %2453, %2454 -> %2456 = fadd float %2455, 0.000000e+00 -> %2457 = bitcast i32 %807 to float -> %2458 = bitcast i32 %807 to float -> %2459 = fmul float %2457, %2458 -> %2460 = fadd float %2456, %2459 -> %2461 = call float @llvm.sqrt.f32.183(float 
%2460) -> %2462 = fneg float %813 -> %2463 = fmul float %2461, %2462 -> %2464 = fmul float %2463, 0.000000e+00 -> %2465 = bitcast i32 %807 to float -> %2466 = fadd float %2465, %2464 -> %2467 = bitcast i32 %152 to float -> %2468 = bitcast i32 %152 to float -> %2469 = fmul float %2467, %2468 -> %2470 = fadd float %2469, 0.000000e+00 -> %2471 = bitcast i32 %807 to float -> %2472 = bitcast i32 %807 to float -> %2473 = fmul float %2471, %2472 -> %2474 = fadd float %2470, %2473 -> %2475 = call float @llvm.sqrt.f32.184(float %2474) -> %2476 = fneg float %813 -> %2477 = fmul float %2475, %2476 -> %2478 = fmul float %2477, 0.000000e+00 -> %2479 = bitcast i32 %807 to float -> %2480 = fadd float %2479, %2478 -> %2481 = fmul float %2466, %2480 -> %2482 = fadd float %2452, %2481 -> %2483 = call float @llvm.sqrt.f32.185(float %2482) -> %2484 = fadd float %2483, 0.000000e+00 -> %2485 = fdiv float %2424, %2484 -> %2486 = fmul float %2485, 2.000000e+00 -> %2487 = bitcast i32 %152 to float -> %2488 = bitcast i32 %152 to float -> %2489 = fmul float %2487, %2488 -> %2490 = fadd float %2489, 0.000000e+00 -> %2491 = bitcast i32 %807 to float -> %2492 = bitcast i32 %807 to float -> %2493 = fmul float %2491, %2492 -> %2494 = fadd float %2490, %2493 -> %2495 = call float @llvm.sqrt.f32.186(float %2494) -> %2496 = fneg float %813 -> %2497 = fmul float %2495, %2496 -> %2498 = bitcast i32 %152 to float -> %2499 = fadd float %2498, %2497 -> %2500 = bitcast i32 %152 to float -> %2501 = bitcast i32 %152 to float -> %2502 = fmul float %2500, %2501 -> %2503 = fadd float %2502, 0.000000e+00 -> %2504 = bitcast i32 %807 to float -> %2505 = bitcast i32 %807 to float -> %2506 = fmul float %2504, %2505 -> %2507 = fadd float %2503, %2506 -> %2508 = call float @llvm.sqrt.f32.187(float %2507) -> %2509 = fneg float %813 -> %2510 = fmul float %2508, %2509 -> %2511 = bitcast i32 %152 to float -> %2512 = fadd float %2511, %2510 -> %2513 = bitcast i32 %152 to float -> %2514 = bitcast i32 %152 to float -> %2515 = fmul float %2513, %2514 -> %2516 = fadd float %2515, 0.000000e+00 -> %2517 = bitcast i32 %807 to float -> %2518 = bitcast i32 %807 to float -> %2519 = fmul float %2517, %2518 -> %2520 = fadd float %2516, %2519 -> %2521 = call float @llvm.sqrt.f32.188(float %2520) -> %2522 = fneg float %813 -> %2523 = fmul float %2521, %2522 -> %2524 = bitcast i32 %152 to float -> %2525 = fadd float %2524, %2523 -> %2526 = fmul float %2512, %2525 -> %2527 = fadd float %2526, 0.000000e+00 -> %2528 = bitcast i32 %152 to float -> %2529 = bitcast i32 %152 to float -> %2530 = fmul float %2528, %2529 -> %2531 = fadd float %2530, 0.000000e+00 -> %2532 = bitcast i32 %807 to float -> %2533 = bitcast i32 %807 to float -> %2534 = fmul float %2532, %2533 -> %2535 = fadd float %2531, %2534 -> %2536 = call float @llvm.sqrt.f32.189(float %2535) -> %2537 = fneg float %813 -> %2538 = fmul float %2536, %2537 -> %2539 = fmul float %2538, 0.000000e+00 -> %2540 = bitcast i32 %807 to float -> %2541 = fadd float %2540, %2539 -> %2542 = bitcast i32 %152 to float -> %2543 = bitcast i32 %152 to float -> %2544 = fmul float %2542, %2543 -> %2545 = fadd float %2544, 0.000000e+00 -> %2546 = bitcast i32 %807 to float -> %2547 = bitcast i32 %807 to float -> %2548 = fmul float %2546, %2547 -> %2549 = fadd float %2545, %2548 -> %2550 = call float @llvm.sqrt.f32.190(float %2549) -> %2551 = fneg float %813 -> %2552 = fmul float %2550, %2551 -> %2553 = fmul float %2552, 0.000000e+00 -> %2554 = bitcast i32 %807 to float -> %2555 = fadd float %2554, %2553 -> %2556 = fmul float 
%2541, %2555 -> %2557 = fadd float %2527, %2556 -> %2558 = call float @llvm.sqrt.f32.191(float %2557) -> %2559 = fadd float %2558, 0.000000e+00 -> %2560 = fdiv float %2499, %2559 -> %2561 = fmul float %2486, %2560 -> %2562 = fneg float %2561 -> %2563 = fmul float %2562, %2404 -> %2564 = fadd float %2563, 0.000000e+00 -> %2565 = bitcast i32 %152 to float -> %2566 = bitcast i32 %152 to float -> %2567 = fmul float %2565, %2566 -> %2568 = fadd float %2567, 0.000000e+00 -> %2569 = bitcast i32 %807 to float -> %2570 = bitcast i32 %807 to float -> %2571 = fmul float %2569, %2570 -> %2572 = fadd float %2568, %2571 -> %2573 = call float @llvm.sqrt.f32.192(float %2572) -> %2574 = fneg float %813 -> %2575 = fmul float %2573, %2574 -> %2576 = fmul float %2575, 0.000000e+00 -> %2577 = bitcast i32 %807 to float -> %2578 = fadd float %2577, %2576 -> %2579 = bitcast i32 %152 to float -> %2580 = bitcast i32 %152 to float -> %2581 = fmul float %2579, %2580 -> %2582 = fadd float %2581, 0.000000e+00 -> %2583 = bitcast i32 %807 to float -> %2584 = bitcast i32 %807 to float -> %2585 = fmul float %2583, %2584 -> %2586 = fadd float %2582, %2585 -> %2587 = call float @llvm.sqrt.f32.193(float %2586) -> %2588 = fneg float %813 -> %2589 = fmul float %2587, %2588 -> %2590 = bitcast i32 %152 to float -> %2591 = fadd float %2590, %2589 -> %2592 = bitcast i32 %152 to float -> %2593 = bitcast i32 %152 to float -> %2594 = fmul float %2592, %2593 -> %2595 = fadd float %2594, 0.000000e+00 -> %2596 = bitcast i32 %807 to float -> %2597 = bitcast i32 %807 to float -> %2598 = fmul float %2596, %2597 -> %2599 = fadd float %2595, %2598 -> %2600 = call float @llvm.sqrt.f32.194(float %2599) -> %2601 = fneg float %813 -> %2602 = fmul float %2600, %2601 -> %2603 = bitcast i32 %152 to float -> %2604 = fadd float %2603, %2602 -> %2605 = fmul float %2591, %2604 -> %2606 = fadd float %2605, 0.000000e+00 -> %2607 = bitcast i32 %152 to float -> %2608 = bitcast i32 %152 to float -> %2609 = fmul float %2607, %2608 -> %2610 = fadd float %2609, 0.000000e+00 -> %2611 = bitcast i32 %807 to float -> %2612 = bitcast i32 %807 to float -> %2613 = fmul float %2611, %2612 -> %2614 = fadd float %2610, %2613 -> %2615 = call float @llvm.sqrt.f32.195(float %2614) -> %2616 = fneg float %813 -> %2617 = fmul float %2615, %2616 -> %2618 = fmul float %2617, 0.000000e+00 -> %2619 = bitcast i32 %807 to float -> %2620 = fadd float %2619, %2618 -> %2621 = bitcast i32 %152 to float -> %2622 = bitcast i32 %152 to float -> %2623 = fmul float %2621, %2622 -> %2624 = fadd float %2623, 0.000000e+00 -> %2625 = bitcast i32 %807 to float -> %2626 = bitcast i32 %807 to float -> %2627 = fmul float %2625, %2626 -> %2628 = fadd float %2624, %2627 -> %2629 = call float @llvm.sqrt.f32.196(float %2628) -> %2630 = fneg float %813 -> %2631 = fmul float %2629, %2630 -> %2632 = fmul float %2631, 0.000000e+00 -> %2633 = bitcast i32 %807 to float -> %2634 = fadd float %2633, %2632 -> %2635 = fmul float %2620, %2634 -> %2636 = fadd float %2606, %2635 -> %2637 = call float @llvm.sqrt.f32.197(float %2636) -> %2638 = fadd float %2637, 0.000000e+00 -> %2639 = fdiv float %2578, %2638 -> %2640 = fmul float %2639, 2.000000e+00 -> %2641 = bitcast i32 %152 to float -> %2642 = bitcast i32 %152 to float -> %2643 = fmul float %2641, %2642 -> %2644 = fadd float %2643, 0.000000e+00 -> %2645 = bitcast i32 %807 to float -> %2646 = bitcast i32 %807 to float -> %2647 = fmul float %2645, %2646 -> %2648 = fadd float %2644, %2647 -> %2649 = call float @llvm.sqrt.f32.198(float %2648) -> %2650 = fneg float 
%813 -> %2651 = fmul float %2649, %2650 -> %2652 = fmul float %2651, 0.000000e+00 -> %2653 = bitcast i32 %807 to float -> %2654 = fadd float %2653, %2652 -> %2655 = bitcast i32 %152 to float -> %2656 = bitcast i32 %152 to float -> %2657 = fmul float %2655, %2656 -> %2658 = fadd float %2657, 0.000000e+00 -> %2659 = bitcast i32 %807 to float -> %2660 = bitcast i32 %807 to float -> %2661 = fmul float %2659, %2660 -> %2662 = fadd float %2658, %2661 -> %2663 = call float @llvm.sqrt.f32.199(float %2662) -> %2664 = fneg float %813 -> %2665 = fmul float %2663, %2664 -> %2666 = bitcast i32 %152 to float -> %2667 = fadd float %2666, %2665 -> %2668 = bitcast i32 %152 to float -> %2669 = bitcast i32 %152 to float -> %2670 = fmul float %2668, %2669 -> %2671 = fadd float %2670, 0.000000e+00 -> %2672 = bitcast i32 %807 to float -> %2673 = bitcast i32 %807 to float -> %2674 = fmul float %2672, %2673 -> %2675 = fadd float %2671, %2674 -> %2676 = call float @llvm.sqrt.f32.200(float %2675) -> %2677 = fneg float %813 -> %2678 = fmul float %2676, %2677 -> %2679 = bitcast i32 %152 to float -> %2680 = fadd float %2679, %2678 -> %2681 = fmul float %2667, %2680 -> %2682 = fadd float %2681, 0.000000e+00 -> %2683 = bitcast i32 %152 to float -> %2684 = bitcast i32 %152 to float -> %2685 = fmul float %2683, %2684 -> %2686 = fadd float %2685, 0.000000e+00 -> %2687 = bitcast i32 %807 to float -> %2688 = bitcast i32 %807 to float -> %2689 = fmul float %2687, %2688 -> %2690 = fadd float %2686, %2689 -> %2691 = call float @llvm.sqrt.f32.201(float %2690) -> %2692 = fneg float %813 -> %2693 = fmul float %2691, %2692 -> %2694 = fmul float %2693, 0.000000e+00 -> %2695 = bitcast i32 %807 to float -> %2696 = fadd float %2695, %2694 -> %2697 = bitcast i32 %152 to float -> %2698 = bitcast i32 %152 to float -> %2699 = fmul float %2697, %2698 -> %2700 = fadd float %2699, 0.000000e+00 -> %2701 = bitcast i32 %807 to float -> %2702 = bitcast i32 %807 to float -> %2703 = fmul float %2701, %2702 -> %2704 = fadd float %2700, %2703 -> %2705 = call float @llvm.sqrt.f32.202(float %2704) -> %2706 = fneg float %813 -> %2707 = fmul float %2705, %2706 -> %2708 = fmul float %2707, 0.000000e+00 -> %2709 = bitcast i32 %807 to float -> %2710 = fadd float %2709, %2708 -> %2711 = fmul float %2696, %2710 -> %2712 = fadd float %2682, %2711 -> %2713 = call float @llvm.sqrt.f32.203(float %2712) -> %2714 = fadd float %2713, 0.000000e+00 -> %2715 = fdiv float %2654, %2714 -> %2716 = fmul float %2640, %2715 -> %2717 = fsub float 1.000000e+00, %2716 -> %2718 = load float, float* %144, align 4 -> %2719 = fmul float %2717, %2718 -> %2720 = fadd float %2564, %2719 -> %2721 = insertelement <4 x float> zeroinitializer, float %2720, i32 0 -> %2722 = insertelement <4 x float> %2721, float 0.000000e+00, i32 1 -> %2723 = insertelement <4 x float> %2722, float 0.000000e+00, i32 2 -> %2724 = insertelement <4 x float> %2723, float 0.000000e+00, i32 3 -> %2725 = extractelement <4 x float> %2724, i32 0 -> store float %2725, float* %2247, align 4 -> %2726 = getelementptr float, float* %1, i32 0 -> %2727 = getelementptr inbounds float, float* %2726, i64 2 -> %2728 = bitcast float* %2727 to i32* -> %2729 = load i32, i32* %2728, align 4 -> %2730 = bitcast i32 %2729 to float -> %2731 = insertelement <4 x float> zeroinitializer, float %2730, i32 0 -> %2732 = getelementptr float, float* %1, i32 0 -> %2733 = getelementptr inbounds float, float* %2732, i64 1 -> %2734 = bitcast float* %2733 to i32* -> %2735 = load i32, i32* %2734, align 4 -> %2736 = bitcast i32 %2735 to float -> 
%2737 = insertelement <4 x float> %2731, float %2736, i32 1 -> %2738 = insertelement <4 x float> %2737, float 0.000000e+00, i32 2 -> %2739 = insertelement <4 x float> %2738, float 0.000000e+00, i32 3 -> %2740 = extractelement <4 x float> %2739, i32 0 -> %2741 = bitcast i32* %95 to float* -> %2742 = bitcast i32* %2734 to float* -> store float %2740, float* %2742, align 4 -> %2743 = extractelement <4 x float> %2739, i32 1 -> %2744 = bitcast i32* %98 to float* -> %2745 = bitcast i32* %2728 to float* -> store float %2743, float* %2745, align 4 diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/diff-final.txt b/src/dios-egraphs/Diospyros/flaky-outputs/diff-final.txt deleted file mode 100644 index 6f27b438..00000000 --- a/src/dios-egraphs/Diospyros/flaky-outputs/diff-final.txt +++ /dev/null @@ -1 +0,0 @@ -Binary files build/final and flaky-outputs/flaky-final differ diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/diff-opt.txt b/src/dios-egraphs/Diospyros/flaky-outputs/diff-opt.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/flaky-aa.ll b/src/dios-egraphs/Diospyros/flaky-outputs/flaky-aa.ll deleted file mode 100644 index 13a36f92..00000000 --- a/src/dios-egraphs/Diospyros/flaky-outputs/flaky-aa.ll +++ /dev/null @@ -1,828 +0,0 @@ -; ModuleID = 'build/opt.ll' -source_filename = "fail-tests/qr-decomp-local-arrays.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@.str = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 -@.str.1 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 -@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 -@.str.2 = private unnamed_addr constant [36 x i8] c"fail-tests/qr-decomp-local-arrays.c\00", align 1 -@.str.3 = private unnamed_addr constant [34 x i8] c"fabs(expectedQ[i] - Q[i]) < DELTA\00", align 1 -@.str.4 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 -@.str.5 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 -@.str.6 = private unnamed_addr constant [34 x i8] c"fabs(expectedR[i] - R[i]) < DELTA\00", align 1 - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @sgn(float %0) #0 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_sgn(float %0) #1 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @naive_norm(float* %0, i32 %1) #0 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - %wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ 
%indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 - %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 = fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32(float) #2 - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_naive_norm(float* %0, i32 %1) #1 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - %wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 
- %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 = fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_transpose(float* %0) #0 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - %6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_transpose(float* %0) #1 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - %6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { -.preheader: - store float 0.000000e+00, float* %2, align 4 - %3 = load float, float* %0, align 4 - %4 = load float, float* %1, align 4 - %5 = fmul float %3, %4 - %6 = fadd float %5, 0.000000e+00 - store float %6, float* %2, align 4 - %7 = getelementptr inbounds float, float* %0, i64 1 - %8 = 
load float, float* %7, align 4 - %9 = getelementptr inbounds float, float* %1, i64 2 - %10 = load float, float* %9, align 4 - %11 = fmul float %8, %10 - %12 = fadd float %6, %11 - store float %12, float* %2, align 4 - %13 = getelementptr inbounds float, float* %2, i64 1 - store float 0.000000e+00, float* %13, align 4 - %14 = load float, float* %0, align 4 - %15 = getelementptr inbounds float, float* %1, i64 1 - %16 = load float, float* %15, align 4 - %17 = fmul float %14, %16 - %18 = fadd float %17, 0.000000e+00 - store float %18, float* %13, align 4 - %19 = load float, float* %7, align 4 - %20 = getelementptr inbounds float, float* %1, i64 3 - %21 = load float, float* %20, align 4 - %22 = fmul float %19, %21 - %23 = fadd float %18, %22 - store float %23, float* %13, align 4 - %24 = getelementptr inbounds float, float* %0, i64 2 - %25 = getelementptr inbounds float, float* %2, i64 2 - store float 0.000000e+00, float* %25, align 4 - %26 = load float, float* %24, align 4 - %27 = load float, float* %1, align 4 - %28 = fmul float %26, %27 - %29 = fadd float %28, 0.000000e+00 - store float %29, float* %25, align 4 - %30 = getelementptr inbounds float, float* %0, i64 3 - %31 = load float, float* %30, align 4 - %32 = load float, float* %9, align 4 - %33 = fmul float %31, %32 - %34 = fadd float %29, %33 - store float %34, float* %25, align 4 - %35 = getelementptr inbounds float, float* %2, i64 3 - store float 0.000000e+00, float* %35, align 4 - %36 = load float, float* %24, align 4 - %37 = load float, float* %15, align 4 - %38 = fmul float %36, %37 - %39 = fadd float %38, 0.000000e+00 - store float %39, float* %35, align 4 - %40 = load float, float* %30, align 4 - %41 = load float, float* %20, align 4 - %42 = fmul float %40, %41 - %43 = fadd float %39, %42 - store float %43, float* %35, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { -.preheader: - store float 0.000000e+00, float* %2, align 4 - %3 = load float, float* %0, align 4 - %4 = load float, float* %1, align 4 - %5 = fmul float %3, %4 - %6 = fadd float %5, 0.000000e+00 - store float %6, float* %2, align 4 - %7 = getelementptr inbounds float, float* %0, i64 1 - %8 = load float, float* %7, align 4 - %9 = getelementptr inbounds float, float* %1, i64 2 - %10 = load float, float* %9, align 4 - %11 = fmul float %8, %10 - %12 = fadd float %6, %11 - store float %12, float* %2, align 4 - %13 = getelementptr inbounds float, float* %2, i64 1 - store float 0.000000e+00, float* %13, align 4 - %14 = load float, float* %0, align 4 - %15 = getelementptr inbounds float, float* %1, i64 1 - %16 = load float, float* %15, align 4 - %17 = fmul float %14, %16 - %18 = fadd float %17, 0.000000e+00 - store float %18, float* %13, align 4 - %19 = load float, float* %7, align 4 - %20 = getelementptr inbounds float, float* %1, i64 3 - %21 = load float, float* %20, align 4 - %22 = fmul float %19, %21 - %23 = fadd float %18, %22 - store float %23, float* %13, align 4 - %24 = getelementptr inbounds float, float* %0, i64 2 - %25 = getelementptr inbounds float, float* %2, i64 2 - store float 0.000000e+00, float* %25, align 4 - %26 = load float, float* %24, align 4 - %27 = load float, float* %1, align 4 - %28 = fmul float %26, %27 - %29 = fadd float %28, 0.000000e+00 - store float %29, float* %25, align 4 - %30 = getelementptr inbounds float, float* %0, i64 3 - %31 = load float, float* %30, align 4 - %32 = load float, float* %9, align 4 - %33 = fmul float %31, %32 - 
%34 = fadd float %29, %33 - store float %34, float* %25, align 4 - %35 = getelementptr inbounds float, float* %2, i64 3 - store float 0.000000e+00, float* %35, align 4 - %36 = load float, float* %24, align 4 - %37 = load float, float* %15, align 4 - %38 = fmul float %36, %37 - %39 = fadd float %38, 0.000000e+00 - store float %39, float* %35, align 4 - %40 = load float, float* %30, align 4 - %41 = load float, float* %20, align 4 - %42 = fmul float %40, %41 - %43 = fadd float %39, %42 - store float %43, float* %35, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { -.preheader49: - %3 = bitcast float* %1 to i8* - %4 = alloca [4 x float], align 16 - %5 = bitcast [4 x float]* %4 to i8* - %6 = bitcast float* %0 to i32* - %7 = load i32, i32* %6, align 4 - %8 = bitcast float* %2 to i32* - store i32 %7, i32* %8, align 4 - %9 = getelementptr inbounds float, float* %0, i64 1 - %10 = bitcast float* %9 to i32* - %11 = load i32, i32* %10, align 4 - %12 = getelementptr inbounds float, float* %2, i64 1 - %13 = bitcast float* %12 to i32* - store i32 %11, i32* %13, align 4 - %14 = getelementptr inbounds float, float* %0, i64 2 - %15 = bitcast float* %14 to i32* - %16 = load i32, i32* %15, align 4 - %17 = getelementptr inbounds float, float* %2, i64 2 - %18 = bitcast float* %17 to i32* - store i32 %16, i32* %18, align 4 - %19 = getelementptr inbounds float, float* %0, i64 3 - %20 = bitcast float* %19 to i32* - %21 = load i32, i32* %20, align 4 - %22 = getelementptr inbounds float, float* %2, i64 3 - %23 = bitcast float* %22 to i32* - store i32 %21, i32* %23, align 4 - %24 = bitcast i32 %7 to float - %25 = fcmp ogt float %24, 0.000000e+00 - %26 = zext i1 %25 to i32 - %27 = fcmp olt float %24, 0.000000e+00 - %.neg = sext i1 %27 to i32 - %28 = add nsw i32 %.neg, %26 - %29 = sitofp i32 %28 to float - %30 = fmul float %24, %24 - %31 = fadd float %30, 0.000000e+00 - %32 = bitcast i32 %16 to float - %33 = fmul float %32, %32 - %34 = fadd float %31, %33 - %35 = call float @llvm.sqrt.f32(float %34) #9 - %36 = fneg float %29 - %37 = fmul float %35, %36 - %38 = fadd float %24, %37 - %39 = fmul float %37, 0.000000e+00 - %40 = fadd float %32, %39 - %41 = fmul float %38, %38 - %42 = fadd float %41, 0.000000e+00 - %43 = fmul float %40, %40 - %44 = fadd float %42, %43 - %45 = call float @llvm.sqrt.f32(float %44) #9 - %46 = fadd float %45, 0x3EE4F8B580000000 - %47 = fdiv float %38, %46 - %48 = fdiv float %40, %46 - %49 = fmul float %47, 2.000000e+00 - %50 = fmul float %49, %47 - %51 = fsub float 1.000000e+00, %50 - %52 = fmul float %49, %48 - %53 = fsub float 0.000000e+00, %52 - %54 = fmul float %48, 2.000000e+00 - %55 = fmul float %54, %47 - %56 = fsub float 0.000000e+00, %55 - %57 = fmul float %54, %48 - %58 = fsub float 1.000000e+00, %57 - %59 = bitcast float %51 to i32 - %60 = bitcast [4 x float]* %4 to i32* - store i32 %59, i32* %60, align 16 - %61 = bitcast float %53 to i32 - %62 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 - %63 = bitcast float* %62 to i32* - store i32 %61, i32* %63, align 4 - %64 = bitcast float %56 to i32 - %65 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 - %66 = bitcast float* %65 to i32* - store i32 %64, i32* %66, align 8 - %67 = bitcast float %58 to i32 - %68 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 - %69 = bitcast float* %68 to i32* - store i32 %67, i32* %69, align 4 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 
dereferenceable(16) %3, i8* nonnull align 16 dereferenceable(16) %5, i64 16, i1 false) - store float 0.000000e+00, float* %2, align 4 - %70 = load float, float* %0, align 4 - %71 = fmul float %51, %70 - %72 = fadd float %71, 0.000000e+00 - store float %72, float* %2, align 4 - %73 = load float, float* %14, align 4 - %74 = fmul float %53, %73 - %75 = fadd float %72, %74 - store float %75, float* %2, align 4 - store float 0.000000e+00, float* %12, align 4 - %76 = load float, float* %9, align 4 - %77 = fmul float %51, %76 - %78 = fadd float %77, 0.000000e+00 - store float %78, float* %12, align 4 - %79 = load float, float* %19, align 4 - %80 = fmul float %53, %79 - %81 = fadd float %78, %80 - store float %81, float* %12, align 4 - store float 0.000000e+00, float* %17, align 4 - %82 = load float, float* %0, align 4 - %83 = fmul float %56, %82 - %84 = fadd float %83, 0.000000e+00 - store float %84, float* %17, align 4 - %85 = load float, float* %14, align 4 - %86 = fmul float %58, %85 - %87 = fadd float %84, %86 - store float %87, float* %17, align 4 - store float 0.000000e+00, float* %22, align 4 - %88 = load float, float* %9, align 4 - %89 = fmul float %56, %88 - %90 = fadd float %89, 0.000000e+00 - store float %90, float* %22, align 4 - %91 = load float, float* %19, align 4 - %92 = fmul float %58, %91 - %93 = fadd float %90, %92 - store float %93, float* %22, align 4 - %94 = getelementptr inbounds float, float* %1, i64 1 - %95 = bitcast float* %94 to i32* - %96 = load i32, i32* %95, align 4 - %97 = getelementptr inbounds float, float* %1, i64 2 - %98 = bitcast float* %97 to i32* - %99 = load i32, i32* %98, align 4 - store i32 %99, i32* %95, align 4 - store i32 %96, i32* %98, align 4 - ret void -} - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { -.preheader13: - %3 = bitcast float* %2 to i8* - %4 = bitcast float* %0 to i8* - %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) - %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #9 - %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 - %8 = bitcast i8* %7 to float* - store float 1.000000e+00, float* %8, align 4 - %9 = getelementptr inbounds i8, i8* %7, i64 8 - %10 = getelementptr inbounds i8, i8* %7, i64 12 - %11 = bitcast i8* %10 to float* - store float 1.000000e+00, float* %11, align 4 - %12 = bitcast float* %1 to i8* - %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) - %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 - %15 = bitcast i8* %14 to float* - %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 - %17 = bitcast i8* %16 to float* - %18 = bitcast float* %2 to i32* - %19 = load i32, i32* %18, align 4 - %20 = bitcast i8* %14 to i32* - store i32 %19, i32* %20, align 4 - %21 = bitcast i8* %7 to i32* - %22 = load i32, i32* %21, align 4 - %23 = bitcast i8* %16 to i32* - store i32 %22, i32* %23, align 4 - %24 = getelementptr inbounds float, float* %2, i64 2 - %25 = bitcast float* %24 to i32* - %26 = load i32, i32* %25, align 4 - %27 = getelementptr inbounds i8, i8* %14, i64 4 - %28 = bitcast i8* %27 to i32* - store i32 %26, i32* %28, align 4 - %29 = bitcast i8* %9 to i32* - %30 = load i32, i32* %29, align 4 - %31 = getelementptr inbounds i8, i8* %16, i64 4 - %32 = bitcast i8* %31 to i32* - store i32 
%30, i32* %32, align 4 - %33 = load float, float* %15, align 4 - %34 = call float @no_opt_sgn(float %33) - %35 = fneg float %34 - %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) - %37 = fmul float %36, %35 - %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 - %39 = bitcast i8* %38 to float* - %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 - %41 = load float, float* %15, align 4 - %42 = load float, float* %17, align 4 - %43 = fmul float %37, %42 - %44 = fadd float %41, %43 - store float %44, float* %39, align 4 - %45 = bitcast i8* %27 to float* - %46 = load float, float* %45, align 4 - %47 = bitcast i8* %31 to float* - %48 = load float, float* %47, align 4 - %49 = fmul float %37, %48 - %50 = fadd float %46, %49 - %51 = getelementptr inbounds i8, i8* %38, i64 4 - %52 = bitcast i8* %51 to float* - store float %50, float* %52, align 4 - %53 = bitcast i8* %40 to float* - %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) - %55 = fadd float %54, 0x3EE4F8B580000000 - %56 = load float, float* %39, align 4 - %57 = fdiv float %56, %55 - store float %57, float* %53, align 4 - %58 = load float, float* %52, align 4 - %59 = fdiv float %58, %55 - %60 = getelementptr inbounds i8, i8* %40, i64 4 - %61 = bitcast i8* %60 to float* - store float %59, float* %61, align 4 - %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 - %63 = bitcast i8* %62 to float* - %64 = load float, float* %53, align 4 - %65 = fmul float %64, 2.000000e+00 - %66 = fmul float %65, %64 - %67 = fsub float 1.000000e+00, %66 - store float %67, float* %63, align 4 - %68 = load float, float* %53, align 4 - %69 = fmul float %68, 2.000000e+00 - %70 = load float, float* %61, align 4 - %71 = fmul float %69, %70 - %72 = fsub float 0.000000e+00, %71 - %73 = getelementptr inbounds i8, i8* %62, i64 4 - %74 = bitcast i8* %73 to float* - store float %72, float* %74, align 4 - %75 = load float, float* %61, align 4 - %76 = fmul float %75, 2.000000e+00 - %77 = load float, float* %53, align 4 - %78 = fmul float %76, %77 - %79 = fsub float 0.000000e+00, %78 - %80 = getelementptr inbounds i8, i8* %62, i64 8 - %81 = bitcast i8* %80 to float* - store float %79, float* %81, align 4 - %82 = load float, float* %61, align 4 - %83 = fmul float %82, 2.000000e+00 - %84 = fmul float %83, %82 - %85 = fsub float 1.000000e+00, %84 - %86 = getelementptr inbounds i8, i8* %62, i64 12 - %87 = bitcast i8* %86 to float* - store float %85, float* %87, align 4 - %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 - %89 = bitcast i8* %88 to float* - %90 = bitcast i8* %62 to i32* - %91 = load i32, i32* %90, align 4 - %92 = bitcast i8* %88 to i32* - store i32 %91, i32* %92, align 4 - %93 = bitcast i8* %73 to i32* - %94 = load i32, i32* %93, align 4 - %95 = getelementptr inbounds i8, i8* %88, i64 4 - %96 = bitcast i8* %95 to i32* - store i32 %94, i32* %96, align 4 - %97 = bitcast i8* %80 to i32* - %98 = load i32, i32* %97, align 4 - %99 = getelementptr inbounds i8, i8* %88, i64 8 - %100 = bitcast i8* %99 to i32* - store i32 %98, i32* %100, align 4 - %101 = bitcast i8* %86 to i32* - %102 = load i32, i32* %101, align 4 - %103 = getelementptr inbounds i8, i8* %88, i64 12 - %104 = bitcast i8* %103 to i32* - store i32 %102, i32* %104, align 4 - %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #9 - call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) - call void @free(i8* %14) - call void @free(i8* %16) - call void @free(i8* %38) - call void 
@free(i8* %40) - call void @free(i8* %62) - call void @free(i8* %88) - call void @no_opt_naive_fixed_transpose(float* %1) - ret void -} - -; Function Attrs: nounwind -declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 - -; Function Attrs: nounwind readnone speculatable willreturn -declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 - -; Function Attrs: allocsize(0,1) -declare i8* @calloc(i64, i64) #5 - -declare void @free(i8*) #6 - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #1 { -.preheader6: - %0 = alloca i64, align 8 - %1 = alloca [4 x float], align 16 - %2 = alloca [4 x float], align 16 - %3 = alloca [4 x float], align 16 - %4 = alloca [4 x float], align 16 - %5 = alloca [4 x float], align 16 - %6 = call i64 @time(i64* null) #9 - store i64 %6, i64* %0, align 8 - %7 = call i64 @time(i64* nonnull %0) #9 - %8 = trunc i64 %7 to i32 - call void @srand(i32 %8) #9 - %9 = call i32 @rand() #9 - %10 = sitofp i32 %9 to float - %11 = fdiv float %10, 0x41747AE140000000 - %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 - store float %11, float* %12, align 16 - %13 = call i32 @rand() #9 - %14 = sitofp i32 %13 to float - %15 = fdiv float %14, 0x41747AE140000000 - %16 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 - store float %15, float* %16, align 4 - %17 = call i32 @rand() #9 - %18 = sitofp i32 %17 to float - %19 = fdiv float %18, 0x41747AE140000000 - %20 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 - store float %19, float* %20, align 8 - %21 = call i32 @rand() #9 - %22 = sitofp i32 %21 to float - %23 = fdiv float %22, 0x41747AE140000000 - %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 - store float %23, float* %24, align 4 - %25 = bitcast [4 x float]* %2 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %25, i8 0, i64 16, i1 false) - %26 = bitcast [4 x float]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %26, i8 0, i64 16, i1 false) - %27 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %28 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %27, float* nonnull %28) - %29 = bitcast [4 x float]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %29, i8 0, i64 16, i1 false) - %30 = bitcast [4 x float]* %5 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %30, i8 0, i64 16, i1 false) - %31 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - %32 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 - call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %31, float* nonnull %32) - %33 = load float, float* %27, align 16 - %34 = fpext float %33 to double - %35 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %34) #9 - %36 = load float, float* %31, align 16 - %37 = fpext float %36 to double - %38 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %37) #9 - %39 = load float, float* %31, align 16 - %40 = load float, float* %27, align 16 - %41 = fsub float %39, %40 - %42 = call float @llvm.fabs.f32(float %41) - %43 = fcmp uge float %42, 0x3FB99999A0000000 - br i1 %43, label %58, label %44 - -44: ; preds = %.preheader6 - %45 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 - %46 = load float, float* %45, align 4 - %47 = fpext float %46 to double - %48 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %47) #9 - %49 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 - %50 = load float, float* %49, align 4 - %51 = fpext float %50 to double - %52 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %51) #9 - %53 = load float, float* %31, align 16 - %54 = load float, float* %27, align 16 - %55 = fsub float %53, %54 - %56 = call float @llvm.fabs.f32(float %55) - %57 = fcmp uge float %56, 0x3FB99999A0000000 - br i1 %57, label %58, label %.preheader6.1 - -58: ; preds = %115, %.preheader6.1, %44, %.preheader6 - call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 300, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.3, i64 0, i64 0)) #11 - unreachable - -59: ; preds = %.preheader5 - %60 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 - %61 = load float, float* %60, align 4 - %62 = fpext float %61 to double - %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %62) #9 - %64 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 - %65 = load float, float* %64, align 4 - %66 = fpext float %65 to double - %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %66) #9 - %68 = load float, float* %32, align 16 - %69 = load float, float* %28, align 16 - %70 = fsub float %68, %69 - %71 = call float @llvm.fabs.f32(float %70) - %72 = fcmp uge float %71, 0x3FB99999A0000000 - br i1 %72, label %73, label %.preheader.1 - -73: ; preds = %.preheader5, %87, %.preheader.1, %59 - call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 307, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.6, i64 0, i64 0)) #11 - unreachable - -.preheader.1: ; preds = %59 - %74 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 - %75 = load float, float* %74, align 8 - %76 = fpext float %75 to double - %77 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %76) #9 - %78 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 - %79 = load float, float* %78, align 8 - %80 = fpext float %79 to double - %81 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %80) #9 - %82 = load float, float* %64, align 4 - %83 = load float, float* %60, align 4 - %84 = fsub float %82, %83 - %85 = call float @llvm.fabs.f32(float %84) - %86 = fcmp uge float %85, 0x3FB99999A0000000 - br i1 %86, label %73, label %87 - -87: ; preds = %.preheader.1 - %88 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 - %89 = load float, float* %88, align 4 - %90 = fpext float %89 to double - %91 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %90) #9 - %92 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 - %93 = load float, float* %92, align 4 - %94 = fpext float %93 to double - %95 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %94) #9 - %96 = load float, float* %64, align 4 - %97 = load float, float* %60, align 4 - %98 = fsub float %96, %97 - %99 = call float @llvm.fabs.f32(float %98) - %100 = fcmp uge float %99, 0x3FB99999A0000000 - br i1 %100, label %73, label %101 - -101: ; preds = %87 - ret i32 0 - -.preheader6.1: ; preds = %44 - %102 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 - %103 = load float, float* %102, align 8 - %104 = fpext float %103 to double - %105 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %104) #9 - %106 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 - %107 = load float, float* %106, align 8 - %108 = fpext float %107 to double - %109 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %108) #9 - %110 = load float, float* %49, align 4 - %111 = load float, float* %45, align 4 - %112 = fsub float %110, %111 - %113 = call float @llvm.fabs.f32(float %112) - %114 = fcmp uge float %113, 0x3FB99999A0000000 - br i1 %114, label %58, label %115 - -115: ; preds = %.preheader6.1 - %116 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 - %117 = load float, float* %116, align 4 - %118 = fpext float %117 to double - %119 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %118) #9 - %120 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 - %121 = load float, float* %120, align 4 - %122 = fpext float %121 to double - %123 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %122) #9 - %124 = load float, float* %49, align 4 - %125 = load float, float* %45, align 4 - %126 = fsub float %124, %125 - %127 = call float @llvm.fabs.f32(float %126) - %128 = fcmp uge float %127, 0x3FB99999A0000000 - br i1 %128, label %58, label %.preheader5 - -.preheader5: ; preds = %115 - %129 = load float, float* %28, align 16 - %130 = fpext float %129 to double - %131 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %130) #9 - %132 = load float, float* %32, align 16 - %133 = fpext float %132 to double - %134 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %133) #9 - %135 = load float, float* %32, align 16 - %136 = load float, float* %28, align 16 - %137 = fsub float %135, %136 - %138 = call float @llvm.fabs.f32(float %137) - %139 = fcmp uge float %138, 0x3FB99999A0000000 - br i1 %139, label %73, label %59 -} - -declare i64 @time(i64*) #6 - -declare void @srand(i32) #6 - -declare i32 @rand() #6 - -declare i32 @printf(i8*, ...) #6 - -; Function Attrs: nounwind readnone speculatable willreturn -declare double @llvm.fabs.f64(double) #2 - -; Function Attrs: noreturn -declare void @__assert_rtn(i8*, i8*, i32, i8*) #7 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #8 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fabs.f32(float) #2 - -attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone speculatable willreturn } -attributes #3 = { argmemonly nounwind willreturn writeonly } -attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } 
-attributes #7 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #8 = { argmemonly nounwind willreturn } -attributes #9 = { nounwind } -attributes #10 = { nounwind allocsize(0,1) } -attributes #11 = { noreturn nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} -!3 = distinct !{!3, !4} -!4 = !{!"llvm.loop.unroll.disable"} -!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/flaky-clang.ll b/src/dios-egraphs/Diospyros/flaky-outputs/flaky-clang.ll deleted file mode 100644 index 44cb581c..00000000 --- a/src/dios-egraphs/Diospyros/flaky-outputs/flaky-clang.ll +++ /dev/null @@ -1,2356 +0,0 @@ -; ModuleID = 'fail-tests/qr-decomp-local-arrays.c' -source_filename = "fail-tests/qr-decomp-local-arrays.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@.str = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 -@.str.1 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 -@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 -@.str.2 = private unnamed_addr constant [36 x i8] c"fail-tests/qr-decomp-local-arrays.c\00", align 1 -@.str.3 = private unnamed_addr constant [34 x i8] c"fabs(expectedQ[i] - Q[i]) < DELTA\00", align 1 -@.str.4 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 -@.str.5 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 -@.str.6 = private unnamed_addr constant [34 x i8] c"fabs(expectedR[i] - R[i]) < DELTA\00", align 1 - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @sgn(float %0) #0 { - %2 = alloca float, align 4 - store float %0, float* %2, align 4 - %3 = load float, float* %2, align 4 - %4 = fcmp ogt float %3, 0.000000e+00 - %5 = zext i1 %4 to i32 - %6 = load float, float* %2, align 4 - %7 = fcmp olt float %6, 0.000000e+00 - %8 = zext i1 %7 to i32 - %9 = sub nsw i32 %5, %8 - %10 = sitofp i32 %9 to float - ret float %10 -} - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_sgn(float %0) #1 { - %2 = alloca float, align 4 - store float %0, float* %2, align 4 - %3 = load float, float* %2, align 4 - %4 = fcmp ogt float %3, 0.000000e+00 - %5 = zext i1 %4 to i32 - %6 = load float, float* %2, align 4 - %7 = fcmp olt float %6, 0.000000e+00 - %8 = zext i1 %7 to i32 - %9 = sub nsw i32 %5, %8 - %10 = sitofp i32 %9 to float - ret float %10 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @naive_norm(float* %0, i32 %1) #0 { - %3 = alloca float*, align 8 - %4 = alloca i32, align 4 - %5 = alloca float, align 4 - %6 = alloca i32, align 4 - store float* %0, float** %3, align 8 - store i32 %1, i32* %4, align 4 - store float 0.000000e+00, float* %5, align 4 - store i32 0, i32* %6, align 4 - br label %7 - -7: ; preds = %25, %2 - %8 = load i32, i32* %6, align 4 - %9 = load i32, i32* %4, align 4 - %10 = icmp slt i32 %8, %9 - br i1 %10, label %11, label %28 - -11: ; preds = %7 
- %12 = load float*, float** %3, align 8 - %13 = load i32, i32* %6, align 4 - %14 = sext i32 %13 to i64 - %15 = getelementptr inbounds float, float* %12, i64 %14 - %16 = load float, float* %15, align 4 - %17 = load float*, float** %3, align 8 - %18 = load i32, i32* %6, align 4 - %19 = sext i32 %18 to i64 - %20 = getelementptr inbounds float, float* %17, i64 %19 - %21 = load float, float* %20, align 4 - %22 = fmul float %16, %21 - %23 = load float, float* %5, align 4 - %24 = fadd float %23, %22 - store float %24, float* %5, align 4 - br label %25 - -25: ; preds = %11 - %26 = load i32, i32* %6, align 4 - %27 = add nsw i32 %26, 1 - store i32 %27, i32* %6, align 4 - br label %7 - -28: ; preds = %7 - %29 = load float, float* %5, align 4 - %30 = call float @llvm.sqrt.f32(float %29) - ret float %30 -} - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32(float) #2 - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_naive_norm(float* %0, i32 %1) #1 { - %3 = alloca float*, align 8 - %4 = alloca i32, align 4 - %5 = alloca float, align 4 - %6 = alloca i32, align 4 - store float* %0, float** %3, align 8 - store i32 %1, i32* %4, align 4 - store float 0.000000e+00, float* %5, align 4 - store i32 0, i32* %6, align 4 - br label %7 - -7: ; preds = %25, %2 - %8 = load i32, i32* %6, align 4 - %9 = load i32, i32* %4, align 4 - %10 = icmp slt i32 %8, %9 - br i1 %10, label %11, label %28 - -11: ; preds = %7 - %12 = load float*, float** %3, align 8 - %13 = load i32, i32* %6, align 4 - %14 = sext i32 %13 to i64 - %15 = getelementptr inbounds float, float* %12, i64 %14 - %16 = load float, float* %15, align 4 - %17 = load float*, float** %3, align 8 - %18 = load i32, i32* %6, align 4 - %19 = sext i32 %18 to i64 - %20 = getelementptr inbounds float, float* %17, i64 %19 - %21 = load float, float* %20, align 4 - %22 = fmul float %16, %21 - %23 = load float, float* %5, align 4 - %24 = fadd float %23, %22 - store float %24, float* %5, align 4 - br label %25 - -25: ; preds = %11 - %26 = load i32, i32* %6, align 4 - %27 = add nsw i32 %26, 1 - store i32 %27, i32* %6, align 4 - br label %7 - -28: ; preds = %7 - %29 = load float, float* %5, align 4 - %30 = call float @llvm.sqrt.f32(float %29) - ret float %30 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_transpose(float* %0) #0 { - %2 = alloca float*, align 8 - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca float, align 4 - store float* %0, float** %2, align 8 - store i32 0, i32* %3, align 4 - br label %6 - -6: ; preds = %51, %1 - %7 = load i32, i32* %3, align 4 - %8 = icmp slt i32 %7, 2 - br i1 %8, label %9, label %54 - -9: ; preds = %6 - %10 = load i32, i32* %3, align 4 - %11 = add nsw i32 %10, 1 - store i32 %11, i32* %4, align 4 - br label %12 - -12: ; preds = %47, %9 - %13 = load i32, i32* %4, align 4 - %14 = icmp slt i32 %13, 2 - br i1 %14, label %15, label %50 - -15: ; preds = %12 - %16 = load float*, float** %2, align 8 - %17 = load i32, i32* %3, align 4 - %18 = mul nsw i32 %17, 2 - %19 = load i32, i32* %4, align 4 - %20 = add nsw i32 %18, %19 - %21 = sext i32 %20 to i64 - %22 = getelementptr inbounds float, float* %16, i64 %21 - %23 = load float, float* %22, align 4 - store float %23, float* %5, align 4 - %24 = load float*, float** %2, align 8 - %25 = load i32, i32* %4, align 4 - %26 = mul nsw i32 %25, 2 - %27 = load i32, i32* %3, align 4 - %28 = add nsw i32 %26, %27 - %29 = sext i32 %28 to i64 - %30 = getelementptr inbounds float, float* %24, i64 %29 - 
%31 = load float, float* %30, align 4 - %32 = load float*, float** %2, align 8 - %33 = load i32, i32* %3, align 4 - %34 = mul nsw i32 %33, 2 - %35 = load i32, i32* %4, align 4 - %36 = add nsw i32 %34, %35 - %37 = sext i32 %36 to i64 - %38 = getelementptr inbounds float, float* %32, i64 %37 - store float %31, float* %38, align 4 - %39 = load float, float* %5, align 4 - %40 = load float*, float** %2, align 8 - %41 = load i32, i32* %4, align 4 - %42 = mul nsw i32 %41, 2 - %43 = load i32, i32* %3, align 4 - %44 = add nsw i32 %42, %43 - %45 = sext i32 %44 to i64 - %46 = getelementptr inbounds float, float* %40, i64 %45 - store float %39, float* %46, align 4 - br label %47 - -47: ; preds = %15 - %48 = load i32, i32* %4, align 4 - %49 = add nsw i32 %48, 1 - store i32 %49, i32* %4, align 4 - br label %12 - -50: ; preds = %12 - br label %51 - -51: ; preds = %50 - %52 = load i32, i32* %3, align 4 - %53 = add nsw i32 %52, 1 - store i32 %53, i32* %3, align 4 - br label %6 - -54: ; preds = %6 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_transpose(float* %0) #1 { - %2 = alloca float*, align 8 - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca float, align 4 - store float* %0, float** %2, align 8 - store i32 0, i32* %3, align 4 - br label %6 - -6: ; preds = %51, %1 - %7 = load i32, i32* %3, align 4 - %8 = icmp slt i32 %7, 2 - br i1 %8, label %9, label %54 - -9: ; preds = %6 - %10 = load i32, i32* %3, align 4 - %11 = add nsw i32 %10, 1 - store i32 %11, i32* %4, align 4 - br label %12 - -12: ; preds = %47, %9 - %13 = load i32, i32* %4, align 4 - %14 = icmp slt i32 %13, 2 - br i1 %14, label %15, label %50 - -15: ; preds = %12 - %16 = load float*, float** %2, align 8 - %17 = load i32, i32* %3, align 4 - %18 = mul nsw i32 %17, 2 - %19 = load i32, i32* %4, align 4 - %20 = add nsw i32 %18, %19 - %21 = sext i32 %20 to i64 - %22 = getelementptr inbounds float, float* %16, i64 %21 - %23 = load float, float* %22, align 4 - store float %23, float* %5, align 4 - %24 = load float*, float** %2, align 8 - %25 = load i32, i32* %4, align 4 - %26 = mul nsw i32 %25, 2 - %27 = load i32, i32* %3, align 4 - %28 = add nsw i32 %26, %27 - %29 = sext i32 %28 to i64 - %30 = getelementptr inbounds float, float* %24, i64 %29 - %31 = load float, float* %30, align 4 - %32 = load float*, float** %2, align 8 - %33 = load i32, i32* %3, align 4 - %34 = mul nsw i32 %33, 2 - %35 = load i32, i32* %4, align 4 - %36 = add nsw i32 %34, %35 - %37 = sext i32 %36 to i64 - %38 = getelementptr inbounds float, float* %32, i64 %37 - store float %31, float* %38, align 4 - %39 = load float, float* %5, align 4 - %40 = load float*, float** %2, align 8 - %41 = load i32, i32* %4, align 4 - %42 = mul nsw i32 %41, 2 - %43 = load i32, i32* %3, align 4 - %44 = add nsw i32 %42, %43 - %45 = sext i32 %44 to i64 - %46 = getelementptr inbounds float, float* %40, i64 %45 - store float %39, float* %46, align 4 - br label %47 - -47: ; preds = %15 - %48 = load i32, i32* %4, align 4 - %49 = add nsw i32 %48, 1 - store i32 %49, i32* %4, align 4 - br label %12 - -50: ; preds = %12 - br label %51 - -51: ; preds = %50 - %52 = load i32, i32* %3, align 4 - %53 = add nsw i32 %52, 1 - store i32 %53, i32* %3, align 4 - br label %6 - -54: ; preds = %6 - ret void -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - %7 = alloca i32, align 4 - 
%8 = alloca i32, align 4 - %9 = alloca i32, align 4 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - store i32 0, i32* %7, align 4 - br label %10 - -10: ; preds = %63, %3 - %11 = load i32, i32* %7, align 4 - %12 = icmp slt i32 %11, 2 - br i1 %12, label %13, label %66 - -13: ; preds = %10 - store i32 0, i32* %8, align 4 - br label %14 - -14: ; preds = %59, %13 - %15 = load i32, i32* %8, align 4 - %16 = icmp slt i32 %15, 2 - br i1 %16, label %17, label %62 - -17: ; preds = %14 - %18 = load float*, float** %6, align 8 - %19 = load i32, i32* %7, align 4 - %20 = mul nsw i32 2, %19 - %21 = load i32, i32* %8, align 4 - %22 = add nsw i32 %20, %21 - %23 = sext i32 %22 to i64 - %24 = getelementptr inbounds float, float* %18, i64 %23 - store float 0.000000e+00, float* %24, align 4 - store i32 0, i32* %9, align 4 - br label %25 - -25: ; preds = %55, %17 - %26 = load i32, i32* %9, align 4 - %27 = icmp slt i32 %26, 2 - br i1 %27, label %28, label %58 - -28: ; preds = %25 - %29 = load float*, float** %4, align 8 - %30 = load i32, i32* %7, align 4 - %31 = mul nsw i32 2, %30 - %32 = load i32, i32* %9, align 4 - %33 = add nsw i32 %31, %32 - %34 = sext i32 %33 to i64 - %35 = getelementptr inbounds float, float* %29, i64 %34 - %36 = load float, float* %35, align 4 - %37 = load float*, float** %5, align 8 - %38 = load i32, i32* %9, align 4 - %39 = mul nsw i32 2, %38 - %40 = load i32, i32* %8, align 4 - %41 = add nsw i32 %39, %40 - %42 = sext i32 %41 to i64 - %43 = getelementptr inbounds float, float* %37, i64 %42 - %44 = load float, float* %43, align 4 - %45 = fmul float %36, %44 - %46 = load float*, float** %6, align 8 - %47 = load i32, i32* %7, align 4 - %48 = mul nsw i32 2, %47 - %49 = load i32, i32* %8, align 4 - %50 = add nsw i32 %48, %49 - %51 = sext i32 %50 to i64 - %52 = getelementptr inbounds float, float* %46, i64 %51 - %53 = load float, float* %52, align 4 - %54 = fadd float %53, %45 - store float %54, float* %52, align 4 - br label %55 - -55: ; preds = %28 - %56 = load i32, i32* %9, align 4 - %57 = add nsw i32 %56, 1 - store i32 %57, i32* %9, align 4 - br label %25 - -58: ; preds = %25 - br label %59 - -59: ; preds = %58 - %60 = load i32, i32* %8, align 4 - %61 = add nsw i32 %60, 1 - store i32 %61, i32* %8, align 4 - br label %14 - -62: ; preds = %14 - br label %63 - -63: ; preds = %62 - %64 = load i32, i32* %7, align 4 - %65 = add nsw i32 %64, 1 - store i32 %65, i32* %7, align 4 - br label %10 - -66: ; preds = %10 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - %7 = alloca i32, align 4 - %8 = alloca i32, align 4 - %9 = alloca i32, align 4 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - store i32 0, i32* %7, align 4 - br label %10 - -10: ; preds = %63, %3 - %11 = load i32, i32* %7, align 4 - %12 = icmp slt i32 %11, 2 - br i1 %12, label %13, label %66 - -13: ; preds = %10 - store i32 0, i32* %8, align 4 - br label %14 - -14: ; preds = %59, %13 - %15 = load i32, i32* %8, align 4 - %16 = icmp slt i32 %15, 2 - br i1 %16, label %17, label %62 - -17: ; preds = %14 - %18 = load float*, float** %6, align 8 - %19 = load i32, i32* %7, align 4 - %20 = mul nsw i32 2, %19 - %21 = load i32, i32* %8, align 4 - %22 = add nsw i32 %20, %21 - %23 = sext i32 %22 to i64 - %24 = 
getelementptr inbounds float, float* %18, i64 %23 - store float 0.000000e+00, float* %24, align 4 - store i32 0, i32* %9, align 4 - br label %25 - -25: ; preds = %55, %17 - %26 = load i32, i32* %9, align 4 - %27 = icmp slt i32 %26, 2 - br i1 %27, label %28, label %58 - -28: ; preds = %25 - %29 = load float*, float** %4, align 8 - %30 = load i32, i32* %7, align 4 - %31 = mul nsw i32 2, %30 - %32 = load i32, i32* %9, align 4 - %33 = add nsw i32 %31, %32 - %34 = sext i32 %33 to i64 - %35 = getelementptr inbounds float, float* %29, i64 %34 - %36 = load float, float* %35, align 4 - %37 = load float*, float** %5, align 8 - %38 = load i32, i32* %9, align 4 - %39 = mul nsw i32 2, %38 - %40 = load i32, i32* %8, align 4 - %41 = add nsw i32 %39, %40 - %42 = sext i32 %41 to i64 - %43 = getelementptr inbounds float, float* %37, i64 %42 - %44 = load float, float* %43, align 4 - %45 = fmul float %36, %44 - %46 = load float*, float** %6, align 8 - %47 = load i32, i32* %7, align 4 - %48 = mul nsw i32 2, %47 - %49 = load i32, i32* %8, align 4 - %50 = add nsw i32 %48, %49 - %51 = sext i32 %50 to i64 - %52 = getelementptr inbounds float, float* %46, i64 %51 - %53 = load float, float* %52, align 4 - %54 = fadd float %53, %45 - store float %54, float* %52, align 4 - br label %55 - -55: ; preds = %28 - %56 = load i32, i32* %9, align 4 - %57 = add nsw i32 %56, 1 - store i32 %57, i32* %9, align 4 - br label %25 - -58: ; preds = %25 - br label %59 - -59: ; preds = %58 - %60 = load i32, i32* %8, align 4 - %61 = add nsw i32 %60, 1 - store i32 %61, i32* %8, align 4 - br label %14 - -62: ; preds = %14 - br label %63 - -63: ; preds = %62 - %64 = load i32, i32* %7, align 4 - %65 = add nsw i32 %64, 1 - store i32 %65, i32* %7, align 4 - br label %10 - -66: ; preds = %10 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { - %4 = alloca float*, align 8 - %5 = alloca i32, align 4 - %6 = alloca float, align 4 - %7 = alloca i32, align 4 - %8 = alloca float*, align 8 - %9 = alloca i32, align 4 - %10 = alloca float, align 4 - %11 = alloca i32, align 4 - %12 = alloca float*, align 8 - %13 = alloca float*, align 8 - %14 = alloca float*, align 8 - %15 = alloca i32, align 4 - %16 = alloca i32, align 4 - %17 = alloca i32, align 4 - %18 = alloca float*, align 8 - %19 = alloca float*, align 8 - %20 = alloca float*, align 8 - %21 = alloca i32, align 4 - %22 = alloca i32, align 4 - %23 = alloca i32, align 4 - %24 = alloca float*, align 8 - %25 = alloca float*, align 8 - %26 = alloca float*, align 8 - %27 = alloca i32, align 4 - %28 = alloca i32, align 4 - %29 = alloca i32, align 4 - %30 = alloca float*, align 8 - %31 = alloca i32, align 4 - %32 = alloca i32, align 4 - %33 = alloca float, align 4 - %34 = alloca float, align 4 - %35 = alloca float*, align 8 - %36 = alloca float*, align 8 - %37 = alloca float*, align 8 - %38 = alloca i32, align 4 - %39 = alloca [4 x float], align 16 - %40 = alloca i32, align 4 - %41 = alloca i32, align 4 - %42 = alloca i32, align 4 - %43 = alloca i32, align 4 - %44 = alloca [2 x float], align 4 - %45 = alloca [2 x float], align 4 - %46 = alloca i32, align 4 - %47 = alloca i32, align 4 - %48 = alloca i32, align 4 - %49 = alloca float, align 4 - %50 = alloca [2 x float], align 4 - %51 = alloca [2 x float], align 4 - %52 = alloca i32, align 4 - %53 = alloca i32, align 4 - %54 = alloca float, align 4 - %55 = alloca i32, align 4 - %56 = alloca [4 x float], align 16 - %57 = alloca i32, align 4 - %58 = alloca i32, align 4 - %59 = 
alloca i32, align 4 - %60 = alloca float, align 4 - %61 = alloca [4 x float], align 16 - %62 = alloca i32, align 4 - %63 = alloca i32, align 4 - %64 = alloca i32, align 4 - %65 = alloca float, align 4 - %66 = alloca i32, align 4 - %67 = alloca [4 x float], align 16 - %68 = alloca i32, align 4 - %69 = alloca i32, align 4 - %70 = alloca i32, align 4 - store float* %0, float** %35, align 8 - store float* %1, float** %36, align 8 - store float* %2, float** %37, align 8 - store i32 0, i32* %38, align 4 - br label %71 - -71: ; preds = %84, %3 - %72 = load i32, i32* %38, align 4 - %73 = icmp slt i32 %72, 4 - br i1 %73, label %74, label %87 - -74: ; preds = %71 - %75 = load float*, float** %35, align 8 - %76 = load i32, i32* %38, align 4 - %77 = sext i32 %76 to i64 - %78 = getelementptr inbounds float, float* %75, i64 %77 - %79 = load float, float* %78, align 4 - %80 = load float*, float** %37, align 8 - %81 = load i32, i32* %38, align 4 - %82 = sext i32 %81 to i64 - %83 = getelementptr inbounds float, float* %80, i64 %82 - store float %79, float* %83, align 4 - br label %84 - -84: ; preds = %74 - %85 = load i32, i32* %38, align 4 - %86 = add nsw i32 %85, 1 - store i32 %86, i32* %38, align 4 - br label %71 - -87: ; preds = %71 - %88 = bitcast [4 x float]* %39 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %88, i8 0, i64 16, i1 false) - store i32 0, i32* %40, align 4 - br label %89 - -89: ; preds = %112, %87 - %90 = load i32, i32* %40, align 4 - %91 = icmp slt i32 %90, 2 - br i1 %91, label %92, label %115 - -92: ; preds = %89 - store i32 0, i32* %41, align 4 - br label %93 - -93: ; preds = %108, %92 - %94 = load i32, i32* %41, align 4 - %95 = icmp slt i32 %94, 2 - br i1 %95, label %96, label %111 - -96: ; preds = %93 - %97 = load i32, i32* %40, align 4 - %98 = load i32, i32* %41, align 4 - %99 = icmp eq i32 %97, %98 - %100 = zext i1 %99 to i32 - %101 = sitofp i32 %100 to float - %102 = load i32, i32* %40, align 4 - %103 = mul nsw i32 %102, 2 - %104 = load i32, i32* %41, align 4 - %105 = add nsw i32 %103, %104 - %106 = sext i32 %105 to i64 - %107 = getelementptr inbounds [4 x float], [4 x float]* %39, i64 0, i64 %106 - store float %101, float* %107, align 4 - br label %108 - -108: ; preds = %96 - %109 = load i32, i32* %41, align 4 - %110 = add nsw i32 %109, 1 - store i32 %110, i32* %41, align 4 - br label %93 - -111: ; preds = %93 - br label %112 - -112: ; preds = %111 - %113 = load i32, i32* %40, align 4 - %114 = add nsw i32 %113, 1 - store i32 %114, i32* %40, align 4 - br label %89 - -115: ; preds = %89 - store i32 0, i32* %42, align 4 - br label %116 - -116: ; preds = %643, %115 - %117 = load i32, i32* %42, align 4 - %118 = icmp slt i32 %117, 1 - br i1 %118, label %119, label %646 - -119: ; preds = %116 - %120 = load i32, i32* %42, align 4 - %121 = sub nsw i32 2, %120 - store i32 %121, i32* %43, align 4 - %122 = bitcast [2 x float]* %44 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %122, i8 0, i64 8, i1 false) - %123 = bitcast [2 x float]* %45 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %123, i8 0, i64 8, i1 false) - store i32 0, i32* %46, align 4 - br label %124 - -124: ; preds = %134, %119 - %125 = load i32, i32* %46, align 4 - %126 = icmp slt i32 %125, 2 - br i1 %126, label %127, label %137 - -127: ; preds = %124 - %128 = load i32, i32* %46, align 4 - %129 = sext i32 %128 to i64 - %130 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 %129 - store float 0.000000e+00, float* %130, align 4 - %131 = load i32, i32* %46, align 4 - %132 = sext i32 %131 to i64 
- %133 = getelementptr inbounds [2 x float], [2 x float]* %45, i64 0, i64 %132 - store float 0.000000e+00, float* %133, align 4 - br label %134 - -134: ; preds = %127 - %135 = load i32, i32* %46, align 4 - %136 = add nsw i32 %135, 1 - store i32 %136, i32* %46, align 4 - br label %124 - -137: ; preds = %124 - store i32 0, i32* %47, align 4 - br label %138 - -138: ; preds = %167, %137 - %139 = load i32, i32* %47, align 4 - %140 = load i32, i32* %43, align 4 - %141 = icmp slt i32 %139, %140 - br i1 %141, label %142, label %170 - -142: ; preds = %138 - %143 = load i32, i32* %42, align 4 - %144 = load i32, i32* %47, align 4 - %145 = add nsw i32 %143, %144 - store i32 %145, i32* %48, align 4 - %146 = load float*, float** %37, align 8 - %147 = load i32, i32* %48, align 4 - %148 = mul nsw i32 %147, 2 - %149 = load i32, i32* %42, align 4 - %150 = add nsw i32 %148, %149 - %151 = sext i32 %150 to i64 - %152 = getelementptr inbounds float, float* %146, i64 %151 - %153 = load float, float* %152, align 4 - %154 = load i32, i32* %47, align 4 - %155 = sext i32 %154 to i64 - %156 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 %155 - store float %153, float* %156, align 4 - %157 = load i32, i32* %48, align 4 - %158 = mul nsw i32 %157, 2 - %159 = load i32, i32* %42, align 4 - %160 = add nsw i32 %158, %159 - %161 = sext i32 %160 to i64 - %162 = getelementptr inbounds [4 x float], [4 x float]* %39, i64 0, i64 %161 - %163 = load float, float* %162, align 4 - %164 = load i32, i32* %47, align 4 - %165 = sext i32 %164 to i64 - %166 = getelementptr inbounds [2 x float], [2 x float]* %45, i64 0, i64 %165 - store float %163, float* %166, align 4 - br label %167 - -167: ; preds = %142 - %168 = load i32, i32* %47, align 4 - %169 = add nsw i32 %168, 1 - store i32 %169, i32* %47, align 4 - br label %138 - -170: ; preds = %138 - %171 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 0 - %172 = load float, float* %171, align 4 - store float %172, float* %34, align 4 - %173 = load float, float* %34, align 4 - %174 = fcmp ogt float %173, 0.000000e+00 - %175 = zext i1 %174 to i32 - %176 = load float, float* %34, align 4 - %177 = fcmp olt float %176, 0.000000e+00 - %178 = zext i1 %177 to i32 - %179 = sub nsw i32 %175, %178 - %180 = sitofp i32 %179 to float - %181 = fneg float %180 - %182 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 0 - %183 = load i32, i32* %43, align 4 - store float* %182, float** %4, align 8 - store i32 %183, i32* %5, align 4 - store float 0.000000e+00, float* %6, align 4 - store i32 0, i32* %7, align 4 - br label %184 - -184: ; preds = %188, %170 - %185 = load i32, i32* %7, align 4 - %186 = load i32, i32* %5, align 4 - %187 = icmp slt i32 %185, %186 - br i1 %187, label %188, label %204 - -188: ; preds = %184 - %189 = load float*, float** %4, align 8 - %190 = load i32, i32* %7, align 4 - %191 = sext i32 %190 to i64 - %192 = getelementptr inbounds float, float* %189, i64 %191 - %193 = load float, float* %192, align 4 - %194 = load float*, float** %4, align 8 - %195 = load i32, i32* %7, align 4 - %196 = sext i32 %195 to i64 - %197 = getelementptr inbounds float, float* %194, i64 %196 - %198 = load float, float* %197, align 4 - %199 = fmul float %193, %198 - %200 = load float, float* %6, align 4 - %201 = fadd float %200, %199 - store float %201, float* %6, align 4 - %202 = load i32, i32* %7, align 4 - %203 = add nsw i32 %202, 1 - store i32 %203, i32* %7, align 4 - br label %184 - -204: ; preds = %184 - %205 = load float, float* %6, align 4 - %206 = call 
float @llvm.sqrt.f32(float %205) #8 - %207 = fmul float %181, %206 - store float %207, float* %49, align 4 - %208 = bitcast [2 x float]* %50 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %208, i8 0, i64 8, i1 false) - %209 = bitcast [2 x float]* %51 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %209, i8 0, i64 8, i1 false) - store i32 0, i32* %52, align 4 - br label %210 - -210: ; preds = %220, %204 - %211 = load i32, i32* %52, align 4 - %212 = icmp slt i32 %211, 2 - br i1 %212, label %213, label %223 - -213: ; preds = %210 - %214 = load i32, i32* %52, align 4 - %215 = sext i32 %214 to i64 - %216 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 %215 - store float 0.000000e+00, float* %216, align 4 - %217 = load i32, i32* %52, align 4 - %218 = sext i32 %217 to i64 - %219 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %218 - store float 0.000000e+00, float* %219, align 4 - br label %220 - -220: ; preds = %213 - %221 = load i32, i32* %52, align 4 - %222 = add nsw i32 %221, 1 - store i32 %222, i32* %52, align 4 - br label %210 - -223: ; preds = %210 - store i32 0, i32* %53, align 4 - br label %224 - -224: ; preds = %243, %223 - %225 = load i32, i32* %53, align 4 - %226 = load i32, i32* %43, align 4 - %227 = icmp slt i32 %225, %226 - br i1 %227, label %228, label %246 - -228: ; preds = %224 - %229 = load i32, i32* %53, align 4 - %230 = sext i32 %229 to i64 - %231 = getelementptr inbounds [2 x float], [2 x float]* %44, i64 0, i64 %230 - %232 = load float, float* %231, align 4 - %233 = load float, float* %49, align 4 - %234 = load i32, i32* %53, align 4 - %235 = sext i32 %234 to i64 - %236 = getelementptr inbounds [2 x float], [2 x float]* %45, i64 0, i64 %235 - %237 = load float, float* %236, align 4 - %238 = fmul float %233, %237 - %239 = fadd float %232, %238 - %240 = load i32, i32* %53, align 4 - %241 = sext i32 %240 to i64 - %242 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 %241 - store float %239, float* %242, align 4 - br label %243 - -243: ; preds = %228 - %244 = load i32, i32* %53, align 4 - %245 = add nsw i32 %244, 1 - store i32 %245, i32* %53, align 4 - br label %224 - -246: ; preds = %224 - %247 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 0 - %248 = load i32, i32* %43, align 4 - store float* %247, float** %8, align 8 - store i32 %248, i32* %9, align 4 - store float 0.000000e+00, float* %10, align 4 - store i32 0, i32* %11, align 4 - br label %249 - -249: ; preds = %253, %246 - %250 = load i32, i32* %11, align 4 - %251 = load i32, i32* %9, align 4 - %252 = icmp slt i32 %250, %251 - br i1 %252, label %253, label %269 - -253: ; preds = %249 - %254 = load float*, float** %8, align 8 - %255 = load i32, i32* %11, align 4 - %256 = sext i32 %255 to i64 - %257 = getelementptr inbounds float, float* %254, i64 %256 - %258 = load float, float* %257, align 4 - %259 = load float*, float** %8, align 8 - %260 = load i32, i32* %11, align 4 - %261 = sext i32 %260 to i64 - %262 = getelementptr inbounds float, float* %259, i64 %261 - %263 = load float, float* %262, align 4 - %264 = fmul float %258, %263 - %265 = load float, float* %10, align 4 - %266 = fadd float %265, %264 - store float %266, float* %10, align 4 - %267 = load i32, i32* %11, align 4 - %268 = add nsw i32 %267, 1 - store i32 %268, i32* %11, align 4 - br label %249 - -269: ; preds = %249 - %270 = load float, float* %10, align 4 - %271 = call float @llvm.sqrt.f32(float %270) #8 - store float %271, float* %54, align 4 - store i32 0, i32* %55, 
align 4 - br label %272 - -272: ; preds = %287, %269 - %273 = load i32, i32* %55, align 4 - %274 = load i32, i32* %43, align 4 - %275 = icmp slt i32 %273, %274 - br i1 %275, label %276, label %290 - -276: ; preds = %272 - %277 = load i32, i32* %55, align 4 - %278 = sext i32 %277 to i64 - %279 = getelementptr inbounds [2 x float], [2 x float]* %50, i64 0, i64 %278 - %280 = load float, float* %279, align 4 - %281 = load float, float* %54, align 4 - %282 = fadd float %281, 0x3EE4F8B580000000 - %283 = fdiv float %280, %282 - %284 = load i32, i32* %55, align 4 - %285 = sext i32 %284 to i64 - %286 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %285 - store float %283, float* %286, align 4 - br label %287 - -287: ; preds = %276 - %288 = load i32, i32* %55, align 4 - %289 = add nsw i32 %288, 1 - store i32 %289, i32* %55, align 4 - br label %272 - -290: ; preds = %272 - %291 = bitcast [4 x float]* %56 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %291, i8 0, i64 16, i1 false) - store i32 0, i32* %57, align 4 - br label %292 - -292: ; preds = %299, %290 - %293 = load i32, i32* %57, align 4 - %294 = icmp slt i32 %293, 4 - br i1 %294, label %295, label %302 - -295: ; preds = %292 - %296 = load i32, i32* %57, align 4 - %297 = sext i32 %296 to i64 - %298 = getelementptr inbounds [4 x float], [4 x float]* %56, i64 0, i64 %297 - store float 0.000000e+00, float* %298, align 4 - br label %299 - -299: ; preds = %295 - %300 = load i32, i32* %57, align 4 - %301 = add nsw i32 %300, 1 - store i32 %301, i32* %57, align 4 - br label %292 - -302: ; preds = %292 - store i32 0, i32* %58, align 4 - br label %303 - -303: ; preds = %341, %302 - %304 = load i32, i32* %58, align 4 - %305 = load i32, i32* %43, align 4 - %306 = icmp slt i32 %304, %305 - br i1 %306, label %307, label %344 - -307: ; preds = %303 - store i32 0, i32* %59, align 4 - br label %308 - -308: ; preds = %337, %307 - %309 = load i32, i32* %59, align 4 - %310 = load i32, i32* %43, align 4 - %311 = icmp slt i32 %309, %310 - br i1 %311, label %312, label %340 - -312: ; preds = %308 - %313 = load i32, i32* %58, align 4 - %314 = load i32, i32* %59, align 4 - %315 = icmp eq i32 %313, %314 - %316 = zext i1 %315 to i64 - %317 = select i1 %315, float 1.000000e+00, float 0.000000e+00 - %318 = load i32, i32* %58, align 4 - %319 = sext i32 %318 to i64 - %320 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %319 - %321 = load float, float* %320, align 4 - %322 = fmul float 2.000000e+00, %321 - %323 = load i32, i32* %59, align 4 - %324 = sext i32 %323 to i64 - %325 = getelementptr inbounds [2 x float], [2 x float]* %51, i64 0, i64 %324 - %326 = load float, float* %325, align 4 - %327 = fmul float %322, %326 - %328 = fsub float %317, %327 - store float %328, float* %60, align 4 - %329 = load float, float* %60, align 4 - %330 = load i32, i32* %58, align 4 - %331 = load i32, i32* %43, align 4 - %332 = mul nsw i32 %330, %331 - %333 = load i32, i32* %59, align 4 - %334 = add nsw i32 %332, %333 - %335 = sext i32 %334 to i64 - %336 = getelementptr inbounds [4 x float], [4 x float]* %56, i64 0, i64 %335 - store float %329, float* %336, align 4 - br label %337 - -337: ; preds = %312 - %338 = load i32, i32* %59, align 4 - %339 = add nsw i32 %338, 1 - store i32 %339, i32* %59, align 4 - br label %308 - -340: ; preds = %308 - br label %341 - -341: ; preds = %340 - %342 = load i32, i32* %58, align 4 - %343 = add nsw i32 %342, 1 - store i32 %343, i32* %58, align 4 - br label %303 - -344: ; preds = %303 - %345 = bitcast [4 x float]* 
%61 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %345, i8 0, i64 16, i1 false) - store i32 0, i32* %62, align 4 - br label %346 - -346: ; preds = %353, %344 - %347 = load i32, i32* %62, align 4 - %348 = icmp slt i32 %347, 4 - br i1 %348, label %349, label %356 - -349: ; preds = %346 - %350 = load i32, i32* %62, align 4 - %351 = sext i32 %350 to i64 - %352 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 %351 - store float 0.000000e+00, float* %352, align 4 - br label %353 - -353: ; preds = %349 - %354 = load i32, i32* %62, align 4 - %355 = add nsw i32 %354, 1 - store i32 %355, i32* %62, align 4 - br label %346 - -356: ; preds = %346 - store i32 0, i32* %63, align 4 - br label %357 - -357: ; preds = %403, %356 - %358 = load i32, i32* %63, align 4 - %359 = icmp slt i32 %358, 2 - br i1 %359, label %360, label %406 - -360: ; preds = %357 - store i32 0, i32* %64, align 4 - br label %361 - -361: ; preds = %399, %360 - %362 = load i32, i32* %64, align 4 - %363 = icmp slt i32 %362, 2 - br i1 %363, label %364, label %402 - -364: ; preds = %361 - %365 = load i32, i32* %63, align 4 - %366 = load i32, i32* %42, align 4 - %367 = icmp slt i32 %365, %366 - br i1 %367, label %372, label %368 - -368: ; preds = %364 - %369 = load i32, i32* %64, align 4 - %370 = load i32, i32* %42, align 4 - %371 = icmp slt i32 %369, %370 - br i1 %371, label %372, label %378 - -372: ; preds = %368, %364 - %373 = load i32, i32* %63, align 4 - %374 = load i32, i32* %64, align 4 - %375 = icmp eq i32 %373, %374 - %376 = zext i1 %375 to i64 - %377 = select i1 %375, float 1.000000e+00, float 0.000000e+00 - store float %377, float* %65, align 4 - br label %391 - -378: ; preds = %368 - %379 = load i32, i32* %63, align 4 - %380 = load i32, i32* %42, align 4 - %381 = sub nsw i32 %379, %380 - %382 = load i32, i32* %43, align 4 - %383 = mul nsw i32 %381, %382 - %384 = load i32, i32* %64, align 4 - %385 = load i32, i32* %42, align 4 - %386 = sub nsw i32 %384, %385 - %387 = add nsw i32 %383, %386 - %388 = sext i32 %387 to i64 - %389 = getelementptr inbounds [4 x float], [4 x float]* %56, i64 0, i64 %388 - %390 = load float, float* %389, align 4 - store float %390, float* %65, align 4 - br label %391 - -391: ; preds = %378, %372 - %392 = load float, float* %65, align 4 - %393 = load i32, i32* %63, align 4 - %394 = mul nsw i32 %393, 2 - %395 = load i32, i32* %64, align 4 - %396 = add nsw i32 %394, %395 - %397 = sext i32 %396 to i64 - %398 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 %397 - store float %392, float* %398, align 4 - br label %399 - -399: ; preds = %391 - %400 = load i32, i32* %64, align 4 - %401 = add nsw i32 %400, 1 - store i32 %401, i32* %64, align 4 - br label %361 - -402: ; preds = %361 - br label %403 - -403: ; preds = %402 - %404 = load i32, i32* %63, align 4 - %405 = add nsw i32 %404, 1 - store i32 %405, i32* %63, align 4 - br label %357 - -406: ; preds = %357 - %407 = load i32, i32* %42, align 4 - %408 = icmp eq i32 %407, 0 - br i1 %408, label %409, label %483 - -409: ; preds = %406 - store i32 0, i32* %66, align 4 - br label %410 - -410: ; preds = %422, %409 - %411 = load i32, i32* %66, align 4 - %412 = icmp slt i32 %411, 4 - br i1 %412, label %413, label %425 - -413: ; preds = %410 - %414 = load i32, i32* %66, align 4 - %415 = sext i32 %414 to i64 - %416 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 %415 - %417 = load float, float* %416, align 4 - %418 = load float*, float** %36, align 8 - %419 = load i32, i32* %66, align 4 - %420 = sext i32 %419 
to i64 - %421 = getelementptr inbounds float, float* %418, i64 %420 - store float %417, float* %421, align 4 - br label %422 - -422: ; preds = %413 - %423 = load i32, i32* %66, align 4 - %424 = add nsw i32 %423, 1 - store i32 %424, i32* %66, align 4 - br label %410 - -425: ; preds = %410 - %426 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 0 - %427 = load float*, float** %35, align 8 - %428 = load float*, float** %37, align 8 - store float* %426, float** %12, align 8 - store float* %427, float** %13, align 8 - store float* %428, float** %14, align 8 - store i32 0, i32* %15, align 4 - br label %429 - -429: ; preds = %479, %425 - %430 = load i32, i32* %15, align 4 - %431 = icmp slt i32 %430, 2 - br i1 %431, label %432, label %482 - -432: ; preds = %429 - store i32 0, i32* %16, align 4 - br label %433 - -433: ; preds = %476, %432 - %434 = load i32, i32* %16, align 4 - %435 = icmp slt i32 %434, 2 - br i1 %435, label %436, label %479 - -436: ; preds = %433 - %437 = load float*, float** %14, align 8 - %438 = load i32, i32* %15, align 4 - %439 = mul nsw i32 2, %438 - %440 = load i32, i32* %16, align 4 - %441 = add nsw i32 %439, %440 - %442 = sext i32 %441 to i64 - %443 = getelementptr inbounds float, float* %437, i64 %442 - store float 0.000000e+00, float* %443, align 4 - store i32 0, i32* %17, align 4 - br label %444 - -444: ; preds = %447, %436 - %445 = load i32, i32* %17, align 4 - %446 = icmp slt i32 %445, 2 - br i1 %446, label %447, label %476 - -447: ; preds = %444 - %448 = load float*, float** %12, align 8 - %449 = load i32, i32* %15, align 4 - %450 = mul nsw i32 2, %449 - %451 = load i32, i32* %17, align 4 - %452 = add nsw i32 %450, %451 - %453 = sext i32 %452 to i64 - %454 = getelementptr inbounds float, float* %448, i64 %453 - %455 = load float, float* %454, align 4 - %456 = load float*, float** %13, align 8 - %457 = load i32, i32* %17, align 4 - %458 = mul nsw i32 2, %457 - %459 = load i32, i32* %16, align 4 - %460 = add nsw i32 %458, %459 - %461 = sext i32 %460 to i64 - %462 = getelementptr inbounds float, float* %456, i64 %461 - %463 = load float, float* %462, align 4 - %464 = fmul float %455, %463 - %465 = load float*, float** %14, align 8 - %466 = load i32, i32* %15, align 4 - %467 = mul nsw i32 2, %466 - %468 = load i32, i32* %16, align 4 - %469 = add nsw i32 %467, %468 - %470 = sext i32 %469 to i64 - %471 = getelementptr inbounds float, float* %465, i64 %470 - %472 = load float, float* %471, align 4 - %473 = fadd float %472, %464 - store float %473, float* %471, align 4 - %474 = load i32, i32* %17, align 4 - %475 = add nsw i32 %474, 1 - store i32 %475, i32* %17, align 4 - br label %444 - -476: ; preds = %444 - %477 = load i32, i32* %16, align 4 - %478 = add nsw i32 %477, 1 - store i32 %478, i32* %16, align 4 - br label %433 - -479: ; preds = %433 - %480 = load i32, i32* %15, align 4 - %481 = add nsw i32 %480, 1 - store i32 %481, i32* %15, align 4 - br label %429 - -482: ; preds = %429 - br label %642 - -483: ; preds = %406 - %484 = bitcast [4 x float]* %67 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %484, i8 0, i64 16, i1 false) - store i32 0, i32* %68, align 4 - br label %485 - -485: ; preds = %492, %483 - %486 = load i32, i32* %68, align 4 - %487 = icmp slt i32 %486, 4 - br i1 %487, label %488, label %495 - -488: ; preds = %485 - %489 = load i32, i32* %68, align 4 - %490 = sext i32 %489 to i64 - %491 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 %490 - store float 0.000000e+00, float* %491, align 4 - br label %492 - -492: ; 
preds = %488 - %493 = load i32, i32* %68, align 4 - %494 = add nsw i32 %493, 1 - store i32 %494, i32* %68, align 4 - br label %485 - -495: ; preds = %485 - %496 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 0 - %497 = load float*, float** %36, align 8 - %498 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 0 - store float* %496, float** %18, align 8 - store float* %497, float** %19, align 8 - store float* %498, float** %20, align 8 - store i32 0, i32* %21, align 4 - br label %499 - -499: ; preds = %549, %495 - %500 = load i32, i32* %21, align 4 - %501 = icmp slt i32 %500, 2 - br i1 %501, label %502, label %552 - -502: ; preds = %499 - store i32 0, i32* %22, align 4 - br label %503 - -503: ; preds = %546, %502 - %504 = load i32, i32* %22, align 4 - %505 = icmp slt i32 %504, 2 - br i1 %505, label %506, label %549 - -506: ; preds = %503 - %507 = load float*, float** %20, align 8 - %508 = load i32, i32* %21, align 4 - %509 = mul nsw i32 2, %508 - %510 = load i32, i32* %22, align 4 - %511 = add nsw i32 %509, %510 - %512 = sext i32 %511 to i64 - %513 = getelementptr inbounds float, float* %507, i64 %512 - store float 0.000000e+00, float* %513, align 4 - store i32 0, i32* %23, align 4 - br label %514 - -514: ; preds = %517, %506 - %515 = load i32, i32* %23, align 4 - %516 = icmp slt i32 %515, 2 - br i1 %516, label %517, label %546 - -517: ; preds = %514 - %518 = load float*, float** %18, align 8 - %519 = load i32, i32* %21, align 4 - %520 = mul nsw i32 2, %519 - %521 = load i32, i32* %23, align 4 - %522 = add nsw i32 %520, %521 - %523 = sext i32 %522 to i64 - %524 = getelementptr inbounds float, float* %518, i64 %523 - %525 = load float, float* %524, align 4 - %526 = load float*, float** %19, align 8 - %527 = load i32, i32* %23, align 4 - %528 = mul nsw i32 2, %527 - %529 = load i32, i32* %22, align 4 - %530 = add nsw i32 %528, %529 - %531 = sext i32 %530 to i64 - %532 = getelementptr inbounds float, float* %526, i64 %531 - %533 = load float, float* %532, align 4 - %534 = fmul float %525, %533 - %535 = load float*, float** %20, align 8 - %536 = load i32, i32* %21, align 4 - %537 = mul nsw i32 2, %536 - %538 = load i32, i32* %22, align 4 - %539 = add nsw i32 %537, %538 - %540 = sext i32 %539 to i64 - %541 = getelementptr inbounds float, float* %535, i64 %540 - %542 = load float, float* %541, align 4 - %543 = fadd float %542, %534 - store float %543, float* %541, align 4 - %544 = load i32, i32* %23, align 4 - %545 = add nsw i32 %544, 1 - store i32 %545, i32* %23, align 4 - br label %514 - -546: ; preds = %514 - %547 = load i32, i32* %22, align 4 - %548 = add nsw i32 %547, 1 - store i32 %548, i32* %22, align 4 - br label %503 - -549: ; preds = %503 - %550 = load i32, i32* %21, align 4 - %551 = add nsw i32 %550, 1 - store i32 %551, i32* %21, align 4 - br label %499 - -552: ; preds = %499 - store i32 0, i32* %69, align 4 - br label %553 - -553: ; preds = %565, %552 - %554 = load i32, i32* %69, align 4 - %555 = icmp slt i32 %554, 4 - br i1 %555, label %556, label %568 - -556: ; preds = %553 - %557 = load i32, i32* %69, align 4 - %558 = sext i32 %557 to i64 - %559 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 %558 - %560 = load float, float* %559, align 4 - %561 = load float*, float** %36, align 8 - %562 = load i32, i32* %69, align 4 - %563 = sext i32 %562 to i64 - %564 = getelementptr inbounds float, float* %561, i64 %563 - store float %560, float* %564, align 4 - br label %565 - -565: ; preds = %556 - %566 = load i32, i32* %69, align 4 - %567 
= add nsw i32 %566, 1 - store i32 %567, i32* %69, align 4 - br label %553 - -568: ; preds = %553 - %569 = getelementptr inbounds [4 x float], [4 x float]* %61, i64 0, i64 0 - %570 = load float*, float** %37, align 8 - %571 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 0 - store float* %569, float** %24, align 8 - store float* %570, float** %25, align 8 - store float* %571, float** %26, align 8 - store i32 0, i32* %27, align 4 - br label %572 - -572: ; preds = %622, %568 - %573 = load i32, i32* %27, align 4 - %574 = icmp slt i32 %573, 2 - br i1 %574, label %575, label %625 - -575: ; preds = %572 - store i32 0, i32* %28, align 4 - br label %576 - -576: ; preds = %619, %575 - %577 = load i32, i32* %28, align 4 - %578 = icmp slt i32 %577, 2 - br i1 %578, label %579, label %622 - -579: ; preds = %576 - %580 = load float*, float** %26, align 8 - %581 = load i32, i32* %27, align 4 - %582 = mul nsw i32 2, %581 - %583 = load i32, i32* %28, align 4 - %584 = add nsw i32 %582, %583 - %585 = sext i32 %584 to i64 - %586 = getelementptr inbounds float, float* %580, i64 %585 - store float 0.000000e+00, float* %586, align 4 - store i32 0, i32* %29, align 4 - br label %587 - -587: ; preds = %590, %579 - %588 = load i32, i32* %29, align 4 - %589 = icmp slt i32 %588, 2 - br i1 %589, label %590, label %619 - -590: ; preds = %587 - %591 = load float*, float** %24, align 8 - %592 = load i32, i32* %27, align 4 - %593 = mul nsw i32 2, %592 - %594 = load i32, i32* %29, align 4 - %595 = add nsw i32 %593, %594 - %596 = sext i32 %595 to i64 - %597 = getelementptr inbounds float, float* %591, i64 %596 - %598 = load float, float* %597, align 4 - %599 = load float*, float** %25, align 8 - %600 = load i32, i32* %29, align 4 - %601 = mul nsw i32 2, %600 - %602 = load i32, i32* %28, align 4 - %603 = add nsw i32 %601, %602 - %604 = sext i32 %603 to i64 - %605 = getelementptr inbounds float, float* %599, i64 %604 - %606 = load float, float* %605, align 4 - %607 = fmul float %598, %606 - %608 = load float*, float** %26, align 8 - %609 = load i32, i32* %27, align 4 - %610 = mul nsw i32 2, %609 - %611 = load i32, i32* %28, align 4 - %612 = add nsw i32 %610, %611 - %613 = sext i32 %612 to i64 - %614 = getelementptr inbounds float, float* %608, i64 %613 - %615 = load float, float* %614, align 4 - %616 = fadd float %615, %607 - store float %616, float* %614, align 4 - %617 = load i32, i32* %29, align 4 - %618 = add nsw i32 %617, 1 - store i32 %618, i32* %29, align 4 - br label %587 - -619: ; preds = %587 - %620 = load i32, i32* %28, align 4 - %621 = add nsw i32 %620, 1 - store i32 %621, i32* %28, align 4 - br label %576 - -622: ; preds = %576 - %623 = load i32, i32* %27, align 4 - %624 = add nsw i32 %623, 1 - store i32 %624, i32* %27, align 4 - br label %572 - -625: ; preds = %572 - store i32 0, i32* %70, align 4 - br label %626 - -626: ; preds = %638, %625 - %627 = load i32, i32* %70, align 4 - %628 = icmp slt i32 %627, 4 - br i1 %628, label %629, label %641 - -629: ; preds = %626 - %630 = load i32, i32* %70, align 4 - %631 = sext i32 %630 to i64 - %632 = getelementptr inbounds [4 x float], [4 x float]* %67, i64 0, i64 %631 - %633 = load float, float* %632, align 4 - %634 = load float*, float** %37, align 8 - %635 = load i32, i32* %70, align 4 - %636 = sext i32 %635 to i64 - %637 = getelementptr inbounds float, float* %634, i64 %636 - store float %633, float* %637, align 4 - br label %638 - -638: ; preds = %629 - %639 = load i32, i32* %70, align 4 - %640 = add nsw i32 %639, 1 - store i32 %640, i32* %70, align 
4 - br label %626 - -641: ; preds = %626 - br label %642 - -642: ; preds = %641, %482 - br label %643 - -643: ; preds = %642 - %644 = load i32, i32* %42, align 4 - %645 = add nsw i32 %644, 1 - store i32 %645, i32* %42, align 4 - br label %116 - -646: ; preds = %116 - %647 = load float*, float** %36, align 8 - store float* %647, float** %30, align 8 - store i32 0, i32* %31, align 4 - br label %648 - -648: ; preds = %691, %646 - %649 = load i32, i32* %31, align 4 - %650 = icmp slt i32 %649, 2 - br i1 %650, label %651, label %694 - -651: ; preds = %648 - %652 = load i32, i32* %31, align 4 - %653 = add nsw i32 %652, 1 - store i32 %653, i32* %32, align 4 - br label %654 - -654: ; preds = %657, %651 - %655 = load i32, i32* %32, align 4 - %656 = icmp slt i32 %655, 2 - br i1 %656, label %657, label %691 - -657: ; preds = %654 - %658 = load float*, float** %30, align 8 - %659 = load i32, i32* %31, align 4 - %660 = mul nsw i32 %659, 2 - %661 = load i32, i32* %32, align 4 - %662 = add nsw i32 %660, %661 - %663 = sext i32 %662 to i64 - %664 = getelementptr inbounds float, float* %658, i64 %663 - %665 = load float, float* %664, align 4 - store float %665, float* %33, align 4 - %666 = load float*, float** %30, align 8 - %667 = load i32, i32* %32, align 4 - %668 = mul nsw i32 %667, 2 - %669 = load i32, i32* %31, align 4 - %670 = add nsw i32 %668, %669 - %671 = sext i32 %670 to i64 - %672 = getelementptr inbounds float, float* %666, i64 %671 - %673 = load float, float* %672, align 4 - %674 = load float*, float** %30, align 8 - %675 = load i32, i32* %31, align 4 - %676 = mul nsw i32 %675, 2 - %677 = load i32, i32* %32, align 4 - %678 = add nsw i32 %676, %677 - %679 = sext i32 %678 to i64 - %680 = getelementptr inbounds float, float* %674, i64 %679 - store float %673, float* %680, align 4 - %681 = load float, float* %33, align 4 - %682 = load float*, float** %30, align 8 - %683 = load i32, i32* %32, align 4 - %684 = mul nsw i32 %683, 2 - %685 = load i32, i32* %31, align 4 - %686 = add nsw i32 %684, %685 - %687 = sext i32 %686 to i64 - %688 = getelementptr inbounds float, float* %682, i64 %687 - store float %681, float* %688, align 4 - %689 = load i32, i32* %32, align 4 - %690 = add nsw i32 %689, 1 - store i32 %690, i32* %32, align 4 - br label %654 - -691: ; preds = %654 - %692 = load i32, i32* %31, align 4 - %693 = add nsw i32 %692, 1 - store i32 %693, i32* %31, align 4 - br label %648 - -694: ; preds = %648 - ret void -} - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - %7 = alloca float*, align 8 - %8 = alloca i32, align 4 - %9 = alloca i32, align 4 - %10 = alloca i32, align 4 - %11 = alloca i32, align 4 - %12 = alloca float*, align 8 - %13 = alloca float*, align 8 - %14 = alloca i32, align 4 - %15 = alloca i32, align 4 - %16 = alloca float, align 4 - %17 = alloca float*, align 8 - %18 = alloca float*, align 8 - %19 = alloca i32, align 4 - %20 = alloca float, align 4 - %21 = alloca i32, align 4 - %22 = alloca float*, align 8 - %23 = alloca i32, align 4 - %24 = alloca i32, align 4 - %25 = alloca float, align 4 - %26 = alloca float*, align 8 - %27 = alloca i32, align 4 - %28 = alloca i32, align 4 - %29 = alloca float, align 4 - %30 = alloca float*, align 8 - store float* %0, float** %4, align 
8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - %31 = load float*, float** %6, align 8 - %32 = bitcast float* %31 to i8* - %33 = load float*, float** %4, align 8 - %34 = bitcast float* %33 to i8* - %35 = load float*, float** %6, align 8 - %36 = bitcast float* %35 to i8* - %37 = call i64 @llvm.objectsize.i64.p0i8(i8* %36, i1 false, i1 true, i1 false) - %38 = call i8* @__memcpy_chk(i8* %32, i8* %34, i64 16, i64 %37) #8 - %39 = call i8* @calloc(i64 4, i64 4) #9 - %40 = bitcast i8* %39 to float* - store float* %40, float** %7, align 8 - store i32 0, i32* %8, align 4 - br label %41 - -41: ; preds = %65, %3 - %42 = load i32, i32* %8, align 4 - %43 = icmp slt i32 %42, 2 - br i1 %43, label %44, label %68 - -44: ; preds = %41 - store i32 0, i32* %9, align 4 - br label %45 - -45: ; preds = %61, %44 - %46 = load i32, i32* %9, align 4 - %47 = icmp slt i32 %46, 2 - br i1 %47, label %48, label %64 - -48: ; preds = %45 - %49 = load i32, i32* %8, align 4 - %50 = load i32, i32* %9, align 4 - %51 = icmp eq i32 %49, %50 - %52 = zext i1 %51 to i32 - %53 = sitofp i32 %52 to float - %54 = load float*, float** %7, align 8 - %55 = load i32, i32* %8, align 4 - %56 = mul nsw i32 %55, 2 - %57 = load i32, i32* %9, align 4 - %58 = add nsw i32 %56, %57 - %59 = sext i32 %58 to i64 - %60 = getelementptr inbounds float, float* %54, i64 %59 - store float %53, float* %60, align 4 - br label %61 - -61: ; preds = %48 - %62 = load i32, i32* %9, align 4 - %63 = add nsw i32 %62, 1 - store i32 %63, i32* %9, align 4 - br label %45 - -64: ; preds = %45 - br label %65 - -65: ; preds = %64 - %66 = load i32, i32* %8, align 4 - %67 = add nsw i32 %66, 1 - store i32 %67, i32* %8, align 4 - br label %41 - -68: ; preds = %41 - store i32 0, i32* %10, align 4 - br label %69 - -69: ; preds = %343, %68 - %70 = load i32, i32* %10, align 4 - %71 = icmp slt i32 %70, 1 - br i1 %71, label %72, label %346 - -72: ; preds = %69 - %73 = load i32, i32* %10, align 4 - %74 = sub nsw i32 2, %73 - store i32 %74, i32* %11, align 4 - %75 = load i32, i32* %11, align 4 - %76 = sext i32 %75 to i64 - %77 = call i8* @calloc(i64 4, i64 %76) #9 - %78 = bitcast i8* %77 to float* - store float* %78, float** %12, align 8 - %79 = load i32, i32* %11, align 4 - %80 = sext i32 %79 to i64 - %81 = call i8* @calloc(i64 4, i64 %80) #9 - %82 = bitcast i8* %81 to float* - store float* %82, float** %13, align 8 - store i32 0, i32* %14, align 4 - br label %83 - -83: ; preds = %115, %72 - %84 = load i32, i32* %14, align 4 - %85 = load i32, i32* %11, align 4 - %86 = icmp slt i32 %84, %85 - br i1 %86, label %87, label %118 - -87: ; preds = %83 - %88 = load i32, i32* %10, align 4 - %89 = load i32, i32* %14, align 4 - %90 = add nsw i32 %88, %89 - store i32 %90, i32* %15, align 4 - %91 = load float*, float** %6, align 8 - %92 = load i32, i32* %15, align 4 - %93 = mul nsw i32 %92, 2 - %94 = load i32, i32* %10, align 4 - %95 = add nsw i32 %93, %94 - %96 = sext i32 %95 to i64 - %97 = getelementptr inbounds float, float* %91, i64 %96 - %98 = load float, float* %97, align 4 - %99 = load float*, float** %12, align 8 - %100 = load i32, i32* %14, align 4 - %101 = sext i32 %100 to i64 - %102 = getelementptr inbounds float, float* %99, i64 %101 - store float %98, float* %102, align 4 - %103 = load float*, float** %7, align 8 - %104 = load i32, i32* %15, align 4 - %105 = mul nsw i32 %104, 2 - %106 = load i32, i32* %10, align 4 - %107 = add nsw i32 %105, %106 - %108 = sext i32 %107 to i64 - %109 = getelementptr inbounds float, float* %103, i64 %108 - %110 = load 
float, float* %109, align 4 - %111 = load float*, float** %13, align 8 - %112 = load i32, i32* %14, align 4 - %113 = sext i32 %112 to i64 - %114 = getelementptr inbounds float, float* %111, i64 %113 - store float %110, float* %114, align 4 - br label %115 - -115: ; preds = %87 - %116 = load i32, i32* %14, align 4 - %117 = add nsw i32 %116, 1 - store i32 %117, i32* %14, align 4 - br label %83 - -118: ; preds = %83 - %119 = load float*, float** %12, align 8 - %120 = getelementptr inbounds float, float* %119, i64 0 - %121 = load float, float* %120, align 4 - %122 = call float @no_opt_sgn(float %121) - %123 = fneg float %122 - %124 = load float*, float** %12, align 8 - %125 = load i32, i32* %11, align 4 - %126 = call float @no_opt_naive_norm(float* %124, i32 %125) - %127 = fmul float %123, %126 - store float %127, float* %16, align 4 - %128 = load i32, i32* %11, align 4 - %129 = sext i32 %128 to i64 - %130 = call i8* @calloc(i64 4, i64 %129) #9 - %131 = bitcast i8* %130 to float* - store float* %131, float** %17, align 8 - %132 = load i32, i32* %11, align 4 - %133 = sext i32 %132 to i64 - %134 = call i8* @calloc(i64 4, i64 %133) #9 - %135 = bitcast i8* %134 to float* - store float* %135, float** %18, align 8 - store i32 0, i32* %19, align 4 - br label %136 - -136: ; preds = %158, %118 - %137 = load i32, i32* %19, align 4 - %138 = load i32, i32* %11, align 4 - %139 = icmp slt i32 %137, %138 - br i1 %139, label %140, label %161 - -140: ; preds = %136 - %141 = load float*, float** %12, align 8 - %142 = load i32, i32* %19, align 4 - %143 = sext i32 %142 to i64 - %144 = getelementptr inbounds float, float* %141, i64 %143 - %145 = load float, float* %144, align 4 - %146 = load float, float* %16, align 4 - %147 = load float*, float** %13, align 8 - %148 = load i32, i32* %19, align 4 - %149 = sext i32 %148 to i64 - %150 = getelementptr inbounds float, float* %147, i64 %149 - %151 = load float, float* %150, align 4 - %152 = fmul float %146, %151 - %153 = fadd float %145, %152 - %154 = load float*, float** %17, align 8 - %155 = load i32, i32* %19, align 4 - %156 = sext i32 %155 to i64 - %157 = getelementptr inbounds float, float* %154, i64 %156 - store float %153, float* %157, align 4 - br label %158 - -158: ; preds = %140 - %159 = load i32, i32* %19, align 4 - %160 = add nsw i32 %159, 1 - store i32 %160, i32* %19, align 4 - br label %136 - -161: ; preds = %136 - %162 = load float*, float** %17, align 8 - %163 = load i32, i32* %11, align 4 - %164 = call float @no_opt_naive_norm(float* %162, i32 %163) - store float %164, float* %20, align 4 - store i32 0, i32* %21, align 4 - br label %165 - -165: ; preds = %182, %161 - %166 = load i32, i32* %21, align 4 - %167 = load i32, i32* %11, align 4 - %168 = icmp slt i32 %166, %167 - br i1 %168, label %169, label %185 - -169: ; preds = %165 - %170 = load float*, float** %17, align 8 - %171 = load i32, i32* %21, align 4 - %172 = sext i32 %171 to i64 - %173 = getelementptr inbounds float, float* %170, i64 %172 - %174 = load float, float* %173, align 4 - %175 = load float, float* %20, align 4 - %176 = fadd float %175, 0x3EE4F8B580000000 - %177 = fdiv float %174, %176 - %178 = load float*, float** %18, align 8 - %179 = load i32, i32* %21, align 4 - %180 = sext i32 %179 to i64 - %181 = getelementptr inbounds float, float* %178, i64 %180 - store float %177, float* %181, align 4 - br label %182 - -182: ; preds = %169 - %183 = load i32, i32* %21, align 4 - %184 = add nsw i32 %183, 1 - store i32 %184, i32* %21, align 4 - br label %165 - -185: ; preds = %165 - %186 = load 
i32, i32* %11, align 4 - %187 = load i32, i32* %11, align 4 - %188 = mul nsw i32 %186, %187 - %189 = sext i32 %188 to i64 - %190 = call i8* @calloc(i64 4, i64 %189) #9 - %191 = bitcast i8* %190 to float* - store float* %191, float** %22, align 8 - store i32 0, i32* %23, align 4 - br label %192 - -192: ; preds = %233, %185 - %193 = load i32, i32* %23, align 4 - %194 = load i32, i32* %11, align 4 - %195 = icmp slt i32 %193, %194 - br i1 %195, label %196, label %236 - -196: ; preds = %192 - store i32 0, i32* %24, align 4 - br label %197 - -197: ; preds = %229, %196 - %198 = load i32, i32* %24, align 4 - %199 = load i32, i32* %11, align 4 - %200 = icmp slt i32 %198, %199 - br i1 %200, label %201, label %232 - -201: ; preds = %197 - %202 = load i32, i32* %23, align 4 - %203 = load i32, i32* %24, align 4 - %204 = icmp eq i32 %202, %203 - %205 = zext i1 %204 to i64 - %206 = select i1 %204, float 1.000000e+00, float 0.000000e+00 - %207 = load float*, float** %18, align 8 - %208 = load i32, i32* %23, align 4 - %209 = sext i32 %208 to i64 - %210 = getelementptr inbounds float, float* %207, i64 %209 - %211 = load float, float* %210, align 4 - %212 = fmul float 2.000000e+00, %211 - %213 = load float*, float** %18, align 8 - %214 = load i32, i32* %24, align 4 - %215 = sext i32 %214 to i64 - %216 = getelementptr inbounds float, float* %213, i64 %215 - %217 = load float, float* %216, align 4 - %218 = fmul float %212, %217 - %219 = fsub float %206, %218 - store float %219, float* %25, align 4 - %220 = load float, float* %25, align 4 - %221 = load float*, float** %22, align 8 - %222 = load i32, i32* %23, align 4 - %223 = load i32, i32* %11, align 4 - %224 = mul nsw i32 %222, %223 - %225 = load i32, i32* %24, align 4 - %226 = add nsw i32 %224, %225 - %227 = sext i32 %226 to i64 - %228 = getelementptr inbounds float, float* %221, i64 %227 - store float %220, float* %228, align 4 - br label %229 - -229: ; preds = %201 - %230 = load i32, i32* %24, align 4 - %231 = add nsw i32 %230, 1 - store i32 %231, i32* %24, align 4 - br label %197 - -232: ; preds = %197 - br label %233 - -233: ; preds = %232 - %234 = load i32, i32* %23, align 4 - %235 = add nsw i32 %234, 1 - store i32 %235, i32* %23, align 4 - br label %192 - -236: ; preds = %192 - %237 = call i8* @calloc(i64 4, i64 4) #9 - %238 = bitcast i8* %237 to float* - store float* %238, float** %26, align 8 - store i32 0, i32* %27, align 4 - br label %239 - -239: ; preds = %287, %236 - %240 = load i32, i32* %27, align 4 - %241 = icmp slt i32 %240, 2 - br i1 %241, label %242, label %290 - -242: ; preds = %239 - store i32 0, i32* %28, align 4 - br label %243 - -243: ; preds = %283, %242 - %244 = load i32, i32* %28, align 4 - %245 = icmp slt i32 %244, 2 - br i1 %245, label %246, label %286 - -246: ; preds = %243 - %247 = load i32, i32* %27, align 4 - %248 = load i32, i32* %10, align 4 - %249 = icmp slt i32 %247, %248 - br i1 %249, label %254, label %250 - -250: ; preds = %246 - %251 = load i32, i32* %28, align 4 - %252 = load i32, i32* %10, align 4 - %253 = icmp slt i32 %251, %252 - br i1 %253, label %254, label %260 - -254: ; preds = %250, %246 - %255 = load i32, i32* %27, align 4 - %256 = load i32, i32* %28, align 4 - %257 = icmp eq i32 %255, %256 - %258 = zext i1 %257 to i64 - %259 = select i1 %257, float 1.000000e+00, float 0.000000e+00 - store float %259, float* %29, align 4 - br label %274 - -260: ; preds = %250 - %261 = load float*, float** %22, align 8 - %262 = load i32, i32* %27, align 4 - %263 = load i32, i32* %10, align 4 - %264 = sub nsw i32 %262, %263 - 
%265 = load i32, i32* %11, align 4 - %266 = mul nsw i32 %264, %265 - %267 = load i32, i32* %28, align 4 - %268 = load i32, i32* %10, align 4 - %269 = sub nsw i32 %267, %268 - %270 = add nsw i32 %266, %269 - %271 = sext i32 %270 to i64 - %272 = getelementptr inbounds float, float* %261, i64 %271 - %273 = load float, float* %272, align 4 - store float %273, float* %29, align 4 - br label %274 - -274: ; preds = %260, %254 - %275 = load float, float* %29, align 4 - %276 = load float*, float** %26, align 8 - %277 = load i32, i32* %27, align 4 - %278 = mul nsw i32 %277, 2 - %279 = load i32, i32* %28, align 4 - %280 = add nsw i32 %278, %279 - %281 = sext i32 %280 to i64 - %282 = getelementptr inbounds float, float* %276, i64 %281 - store float %275, float* %282, align 4 - br label %283 - -283: ; preds = %274 - %284 = load i32, i32* %28, align 4 - %285 = add nsw i32 %284, 1 - store i32 %285, i32* %28, align 4 - br label %243 - -286: ; preds = %243 - br label %287 - -287: ; preds = %286 - %288 = load i32, i32* %27, align 4 - %289 = add nsw i32 %288, 1 - store i32 %289, i32* %27, align 4 - br label %239 - -290: ; preds = %239 - %291 = load i32, i32* %10, align 4 - %292 = icmp eq i32 %291, 0 - br i1 %292, label %293, label %305 - -293: ; preds = %290 - %294 = load float*, float** %5, align 8 - %295 = bitcast float* %294 to i8* - %296 = load float*, float** %26, align 8 - %297 = bitcast float* %296 to i8* - %298 = load float*, float** %5, align 8 - %299 = bitcast float* %298 to i8* - %300 = call i64 @llvm.objectsize.i64.p0i8(i8* %299, i1 false, i1 true, i1 false) - %301 = call i8* @__memcpy_chk(i8* %295, i8* %297, i64 16, i64 %300) #8 - %302 = load float*, float** %26, align 8 - %303 = load float*, float** %4, align 8 - %304 = load float*, float** %6, align 8 - call void @no_opt_naive_fixed_matrix_multiply(float* %302, float* %303, float* %304) - br label %330 - -305: ; preds = %290 - %306 = call i8* @calloc(i64 4, i64 4) #9 - %307 = bitcast i8* %306 to float* - store float* %307, float** %30, align 8 - %308 = load float*, float** %26, align 8 - %309 = load float*, float** %5, align 8 - %310 = load float*, float** %30, align 8 - call void @no_opt_naive_fixed_matrix_multiply(float* %308, float* %309, float* %310) - %311 = load float*, float** %5, align 8 - %312 = bitcast float* %311 to i8* - %313 = load float*, float** %30, align 8 - %314 = bitcast float* %313 to i8* - %315 = load float*, float** %5, align 8 - %316 = bitcast float* %315 to i8* - %317 = call i64 @llvm.objectsize.i64.p0i8(i8* %316, i1 false, i1 true, i1 false) - %318 = call i8* @__memcpy_chk(i8* %312, i8* %314, i64 16, i64 %317) #8 - %319 = load float*, float** %26, align 8 - %320 = load float*, float** %6, align 8 - %321 = load float*, float** %30, align 8 - call void @no_opt_naive_fixed_matrix_multiply(float* %319, float* %320, float* %321) - %322 = load float*, float** %6, align 8 - %323 = bitcast float* %322 to i8* - %324 = load float*, float** %30, align 8 - %325 = bitcast float* %324 to i8* - %326 = load float*, float** %6, align 8 - %327 = bitcast float* %326 to i8* - %328 = call i64 @llvm.objectsize.i64.p0i8(i8* %327, i1 false, i1 true, i1 false) - %329 = call i8* @__memcpy_chk(i8* %323, i8* %325, i64 16, i64 %328) #8 - br label %330 - -330: ; preds = %305, %293 - %331 = load float*, float** %12, align 8 - %332 = bitcast float* %331 to i8* - call void @free(i8* %332) - %333 = load float*, float** %13, align 8 - %334 = bitcast float* %333 to i8* - call void @free(i8* %334) - %335 = load float*, float** %17, align 8 - %336 = 
bitcast float* %335 to i8* - call void @free(i8* %336) - %337 = load float*, float** %18, align 8 - %338 = bitcast float* %337 to i8* - call void @free(i8* %338) - %339 = load float*, float** %22, align 8 - %340 = bitcast float* %339 to i8* - call void @free(i8* %340) - %341 = load float*, float** %26, align 8 - %342 = bitcast float* %341 to i8* - call void @free(i8* %342) - br label %343 - -343: ; preds = %330 - %344 = load i32, i32* %10, align 4 - %345 = add nsw i32 %344, 1 - store i32 %345, i32* %10, align 4 - br label %69 - -346: ; preds = %69 - %347 = load float*, float** %5, align 8 - call void @no_opt_naive_fixed_transpose(float* %347) - ret void -} - -; Function Attrs: nounwind -declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 - -; Function Attrs: nounwind readnone speculatable willreturn -declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 - -; Function Attrs: allocsize(0,1) -declare i8* @calloc(i64, i64) #5 - -declare void @free(i8*) #6 - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #1 { - %1 = alloca i32, align 4 - %2 = alloca i64, align 8 - %3 = alloca [4 x float], align 16 - %4 = alloca i32, align 4 - %5 = alloca [4 x float], align 16 - %6 = alloca [4 x float], align 16 - %7 = alloca [4 x float], align 16 - %8 = alloca [4 x float], align 16 - %9 = alloca i32, align 4 - %10 = alloca i32, align 4 - %11 = alloca i32, align 4 - %12 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %13 = call i64 @time(i64* null) - store i64 %13, i64* %2, align 8 - %14 = call i64 @time(i64* %2) - %15 = trunc i64 %14 to i32 - call void @srand(i32 %15) - %16 = bitcast [4 x float]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %16, i8 0, i64 16, i1 false) - store i32 0, i32* %4, align 4 - br label %17 - -17: ; preds = %27, %0 - %18 = load i32, i32* %4, align 4 - %19 = icmp slt i32 %18, 4 - br i1 %19, label %20, label %30 - -20: ; preds = %17 - %21 = call i32 @rand() - %22 = sitofp i32 %21 to float - %23 = fdiv float %22, 0x41747AE140000000 - %24 = load i32, i32* %4, align 4 - %25 = sext i32 %24 to i64 - %26 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 %25 - store float %23, float* %26, align 4 - br label %27 - -27: ; preds = %20 - %28 = load i32, i32* %4, align 4 - %29 = add nsw i32 %28, 1 - store i32 %29, i32* %4, align 4 - br label %17 - -30: ; preds = %17 - %31 = bitcast [4 x float]* %5 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %31, i8 0, i64 16, i1 false) - %32 = bitcast [4 x float]* %6 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %32, i8 0, i64 16, i1 false) - %33 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - %34 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 - %35 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - call void @naive_fixed_qr_decomp(float* %33, float* %34, float* %35) - %36 = bitcast [4 x float]* %7 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %36, i8 0, i64 16, i1 false) - %37 = bitcast [4 x float]* %8 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %37, i8 0, i64 16, i1 false) - %38 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - %39 = getelementptr inbounds [4 x float], [4 x float]* %7, i64 0, i64 0 - %40 = getelementptr inbounds [4 x float], [4 x float]* %8, i64 0, i64 0 - call void @no_opt_naive_fixed_qr_decomp(float* %38, float* %39, float* %40) - store i32 0, i32* %9, align 4 - br label %41 - -41: ; preds = %91, %30 - %42 = load i32, i32* %9, align 4 - %43 = icmp slt 
i32 %42, 2 - br i1 %43, label %44, label %94 - -44: ; preds = %41 - store i32 0, i32* %10, align 4 - br label %45 - -45: ; preds = %87, %44 - %46 = load i32, i32* %10, align 4 - %47 = icmp slt i32 %46, 2 - br i1 %47, label %48, label %90 - -48: ; preds = %45 - %49 = load i32, i32* %9, align 4 - %50 = mul nsw i32 %49, 2 - %51 = load i32, i32* %10, align 4 - %52 = add nsw i32 %50, %51 - %53 = sext i32 %52 to i64 - %54 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 %53 - %55 = load float, float* %54, align 4 - %56 = fpext float %55 to double - %57 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %56) - %58 = load i32, i32* %9, align 4 - %59 = mul nsw i32 %58, 2 - %60 = load i32, i32* %10, align 4 - %61 = add nsw i32 %59, %60 - %62 = sext i32 %61 to i64 - %63 = getelementptr inbounds [4 x float], [4 x float]* %7, i64 0, i64 %62 - %64 = load float, float* %63, align 4 - %65 = fpext float %64 to double - %66 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %65) - %67 = load i32, i32* %9, align 4 - %68 = sext i32 %67 to i64 - %69 = getelementptr inbounds [4 x float], [4 x float]* %7, i64 0, i64 %68 - %70 = load float, float* %69, align 4 - %71 = load i32, i32* %9, align 4 - %72 = sext i32 %71 to i64 - %73 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 %72 - %74 = load float, float* %73, align 4 - %75 = fsub float %70, %74 - %76 = fpext float %75 to double - %77 = call double @llvm.fabs.f64(double %76) - %78 = fcmp olt double %77, 0x3FB99999A0000000 - %79 = xor i1 %78, true - %80 = zext i1 %79 to i32 - %81 = sext i32 %80 to i64 - %82 = icmp ne i64 %81, 0 - br i1 %82, label %83, label %85 - -83: ; preds = %48 - call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 300, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.3, i64 0, i64 0)) #10 - unreachable - -84: ; No predecessors! - br label %86 - -85: ; preds = %48 - br label %86 - -86: ; preds = %85, %84 - br label %87 - -87: ; preds = %86 - %88 = load i32, i32* %10, align 4 - %89 = add nsw i32 %88, 1 - store i32 %89, i32* %10, align 4 - br label %45 - -90: ; preds = %45 - br label %91 - -91: ; preds = %90 - %92 = load i32, i32* %9, align 4 - %93 = add nsw i32 %92, 1 - store i32 %93, i32* %9, align 4 - br label %41 - -94: ; preds = %41 - store i32 0, i32* %11, align 4 - br label %95 - -95: ; preds = %145, %94 - %96 = load i32, i32* %11, align 4 - %97 = icmp slt i32 %96, 2 - br i1 %97, label %98, label %148 - -98: ; preds = %95 - store i32 0, i32* %12, align 4 - br label %99 - -99: ; preds = %141, %98 - %100 = load i32, i32* %12, align 4 - %101 = icmp slt i32 %100, 2 - br i1 %101, label %102, label %144 - -102: ; preds = %99 - %103 = load i32, i32* %11, align 4 - %104 = mul nsw i32 %103, 2 - %105 = load i32, i32* %12, align 4 - %106 = add nsw i32 %104, %105 - %107 = sext i32 %106 to i64 - %108 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 %107 - %109 = load float, float* %108, align 4 - %110 = fpext float %109 to double - %111 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %110) - %112 = load i32, i32* %11, align 4 - %113 = mul nsw i32 %112, 2 - %114 = load i32, i32* %12, align 4 - %115 = add nsw i32 %113, %114 - %116 = sext i32 %115 to i64 - %117 = getelementptr inbounds [4 x float], [4 x float]* %8, i64 0, i64 %116 - %118 = load float, float* %117, align 4 - %119 = fpext float %118 to double - %120 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %119) - %121 = load i32, i32* %11, align 4 - %122 = sext i32 %121 to i64 - %123 = getelementptr inbounds [4 x float], [4 x float]* %8, i64 0, i64 %122 - %124 = load float, float* %123, align 4 - %125 = load i32, i32* %11, align 4 - %126 = sext i32 %125 to i64 - %127 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 %126 - %128 = load float, float* %127, align 4 - %129 = fsub float %124, %128 - %130 = fpext float %129 to double - %131 = call double @llvm.fabs.f64(double %130) - %132 = fcmp olt double %131, 0x3FB99999A0000000 - %133 = xor i1 %132, true - %134 = zext i1 %133 to i32 - %135 = sext i32 %134 to i64 - %136 = icmp ne i64 %135, 0 - br i1 %136, label %137, label %139 - -137: ; preds = %102 - call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 307, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.6, i64 0, i64 0)) #10 - unreachable - -138: ; No predecessors! - br label %140 - -139: ; preds = %102 - br label %140 - -140: ; preds = %139, %138 - br label %141 - -141: ; preds = %140 - %142 = load i32, i32* %12, align 4 - %143 = add nsw i32 %142, 1 - store i32 %143, i32* %12, align 4 - br label %99 - -144: ; preds = %99 - br label %145 - -145: ; preds = %144 - %146 = load i32, i32* %11, align 4 - %147 = add nsw i32 %146, 1 - store i32 %147, i32* %11, align 4 - br label %95 - -148: ; preds = %95 - %149 = load i32, i32* %1, align 4 - ret i32 %149 -} - -declare i64 @time(i64*) #6 - -declare void @srand(i32) #6 - -declare i32 @rand() #6 - -declare i32 @printf(i8*, ...) 
#6 - -; Function Attrs: nounwind readnone speculatable willreturn -declare double @llvm.fabs.f64(double) #2 - -; Function Attrs: noreturn -declare void @__assert_rtn(i8*, i8*, i32, i8*) #7 - -attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone speculatable willreturn } -attributes #3 = { argmemonly nounwind willreturn writeonly } -attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #8 = { nounwind } -attributes #9 = { allocsize(0,1) } -attributes #10 = { noreturn } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} diff --git 
a/src/dios-egraphs/Diospyros/flaky-outputs/flaky-dce.ll b/src/dios-egraphs/Diospyros/flaky-outputs/flaky-dce.ll deleted file mode 100644 index ef4a3d72..00000000 --- a/src/dios-egraphs/Diospyros/flaky-outputs/flaky-dce.ll +++ /dev/null @@ -1,3482 +0,0 @@ -; ModuleID = 'build/diospyros.ll' -source_filename = "fail-tests/qr-decomp-local-arrays.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@.str = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 -@.str.1 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 -@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 -@.str.2 = private unnamed_addr constant [36 x i8] c"fail-tests/qr-decomp-local-arrays.c\00", align 1 -@.str.3 = private unnamed_addr constant [34 x i8] c"fabs(expectedQ[i] - Q[i]) < DELTA\00", align 1 -@.str.4 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 -@.str.5 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 -@.str.6 = private unnamed_addr constant [34 x i8] c"fabs(expectedR[i] - R[i]) < DELTA\00", align 1 - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @sgn(float %0) #0 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_sgn(float %0) #1 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @naive_norm(float* %0, i32 %1) #0 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - %wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 - %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 
%niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 = fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32(float) #2 - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_naive_norm(float* %0, i32 %1) #1 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - %wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 - %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 
[ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 = fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_transpose(float* %0) #0 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - %6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_transpose(float* %0) #1 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - %6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { -.preheader: - store float 0.000000e+00, float* %2, align 4 - %3 = getelementptr float, float* %0, i32 0 - %4 = load float, float* %3, align 4 - %5 = insertelement <4 x float> zeroinitializer, float %4, i32 0 - %6 = insertelement <4 x float> %5, float 0.000000e+00, i32 1 - %7 = insertelement <4 x float> %6, float 0.000000e+00, i32 2 - %8 = insertelement <4 x float> %7, float 0.000000e+00, i32 3 - %9 = getelementptr float, float* %1, i32 0 - %10 = load float, float* %9, align 4 - %11 = insertelement <4 x float> zeroinitializer, float %10, i32 0 - %12 = insertelement <4 x float> %11, float 0.000000e+00, i32 1 - %13 = insertelement <4 x float> %12, float 0.000000e+00, i32 2 - %14 = insertelement <4 x float> %13, float 0.000000e+00, i32 3 - %15 = call <4 x float> @llvm.fma.v4f32(<4 x float> %8, <4 x float> %14, <4 x float> zeroinitializer) - %16 = extractelement <4 x float> %15, i32 0 - store float %16, float* %2, align 4 - %17 = insertelement <4 x float> zeroinitializer, float %4, i32 0 - %18 = insertelement <4 x float> %17, float 1.000000e+00, i32 1 - %19 = insertelement <4 x float> %18, float 1.000000e+00, i32 2 - %20 = insertelement <4 x float> %19, float 1.000000e+00, i32 3 - %21 = getelementptr float, float* %1, i32 0 - %22 = load float, float* %21, align 4 - %23 = insertelement <4 x float> zeroinitializer, float 
%22, i32 0 - %24 = insertelement <4 x float> %23, float 0.000000e+00, i32 1 - %25 = insertelement <4 x float> %24, float 0.000000e+00, i32 2 - %26 = insertelement <4 x float> %25, float 0.000000e+00, i32 3 - %27 = fmul <4 x float> %20, %26 - %28 = fadd <4 x float> %27, zeroinitializer - %29 = getelementptr float, float* %0, i32 0 - %30 = getelementptr inbounds float, float* %29, i64 1 - %31 = load float, float* %30, align 4 - %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 - %33 = insertelement <4 x float> %32, float 0.000000e+00, i32 1 - %34 = insertelement <4 x float> %33, float 0.000000e+00, i32 2 - %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 3 - %36 = getelementptr float, float* %1, i32 0 - %37 = getelementptr inbounds float, float* %36, i64 2 - %38 = load float, float* %37, align 4 - %39 = insertelement <4 x float> zeroinitializer, float %38, i32 0 - %40 = insertelement <4 x float> %39, float 0.000000e+00, i32 1 - %41 = insertelement <4 x float> %40, float 0.000000e+00, i32 2 - %42 = insertelement <4 x float> %41, float 0.000000e+00, i32 3 - %43 = call <4 x float> @llvm.fma.v4f32(<4 x float> %35, <4 x float> %42, <4 x float> %28) - %44 = extractelement <4 x float> %43, i32 0 - store float %44, float* %2, align 4 - %45 = extractelement <4 x float> %43, i32 1 - %46 = getelementptr float, float* %2, i32 0 - %47 = getelementptr inbounds float, float* %46, i64 1 - store float %45, float* %47, align 4 - %48 = getelementptr float, float* %0, i32 0 - %49 = load float, float* %48, align 4 - %50 = insertelement <4 x float> zeroinitializer, float %49, i32 0 - %51 = insertelement <4 x float> %50, float 0.000000e+00, i32 1 - %52 = insertelement <4 x float> %51, float 0.000000e+00, i32 2 - %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 3 - %54 = getelementptr float, float* %1, i32 0 - %55 = getelementptr inbounds float, float* %54, i64 1 - %56 = load float, float* %55, align 4 - %57 = insertelement <4 x float> zeroinitializer, float %56, i32 0 - %58 = insertelement <4 x float> %57, float 0.000000e+00, i32 1 - %59 = insertelement <4 x float> %58, float 0.000000e+00, i32 2 - %60 = insertelement <4 x float> %59, float 0.000000e+00, i32 3 - %61 = call <4 x float> @llvm.fma.v4f32(<4 x float> %53, <4 x float> %60, <4 x float> zeroinitializer) - %62 = extractelement <4 x float> %61, i32 0 - %63 = getelementptr float, float* %2, i32 0 - %64 = getelementptr inbounds float, float* %63, i64 1 - store float %62, float* %64, align 4 - %65 = insertelement <4 x float> zeroinitializer, float %49, i32 0 - %66 = insertelement <4 x float> %65, float 1.000000e+00, i32 1 - %67 = insertelement <4 x float> %66, float 1.000000e+00, i32 2 - %68 = insertelement <4 x float> %67, float 1.000000e+00, i32 3 - %69 = load float, float* %55, align 4 - %70 = insertelement <4 x float> zeroinitializer, float %69, i32 0 - %71 = insertelement <4 x float> %70, float 0.000000e+00, i32 1 - %72 = insertelement <4 x float> %71, float 0.000000e+00, i32 2 - %73 = insertelement <4 x float> %72, float 0.000000e+00, i32 3 - %74 = fmul <4 x float> %68, %73 - %75 = fadd <4 x float> %74, zeroinitializer - %76 = getelementptr float, float* %0, i32 0 - %77 = getelementptr inbounds float, float* %76, i64 1 - %78 = load float, float* %77, align 4 - %79 = insertelement <4 x float> zeroinitializer, float %78, i32 0 - %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 1 - %81 = insertelement <4 x float> %80, float 0.000000e+00, i32 2 - %82 = insertelement <4 x float> %81, float 0.000000e+00, i32 3 
- %83 = getelementptr float, float* %1, i32 0 - %84 = getelementptr inbounds float, float* %83, i64 3 - %85 = load float, float* %84, align 4 - %86 = insertelement <4 x float> zeroinitializer, float %85, i32 0 - %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 1 - %88 = insertelement <4 x float> %87, float 0.000000e+00, i32 2 - %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3 - %90 = call <4 x float> @llvm.fma.v4f32(<4 x float> %82, <4 x float> %89, <4 x float> %75) - %91 = extractelement <4 x float> %90, i32 0 - %92 = getelementptr float, float* %2, i32 0 - %93 = getelementptr inbounds float, float* %92, i64 1 - store float %91, float* %93, align 4 - %94 = extractelement <4 x float> %90, i32 1 - %95 = getelementptr float, float* %2, i32 0 - %96 = getelementptr inbounds float, float* %95, i64 2 - store float %94, float* %96, align 4 - %97 = getelementptr float, float* %0, i32 0 - %98 = getelementptr inbounds float, float* %97, i64 2 - %99 = load float, float* %98, align 4 - %100 = insertelement <4 x float> zeroinitializer, float %99, i32 0 - %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 1 - %102 = insertelement <4 x float> %101, float 0.000000e+00, i32 2 - %103 = insertelement <4 x float> %102, float 0.000000e+00, i32 3 - %104 = getelementptr float, float* %1, i32 0 - %105 = load float, float* %104, align 4 - %106 = insertelement <4 x float> zeroinitializer, float %105, i32 0 - %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 1 - %108 = insertelement <4 x float> %107, float 0.000000e+00, i32 2 - %109 = insertelement <4 x float> %108, float 0.000000e+00, i32 3 - %110 = call <4 x float> @llvm.fma.v4f32(<4 x float> %103, <4 x float> %109, <4 x float> zeroinitializer) - %111 = extractelement <4 x float> %110, i32 0 - %112 = getelementptr float, float* %2, i32 0 - %113 = getelementptr inbounds float, float* %112, i64 2 - store float %111, float* %113, align 4 - %114 = insertelement <4 x float> zeroinitializer, float %99, i32 0 - %115 = insertelement <4 x float> %114, float 1.000000e+00, i32 1 - %116 = insertelement <4 x float> %115, float 1.000000e+00, i32 2 - %117 = insertelement <4 x float> %116, float 1.000000e+00, i32 3 - %118 = insertelement <4 x float> zeroinitializer, float %105, i32 0 - %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 1 - %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 2 - %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 3 - %122 = fmul <4 x float> %117, %121 - %123 = fadd <4 x float> %122, zeroinitializer - %124 = getelementptr float, float* %0, i32 0 - %125 = getelementptr inbounds float, float* %124, i64 3 - %126 = load float, float* %125, align 4 - %127 = insertelement <4 x float> zeroinitializer, float %126, i32 0 - %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 1 - %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 2 - %130 = insertelement <4 x float> %129, float 0.000000e+00, i32 3 - %131 = load float, float* %37, align 4 - %132 = insertelement <4 x float> zeroinitializer, float %131, i32 0 - %133 = insertelement <4 x float> %132, float 0.000000e+00, i32 1 - %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 2 - %135 = insertelement <4 x float> %134, float 0.000000e+00, i32 3 - %136 = call <4 x float> @llvm.fma.v4f32(<4 x float> %130, <4 x float> %135, <4 x float> %123) - %137 = extractelement <4 x float> %136, i32 0 - store float %137, float* %113, align 4 - %138 = extractelement <4 x float> %136, i32 1 - %139 = 
getelementptr float, float* %2, i32 0 - %140 = getelementptr inbounds float, float* %139, i64 3 - store float %138, float* %140, align 4 - %141 = load float, float* %98, align 4 - %142 = insertelement <4 x float> zeroinitializer, float %141, i32 0 - %143 = insertelement <4 x float> %142, float 0.000000e+00, i32 1 - %144 = insertelement <4 x float> %143, float 0.000000e+00, i32 2 - %145 = insertelement <4 x float> %144, float 0.000000e+00, i32 3 - %146 = load float, float* %55, align 4 - %147 = insertelement <4 x float> zeroinitializer, float %146, i32 0 - %148 = insertelement <4 x float> %147, float 0.000000e+00, i32 1 - %149 = insertelement <4 x float> %148, float 0.000000e+00, i32 2 - %150 = insertelement <4 x float> %149, float 0.000000e+00, i32 3 - %151 = call <4 x float> @llvm.fma.v4f32(<4 x float> %145, <4 x float> %150, <4 x float> zeroinitializer) - %152 = extractelement <4 x float> %151, i32 0 - store float %152, float* %140, align 4 - %153 = insertelement <4 x float> zeroinitializer, float %141, i32 0 - %154 = insertelement <4 x float> %153, float 1.000000e+00, i32 1 - %155 = insertelement <4 x float> %154, float 1.000000e+00, i32 2 - %156 = insertelement <4 x float> %155, float 1.000000e+00, i32 3 - %157 = insertelement <4 x float> zeroinitializer, float %146, i32 0 - %158 = insertelement <4 x float> %157, float 0.000000e+00, i32 1 - %159 = insertelement <4 x float> %158, float 0.000000e+00, i32 2 - %160 = insertelement <4 x float> %159, float 0.000000e+00, i32 3 - %161 = fmul <4 x float> %156, %160 - %162 = fadd <4 x float> %161, zeroinitializer - %163 = getelementptr float, float* %0, i32 0 - %164 = getelementptr inbounds float, float* %163, i64 3 - %165 = load float, float* %164, align 4 - %166 = insertelement <4 x float> zeroinitializer, float %165, i32 0 - %167 = insertelement <4 x float> %166, float 0.000000e+00, i32 1 - %168 = insertelement <4 x float> %167, float 0.000000e+00, i32 2 - %169 = insertelement <4 x float> %168, float 0.000000e+00, i32 3 - %170 = load float, float* %84, align 4 - %171 = insertelement <4 x float> zeroinitializer, float %170, i32 0 - %172 = insertelement <4 x float> %171, float 0.000000e+00, i32 1 - %173 = insertelement <4 x float> %172, float 0.000000e+00, i32 2 - %174 = insertelement <4 x float> %173, float 0.000000e+00, i32 3 - %175 = call <4 x float> @llvm.fma.v4f32(<4 x float> %169, <4 x float> %174, <4 x float> %162) - %176 = extractelement <4 x float> %175, i32 0 - store float %176, float* %140, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { -.preheader: - store float 0.000000e+00, float* %2, align 4 - %3 = load float, float* %0, align 4 - %4 = load float, float* %1, align 4 - %5 = fmul float %3, %4 - %6 = fadd float %5, 0.000000e+00 - store float %6, float* %2, align 4 - %7 = getelementptr inbounds float, float* %0, i64 1 - %8 = load float, float* %7, align 4 - %9 = getelementptr inbounds float, float* %1, i64 2 - %10 = load float, float* %9, align 4 - %11 = fmul float %8, %10 - %12 = fadd float %6, %11 - store float %12, float* %2, align 4 - %13 = getelementptr inbounds float, float* %2, i64 1 - store float 0.000000e+00, float* %13, align 4 - %14 = load float, float* %0, align 4 - %15 = getelementptr inbounds float, float* %1, i64 1 - %16 = load float, float* %15, align 4 - %17 = fmul float %14, %16 - %18 = fadd float %17, 0.000000e+00 - store float %18, float* %13, align 4 - %19 = load float, float* %7, align 4 - %20 = 
getelementptr inbounds float, float* %1, i64 3 - %21 = load float, float* %20, align 4 - %22 = fmul float %19, %21 - %23 = fadd float %18, %22 - store float %23, float* %13, align 4 - %24 = getelementptr inbounds float, float* %0, i64 2 - %25 = getelementptr inbounds float, float* %2, i64 2 - store float 0.000000e+00, float* %25, align 4 - %26 = load float, float* %24, align 4 - %27 = load float, float* %1, align 4 - %28 = fmul float %26, %27 - %29 = fadd float %28, 0.000000e+00 - store float %29, float* %25, align 4 - %30 = getelementptr inbounds float, float* %0, i64 3 - %31 = load float, float* %30, align 4 - %32 = load float, float* %9, align 4 - %33 = fmul float %31, %32 - %34 = fadd float %29, %33 - store float %34, float* %25, align 4 - %35 = getelementptr inbounds float, float* %2, i64 3 - store float 0.000000e+00, float* %35, align 4 - %36 = load float, float* %24, align 4 - %37 = load float, float* %15, align 4 - %38 = fmul float %36, %37 - %39 = fadd float %38, 0.000000e+00 - store float %39, float* %35, align 4 - %40 = load float, float* %30, align 4 - %41 = load float, float* %20, align 4 - %42 = fmul float %40, %41 - %43 = fadd float %39, %42 - store float %43, float* %35, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { -.preheader49: - %3 = getelementptr float, float* %0, i32 0 - %4 = bitcast float* %3 to i32* - %5 = load i32, i32* %4, align 4 - %6 = bitcast i32 %5 to float - %7 = insertelement <4 x float> zeroinitializer, float %6, i32 0 - %8 = insertelement <4 x float> %7, float 0.000000e+00, i32 1 - %9 = insertelement <4 x float> %8, float 0.000000e+00, i32 2 - %10 = insertelement <4 x float> %9, float 0.000000e+00, i32 3 - %11 = extractelement <4 x float> %10, i32 0 - %12 = getelementptr float, float* %2, i32 0 - %13 = bitcast float* %12 to i32* - %14 = bitcast i32* %13 to float* - store float %11, float* %14, align 4 - %15 = getelementptr float, float* %0, i32 0 - %16 = getelementptr inbounds float, float* %15, i64 1 - %17 = bitcast float* %16 to i32* - %18 = load i32, i32* %17, align 4 - %19 = bitcast i32 %18 to float - %20 = insertelement <4 x float> zeroinitializer, float %19, i32 0 - %21 = insertelement <4 x float> %20, float 0.000000e+00, i32 1 - %22 = insertelement <4 x float> %21, float 0.000000e+00, i32 2 - %23 = insertelement <4 x float> %22, float 0.000000e+00, i32 3 - %24 = extractelement <4 x float> %23, i32 0 - %25 = getelementptr float, float* %2, i32 0 - %26 = getelementptr inbounds float, float* %25, i64 1 - %27 = bitcast float* %26 to i32* - %28 = bitcast i32* %27 to float* - store float %24, float* %28, align 4 - %29 = getelementptr float, float* %0, i32 0 - %30 = getelementptr inbounds float, float* %29, i64 2 - %31 = bitcast float* %30 to i32* - %32 = load i32, i32* %31, align 4 - %33 = bitcast i32 %32 to float - %34 = insertelement <4 x float> zeroinitializer, float %33, i32 0 - %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 1 - %36 = insertelement <4 x float> %35, float 0.000000e+00, i32 2 - %37 = insertelement <4 x float> %36, float 0.000000e+00, i32 3 - %38 = extractelement <4 x float> %37, i32 0 - %39 = getelementptr float, float* %2, i32 0 - %40 = getelementptr inbounds float, float* %39, i64 2 - %41 = bitcast float* %40 to i32* - %42 = bitcast i32* %41 to float* - store float %38, float* %42, align 4 - %43 = getelementptr float, float* %0, i32 0 - %44 = getelementptr inbounds float, float* %43, i64 3 - %45 = bitcast float* %44 to 
i32* - %46 = load i32, i32* %45, align 4 - %47 = bitcast i32 %46 to float - %48 = fneg float %47 - %49 = insertelement <4 x float> zeroinitializer, float %48, i32 0 - %50 = getelementptr float, float* %0, i32 0 - %51 = bitcast float* %50 to i32* - %52 = load i32, i32* %51, align 4 - %53 = bitcast i32 %52 to float - %54 = bitcast i32 %52 to float - %55 = fmul float %53, %54 - %56 = fadd float %55, 0.000000e+00 - %57 = bitcast i32 %32 to float - %58 = bitcast i32 %32 to float - %59 = fmul float %57, %58 - %60 = fadd float %56, %59 - %61 = call float @llvm.sqrt.f32(float %60) - %62 = bitcast i32 %52 to float - %63 = fcmp olt float %62, 0.000000e+00 - %64 = sext i1 %63 to i32 - %65 = fcmp ogt float %62, 0.000000e+00 - %66 = zext i1 %65 to i32 - %67 = add nsw i32 %64, %66 - %68 = sitofp i32 %67 to float - %69 = fneg float %68 - %70 = fmul float %61, %69 - %71 = bitcast i32 %52 to float - %72 = fadd float %71, %70 - %73 = bitcast i32 %52 to float - %74 = bitcast i32 %52 to float - %75 = fmul float %73, %74 - %76 = fadd float %75, 0.000000e+00 - %77 = bitcast i32 %32 to float - %78 = bitcast i32 %32 to float - %79 = fmul float %77, %78 - %80 = fadd float %76, %79 - %81 = call float @llvm.sqrt.f32(float %80) - %82 = fneg float %68 - %83 = fmul float %81, %82 - %84 = bitcast i32 %52 to float - %85 = fadd float %84, %83 - %86 = bitcast i32 %52 to float - %87 = bitcast i32 %52 to float - %88 = fmul float %86, %87 - %89 = fadd float %88, 0.000000e+00 - %90 = bitcast i32 %32 to float - %91 = bitcast i32 %32 to float - %92 = fmul float %90, %91 - %93 = fadd float %89, %92 - %94 = call float @llvm.sqrt.f32(float %93) - %95 = fneg float %68 - %96 = fmul float %94, %95 - %97 = bitcast i32 %52 to float - %98 = fadd float %97, %96 - %99 = fmul float %85, %98 - %100 = fadd float %99, 0.000000e+00 - %101 = bitcast i32 %52 to float - %102 = bitcast i32 %52 to float - %103 = fmul float %101, %102 - %104 = fadd float %103, 0.000000e+00 - %105 = bitcast i32 %32 to float - %106 = bitcast i32 %32 to float - %107 = fmul float %105, %106 - %108 = fadd float %104, %107 - %109 = call float @llvm.sqrt.f32(float %108) - %110 = fneg float %68 - %111 = fmul float %109, %110 - %112 = fmul float %111, 0.000000e+00 - %113 = bitcast i32 %32 to float - %114 = fadd float %113, %112 - %115 = bitcast i32 %52 to float - %116 = bitcast i32 %52 to float - %117 = fmul float %115, %116 - %118 = fadd float %117, 0.000000e+00 - %119 = bitcast i32 %32 to float - %120 = bitcast i32 %32 to float - %121 = fmul float %119, %120 - %122 = fadd float %118, %121 - %123 = call float @llvm.sqrt.f32(float %122) - %124 = fneg float %68 - %125 = fmul float %123, %124 - %126 = fmul float %125, 0.000000e+00 - %127 = bitcast i32 %32 to float - %128 = fadd float %127, %126 - %129 = fmul float %114, %128 - %130 = fadd float %100, %129 - %131 = call float @llvm.sqrt.f32(float %130) - %132 = fadd float %131, 0.000000e+00 - %133 = fdiv float %72, %132 - %134 = fmul float %133, 2.000000e+00 - %135 = bitcast i32 %52 to float - %136 = bitcast i32 %52 to float - %137 = fmul float %135, %136 - %138 = fadd float %137, 0.000000e+00 - %139 = bitcast i32 %32 to float - %140 = bitcast i32 %32 to float - %141 = fmul float %139, %140 - %142 = fadd float %138, %141 - %143 = call float @llvm.sqrt.f32(float %142) - %144 = fneg float %68 - %145 = fmul float %143, %144 - %146 = bitcast i32 %52 to float - %147 = fadd float %146, %145 - %148 = bitcast i32 %52 to float - %149 = bitcast i32 %52 to float - %150 = fmul float %148, %149 - %151 = fadd float %150, 0.000000e+00 - %152 = 
bitcast i32 %32 to float - %153 = bitcast i32 %32 to float - %154 = fmul float %152, %153 - %155 = fadd float %151, %154 - %156 = call float @llvm.sqrt.f32(float %155) - %157 = fneg float %68 - %158 = fmul float %156, %157 - %159 = bitcast i32 %52 to float - %160 = fadd float %159, %158 - %161 = bitcast i32 %52 to float - %162 = bitcast i32 %52 to float - %163 = fmul float %161, %162 - %164 = fadd float %163, 0.000000e+00 - %165 = bitcast i32 %32 to float - %166 = bitcast i32 %32 to float - %167 = fmul float %165, %166 - %168 = fadd float %164, %167 - %169 = call float @llvm.sqrt.f32(float %168) - %170 = fneg float %68 - %171 = fmul float %169, %170 - %172 = bitcast i32 %52 to float - %173 = fadd float %172, %171 - %174 = fmul float %160, %173 - %175 = fadd float %174, 0.000000e+00 - %176 = bitcast i32 %52 to float - %177 = bitcast i32 %52 to float - %178 = fmul float %176, %177 - %179 = fadd float %178, 0.000000e+00 - %180 = bitcast i32 %32 to float - %181 = bitcast i32 %32 to float - %182 = fmul float %180, %181 - %183 = fadd float %179, %182 - %184 = call float @llvm.sqrt.f32(float %183) - %185 = fneg float %68 - %186 = fmul float %184, %185 - %187 = fmul float %186, 0.000000e+00 - %188 = bitcast i32 %32 to float - %189 = fadd float %188, %187 - %190 = bitcast i32 %52 to float - %191 = bitcast i32 %52 to float - %192 = fmul float %190, %191 - %193 = fadd float %192, 0.000000e+00 - %194 = bitcast i32 %32 to float - %195 = bitcast i32 %32 to float - %196 = fmul float %194, %195 - %197 = fadd float %193, %196 - %198 = call float @llvm.sqrt.f32(float %197) - %199 = fneg float %68 - %200 = fmul float %198, %199 - %201 = fmul float %200, 0.000000e+00 - %202 = bitcast i32 %32 to float - %203 = fadd float %202, %201 - %204 = fmul float %189, %203 - %205 = fadd float %175, %204 - %206 = call float @llvm.sqrt.f32(float %205) - %207 = fadd float %206, 0.000000e+00 - %208 = fdiv float %147, %207 - %209 = fmul float %134, %208 - %210 = insertelement <4 x float> %49, float %209, i32 1 - %211 = bitcast i32 %52 to float - %212 = bitcast i32 %52 to float - %213 = fmul float %211, %212 - %214 = fadd float %213, 0.000000e+00 - %215 = bitcast i32 %32 to float - %216 = bitcast i32 %32 to float - %217 = fmul float %215, %216 - %218 = fadd float %214, %217 - %219 = call float @llvm.sqrt.f32(float %218) - %220 = fneg float %68 - %221 = fmul float %219, %220 - %222 = bitcast i32 %52 to float - %223 = fadd float %222, %221 - %224 = bitcast i32 %52 to float - %225 = bitcast i32 %52 to float - %226 = fmul float %224, %225 - %227 = fadd float %226, 0.000000e+00 - %228 = bitcast i32 %32 to float - %229 = bitcast i32 %32 to float - %230 = fmul float %228, %229 - %231 = fadd float %227, %230 - %232 = call float @llvm.sqrt.f32(float %231) - %233 = fneg float %68 - %234 = fmul float %232, %233 - %235 = bitcast i32 %52 to float - %236 = fadd float %235, %234 - %237 = bitcast i32 %52 to float - %238 = bitcast i32 %52 to float - %239 = fmul float %237, %238 - %240 = fadd float %239, 0.000000e+00 - %241 = bitcast i32 %32 to float - %242 = bitcast i32 %32 to float - %243 = fmul float %241, %242 - %244 = fadd float %240, %243 - %245 = call float @llvm.sqrt.f32(float %244) - %246 = fneg float %68 - %247 = fmul float %245, %246 - %248 = bitcast i32 %52 to float - %249 = fadd float %248, %247 - %250 = fmul float %236, %249 - %251 = fadd float %250, 0.000000e+00 - %252 = bitcast i32 %52 to float - %253 = bitcast i32 %52 to float - %254 = fmul float %252, %253 - %255 = fadd float %254, 0.000000e+00 - %256 = bitcast i32 %32 to 
float - %257 = bitcast i32 %32 to float - %258 = fmul float %256, %257 - %259 = fadd float %255, %258 - %260 = call float @llvm.sqrt.f32(float %259) - %261 = fneg float %68 - %262 = fmul float %260, %261 - %263 = fmul float %262, 0.000000e+00 - %264 = bitcast i32 %32 to float - %265 = fadd float %264, %263 - %266 = bitcast i32 %52 to float - %267 = bitcast i32 %52 to float - %268 = fmul float %266, %267 - %269 = fadd float %268, 0.000000e+00 - %270 = bitcast i32 %32 to float - %271 = bitcast i32 %32 to float - %272 = fmul float %270, %271 - %273 = fadd float %269, %272 - %274 = call float @llvm.sqrt.f32(float %273) - %275 = fneg float %68 - %276 = fmul float %274, %275 - %277 = fmul float %276, 0.000000e+00 - %278 = bitcast i32 %32 to float - %279 = fadd float %278, %277 - %280 = fmul float %265, %279 - %281 = fadd float %251, %280 - %282 = call float @llvm.sqrt.f32(float %281) - %283 = fadd float %282, 0.000000e+00 - %284 = fdiv float %223, %283 - %285 = fmul float %284, 2.000000e+00 - %286 = bitcast i32 %52 to float - %287 = bitcast i32 %52 to float - %288 = fmul float %286, %287 - %289 = fadd float %288, 0.000000e+00 - %290 = bitcast i32 %32 to float - %291 = bitcast i32 %32 to float - %292 = fmul float %290, %291 - %293 = fadd float %289, %292 - %294 = call float @llvm.sqrt.f32(float %293) - %295 = fneg float %68 - %296 = fmul float %294, %295 - %297 = fmul float %296, 0.000000e+00 - %298 = bitcast i32 %32 to float - %299 = fadd float %298, %297 - %300 = bitcast i32 %52 to float - %301 = bitcast i32 %52 to float - %302 = fmul float %300, %301 - %303 = fadd float %302, 0.000000e+00 - %304 = bitcast i32 %32 to float - %305 = bitcast i32 %32 to float - %306 = fmul float %304, %305 - %307 = fadd float %303, %306 - %308 = call float @llvm.sqrt.f32(float %307) - %309 = fneg float %68 - %310 = fmul float %308, %309 - %311 = bitcast i32 %52 to float - %312 = fadd float %311, %310 - %313 = bitcast i32 %52 to float - %314 = bitcast i32 %52 to float - %315 = fmul float %313, %314 - %316 = fadd float %315, 0.000000e+00 - %317 = bitcast i32 %32 to float - %318 = bitcast i32 %32 to float - %319 = fmul float %317, %318 - %320 = fadd float %316, %319 - %321 = call float @llvm.sqrt.f32(float %320) - %322 = fneg float %68 - %323 = fmul float %321, %322 - %324 = bitcast i32 %52 to float - %325 = fadd float %324, %323 - %326 = fmul float %312, %325 - %327 = fadd float %326, 0.000000e+00 - %328 = bitcast i32 %52 to float - %329 = bitcast i32 %52 to float - %330 = fmul float %328, %329 - %331 = fadd float %330, 0.000000e+00 - %332 = bitcast i32 %32 to float - %333 = bitcast i32 %32 to float - %334 = fmul float %332, %333 - %335 = fadd float %331, %334 - %336 = call float @llvm.sqrt.f32(float %335) - %337 = fneg float %68 - %338 = fmul float %336, %337 - %339 = fmul float %338, 0.000000e+00 - %340 = bitcast i32 %32 to float - %341 = fadd float %340, %339 - %342 = bitcast i32 %52 to float - %343 = bitcast i32 %52 to float - %344 = fmul float %342, %343 - %345 = fadd float %344, 0.000000e+00 - %346 = bitcast i32 %32 to float - %347 = bitcast i32 %32 to float - %348 = fmul float %346, %347 - %349 = fadd float %345, %348 - %350 = call float @llvm.sqrt.f32(float %349) - %351 = fneg float %68 - %352 = fmul float %350, %351 - %353 = fmul float %352, 0.000000e+00 - %354 = bitcast i32 %32 to float - %355 = fadd float %354, %353 - %356 = fmul float %341, %355 - %357 = fadd float %327, %356 - %358 = call float @llvm.sqrt.f32(float %357) - %359 = fadd float %358, 0.000000e+00 - %360 = fdiv float %299, %359 - %361 = fmul 
float %285, %360 - %362 = insertelement <4 x float> %210, float %361, i32 2 - %363 = bitcast i32 %52 to float - %364 = bitcast i32 %52 to float - %365 = fmul float %363, %364 - %366 = fadd float %365, 0.000000e+00 - %367 = bitcast i32 %32 to float - %368 = bitcast i32 %32 to float - %369 = fmul float %367, %368 - %370 = fadd float %366, %369 - %371 = call float @llvm.sqrt.f32(float %370) - %372 = fneg float %68 - %373 = fmul float %371, %372 - %374 = fmul float %373, 0.000000e+00 - %375 = bitcast i32 %32 to float - %376 = fadd float %375, %374 - %377 = bitcast i32 %52 to float - %378 = bitcast i32 %52 to float - %379 = fmul float %377, %378 - %380 = fadd float %379, 0.000000e+00 - %381 = bitcast i32 %32 to float - %382 = bitcast i32 %32 to float - %383 = fmul float %381, %382 - %384 = fadd float %380, %383 - %385 = call float @llvm.sqrt.f32(float %384) - %386 = fneg float %68 - %387 = fmul float %385, %386 - %388 = bitcast i32 %52 to float - %389 = fadd float %388, %387 - %390 = bitcast i32 %52 to float - %391 = bitcast i32 %52 to float - %392 = fmul float %390, %391 - %393 = fadd float %392, 0.000000e+00 - %394 = bitcast i32 %32 to float - %395 = bitcast i32 %32 to float - %396 = fmul float %394, %395 - %397 = fadd float %393, %396 - %398 = call float @llvm.sqrt.f32(float %397) - %399 = fneg float %68 - %400 = fmul float %398, %399 - %401 = bitcast i32 %52 to float - %402 = fadd float %401, %400 - %403 = fmul float %389, %402 - %404 = fadd float %403, 0.000000e+00 - %405 = bitcast i32 %52 to float - %406 = bitcast i32 %52 to float - %407 = fmul float %405, %406 - %408 = fadd float %407, 0.000000e+00 - %409 = bitcast i32 %32 to float - %410 = bitcast i32 %32 to float - %411 = fmul float %409, %410 - %412 = fadd float %408, %411 - %413 = call float @llvm.sqrt.f32(float %412) - %414 = fneg float %68 - %415 = fmul float %413, %414 - %416 = fmul float %415, 0.000000e+00 - %417 = bitcast i32 %32 to float - %418 = fadd float %417, %416 - %419 = bitcast i32 %52 to float - %420 = bitcast i32 %52 to float - %421 = fmul float %419, %420 - %422 = fadd float %421, 0.000000e+00 - %423 = bitcast i32 %32 to float - %424 = bitcast i32 %32 to float - %425 = fmul float %423, %424 - %426 = fadd float %422, %425 - %427 = call float @llvm.sqrt.f32(float %426) - %428 = fneg float %68 - %429 = fmul float %427, %428 - %430 = fmul float %429, 0.000000e+00 - %431 = bitcast i32 %32 to float - %432 = fadd float %431, %430 - %433 = fmul float %418, %432 - %434 = fadd float %404, %433 - %435 = call float @llvm.sqrt.f32(float %434) - %436 = fadd float %435, 0.000000e+00 - %437 = fdiv float %376, %436 - %438 = fmul float %437, 2.000000e+00 - %439 = bitcast i32 %52 to float - %440 = bitcast i32 %52 to float - %441 = fmul float %439, %440 - %442 = fadd float %441, 0.000000e+00 - %443 = bitcast i32 %32 to float - %444 = bitcast i32 %32 to float - %445 = fmul float %443, %444 - %446 = fadd float %442, %445 - %447 = call float @llvm.sqrt.f32(float %446) - %448 = fneg float %68 - %449 = fmul float %447, %448 - %450 = bitcast i32 %52 to float - %451 = fadd float %450, %449 - %452 = bitcast i32 %52 to float - %453 = bitcast i32 %52 to float - %454 = fmul float %452, %453 - %455 = fadd float %454, 0.000000e+00 - %456 = bitcast i32 %32 to float - %457 = bitcast i32 %32 to float - %458 = fmul float %456, %457 - %459 = fadd float %455, %458 - %460 = call float @llvm.sqrt.f32(float %459) - %461 = fneg float %68 - %462 = fmul float %460, %461 - %463 = bitcast i32 %52 to float - %464 = fadd float %463, %462 - %465 = bitcast i32 %52 to 
float - %466 = bitcast i32 %52 to float - %467 = fmul float %465, %466 - %468 = fadd float %467, 0.000000e+00 - %469 = bitcast i32 %32 to float - %470 = bitcast i32 %32 to float - %471 = fmul float %469, %470 - %472 = fadd float %468, %471 - %473 = call float @llvm.sqrt.f32(float %472) - %474 = fneg float %68 - %475 = fmul float %473, %474 - %476 = bitcast i32 %52 to float - %477 = fadd float %476, %475 - %478 = fmul float %464, %477 - %479 = fadd float %478, 0.000000e+00 - %480 = bitcast i32 %52 to float - %481 = bitcast i32 %52 to float - %482 = fmul float %480, %481 - %483 = fadd float %482, 0.000000e+00 - %484 = bitcast i32 %32 to float - %485 = bitcast i32 %32 to float - %486 = fmul float %484, %485 - %487 = fadd float %483, %486 - %488 = call float @llvm.sqrt.f32(float %487) - %489 = fneg float %68 - %490 = fmul float %488, %489 - %491 = fmul float %490, 0.000000e+00 - %492 = bitcast i32 %32 to float - %493 = fadd float %492, %491 - %494 = bitcast i32 %52 to float - %495 = bitcast i32 %52 to float - %496 = fmul float %494, %495 - %497 = fadd float %496, 0.000000e+00 - %498 = bitcast i32 %32 to float - %499 = bitcast i32 %32 to float - %500 = fmul float %498, %499 - %501 = fadd float %497, %500 - %502 = call float @llvm.sqrt.f32(float %501) - %503 = fneg float %68 - %504 = fmul float %502, %503 - %505 = fmul float %504, 0.000000e+00 - %506 = bitcast i32 %32 to float - %507 = fadd float %506, %505 - %508 = fmul float %493, %507 - %509 = fadd float %479, %508 - %510 = call float @llvm.sqrt.f32(float %509) - %511 = fadd float %510, 0.000000e+00 - %512 = fdiv float %451, %511 - %513 = fmul float %438, %512 - %514 = insertelement <4 x float> %362, float %513, i32 3 - %515 = fsub <4 x float> , %514 - %516 = bitcast i32 %52 to float - %517 = bitcast i32 %52 to float - %518 = fmul float %516, %517 - %519 = fadd float %518, 0.000000e+00 - %520 = bitcast i32 %32 to float - %521 = bitcast i32 %32 to float - %522 = fmul float %520, %521 - %523 = fadd float %519, %522 - %524 = call float @llvm.sqrt.f32(float %523) - %525 = fneg float %68 - %526 = fmul float %524, %525 - %527 = fmul float %526, 0.000000e+00 - %528 = bitcast i32 %32 to float - %529 = fadd float %528, %527 - %530 = bitcast i32 %52 to float - %531 = bitcast i32 %52 to float - %532 = fmul float %530, %531 - %533 = fadd float %532, 0.000000e+00 - %534 = bitcast i32 %32 to float - %535 = bitcast i32 %32 to float - %536 = fmul float %534, %535 - %537 = fadd float %533, %536 - %538 = call float @llvm.sqrt.f32(float %537) - %539 = fneg float %68 - %540 = fmul float %538, %539 - %541 = bitcast i32 %52 to float - %542 = fadd float %541, %540 - %543 = bitcast i32 %52 to float - %544 = bitcast i32 %52 to float - %545 = fmul float %543, %544 - %546 = fadd float %545, 0.000000e+00 - %547 = bitcast i32 %32 to float - %548 = bitcast i32 %32 to float - %549 = fmul float %547, %548 - %550 = fadd float %546, %549 - %551 = call float @llvm.sqrt.f32(float %550) - %552 = fneg float %68 - %553 = fmul float %551, %552 - %554 = bitcast i32 %52 to float - %555 = fadd float %554, %553 - %556 = fmul float %542, %555 - %557 = fadd float %556, 0.000000e+00 - %558 = bitcast i32 %52 to float - %559 = bitcast i32 %52 to float - %560 = fmul float %558, %559 - %561 = fadd float %560, 0.000000e+00 - %562 = bitcast i32 %32 to float - %563 = bitcast i32 %32 to float - %564 = fmul float %562, %563 - %565 = fadd float %561, %564 - %566 = call float @llvm.sqrt.f32(float %565) - %567 = fneg float %68 - %568 = fmul float %566, %567 - %569 = fmul float %568, 0.000000e+00 - 
%570 = bitcast i32 %32 to float - %571 = fadd float %570, %569 - %572 = bitcast i32 %52 to float - %573 = bitcast i32 %52 to float - %574 = fmul float %572, %573 - %575 = fadd float %574, 0.000000e+00 - %576 = bitcast i32 %32 to float - %577 = bitcast i32 %32 to float - %578 = fmul float %576, %577 - %579 = fadd float %575, %578 - %580 = call float @llvm.sqrt.f32(float %579) - %581 = fneg float %68 - %582 = fmul float %580, %581 - %583 = fmul float %582, 0.000000e+00 - %584 = bitcast i32 %32 to float - %585 = fadd float %584, %583 - %586 = fmul float %571, %585 - %587 = fadd float %557, %586 - %588 = call float @llvm.sqrt.f32(float %587) - %589 = fadd float %588, 0.000000e+00 - %590 = fdiv float %529, %589 - %591 = fmul float %590, 2.000000e+00 - %592 = bitcast i32 %52 to float - %593 = bitcast i32 %52 to float - %594 = fmul float %592, %593 - %595 = fadd float %594, 0.000000e+00 - %596 = bitcast i32 %32 to float - %597 = bitcast i32 %32 to float - %598 = fmul float %596, %597 - %599 = fadd float %595, %598 - %600 = call float @llvm.sqrt.f32(float %599) - %601 = fneg float %68 - %602 = fmul float %600, %601 - %603 = fmul float %602, 0.000000e+00 - %604 = bitcast i32 %32 to float - %605 = fadd float %604, %603 - %606 = bitcast i32 %52 to float - %607 = bitcast i32 %52 to float - %608 = fmul float %606, %607 - %609 = fadd float %608, 0.000000e+00 - %610 = bitcast i32 %32 to float - %611 = bitcast i32 %32 to float - %612 = fmul float %610, %611 - %613 = fadd float %609, %612 - %614 = call float @llvm.sqrt.f32(float %613) - %615 = fneg float %68 - %616 = fmul float %614, %615 - %617 = bitcast i32 %52 to float - %618 = fadd float %617, %616 - %619 = bitcast i32 %52 to float - %620 = bitcast i32 %52 to float - %621 = fmul float %619, %620 - %622 = fadd float %621, 0.000000e+00 - %623 = bitcast i32 %32 to float - %624 = bitcast i32 %32 to float - %625 = fmul float %623, %624 - %626 = fadd float %622, %625 - %627 = call float @llvm.sqrt.f32(float %626) - %628 = fneg float %68 - %629 = fmul float %627, %628 - %630 = bitcast i32 %52 to float - %631 = fadd float %630, %629 - %632 = fmul float %618, %631 - %633 = fadd float %632, 0.000000e+00 - %634 = bitcast i32 %52 to float - %635 = bitcast i32 %52 to float - %636 = fmul float %634, %635 - %637 = fadd float %636, 0.000000e+00 - %638 = bitcast i32 %32 to float - %639 = bitcast i32 %32 to float - %640 = fmul float %638, %639 - %641 = fadd float %637, %640 - %642 = call float @llvm.sqrt.f32(float %641) - %643 = fneg float %68 - %644 = fmul float %642, %643 - %645 = fmul float %644, 0.000000e+00 - %646 = bitcast i32 %32 to float - %647 = fadd float %646, %645 - %648 = bitcast i32 %52 to float - %649 = bitcast i32 %52 to float - %650 = fmul float %648, %649 - %651 = fadd float %650, 0.000000e+00 - %652 = bitcast i32 %32 to float - %653 = bitcast i32 %32 to float - %654 = fmul float %652, %653 - %655 = fadd float %651, %654 - %656 = call float @llvm.sqrt.f32(float %655) - %657 = fneg float %68 - %658 = fmul float %656, %657 - %659 = fmul float %658, 0.000000e+00 - %660 = bitcast i32 %32 to float - %661 = fadd float %660, %659 - %662 = fmul float %647, %661 - %663 = fadd float %633, %662 - %664 = call float @llvm.sqrt.f32(float %663) - %665 = fadd float %664, 0.000000e+00 - %666 = fdiv float %605, %665 - %667 = fmul float %591, %666 - %668 = fsub float 1.000000e+00, %667 - %669 = insertelement <4 x float> zeroinitializer, float %668, i32 0 - %670 = insertelement <4 x float> %669, float 0.000000e+00, i32 1 - %671 = insertelement <4 x float> %670, float 
0.000000e+00, i32 2 - %672 = insertelement <4 x float> %671, float 0.000000e+00, i32 3 - %673 = shufflevector <4 x float> %515, <4 x float> %672, <8 x i32> - %674 = extractelement <8 x float> %673, i32 0 - %675 = getelementptr float, float* %2, i32 0 - %676 = getelementptr inbounds float, float* %675, i64 3 - %677 = bitcast float* %676 to i32* - %678 = bitcast i32* %677 to float* - store float %674, float* %678, align 4 - %679 = bitcast float* %1 to i8* - %680 = alloca [4 x float], align 16 - %681 = bitcast [4 x float]* %680 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %679, i8* nonnull align 16 dereferenceable(16) %681, i64 16, i1 false) - store float 0.000000e+00, float* %2, align 4 - %682 = bitcast i32 %52 to float - %683 = bitcast i32 %52 to float - %684 = fmul float %682, %683 - %685 = fadd float %684, 0.000000e+00 - %686 = load i32, i32* %31, align 4 - %687 = bitcast i32 %686 to float - %688 = bitcast i32 %686 to float - %689 = fmul float %687, %688 - %690 = fadd float %685, %689 - %691 = call float @llvm.sqrt.f32(float %690) - %692 = sitofp i32 %67 to float - %693 = fneg float %692 - %694 = fmul float %691, %693 - %695 = bitcast i32 %52 to float - %696 = fadd float %695, %694 - %697 = bitcast i32 %52 to float - %698 = bitcast i32 %52 to float - %699 = fmul float %697, %698 - %700 = fadd float %699, 0.000000e+00 - %701 = bitcast i32 %686 to float - %702 = bitcast i32 %686 to float - %703 = fmul float %701, %702 - %704 = fadd float %700, %703 - %705 = call float @llvm.sqrt.f32(float %704) - %706 = fneg float %692 - %707 = fmul float %705, %706 - %708 = bitcast i32 %52 to float - %709 = fadd float %708, %707 - %710 = bitcast i32 %52 to float - %711 = bitcast i32 %52 to float - %712 = fmul float %710, %711 - %713 = fadd float %712, 0.000000e+00 - %714 = bitcast i32 %686 to float - %715 = bitcast i32 %686 to float - %716 = fmul float %714, %715 - %717 = fadd float %713, %716 - %718 = call float @llvm.sqrt.f32(float %717) - %719 = fneg float %692 - %720 = fmul float %718, %719 - %721 = bitcast i32 %52 to float - %722 = fadd float %721, %720 - %723 = fmul float %709, %722 - %724 = fadd float %723, 0.000000e+00 - %725 = bitcast i32 %52 to float - %726 = bitcast i32 %52 to float - %727 = fmul float %725, %726 - %728 = fadd float %727, 0.000000e+00 - %729 = bitcast i32 %686 to float - %730 = bitcast i32 %686 to float - %731 = fmul float %729, %730 - %732 = fadd float %728, %731 - %733 = call float @llvm.sqrt.f32(float %732) - %734 = fneg float %692 - %735 = fmul float %733, %734 - %736 = fmul float %735, 0.000000e+00 - %737 = bitcast i32 %686 to float - %738 = fadd float %737, %736 - %739 = bitcast i32 %52 to float - %740 = bitcast i32 %52 to float - %741 = fmul float %739, %740 - %742 = fadd float %741, 0.000000e+00 - %743 = bitcast i32 %686 to float - %744 = bitcast i32 %686 to float - %745 = fmul float %743, %744 - %746 = fadd float %742, %745 - %747 = call float @llvm.sqrt.f32(float %746) - %748 = fneg float %692 - %749 = fmul float %747, %748 - %750 = fmul float %749, 0.000000e+00 - %751 = bitcast i32 %686 to float - %752 = fadd float %751, %750 - %753 = fmul float %738, %752 - %754 = fadd float %724, %753 - %755 = call float @llvm.sqrt.f32(float %754) - %756 = fadd float %755, 0.000000e+00 - %757 = fdiv float %696, %756 - %758 = fmul float %757, 2.000000e+00 - %759 = bitcast i32 %52 to float - %760 = bitcast i32 %52 to float - %761 = fmul float %759, %760 - %762 = fadd float %761, 0.000000e+00 - %763 = bitcast i32 %686 to float - %764 = bitcast 
i32 %686 to float - %765 = fmul float %763, %764 - %766 = fadd float %762, %765 - %767 = call float @llvm.sqrt.f32(float %766) - %768 = fneg float %692 - %769 = fmul float %767, %768 - %770 = bitcast i32 %52 to float - %771 = fadd float %770, %769 - %772 = bitcast i32 %52 to float - %773 = bitcast i32 %52 to float - %774 = fmul float %772, %773 - %775 = fadd float %774, 0.000000e+00 - %776 = bitcast i32 %686 to float - %777 = bitcast i32 %686 to float - %778 = fmul float %776, %777 - %779 = fadd float %775, %778 - %780 = call float @llvm.sqrt.f32(float %779) - %781 = fneg float %692 - %782 = fmul float %780, %781 - %783 = bitcast i32 %52 to float - %784 = fadd float %783, %782 - %785 = bitcast i32 %52 to float - %786 = bitcast i32 %52 to float - %787 = fmul float %785, %786 - %788 = fadd float %787, 0.000000e+00 - %789 = bitcast i32 %686 to float - %790 = bitcast i32 %686 to float - %791 = fmul float %789, %790 - %792 = fadd float %788, %791 - %793 = call float @llvm.sqrt.f32(float %792) - %794 = fneg float %692 - %795 = fmul float %793, %794 - %796 = bitcast i32 %52 to float - %797 = fadd float %796, %795 - %798 = fmul float %784, %797 - %799 = fadd float %798, 0.000000e+00 - %800 = bitcast i32 %52 to float - %801 = bitcast i32 %52 to float - %802 = fmul float %800, %801 - %803 = fadd float %802, 0.000000e+00 - %804 = bitcast i32 %686 to float - %805 = bitcast i32 %686 to float - %806 = fmul float %804, %805 - %807 = fadd float %803, %806 - %808 = call float @llvm.sqrt.f32(float %807) - %809 = fneg float %692 - %810 = fmul float %808, %809 - %811 = fmul float %810, 0.000000e+00 - %812 = bitcast i32 %686 to float - %813 = fadd float %812, %811 - %814 = bitcast i32 %52 to float - %815 = bitcast i32 %52 to float - %816 = fmul float %814, %815 - %817 = fadd float %816, 0.000000e+00 - %818 = bitcast i32 %686 to float - %819 = bitcast i32 %686 to float - %820 = fmul float %818, %819 - %821 = fadd float %817, %820 - %822 = call float @llvm.sqrt.f32(float %821) - %823 = fneg float %692 - %824 = fmul float %822, %823 - %825 = fmul float %824, 0.000000e+00 - %826 = bitcast i32 %686 to float - %827 = fadd float %826, %825 - %828 = fmul float %813, %827 - %829 = fadd float %799, %828 - %830 = call float @llvm.sqrt.f32(float %829) - %831 = fadd float %830, 0.000000e+00 - %832 = fdiv float %771, %831 - %833 = fmul float %758, %832 - %834 = fsub float 1.000000e+00, %833 - %835 = insertelement <4 x float> zeroinitializer, float %834, i32 0 - %836 = insertelement <4 x float> %835, float 0.000000e+00, i32 1 - %837 = insertelement <4 x float> %836, float 0.000000e+00, i32 2 - %838 = insertelement <4 x float> %837, float 0.000000e+00, i32 3 - %839 = getelementptr float, float* %0, i32 0 - %840 = load float, float* %839, align 4 - %841 = insertelement <4 x float> zeroinitializer, float %840, i32 0 - %842 = insertelement <4 x float> %841, float 0.000000e+00, i32 1 - %843 = insertelement <4 x float> %842, float 0.000000e+00, i32 2 - %844 = insertelement <4 x float> %843, float 0.000000e+00, i32 3 - %845 = call <4 x float> @llvm.fma.v4f32(<4 x float> %838, <4 x float> %844, <4 x float> zeroinitializer) - %846 = extractelement <4 x float> %845, i32 0 - store float %846, float* %2, align 4 - %847 = bitcast i32 %52 to float - %848 = bitcast i32 %52 to float - %849 = fmul float %847, %848 - %850 = fadd float %849, 0.000000e+00 - %851 = bitcast i32 %686 to float - %852 = bitcast i32 %686 to float - %853 = fmul float %851, %852 - %854 = fadd float %850, %853 - %855 = call float @llvm.sqrt.f32(float %854) - %856 = fneg 
float %692 - %857 = fmul float %855, %856 - %858 = bitcast i32 %52 to float - %859 = fadd float %858, %857 - %860 = bitcast i32 %52 to float - %861 = bitcast i32 %52 to float - %862 = fmul float %860, %861 - %863 = fadd float %862, 0.000000e+00 - %864 = bitcast i32 %686 to float - %865 = bitcast i32 %686 to float - %866 = fmul float %864, %865 - %867 = fadd float %863, %866 - %868 = call float @llvm.sqrt.f32(float %867) - %869 = fneg float %692 - %870 = fmul float %868, %869 - %871 = bitcast i32 %52 to float - %872 = fadd float %871, %870 - %873 = bitcast i32 %52 to float - %874 = bitcast i32 %52 to float - %875 = fmul float %873, %874 - %876 = fadd float %875, 0.000000e+00 - %877 = bitcast i32 %686 to float - %878 = bitcast i32 %686 to float - %879 = fmul float %877, %878 - %880 = fadd float %876, %879 - %881 = call float @llvm.sqrt.f32(float %880) - %882 = fneg float %692 - %883 = fmul float %881, %882 - %884 = bitcast i32 %52 to float - %885 = fadd float %884, %883 - %886 = fmul float %872, %885 - %887 = fadd float %886, 0.000000e+00 - %888 = bitcast i32 %52 to float - %889 = bitcast i32 %52 to float - %890 = fmul float %888, %889 - %891 = fadd float %890, 0.000000e+00 - %892 = bitcast i32 %686 to float - %893 = bitcast i32 %686 to float - %894 = fmul float %892, %893 - %895 = fadd float %891, %894 - %896 = call float @llvm.sqrt.f32(float %895) - %897 = fneg float %692 - %898 = fmul float %896, %897 - %899 = fmul float %898, 0.000000e+00 - %900 = bitcast i32 %686 to float - %901 = fadd float %900, %899 - %902 = bitcast i32 %52 to float - %903 = bitcast i32 %52 to float - %904 = fmul float %902, %903 - %905 = fadd float %904, 0.000000e+00 - %906 = bitcast i32 %686 to float - %907 = bitcast i32 %686 to float - %908 = fmul float %906, %907 - %909 = fadd float %905, %908 - %910 = call float @llvm.sqrt.f32(float %909) - %911 = fneg float %692 - %912 = fmul float %910, %911 - %913 = fmul float %912, 0.000000e+00 - %914 = bitcast i32 %686 to float - %915 = fadd float %914, %913 - %916 = fmul float %901, %915 - %917 = fadd float %887, %916 - %918 = call float @llvm.sqrt.f32(float %917) - %919 = fadd float %918, 0.000000e+00 - %920 = fdiv float %859, %919 - %921 = fmul float %920, 2.000000e+00 - %922 = bitcast i32 %52 to float - %923 = bitcast i32 %52 to float - %924 = fmul float %922, %923 - %925 = fadd float %924, 0.000000e+00 - %926 = bitcast i32 %686 to float - %927 = bitcast i32 %686 to float - %928 = fmul float %926, %927 - %929 = fadd float %925, %928 - %930 = call float @llvm.sqrt.f32(float %929) - %931 = fneg float %692 - %932 = fmul float %930, %931 - %933 = bitcast i32 %52 to float - %934 = fadd float %933, %932 - %935 = bitcast i32 %52 to float - %936 = bitcast i32 %52 to float - %937 = fmul float %935, %936 - %938 = fadd float %937, 0.000000e+00 - %939 = bitcast i32 %686 to float - %940 = bitcast i32 %686 to float - %941 = fmul float %939, %940 - %942 = fadd float %938, %941 - %943 = call float @llvm.sqrt.f32(float %942) - %944 = fneg float %692 - %945 = fmul float %943, %944 - %946 = bitcast i32 %52 to float - %947 = fadd float %946, %945 - %948 = bitcast i32 %52 to float - %949 = bitcast i32 %52 to float - %950 = fmul float %948, %949 - %951 = fadd float %950, 0.000000e+00 - %952 = bitcast i32 %686 to float - %953 = bitcast i32 %686 to float - %954 = fmul float %952, %953 - %955 = fadd float %951, %954 - %956 = call float @llvm.sqrt.f32(float %955) - %957 = fneg float %692 - %958 = fmul float %956, %957 - %959 = bitcast i32 %52 to float - %960 = fadd float %959, %958 - %961 = fmul 
float %947, %960 - %962 = fadd float %961, 0.000000e+00 - %963 = bitcast i32 %52 to float - %964 = bitcast i32 %52 to float - %965 = fmul float %963, %964 - %966 = fadd float %965, 0.000000e+00 - %967 = bitcast i32 %686 to float - %968 = bitcast i32 %686 to float - %969 = fmul float %967, %968 - %970 = fadd float %966, %969 - %971 = call float @llvm.sqrt.f32(float %970) - %972 = fneg float %692 - %973 = fmul float %971, %972 - %974 = fmul float %973, 0.000000e+00 - %975 = bitcast i32 %686 to float - %976 = fadd float %975, %974 - %977 = bitcast i32 %52 to float - %978 = bitcast i32 %52 to float - %979 = fmul float %977, %978 - %980 = fadd float %979, 0.000000e+00 - %981 = bitcast i32 %686 to float - %982 = bitcast i32 %686 to float - %983 = fmul float %981, %982 - %984 = fadd float %980, %983 - %985 = call float @llvm.sqrt.f32(float %984) - %986 = fneg float %692 - %987 = fmul float %985, %986 - %988 = fmul float %987, 0.000000e+00 - %989 = bitcast i32 %686 to float - %990 = fadd float %989, %988 - %991 = fmul float %976, %990 - %992 = fadd float %962, %991 - %993 = call float @llvm.sqrt.f32(float %992) - %994 = fadd float %993, 0.000000e+00 - %995 = fdiv float %934, %994 - %996 = fmul float %921, %995 - %997 = fsub float 1.000000e+00, %996 - %998 = fmul float %997, %840 - %999 = fadd float %998, 0.000000e+00 - %1000 = bitcast i32 %52 to float - %1001 = bitcast i32 %52 to float - %1002 = fmul float %1000, %1001 - %1003 = fadd float %1002, 0.000000e+00 - %1004 = bitcast i32 %686 to float - %1005 = bitcast i32 %686 to float - %1006 = fmul float %1004, %1005 - %1007 = fadd float %1003, %1006 - %1008 = call float @llvm.sqrt.f32(float %1007) - %1009 = fneg float %692 - %1010 = fmul float %1008, %1009 - %1011 = bitcast i32 %52 to float - %1012 = fadd float %1011, %1010 - %1013 = bitcast i32 %52 to float - %1014 = bitcast i32 %52 to float - %1015 = fmul float %1013, %1014 - %1016 = fadd float %1015, 0.000000e+00 - %1017 = bitcast i32 %686 to float - %1018 = bitcast i32 %686 to float - %1019 = fmul float %1017, %1018 - %1020 = fadd float %1016, %1019 - %1021 = call float @llvm.sqrt.f32(float %1020) - %1022 = fneg float %692 - %1023 = fmul float %1021, %1022 - %1024 = bitcast i32 %52 to float - %1025 = fadd float %1024, %1023 - %1026 = bitcast i32 %52 to float - %1027 = bitcast i32 %52 to float - %1028 = fmul float %1026, %1027 - %1029 = fadd float %1028, 0.000000e+00 - %1030 = bitcast i32 %686 to float - %1031 = bitcast i32 %686 to float - %1032 = fmul float %1030, %1031 - %1033 = fadd float %1029, %1032 - %1034 = call float @llvm.sqrt.f32(float %1033) - %1035 = fneg float %692 - %1036 = fmul float %1034, %1035 - %1037 = bitcast i32 %52 to float - %1038 = fadd float %1037, %1036 - %1039 = fmul float %1025, %1038 - %1040 = fadd float %1039, 0.000000e+00 - %1041 = bitcast i32 %52 to float - %1042 = bitcast i32 %52 to float - %1043 = fmul float %1041, %1042 - %1044 = fadd float %1043, 0.000000e+00 - %1045 = bitcast i32 %686 to float - %1046 = bitcast i32 %686 to float - %1047 = fmul float %1045, %1046 - %1048 = fadd float %1044, %1047 - %1049 = call float @llvm.sqrt.f32(float %1048) - %1050 = fneg float %692 - %1051 = fmul float %1049, %1050 - %1052 = fmul float %1051, 0.000000e+00 - %1053 = bitcast i32 %686 to float - %1054 = fadd float %1053, %1052 - %1055 = bitcast i32 %52 to float - %1056 = bitcast i32 %52 to float - %1057 = fmul float %1055, %1056 - %1058 = fadd float %1057, 0.000000e+00 - %1059 = bitcast i32 %686 to float - %1060 = bitcast i32 %686 to float - %1061 = fmul float %1059, %1060 - 
%1062 = fadd float %1058, %1061 - %1063 = call float @llvm.sqrt.f32(float %1062) - %1064 = fneg float %692 - %1065 = fmul float %1063, %1064 - %1066 = fmul float %1065, 0.000000e+00 - %1067 = bitcast i32 %686 to float - %1068 = fadd float %1067, %1066 - %1069 = fmul float %1054, %1068 - %1070 = fadd float %1040, %1069 - %1071 = call float @llvm.sqrt.f32(float %1070) - %1072 = fadd float %1071, 0.000000e+00 - %1073 = fdiv float %1012, %1072 - %1074 = fmul float %1073, 2.000000e+00 - %1075 = bitcast i32 %52 to float - %1076 = bitcast i32 %52 to float - %1077 = fmul float %1075, %1076 - %1078 = fadd float %1077, 0.000000e+00 - %1079 = bitcast i32 %686 to float - %1080 = bitcast i32 %686 to float - %1081 = fmul float %1079, %1080 - %1082 = fadd float %1078, %1081 - %1083 = call float @llvm.sqrt.f32(float %1082) - %1084 = fneg float %692 - %1085 = fmul float %1083, %1084 - %1086 = fmul float %1085, 0.000000e+00 - %1087 = bitcast i32 %686 to float - %1088 = fadd float %1087, %1086 - %1089 = bitcast i32 %52 to float - %1090 = bitcast i32 %52 to float - %1091 = fmul float %1089, %1090 - %1092 = fadd float %1091, 0.000000e+00 - %1093 = bitcast i32 %686 to float - %1094 = bitcast i32 %686 to float - %1095 = fmul float %1093, %1094 - %1096 = fadd float %1092, %1095 - %1097 = call float @llvm.sqrt.f32(float %1096) - %1098 = fneg float %692 - %1099 = fmul float %1097, %1098 - %1100 = bitcast i32 %52 to float - %1101 = fadd float %1100, %1099 - %1102 = bitcast i32 %52 to float - %1103 = bitcast i32 %52 to float - %1104 = fmul float %1102, %1103 - %1105 = fadd float %1104, 0.000000e+00 - %1106 = bitcast i32 %686 to float - %1107 = bitcast i32 %686 to float - %1108 = fmul float %1106, %1107 - %1109 = fadd float %1105, %1108 - %1110 = call float @llvm.sqrt.f32(float %1109) - %1111 = fneg float %692 - %1112 = fmul float %1110, %1111 - %1113 = bitcast i32 %52 to float - %1114 = fadd float %1113, %1112 - %1115 = fmul float %1101, %1114 - %1116 = fadd float %1115, 0.000000e+00 - %1117 = bitcast i32 %52 to float - %1118 = bitcast i32 %52 to float - %1119 = fmul float %1117, %1118 - %1120 = fadd float %1119, 0.000000e+00 - %1121 = bitcast i32 %686 to float - %1122 = bitcast i32 %686 to float - %1123 = fmul float %1121, %1122 - %1124 = fadd float %1120, %1123 - %1125 = call float @llvm.sqrt.f32(float %1124) - %1126 = fneg float %692 - %1127 = fmul float %1125, %1126 - %1128 = fmul float %1127, 0.000000e+00 - %1129 = bitcast i32 %686 to float - %1130 = fadd float %1129, %1128 - %1131 = bitcast i32 %52 to float - %1132 = bitcast i32 %52 to float - %1133 = fmul float %1131, %1132 - %1134 = fadd float %1133, 0.000000e+00 - %1135 = bitcast i32 %686 to float - %1136 = bitcast i32 %686 to float - %1137 = fmul float %1135, %1136 - %1138 = fadd float %1134, %1137 - %1139 = call float @llvm.sqrt.f32(float %1138) - %1140 = fneg float %692 - %1141 = fmul float %1139, %1140 - %1142 = fmul float %1141, 0.000000e+00 - %1143 = bitcast i32 %686 to float - %1144 = fadd float %1143, %1142 - %1145 = fmul float %1130, %1144 - %1146 = fadd float %1116, %1145 - %1147 = call float @llvm.sqrt.f32(float %1146) - %1148 = fadd float %1147, 0.000000e+00 - %1149 = fdiv float %1088, %1148 - %1150 = fmul float %1074, %1149 - %1151 = fneg float %1150 - %1152 = getelementptr float, float* %0, i32 0 - %1153 = getelementptr inbounds float, float* %1152, i64 2 - %1154 = load float, float* %1153, align 4 - %1155 = fmul float %1151, %1154 - %1156 = fadd float %999, %1155 - %1157 = insertelement <4 x float> zeroinitializer, float %1156, i32 0 - %1158 = 
insertelement <4 x float> %1157, float 0.000000e+00, i32 1 - %1159 = insertelement <4 x float> %1158, float 0.000000e+00, i32 2 - %1160 = insertelement <4 x float> %1159, float 0.000000e+00, i32 3 - %1161 = extractelement <4 x float> %1160, i32 0 - store float %1161, float* %2, align 4 - %1162 = extractelement <4 x float> %1160, i32 1 - %1163 = getelementptr float, float* %2, i32 0 - %1164 = getelementptr inbounds float, float* %1163, i64 1 - store float %1162, float* %1164, align 4 - %1165 = bitcast i32 %52 to float - %1166 = bitcast i32 %52 to float - %1167 = fmul float %1165, %1166 - %1168 = fadd float %1167, 0.000000e+00 - %1169 = bitcast i32 %686 to float - %1170 = bitcast i32 %686 to float - %1171 = fmul float %1169, %1170 - %1172 = fadd float %1168, %1171 - %1173 = call float @llvm.sqrt.f32(float %1172) - %1174 = fneg float %692 - %1175 = fmul float %1173, %1174 - %1176 = bitcast i32 %52 to float - %1177 = fadd float %1176, %1175 - %1178 = bitcast i32 %52 to float - %1179 = bitcast i32 %52 to float - %1180 = fmul float %1178, %1179 - %1181 = fadd float %1180, 0.000000e+00 - %1182 = bitcast i32 %686 to float - %1183 = bitcast i32 %686 to float - %1184 = fmul float %1182, %1183 - %1185 = fadd float %1181, %1184 - %1186 = call float @llvm.sqrt.f32(float %1185) - %1187 = fneg float %692 - %1188 = fmul float %1186, %1187 - %1189 = bitcast i32 %52 to float - %1190 = fadd float %1189, %1188 - %1191 = bitcast i32 %52 to float - %1192 = bitcast i32 %52 to float - %1193 = fmul float %1191, %1192 - %1194 = fadd float %1193, 0.000000e+00 - %1195 = bitcast i32 %686 to float - %1196 = bitcast i32 %686 to float - %1197 = fmul float %1195, %1196 - %1198 = fadd float %1194, %1197 - %1199 = call float @llvm.sqrt.f32(float %1198) - %1200 = fneg float %692 - %1201 = fmul float %1199, %1200 - %1202 = bitcast i32 %52 to float - %1203 = fadd float %1202, %1201 - %1204 = fmul float %1190, %1203 - %1205 = fadd float %1204, 0.000000e+00 - %1206 = bitcast i32 %52 to float - %1207 = bitcast i32 %52 to float - %1208 = fmul float %1206, %1207 - %1209 = fadd float %1208, 0.000000e+00 - %1210 = bitcast i32 %686 to float - %1211 = bitcast i32 %686 to float - %1212 = fmul float %1210, %1211 - %1213 = fadd float %1209, %1212 - %1214 = call float @llvm.sqrt.f32(float %1213) - %1215 = fneg float %692 - %1216 = fmul float %1214, %1215 - %1217 = fmul float %1216, 0.000000e+00 - %1218 = bitcast i32 %686 to float - %1219 = fadd float %1218, %1217 - %1220 = bitcast i32 %52 to float - %1221 = bitcast i32 %52 to float - %1222 = fmul float %1220, %1221 - %1223 = fadd float %1222, 0.000000e+00 - %1224 = bitcast i32 %686 to float - %1225 = bitcast i32 %686 to float - %1226 = fmul float %1224, %1225 - %1227 = fadd float %1223, %1226 - %1228 = call float @llvm.sqrt.f32(float %1227) - %1229 = fneg float %692 - %1230 = fmul float %1228, %1229 - %1231 = fmul float %1230, 0.000000e+00 - %1232 = bitcast i32 %686 to float - %1233 = fadd float %1232, %1231 - %1234 = fmul float %1219, %1233 - %1235 = fadd float %1205, %1234 - %1236 = call float @llvm.sqrt.f32(float %1235) - %1237 = fadd float %1236, 0.000000e+00 - %1238 = fdiv float %1177, %1237 - %1239 = fmul float %1238, 2.000000e+00 - %1240 = bitcast i32 %52 to float - %1241 = bitcast i32 %52 to float - %1242 = fmul float %1240, %1241 - %1243 = fadd float %1242, 0.000000e+00 - %1244 = bitcast i32 %686 to float - %1245 = bitcast i32 %686 to float - %1246 = fmul float %1244, %1245 - %1247 = fadd float %1243, %1246 - %1248 = call float @llvm.sqrt.f32(float %1247) - %1249 = fneg float %692 
- %1250 = fmul float %1248, %1249 - %1251 = bitcast i32 %52 to float - %1252 = fadd float %1251, %1250 - %1253 = bitcast i32 %52 to float - %1254 = bitcast i32 %52 to float - %1255 = fmul float %1253, %1254 - %1256 = fadd float %1255, 0.000000e+00 - %1257 = bitcast i32 %686 to float - %1258 = bitcast i32 %686 to float - %1259 = fmul float %1257, %1258 - %1260 = fadd float %1256, %1259 - %1261 = call float @llvm.sqrt.f32(float %1260) - %1262 = fneg float %692 - %1263 = fmul float %1261, %1262 - %1264 = bitcast i32 %52 to float - %1265 = fadd float %1264, %1263 - %1266 = bitcast i32 %52 to float - %1267 = bitcast i32 %52 to float - %1268 = fmul float %1266, %1267 - %1269 = fadd float %1268, 0.000000e+00 - %1270 = bitcast i32 %686 to float - %1271 = bitcast i32 %686 to float - %1272 = fmul float %1270, %1271 - %1273 = fadd float %1269, %1272 - %1274 = call float @llvm.sqrt.f32(float %1273) - %1275 = fneg float %692 - %1276 = fmul float %1274, %1275 - %1277 = bitcast i32 %52 to float - %1278 = fadd float %1277, %1276 - %1279 = fmul float %1265, %1278 - %1280 = fadd float %1279, 0.000000e+00 - %1281 = bitcast i32 %52 to float - %1282 = bitcast i32 %52 to float - %1283 = fmul float %1281, %1282 - %1284 = fadd float %1283, 0.000000e+00 - %1285 = bitcast i32 %686 to float - %1286 = bitcast i32 %686 to float - %1287 = fmul float %1285, %1286 - %1288 = fadd float %1284, %1287 - %1289 = call float @llvm.sqrt.f32(float %1288) - %1290 = fneg float %692 - %1291 = fmul float %1289, %1290 - %1292 = fmul float %1291, 0.000000e+00 - %1293 = bitcast i32 %686 to float - %1294 = fadd float %1293, %1292 - %1295 = bitcast i32 %52 to float - %1296 = bitcast i32 %52 to float - %1297 = fmul float %1295, %1296 - %1298 = fadd float %1297, 0.000000e+00 - %1299 = bitcast i32 %686 to float - %1300 = bitcast i32 %686 to float - %1301 = fmul float %1299, %1300 - %1302 = fadd float %1298, %1301 - %1303 = call float @llvm.sqrt.f32(float %1302) - %1304 = fneg float %692 - %1305 = fmul float %1303, %1304 - %1306 = fmul float %1305, 0.000000e+00 - %1307 = bitcast i32 %686 to float - %1308 = fadd float %1307, %1306 - %1309 = fmul float %1294, %1308 - %1310 = fadd float %1280, %1309 - %1311 = call float @llvm.sqrt.f32(float %1310) - %1312 = fadd float %1311, 0.000000e+00 - %1313 = fdiv float %1252, %1312 - %1314 = fmul float %1239, %1313 - %1315 = fsub float 1.000000e+00, %1314 - %1316 = insertelement <4 x float> zeroinitializer, float %1315, i32 0 - %1317 = insertelement <4 x float> %1316, float 0.000000e+00, i32 1 - %1318 = insertelement <4 x float> %1317, float 0.000000e+00, i32 2 - %1319 = insertelement <4 x float> %1318, float 0.000000e+00, i32 3 - %1320 = getelementptr float, float* %0, i32 0 - %1321 = getelementptr inbounds float, float* %1320, i64 1 - %1322 = load float, float* %1321, align 4 - %1323 = insertelement <4 x float> zeroinitializer, float %1322, i32 0 - %1324 = insertelement <4 x float> %1323, float 0.000000e+00, i32 1 - %1325 = insertelement <4 x float> %1324, float 0.000000e+00, i32 2 - %1326 = insertelement <4 x float> %1325, float 0.000000e+00, i32 3 - %1327 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1319, <4 x float> %1326, <4 x float> zeroinitializer) - %1328 = extractelement <4 x float> %1327, i32 0 - store float %1328, float* %1164, align 4 - %1329 = bitcast i32 %52 to float - %1330 = bitcast i32 %52 to float - %1331 = fmul float %1329, %1330 - %1332 = fadd float %1331, 0.000000e+00 - %1333 = bitcast i32 %686 to float - %1334 = bitcast i32 %686 to float - %1335 = fmul float %1333, %1334 - %1336 = 
fadd float %1332, %1335 - %1337 = call float @llvm.sqrt.f32(float %1336) - %1338 = fneg float %692 - %1339 = fmul float %1337, %1338 - %1340 = bitcast i32 %52 to float - %1341 = fadd float %1340, %1339 - %1342 = bitcast i32 %52 to float - %1343 = bitcast i32 %52 to float - %1344 = fmul float %1342, %1343 - %1345 = fadd float %1344, 0.000000e+00 - %1346 = bitcast i32 %686 to float - %1347 = bitcast i32 %686 to float - %1348 = fmul float %1346, %1347 - %1349 = fadd float %1345, %1348 - %1350 = call float @llvm.sqrt.f32(float %1349) - %1351 = fneg float %692 - %1352 = fmul float %1350, %1351 - %1353 = bitcast i32 %52 to float - %1354 = fadd float %1353, %1352 - %1355 = bitcast i32 %52 to float - %1356 = bitcast i32 %52 to float - %1357 = fmul float %1355, %1356 - %1358 = fadd float %1357, 0.000000e+00 - %1359 = bitcast i32 %686 to float - %1360 = bitcast i32 %686 to float - %1361 = fmul float %1359, %1360 - %1362 = fadd float %1358, %1361 - %1363 = call float @llvm.sqrt.f32(float %1362) - %1364 = fneg float %692 - %1365 = fmul float %1363, %1364 - %1366 = bitcast i32 %52 to float - %1367 = fadd float %1366, %1365 - %1368 = fmul float %1354, %1367 - %1369 = fadd float %1368, 0.000000e+00 - %1370 = bitcast i32 %52 to float - %1371 = bitcast i32 %52 to float - %1372 = fmul float %1370, %1371 - %1373 = fadd float %1372, 0.000000e+00 - %1374 = bitcast i32 %686 to float - %1375 = bitcast i32 %686 to float - %1376 = fmul float %1374, %1375 - %1377 = fadd float %1373, %1376 - %1378 = call float @llvm.sqrt.f32(float %1377) - %1379 = fneg float %692 - %1380 = fmul float %1378, %1379 - %1381 = fmul float %1380, 0.000000e+00 - %1382 = bitcast i32 %686 to float - %1383 = fadd float %1382, %1381 - %1384 = bitcast i32 %52 to float - %1385 = bitcast i32 %52 to float - %1386 = fmul float %1384, %1385 - %1387 = fadd float %1386, 0.000000e+00 - %1388 = bitcast i32 %686 to float - %1389 = bitcast i32 %686 to float - %1390 = fmul float %1388, %1389 - %1391 = fadd float %1387, %1390 - %1392 = call float @llvm.sqrt.f32(float %1391) - %1393 = fneg float %692 - %1394 = fmul float %1392, %1393 - %1395 = fmul float %1394, 0.000000e+00 - %1396 = bitcast i32 %686 to float - %1397 = fadd float %1396, %1395 - %1398 = fmul float %1383, %1397 - %1399 = fadd float %1369, %1398 - %1400 = call float @llvm.sqrt.f32(float %1399) - %1401 = fadd float %1400, 0.000000e+00 - %1402 = fdiv float %1341, %1401 - %1403 = fmul float %1402, 2.000000e+00 - %1404 = bitcast i32 %52 to float - %1405 = bitcast i32 %52 to float - %1406 = fmul float %1404, %1405 - %1407 = fadd float %1406, 0.000000e+00 - %1408 = bitcast i32 %686 to float - %1409 = bitcast i32 %686 to float - %1410 = fmul float %1408, %1409 - %1411 = fadd float %1407, %1410 - %1412 = call float @llvm.sqrt.f32(float %1411) - %1413 = fneg float %692 - %1414 = fmul float %1412, %1413 - %1415 = bitcast i32 %52 to float - %1416 = fadd float %1415, %1414 - %1417 = bitcast i32 %52 to float - %1418 = bitcast i32 %52 to float - %1419 = fmul float %1417, %1418 - %1420 = fadd float %1419, 0.000000e+00 - %1421 = bitcast i32 %686 to float - %1422 = bitcast i32 %686 to float - %1423 = fmul float %1421, %1422 - %1424 = fadd float %1420, %1423 - %1425 = call float @llvm.sqrt.f32(float %1424) - %1426 = fneg float %692 - %1427 = fmul float %1425, %1426 - %1428 = bitcast i32 %52 to float - %1429 = fadd float %1428, %1427 - %1430 = bitcast i32 %52 to float - %1431 = bitcast i32 %52 to float - %1432 = fmul float %1430, %1431 - %1433 = fadd float %1432, 0.000000e+00 - %1434 = bitcast i32 %686 to float - 
%1435 = bitcast i32 %686 to float - %1436 = fmul float %1434, %1435 - %1437 = fadd float %1433, %1436 - %1438 = call float @llvm.sqrt.f32(float %1437) - %1439 = fneg float %692 - %1440 = fmul float %1438, %1439 - %1441 = bitcast i32 %52 to float - %1442 = fadd float %1441, %1440 - %1443 = fmul float %1429, %1442 - %1444 = fadd float %1443, 0.000000e+00 - %1445 = bitcast i32 %52 to float - %1446 = bitcast i32 %52 to float - %1447 = fmul float %1445, %1446 - %1448 = fadd float %1447, 0.000000e+00 - %1449 = bitcast i32 %686 to float - %1450 = bitcast i32 %686 to float - %1451 = fmul float %1449, %1450 - %1452 = fadd float %1448, %1451 - %1453 = call float @llvm.sqrt.f32(float %1452) - %1454 = fneg float %692 - %1455 = fmul float %1453, %1454 - %1456 = fmul float %1455, 0.000000e+00 - %1457 = bitcast i32 %686 to float - %1458 = fadd float %1457, %1456 - %1459 = bitcast i32 %52 to float - %1460 = bitcast i32 %52 to float - %1461 = fmul float %1459, %1460 - %1462 = fadd float %1461, 0.000000e+00 - %1463 = bitcast i32 %686 to float - %1464 = bitcast i32 %686 to float - %1465 = fmul float %1463, %1464 - %1466 = fadd float %1462, %1465 - %1467 = call float @llvm.sqrt.f32(float %1466) - %1468 = fneg float %692 - %1469 = fmul float %1467, %1468 - %1470 = fmul float %1469, 0.000000e+00 - %1471 = bitcast i32 %686 to float - %1472 = fadd float %1471, %1470 - %1473 = fmul float %1458, %1472 - %1474 = fadd float %1444, %1473 - %1475 = call float @llvm.sqrt.f32(float %1474) - %1476 = fadd float %1475, 0.000000e+00 - %1477 = fdiv float %1416, %1476 - %1478 = fmul float %1403, %1477 - %1479 = fsub float 1.000000e+00, %1478 - %1480 = fmul float %1479, %1322 - %1481 = fadd float %1480, 0.000000e+00 - %1482 = bitcast i32 %52 to float - %1483 = bitcast i32 %52 to float - %1484 = fmul float %1482, %1483 - %1485 = fadd float %1484, 0.000000e+00 - %1486 = bitcast i32 %686 to float - %1487 = bitcast i32 %686 to float - %1488 = fmul float %1486, %1487 - %1489 = fadd float %1485, %1488 - %1490 = call float @llvm.sqrt.f32(float %1489) - %1491 = fneg float %692 - %1492 = fmul float %1490, %1491 - %1493 = bitcast i32 %52 to float - %1494 = fadd float %1493, %1492 - %1495 = bitcast i32 %52 to float - %1496 = bitcast i32 %52 to float - %1497 = fmul float %1495, %1496 - %1498 = fadd float %1497, 0.000000e+00 - %1499 = bitcast i32 %686 to float - %1500 = bitcast i32 %686 to float - %1501 = fmul float %1499, %1500 - %1502 = fadd float %1498, %1501 - %1503 = call float @llvm.sqrt.f32(float %1502) - %1504 = fneg float %692 - %1505 = fmul float %1503, %1504 - %1506 = bitcast i32 %52 to float - %1507 = fadd float %1506, %1505 - %1508 = bitcast i32 %52 to float - %1509 = bitcast i32 %52 to float - %1510 = fmul float %1508, %1509 - %1511 = fadd float %1510, 0.000000e+00 - %1512 = bitcast i32 %686 to float - %1513 = bitcast i32 %686 to float - %1514 = fmul float %1512, %1513 - %1515 = fadd float %1511, %1514 - %1516 = call float @llvm.sqrt.f32(float %1515) - %1517 = fneg float %692 - %1518 = fmul float %1516, %1517 - %1519 = bitcast i32 %52 to float - %1520 = fadd float %1519, %1518 - %1521 = fmul float %1507, %1520 - %1522 = fadd float %1521, 0.000000e+00 - %1523 = bitcast i32 %52 to float - %1524 = bitcast i32 %52 to float - %1525 = fmul float %1523, %1524 - %1526 = fadd float %1525, 0.000000e+00 - %1527 = bitcast i32 %686 to float - %1528 = bitcast i32 %686 to float - %1529 = fmul float %1527, %1528 - %1530 = fadd float %1526, %1529 - %1531 = call float @llvm.sqrt.f32(float %1530) - %1532 = fneg float %692 - %1533 = fmul float 
%1531, %1532 - %1534 = fmul float %1533, 0.000000e+00 - %1535 = bitcast i32 %686 to float - %1536 = fadd float %1535, %1534 - %1537 = bitcast i32 %52 to float - %1538 = bitcast i32 %52 to float - %1539 = fmul float %1537, %1538 - %1540 = fadd float %1539, 0.000000e+00 - %1541 = bitcast i32 %686 to float - %1542 = bitcast i32 %686 to float - %1543 = fmul float %1541, %1542 - %1544 = fadd float %1540, %1543 - %1545 = call float @llvm.sqrt.f32(float %1544) - %1546 = fneg float %692 - %1547 = fmul float %1545, %1546 - %1548 = fmul float %1547, 0.000000e+00 - %1549 = bitcast i32 %686 to float - %1550 = fadd float %1549, %1548 - %1551 = fmul float %1536, %1550 - %1552 = fadd float %1522, %1551 - %1553 = call float @llvm.sqrt.f32(float %1552) - %1554 = fadd float %1553, 0.000000e+00 - %1555 = fdiv float %1494, %1554 - %1556 = fmul float %1555, 2.000000e+00 - %1557 = bitcast i32 %52 to float - %1558 = bitcast i32 %52 to float - %1559 = fmul float %1557, %1558 - %1560 = fadd float %1559, 0.000000e+00 - %1561 = bitcast i32 %686 to float - %1562 = bitcast i32 %686 to float - %1563 = fmul float %1561, %1562 - %1564 = fadd float %1560, %1563 - %1565 = call float @llvm.sqrt.f32(float %1564) - %1566 = fneg float %692 - %1567 = fmul float %1565, %1566 - %1568 = fmul float %1567, 0.000000e+00 - %1569 = bitcast i32 %686 to float - %1570 = fadd float %1569, %1568 - %1571 = bitcast i32 %52 to float - %1572 = bitcast i32 %52 to float - %1573 = fmul float %1571, %1572 - %1574 = fadd float %1573, 0.000000e+00 - %1575 = bitcast i32 %686 to float - %1576 = bitcast i32 %686 to float - %1577 = fmul float %1575, %1576 - %1578 = fadd float %1574, %1577 - %1579 = call float @llvm.sqrt.f32(float %1578) - %1580 = fneg float %692 - %1581 = fmul float %1579, %1580 - %1582 = bitcast i32 %52 to float - %1583 = fadd float %1582, %1581 - %1584 = bitcast i32 %52 to float - %1585 = bitcast i32 %52 to float - %1586 = fmul float %1584, %1585 - %1587 = fadd float %1586, 0.000000e+00 - %1588 = bitcast i32 %686 to float - %1589 = bitcast i32 %686 to float - %1590 = fmul float %1588, %1589 - %1591 = fadd float %1587, %1590 - %1592 = call float @llvm.sqrt.f32(float %1591) - %1593 = fneg float %692 - %1594 = fmul float %1592, %1593 - %1595 = bitcast i32 %52 to float - %1596 = fadd float %1595, %1594 - %1597 = fmul float %1583, %1596 - %1598 = fadd float %1597, 0.000000e+00 - %1599 = bitcast i32 %52 to float - %1600 = bitcast i32 %52 to float - %1601 = fmul float %1599, %1600 - %1602 = fadd float %1601, 0.000000e+00 - %1603 = bitcast i32 %686 to float - %1604 = bitcast i32 %686 to float - %1605 = fmul float %1603, %1604 - %1606 = fadd float %1602, %1605 - %1607 = call float @llvm.sqrt.f32(float %1606) - %1608 = fneg float %692 - %1609 = fmul float %1607, %1608 - %1610 = fmul float %1609, 0.000000e+00 - %1611 = bitcast i32 %686 to float - %1612 = fadd float %1611, %1610 - %1613 = bitcast i32 %52 to float - %1614 = bitcast i32 %52 to float - %1615 = fmul float %1613, %1614 - %1616 = fadd float %1615, 0.000000e+00 - %1617 = bitcast i32 %686 to float - %1618 = bitcast i32 %686 to float - %1619 = fmul float %1617, %1618 - %1620 = fadd float %1616, %1619 - %1621 = call float @llvm.sqrt.f32(float %1620) - %1622 = fneg float %692 - %1623 = fmul float %1621, %1622 - %1624 = fmul float %1623, 0.000000e+00 - %1625 = bitcast i32 %686 to float - %1626 = fadd float %1625, %1624 - %1627 = fmul float %1612, %1626 - %1628 = fadd float %1598, %1627 - %1629 = call float @llvm.sqrt.f32(float %1628) - %1630 = fadd float %1629, 0.000000e+00 - %1631 = fdiv 
float %1570, %1630 - %1632 = fmul float %1556, %1631 - %1633 = fneg float %1632 - %1634 = load float, float* %44, align 4 - %1635 = fmul float %1633, %1634 - %1636 = fadd float %1481, %1635 - %1637 = insertelement <4 x float> zeroinitializer, float %1636, i32 0 - %1638 = insertelement <4 x float> %1637, float 0.000000e+00, i32 1 - %1639 = insertelement <4 x float> %1638, float 0.000000e+00, i32 2 - %1640 = insertelement <4 x float> %1639, float 0.000000e+00, i32 3 - %1641 = extractelement <4 x float> %1640, i32 0 - store float %1641, float* %1164, align 4 - %1642 = extractelement <4 x float> %1640, i32 1 - %1643 = getelementptr float, float* %2, i32 0 - %1644 = getelementptr inbounds float, float* %1643, i64 2 - store float %1642, float* %1644, align 4 - %1645 = bitcast i32 %52 to float - %1646 = bitcast i32 %52 to float - %1647 = fmul float %1645, %1646 - %1648 = fadd float %1647, 0.000000e+00 - %1649 = bitcast i32 %686 to float - %1650 = bitcast i32 %686 to float - %1651 = fmul float %1649, %1650 - %1652 = fadd float %1648, %1651 - %1653 = call float @llvm.sqrt.f32(float %1652) - %1654 = fneg float %692 - %1655 = fmul float %1653, %1654 - %1656 = fmul float %1655, 0.000000e+00 - %1657 = bitcast i32 %686 to float - %1658 = fadd float %1657, %1656 - %1659 = bitcast i32 %52 to float - %1660 = bitcast i32 %52 to float - %1661 = fmul float %1659, %1660 - %1662 = fadd float %1661, 0.000000e+00 - %1663 = bitcast i32 %686 to float - %1664 = bitcast i32 %686 to float - %1665 = fmul float %1663, %1664 - %1666 = fadd float %1662, %1665 - %1667 = call float @llvm.sqrt.f32(float %1666) - %1668 = fneg float %692 - %1669 = fmul float %1667, %1668 - %1670 = bitcast i32 %52 to float - %1671 = fadd float %1670, %1669 - %1672 = bitcast i32 %52 to float - %1673 = bitcast i32 %52 to float - %1674 = fmul float %1672, %1673 - %1675 = fadd float %1674, 0.000000e+00 - %1676 = bitcast i32 %686 to float - %1677 = bitcast i32 %686 to float - %1678 = fmul float %1676, %1677 - %1679 = fadd float %1675, %1678 - %1680 = call float @llvm.sqrt.f32(float %1679) - %1681 = fneg float %692 - %1682 = fmul float %1680, %1681 - %1683 = bitcast i32 %52 to float - %1684 = fadd float %1683, %1682 - %1685 = fmul float %1671, %1684 - %1686 = fadd float %1685, 0.000000e+00 - %1687 = bitcast i32 %52 to float - %1688 = bitcast i32 %52 to float - %1689 = fmul float %1687, %1688 - %1690 = fadd float %1689, 0.000000e+00 - %1691 = bitcast i32 %686 to float - %1692 = bitcast i32 %686 to float - %1693 = fmul float %1691, %1692 - %1694 = fadd float %1690, %1693 - %1695 = call float @llvm.sqrt.f32(float %1694) - %1696 = fneg float %692 - %1697 = fmul float %1695, %1696 - %1698 = fmul float %1697, 0.000000e+00 - %1699 = bitcast i32 %686 to float - %1700 = fadd float %1699, %1698 - %1701 = bitcast i32 %52 to float - %1702 = bitcast i32 %52 to float - %1703 = fmul float %1701, %1702 - %1704 = fadd float %1703, 0.000000e+00 - %1705 = bitcast i32 %686 to float - %1706 = bitcast i32 %686 to float - %1707 = fmul float %1705, %1706 - %1708 = fadd float %1704, %1707 - %1709 = call float @llvm.sqrt.f32(float %1708) - %1710 = fneg float %692 - %1711 = fmul float %1709, %1710 - %1712 = fmul float %1711, 0.000000e+00 - %1713 = bitcast i32 %686 to float - %1714 = fadd float %1713, %1712 - %1715 = fmul float %1700, %1714 - %1716 = fadd float %1686, %1715 - %1717 = call float @llvm.sqrt.f32(float %1716) - %1718 = fadd float %1717, 0.000000e+00 - %1719 = fdiv float %1658, %1718 - %1720 = fmul float %1719, 2.000000e+00 - %1721 = bitcast i32 %52 to float - %1722 
= bitcast i32 %52 to float - %1723 = fmul float %1721, %1722 - %1724 = fadd float %1723, 0.000000e+00 - %1725 = bitcast i32 %686 to float - %1726 = bitcast i32 %686 to float - %1727 = fmul float %1725, %1726 - %1728 = fadd float %1724, %1727 - %1729 = call float @llvm.sqrt.f32(float %1728) - %1730 = fneg float %692 - %1731 = fmul float %1729, %1730 - %1732 = bitcast i32 %52 to float - %1733 = fadd float %1732, %1731 - %1734 = bitcast i32 %52 to float - %1735 = bitcast i32 %52 to float - %1736 = fmul float %1734, %1735 - %1737 = fadd float %1736, 0.000000e+00 - %1738 = bitcast i32 %686 to float - %1739 = bitcast i32 %686 to float - %1740 = fmul float %1738, %1739 - %1741 = fadd float %1737, %1740 - %1742 = call float @llvm.sqrt.f32(float %1741) - %1743 = fneg float %692 - %1744 = fmul float %1742, %1743 - %1745 = bitcast i32 %52 to float - %1746 = fadd float %1745, %1744 - %1747 = bitcast i32 %52 to float - %1748 = bitcast i32 %52 to float - %1749 = fmul float %1747, %1748 - %1750 = fadd float %1749, 0.000000e+00 - %1751 = bitcast i32 %686 to float - %1752 = bitcast i32 %686 to float - %1753 = fmul float %1751, %1752 - %1754 = fadd float %1750, %1753 - %1755 = call float @llvm.sqrt.f32(float %1754) - %1756 = fneg float %692 - %1757 = fmul float %1755, %1756 - %1758 = bitcast i32 %52 to float - %1759 = fadd float %1758, %1757 - %1760 = fmul float %1746, %1759 - %1761 = fadd float %1760, 0.000000e+00 - %1762 = bitcast i32 %52 to float - %1763 = bitcast i32 %52 to float - %1764 = fmul float %1762, %1763 - %1765 = fadd float %1764, 0.000000e+00 - %1766 = bitcast i32 %686 to float - %1767 = bitcast i32 %686 to float - %1768 = fmul float %1766, %1767 - %1769 = fadd float %1765, %1768 - %1770 = call float @llvm.sqrt.f32(float %1769) - %1771 = fneg float %692 - %1772 = fmul float %1770, %1771 - %1773 = fmul float %1772, 0.000000e+00 - %1774 = bitcast i32 %686 to float - %1775 = fadd float %1774, %1773 - %1776 = bitcast i32 %52 to float - %1777 = bitcast i32 %52 to float - %1778 = fmul float %1776, %1777 - %1779 = fadd float %1778, 0.000000e+00 - %1780 = bitcast i32 %686 to float - %1781 = bitcast i32 %686 to float - %1782 = fmul float %1780, %1781 - %1783 = fadd float %1779, %1782 - %1784 = call float @llvm.sqrt.f32(float %1783) - %1785 = fneg float %692 - %1786 = fmul float %1784, %1785 - %1787 = fmul float %1786, 0.000000e+00 - %1788 = bitcast i32 %686 to float - %1789 = fadd float %1788, %1787 - %1790 = fmul float %1775, %1789 - %1791 = fadd float %1761, %1790 - %1792 = call float @llvm.sqrt.f32(float %1791) - %1793 = fadd float %1792, 0.000000e+00 - %1794 = fdiv float %1733, %1793 - %1795 = fmul float %1720, %1794 - %1796 = fneg float %1795 - %1797 = insertelement <4 x float> zeroinitializer, float %1796, i32 0 - %1798 = insertelement <4 x float> %1797, float 0.000000e+00, i32 1 - %1799 = insertelement <4 x float> %1798, float 0.000000e+00, i32 2 - %1800 = insertelement <4 x float> %1799, float 0.000000e+00, i32 3 - %1801 = getelementptr float, float* %0, i32 0 - %1802 = load float, float* %1801, align 4 - %1803 = insertelement <4 x float> zeroinitializer, float %1802, i32 0 - %1804 = insertelement <4 x float> %1803, float 0.000000e+00, i32 1 - %1805 = insertelement <4 x float> %1804, float 0.000000e+00, i32 2 - %1806 = insertelement <4 x float> %1805, float 0.000000e+00, i32 3 - %1807 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1800, <4 x float> %1806, <4 x float> zeroinitializer) - %1808 = extractelement <4 x float> %1807, i32 0 - store float %1808, float* %1644, align 4 - %1809 = bitcast 
i32 %52 to float - %1810 = bitcast i32 %52 to float - %1811 = fmul float %1809, %1810 - %1812 = fadd float %1811, 0.000000e+00 - %1813 = bitcast i32 %686 to float - %1814 = bitcast i32 %686 to float - %1815 = fmul float %1813, %1814 - %1816 = fadd float %1812, %1815 - %1817 = call float @llvm.sqrt.f32(float %1816) - %1818 = fneg float %692 - %1819 = fmul float %1817, %1818 - %1820 = fmul float %1819, 0.000000e+00 - %1821 = bitcast i32 %686 to float - %1822 = fadd float %1821, %1820 - %1823 = bitcast i32 %52 to float - %1824 = bitcast i32 %52 to float - %1825 = fmul float %1823, %1824 - %1826 = fadd float %1825, 0.000000e+00 - %1827 = bitcast i32 %686 to float - %1828 = bitcast i32 %686 to float - %1829 = fmul float %1827, %1828 - %1830 = fadd float %1826, %1829 - %1831 = call float @llvm.sqrt.f32(float %1830) - %1832 = fneg float %692 - %1833 = fmul float %1831, %1832 - %1834 = bitcast i32 %52 to float - %1835 = fadd float %1834, %1833 - %1836 = bitcast i32 %52 to float - %1837 = bitcast i32 %52 to float - %1838 = fmul float %1836, %1837 - %1839 = fadd float %1838, 0.000000e+00 - %1840 = bitcast i32 %686 to float - %1841 = bitcast i32 %686 to float - %1842 = fmul float %1840, %1841 - %1843 = fadd float %1839, %1842 - %1844 = call float @llvm.sqrt.f32(float %1843) - %1845 = fneg float %692 - %1846 = fmul float %1844, %1845 - %1847 = bitcast i32 %52 to float - %1848 = fadd float %1847, %1846 - %1849 = fmul float %1835, %1848 - %1850 = fadd float %1849, 0.000000e+00 - %1851 = bitcast i32 %52 to float - %1852 = bitcast i32 %52 to float - %1853 = fmul float %1851, %1852 - %1854 = fadd float %1853, 0.000000e+00 - %1855 = bitcast i32 %686 to float - %1856 = bitcast i32 %686 to float - %1857 = fmul float %1855, %1856 - %1858 = fadd float %1854, %1857 - %1859 = call float @llvm.sqrt.f32(float %1858) - %1860 = fneg float %692 - %1861 = fmul float %1859, %1860 - %1862 = fmul float %1861, 0.000000e+00 - %1863 = bitcast i32 %686 to float - %1864 = fadd float %1863, %1862 - %1865 = bitcast i32 %52 to float - %1866 = bitcast i32 %52 to float - %1867 = fmul float %1865, %1866 - %1868 = fadd float %1867, 0.000000e+00 - %1869 = bitcast i32 %686 to float - %1870 = bitcast i32 %686 to float - %1871 = fmul float %1869, %1870 - %1872 = fadd float %1868, %1871 - %1873 = call float @llvm.sqrt.f32(float %1872) - %1874 = fneg float %692 - %1875 = fmul float %1873, %1874 - %1876 = fmul float %1875, 0.000000e+00 - %1877 = bitcast i32 %686 to float - %1878 = fadd float %1877, %1876 - %1879 = fmul float %1864, %1878 - %1880 = fadd float %1850, %1879 - %1881 = call float @llvm.sqrt.f32(float %1880) - %1882 = fadd float %1881, 0.000000e+00 - %1883 = fdiv float %1822, %1882 - %1884 = fmul float %1883, 2.000000e+00 - %1885 = bitcast i32 %52 to float - %1886 = bitcast i32 %52 to float - %1887 = fmul float %1885, %1886 - %1888 = fadd float %1887, 0.000000e+00 - %1889 = bitcast i32 %686 to float - %1890 = bitcast i32 %686 to float - %1891 = fmul float %1889, %1890 - %1892 = fadd float %1888, %1891 - %1893 = call float @llvm.sqrt.f32(float %1892) - %1894 = fneg float %692 - %1895 = fmul float %1893, %1894 - %1896 = bitcast i32 %52 to float - %1897 = fadd float %1896, %1895 - %1898 = bitcast i32 %52 to float - %1899 = bitcast i32 %52 to float - %1900 = fmul float %1898, %1899 - %1901 = fadd float %1900, 0.000000e+00 - %1902 = bitcast i32 %686 to float - %1903 = bitcast i32 %686 to float - %1904 = fmul float %1902, %1903 - %1905 = fadd float %1901, %1904 - %1906 = call float @llvm.sqrt.f32(float %1905) - %1907 = fneg float %692 
- %1908 = fmul float %1906, %1907 - %1909 = bitcast i32 %52 to float - %1910 = fadd float %1909, %1908 - %1911 = bitcast i32 %52 to float - %1912 = bitcast i32 %52 to float - %1913 = fmul float %1911, %1912 - %1914 = fadd float %1913, 0.000000e+00 - %1915 = bitcast i32 %686 to float - %1916 = bitcast i32 %686 to float - %1917 = fmul float %1915, %1916 - %1918 = fadd float %1914, %1917 - %1919 = call float @llvm.sqrt.f32(float %1918) - %1920 = fneg float %692 - %1921 = fmul float %1919, %1920 - %1922 = bitcast i32 %52 to float - %1923 = fadd float %1922, %1921 - %1924 = fmul float %1910, %1923 - %1925 = fadd float %1924, 0.000000e+00 - %1926 = bitcast i32 %52 to float - %1927 = bitcast i32 %52 to float - %1928 = fmul float %1926, %1927 - %1929 = fadd float %1928, 0.000000e+00 - %1930 = bitcast i32 %686 to float - %1931 = bitcast i32 %686 to float - %1932 = fmul float %1930, %1931 - %1933 = fadd float %1929, %1932 - %1934 = call float @llvm.sqrt.f32(float %1933) - %1935 = fneg float %692 - %1936 = fmul float %1934, %1935 - %1937 = fmul float %1936, 0.000000e+00 - %1938 = bitcast i32 %686 to float - %1939 = fadd float %1938, %1937 - %1940 = bitcast i32 %52 to float - %1941 = bitcast i32 %52 to float - %1942 = fmul float %1940, %1941 - %1943 = fadd float %1942, 0.000000e+00 - %1944 = bitcast i32 %686 to float - %1945 = bitcast i32 %686 to float - %1946 = fmul float %1944, %1945 - %1947 = fadd float %1943, %1946 - %1948 = call float @llvm.sqrt.f32(float %1947) - %1949 = fneg float %692 - %1950 = fmul float %1948, %1949 - %1951 = fmul float %1950, 0.000000e+00 - %1952 = bitcast i32 %686 to float - %1953 = fadd float %1952, %1951 - %1954 = fmul float %1939, %1953 - %1955 = fadd float %1925, %1954 - %1956 = call float @llvm.sqrt.f32(float %1955) - %1957 = fadd float %1956, 0.000000e+00 - %1958 = fdiv float %1897, %1957 - %1959 = fmul float %1884, %1958 - %1960 = fneg float %1959 - %1961 = fmul float %1960, %1802 - %1962 = fadd float %1961, 0.000000e+00 - %1963 = bitcast i32 %52 to float - %1964 = bitcast i32 %52 to float - %1965 = fmul float %1963, %1964 - %1966 = fadd float %1965, 0.000000e+00 - %1967 = bitcast i32 %686 to float - %1968 = bitcast i32 %686 to float - %1969 = fmul float %1967, %1968 - %1970 = fadd float %1966, %1969 - %1971 = call float @llvm.sqrt.f32(float %1970) - %1972 = fneg float %692 - %1973 = fmul float %1971, %1972 - %1974 = fmul float %1973, 0.000000e+00 - %1975 = bitcast i32 %686 to float - %1976 = fadd float %1975, %1974 - %1977 = bitcast i32 %52 to float - %1978 = bitcast i32 %52 to float - %1979 = fmul float %1977, %1978 - %1980 = fadd float %1979, 0.000000e+00 - %1981 = bitcast i32 %686 to float - %1982 = bitcast i32 %686 to float - %1983 = fmul float %1981, %1982 - %1984 = fadd float %1980, %1983 - %1985 = call float @llvm.sqrt.f32(float %1984) - %1986 = fneg float %692 - %1987 = fmul float %1985, %1986 - %1988 = bitcast i32 %52 to float - %1989 = fadd float %1988, %1987 - %1990 = bitcast i32 %52 to float - %1991 = bitcast i32 %52 to float - %1992 = fmul float %1990, %1991 - %1993 = fadd float %1992, 0.000000e+00 - %1994 = bitcast i32 %686 to float - %1995 = bitcast i32 %686 to float - %1996 = fmul float %1994, %1995 - %1997 = fadd float %1993, %1996 - %1998 = call float @llvm.sqrt.f32(float %1997) - %1999 = fneg float %692 - %2000 = fmul float %1998, %1999 - %2001 = bitcast i32 %52 to float - %2002 = fadd float %2001, %2000 - %2003 = fmul float %1989, %2002 - %2004 = fadd float %2003, 0.000000e+00 - %2005 = bitcast i32 %52 to float - %2006 = bitcast i32 %52 to float 
- %2007 = fmul float %2005, %2006 - %2008 = fadd float %2007, 0.000000e+00 - %2009 = bitcast i32 %686 to float - %2010 = bitcast i32 %686 to float - %2011 = fmul float %2009, %2010 - %2012 = fadd float %2008, %2011 - %2013 = call float @llvm.sqrt.f32(float %2012) - %2014 = fneg float %692 - %2015 = fmul float %2013, %2014 - %2016 = fmul float %2015, 0.000000e+00 - %2017 = bitcast i32 %686 to float - %2018 = fadd float %2017, %2016 - %2019 = bitcast i32 %52 to float - %2020 = bitcast i32 %52 to float - %2021 = fmul float %2019, %2020 - %2022 = fadd float %2021, 0.000000e+00 - %2023 = bitcast i32 %686 to float - %2024 = bitcast i32 %686 to float - %2025 = fmul float %2023, %2024 - %2026 = fadd float %2022, %2025 - %2027 = call float @llvm.sqrt.f32(float %2026) - %2028 = fneg float %692 - %2029 = fmul float %2027, %2028 - %2030 = fmul float %2029, 0.000000e+00 - %2031 = bitcast i32 %686 to float - %2032 = fadd float %2031, %2030 - %2033 = fmul float %2018, %2032 - %2034 = fadd float %2004, %2033 - %2035 = call float @llvm.sqrt.f32(float %2034) - %2036 = fadd float %2035, 0.000000e+00 - %2037 = fdiv float %1976, %2036 - %2038 = fmul float %2037, 2.000000e+00 - %2039 = bitcast i32 %52 to float - %2040 = bitcast i32 %52 to float - %2041 = fmul float %2039, %2040 - %2042 = fadd float %2041, 0.000000e+00 - %2043 = bitcast i32 %686 to float - %2044 = bitcast i32 %686 to float - %2045 = fmul float %2043, %2044 - %2046 = fadd float %2042, %2045 - %2047 = call float @llvm.sqrt.f32(float %2046) - %2048 = fneg float %692 - %2049 = fmul float %2047, %2048 - %2050 = fmul float %2049, 0.000000e+00 - %2051 = bitcast i32 %686 to float - %2052 = fadd float %2051, %2050 - %2053 = bitcast i32 %52 to float - %2054 = bitcast i32 %52 to float - %2055 = fmul float %2053, %2054 - %2056 = fadd float %2055, 0.000000e+00 - %2057 = bitcast i32 %686 to float - %2058 = bitcast i32 %686 to float - %2059 = fmul float %2057, %2058 - %2060 = fadd float %2056, %2059 - %2061 = call float @llvm.sqrt.f32(float %2060) - %2062 = fneg float %692 - %2063 = fmul float %2061, %2062 - %2064 = bitcast i32 %52 to float - %2065 = fadd float %2064, %2063 - %2066 = bitcast i32 %52 to float - %2067 = bitcast i32 %52 to float - %2068 = fmul float %2066, %2067 - %2069 = fadd float %2068, 0.000000e+00 - %2070 = bitcast i32 %686 to float - %2071 = bitcast i32 %686 to float - %2072 = fmul float %2070, %2071 - %2073 = fadd float %2069, %2072 - %2074 = call float @llvm.sqrt.f32(float %2073) - %2075 = fneg float %692 - %2076 = fmul float %2074, %2075 - %2077 = bitcast i32 %52 to float - %2078 = fadd float %2077, %2076 - %2079 = fmul float %2065, %2078 - %2080 = fadd float %2079, 0.000000e+00 - %2081 = bitcast i32 %52 to float - %2082 = bitcast i32 %52 to float - %2083 = fmul float %2081, %2082 - %2084 = fadd float %2083, 0.000000e+00 - %2085 = bitcast i32 %686 to float - %2086 = bitcast i32 %686 to float - %2087 = fmul float %2085, %2086 - %2088 = fadd float %2084, %2087 - %2089 = call float @llvm.sqrt.f32(float %2088) - %2090 = fneg float %692 - %2091 = fmul float %2089, %2090 - %2092 = fmul float %2091, 0.000000e+00 - %2093 = bitcast i32 %686 to float - %2094 = fadd float %2093, %2092 - %2095 = bitcast i32 %52 to float - %2096 = bitcast i32 %52 to float - %2097 = fmul float %2095, %2096 - %2098 = fadd float %2097, 0.000000e+00 - %2099 = bitcast i32 %686 to float - %2100 = bitcast i32 %686 to float - %2101 = fmul float %2099, %2100 - %2102 = fadd float %2098, %2101 - %2103 = call float @llvm.sqrt.f32(float %2102) - %2104 = fneg float %692 - %2105 = 
fmul float %2103, %2104 - %2106 = fmul float %2105, 0.000000e+00 - %2107 = bitcast i32 %686 to float - %2108 = fadd float %2107, %2106 - %2109 = fmul float %2094, %2108 - %2110 = fadd float %2080, %2109 - %2111 = call float @llvm.sqrt.f32(float %2110) - %2112 = fadd float %2111, 0.000000e+00 - %2113 = fdiv float %2052, %2112 - %2114 = fmul float %2038, %2113 - %2115 = fsub float 1.000000e+00, %2114 - %2116 = load float, float* %1153, align 4 - %2117 = fmul float %2115, %2116 - %2118 = fadd float %1962, %2117 - %2119 = insertelement <4 x float> zeroinitializer, float %2118, i32 0 - %2120 = insertelement <4 x float> %2119, float 0.000000e+00, i32 1 - %2121 = insertelement <4 x float> %2120, float 0.000000e+00, i32 2 - %2122 = insertelement <4 x float> %2121, float 0.000000e+00, i32 3 - %2123 = extractelement <4 x float> %2122, i32 0 - store float %2123, float* %1644, align 4 - %2124 = extractelement <4 x float> %2122, i32 1 - %2125 = getelementptr float, float* %2, i32 0 - %2126 = getelementptr inbounds float, float* %2125, i64 3 - store float %2124, float* %2126, align 4 - %2127 = bitcast i32 %52 to float - %2128 = bitcast i32 %52 to float - %2129 = fmul float %2127, %2128 - %2130 = fadd float %2129, 0.000000e+00 - %2131 = bitcast i32 %686 to float - %2132 = bitcast i32 %686 to float - %2133 = fmul float %2131, %2132 - %2134 = fadd float %2130, %2133 - %2135 = call float @llvm.sqrt.f32(float %2134) - %2136 = fneg float %692 - %2137 = fmul float %2135, %2136 - %2138 = fmul float %2137, 0.000000e+00 - %2139 = bitcast i32 %686 to float - %2140 = fadd float %2139, %2138 - %2141 = bitcast i32 %52 to float - %2142 = bitcast i32 %52 to float - %2143 = fmul float %2141, %2142 - %2144 = fadd float %2143, 0.000000e+00 - %2145 = bitcast i32 %686 to float - %2146 = bitcast i32 %686 to float - %2147 = fmul float %2145, %2146 - %2148 = fadd float %2144, %2147 - %2149 = call float @llvm.sqrt.f32(float %2148) - %2150 = fneg float %692 - %2151 = fmul float %2149, %2150 - %2152 = bitcast i32 %52 to float - %2153 = fadd float %2152, %2151 - %2154 = bitcast i32 %52 to float - %2155 = bitcast i32 %52 to float - %2156 = fmul float %2154, %2155 - %2157 = fadd float %2156, 0.000000e+00 - %2158 = bitcast i32 %686 to float - %2159 = bitcast i32 %686 to float - %2160 = fmul float %2158, %2159 - %2161 = fadd float %2157, %2160 - %2162 = call float @llvm.sqrt.f32(float %2161) - %2163 = fneg float %692 - %2164 = fmul float %2162, %2163 - %2165 = bitcast i32 %52 to float - %2166 = fadd float %2165, %2164 - %2167 = fmul float %2153, %2166 - %2168 = fadd float %2167, 0.000000e+00 - %2169 = bitcast i32 %52 to float - %2170 = bitcast i32 %52 to float - %2171 = fmul float %2169, %2170 - %2172 = fadd float %2171, 0.000000e+00 - %2173 = bitcast i32 %686 to float - %2174 = bitcast i32 %686 to float - %2175 = fmul float %2173, %2174 - %2176 = fadd float %2172, %2175 - %2177 = call float @llvm.sqrt.f32(float %2176) - %2178 = fneg float %692 - %2179 = fmul float %2177, %2178 - %2180 = fmul float %2179, 0.000000e+00 - %2181 = bitcast i32 %686 to float - %2182 = fadd float %2181, %2180 - %2183 = bitcast i32 %52 to float - %2184 = bitcast i32 %52 to float - %2185 = fmul float %2183, %2184 - %2186 = fadd float %2185, 0.000000e+00 - %2187 = bitcast i32 %686 to float - %2188 = bitcast i32 %686 to float - %2189 = fmul float %2187, %2188 - %2190 = fadd float %2186, %2189 - %2191 = call float @llvm.sqrt.f32(float %2190) - %2192 = fneg float %692 - %2193 = fmul float %2191, %2192 - %2194 = fmul float %2193, 0.000000e+00 - %2195 = bitcast i32 
%686 to float - %2196 = fadd float %2195, %2194 - %2197 = fmul float %2182, %2196 - %2198 = fadd float %2168, %2197 - %2199 = call float @llvm.sqrt.f32(float %2198) - %2200 = fadd float %2199, 0.000000e+00 - %2201 = fdiv float %2140, %2200 - %2202 = fmul float %2201, 2.000000e+00 - %2203 = bitcast i32 %52 to float - %2204 = bitcast i32 %52 to float - %2205 = fmul float %2203, %2204 - %2206 = fadd float %2205, 0.000000e+00 - %2207 = bitcast i32 %686 to float - %2208 = bitcast i32 %686 to float - %2209 = fmul float %2207, %2208 - %2210 = fadd float %2206, %2209 - %2211 = call float @llvm.sqrt.f32(float %2210) - %2212 = fneg float %692 - %2213 = fmul float %2211, %2212 - %2214 = bitcast i32 %52 to float - %2215 = fadd float %2214, %2213 - %2216 = bitcast i32 %52 to float - %2217 = bitcast i32 %52 to float - %2218 = fmul float %2216, %2217 - %2219 = fadd float %2218, 0.000000e+00 - %2220 = bitcast i32 %686 to float - %2221 = bitcast i32 %686 to float - %2222 = fmul float %2220, %2221 - %2223 = fadd float %2219, %2222 - %2224 = call float @llvm.sqrt.f32(float %2223) - %2225 = fneg float %692 - %2226 = fmul float %2224, %2225 - %2227 = bitcast i32 %52 to float - %2228 = fadd float %2227, %2226 - %2229 = bitcast i32 %52 to float - %2230 = bitcast i32 %52 to float - %2231 = fmul float %2229, %2230 - %2232 = fadd float %2231, 0.000000e+00 - %2233 = bitcast i32 %686 to float - %2234 = bitcast i32 %686 to float - %2235 = fmul float %2233, %2234 - %2236 = fadd float %2232, %2235 - %2237 = call float @llvm.sqrt.f32(float %2236) - %2238 = fneg float %692 - %2239 = fmul float %2237, %2238 - %2240 = bitcast i32 %52 to float - %2241 = fadd float %2240, %2239 - %2242 = fmul float %2228, %2241 - %2243 = fadd float %2242, 0.000000e+00 - %2244 = bitcast i32 %52 to float - %2245 = bitcast i32 %52 to float - %2246 = fmul float %2244, %2245 - %2247 = fadd float %2246, 0.000000e+00 - %2248 = bitcast i32 %686 to float - %2249 = bitcast i32 %686 to float - %2250 = fmul float %2248, %2249 - %2251 = fadd float %2247, %2250 - %2252 = call float @llvm.sqrt.f32(float %2251) - %2253 = fneg float %692 - %2254 = fmul float %2252, %2253 - %2255 = fmul float %2254, 0.000000e+00 - %2256 = bitcast i32 %686 to float - %2257 = fadd float %2256, %2255 - %2258 = bitcast i32 %52 to float - %2259 = bitcast i32 %52 to float - %2260 = fmul float %2258, %2259 - %2261 = fadd float %2260, 0.000000e+00 - %2262 = bitcast i32 %686 to float - %2263 = bitcast i32 %686 to float - %2264 = fmul float %2262, %2263 - %2265 = fadd float %2261, %2264 - %2266 = call float @llvm.sqrt.f32(float %2265) - %2267 = fneg float %692 - %2268 = fmul float %2266, %2267 - %2269 = fmul float %2268, 0.000000e+00 - %2270 = bitcast i32 %686 to float - %2271 = fadd float %2270, %2269 - %2272 = fmul float %2257, %2271 - %2273 = fadd float %2243, %2272 - %2274 = call float @llvm.sqrt.f32(float %2273) - %2275 = fadd float %2274, 0.000000e+00 - %2276 = fdiv float %2215, %2275 - %2277 = fmul float %2202, %2276 - %2278 = fneg float %2277 - %2279 = insertelement <4 x float> zeroinitializer, float %2278, i32 0 - %2280 = insertelement <4 x float> %2279, float 0.000000e+00, i32 1 - %2281 = insertelement <4 x float> %2280, float 0.000000e+00, i32 2 - %2282 = insertelement <4 x float> %2281, float 0.000000e+00, i32 3 - %2283 = load float, float* %1321, align 4 - %2284 = insertelement <4 x float> zeroinitializer, float %2283, i32 0 - %2285 = insertelement <4 x float> %2284, float 0.000000e+00, i32 1 - %2286 = insertelement <4 x float> %2285, float 0.000000e+00, i32 2 - %2287 = 
insertelement <4 x float> %2286, float 0.000000e+00, i32 3 - %2288 = call <4 x float> @llvm.fma.v4f32(<4 x float> %2282, <4 x float> %2287, <4 x float> zeroinitializer) - %2289 = extractelement <4 x float> %2288, i32 0 - store float %2289, float* %2126, align 4 - %2290 = bitcast i32 %52 to float - %2291 = bitcast i32 %52 to float - %2292 = fmul float %2290, %2291 - %2293 = fadd float %2292, 0.000000e+00 - %2294 = bitcast i32 %686 to float - %2295 = bitcast i32 %686 to float - %2296 = fmul float %2294, %2295 - %2297 = fadd float %2293, %2296 - %2298 = call float @llvm.sqrt.f32(float %2297) - %2299 = fneg float %692 - %2300 = fmul float %2298, %2299 - %2301 = fmul float %2300, 0.000000e+00 - %2302 = bitcast i32 %686 to float - %2303 = fadd float %2302, %2301 - %2304 = bitcast i32 %52 to float - %2305 = bitcast i32 %52 to float - %2306 = fmul float %2304, %2305 - %2307 = fadd float %2306, 0.000000e+00 - %2308 = bitcast i32 %686 to float - %2309 = bitcast i32 %686 to float - %2310 = fmul float %2308, %2309 - %2311 = fadd float %2307, %2310 - %2312 = call float @llvm.sqrt.f32(float %2311) - %2313 = fneg float %692 - %2314 = fmul float %2312, %2313 - %2315 = bitcast i32 %52 to float - %2316 = fadd float %2315, %2314 - %2317 = bitcast i32 %52 to float - %2318 = bitcast i32 %52 to float - %2319 = fmul float %2317, %2318 - %2320 = fadd float %2319, 0.000000e+00 - %2321 = bitcast i32 %686 to float - %2322 = bitcast i32 %686 to float - %2323 = fmul float %2321, %2322 - %2324 = fadd float %2320, %2323 - %2325 = call float @llvm.sqrt.f32(float %2324) - %2326 = fneg float %692 - %2327 = fmul float %2325, %2326 - %2328 = bitcast i32 %52 to float - %2329 = fadd float %2328, %2327 - %2330 = fmul float %2316, %2329 - %2331 = fadd float %2330, 0.000000e+00 - %2332 = bitcast i32 %52 to float - %2333 = bitcast i32 %52 to float - %2334 = fmul float %2332, %2333 - %2335 = fadd float %2334, 0.000000e+00 - %2336 = bitcast i32 %686 to float - %2337 = bitcast i32 %686 to float - %2338 = fmul float %2336, %2337 - %2339 = fadd float %2335, %2338 - %2340 = call float @llvm.sqrt.f32(float %2339) - %2341 = fneg float %692 - %2342 = fmul float %2340, %2341 - %2343 = fmul float %2342, 0.000000e+00 - %2344 = bitcast i32 %686 to float - %2345 = fadd float %2344, %2343 - %2346 = bitcast i32 %52 to float - %2347 = bitcast i32 %52 to float - %2348 = fmul float %2346, %2347 - %2349 = fadd float %2348, 0.000000e+00 - %2350 = bitcast i32 %686 to float - %2351 = bitcast i32 %686 to float - %2352 = fmul float %2350, %2351 - %2353 = fadd float %2349, %2352 - %2354 = call float @llvm.sqrt.f32(float %2353) - %2355 = fneg float %692 - %2356 = fmul float %2354, %2355 - %2357 = fmul float %2356, 0.000000e+00 - %2358 = bitcast i32 %686 to float - %2359 = fadd float %2358, %2357 - %2360 = fmul float %2345, %2359 - %2361 = fadd float %2331, %2360 - %2362 = call float @llvm.sqrt.f32(float %2361) - %2363 = fadd float %2362, 0.000000e+00 - %2364 = fdiv float %2303, %2363 - %2365 = fmul float %2364, 2.000000e+00 - %2366 = bitcast i32 %52 to float - %2367 = bitcast i32 %52 to float - %2368 = fmul float %2366, %2367 - %2369 = fadd float %2368, 0.000000e+00 - %2370 = bitcast i32 %686 to float - %2371 = bitcast i32 %686 to float - %2372 = fmul float %2370, %2371 - %2373 = fadd float %2369, %2372 - %2374 = call float @llvm.sqrt.f32(float %2373) - %2375 = fneg float %692 - %2376 = fmul float %2374, %2375 - %2377 = bitcast i32 %52 to float - %2378 = fadd float %2377, %2376 - %2379 = bitcast i32 %52 to float - %2380 = bitcast i32 %52 to float - %2381 = 
fmul float %2379, %2380 - %2382 = fadd float %2381, 0.000000e+00 - %2383 = bitcast i32 %686 to float - %2384 = bitcast i32 %686 to float - %2385 = fmul float %2383, %2384 - %2386 = fadd float %2382, %2385 - %2387 = call float @llvm.sqrt.f32(float %2386) - %2388 = fneg float %692 - %2389 = fmul float %2387, %2388 - %2390 = bitcast i32 %52 to float - %2391 = fadd float %2390, %2389 - %2392 = bitcast i32 %52 to float - %2393 = bitcast i32 %52 to float - %2394 = fmul float %2392, %2393 - %2395 = fadd float %2394, 0.000000e+00 - %2396 = bitcast i32 %686 to float - %2397 = bitcast i32 %686 to float - %2398 = fmul float %2396, %2397 - %2399 = fadd float %2395, %2398 - %2400 = call float @llvm.sqrt.f32(float %2399) - %2401 = fneg float %692 - %2402 = fmul float %2400, %2401 - %2403 = bitcast i32 %52 to float - %2404 = fadd float %2403, %2402 - %2405 = fmul float %2391, %2404 - %2406 = fadd float %2405, 0.000000e+00 - %2407 = bitcast i32 %52 to float - %2408 = bitcast i32 %52 to float - %2409 = fmul float %2407, %2408 - %2410 = fadd float %2409, 0.000000e+00 - %2411 = bitcast i32 %686 to float - %2412 = bitcast i32 %686 to float - %2413 = fmul float %2411, %2412 - %2414 = fadd float %2410, %2413 - %2415 = call float @llvm.sqrt.f32(float %2414) - %2416 = fneg float %692 - %2417 = fmul float %2415, %2416 - %2418 = fmul float %2417, 0.000000e+00 - %2419 = bitcast i32 %686 to float - %2420 = fadd float %2419, %2418 - %2421 = bitcast i32 %52 to float - %2422 = bitcast i32 %52 to float - %2423 = fmul float %2421, %2422 - %2424 = fadd float %2423, 0.000000e+00 - %2425 = bitcast i32 %686 to float - %2426 = bitcast i32 %686 to float - %2427 = fmul float %2425, %2426 - %2428 = fadd float %2424, %2427 - %2429 = call float @llvm.sqrt.f32(float %2428) - %2430 = fneg float %692 - %2431 = fmul float %2429, %2430 - %2432 = fmul float %2431, 0.000000e+00 - %2433 = bitcast i32 %686 to float - %2434 = fadd float %2433, %2432 - %2435 = fmul float %2420, %2434 - %2436 = fadd float %2406, %2435 - %2437 = call float @llvm.sqrt.f32(float %2436) - %2438 = fadd float %2437, 0.000000e+00 - %2439 = fdiv float %2378, %2438 - %2440 = fmul float %2365, %2439 - %2441 = fneg float %2440 - %2442 = fmul float %2441, %2283 - %2443 = fadd float %2442, 0.000000e+00 - %2444 = bitcast i32 %52 to float - %2445 = bitcast i32 %52 to float - %2446 = fmul float %2444, %2445 - %2447 = fadd float %2446, 0.000000e+00 - %2448 = bitcast i32 %686 to float - %2449 = bitcast i32 %686 to float - %2450 = fmul float %2448, %2449 - %2451 = fadd float %2447, %2450 - %2452 = call float @llvm.sqrt.f32(float %2451) - %2453 = fneg float %692 - %2454 = fmul float %2452, %2453 - %2455 = fmul float %2454, 0.000000e+00 - %2456 = bitcast i32 %686 to float - %2457 = fadd float %2456, %2455 - %2458 = bitcast i32 %52 to float - %2459 = bitcast i32 %52 to float - %2460 = fmul float %2458, %2459 - %2461 = fadd float %2460, 0.000000e+00 - %2462 = bitcast i32 %686 to float - %2463 = bitcast i32 %686 to float - %2464 = fmul float %2462, %2463 - %2465 = fadd float %2461, %2464 - %2466 = call float @llvm.sqrt.f32(float %2465) - %2467 = fneg float %692 - %2468 = fmul float %2466, %2467 - %2469 = bitcast i32 %52 to float - %2470 = fadd float %2469, %2468 - %2471 = bitcast i32 %52 to float - %2472 = bitcast i32 %52 to float - %2473 = fmul float %2471, %2472 - %2474 = fadd float %2473, 0.000000e+00 - %2475 = bitcast i32 %686 to float - %2476 = bitcast i32 %686 to float - %2477 = fmul float %2475, %2476 - %2478 = fadd float %2474, %2477 - %2479 = call float @llvm.sqrt.f32(float 
%2478) - %2480 = fneg float %692 - %2481 = fmul float %2479, %2480 - %2482 = bitcast i32 %52 to float - %2483 = fadd float %2482, %2481 - %2484 = fmul float %2470, %2483 - %2485 = fadd float %2484, 0.000000e+00 - %2486 = bitcast i32 %52 to float - %2487 = bitcast i32 %52 to float - %2488 = fmul float %2486, %2487 - %2489 = fadd float %2488, 0.000000e+00 - %2490 = bitcast i32 %686 to float - %2491 = bitcast i32 %686 to float - %2492 = fmul float %2490, %2491 - %2493 = fadd float %2489, %2492 - %2494 = call float @llvm.sqrt.f32(float %2493) - %2495 = fneg float %692 - %2496 = fmul float %2494, %2495 - %2497 = fmul float %2496, 0.000000e+00 - %2498 = bitcast i32 %686 to float - %2499 = fadd float %2498, %2497 - %2500 = bitcast i32 %52 to float - %2501 = bitcast i32 %52 to float - %2502 = fmul float %2500, %2501 - %2503 = fadd float %2502, 0.000000e+00 - %2504 = bitcast i32 %686 to float - %2505 = bitcast i32 %686 to float - %2506 = fmul float %2504, %2505 - %2507 = fadd float %2503, %2506 - %2508 = call float @llvm.sqrt.f32(float %2507) - %2509 = fneg float %692 - %2510 = fmul float %2508, %2509 - %2511 = fmul float %2510, 0.000000e+00 - %2512 = bitcast i32 %686 to float - %2513 = fadd float %2512, %2511 - %2514 = fmul float %2499, %2513 - %2515 = fadd float %2485, %2514 - %2516 = call float @llvm.sqrt.f32(float %2515) - %2517 = fadd float %2516, 0.000000e+00 - %2518 = fdiv float %2457, %2517 - %2519 = fmul float %2518, 2.000000e+00 - %2520 = bitcast i32 %52 to float - %2521 = bitcast i32 %52 to float - %2522 = fmul float %2520, %2521 - %2523 = fadd float %2522, 0.000000e+00 - %2524 = bitcast i32 %686 to float - %2525 = bitcast i32 %686 to float - %2526 = fmul float %2524, %2525 - %2527 = fadd float %2523, %2526 - %2528 = call float @llvm.sqrt.f32(float %2527) - %2529 = fneg float %692 - %2530 = fmul float %2528, %2529 - %2531 = fmul float %2530, 0.000000e+00 - %2532 = bitcast i32 %686 to float - %2533 = fadd float %2532, %2531 - %2534 = bitcast i32 %52 to float - %2535 = bitcast i32 %52 to float - %2536 = fmul float %2534, %2535 - %2537 = fadd float %2536, 0.000000e+00 - %2538 = bitcast i32 %686 to float - %2539 = bitcast i32 %686 to float - %2540 = fmul float %2538, %2539 - %2541 = fadd float %2537, %2540 - %2542 = call float @llvm.sqrt.f32(float %2541) - %2543 = fneg float %692 - %2544 = fmul float %2542, %2543 - %2545 = bitcast i32 %52 to float - %2546 = fadd float %2545, %2544 - %2547 = bitcast i32 %52 to float - %2548 = bitcast i32 %52 to float - %2549 = fmul float %2547, %2548 - %2550 = fadd float %2549, 0.000000e+00 - %2551 = bitcast i32 %686 to float - %2552 = bitcast i32 %686 to float - %2553 = fmul float %2551, %2552 - %2554 = fadd float %2550, %2553 - %2555 = call float @llvm.sqrt.f32(float %2554) - %2556 = fneg float %692 - %2557 = fmul float %2555, %2556 - %2558 = bitcast i32 %52 to float - %2559 = fadd float %2558, %2557 - %2560 = fmul float %2546, %2559 - %2561 = fadd float %2560, 0.000000e+00 - %2562 = bitcast i32 %52 to float - %2563 = bitcast i32 %52 to float - %2564 = fmul float %2562, %2563 - %2565 = fadd float %2564, 0.000000e+00 - %2566 = bitcast i32 %686 to float - %2567 = bitcast i32 %686 to float - %2568 = fmul float %2566, %2567 - %2569 = fadd float %2565, %2568 - %2570 = call float @llvm.sqrt.f32(float %2569) - %2571 = fneg float %692 - %2572 = fmul float %2570, %2571 - %2573 = fmul float %2572, 0.000000e+00 - %2574 = bitcast i32 %686 to float - %2575 = fadd float %2574, %2573 - %2576 = bitcast i32 %52 to float - %2577 = bitcast i32 %52 to float - %2578 = fmul 
float %2576, %2577 - %2579 = fadd float %2578, 0.000000e+00 - %2580 = bitcast i32 %686 to float - %2581 = bitcast i32 %686 to float - %2582 = fmul float %2580, %2581 - %2583 = fadd float %2579, %2582 - %2584 = call float @llvm.sqrt.f32(float %2583) - %2585 = fneg float %692 - %2586 = fmul float %2584, %2585 - %2587 = fmul float %2586, 0.000000e+00 - %2588 = bitcast i32 %686 to float - %2589 = fadd float %2588, %2587 - %2590 = fmul float %2575, %2589 - %2591 = fadd float %2561, %2590 - %2592 = call float @llvm.sqrt.f32(float %2591) - %2593 = fadd float %2592, 0.000000e+00 - %2594 = fdiv float %2533, %2593 - %2595 = fmul float %2519, %2594 - %2596 = fsub float 1.000000e+00, %2595 - %2597 = load float, float* %44, align 4 - %2598 = fmul float %2596, %2597 - %2599 = fadd float %2443, %2598 - %2600 = insertelement <4 x float> zeroinitializer, float %2599, i32 0 - %2601 = insertelement <4 x float> %2600, float 0.000000e+00, i32 1 - %2602 = insertelement <4 x float> %2601, float 0.000000e+00, i32 2 - %2603 = insertelement <4 x float> %2602, float 0.000000e+00, i32 3 - %2604 = extractelement <4 x float> %2603, i32 0 - store float %2604, float* %2126, align 4 - %2605 = getelementptr float, float* %1, i32 0 - %2606 = getelementptr inbounds float, float* %2605, i64 2 - %2607 = bitcast float* %2606 to i32* - %2608 = load i32, i32* %2607, align 4 - %2609 = bitcast i32 %2608 to float - %2610 = insertelement <4 x float> zeroinitializer, float %2609, i32 0 - %2611 = getelementptr float, float* %1, i32 0 - %2612 = getelementptr inbounds float, float* %2611, i64 1 - %2613 = bitcast float* %2612 to i32* - %2614 = load i32, i32* %2613, align 4 - %2615 = bitcast i32 %2614 to float - %2616 = insertelement <4 x float> %2610, float %2615, i32 1 - %2617 = insertelement <4 x float> %2616, float 0.000000e+00, i32 2 - %2618 = insertelement <4 x float> %2617, float 0.000000e+00, i32 3 - %2619 = extractelement <4 x float> %2618, i32 0 - %2620 = bitcast i32* %2613 to float* - store float %2619, float* %2620, align 4 - %2621 = extractelement <4 x float> %2618, i32 1 - %2622 = bitcast i32* %2607 to float* - store float %2621, float* %2622, align 4 - ret void -} - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { -.preheader13: - %3 = bitcast float* %2 to i8* - %4 = bitcast float* %0 to i8* - %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) - %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #9 - %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 - %8 = bitcast i8* %7 to float* - store float 1.000000e+00, float* %8, align 4 - %9 = getelementptr inbounds i8, i8* %7, i64 8 - %10 = getelementptr inbounds i8, i8* %7, i64 12 - %11 = bitcast i8* %10 to float* - store float 1.000000e+00, float* %11, align 4 - %12 = bitcast float* %1 to i8* - %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) - %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 - %15 = bitcast i8* %14 to float* - %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 - %17 = bitcast i8* %16 to float* - %18 = bitcast float* %2 to i32* - %19 = load i32, i32* %18, align 4 - %20 = bitcast i8* %14 to i32* - store i32 %19, i32* %20, align 4 - %21 = bitcast i8* %7 to i32* - %22 = load i32, i32* %21, align 4 - %23 = bitcast i8* %16 to i32* 
- store i32 %22, i32* %23, align 4 - %24 = getelementptr inbounds float, float* %2, i64 2 - %25 = bitcast float* %24 to i32* - %26 = load i32, i32* %25, align 4 - %27 = getelementptr inbounds i8, i8* %14, i64 4 - %28 = bitcast i8* %27 to i32* - store i32 %26, i32* %28, align 4 - %29 = bitcast i8* %9 to i32* - %30 = load i32, i32* %29, align 4 - %31 = getelementptr inbounds i8, i8* %16, i64 4 - %32 = bitcast i8* %31 to i32* - store i32 %30, i32* %32, align 4 - %33 = load float, float* %15, align 4 - %34 = call float @no_opt_sgn(float %33) - %35 = fneg float %34 - %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) - %37 = fmul float %36, %35 - %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 - %39 = bitcast i8* %38 to float* - %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 - %41 = load float, float* %15, align 4 - %42 = load float, float* %17, align 4 - %43 = fmul float %37, %42 - %44 = fadd float %41, %43 - store float %44, float* %39, align 4 - %45 = bitcast i8* %27 to float* - %46 = load float, float* %45, align 4 - %47 = bitcast i8* %31 to float* - %48 = load float, float* %47, align 4 - %49 = fmul float %37, %48 - %50 = fadd float %46, %49 - %51 = getelementptr inbounds i8, i8* %38, i64 4 - %52 = bitcast i8* %51 to float* - store float %50, float* %52, align 4 - %53 = bitcast i8* %40 to float* - %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) - %55 = fadd float %54, 0x3EE4F8B580000000 - %56 = load float, float* %39, align 4 - %57 = fdiv float %56, %55 - store float %57, float* %53, align 4 - %58 = load float, float* %52, align 4 - %59 = fdiv float %58, %55 - %60 = getelementptr inbounds i8, i8* %40, i64 4 - %61 = bitcast i8* %60 to float* - store float %59, float* %61, align 4 - %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 - %63 = bitcast i8* %62 to float* - %64 = load float, float* %53, align 4 - %65 = fmul float %64, 2.000000e+00 - %66 = fmul float %65, %64 - %67 = fsub float 1.000000e+00, %66 - store float %67, float* %63, align 4 - %68 = load float, float* %53, align 4 - %69 = fmul float %68, 2.000000e+00 - %70 = load float, float* %61, align 4 - %71 = fmul float %69, %70 - %72 = fsub float 0.000000e+00, %71 - %73 = getelementptr inbounds i8, i8* %62, i64 4 - %74 = bitcast i8* %73 to float* - store float %72, float* %74, align 4 - %75 = load float, float* %61, align 4 - %76 = fmul float %75, 2.000000e+00 - %77 = load float, float* %53, align 4 - %78 = fmul float %76, %77 - %79 = fsub float 0.000000e+00, %78 - %80 = getelementptr inbounds i8, i8* %62, i64 8 - %81 = bitcast i8* %80 to float* - store float %79, float* %81, align 4 - %82 = load float, float* %61, align 4 - %83 = fmul float %82, 2.000000e+00 - %84 = fmul float %83, %82 - %85 = fsub float 1.000000e+00, %84 - %86 = getelementptr inbounds i8, i8* %62, i64 12 - %87 = bitcast i8* %86 to float* - store float %85, float* %87, align 4 - %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 - %89 = bitcast i8* %88 to float* - %90 = bitcast i8* %62 to i32* - %91 = load i32, i32* %90, align 4 - %92 = bitcast i8* %88 to i32* - store i32 %91, i32* %92, align 4 - %93 = bitcast i8* %73 to i32* - %94 = load i32, i32* %93, align 4 - %95 = getelementptr inbounds i8, i8* %88, i64 4 - %96 = bitcast i8* %95 to i32* - store i32 %94, i32* %96, align 4 - %97 = bitcast i8* %80 to i32* - %98 = load i32, i32* %97, align 4 - %99 = getelementptr inbounds i8, i8* %88, i64 8 - %100 = bitcast i8* %99 to i32* - store i32 %98, i32* %100, align 4 - 
%101 = bitcast i8* %86 to i32* - %102 = load i32, i32* %101, align 4 - %103 = getelementptr inbounds i8, i8* %88, i64 12 - %104 = bitcast i8* %103 to i32* - store i32 %102, i32* %104, align 4 - %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #9 - call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) - call void @free(i8* %14) - call void @free(i8* %16) - call void @free(i8* %38) - call void @free(i8* %40) - call void @free(i8* %62) - call void @free(i8* %88) - call void @no_opt_naive_fixed_transpose(float* %1) - ret void -} - -; Function Attrs: nounwind -declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 - -; Function Attrs: nounwind readnone speculatable willreturn -declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 - -; Function Attrs: allocsize(0,1) -declare i8* @calloc(i64, i64) #5 - -declare void @free(i8*) #6 - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #1 { -.preheader6: - %0 = alloca i64, align 8 - %1 = alloca [4 x float], align 16 - %2 = alloca [4 x float], align 16 - %3 = alloca [4 x float], align 16 - %4 = alloca [4 x float], align 16 - %5 = alloca [4 x float], align 16 - %6 = call i64 @time(i64* null) #9 - store i64 %6, i64* %0, align 8 - %7 = call i64 @time(i64* nonnull %0) #9 - %8 = trunc i64 %7 to i32 - call void @srand(i32 %8) #9 - %9 = call i32 @rand() #9 - %10 = sitofp i32 %9 to float - %11 = fdiv float %10, 0x41747AE140000000 - %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 - store float %11, float* %12, align 16 - %13 = call i32 @rand() #9 - %14 = sitofp i32 %13 to float - %15 = fdiv float %14, 0x41747AE140000000 - %16 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 - store float %15, float* %16, align 4 - %17 = call i32 @rand() #9 - %18 = sitofp i32 %17 to float - %19 = fdiv float %18, 0x41747AE140000000 - %20 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 - store float %19, float* %20, align 8 - %21 = call i32 @rand() #9 - %22 = sitofp i32 %21 to float - %23 = fdiv float %22, 0x41747AE140000000 - %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 - store float %23, float* %24, align 4 - %25 = bitcast [4 x float]* %2 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %25, i8 0, i64 16, i1 false) - %26 = bitcast [4 x float]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %26, i8 0, i64 16, i1 false) - %27 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %28 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %27, float* nonnull %28) - %29 = bitcast [4 x float]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %29, i8 0, i64 16, i1 false) - %30 = bitcast [4 x float]* %5 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %30, i8 0, i64 16, i1 false) - %31 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - %32 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 - call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %31, float* nonnull %32) - %33 = load float, float* %27, align 16 - %34 = fpext float %33 to double - %35 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %34) #9 - %36 = load float, float* %31, align 16 - %37 = fpext float %36 to double - %38 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %37) #9 - %39 = load float, float* %31, align 16 - %40 = load float, float* %27, align 16 - %41 = fsub float %39, %40 - %42 = call float @llvm.fabs.f32(float %41) - %43 = fcmp uge float %42, 0x3FB99999A0000000 - br i1 %43, label %58, label %44 - -44: ; preds = %.preheader6 - %45 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 - %46 = load float, float* %45, align 4 - %47 = fpext float %46 to double - %48 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %47) #9 - %49 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 - %50 = load float, float* %49, align 4 - %51 = fpext float %50 to double - %52 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %51) #9 - %53 = load float, float* %31, align 16 - %54 = load float, float* %27, align 16 - %55 = fsub float %53, %54 - %56 = call float @llvm.fabs.f32(float %55) - %57 = fcmp uge float %56, 0x3FB99999A0000000 - br i1 %57, label %58, label %.preheader6.1 - -58: ; preds = %115, %.preheader6.1, %44, %.preheader6 - call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 300, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.3, i64 0, i64 0)) #11 - unreachable - -59: ; preds = %.preheader5 - %60 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 - %61 = load float, float* %60, align 4 - %62 = fpext float %61 to double - %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %62) #9 - %64 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 - %65 = load float, float* %64, align 4 - %66 = fpext float %65 to double - %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %66) #9 - %68 = load float, float* %32, align 16 - %69 = load float, float* %28, align 16 - %70 = fsub float %68, %69 - %71 = call float @llvm.fabs.f32(float %70) - %72 = fcmp uge float %71, 0x3FB99999A0000000 - br i1 %72, label %73, label %.preheader.1 - -73: ; preds = %.preheader5, %87, %.preheader.1, %59 - call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 307, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.6, i64 0, i64 0)) #11 - unreachable - -.preheader.1: ; preds = %59 - %74 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 - %75 = load float, float* %74, align 8 - %76 = fpext float %75 to double - %77 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %76) #9 - %78 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 - %79 = load float, float* %78, align 8 - %80 = fpext float %79 to double - %81 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %80) #9 - %82 = load float, float* %64, align 4 - %83 = load float, float* %60, align 4 - %84 = fsub float %82, %83 - %85 = call float @llvm.fabs.f32(float %84) - %86 = fcmp uge float %85, 0x3FB99999A0000000 - br i1 %86, label %73, label %87 - -87: ; preds = %.preheader.1 - %88 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 - %89 = load float, float* %88, align 4 - %90 = fpext float %89 to double - %91 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %90) #9 - %92 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 - %93 = load float, float* %92, align 4 - %94 = fpext float %93 to double - %95 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %94) #9 - %96 = load float, float* %64, align 4 - %97 = load float, float* %60, align 4 - %98 = fsub float %96, %97 - %99 = call float @llvm.fabs.f32(float %98) - %100 = fcmp uge float %99, 0x3FB99999A0000000 - br i1 %100, label %73, label %101 - -101: ; preds = %87 - ret i32 0 - -.preheader6.1: ; preds = %44 - %102 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 - %103 = load float, float* %102, align 8 - %104 = fpext float %103 to double - %105 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %104) #9 - %106 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 - %107 = load float, float* %106, align 8 - %108 = fpext float %107 to double - %109 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %108) #9 - %110 = load float, float* %49, align 4 - %111 = load float, float* %45, align 4 - %112 = fsub float %110, %111 - %113 = call float @llvm.fabs.f32(float %112) - %114 = fcmp uge float %113, 0x3FB99999A0000000 - br i1 %114, label %58, label %115 - -115: ; preds = %.preheader6.1 - %116 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 - %117 = load float, float* %116, align 4 - %118 = fpext float %117 to double - %119 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %118) #9 - %120 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 - %121 = load float, float* %120, align 4 - %122 = fpext float %121 to double - %123 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %122) #9 - %124 = load float, float* %49, align 4 - %125 = load float, float* %45, align 4 - %126 = fsub float %124, %125 - %127 = call float @llvm.fabs.f32(float %126) - %128 = fcmp uge float %127, 0x3FB99999A0000000 - br i1 %128, label %58, label %.preheader5 - -.preheader5: ; preds = %115 - %129 = load float, float* %28, align 16 - %130 = fpext float %129 to double - %131 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %130) #9 - %132 = load float, float* %32, align 16 - %133 = fpext float %132 to double - %134 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %133) #9 - %135 = load float, float* %32, align 16 - %136 = load float, float* %28, align 16 - %137 = fsub float %135, %136 - %138 = call float @llvm.fabs.f32(float %137) - %139 = fcmp uge float %138, 0x3FB99999A0000000 - br i1 %139, label %73, label %59 -} - -declare i64 @time(i64*) #6 - -declare void @srand(i32) #6 - -declare i32 @rand() #6 - -declare i32 @printf(i8*, ...) #6 - -; Function Attrs: nounwind readnone speculatable willreturn -declare double @llvm.fabs.f64(double) #2 - -; Function Attrs: noreturn -declare void @__assert_rtn(i8*, i8*, i32, i8*) #7 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #8 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fabs.f32(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #2 - -attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone speculatable willreturn } -attributes #3 = { argmemonly nounwind willreturn writeonly } -attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" 
"target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #8 = { argmemonly nounwind willreturn } -attributes #9 = { nounwind } -attributes #10 = { nounwind allocsize(0,1) } -attributes #11 = { noreturn nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} -!3 = distinct !{!3, !4} -!4 = !{!"llvm.loop.unroll.disable"} -!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/flaky-diospyros.ll b/src/dios-egraphs/Diospyros/flaky-outputs/flaky-diospyros.ll deleted file mode 100644 index 00e98758..00000000 --- a/src/dios-egraphs/Diospyros/flaky-outputs/flaky-diospyros.ll +++ /dev/null @@ -1,4260 +0,0 @@ -; ModuleID = 'build/aa.ll' -source_filename = "fail-tests/qr-decomp-local-arrays.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@.str = private unnamed_addr constant [14 x i8] c"Q Output: %f\0A\00", align 1 -@.str.1 = private unnamed_addr constant [23 x i8] c"Expected Q Output: %f\0A\00", align 1 -@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 -@.str.2 = private unnamed_addr constant [36 x i8] c"fail-tests/qr-decomp-local-arrays.c\00", align 1 -@.str.3 = private unnamed_addr constant [34 x i8] c"fabs(expectedQ[i] - Q[i]) < DELTA\00", align 1 -@.str.4 = private unnamed_addr constant [14 x i8] c"R Output: %f\0A\00", align 1 -@.str.5 = private unnamed_addr constant [23 x i8] c"Expected R Output: %f\0A\00", align 1 -@.str.6 = private unnamed_addr constant [34 x i8] c"fabs(expectedR[i] - R[i]) < DELTA\00", align 1 - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @sgn(float %0) #0 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_sgn(float %0) #1 { - %2 = fcmp ogt float %0, 0.000000e+00 - %3 = zext i1 %2 to i32 - %4 = fcmp olt float %0, 0.000000e+00 - %.neg = sext i1 %4 to i32 - %5 = add nsw i32 %.neg, %3 - %6 = sitofp i32 %5 to float - ret float %6 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define float @naive_norm(float* %0, i32 %1) #0 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - %wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ 
%indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 - %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 = fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !3 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32(float) #2 - -; Function Attrs: noinline nounwind ssp uwtable -define float @no_opt_naive_norm(float* %0, i32 %1) #1 { - %3 = icmp sgt i32 %1, 0 - %smax = select i1 %3, i32 %1, i32 0 - %wide.trip.count = zext i32 %smax to i64 - br i1 %3, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %2 - %4 = add nsw i64 %wide.trip.count, -1 - %xtraiter = and i64 %wide.trip.count, 3 - %5 = icmp ult i64 %4, 3 - br i1 %5, label %._crit_edge.unr-lcssa, label %.lr.ph.new - -.lr.ph.new: ; preds = %.lr.ph - %unroll_iter = and i64 %wide.trip.count, 2147483644 - br label %6 - -6: ; preds = %6, %.lr.ph.new - %.013 = phi float [ 0.000000e+00, %.lr.ph.new ], [ %22, %6 ] - %indvars.iv2 = phi i64 [ 0, %.lr.ph.new ], [ %indvars.iv.next.3, %6 ] - %niter = phi i64 [ %unroll_iter, %.lr.ph.new ], [ %niter.nsub.3, %6 ] - %7 = getelementptr inbounds float, float* %0, i64 %indvars.iv2 - %8 = load float, float* %7, align 4 - %9 = fmul float %8, %8 - %10 = fadd float %.013, %9 
- %indvars.iv.next = or i64 %indvars.iv2, 1 - %11 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next - %12 = load float, float* %11, align 4 - %13 = fmul float %12, %12 - %14 = fadd float %10, %13 - %indvars.iv.next.1 = or i64 %indvars.iv2, 2 - %15 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.1 - %16 = load float, float* %15, align 4 - %17 = fmul float %16, %16 - %18 = fadd float %14, %17 - %indvars.iv.next.2 = or i64 %indvars.iv2, 3 - %19 = getelementptr inbounds float, float* %0, i64 %indvars.iv.next.2 - %20 = load float, float* %19, align 4 - %21 = fmul float %20, %20 - %22 = fadd float %18, %21 - %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv2, 4 - %niter.nsub.3 = add i64 %niter, -4 - %niter.ncmp.3.not = icmp eq i64 %niter.nsub.3, 0 - br i1 %niter.ncmp.3.not, label %._crit_edge.unr-lcssa, label %6 - -._crit_edge.unr-lcssa: ; preds = %6, %.lr.ph - %split.ph = phi float [ undef, %.lr.ph ], [ %22, %6 ] - %.013.unr = phi float [ 0.000000e+00, %.lr.ph ], [ %22, %6 ] - %indvars.iv2.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.3, %6 ] - %lcmp.mod.not = icmp eq i64 %xtraiter, 0 - br i1 %lcmp.mod.not, label %._crit_edge, label %.epil.preheader - -.epil.preheader: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa - %.013.epil = phi float [ %26, %.epil.preheader ], [ %.013.unr, %._crit_edge.unr-lcssa ] - %indvars.iv2.epil = phi i64 [ %indvars.iv.next.epil, %.epil.preheader ], [ %indvars.iv2.unr, %._crit_edge.unr-lcssa ] - %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %._crit_edge.unr-lcssa ] - %23 = getelementptr inbounds float, float* %0, i64 %indvars.iv2.epil - %24 = load float, float* %23, align 4 - %25 = fmul float %24, %24 - %26 = fadd float %.013.epil, %25 - %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv2.epil, 1 - %epil.iter.sub = add i64 %epil.iter, -1 - %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 - br i1 %epil.iter.cmp.not, label %._crit_edge, label %.epil.preheader, !llvm.loop !5 - -._crit_edge: ; preds = %.epil.preheader, %._crit_edge.unr-lcssa, %2 - %.01.lcssa = phi float [ 0.000000e+00, %2 ], [ %split.ph, %._crit_edge.unr-lcssa ], [ %26, %.epil.preheader ] - %27 = call float @llvm.sqrt.f32(float %.01.lcssa) - ret float %27 -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_transpose(float* %0) #0 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - %6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_transpose(float* %0) #1 { -.lr.ph: - %1 = getelementptr inbounds float, float* %0, i64 1 - %2 = bitcast float* %1 to i32* - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds float, float* %0, i64 2 - %5 = bitcast float* %4 to i32* - %6 = load i32, i32* %5, align 4 - store i32 %6, i32* %2, align 4 - store i32 %3, i32* %5, align 4 - ret void -} - -; Function Attrs: alwaysinline nounwind ssp uwtable -define void @naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #0 { -.preheader: - %3 = load float, float* %0, align 4 - %4 = load float, float* %1, align 4 - %5 = fmul float %3, %4 - %6 = fadd float %5, 0.000000e+00 - %7 = getelementptr inbounds float, float* %0, i64 1 - %8 = load float, float* %7, align 4 - %9 = getelementptr inbounds float, float* %1, i64 
2 - %10 = load float, float* %9, align 4 - %11 = fmul float %8, %10 - %12 = fadd float %6, %11 - %13 = getelementptr inbounds float, float* %2, i64 1 - %14 = load float, float* %0, align 4 - %15 = getelementptr inbounds float, float* %1, i64 1 - %16 = load float, float* %15, align 4 - %17 = fmul float %14, %16 - %18 = fadd float %17, 0.000000e+00 - %19 = load float, float* %7, align 4 - %20 = getelementptr inbounds float, float* %1, i64 3 - %21 = load float, float* %20, align 4 - %22 = fmul float %19, %21 - %23 = fadd float %18, %22 - %24 = getelementptr inbounds float, float* %0, i64 2 - %25 = getelementptr inbounds float, float* %2, i64 2 - %26 = load float, float* %24, align 4 - %27 = load float, float* %1, align 4 - %28 = fmul float %26, %27 - %29 = fadd float %28, 0.000000e+00 - %30 = getelementptr inbounds float, float* %0, i64 3 - %31 = load float, float* %30, align 4 - %32 = load float, float* %9, align 4 - %33 = fmul float %31, %32 - %34 = fadd float %29, %33 - %35 = getelementptr inbounds float, float* %2, i64 3 - %36 = load float, float* %24, align 4 - %37 = load float, float* %15, align 4 - %38 = fmul float %36, %37 - %39 = fadd float %38, 0.000000e+00 - %40 = load float, float* %30, align 4 - %41 = load float, float* %20, align 4 - %42 = fmul float %40, %41 - %43 = fadd float %39, %42 - store float 0.000000e+00, float* %2, align 4 - %44 = getelementptr float, float* %0, i32 0 - %45 = load float, float* %44, align 4 - %46 = insertelement <4 x float> zeroinitializer, float %45, i32 0 - %47 = insertelement <4 x float> %46, float 0.000000e+00, i32 1 - %48 = insertelement <4 x float> %47, float 0.000000e+00, i32 2 - %49 = insertelement <4 x float> %48, float 0.000000e+00, i32 3 - %50 = getelementptr float, float* %1, i32 0 - %51 = load float, float* %50, align 4 - %52 = insertelement <4 x float> zeroinitializer, float %51, i32 0 - %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 1 - %54 = insertelement <4 x float> %53, float 0.000000e+00, i32 2 - %55 = insertelement <4 x float> %54, float 0.000000e+00, i32 3 - %56 = call <4 x float> @llvm.fma.f32(<4 x float> %49, <4 x float> %55, <4 x float> zeroinitializer) - %57 = extractelement <4 x float> %56, i32 0 - store float %57, float* %2, align 4 - %58 = insertelement <4 x float> zeroinitializer, float %45, i32 0 - %59 = insertelement <4 x float> %58, float 1.000000e+00, i32 1 - %60 = insertelement <4 x float> %59, float 1.000000e+00, i32 2 - %61 = insertelement <4 x float> %60, float 1.000000e+00, i32 3 - %62 = getelementptr float, float* %1, i32 0 - %63 = load float, float* %62, align 4 - %64 = insertelement <4 x float> zeroinitializer, float %63, i32 0 - %65 = insertelement <4 x float> %64, float 0.000000e+00, i32 1 - %66 = insertelement <4 x float> %65, float 0.000000e+00, i32 2 - %67 = insertelement <4 x float> %66, float 0.000000e+00, i32 3 - %68 = fmul <4 x float> %61, %67 - %69 = fadd <4 x float> %68, zeroinitializer - %70 = getelementptr float, float* %0, i32 0 - %71 = getelementptr inbounds float, float* %70, i64 1 - %72 = load float, float* %71, align 4 - %73 = insertelement <4 x float> zeroinitializer, float %72, i32 0 - %74 = insertelement <4 x float> %73, float 0.000000e+00, i32 1 - %75 = insertelement <4 x float> %74, float 0.000000e+00, i32 2 - %76 = insertelement <4 x float> %75, float 0.000000e+00, i32 3 - %77 = getelementptr float, float* %1, i32 0 - %78 = getelementptr inbounds float, float* %77, i64 2 - %79 = load float, float* %78, align 4 - %80 = insertelement <4 x float> zeroinitializer, float %79, i32 
0 - %81 = insertelement <4 x float> %80, float 0.000000e+00, i32 1 - %82 = insertelement <4 x float> %81, float 0.000000e+00, i32 2 - %83 = insertelement <4 x float> %82, float 0.000000e+00, i32 3 - %84 = call <4 x float> @llvm.fma.f32.1(<4 x float> %76, <4 x float> %83, <4 x float> %69) - %85 = extractelement <4 x float> %84, i32 0 - store float %85, float* %2, align 4 - %86 = extractelement <4 x float> %84, i32 1 - %87 = getelementptr float, float* %2, i32 0 - %88 = getelementptr inbounds float, float* %87, i64 1 - store float %86, float* %88, align 4 - %89 = getelementptr float, float* %0, i32 0 - %90 = load float, float* %89, align 4 - %91 = insertelement <4 x float> zeroinitializer, float %90, i32 0 - %92 = insertelement <4 x float> %91, float 0.000000e+00, i32 1 - %93 = insertelement <4 x float> %92, float 0.000000e+00, i32 2 - %94 = insertelement <4 x float> %93, float 0.000000e+00, i32 3 - %95 = getelementptr float, float* %1, i32 0 - %96 = getelementptr inbounds float, float* %95, i64 1 - %97 = load float, float* %96, align 4 - %98 = insertelement <4 x float> zeroinitializer, float %97, i32 0 - %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 1 - %100 = insertelement <4 x float> %99, float 0.000000e+00, i32 2 - %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 3 - %102 = call <4 x float> @llvm.fma.f32.2(<4 x float> %94, <4 x float> %101, <4 x float> zeroinitializer) - %103 = extractelement <4 x float> %102, i32 0 - %104 = getelementptr float, float* %2, i32 0 - %105 = getelementptr inbounds float, float* %104, i64 1 - store float %103, float* %105, align 4 - %106 = insertelement <4 x float> zeroinitializer, float %90, i32 0 - %107 = insertelement <4 x float> %106, float 1.000000e+00, i32 1 - %108 = insertelement <4 x float> %107, float 1.000000e+00, i32 2 - %109 = insertelement <4 x float> %108, float 1.000000e+00, i32 3 - %110 = load float, float* %96, align 4 - %111 = insertelement <4 x float> zeroinitializer, float %110, i32 0 - %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 1 - %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 2 - %114 = insertelement <4 x float> %113, float 0.000000e+00, i32 3 - %115 = fmul <4 x float> %109, %114 - %116 = fadd <4 x float> %115, zeroinitializer - %117 = getelementptr float, float* %0, i32 0 - %118 = getelementptr inbounds float, float* %117, i64 1 - %119 = load float, float* %118, align 4 - %120 = insertelement <4 x float> zeroinitializer, float %119, i32 0 - %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 1 - %122 = insertelement <4 x float> %121, float 0.000000e+00, i32 2 - %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3 - %124 = getelementptr float, float* %1, i32 0 - %125 = getelementptr inbounds float, float* %124, i64 3 - %126 = load float, float* %125, align 4 - %127 = insertelement <4 x float> zeroinitializer, float %126, i32 0 - %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 1 - %129 = insertelement <4 x float> %128, float 0.000000e+00, i32 2 - %130 = insertelement <4 x float> %129, float 0.000000e+00, i32 3 - %131 = call <4 x float> @llvm.fma.f32.3(<4 x float> %123, <4 x float> %130, <4 x float> %116) - %132 = extractelement <4 x float> %131, i32 0 - %133 = getelementptr float, float* %2, i32 0 - %134 = getelementptr inbounds float, float* %133, i64 1 - store float %132, float* %134, align 4 - %135 = extractelement <4 x float> %131, i32 1 - %136 = getelementptr float, float* %2, i32 0 - %137 = getelementptr inbounds float, float* 
%136, i64 2 - store float %135, float* %137, align 4 - %138 = getelementptr float, float* %0, i32 0 - %139 = getelementptr inbounds float, float* %138, i64 2 - %140 = load float, float* %139, align 4 - %141 = insertelement <4 x float> zeroinitializer, float %140, i32 0 - %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 1 - %143 = insertelement <4 x float> %142, float 0.000000e+00, i32 2 - %144 = insertelement <4 x float> %143, float 0.000000e+00, i32 3 - %145 = getelementptr float, float* %1, i32 0 - %146 = load float, float* %145, align 4 - %147 = insertelement <4 x float> zeroinitializer, float %146, i32 0 - %148 = insertelement <4 x float> %147, float 0.000000e+00, i32 1 - %149 = insertelement <4 x float> %148, float 0.000000e+00, i32 2 - %150 = insertelement <4 x float> %149, float 0.000000e+00, i32 3 - %151 = call <4 x float> @llvm.fma.f32.4(<4 x float> %144, <4 x float> %150, <4 x float> zeroinitializer) - %152 = extractelement <4 x float> %151, i32 0 - %153 = getelementptr float, float* %2, i32 0 - %154 = getelementptr inbounds float, float* %153, i64 2 - store float %152, float* %154, align 4 - %155 = insertelement <4 x float> zeroinitializer, float %140, i32 0 - %156 = insertelement <4 x float> %155, float 1.000000e+00, i32 1 - %157 = insertelement <4 x float> %156, float 1.000000e+00, i32 2 - %158 = insertelement <4 x float> %157, float 1.000000e+00, i32 3 - %159 = insertelement <4 x float> zeroinitializer, float %146, i32 0 - %160 = insertelement <4 x float> %159, float 0.000000e+00, i32 1 - %161 = insertelement <4 x float> %160, float 0.000000e+00, i32 2 - %162 = insertelement <4 x float> %161, float 0.000000e+00, i32 3 - %163 = fmul <4 x float> %158, %162 - %164 = fadd <4 x float> %163, zeroinitializer - %165 = getelementptr float, float* %0, i32 0 - %166 = getelementptr inbounds float, float* %165, i64 3 - %167 = load float, float* %166, align 4 - %168 = insertelement <4 x float> zeroinitializer, float %167, i32 0 - %169 = insertelement <4 x float> %168, float 0.000000e+00, i32 1 - %170 = insertelement <4 x float> %169, float 0.000000e+00, i32 2 - %171 = insertelement <4 x float> %170, float 0.000000e+00, i32 3 - %172 = load float, float* %78, align 4 - %173 = insertelement <4 x float> zeroinitializer, float %172, i32 0 - %174 = insertelement <4 x float> %173, float 0.000000e+00, i32 1 - %175 = insertelement <4 x float> %174, float 0.000000e+00, i32 2 - %176 = insertelement <4 x float> %175, float 0.000000e+00, i32 3 - %177 = call <4 x float> @llvm.fma.f32.5(<4 x float> %171, <4 x float> %176, <4 x float> %164) - %178 = extractelement <4 x float> %177, i32 0 - store float %178, float* %154, align 4 - %179 = extractelement <4 x float> %177, i32 1 - %180 = getelementptr float, float* %2, i32 0 - %181 = getelementptr inbounds float, float* %180, i64 3 - store float %179, float* %181, align 4 - %182 = load float, float* %139, align 4 - %183 = insertelement <4 x float> zeroinitializer, float %182, i32 0 - %184 = insertelement <4 x float> %183, float 0.000000e+00, i32 1 - %185 = insertelement <4 x float> %184, float 0.000000e+00, i32 2 - %186 = insertelement <4 x float> %185, float 0.000000e+00, i32 3 - %187 = load float, float* %96, align 4 - %188 = insertelement <4 x float> zeroinitializer, float %187, i32 0 - %189 = insertelement <4 x float> %188, float 0.000000e+00, i32 1 - %190 = insertelement <4 x float> %189, float 0.000000e+00, i32 2 - %191 = insertelement <4 x float> %190, float 0.000000e+00, i32 3 - %192 = call <4 x float> @llvm.fma.f32.6(<4 x float> %186, 
<4 x float> %191, <4 x float> zeroinitializer) - %193 = extractelement <4 x float> %192, i32 0 - store float %193, float* %181, align 4 - %194 = insertelement <4 x float> zeroinitializer, float %182, i32 0 - %195 = insertelement <4 x float> %194, float 1.000000e+00, i32 1 - %196 = insertelement <4 x float> %195, float 1.000000e+00, i32 2 - %197 = insertelement <4 x float> %196, float 1.000000e+00, i32 3 - %198 = insertelement <4 x float> zeroinitializer, float %187, i32 0 - %199 = insertelement <4 x float> %198, float 0.000000e+00, i32 1 - %200 = insertelement <4 x float> %199, float 0.000000e+00, i32 2 - %201 = insertelement <4 x float> %200, float 0.000000e+00, i32 3 - %202 = fmul <4 x float> %197, %201 - %203 = fadd <4 x float> %202, zeroinitializer - %204 = getelementptr float, float* %0, i32 0 - %205 = getelementptr inbounds float, float* %204, i64 3 - %206 = load float, float* %205, align 4 - %207 = insertelement <4 x float> zeroinitializer, float %206, i32 0 - %208 = insertelement <4 x float> %207, float 0.000000e+00, i32 1 - %209 = insertelement <4 x float> %208, float 0.000000e+00, i32 2 - %210 = insertelement <4 x float> %209, float 0.000000e+00, i32 3 - %211 = load float, float* %125, align 4 - %212 = insertelement <4 x float> zeroinitializer, float %211, i32 0 - %213 = insertelement <4 x float> %212, float 0.000000e+00, i32 1 - %214 = insertelement <4 x float> %213, float 0.000000e+00, i32 2 - %215 = insertelement <4 x float> %214, float 0.000000e+00, i32 3 - %216 = call <4 x float> @llvm.fma.f32.7(<4 x float> %210, <4 x float> %215, <4 x float> %203) - %217 = extractelement <4 x float> %216, i32 0 - store float %217, float* %181, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_matrix_multiply(float* %0, float* %1, float* %2) #1 { -.preheader: - store float 0.000000e+00, float* %2, align 4 - %3 = load float, float* %0, align 4 - %4 = load float, float* %1, align 4 - %5 = fmul float %3, %4 - %6 = fadd float %5, 0.000000e+00 - store float %6, float* %2, align 4 - %7 = getelementptr inbounds float, float* %0, i64 1 - %8 = load float, float* %7, align 4 - %9 = getelementptr inbounds float, float* %1, i64 2 - %10 = load float, float* %9, align 4 - %11 = fmul float %8, %10 - %12 = fadd float %6, %11 - store float %12, float* %2, align 4 - %13 = getelementptr inbounds float, float* %2, i64 1 - store float 0.000000e+00, float* %13, align 4 - %14 = load float, float* %0, align 4 - %15 = getelementptr inbounds float, float* %1, i64 1 - %16 = load float, float* %15, align 4 - %17 = fmul float %14, %16 - %18 = fadd float %17, 0.000000e+00 - store float %18, float* %13, align 4 - %19 = load float, float* %7, align 4 - %20 = getelementptr inbounds float, float* %1, i64 3 - %21 = load float, float* %20, align 4 - %22 = fmul float %19, %21 - %23 = fadd float %18, %22 - store float %23, float* %13, align 4 - %24 = getelementptr inbounds float, float* %0, i64 2 - %25 = getelementptr inbounds float, float* %2, i64 2 - store float 0.000000e+00, float* %25, align 4 - %26 = load float, float* %24, align 4 - %27 = load float, float* %1, align 4 - %28 = fmul float %26, %27 - %29 = fadd float %28, 0.000000e+00 - store float %29, float* %25, align 4 - %30 = getelementptr inbounds float, float* %0, i64 3 - %31 = load float, float* %30, align 4 - %32 = load float, float* %9, align 4 - %33 = fmul float %31, %32 - %34 = fadd float %29, %33 - store float %34, float* %25, align 4 - %35 = getelementptr inbounds float, float* %2, i64 3 - store float 
0.000000e+00, float* %35, align 4 - %36 = load float, float* %24, align 4 - %37 = load float, float* %15, align 4 - %38 = fmul float %36, %37 - %39 = fadd float %38, 0.000000e+00 - store float %39, float* %35, align 4 - %40 = load float, float* %30, align 4 - %41 = load float, float* %20, align 4 - %42 = fmul float %40, %41 - %43 = fadd float %39, %42 - store float %43, float* %35, align 4 - ret void -} - -; Function Attrs: noinline nounwind ssp uwtable -define void @naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { -.preheader49: - %3 = bitcast float* %1 to i8* - %4 = alloca [4 x float], align 16 - %5 = bitcast [4 x float]* %4 to i8* - %6 = bitcast float* %0 to i32* - %7 = load i32, i32* %6, align 4 - %8 = bitcast float* %2 to i32* - %9 = getelementptr inbounds float, float* %0, i64 1 - %10 = bitcast float* %9 to i32* - %11 = load i32, i32* %10, align 4 - %12 = getelementptr inbounds float, float* %2, i64 1 - %13 = bitcast float* %12 to i32* - %14 = getelementptr inbounds float, float* %0, i64 2 - %15 = bitcast float* %14 to i32* - %16 = load i32, i32* %15, align 4 - %17 = getelementptr inbounds float, float* %2, i64 2 - %18 = bitcast float* %17 to i32* - %19 = getelementptr inbounds float, float* %0, i64 3 - %20 = bitcast float* %19 to i32* - %21 = load i32, i32* %20, align 4 - %22 = getelementptr inbounds float, float* %2, i64 3 - %23 = bitcast float* %22 to i32* - %24 = bitcast i32 %7 to float - %25 = fcmp ogt float %24, 0.000000e+00 - %26 = zext i1 %25 to i32 - %27 = fcmp olt float %24, 0.000000e+00 - %.neg = sext i1 %27 to i32 - %28 = add nsw i32 %.neg, %26 - %29 = sitofp i32 %28 to float - %30 = fmul float %24, %24 - %31 = fadd float %30, 0.000000e+00 - %32 = bitcast i32 %16 to float - %33 = fmul float %32, %32 - %34 = fadd float %31, %33 - %35 = call float @llvm.sqrt.f32(float %34) #9 - %36 = fneg float %29 - %37 = fmul float %35, %36 - %38 = fadd float %24, %37 - %39 = fmul float %37, 0.000000e+00 - %40 = fadd float %32, %39 - %41 = fmul float %38, %38 - %42 = fadd float %41, 0.000000e+00 - %43 = fmul float %40, %40 - %44 = fadd float %42, %43 - %45 = call float @llvm.sqrt.f32(float %44) #9 - %46 = fadd float %45, 0x3EE4F8B580000000 - %47 = fdiv float %38, %46 - %48 = fdiv float %40, %46 - %49 = fmul float %47, 2.000000e+00 - %50 = fmul float %49, %47 - %51 = fsub float 1.000000e+00, %50 - %52 = fmul float %49, %48 - %53 = fsub float 0.000000e+00, %52 - %54 = fmul float %48, 2.000000e+00 - %55 = fmul float %54, %47 - %56 = fsub float 0.000000e+00, %55 - %57 = fmul float %54, %48 - %58 = fsub float 1.000000e+00, %57 - %59 = bitcast float %51 to i32 - %60 = bitcast [4 x float]* %4 to i32* - %61 = bitcast float %53 to i32 - %62 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 - %63 = bitcast float* %62 to i32* - %64 = bitcast float %56 to i32 - %65 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 - %66 = bitcast float* %65 to i32* - %67 = bitcast float %58 to i32 - %68 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 - %69 = bitcast float* %68 to i32* - %70 = load float, float* %0, align 4 - %71 = fmul float %51, %70 - %72 = fadd float %71, 0.000000e+00 - %73 = load float, float* %14, align 4 - %74 = fmul float %53, %73 - %75 = fadd float %72, %74 - %76 = load float, float* %9, align 4 - %77 = fmul float %51, %76 - %78 = fadd float %77, 0.000000e+00 - %79 = load float, float* %19, align 4 - %80 = fmul float %53, %79 - %81 = fadd float %78, %80 - %82 = load float, float* %0, align 4 - %83 = fmul float %56, %82 - %84 = 
fadd float %83, 0.000000e+00 - %85 = load float, float* %14, align 4 - %86 = fmul float %58, %85 - %87 = fadd float %84, %86 - %88 = load float, float* %9, align 4 - %89 = fmul float %56, %88 - %90 = fadd float %89, 0.000000e+00 - %91 = load float, float* %19, align 4 - %92 = fmul float %58, %91 - %93 = fadd float %90, %92 - %94 = getelementptr inbounds float, float* %1, i64 1 - %95 = bitcast float* %94 to i32* - %96 = load i32, i32* %95, align 4 - %97 = getelementptr inbounds float, float* %1, i64 2 - %98 = bitcast float* %97 to i32* - %99 = load i32, i32* %98, align 4 - %100 = getelementptr float, float* %0, i32 0 - %101 = bitcast float* %100 to i32* - %102 = load i32, i32* %101, align 4 - %103 = bitcast i32 %102 to float - %104 = insertelement <4 x float> zeroinitializer, float %103, i32 0 - %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 1 - %106 = insertelement <4 x float> %105, float 0.000000e+00, i32 2 - %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 3 - %108 = extractelement <4 x float> %107, i32 0 - %109 = bitcast i32* %8 to float* - %110 = getelementptr float, float* %2, i32 0 - %111 = bitcast float* %110 to i32* - %112 = bitcast i32* %111 to float* - store float %108, float* %112, align 4 - %113 = getelementptr float, float* %0, i32 0 - %114 = getelementptr inbounds float, float* %113, i64 1 - %115 = bitcast float* %114 to i32* - %116 = load i32, i32* %115, align 4 - %117 = bitcast i32 %116 to float - %118 = insertelement <4 x float> zeroinitializer, float %117, i32 0 - %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 1 - %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 2 - %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 3 - %122 = extractelement <4 x float> %121, i32 0 - %123 = bitcast i32* %13 to float* - %124 = getelementptr float, float* %2, i32 0 - %125 = getelementptr inbounds float, float* %124, i64 1 - %126 = bitcast float* %125 to i32* - %127 = bitcast i32* %126 to float* - store float %122, float* %127, align 4 - %128 = getelementptr float, float* %0, i32 0 - %129 = getelementptr inbounds float, float* %128, i64 2 - %130 = bitcast float* %129 to i32* - %131 = load i32, i32* %130, align 4 - %132 = bitcast i32 %131 to float - %133 = insertelement <4 x float> zeroinitializer, float %132, i32 0 - %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 1 - %135 = insertelement <4 x float> %134, float 0.000000e+00, i32 2 - %136 = insertelement <4 x float> %135, float 0.000000e+00, i32 3 - %137 = extractelement <4 x float> %136, i32 0 - %138 = bitcast i32* %18 to float* - %139 = getelementptr float, float* %2, i32 0 - %140 = getelementptr inbounds float, float* %139, i64 2 - %141 = bitcast float* %140 to i32* - %142 = bitcast i32* %141 to float* - store float %137, float* %142, align 4 - %143 = getelementptr float, float* %0, i32 0 - %144 = getelementptr inbounds float, float* %143, i64 3 - %145 = bitcast float* %144 to i32* - %146 = load i32, i32* %145, align 4 - %147 = bitcast i32 %146 to float - %148 = fneg float %147 - %149 = insertelement <4 x float> zeroinitializer, float %148, i32 0 - %150 = getelementptr float, float* %0, i32 0 - %151 = bitcast float* %150 to i32* - %152 = load i32, i32* %151, align 4 - %153 = bitcast i32 %152 to float - %154 = bitcast i32 %152 to float - %155 = fmul float %153, %154 - %156 = fadd float %155, 0.000000e+00 - %157 = bitcast i32 %131 to float - %158 = bitcast i32 %131 to float - %159 = fmul float %157, %158 - %160 = fadd float %156, %159 - %161 = call float 
@llvm.sqrt.f32.8(float %160) - %162 = bitcast i32 %152 to float - %163 = fcmp olt float %162, 0.000000e+00 - %164 = sext i1 %163 to i32 - %165 = fcmp ogt float %162, 0.000000e+00 - %166 = zext i1 %165 to i32 - %167 = add nsw i32 %164, %166 - %168 = sitofp i32 %167 to float - %169 = fneg float %168 - %170 = fmul float %161, %169 - %171 = bitcast i32 %152 to float - %172 = fadd float %171, %170 - %173 = bitcast i32 %152 to float - %174 = bitcast i32 %152 to float - %175 = fmul float %173, %174 - %176 = fadd float %175, 0.000000e+00 - %177 = bitcast i32 %131 to float - %178 = bitcast i32 %131 to float - %179 = fmul float %177, %178 - %180 = fadd float %176, %179 - %181 = call float @llvm.sqrt.f32.9(float %180) - %182 = fneg float %168 - %183 = fmul float %181, %182 - %184 = bitcast i32 %152 to float - %185 = fadd float %184, %183 - %186 = bitcast i32 %152 to float - %187 = bitcast i32 %152 to float - %188 = fmul float %186, %187 - %189 = fadd float %188, 0.000000e+00 - %190 = bitcast i32 %131 to float - %191 = bitcast i32 %131 to float - %192 = fmul float %190, %191 - %193 = fadd float %189, %192 - %194 = call float @llvm.sqrt.f32.10(float %193) - %195 = fneg float %168 - %196 = fmul float %194, %195 - %197 = bitcast i32 %152 to float - %198 = fadd float %197, %196 - %199 = fmul float %185, %198 - %200 = fadd float %199, 0.000000e+00 - %201 = bitcast i32 %152 to float - %202 = bitcast i32 %152 to float - %203 = fmul float %201, %202 - %204 = fadd float %203, 0.000000e+00 - %205 = bitcast i32 %131 to float - %206 = bitcast i32 %131 to float - %207 = fmul float %205, %206 - %208 = fadd float %204, %207 - %209 = call float @llvm.sqrt.f32.11(float %208) - %210 = fneg float %168 - %211 = fmul float %209, %210 - %212 = fmul float %211, 0.000000e+00 - %213 = bitcast i32 %131 to float - %214 = fadd float %213, %212 - %215 = bitcast i32 %152 to float - %216 = bitcast i32 %152 to float - %217 = fmul float %215, %216 - %218 = fadd float %217, 0.000000e+00 - %219 = bitcast i32 %131 to float - %220 = bitcast i32 %131 to float - %221 = fmul float %219, %220 - %222 = fadd float %218, %221 - %223 = call float @llvm.sqrt.f32.12(float %222) - %224 = fneg float %168 - %225 = fmul float %223, %224 - %226 = fmul float %225, 0.000000e+00 - %227 = bitcast i32 %131 to float - %228 = fadd float %227, %226 - %229 = fmul float %214, %228 - %230 = fadd float %200, %229 - %231 = call float @llvm.sqrt.f32.13(float %230) - %232 = fadd float %231, 0.000000e+00 - %233 = fdiv float %172, %232 - %234 = fmul float %233, 2.000000e+00 - %235 = bitcast i32 %152 to float - %236 = bitcast i32 %152 to float - %237 = fmul float %235, %236 - %238 = fadd float %237, 0.000000e+00 - %239 = bitcast i32 %131 to float - %240 = bitcast i32 %131 to float - %241 = fmul float %239, %240 - %242 = fadd float %238, %241 - %243 = call float @llvm.sqrt.f32.14(float %242) - %244 = fneg float %168 - %245 = fmul float %243, %244 - %246 = bitcast i32 %152 to float - %247 = fadd float %246, %245 - %248 = bitcast i32 %152 to float - %249 = bitcast i32 %152 to float - %250 = fmul float %248, %249 - %251 = fadd float %250, 0.000000e+00 - %252 = bitcast i32 %131 to float - %253 = bitcast i32 %131 to float - %254 = fmul float %252, %253 - %255 = fadd float %251, %254 - %256 = call float @llvm.sqrt.f32.15(float %255) - %257 = fneg float %168 - %258 = fmul float %256, %257 - %259 = bitcast i32 %152 to float - %260 = fadd float %259, %258 - %261 = bitcast i32 %152 to float - %262 = bitcast i32 %152 to float - %263 = fmul float %261, %262 - %264 = fadd float %263, 
0.000000e+00 - %265 = bitcast i32 %131 to float - %266 = bitcast i32 %131 to float - %267 = fmul float %265, %266 - %268 = fadd float %264, %267 - %269 = call float @llvm.sqrt.f32.16(float %268) - %270 = fneg float %168 - %271 = fmul float %269, %270 - %272 = bitcast i32 %152 to float - %273 = fadd float %272, %271 - %274 = fmul float %260, %273 - %275 = fadd float %274, 0.000000e+00 - %276 = bitcast i32 %152 to float - %277 = bitcast i32 %152 to float - %278 = fmul float %276, %277 - %279 = fadd float %278, 0.000000e+00 - %280 = bitcast i32 %131 to float - %281 = bitcast i32 %131 to float - %282 = fmul float %280, %281 - %283 = fadd float %279, %282 - %284 = call float @llvm.sqrt.f32.17(float %283) - %285 = fneg float %168 - %286 = fmul float %284, %285 - %287 = fmul float %286, 0.000000e+00 - %288 = bitcast i32 %131 to float - %289 = fadd float %288, %287 - %290 = bitcast i32 %152 to float - %291 = bitcast i32 %152 to float - %292 = fmul float %290, %291 - %293 = fadd float %292, 0.000000e+00 - %294 = bitcast i32 %131 to float - %295 = bitcast i32 %131 to float - %296 = fmul float %294, %295 - %297 = fadd float %293, %296 - %298 = call float @llvm.sqrt.f32.18(float %297) - %299 = fneg float %168 - %300 = fmul float %298, %299 - %301 = fmul float %300, 0.000000e+00 - %302 = bitcast i32 %131 to float - %303 = fadd float %302, %301 - %304 = fmul float %289, %303 - %305 = fadd float %275, %304 - %306 = call float @llvm.sqrt.f32.19(float %305) - %307 = fadd float %306, 0.000000e+00 - %308 = fdiv float %247, %307 - %309 = fmul float %234, %308 - %310 = insertelement <4 x float> %149, float %309, i32 1 - %311 = bitcast i32 %152 to float - %312 = bitcast i32 %152 to float - %313 = fmul float %311, %312 - %314 = fadd float %313, 0.000000e+00 - %315 = bitcast i32 %131 to float - %316 = bitcast i32 %131 to float - %317 = fmul float %315, %316 - %318 = fadd float %314, %317 - %319 = call float @llvm.sqrt.f32.20(float %318) - %320 = fneg float %168 - %321 = fmul float %319, %320 - %322 = bitcast i32 %152 to float - %323 = fadd float %322, %321 - %324 = bitcast i32 %152 to float - %325 = bitcast i32 %152 to float - %326 = fmul float %324, %325 - %327 = fadd float %326, 0.000000e+00 - %328 = bitcast i32 %131 to float - %329 = bitcast i32 %131 to float - %330 = fmul float %328, %329 - %331 = fadd float %327, %330 - %332 = call float @llvm.sqrt.f32.21(float %331) - %333 = fneg float %168 - %334 = fmul float %332, %333 - %335 = bitcast i32 %152 to float - %336 = fadd float %335, %334 - %337 = bitcast i32 %152 to float - %338 = bitcast i32 %152 to float - %339 = fmul float %337, %338 - %340 = fadd float %339, 0.000000e+00 - %341 = bitcast i32 %131 to float - %342 = bitcast i32 %131 to float - %343 = fmul float %341, %342 - %344 = fadd float %340, %343 - %345 = call float @llvm.sqrt.f32.22(float %344) - %346 = fneg float %168 - %347 = fmul float %345, %346 - %348 = bitcast i32 %152 to float - %349 = fadd float %348, %347 - %350 = fmul float %336, %349 - %351 = fadd float %350, 0.000000e+00 - %352 = bitcast i32 %152 to float - %353 = bitcast i32 %152 to float - %354 = fmul float %352, %353 - %355 = fadd float %354, 0.000000e+00 - %356 = bitcast i32 %131 to float - %357 = bitcast i32 %131 to float - %358 = fmul float %356, %357 - %359 = fadd float %355, %358 - %360 = call float @llvm.sqrt.f32.23(float %359) - %361 = fneg float %168 - %362 = fmul float %360, %361 - %363 = fmul float %362, 0.000000e+00 - %364 = bitcast i32 %131 to float - %365 = fadd float %364, %363 - %366 = bitcast i32 %152 to float - %367 = 
bitcast i32 %152 to float - %368 = fmul float %366, %367 - %369 = fadd float %368, 0.000000e+00 - %370 = bitcast i32 %131 to float - %371 = bitcast i32 %131 to float - %372 = fmul float %370, %371 - %373 = fadd float %369, %372 - %374 = call float @llvm.sqrt.f32.24(float %373) - %375 = fneg float %168 - %376 = fmul float %374, %375 - %377 = fmul float %376, 0.000000e+00 - %378 = bitcast i32 %131 to float - %379 = fadd float %378, %377 - %380 = fmul float %365, %379 - %381 = fadd float %351, %380 - %382 = call float @llvm.sqrt.f32.25(float %381) - %383 = fadd float %382, 0.000000e+00 - %384 = fdiv float %323, %383 - %385 = fmul float %384, 2.000000e+00 - %386 = bitcast i32 %152 to float - %387 = bitcast i32 %152 to float - %388 = fmul float %386, %387 - %389 = fadd float %388, 0.000000e+00 - %390 = bitcast i32 %131 to float - %391 = bitcast i32 %131 to float - %392 = fmul float %390, %391 - %393 = fadd float %389, %392 - %394 = call float @llvm.sqrt.f32.26(float %393) - %395 = fneg float %168 - %396 = fmul float %394, %395 - %397 = fmul float %396, 0.000000e+00 - %398 = bitcast i32 %131 to float - %399 = fadd float %398, %397 - %400 = bitcast i32 %152 to float - %401 = bitcast i32 %152 to float - %402 = fmul float %400, %401 - %403 = fadd float %402, 0.000000e+00 - %404 = bitcast i32 %131 to float - %405 = bitcast i32 %131 to float - %406 = fmul float %404, %405 - %407 = fadd float %403, %406 - %408 = call float @llvm.sqrt.f32.27(float %407) - %409 = fneg float %168 - %410 = fmul float %408, %409 - %411 = bitcast i32 %152 to float - %412 = fadd float %411, %410 - %413 = bitcast i32 %152 to float - %414 = bitcast i32 %152 to float - %415 = fmul float %413, %414 - %416 = fadd float %415, 0.000000e+00 - %417 = bitcast i32 %131 to float - %418 = bitcast i32 %131 to float - %419 = fmul float %417, %418 - %420 = fadd float %416, %419 - %421 = call float @llvm.sqrt.f32.28(float %420) - %422 = fneg float %168 - %423 = fmul float %421, %422 - %424 = bitcast i32 %152 to float - %425 = fadd float %424, %423 - %426 = fmul float %412, %425 - %427 = fadd float %426, 0.000000e+00 - %428 = bitcast i32 %152 to float - %429 = bitcast i32 %152 to float - %430 = fmul float %428, %429 - %431 = fadd float %430, 0.000000e+00 - %432 = bitcast i32 %131 to float - %433 = bitcast i32 %131 to float - %434 = fmul float %432, %433 - %435 = fadd float %431, %434 - %436 = call float @llvm.sqrt.f32.29(float %435) - %437 = fneg float %168 - %438 = fmul float %436, %437 - %439 = fmul float %438, 0.000000e+00 - %440 = bitcast i32 %131 to float - %441 = fadd float %440, %439 - %442 = bitcast i32 %152 to float - %443 = bitcast i32 %152 to float - %444 = fmul float %442, %443 - %445 = fadd float %444, 0.000000e+00 - %446 = bitcast i32 %131 to float - %447 = bitcast i32 %131 to float - %448 = fmul float %446, %447 - %449 = fadd float %445, %448 - %450 = call float @llvm.sqrt.f32.30(float %449) - %451 = fneg float %168 - %452 = fmul float %450, %451 - %453 = fmul float %452, 0.000000e+00 - %454 = bitcast i32 %131 to float - %455 = fadd float %454, %453 - %456 = fmul float %441, %455 - %457 = fadd float %427, %456 - %458 = call float @llvm.sqrt.f32.31(float %457) - %459 = fadd float %458, 0.000000e+00 - %460 = fdiv float %399, %459 - %461 = fmul float %385, %460 - %462 = insertelement <4 x float> %310, float %461, i32 2 - %463 = bitcast i32 %152 to float - %464 = bitcast i32 %152 to float - %465 = fmul float %463, %464 - %466 = fadd float %465, 0.000000e+00 - %467 = bitcast i32 %131 to float - %468 = bitcast i32 %131 to float - 
%469 = fmul float %467, %468 - %470 = fadd float %466, %469 - %471 = call float @llvm.sqrt.f32.32(float %470) - %472 = fneg float %168 - %473 = fmul float %471, %472 - %474 = fmul float %473, 0.000000e+00 - %475 = bitcast i32 %131 to float - %476 = fadd float %475, %474 - %477 = bitcast i32 %152 to float - %478 = bitcast i32 %152 to float - %479 = fmul float %477, %478 - %480 = fadd float %479, 0.000000e+00 - %481 = bitcast i32 %131 to float - %482 = bitcast i32 %131 to float - %483 = fmul float %481, %482 - %484 = fadd float %480, %483 - %485 = call float @llvm.sqrt.f32.33(float %484) - %486 = fneg float %168 - %487 = fmul float %485, %486 - %488 = bitcast i32 %152 to float - %489 = fadd float %488, %487 - %490 = bitcast i32 %152 to float - %491 = bitcast i32 %152 to float - %492 = fmul float %490, %491 - %493 = fadd float %492, 0.000000e+00 - %494 = bitcast i32 %131 to float - %495 = bitcast i32 %131 to float - %496 = fmul float %494, %495 - %497 = fadd float %493, %496 - %498 = call float @llvm.sqrt.f32.34(float %497) - %499 = fneg float %168 - %500 = fmul float %498, %499 - %501 = bitcast i32 %152 to float - %502 = fadd float %501, %500 - %503 = fmul float %489, %502 - %504 = fadd float %503, 0.000000e+00 - %505 = bitcast i32 %152 to float - %506 = bitcast i32 %152 to float - %507 = fmul float %505, %506 - %508 = fadd float %507, 0.000000e+00 - %509 = bitcast i32 %131 to float - %510 = bitcast i32 %131 to float - %511 = fmul float %509, %510 - %512 = fadd float %508, %511 - %513 = call float @llvm.sqrt.f32.35(float %512) - %514 = fneg float %168 - %515 = fmul float %513, %514 - %516 = fmul float %515, 0.000000e+00 - %517 = bitcast i32 %131 to float - %518 = fadd float %517, %516 - %519 = bitcast i32 %152 to float - %520 = bitcast i32 %152 to float - %521 = fmul float %519, %520 - %522 = fadd float %521, 0.000000e+00 - %523 = bitcast i32 %131 to float - %524 = bitcast i32 %131 to float - %525 = fmul float %523, %524 - %526 = fadd float %522, %525 - %527 = call float @llvm.sqrt.f32.36(float %526) - %528 = fneg float %168 - %529 = fmul float %527, %528 - %530 = fmul float %529, 0.000000e+00 - %531 = bitcast i32 %131 to float - %532 = fadd float %531, %530 - %533 = fmul float %518, %532 - %534 = fadd float %504, %533 - %535 = call float @llvm.sqrt.f32.37(float %534) - %536 = fadd float %535, 0.000000e+00 - %537 = fdiv float %476, %536 - %538 = fmul float %537, 2.000000e+00 - %539 = bitcast i32 %152 to float - %540 = bitcast i32 %152 to float - %541 = fmul float %539, %540 - %542 = fadd float %541, 0.000000e+00 - %543 = bitcast i32 %131 to float - %544 = bitcast i32 %131 to float - %545 = fmul float %543, %544 - %546 = fadd float %542, %545 - %547 = call float @llvm.sqrt.f32.38(float %546) - %548 = fneg float %168 - %549 = fmul float %547, %548 - %550 = bitcast i32 %152 to float - %551 = fadd float %550, %549 - %552 = bitcast i32 %152 to float - %553 = bitcast i32 %152 to float - %554 = fmul float %552, %553 - %555 = fadd float %554, 0.000000e+00 - %556 = bitcast i32 %131 to float - %557 = bitcast i32 %131 to float - %558 = fmul float %556, %557 - %559 = fadd float %555, %558 - %560 = call float @llvm.sqrt.f32.39(float %559) - %561 = fneg float %168 - %562 = fmul float %560, %561 - %563 = bitcast i32 %152 to float - %564 = fadd float %563, %562 - %565 = bitcast i32 %152 to float - %566 = bitcast i32 %152 to float - %567 = fmul float %565, %566 - %568 = fadd float %567, 0.000000e+00 - %569 = bitcast i32 %131 to float - %570 = bitcast i32 %131 to float - %571 = fmul float %569, %570 - %572 = 
fadd float %568, %571 - %573 = call float @llvm.sqrt.f32.40(float %572) - %574 = fneg float %168 - %575 = fmul float %573, %574 - %576 = bitcast i32 %152 to float - %577 = fadd float %576, %575 - %578 = fmul float %564, %577 - %579 = fadd float %578, 0.000000e+00 - %580 = bitcast i32 %152 to float - %581 = bitcast i32 %152 to float - %582 = fmul float %580, %581 - %583 = fadd float %582, 0.000000e+00 - %584 = bitcast i32 %131 to float - %585 = bitcast i32 %131 to float - %586 = fmul float %584, %585 - %587 = fadd float %583, %586 - %588 = call float @llvm.sqrt.f32.41(float %587) - %589 = fneg float %168 - %590 = fmul float %588, %589 - %591 = fmul float %590, 0.000000e+00 - %592 = bitcast i32 %131 to float - %593 = fadd float %592, %591 - %594 = bitcast i32 %152 to float - %595 = bitcast i32 %152 to float - %596 = fmul float %594, %595 - %597 = fadd float %596, 0.000000e+00 - %598 = bitcast i32 %131 to float - %599 = bitcast i32 %131 to float - %600 = fmul float %598, %599 - %601 = fadd float %597, %600 - %602 = call float @llvm.sqrt.f32.42(float %601) - %603 = fneg float %168 - %604 = fmul float %602, %603 - %605 = fmul float %604, 0.000000e+00 - %606 = bitcast i32 %131 to float - %607 = fadd float %606, %605 - %608 = fmul float %593, %607 - %609 = fadd float %579, %608 - %610 = call float @llvm.sqrt.f32.43(float %609) - %611 = fadd float %610, 0.000000e+00 - %612 = fdiv float %551, %611 - %613 = fmul float %538, %612 - %614 = insertelement <4 x float> %462, float %613, i32 3 - %615 = fsub <4 x float> , %614 - %616 = bitcast i32 %152 to float - %617 = bitcast i32 %152 to float - %618 = fmul float %616, %617 - %619 = fadd float %618, 0.000000e+00 - %620 = bitcast i32 %131 to float - %621 = bitcast i32 %131 to float - %622 = fmul float %620, %621 - %623 = fadd float %619, %622 - %624 = call float @llvm.sqrt.f32.44(float %623) - %625 = fneg float %168 - %626 = fmul float %624, %625 - %627 = fmul float %626, 0.000000e+00 - %628 = bitcast i32 %131 to float - %629 = fadd float %628, %627 - %630 = bitcast i32 %152 to float - %631 = bitcast i32 %152 to float - %632 = fmul float %630, %631 - %633 = fadd float %632, 0.000000e+00 - %634 = bitcast i32 %131 to float - %635 = bitcast i32 %131 to float - %636 = fmul float %634, %635 - %637 = fadd float %633, %636 - %638 = call float @llvm.sqrt.f32.45(float %637) - %639 = fneg float %168 - %640 = fmul float %638, %639 - %641 = bitcast i32 %152 to float - %642 = fadd float %641, %640 - %643 = bitcast i32 %152 to float - %644 = bitcast i32 %152 to float - %645 = fmul float %643, %644 - %646 = fadd float %645, 0.000000e+00 - %647 = bitcast i32 %131 to float - %648 = bitcast i32 %131 to float - %649 = fmul float %647, %648 - %650 = fadd float %646, %649 - %651 = call float @llvm.sqrt.f32.46(float %650) - %652 = fneg float %168 - %653 = fmul float %651, %652 - %654 = bitcast i32 %152 to float - %655 = fadd float %654, %653 - %656 = fmul float %642, %655 - %657 = fadd float %656, 0.000000e+00 - %658 = bitcast i32 %152 to float - %659 = bitcast i32 %152 to float - %660 = fmul float %658, %659 - %661 = fadd float %660, 0.000000e+00 - %662 = bitcast i32 %131 to float - %663 = bitcast i32 %131 to float - %664 = fmul float %662, %663 - %665 = fadd float %661, %664 - %666 = call float @llvm.sqrt.f32.47(float %665) - %667 = fneg float %168 - %668 = fmul float %666, %667 - %669 = fmul float %668, 0.000000e+00 - %670 = bitcast i32 %131 to float - %671 = fadd float %670, %669 - %672 = bitcast i32 %152 to float - %673 = bitcast i32 %152 to float - %674 = fmul float %672, 
%673 - %675 = fadd float %674, 0.000000e+00 - %676 = bitcast i32 %131 to float - %677 = bitcast i32 %131 to float - %678 = fmul float %676, %677 - %679 = fadd float %675, %678 - %680 = call float @llvm.sqrt.f32.48(float %679) - %681 = fneg float %168 - %682 = fmul float %680, %681 - %683 = fmul float %682, 0.000000e+00 - %684 = bitcast i32 %131 to float - %685 = fadd float %684, %683 - %686 = fmul float %671, %685 - %687 = fadd float %657, %686 - %688 = call float @llvm.sqrt.f32.49(float %687) - %689 = fadd float %688, 0.000000e+00 - %690 = fdiv float %629, %689 - %691 = fmul float %690, 2.000000e+00 - %692 = bitcast i32 %152 to float - %693 = bitcast i32 %152 to float - %694 = fmul float %692, %693 - %695 = fadd float %694, 0.000000e+00 - %696 = bitcast i32 %131 to float - %697 = bitcast i32 %131 to float - %698 = fmul float %696, %697 - %699 = fadd float %695, %698 - %700 = call float @llvm.sqrt.f32.50(float %699) - %701 = fneg float %168 - %702 = fmul float %700, %701 - %703 = fmul float %702, 0.000000e+00 - %704 = bitcast i32 %131 to float - %705 = fadd float %704, %703 - %706 = bitcast i32 %152 to float - %707 = bitcast i32 %152 to float - %708 = fmul float %706, %707 - %709 = fadd float %708, 0.000000e+00 - %710 = bitcast i32 %131 to float - %711 = bitcast i32 %131 to float - %712 = fmul float %710, %711 - %713 = fadd float %709, %712 - %714 = call float @llvm.sqrt.f32.51(float %713) - %715 = fneg float %168 - %716 = fmul float %714, %715 - %717 = bitcast i32 %152 to float - %718 = fadd float %717, %716 - %719 = bitcast i32 %152 to float - %720 = bitcast i32 %152 to float - %721 = fmul float %719, %720 - %722 = fadd float %721, 0.000000e+00 - %723 = bitcast i32 %131 to float - %724 = bitcast i32 %131 to float - %725 = fmul float %723, %724 - %726 = fadd float %722, %725 - %727 = call float @llvm.sqrt.f32.52(float %726) - %728 = fneg float %168 - %729 = fmul float %727, %728 - %730 = bitcast i32 %152 to float - %731 = fadd float %730, %729 - %732 = fmul float %718, %731 - %733 = fadd float %732, 0.000000e+00 - %734 = bitcast i32 %152 to float - %735 = bitcast i32 %152 to float - %736 = fmul float %734, %735 - %737 = fadd float %736, 0.000000e+00 - %738 = bitcast i32 %131 to float - %739 = bitcast i32 %131 to float - %740 = fmul float %738, %739 - %741 = fadd float %737, %740 - %742 = call float @llvm.sqrt.f32.53(float %741) - %743 = fneg float %168 - %744 = fmul float %742, %743 - %745 = fmul float %744, 0.000000e+00 - %746 = bitcast i32 %131 to float - %747 = fadd float %746, %745 - %748 = bitcast i32 %152 to float - %749 = bitcast i32 %152 to float - %750 = fmul float %748, %749 - %751 = fadd float %750, 0.000000e+00 - %752 = bitcast i32 %131 to float - %753 = bitcast i32 %131 to float - %754 = fmul float %752, %753 - %755 = fadd float %751, %754 - %756 = call float @llvm.sqrt.f32.54(float %755) - %757 = fneg float %168 - %758 = fmul float %756, %757 - %759 = fmul float %758, 0.000000e+00 - %760 = bitcast i32 %131 to float - %761 = fadd float %760, %759 - %762 = fmul float %747, %761 - %763 = fadd float %733, %762 - %764 = call float @llvm.sqrt.f32.55(float %763) - %765 = fadd float %764, 0.000000e+00 - %766 = fdiv float %705, %765 - %767 = fmul float %691, %766 - %768 = fsub float 1.000000e+00, %767 - %769 = insertelement <4 x float> zeroinitializer, float %768, i32 0 - %770 = insertelement <4 x float> %769, float 0.000000e+00, i32 1 - %771 = insertelement <4 x float> %770, float 0.000000e+00, i32 2 - %772 = insertelement <4 x float> %771, float 0.000000e+00, i32 3 - %773 = 
shufflevector <4 x float> %615, <4 x float> %772, <8 x i32> - %774 = extractelement <8 x float> %773, i32 0 - %775 = bitcast i32* %23 to float* - %776 = getelementptr float, float* %2, i32 0 - %777 = getelementptr inbounds float, float* %776, i64 3 - %778 = bitcast float* %777 to i32* - %779 = bitcast i32* %778 to float* - store float %774, float* %779, align 4 - %780 = extractelement <8 x float> %773, i32 1 - %781 = bitcast i32* %60 to float* - %782 = alloca [4 x float], align 16 - %783 = bitcast [4 x float]* %782 to i32* - %784 = bitcast i32* %783 to float* - store float %780, float* %784, align 4 - %785 = extractelement <8 x float> %773, i32 2 - %786 = bitcast i32* %63 to float* - %787 = getelementptr inbounds [4 x float], [4 x float]* %782, i64 0, i64 1 - %788 = bitcast float* %787 to i32* - %789 = bitcast i32* %788 to float* - store float %785, float* %789, align 4 - %790 = extractelement <8 x float> %773, i32 3 - %791 = bitcast i32* %66 to float* - %792 = getelementptr inbounds [4 x float], [4 x float]* %782, i64 0, i64 2 - %793 = bitcast float* %792 to i32* - %794 = bitcast i32* %793 to float* - store float %790, float* %794, align 4 - %795 = extractelement <8 x float> %773, i32 4 - %796 = bitcast i32* %69 to float* - %797 = getelementptr inbounds [4 x float], [4 x float]* %782, i64 0, i64 3 - %798 = bitcast float* %797 to i32* - %799 = bitcast i32* %798 to float* - store float %795, float* %799, align 4 - %800 = bitcast float* %1 to i8* - %801 = alloca [4 x float], align 16 - %802 = bitcast [4 x float]* %801 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %800, i8* nonnull align 16 dereferenceable(16) %802, i64 16, i1 false) - store float 0.000000e+00, float* %2, align 4 - %803 = bitcast i32 %152 to float - %804 = bitcast i32 %152 to float - %805 = fmul float %803, %804 - %806 = fadd float %805, 0.000000e+00 - %807 = load i32, i32* %130, align 4 - %808 = bitcast i32 %807 to float - %809 = bitcast i32 %807 to float - %810 = fmul float %808, %809 - %811 = fadd float %806, %810 - %812 = call float @llvm.sqrt.f32.56(float %811) - %813 = sitofp i32 %167 to float - %814 = fneg float %813 - %815 = fmul float %812, %814 - %816 = bitcast i32 %152 to float - %817 = fadd float %816, %815 - %818 = bitcast i32 %152 to float - %819 = bitcast i32 %152 to float - %820 = fmul float %818, %819 - %821 = fadd float %820, 0.000000e+00 - %822 = bitcast i32 %807 to float - %823 = bitcast i32 %807 to float - %824 = fmul float %822, %823 - %825 = fadd float %821, %824 - %826 = call float @llvm.sqrt.f32.57(float %825) - %827 = fneg float %813 - %828 = fmul float %826, %827 - %829 = bitcast i32 %152 to float - %830 = fadd float %829, %828 - %831 = bitcast i32 %152 to float - %832 = bitcast i32 %152 to float - %833 = fmul float %831, %832 - %834 = fadd float %833, 0.000000e+00 - %835 = bitcast i32 %807 to float - %836 = bitcast i32 %807 to float - %837 = fmul float %835, %836 - %838 = fadd float %834, %837 - %839 = call float @llvm.sqrt.f32.58(float %838) - %840 = fneg float %813 - %841 = fmul float %839, %840 - %842 = bitcast i32 %152 to float - %843 = fadd float %842, %841 - %844 = fmul float %830, %843 - %845 = fadd float %844, 0.000000e+00 - %846 = bitcast i32 %152 to float - %847 = bitcast i32 %152 to float - %848 = fmul float %846, %847 - %849 = fadd float %848, 0.000000e+00 - %850 = bitcast i32 %807 to float - %851 = bitcast i32 %807 to float - %852 = fmul float %850, %851 - %853 = fadd float %849, %852 - %854 = call float @llvm.sqrt.f32.59(float %853) - %855 = 
fneg float %813 - %856 = fmul float %854, %855 - %857 = fmul float %856, 0.000000e+00 - %858 = bitcast i32 %807 to float - %859 = fadd float %858, %857 - %860 = bitcast i32 %152 to float - %861 = bitcast i32 %152 to float - %862 = fmul float %860, %861 - %863 = fadd float %862, 0.000000e+00 - %864 = bitcast i32 %807 to float - %865 = bitcast i32 %807 to float - %866 = fmul float %864, %865 - %867 = fadd float %863, %866 - %868 = call float @llvm.sqrt.f32.60(float %867) - %869 = fneg float %813 - %870 = fmul float %868, %869 - %871 = fmul float %870, 0.000000e+00 - %872 = bitcast i32 %807 to float - %873 = fadd float %872, %871 - %874 = fmul float %859, %873 - %875 = fadd float %845, %874 - %876 = call float @llvm.sqrt.f32.61(float %875) - %877 = fadd float %876, 0.000000e+00 - %878 = fdiv float %817, %877 - %879 = fmul float %878, 2.000000e+00 - %880 = bitcast i32 %152 to float - %881 = bitcast i32 %152 to float - %882 = fmul float %880, %881 - %883 = fadd float %882, 0.000000e+00 - %884 = bitcast i32 %807 to float - %885 = bitcast i32 %807 to float - %886 = fmul float %884, %885 - %887 = fadd float %883, %886 - %888 = call float @llvm.sqrt.f32.62(float %887) - %889 = fneg float %813 - %890 = fmul float %888, %889 - %891 = bitcast i32 %152 to float - %892 = fadd float %891, %890 - %893 = bitcast i32 %152 to float - %894 = bitcast i32 %152 to float - %895 = fmul float %893, %894 - %896 = fadd float %895, 0.000000e+00 - %897 = bitcast i32 %807 to float - %898 = bitcast i32 %807 to float - %899 = fmul float %897, %898 - %900 = fadd float %896, %899 - %901 = call float @llvm.sqrt.f32.63(float %900) - %902 = fneg float %813 - %903 = fmul float %901, %902 - %904 = bitcast i32 %152 to float - %905 = fadd float %904, %903 - %906 = bitcast i32 %152 to float - %907 = bitcast i32 %152 to float - %908 = fmul float %906, %907 - %909 = fadd float %908, 0.000000e+00 - %910 = bitcast i32 %807 to float - %911 = bitcast i32 %807 to float - %912 = fmul float %910, %911 - %913 = fadd float %909, %912 - %914 = call float @llvm.sqrt.f32.64(float %913) - %915 = fneg float %813 - %916 = fmul float %914, %915 - %917 = bitcast i32 %152 to float - %918 = fadd float %917, %916 - %919 = fmul float %905, %918 - %920 = fadd float %919, 0.000000e+00 - %921 = bitcast i32 %152 to float - %922 = bitcast i32 %152 to float - %923 = fmul float %921, %922 - %924 = fadd float %923, 0.000000e+00 - %925 = bitcast i32 %807 to float - %926 = bitcast i32 %807 to float - %927 = fmul float %925, %926 - %928 = fadd float %924, %927 - %929 = call float @llvm.sqrt.f32.65(float %928) - %930 = fneg float %813 - %931 = fmul float %929, %930 - %932 = fmul float %931, 0.000000e+00 - %933 = bitcast i32 %807 to float - %934 = fadd float %933, %932 - %935 = bitcast i32 %152 to float - %936 = bitcast i32 %152 to float - %937 = fmul float %935, %936 - %938 = fadd float %937, 0.000000e+00 - %939 = bitcast i32 %807 to float - %940 = bitcast i32 %807 to float - %941 = fmul float %939, %940 - %942 = fadd float %938, %941 - %943 = call float @llvm.sqrt.f32.66(float %942) - %944 = fneg float %813 - %945 = fmul float %943, %944 - %946 = fmul float %945, 0.000000e+00 - %947 = bitcast i32 %807 to float - %948 = fadd float %947, %946 - %949 = fmul float %934, %948 - %950 = fadd float %920, %949 - %951 = call float @llvm.sqrt.f32.67(float %950) - %952 = fadd float %951, 0.000000e+00 - %953 = fdiv float %892, %952 - %954 = fmul float %879, %953 - %955 = fsub float 1.000000e+00, %954 - %956 = insertelement <4 x float> zeroinitializer, float %955, i32 0 - %957 = 
insertelement <4 x float> %956, float 0.000000e+00, i32 1 - %958 = insertelement <4 x float> %957, float 0.000000e+00, i32 2 - %959 = insertelement <4 x float> %958, float 0.000000e+00, i32 3 - %960 = getelementptr float, float* %0, i32 0 - %961 = load float, float* %960, align 4 - %962 = insertelement <4 x float> zeroinitializer, float %961, i32 0 - %963 = insertelement <4 x float> %962, float 0.000000e+00, i32 1 - %964 = insertelement <4 x float> %963, float 0.000000e+00, i32 2 - %965 = insertelement <4 x float> %964, float 0.000000e+00, i32 3 - %966 = call <4 x float> @llvm.fma.f32.68(<4 x float> %959, <4 x float> %965, <4 x float> zeroinitializer) - %967 = extractelement <4 x float> %966, i32 0 - store float %967, float* %2, align 4 - %968 = bitcast i32 %152 to float - %969 = bitcast i32 %152 to float - %970 = fmul float %968, %969 - %971 = fadd float %970, 0.000000e+00 - %972 = bitcast i32 %807 to float - %973 = bitcast i32 %807 to float - %974 = fmul float %972, %973 - %975 = fadd float %971, %974 - %976 = call float @llvm.sqrt.f32.69(float %975) - %977 = fneg float %813 - %978 = fmul float %976, %977 - %979 = bitcast i32 %152 to float - %980 = fadd float %979, %978 - %981 = bitcast i32 %152 to float - %982 = bitcast i32 %152 to float - %983 = fmul float %981, %982 - %984 = fadd float %983, 0.000000e+00 - %985 = bitcast i32 %807 to float - %986 = bitcast i32 %807 to float - %987 = fmul float %985, %986 - %988 = fadd float %984, %987 - %989 = call float @llvm.sqrt.f32.70(float %988) - %990 = fneg float %813 - %991 = fmul float %989, %990 - %992 = bitcast i32 %152 to float - %993 = fadd float %992, %991 - %994 = bitcast i32 %152 to float - %995 = bitcast i32 %152 to float - %996 = fmul float %994, %995 - %997 = fadd float %996, 0.000000e+00 - %998 = bitcast i32 %807 to float - %999 = bitcast i32 %807 to float - %1000 = fmul float %998, %999 - %1001 = fadd float %997, %1000 - %1002 = call float @llvm.sqrt.f32.71(float %1001) - %1003 = fneg float %813 - %1004 = fmul float %1002, %1003 - %1005 = bitcast i32 %152 to float - %1006 = fadd float %1005, %1004 - %1007 = fmul float %993, %1006 - %1008 = fadd float %1007, 0.000000e+00 - %1009 = bitcast i32 %152 to float - %1010 = bitcast i32 %152 to float - %1011 = fmul float %1009, %1010 - %1012 = fadd float %1011, 0.000000e+00 - %1013 = bitcast i32 %807 to float - %1014 = bitcast i32 %807 to float - %1015 = fmul float %1013, %1014 - %1016 = fadd float %1012, %1015 - %1017 = call float @llvm.sqrt.f32.72(float %1016) - %1018 = fneg float %813 - %1019 = fmul float %1017, %1018 - %1020 = fmul float %1019, 0.000000e+00 - %1021 = bitcast i32 %807 to float - %1022 = fadd float %1021, %1020 - %1023 = bitcast i32 %152 to float - %1024 = bitcast i32 %152 to float - %1025 = fmul float %1023, %1024 - %1026 = fadd float %1025, 0.000000e+00 - %1027 = bitcast i32 %807 to float - %1028 = bitcast i32 %807 to float - %1029 = fmul float %1027, %1028 - %1030 = fadd float %1026, %1029 - %1031 = call float @llvm.sqrt.f32.73(float %1030) - %1032 = fneg float %813 - %1033 = fmul float %1031, %1032 - %1034 = fmul float %1033, 0.000000e+00 - %1035 = bitcast i32 %807 to float - %1036 = fadd float %1035, %1034 - %1037 = fmul float %1022, %1036 - %1038 = fadd float %1008, %1037 - %1039 = call float @llvm.sqrt.f32.74(float %1038) - %1040 = fadd float %1039, 0.000000e+00 - %1041 = fdiv float %980, %1040 - %1042 = fmul float %1041, 2.000000e+00 - %1043 = bitcast i32 %152 to float - %1044 = bitcast i32 %152 to float - %1045 = fmul float %1043, %1044 - %1046 = fadd float %1045, 
0.000000e+00 - %1047 = bitcast i32 %807 to float - %1048 = bitcast i32 %807 to float - %1049 = fmul float %1047, %1048 - %1050 = fadd float %1046, %1049 - %1051 = call float @llvm.sqrt.f32.75(float %1050) - %1052 = fneg float %813 - %1053 = fmul float %1051, %1052 - %1054 = bitcast i32 %152 to float - %1055 = fadd float %1054, %1053 - %1056 = bitcast i32 %152 to float - %1057 = bitcast i32 %152 to float - %1058 = fmul float %1056, %1057 - %1059 = fadd float %1058, 0.000000e+00 - %1060 = bitcast i32 %807 to float - %1061 = bitcast i32 %807 to float - %1062 = fmul float %1060, %1061 - %1063 = fadd float %1059, %1062 - %1064 = call float @llvm.sqrt.f32.76(float %1063) - %1065 = fneg float %813 - %1066 = fmul float %1064, %1065 - %1067 = bitcast i32 %152 to float - %1068 = fadd float %1067, %1066 - %1069 = bitcast i32 %152 to float - %1070 = bitcast i32 %152 to float - %1071 = fmul float %1069, %1070 - %1072 = fadd float %1071, 0.000000e+00 - %1073 = bitcast i32 %807 to float - %1074 = bitcast i32 %807 to float - %1075 = fmul float %1073, %1074 - %1076 = fadd float %1072, %1075 - %1077 = call float @llvm.sqrt.f32.77(float %1076) - %1078 = fneg float %813 - %1079 = fmul float %1077, %1078 - %1080 = bitcast i32 %152 to float - %1081 = fadd float %1080, %1079 - %1082 = fmul float %1068, %1081 - %1083 = fadd float %1082, 0.000000e+00 - %1084 = bitcast i32 %152 to float - %1085 = bitcast i32 %152 to float - %1086 = fmul float %1084, %1085 - %1087 = fadd float %1086, 0.000000e+00 - %1088 = bitcast i32 %807 to float - %1089 = bitcast i32 %807 to float - %1090 = fmul float %1088, %1089 - %1091 = fadd float %1087, %1090 - %1092 = call float @llvm.sqrt.f32.78(float %1091) - %1093 = fneg float %813 - %1094 = fmul float %1092, %1093 - %1095 = fmul float %1094, 0.000000e+00 - %1096 = bitcast i32 %807 to float - %1097 = fadd float %1096, %1095 - %1098 = bitcast i32 %152 to float - %1099 = bitcast i32 %152 to float - %1100 = fmul float %1098, %1099 - %1101 = fadd float %1100, 0.000000e+00 - %1102 = bitcast i32 %807 to float - %1103 = bitcast i32 %807 to float - %1104 = fmul float %1102, %1103 - %1105 = fadd float %1101, %1104 - %1106 = call float @llvm.sqrt.f32.79(float %1105) - %1107 = fneg float %813 - %1108 = fmul float %1106, %1107 - %1109 = fmul float %1108, 0.000000e+00 - %1110 = bitcast i32 %807 to float - %1111 = fadd float %1110, %1109 - %1112 = fmul float %1097, %1111 - %1113 = fadd float %1083, %1112 - %1114 = call float @llvm.sqrt.f32.80(float %1113) - %1115 = fadd float %1114, 0.000000e+00 - %1116 = fdiv float %1055, %1115 - %1117 = fmul float %1042, %1116 - %1118 = fsub float 1.000000e+00, %1117 - %1119 = fmul float %1118, %961 - %1120 = fadd float %1119, 0.000000e+00 - %1121 = bitcast i32 %152 to float - %1122 = bitcast i32 %152 to float - %1123 = fmul float %1121, %1122 - %1124 = fadd float %1123, 0.000000e+00 - %1125 = bitcast i32 %807 to float - %1126 = bitcast i32 %807 to float - %1127 = fmul float %1125, %1126 - %1128 = fadd float %1124, %1127 - %1129 = call float @llvm.sqrt.f32.81(float %1128) - %1130 = fneg float %813 - %1131 = fmul float %1129, %1130 - %1132 = bitcast i32 %152 to float - %1133 = fadd float %1132, %1131 - %1134 = bitcast i32 %152 to float - %1135 = bitcast i32 %152 to float - %1136 = fmul float %1134, %1135 - %1137 = fadd float %1136, 0.000000e+00 - %1138 = bitcast i32 %807 to float - %1139 = bitcast i32 %807 to float - %1140 = fmul float %1138, %1139 - %1141 = fadd float %1137, %1140 - %1142 = call float @llvm.sqrt.f32.82(float %1141) - %1143 = fneg float %813 - %1144 
= fmul float %1142, %1143 - %1145 = bitcast i32 %152 to float - %1146 = fadd float %1145, %1144 - %1147 = bitcast i32 %152 to float - %1148 = bitcast i32 %152 to float - %1149 = fmul float %1147, %1148 - %1150 = fadd float %1149, 0.000000e+00 - %1151 = bitcast i32 %807 to float - %1152 = bitcast i32 %807 to float - %1153 = fmul float %1151, %1152 - %1154 = fadd float %1150, %1153 - %1155 = call float @llvm.sqrt.f32.83(float %1154) - %1156 = fneg float %813 - %1157 = fmul float %1155, %1156 - %1158 = bitcast i32 %152 to float - %1159 = fadd float %1158, %1157 - %1160 = fmul float %1146, %1159 - %1161 = fadd float %1160, 0.000000e+00 - %1162 = bitcast i32 %152 to float - %1163 = bitcast i32 %152 to float - %1164 = fmul float %1162, %1163 - %1165 = fadd float %1164, 0.000000e+00 - %1166 = bitcast i32 %807 to float - %1167 = bitcast i32 %807 to float - %1168 = fmul float %1166, %1167 - %1169 = fadd float %1165, %1168 - %1170 = call float @llvm.sqrt.f32.84(float %1169) - %1171 = fneg float %813 - %1172 = fmul float %1170, %1171 - %1173 = fmul float %1172, 0.000000e+00 - %1174 = bitcast i32 %807 to float - %1175 = fadd float %1174, %1173 - %1176 = bitcast i32 %152 to float - %1177 = bitcast i32 %152 to float - %1178 = fmul float %1176, %1177 - %1179 = fadd float %1178, 0.000000e+00 - %1180 = bitcast i32 %807 to float - %1181 = bitcast i32 %807 to float - %1182 = fmul float %1180, %1181 - %1183 = fadd float %1179, %1182 - %1184 = call float @llvm.sqrt.f32.85(float %1183) - %1185 = fneg float %813 - %1186 = fmul float %1184, %1185 - %1187 = fmul float %1186, 0.000000e+00 - %1188 = bitcast i32 %807 to float - %1189 = fadd float %1188, %1187 - %1190 = fmul float %1175, %1189 - %1191 = fadd float %1161, %1190 - %1192 = call float @llvm.sqrt.f32.86(float %1191) - %1193 = fadd float %1192, 0.000000e+00 - %1194 = fdiv float %1133, %1193 - %1195 = fmul float %1194, 2.000000e+00 - %1196 = bitcast i32 %152 to float - %1197 = bitcast i32 %152 to float - %1198 = fmul float %1196, %1197 - %1199 = fadd float %1198, 0.000000e+00 - %1200 = bitcast i32 %807 to float - %1201 = bitcast i32 %807 to float - %1202 = fmul float %1200, %1201 - %1203 = fadd float %1199, %1202 - %1204 = call float @llvm.sqrt.f32.87(float %1203) - %1205 = fneg float %813 - %1206 = fmul float %1204, %1205 - %1207 = fmul float %1206, 0.000000e+00 - %1208 = bitcast i32 %807 to float - %1209 = fadd float %1208, %1207 - %1210 = bitcast i32 %152 to float - %1211 = bitcast i32 %152 to float - %1212 = fmul float %1210, %1211 - %1213 = fadd float %1212, 0.000000e+00 - %1214 = bitcast i32 %807 to float - %1215 = bitcast i32 %807 to float - %1216 = fmul float %1214, %1215 - %1217 = fadd float %1213, %1216 - %1218 = call float @llvm.sqrt.f32.88(float %1217) - %1219 = fneg float %813 - %1220 = fmul float %1218, %1219 - %1221 = bitcast i32 %152 to float - %1222 = fadd float %1221, %1220 - %1223 = bitcast i32 %152 to float - %1224 = bitcast i32 %152 to float - %1225 = fmul float %1223, %1224 - %1226 = fadd float %1225, 0.000000e+00 - %1227 = bitcast i32 %807 to float - %1228 = bitcast i32 %807 to float - %1229 = fmul float %1227, %1228 - %1230 = fadd float %1226, %1229 - %1231 = call float @llvm.sqrt.f32.89(float %1230) - %1232 = fneg float %813 - %1233 = fmul float %1231, %1232 - %1234 = bitcast i32 %152 to float - %1235 = fadd float %1234, %1233 - %1236 = fmul float %1222, %1235 - %1237 = fadd float %1236, 0.000000e+00 - %1238 = bitcast i32 %152 to float - %1239 = bitcast i32 %152 to float - %1240 = fmul float %1238, %1239 - %1241 = fadd float %1240, 
0.000000e+00 - %1242 = bitcast i32 %807 to float - %1243 = bitcast i32 %807 to float - %1244 = fmul float %1242, %1243 - %1245 = fadd float %1241, %1244 - %1246 = call float @llvm.sqrt.f32.90(float %1245) - %1247 = fneg float %813 - %1248 = fmul float %1246, %1247 - %1249 = fmul float %1248, 0.000000e+00 - %1250 = bitcast i32 %807 to float - %1251 = fadd float %1250, %1249 - %1252 = bitcast i32 %152 to float - %1253 = bitcast i32 %152 to float - %1254 = fmul float %1252, %1253 - %1255 = fadd float %1254, 0.000000e+00 - %1256 = bitcast i32 %807 to float - %1257 = bitcast i32 %807 to float - %1258 = fmul float %1256, %1257 - %1259 = fadd float %1255, %1258 - %1260 = call float @llvm.sqrt.f32.91(float %1259) - %1261 = fneg float %813 - %1262 = fmul float %1260, %1261 - %1263 = fmul float %1262, 0.000000e+00 - %1264 = bitcast i32 %807 to float - %1265 = fadd float %1264, %1263 - %1266 = fmul float %1251, %1265 - %1267 = fadd float %1237, %1266 - %1268 = call float @llvm.sqrt.f32.92(float %1267) - %1269 = fadd float %1268, 0.000000e+00 - %1270 = fdiv float %1209, %1269 - %1271 = fmul float %1195, %1270 - %1272 = fneg float %1271 - %1273 = getelementptr float, float* %0, i32 0 - %1274 = getelementptr inbounds float, float* %1273, i64 2 - %1275 = load float, float* %1274, align 4 - %1276 = fmul float %1272, %1275 - %1277 = fadd float %1120, %1276 - %1278 = insertelement <4 x float> zeroinitializer, float %1277, i32 0 - %1279 = insertelement <4 x float> %1278, float 0.000000e+00, i32 1 - %1280 = insertelement <4 x float> %1279, float 0.000000e+00, i32 2 - %1281 = insertelement <4 x float> %1280, float 0.000000e+00, i32 3 - %1282 = extractelement <4 x float> %1281, i32 0 - store float %1282, float* %2, align 4 - %1283 = extractelement <4 x float> %1281, i32 1 - %1284 = getelementptr float, float* %2, i32 0 - %1285 = getelementptr inbounds float, float* %1284, i64 1 - store float %1283, float* %1285, align 4 - %1286 = bitcast i32 %152 to float - %1287 = bitcast i32 %152 to float - %1288 = fmul float %1286, %1287 - %1289 = fadd float %1288, 0.000000e+00 - %1290 = bitcast i32 %807 to float - %1291 = bitcast i32 %807 to float - %1292 = fmul float %1290, %1291 - %1293 = fadd float %1289, %1292 - %1294 = call float @llvm.sqrt.f32.93(float %1293) - %1295 = fneg float %813 - %1296 = fmul float %1294, %1295 - %1297 = bitcast i32 %152 to float - %1298 = fadd float %1297, %1296 - %1299 = bitcast i32 %152 to float - %1300 = bitcast i32 %152 to float - %1301 = fmul float %1299, %1300 - %1302 = fadd float %1301, 0.000000e+00 - %1303 = bitcast i32 %807 to float - %1304 = bitcast i32 %807 to float - %1305 = fmul float %1303, %1304 - %1306 = fadd float %1302, %1305 - %1307 = call float @llvm.sqrt.f32.94(float %1306) - %1308 = fneg float %813 - %1309 = fmul float %1307, %1308 - %1310 = bitcast i32 %152 to float - %1311 = fadd float %1310, %1309 - %1312 = bitcast i32 %152 to float - %1313 = bitcast i32 %152 to float - %1314 = fmul float %1312, %1313 - %1315 = fadd float %1314, 0.000000e+00 - %1316 = bitcast i32 %807 to float - %1317 = bitcast i32 %807 to float - %1318 = fmul float %1316, %1317 - %1319 = fadd float %1315, %1318 - %1320 = call float @llvm.sqrt.f32.95(float %1319) - %1321 = fneg float %813 - %1322 = fmul float %1320, %1321 - %1323 = bitcast i32 %152 to float - %1324 = fadd float %1323, %1322 - %1325 = fmul float %1311, %1324 - %1326 = fadd float %1325, 0.000000e+00 - %1327 = bitcast i32 %152 to float - %1328 = bitcast i32 %152 to float - %1329 = fmul float %1327, %1328 - %1330 = fadd float %1329, 
0.000000e+00 - %1331 = bitcast i32 %807 to float - %1332 = bitcast i32 %807 to float - %1333 = fmul float %1331, %1332 - %1334 = fadd float %1330, %1333 - %1335 = call float @llvm.sqrt.f32.96(float %1334) - %1336 = fneg float %813 - %1337 = fmul float %1335, %1336 - %1338 = fmul float %1337, 0.000000e+00 - %1339 = bitcast i32 %807 to float - %1340 = fadd float %1339, %1338 - %1341 = bitcast i32 %152 to float - %1342 = bitcast i32 %152 to float - %1343 = fmul float %1341, %1342 - %1344 = fadd float %1343, 0.000000e+00 - %1345 = bitcast i32 %807 to float - %1346 = bitcast i32 %807 to float - %1347 = fmul float %1345, %1346 - %1348 = fadd float %1344, %1347 - %1349 = call float @llvm.sqrt.f32.97(float %1348) - %1350 = fneg float %813 - %1351 = fmul float %1349, %1350 - %1352 = fmul float %1351, 0.000000e+00 - %1353 = bitcast i32 %807 to float - %1354 = fadd float %1353, %1352 - %1355 = fmul float %1340, %1354 - %1356 = fadd float %1326, %1355 - %1357 = call float @llvm.sqrt.f32.98(float %1356) - %1358 = fadd float %1357, 0.000000e+00 - %1359 = fdiv float %1298, %1358 - %1360 = fmul float %1359, 2.000000e+00 - %1361 = bitcast i32 %152 to float - %1362 = bitcast i32 %152 to float - %1363 = fmul float %1361, %1362 - %1364 = fadd float %1363, 0.000000e+00 - %1365 = bitcast i32 %807 to float - %1366 = bitcast i32 %807 to float - %1367 = fmul float %1365, %1366 - %1368 = fadd float %1364, %1367 - %1369 = call float @llvm.sqrt.f32.99(float %1368) - %1370 = fneg float %813 - %1371 = fmul float %1369, %1370 - %1372 = bitcast i32 %152 to float - %1373 = fadd float %1372, %1371 - %1374 = bitcast i32 %152 to float - %1375 = bitcast i32 %152 to float - %1376 = fmul float %1374, %1375 - %1377 = fadd float %1376, 0.000000e+00 - %1378 = bitcast i32 %807 to float - %1379 = bitcast i32 %807 to float - %1380 = fmul float %1378, %1379 - %1381 = fadd float %1377, %1380 - %1382 = call float @llvm.sqrt.f32.100(float %1381) - %1383 = fneg float %813 - %1384 = fmul float %1382, %1383 - %1385 = bitcast i32 %152 to float - %1386 = fadd float %1385, %1384 - %1387 = bitcast i32 %152 to float - %1388 = bitcast i32 %152 to float - %1389 = fmul float %1387, %1388 - %1390 = fadd float %1389, 0.000000e+00 - %1391 = bitcast i32 %807 to float - %1392 = bitcast i32 %807 to float - %1393 = fmul float %1391, %1392 - %1394 = fadd float %1390, %1393 - %1395 = call float @llvm.sqrt.f32.101(float %1394) - %1396 = fneg float %813 - %1397 = fmul float %1395, %1396 - %1398 = bitcast i32 %152 to float - %1399 = fadd float %1398, %1397 - %1400 = fmul float %1386, %1399 - %1401 = fadd float %1400, 0.000000e+00 - %1402 = bitcast i32 %152 to float - %1403 = bitcast i32 %152 to float - %1404 = fmul float %1402, %1403 - %1405 = fadd float %1404, 0.000000e+00 - %1406 = bitcast i32 %807 to float - %1407 = bitcast i32 %807 to float - %1408 = fmul float %1406, %1407 - %1409 = fadd float %1405, %1408 - %1410 = call float @llvm.sqrt.f32.102(float %1409) - %1411 = fneg float %813 - %1412 = fmul float %1410, %1411 - %1413 = fmul float %1412, 0.000000e+00 - %1414 = bitcast i32 %807 to float - %1415 = fadd float %1414, %1413 - %1416 = bitcast i32 %152 to float - %1417 = bitcast i32 %152 to float - %1418 = fmul float %1416, %1417 - %1419 = fadd float %1418, 0.000000e+00 - %1420 = bitcast i32 %807 to float - %1421 = bitcast i32 %807 to float - %1422 = fmul float %1420, %1421 - %1423 = fadd float %1419, %1422 - %1424 = call float @llvm.sqrt.f32.103(float %1423) - %1425 = fneg float %813 - %1426 = fmul float %1424, %1425 - %1427 = fmul float %1426, 
0.000000e+00 - %1428 = bitcast i32 %807 to float - %1429 = fadd float %1428, %1427 - %1430 = fmul float %1415, %1429 - %1431 = fadd float %1401, %1430 - %1432 = call float @llvm.sqrt.f32.104(float %1431) - %1433 = fadd float %1432, 0.000000e+00 - %1434 = fdiv float %1373, %1433 - %1435 = fmul float %1360, %1434 - %1436 = fsub float 1.000000e+00, %1435 - %1437 = insertelement <4 x float> zeroinitializer, float %1436, i32 0 - %1438 = insertelement <4 x float> %1437, float 0.000000e+00, i32 1 - %1439 = insertelement <4 x float> %1438, float 0.000000e+00, i32 2 - %1440 = insertelement <4 x float> %1439, float 0.000000e+00, i32 3 - %1441 = getelementptr float, float* %0, i32 0 - %1442 = getelementptr inbounds float, float* %1441, i64 1 - %1443 = load float, float* %1442, align 4 - %1444 = insertelement <4 x float> zeroinitializer, float %1443, i32 0 - %1445 = insertelement <4 x float> %1444, float 0.000000e+00, i32 1 - %1446 = insertelement <4 x float> %1445, float 0.000000e+00, i32 2 - %1447 = insertelement <4 x float> %1446, float 0.000000e+00, i32 3 - %1448 = call <4 x float> @llvm.fma.f32.105(<4 x float> %1440, <4 x float> %1447, <4 x float> zeroinitializer) - %1449 = extractelement <4 x float> %1448, i32 0 - store float %1449, float* %1285, align 4 - %1450 = bitcast i32 %152 to float - %1451 = bitcast i32 %152 to float - %1452 = fmul float %1450, %1451 - %1453 = fadd float %1452, 0.000000e+00 - %1454 = bitcast i32 %807 to float - %1455 = bitcast i32 %807 to float - %1456 = fmul float %1454, %1455 - %1457 = fadd float %1453, %1456 - %1458 = call float @llvm.sqrt.f32.106(float %1457) - %1459 = fneg float %813 - %1460 = fmul float %1458, %1459 - %1461 = bitcast i32 %152 to float - %1462 = fadd float %1461, %1460 - %1463 = bitcast i32 %152 to float - %1464 = bitcast i32 %152 to float - %1465 = fmul float %1463, %1464 - %1466 = fadd float %1465, 0.000000e+00 - %1467 = bitcast i32 %807 to float - %1468 = bitcast i32 %807 to float - %1469 = fmul float %1467, %1468 - %1470 = fadd float %1466, %1469 - %1471 = call float @llvm.sqrt.f32.107(float %1470) - %1472 = fneg float %813 - %1473 = fmul float %1471, %1472 - %1474 = bitcast i32 %152 to float - %1475 = fadd float %1474, %1473 - %1476 = bitcast i32 %152 to float - %1477 = bitcast i32 %152 to float - %1478 = fmul float %1476, %1477 - %1479 = fadd float %1478, 0.000000e+00 - %1480 = bitcast i32 %807 to float - %1481 = bitcast i32 %807 to float - %1482 = fmul float %1480, %1481 - %1483 = fadd float %1479, %1482 - %1484 = call float @llvm.sqrt.f32.108(float %1483) - %1485 = fneg float %813 - %1486 = fmul float %1484, %1485 - %1487 = bitcast i32 %152 to float - %1488 = fadd float %1487, %1486 - %1489 = fmul float %1475, %1488 - %1490 = fadd float %1489, 0.000000e+00 - %1491 = bitcast i32 %152 to float - %1492 = bitcast i32 %152 to float - %1493 = fmul float %1491, %1492 - %1494 = fadd float %1493, 0.000000e+00 - %1495 = bitcast i32 %807 to float - %1496 = bitcast i32 %807 to float - %1497 = fmul float %1495, %1496 - %1498 = fadd float %1494, %1497 - %1499 = call float @llvm.sqrt.f32.109(float %1498) - %1500 = fneg float %813 - %1501 = fmul float %1499, %1500 - %1502 = fmul float %1501, 0.000000e+00 - %1503 = bitcast i32 %807 to float - %1504 = fadd float %1503, %1502 - %1505 = bitcast i32 %152 to float - %1506 = bitcast i32 %152 to float - %1507 = fmul float %1505, %1506 - %1508 = fadd float %1507, 0.000000e+00 - %1509 = bitcast i32 %807 to float - %1510 = bitcast i32 %807 to float - %1511 = fmul float %1509, %1510 - %1512 = fadd float %1508, %1511 - 
%1513 = call float @llvm.sqrt.f32.110(float %1512) - %1514 = fneg float %813 - %1515 = fmul float %1513, %1514 - %1516 = fmul float %1515, 0.000000e+00 - %1517 = bitcast i32 %807 to float - %1518 = fadd float %1517, %1516 - %1519 = fmul float %1504, %1518 - %1520 = fadd float %1490, %1519 - %1521 = call float @llvm.sqrt.f32.111(float %1520) - %1522 = fadd float %1521, 0.000000e+00 - %1523 = fdiv float %1462, %1522 - %1524 = fmul float %1523, 2.000000e+00 - %1525 = bitcast i32 %152 to float - %1526 = bitcast i32 %152 to float - %1527 = fmul float %1525, %1526 - %1528 = fadd float %1527, 0.000000e+00 - %1529 = bitcast i32 %807 to float - %1530 = bitcast i32 %807 to float - %1531 = fmul float %1529, %1530 - %1532 = fadd float %1528, %1531 - %1533 = call float @llvm.sqrt.f32.112(float %1532) - %1534 = fneg float %813 - %1535 = fmul float %1533, %1534 - %1536 = bitcast i32 %152 to float - %1537 = fadd float %1536, %1535 - %1538 = bitcast i32 %152 to float - %1539 = bitcast i32 %152 to float - %1540 = fmul float %1538, %1539 - %1541 = fadd float %1540, 0.000000e+00 - %1542 = bitcast i32 %807 to float - %1543 = bitcast i32 %807 to float - %1544 = fmul float %1542, %1543 - %1545 = fadd float %1541, %1544 - %1546 = call float @llvm.sqrt.f32.113(float %1545) - %1547 = fneg float %813 - %1548 = fmul float %1546, %1547 - %1549 = bitcast i32 %152 to float - %1550 = fadd float %1549, %1548 - %1551 = bitcast i32 %152 to float - %1552 = bitcast i32 %152 to float - %1553 = fmul float %1551, %1552 - %1554 = fadd float %1553, 0.000000e+00 - %1555 = bitcast i32 %807 to float - %1556 = bitcast i32 %807 to float - %1557 = fmul float %1555, %1556 - %1558 = fadd float %1554, %1557 - %1559 = call float @llvm.sqrt.f32.114(float %1558) - %1560 = fneg float %813 - %1561 = fmul float %1559, %1560 - %1562 = bitcast i32 %152 to float - %1563 = fadd float %1562, %1561 - %1564 = fmul float %1550, %1563 - %1565 = fadd float %1564, 0.000000e+00 - %1566 = bitcast i32 %152 to float - %1567 = bitcast i32 %152 to float - %1568 = fmul float %1566, %1567 - %1569 = fadd float %1568, 0.000000e+00 - %1570 = bitcast i32 %807 to float - %1571 = bitcast i32 %807 to float - %1572 = fmul float %1570, %1571 - %1573 = fadd float %1569, %1572 - %1574 = call float @llvm.sqrt.f32.115(float %1573) - %1575 = fneg float %813 - %1576 = fmul float %1574, %1575 - %1577 = fmul float %1576, 0.000000e+00 - %1578 = bitcast i32 %807 to float - %1579 = fadd float %1578, %1577 - %1580 = bitcast i32 %152 to float - %1581 = bitcast i32 %152 to float - %1582 = fmul float %1580, %1581 - %1583 = fadd float %1582, 0.000000e+00 - %1584 = bitcast i32 %807 to float - %1585 = bitcast i32 %807 to float - %1586 = fmul float %1584, %1585 - %1587 = fadd float %1583, %1586 - %1588 = call float @llvm.sqrt.f32.116(float %1587) - %1589 = fneg float %813 - %1590 = fmul float %1588, %1589 - %1591 = fmul float %1590, 0.000000e+00 - %1592 = bitcast i32 %807 to float - %1593 = fadd float %1592, %1591 - %1594 = fmul float %1579, %1593 - %1595 = fadd float %1565, %1594 - %1596 = call float @llvm.sqrt.f32.117(float %1595) - %1597 = fadd float %1596, 0.000000e+00 - %1598 = fdiv float %1537, %1597 - %1599 = fmul float %1524, %1598 - %1600 = fsub float 1.000000e+00, %1599 - %1601 = fmul float %1600, %1443 - %1602 = fadd float %1601, 0.000000e+00 - %1603 = bitcast i32 %152 to float - %1604 = bitcast i32 %152 to float - %1605 = fmul float %1603, %1604 - %1606 = fadd float %1605, 0.000000e+00 - %1607 = bitcast i32 %807 to float - %1608 = bitcast i32 %807 to float - %1609 = fmul float 
%1607, %1608 - %1610 = fadd float %1606, %1609 - %1611 = call float @llvm.sqrt.f32.118(float %1610) - %1612 = fneg float %813 - %1613 = fmul float %1611, %1612 - %1614 = bitcast i32 %152 to float - %1615 = fadd float %1614, %1613 - %1616 = bitcast i32 %152 to float - %1617 = bitcast i32 %152 to float - %1618 = fmul float %1616, %1617 - %1619 = fadd float %1618, 0.000000e+00 - %1620 = bitcast i32 %807 to float - %1621 = bitcast i32 %807 to float - %1622 = fmul float %1620, %1621 - %1623 = fadd float %1619, %1622 - %1624 = call float @llvm.sqrt.f32.119(float %1623) - %1625 = fneg float %813 - %1626 = fmul float %1624, %1625 - %1627 = bitcast i32 %152 to float - %1628 = fadd float %1627, %1626 - %1629 = bitcast i32 %152 to float - %1630 = bitcast i32 %152 to float - %1631 = fmul float %1629, %1630 - %1632 = fadd float %1631, 0.000000e+00 - %1633 = bitcast i32 %807 to float - %1634 = bitcast i32 %807 to float - %1635 = fmul float %1633, %1634 - %1636 = fadd float %1632, %1635 - %1637 = call float @llvm.sqrt.f32.120(float %1636) - %1638 = fneg float %813 - %1639 = fmul float %1637, %1638 - %1640 = bitcast i32 %152 to float - %1641 = fadd float %1640, %1639 - %1642 = fmul float %1628, %1641 - %1643 = fadd float %1642, 0.000000e+00 - %1644 = bitcast i32 %152 to float - %1645 = bitcast i32 %152 to float - %1646 = fmul float %1644, %1645 - %1647 = fadd float %1646, 0.000000e+00 - %1648 = bitcast i32 %807 to float - %1649 = bitcast i32 %807 to float - %1650 = fmul float %1648, %1649 - %1651 = fadd float %1647, %1650 - %1652 = call float @llvm.sqrt.f32.121(float %1651) - %1653 = fneg float %813 - %1654 = fmul float %1652, %1653 - %1655 = fmul float %1654, 0.000000e+00 - %1656 = bitcast i32 %807 to float - %1657 = fadd float %1656, %1655 - %1658 = bitcast i32 %152 to float - %1659 = bitcast i32 %152 to float - %1660 = fmul float %1658, %1659 - %1661 = fadd float %1660, 0.000000e+00 - %1662 = bitcast i32 %807 to float - %1663 = bitcast i32 %807 to float - %1664 = fmul float %1662, %1663 - %1665 = fadd float %1661, %1664 - %1666 = call float @llvm.sqrt.f32.122(float %1665) - %1667 = fneg float %813 - %1668 = fmul float %1666, %1667 - %1669 = fmul float %1668, 0.000000e+00 - %1670 = bitcast i32 %807 to float - %1671 = fadd float %1670, %1669 - %1672 = fmul float %1657, %1671 - %1673 = fadd float %1643, %1672 - %1674 = call float @llvm.sqrt.f32.123(float %1673) - %1675 = fadd float %1674, 0.000000e+00 - %1676 = fdiv float %1615, %1675 - %1677 = fmul float %1676, 2.000000e+00 - %1678 = bitcast i32 %152 to float - %1679 = bitcast i32 %152 to float - %1680 = fmul float %1678, %1679 - %1681 = fadd float %1680, 0.000000e+00 - %1682 = bitcast i32 %807 to float - %1683 = bitcast i32 %807 to float - %1684 = fmul float %1682, %1683 - %1685 = fadd float %1681, %1684 - %1686 = call float @llvm.sqrt.f32.124(float %1685) - %1687 = fneg float %813 - %1688 = fmul float %1686, %1687 - %1689 = fmul float %1688, 0.000000e+00 - %1690 = bitcast i32 %807 to float - %1691 = fadd float %1690, %1689 - %1692 = bitcast i32 %152 to float - %1693 = bitcast i32 %152 to float - %1694 = fmul float %1692, %1693 - %1695 = fadd float %1694, 0.000000e+00 - %1696 = bitcast i32 %807 to float - %1697 = bitcast i32 %807 to float - %1698 = fmul float %1696, %1697 - %1699 = fadd float %1695, %1698 - %1700 = call float @llvm.sqrt.f32.125(float %1699) - %1701 = fneg float %813 - %1702 = fmul float %1700, %1701 - %1703 = bitcast i32 %152 to float - %1704 = fadd float %1703, %1702 - %1705 = bitcast i32 %152 to float - %1706 = bitcast i32 %152 to 
float - %1707 = fmul float %1705, %1706 - %1708 = fadd float %1707, 0.000000e+00 - %1709 = bitcast i32 %807 to float - %1710 = bitcast i32 %807 to float - %1711 = fmul float %1709, %1710 - %1712 = fadd float %1708, %1711 - %1713 = call float @llvm.sqrt.f32.126(float %1712) - %1714 = fneg float %813 - %1715 = fmul float %1713, %1714 - %1716 = bitcast i32 %152 to float - %1717 = fadd float %1716, %1715 - %1718 = fmul float %1704, %1717 - %1719 = fadd float %1718, 0.000000e+00 - %1720 = bitcast i32 %152 to float - %1721 = bitcast i32 %152 to float - %1722 = fmul float %1720, %1721 - %1723 = fadd float %1722, 0.000000e+00 - %1724 = bitcast i32 %807 to float - %1725 = bitcast i32 %807 to float - %1726 = fmul float %1724, %1725 - %1727 = fadd float %1723, %1726 - %1728 = call float @llvm.sqrt.f32.127(float %1727) - %1729 = fneg float %813 - %1730 = fmul float %1728, %1729 - %1731 = fmul float %1730, 0.000000e+00 - %1732 = bitcast i32 %807 to float - %1733 = fadd float %1732, %1731 - %1734 = bitcast i32 %152 to float - %1735 = bitcast i32 %152 to float - %1736 = fmul float %1734, %1735 - %1737 = fadd float %1736, 0.000000e+00 - %1738 = bitcast i32 %807 to float - %1739 = bitcast i32 %807 to float - %1740 = fmul float %1738, %1739 - %1741 = fadd float %1737, %1740 - %1742 = call float @llvm.sqrt.f32.128(float %1741) - %1743 = fneg float %813 - %1744 = fmul float %1742, %1743 - %1745 = fmul float %1744, 0.000000e+00 - %1746 = bitcast i32 %807 to float - %1747 = fadd float %1746, %1745 - %1748 = fmul float %1733, %1747 - %1749 = fadd float %1719, %1748 - %1750 = call float @llvm.sqrt.f32.129(float %1749) - %1751 = fadd float %1750, 0.000000e+00 - %1752 = fdiv float %1691, %1751 - %1753 = fmul float %1677, %1752 - %1754 = fneg float %1753 - %1755 = load float, float* %144, align 4 - %1756 = fmul float %1754, %1755 - %1757 = fadd float %1602, %1756 - %1758 = insertelement <4 x float> zeroinitializer, float %1757, i32 0 - %1759 = insertelement <4 x float> %1758, float 0.000000e+00, i32 1 - %1760 = insertelement <4 x float> %1759, float 0.000000e+00, i32 2 - %1761 = insertelement <4 x float> %1760, float 0.000000e+00, i32 3 - %1762 = extractelement <4 x float> %1761, i32 0 - store float %1762, float* %1285, align 4 - %1763 = extractelement <4 x float> %1761, i32 1 - %1764 = getelementptr float, float* %2, i32 0 - %1765 = getelementptr inbounds float, float* %1764, i64 2 - store float %1763, float* %1765, align 4 - %1766 = bitcast i32 %152 to float - %1767 = bitcast i32 %152 to float - %1768 = fmul float %1766, %1767 - %1769 = fadd float %1768, 0.000000e+00 - %1770 = bitcast i32 %807 to float - %1771 = bitcast i32 %807 to float - %1772 = fmul float %1770, %1771 - %1773 = fadd float %1769, %1772 - %1774 = call float @llvm.sqrt.f32.130(float %1773) - %1775 = fneg float %813 - %1776 = fmul float %1774, %1775 - %1777 = fmul float %1776, 0.000000e+00 - %1778 = bitcast i32 %807 to float - %1779 = fadd float %1778, %1777 - %1780 = bitcast i32 %152 to float - %1781 = bitcast i32 %152 to float - %1782 = fmul float %1780, %1781 - %1783 = fadd float %1782, 0.000000e+00 - %1784 = bitcast i32 %807 to float - %1785 = bitcast i32 %807 to float - %1786 = fmul float %1784, %1785 - %1787 = fadd float %1783, %1786 - %1788 = call float @llvm.sqrt.f32.131(float %1787) - %1789 = fneg float %813 - %1790 = fmul float %1788, %1789 - %1791 = bitcast i32 %152 to float - %1792 = fadd float %1791, %1790 - %1793 = bitcast i32 %152 to float - %1794 = bitcast i32 %152 to float - %1795 = fmul float %1793, %1794 - %1796 = fadd float 
%1795, 0.000000e+00 - %1797 = bitcast i32 %807 to float - %1798 = bitcast i32 %807 to float - %1799 = fmul float %1797, %1798 - %1800 = fadd float %1796, %1799 - %1801 = call float @llvm.sqrt.f32.132(float %1800) - %1802 = fneg float %813 - %1803 = fmul float %1801, %1802 - %1804 = bitcast i32 %152 to float - %1805 = fadd float %1804, %1803 - %1806 = fmul float %1792, %1805 - %1807 = fadd float %1806, 0.000000e+00 - %1808 = bitcast i32 %152 to float - %1809 = bitcast i32 %152 to float - %1810 = fmul float %1808, %1809 - %1811 = fadd float %1810, 0.000000e+00 - %1812 = bitcast i32 %807 to float - %1813 = bitcast i32 %807 to float - %1814 = fmul float %1812, %1813 - %1815 = fadd float %1811, %1814 - %1816 = call float @llvm.sqrt.f32.133(float %1815) - %1817 = fneg float %813 - %1818 = fmul float %1816, %1817 - %1819 = fmul float %1818, 0.000000e+00 - %1820 = bitcast i32 %807 to float - %1821 = fadd float %1820, %1819 - %1822 = bitcast i32 %152 to float - %1823 = bitcast i32 %152 to float - %1824 = fmul float %1822, %1823 - %1825 = fadd float %1824, 0.000000e+00 - %1826 = bitcast i32 %807 to float - %1827 = bitcast i32 %807 to float - %1828 = fmul float %1826, %1827 - %1829 = fadd float %1825, %1828 - %1830 = call float @llvm.sqrt.f32.134(float %1829) - %1831 = fneg float %813 - %1832 = fmul float %1830, %1831 - %1833 = fmul float %1832, 0.000000e+00 - %1834 = bitcast i32 %807 to float - %1835 = fadd float %1834, %1833 - %1836 = fmul float %1821, %1835 - %1837 = fadd float %1807, %1836 - %1838 = call float @llvm.sqrt.f32.135(float %1837) - %1839 = fadd float %1838, 0.000000e+00 - %1840 = fdiv float %1779, %1839 - %1841 = fmul float %1840, 2.000000e+00 - %1842 = bitcast i32 %152 to float - %1843 = bitcast i32 %152 to float - %1844 = fmul float %1842, %1843 - %1845 = fadd float %1844, 0.000000e+00 - %1846 = bitcast i32 %807 to float - %1847 = bitcast i32 %807 to float - %1848 = fmul float %1846, %1847 - %1849 = fadd float %1845, %1848 - %1850 = call float @llvm.sqrt.f32.136(float %1849) - %1851 = fneg float %813 - %1852 = fmul float %1850, %1851 - %1853 = bitcast i32 %152 to float - %1854 = fadd float %1853, %1852 - %1855 = bitcast i32 %152 to float - %1856 = bitcast i32 %152 to float - %1857 = fmul float %1855, %1856 - %1858 = fadd float %1857, 0.000000e+00 - %1859 = bitcast i32 %807 to float - %1860 = bitcast i32 %807 to float - %1861 = fmul float %1859, %1860 - %1862 = fadd float %1858, %1861 - %1863 = call float @llvm.sqrt.f32.137(float %1862) - %1864 = fneg float %813 - %1865 = fmul float %1863, %1864 - %1866 = bitcast i32 %152 to float - %1867 = fadd float %1866, %1865 - %1868 = bitcast i32 %152 to float - %1869 = bitcast i32 %152 to float - %1870 = fmul float %1868, %1869 - %1871 = fadd float %1870, 0.000000e+00 - %1872 = bitcast i32 %807 to float - %1873 = bitcast i32 %807 to float - %1874 = fmul float %1872, %1873 - %1875 = fadd float %1871, %1874 - %1876 = call float @llvm.sqrt.f32.138(float %1875) - %1877 = fneg float %813 - %1878 = fmul float %1876, %1877 - %1879 = bitcast i32 %152 to float - %1880 = fadd float %1879, %1878 - %1881 = fmul float %1867, %1880 - %1882 = fadd float %1881, 0.000000e+00 - %1883 = bitcast i32 %152 to float - %1884 = bitcast i32 %152 to float - %1885 = fmul float %1883, %1884 - %1886 = fadd float %1885, 0.000000e+00 - %1887 = bitcast i32 %807 to float - %1888 = bitcast i32 %807 to float - %1889 = fmul float %1887, %1888 - %1890 = fadd float %1886, %1889 - %1891 = call float @llvm.sqrt.f32.139(float %1890) - %1892 = fneg float %813 - %1893 = fmul float 
%1891, %1892 - %1894 = fmul float %1893, 0.000000e+00 - %1895 = bitcast i32 %807 to float - %1896 = fadd float %1895, %1894 - %1897 = bitcast i32 %152 to float - %1898 = bitcast i32 %152 to float - %1899 = fmul float %1897, %1898 - %1900 = fadd float %1899, 0.000000e+00 - %1901 = bitcast i32 %807 to float - %1902 = bitcast i32 %807 to float - %1903 = fmul float %1901, %1902 - %1904 = fadd float %1900, %1903 - %1905 = call float @llvm.sqrt.f32.140(float %1904) - %1906 = fneg float %813 - %1907 = fmul float %1905, %1906 - %1908 = fmul float %1907, 0.000000e+00 - %1909 = bitcast i32 %807 to float - %1910 = fadd float %1909, %1908 - %1911 = fmul float %1896, %1910 - %1912 = fadd float %1882, %1911 - %1913 = call float @llvm.sqrt.f32.141(float %1912) - %1914 = fadd float %1913, 0.000000e+00 - %1915 = fdiv float %1854, %1914 - %1916 = fmul float %1841, %1915 - %1917 = fneg float %1916 - %1918 = insertelement <4 x float> zeroinitializer, float %1917, i32 0 - %1919 = insertelement <4 x float> %1918, float 0.000000e+00, i32 1 - %1920 = insertelement <4 x float> %1919, float 0.000000e+00, i32 2 - %1921 = insertelement <4 x float> %1920, float 0.000000e+00, i32 3 - %1922 = getelementptr float, float* %0, i32 0 - %1923 = load float, float* %1922, align 4 - %1924 = insertelement <4 x float> zeroinitializer, float %1923, i32 0 - %1925 = insertelement <4 x float> %1924, float 0.000000e+00, i32 1 - %1926 = insertelement <4 x float> %1925, float 0.000000e+00, i32 2 - %1927 = insertelement <4 x float> %1926, float 0.000000e+00, i32 3 - %1928 = call <4 x float> @llvm.fma.f32.142(<4 x float> %1921, <4 x float> %1927, <4 x float> zeroinitializer) - %1929 = extractelement <4 x float> %1928, i32 0 - store float %1929, float* %1765, align 4 - %1930 = bitcast i32 %152 to float - %1931 = bitcast i32 %152 to float - %1932 = fmul float %1930, %1931 - %1933 = fadd float %1932, 0.000000e+00 - %1934 = bitcast i32 %807 to float - %1935 = bitcast i32 %807 to float - %1936 = fmul float %1934, %1935 - %1937 = fadd float %1933, %1936 - %1938 = call float @llvm.sqrt.f32.143(float %1937) - %1939 = fneg float %813 - %1940 = fmul float %1938, %1939 - %1941 = fmul float %1940, 0.000000e+00 - %1942 = bitcast i32 %807 to float - %1943 = fadd float %1942, %1941 - %1944 = bitcast i32 %152 to float - %1945 = bitcast i32 %152 to float - %1946 = fmul float %1944, %1945 - %1947 = fadd float %1946, 0.000000e+00 - %1948 = bitcast i32 %807 to float - %1949 = bitcast i32 %807 to float - %1950 = fmul float %1948, %1949 - %1951 = fadd float %1947, %1950 - %1952 = call float @llvm.sqrt.f32.144(float %1951) - %1953 = fneg float %813 - %1954 = fmul float %1952, %1953 - %1955 = bitcast i32 %152 to float - %1956 = fadd float %1955, %1954 - %1957 = bitcast i32 %152 to float - %1958 = bitcast i32 %152 to float - %1959 = fmul float %1957, %1958 - %1960 = fadd float %1959, 0.000000e+00 - %1961 = bitcast i32 %807 to float - %1962 = bitcast i32 %807 to float - %1963 = fmul float %1961, %1962 - %1964 = fadd float %1960, %1963 - %1965 = call float @llvm.sqrt.f32.145(float %1964) - %1966 = fneg float %813 - %1967 = fmul float %1965, %1966 - %1968 = bitcast i32 %152 to float - %1969 = fadd float %1968, %1967 - %1970 = fmul float %1956, %1969 - %1971 = fadd float %1970, 0.000000e+00 - %1972 = bitcast i32 %152 to float - %1973 = bitcast i32 %152 to float - %1974 = fmul float %1972, %1973 - %1975 = fadd float %1974, 0.000000e+00 - %1976 = bitcast i32 %807 to float - %1977 = bitcast i32 %807 to float - %1978 = fmul float %1976, %1977 - %1979 = fadd float %1975, 
%1978 - %1980 = call float @llvm.sqrt.f32.146(float %1979) - %1981 = fneg float %813 - %1982 = fmul float %1980, %1981 - %1983 = fmul float %1982, 0.000000e+00 - %1984 = bitcast i32 %807 to float - %1985 = fadd float %1984, %1983 - %1986 = bitcast i32 %152 to float - %1987 = bitcast i32 %152 to float - %1988 = fmul float %1986, %1987 - %1989 = fadd float %1988, 0.000000e+00 - %1990 = bitcast i32 %807 to float - %1991 = bitcast i32 %807 to float - %1992 = fmul float %1990, %1991 - %1993 = fadd float %1989, %1992 - %1994 = call float @llvm.sqrt.f32.147(float %1993) - %1995 = fneg float %813 - %1996 = fmul float %1994, %1995 - %1997 = fmul float %1996, 0.000000e+00 - %1998 = bitcast i32 %807 to float - %1999 = fadd float %1998, %1997 - %2000 = fmul float %1985, %1999 - %2001 = fadd float %1971, %2000 - %2002 = call float @llvm.sqrt.f32.148(float %2001) - %2003 = fadd float %2002, 0.000000e+00 - %2004 = fdiv float %1943, %2003 - %2005 = fmul float %2004, 2.000000e+00 - %2006 = bitcast i32 %152 to float - %2007 = bitcast i32 %152 to float - %2008 = fmul float %2006, %2007 - %2009 = fadd float %2008, 0.000000e+00 - %2010 = bitcast i32 %807 to float - %2011 = bitcast i32 %807 to float - %2012 = fmul float %2010, %2011 - %2013 = fadd float %2009, %2012 - %2014 = call float @llvm.sqrt.f32.149(float %2013) - %2015 = fneg float %813 - %2016 = fmul float %2014, %2015 - %2017 = bitcast i32 %152 to float - %2018 = fadd float %2017, %2016 - %2019 = bitcast i32 %152 to float - %2020 = bitcast i32 %152 to float - %2021 = fmul float %2019, %2020 - %2022 = fadd float %2021, 0.000000e+00 - %2023 = bitcast i32 %807 to float - %2024 = bitcast i32 %807 to float - %2025 = fmul float %2023, %2024 - %2026 = fadd float %2022, %2025 - %2027 = call float @llvm.sqrt.f32.150(float %2026) - %2028 = fneg float %813 - %2029 = fmul float %2027, %2028 - %2030 = bitcast i32 %152 to float - %2031 = fadd float %2030, %2029 - %2032 = bitcast i32 %152 to float - %2033 = bitcast i32 %152 to float - %2034 = fmul float %2032, %2033 - %2035 = fadd float %2034, 0.000000e+00 - %2036 = bitcast i32 %807 to float - %2037 = bitcast i32 %807 to float - %2038 = fmul float %2036, %2037 - %2039 = fadd float %2035, %2038 - %2040 = call float @llvm.sqrt.f32.151(float %2039) - %2041 = fneg float %813 - %2042 = fmul float %2040, %2041 - %2043 = bitcast i32 %152 to float - %2044 = fadd float %2043, %2042 - %2045 = fmul float %2031, %2044 - %2046 = fadd float %2045, 0.000000e+00 - %2047 = bitcast i32 %152 to float - %2048 = bitcast i32 %152 to float - %2049 = fmul float %2047, %2048 - %2050 = fadd float %2049, 0.000000e+00 - %2051 = bitcast i32 %807 to float - %2052 = bitcast i32 %807 to float - %2053 = fmul float %2051, %2052 - %2054 = fadd float %2050, %2053 - %2055 = call float @llvm.sqrt.f32.152(float %2054) - %2056 = fneg float %813 - %2057 = fmul float %2055, %2056 - %2058 = fmul float %2057, 0.000000e+00 - %2059 = bitcast i32 %807 to float - %2060 = fadd float %2059, %2058 - %2061 = bitcast i32 %152 to float - %2062 = bitcast i32 %152 to float - %2063 = fmul float %2061, %2062 - %2064 = fadd float %2063, 0.000000e+00 - %2065 = bitcast i32 %807 to float - %2066 = bitcast i32 %807 to float - %2067 = fmul float %2065, %2066 - %2068 = fadd float %2064, %2067 - %2069 = call float @llvm.sqrt.f32.153(float %2068) - %2070 = fneg float %813 - %2071 = fmul float %2069, %2070 - %2072 = fmul float %2071, 0.000000e+00 - %2073 = bitcast i32 %807 to float - %2074 = fadd float %2073, %2072 - %2075 = fmul float %2060, %2074 - %2076 = fadd float %2046, %2075 - 
%2077 = call float @llvm.sqrt.f32.154(float %2076) - %2078 = fadd float %2077, 0.000000e+00 - %2079 = fdiv float %2018, %2078 - %2080 = fmul float %2005, %2079 - %2081 = fneg float %2080 - %2082 = fmul float %2081, %1923 - %2083 = fadd float %2082, 0.000000e+00 - %2084 = bitcast i32 %152 to float - %2085 = bitcast i32 %152 to float - %2086 = fmul float %2084, %2085 - %2087 = fadd float %2086, 0.000000e+00 - %2088 = bitcast i32 %807 to float - %2089 = bitcast i32 %807 to float - %2090 = fmul float %2088, %2089 - %2091 = fadd float %2087, %2090 - %2092 = call float @llvm.sqrt.f32.155(float %2091) - %2093 = fneg float %813 - %2094 = fmul float %2092, %2093 - %2095 = fmul float %2094, 0.000000e+00 - %2096 = bitcast i32 %807 to float - %2097 = fadd float %2096, %2095 - %2098 = bitcast i32 %152 to float - %2099 = bitcast i32 %152 to float - %2100 = fmul float %2098, %2099 - %2101 = fadd float %2100, 0.000000e+00 - %2102 = bitcast i32 %807 to float - %2103 = bitcast i32 %807 to float - %2104 = fmul float %2102, %2103 - %2105 = fadd float %2101, %2104 - %2106 = call float @llvm.sqrt.f32.156(float %2105) - %2107 = fneg float %813 - %2108 = fmul float %2106, %2107 - %2109 = bitcast i32 %152 to float - %2110 = fadd float %2109, %2108 - %2111 = bitcast i32 %152 to float - %2112 = bitcast i32 %152 to float - %2113 = fmul float %2111, %2112 - %2114 = fadd float %2113, 0.000000e+00 - %2115 = bitcast i32 %807 to float - %2116 = bitcast i32 %807 to float - %2117 = fmul float %2115, %2116 - %2118 = fadd float %2114, %2117 - %2119 = call float @llvm.sqrt.f32.157(float %2118) - %2120 = fneg float %813 - %2121 = fmul float %2119, %2120 - %2122 = bitcast i32 %152 to float - %2123 = fadd float %2122, %2121 - %2124 = fmul float %2110, %2123 - %2125 = fadd float %2124, 0.000000e+00 - %2126 = bitcast i32 %152 to float - %2127 = bitcast i32 %152 to float - %2128 = fmul float %2126, %2127 - %2129 = fadd float %2128, 0.000000e+00 - %2130 = bitcast i32 %807 to float - %2131 = bitcast i32 %807 to float - %2132 = fmul float %2130, %2131 - %2133 = fadd float %2129, %2132 - %2134 = call float @llvm.sqrt.f32.158(float %2133) - %2135 = fneg float %813 - %2136 = fmul float %2134, %2135 - %2137 = fmul float %2136, 0.000000e+00 - %2138 = bitcast i32 %807 to float - %2139 = fadd float %2138, %2137 - %2140 = bitcast i32 %152 to float - %2141 = bitcast i32 %152 to float - %2142 = fmul float %2140, %2141 - %2143 = fadd float %2142, 0.000000e+00 - %2144 = bitcast i32 %807 to float - %2145 = bitcast i32 %807 to float - %2146 = fmul float %2144, %2145 - %2147 = fadd float %2143, %2146 - %2148 = call float @llvm.sqrt.f32.159(float %2147) - %2149 = fneg float %813 - %2150 = fmul float %2148, %2149 - %2151 = fmul float %2150, 0.000000e+00 - %2152 = bitcast i32 %807 to float - %2153 = fadd float %2152, %2151 - %2154 = fmul float %2139, %2153 - %2155 = fadd float %2125, %2154 - %2156 = call float @llvm.sqrt.f32.160(float %2155) - %2157 = fadd float %2156, 0.000000e+00 - %2158 = fdiv float %2097, %2157 - %2159 = fmul float %2158, 2.000000e+00 - %2160 = bitcast i32 %152 to float - %2161 = bitcast i32 %152 to float - %2162 = fmul float %2160, %2161 - %2163 = fadd float %2162, 0.000000e+00 - %2164 = bitcast i32 %807 to float - %2165 = bitcast i32 %807 to float - %2166 = fmul float %2164, %2165 - %2167 = fadd float %2163, %2166 - %2168 = call float @llvm.sqrt.f32.161(float %2167) - %2169 = fneg float %813 - %2170 = fmul float %2168, %2169 - %2171 = fmul float %2170, 0.000000e+00 - %2172 = bitcast i32 %807 to float - %2173 = fadd float %2172, 
%2171 - %2174 = bitcast i32 %152 to float - %2175 = bitcast i32 %152 to float - %2176 = fmul float %2174, %2175 - %2177 = fadd float %2176, 0.000000e+00 - %2178 = bitcast i32 %807 to float - %2179 = bitcast i32 %807 to float - %2180 = fmul float %2178, %2179 - %2181 = fadd float %2177, %2180 - %2182 = call float @llvm.sqrt.f32.162(float %2181) - %2183 = fneg float %813 - %2184 = fmul float %2182, %2183 - %2185 = bitcast i32 %152 to float - %2186 = fadd float %2185, %2184 - %2187 = bitcast i32 %152 to float - %2188 = bitcast i32 %152 to float - %2189 = fmul float %2187, %2188 - %2190 = fadd float %2189, 0.000000e+00 - %2191 = bitcast i32 %807 to float - %2192 = bitcast i32 %807 to float - %2193 = fmul float %2191, %2192 - %2194 = fadd float %2190, %2193 - %2195 = call float @llvm.sqrt.f32.163(float %2194) - %2196 = fneg float %813 - %2197 = fmul float %2195, %2196 - %2198 = bitcast i32 %152 to float - %2199 = fadd float %2198, %2197 - %2200 = fmul float %2186, %2199 - %2201 = fadd float %2200, 0.000000e+00 - %2202 = bitcast i32 %152 to float - %2203 = bitcast i32 %152 to float - %2204 = fmul float %2202, %2203 - %2205 = fadd float %2204, 0.000000e+00 - %2206 = bitcast i32 %807 to float - %2207 = bitcast i32 %807 to float - %2208 = fmul float %2206, %2207 - %2209 = fadd float %2205, %2208 - %2210 = call float @llvm.sqrt.f32.164(float %2209) - %2211 = fneg float %813 - %2212 = fmul float %2210, %2211 - %2213 = fmul float %2212, 0.000000e+00 - %2214 = bitcast i32 %807 to float - %2215 = fadd float %2214, %2213 - %2216 = bitcast i32 %152 to float - %2217 = bitcast i32 %152 to float - %2218 = fmul float %2216, %2217 - %2219 = fadd float %2218, 0.000000e+00 - %2220 = bitcast i32 %807 to float - %2221 = bitcast i32 %807 to float - %2222 = fmul float %2220, %2221 - %2223 = fadd float %2219, %2222 - %2224 = call float @llvm.sqrt.f32.165(float %2223) - %2225 = fneg float %813 - %2226 = fmul float %2224, %2225 - %2227 = fmul float %2226, 0.000000e+00 - %2228 = bitcast i32 %807 to float - %2229 = fadd float %2228, %2227 - %2230 = fmul float %2215, %2229 - %2231 = fadd float %2201, %2230 - %2232 = call float @llvm.sqrt.f32.166(float %2231) - %2233 = fadd float %2232, 0.000000e+00 - %2234 = fdiv float %2173, %2233 - %2235 = fmul float %2159, %2234 - %2236 = fsub float 1.000000e+00, %2235 - %2237 = load float, float* %1274, align 4 - %2238 = fmul float %2236, %2237 - %2239 = fadd float %2083, %2238 - %2240 = insertelement <4 x float> zeroinitializer, float %2239, i32 0 - %2241 = insertelement <4 x float> %2240, float 0.000000e+00, i32 1 - %2242 = insertelement <4 x float> %2241, float 0.000000e+00, i32 2 - %2243 = insertelement <4 x float> %2242, float 0.000000e+00, i32 3 - %2244 = extractelement <4 x float> %2243, i32 0 - store float %2244, float* %1765, align 4 - %2245 = extractelement <4 x float> %2243, i32 1 - %2246 = getelementptr float, float* %2, i32 0 - %2247 = getelementptr inbounds float, float* %2246, i64 3 - store float %2245, float* %2247, align 4 - %2248 = bitcast i32 %152 to float - %2249 = bitcast i32 %152 to float - %2250 = fmul float %2248, %2249 - %2251 = fadd float %2250, 0.000000e+00 - %2252 = bitcast i32 %807 to float - %2253 = bitcast i32 %807 to float - %2254 = fmul float %2252, %2253 - %2255 = fadd float %2251, %2254 - %2256 = call float @llvm.sqrt.f32.167(float %2255) - %2257 = fneg float %813 - %2258 = fmul float %2256, %2257 - %2259 = fmul float %2258, 0.000000e+00 - %2260 = bitcast i32 %807 to float - %2261 = fadd float %2260, %2259 - %2262 = bitcast i32 %152 to float - %2263 
= bitcast i32 %152 to float - %2264 = fmul float %2262, %2263 - %2265 = fadd float %2264, 0.000000e+00 - %2266 = bitcast i32 %807 to float - %2267 = bitcast i32 %807 to float - %2268 = fmul float %2266, %2267 - %2269 = fadd float %2265, %2268 - %2270 = call float @llvm.sqrt.f32.168(float %2269) - %2271 = fneg float %813 - %2272 = fmul float %2270, %2271 - %2273 = bitcast i32 %152 to float - %2274 = fadd float %2273, %2272 - %2275 = bitcast i32 %152 to float - %2276 = bitcast i32 %152 to float - %2277 = fmul float %2275, %2276 - %2278 = fadd float %2277, 0.000000e+00 - %2279 = bitcast i32 %807 to float - %2280 = bitcast i32 %807 to float - %2281 = fmul float %2279, %2280 - %2282 = fadd float %2278, %2281 - %2283 = call float @llvm.sqrt.f32.169(float %2282) - %2284 = fneg float %813 - %2285 = fmul float %2283, %2284 - %2286 = bitcast i32 %152 to float - %2287 = fadd float %2286, %2285 - %2288 = fmul float %2274, %2287 - %2289 = fadd float %2288, 0.000000e+00 - %2290 = bitcast i32 %152 to float - %2291 = bitcast i32 %152 to float - %2292 = fmul float %2290, %2291 - %2293 = fadd float %2292, 0.000000e+00 - %2294 = bitcast i32 %807 to float - %2295 = bitcast i32 %807 to float - %2296 = fmul float %2294, %2295 - %2297 = fadd float %2293, %2296 - %2298 = call float @llvm.sqrt.f32.170(float %2297) - %2299 = fneg float %813 - %2300 = fmul float %2298, %2299 - %2301 = fmul float %2300, 0.000000e+00 - %2302 = bitcast i32 %807 to float - %2303 = fadd float %2302, %2301 - %2304 = bitcast i32 %152 to float - %2305 = bitcast i32 %152 to float - %2306 = fmul float %2304, %2305 - %2307 = fadd float %2306, 0.000000e+00 - %2308 = bitcast i32 %807 to float - %2309 = bitcast i32 %807 to float - %2310 = fmul float %2308, %2309 - %2311 = fadd float %2307, %2310 - %2312 = call float @llvm.sqrt.f32.171(float %2311) - %2313 = fneg float %813 - %2314 = fmul float %2312, %2313 - %2315 = fmul float %2314, 0.000000e+00 - %2316 = bitcast i32 %807 to float - %2317 = fadd float %2316, %2315 - %2318 = fmul float %2303, %2317 - %2319 = fadd float %2289, %2318 - %2320 = call float @llvm.sqrt.f32.172(float %2319) - %2321 = fadd float %2320, 0.000000e+00 - %2322 = fdiv float %2261, %2321 - %2323 = fmul float %2322, 2.000000e+00 - %2324 = bitcast i32 %152 to float - %2325 = bitcast i32 %152 to float - %2326 = fmul float %2324, %2325 - %2327 = fadd float %2326, 0.000000e+00 - %2328 = bitcast i32 %807 to float - %2329 = bitcast i32 %807 to float - %2330 = fmul float %2328, %2329 - %2331 = fadd float %2327, %2330 - %2332 = call float @llvm.sqrt.f32.173(float %2331) - %2333 = fneg float %813 - %2334 = fmul float %2332, %2333 - %2335 = bitcast i32 %152 to float - %2336 = fadd float %2335, %2334 - %2337 = bitcast i32 %152 to float - %2338 = bitcast i32 %152 to float - %2339 = fmul float %2337, %2338 - %2340 = fadd float %2339, 0.000000e+00 - %2341 = bitcast i32 %807 to float - %2342 = bitcast i32 %807 to float - %2343 = fmul float %2341, %2342 - %2344 = fadd float %2340, %2343 - %2345 = call float @llvm.sqrt.f32.174(float %2344) - %2346 = fneg float %813 - %2347 = fmul float %2345, %2346 - %2348 = bitcast i32 %152 to float - %2349 = fadd float %2348, %2347 - %2350 = bitcast i32 %152 to float - %2351 = bitcast i32 %152 to float - %2352 = fmul float %2350, %2351 - %2353 = fadd float %2352, 0.000000e+00 - %2354 = bitcast i32 %807 to float - %2355 = bitcast i32 %807 to float - %2356 = fmul float %2354, %2355 - %2357 = fadd float %2353, %2356 - %2358 = call float @llvm.sqrt.f32.175(float %2357) - %2359 = fneg float %813 - %2360 = fmul 
float %2358, %2359 - %2361 = bitcast i32 %152 to float - %2362 = fadd float %2361, %2360 - %2363 = fmul float %2349, %2362 - %2364 = fadd float %2363, 0.000000e+00 - %2365 = bitcast i32 %152 to float - %2366 = bitcast i32 %152 to float - %2367 = fmul float %2365, %2366 - %2368 = fadd float %2367, 0.000000e+00 - %2369 = bitcast i32 %807 to float - %2370 = bitcast i32 %807 to float - %2371 = fmul float %2369, %2370 - %2372 = fadd float %2368, %2371 - %2373 = call float @llvm.sqrt.f32.176(float %2372) - %2374 = fneg float %813 - %2375 = fmul float %2373, %2374 - %2376 = fmul float %2375, 0.000000e+00 - %2377 = bitcast i32 %807 to float - %2378 = fadd float %2377, %2376 - %2379 = bitcast i32 %152 to float - %2380 = bitcast i32 %152 to float - %2381 = fmul float %2379, %2380 - %2382 = fadd float %2381, 0.000000e+00 - %2383 = bitcast i32 %807 to float - %2384 = bitcast i32 %807 to float - %2385 = fmul float %2383, %2384 - %2386 = fadd float %2382, %2385 - %2387 = call float @llvm.sqrt.f32.177(float %2386) - %2388 = fneg float %813 - %2389 = fmul float %2387, %2388 - %2390 = fmul float %2389, 0.000000e+00 - %2391 = bitcast i32 %807 to float - %2392 = fadd float %2391, %2390 - %2393 = fmul float %2378, %2392 - %2394 = fadd float %2364, %2393 - %2395 = call float @llvm.sqrt.f32.178(float %2394) - %2396 = fadd float %2395, 0.000000e+00 - %2397 = fdiv float %2336, %2396 - %2398 = fmul float %2323, %2397 - %2399 = fneg float %2398 - %2400 = insertelement <4 x float> zeroinitializer, float %2399, i32 0 - %2401 = insertelement <4 x float> %2400, float 0.000000e+00, i32 1 - %2402 = insertelement <4 x float> %2401, float 0.000000e+00, i32 2 - %2403 = insertelement <4 x float> %2402, float 0.000000e+00, i32 3 - %2404 = load float, float* %1442, align 4 - %2405 = insertelement <4 x float> zeroinitializer, float %2404, i32 0 - %2406 = insertelement <4 x float> %2405, float 0.000000e+00, i32 1 - %2407 = insertelement <4 x float> %2406, float 0.000000e+00, i32 2 - %2408 = insertelement <4 x float> %2407, float 0.000000e+00, i32 3 - %2409 = call <4 x float> @llvm.fma.f32.179(<4 x float> %2403, <4 x float> %2408, <4 x float> zeroinitializer) - %2410 = extractelement <4 x float> %2409, i32 0 - store float %2410, float* %2247, align 4 - %2411 = bitcast i32 %152 to float - %2412 = bitcast i32 %152 to float - %2413 = fmul float %2411, %2412 - %2414 = fadd float %2413, 0.000000e+00 - %2415 = bitcast i32 %807 to float - %2416 = bitcast i32 %807 to float - %2417 = fmul float %2415, %2416 - %2418 = fadd float %2414, %2417 - %2419 = call float @llvm.sqrt.f32.180(float %2418) - %2420 = fneg float %813 - %2421 = fmul float %2419, %2420 - %2422 = fmul float %2421, 0.000000e+00 - %2423 = bitcast i32 %807 to float - %2424 = fadd float %2423, %2422 - %2425 = bitcast i32 %152 to float - %2426 = bitcast i32 %152 to float - %2427 = fmul float %2425, %2426 - %2428 = fadd float %2427, 0.000000e+00 - %2429 = bitcast i32 %807 to float - %2430 = bitcast i32 %807 to float - %2431 = fmul float %2429, %2430 - %2432 = fadd float %2428, %2431 - %2433 = call float @llvm.sqrt.f32.181(float %2432) - %2434 = fneg float %813 - %2435 = fmul float %2433, %2434 - %2436 = bitcast i32 %152 to float - %2437 = fadd float %2436, %2435 - %2438 = bitcast i32 %152 to float - %2439 = bitcast i32 %152 to float - %2440 = fmul float %2438, %2439 - %2441 = fadd float %2440, 0.000000e+00 - %2442 = bitcast i32 %807 to float - %2443 = bitcast i32 %807 to float - %2444 = fmul float %2442, %2443 - %2445 = fadd float %2441, %2444 - %2446 = call float 
@llvm.sqrt.f32.182(float %2445) - %2447 = fneg float %813 - %2448 = fmul float %2446, %2447 - %2449 = bitcast i32 %152 to float - %2450 = fadd float %2449, %2448 - %2451 = fmul float %2437, %2450 - %2452 = fadd float %2451, 0.000000e+00 - %2453 = bitcast i32 %152 to float - %2454 = bitcast i32 %152 to float - %2455 = fmul float %2453, %2454 - %2456 = fadd float %2455, 0.000000e+00 - %2457 = bitcast i32 %807 to float - %2458 = bitcast i32 %807 to float - %2459 = fmul float %2457, %2458 - %2460 = fadd float %2456, %2459 - %2461 = call float @llvm.sqrt.f32.183(float %2460) - %2462 = fneg float %813 - %2463 = fmul float %2461, %2462 - %2464 = fmul float %2463, 0.000000e+00 - %2465 = bitcast i32 %807 to float - %2466 = fadd float %2465, %2464 - %2467 = bitcast i32 %152 to float - %2468 = bitcast i32 %152 to float - %2469 = fmul float %2467, %2468 - %2470 = fadd float %2469, 0.000000e+00 - %2471 = bitcast i32 %807 to float - %2472 = bitcast i32 %807 to float - %2473 = fmul float %2471, %2472 - %2474 = fadd float %2470, %2473 - %2475 = call float @llvm.sqrt.f32.184(float %2474) - %2476 = fneg float %813 - %2477 = fmul float %2475, %2476 - %2478 = fmul float %2477, 0.000000e+00 - %2479 = bitcast i32 %807 to float - %2480 = fadd float %2479, %2478 - %2481 = fmul float %2466, %2480 - %2482 = fadd float %2452, %2481 - %2483 = call float @llvm.sqrt.f32.185(float %2482) - %2484 = fadd float %2483, 0.000000e+00 - %2485 = fdiv float %2424, %2484 - %2486 = fmul float %2485, 2.000000e+00 - %2487 = bitcast i32 %152 to float - %2488 = bitcast i32 %152 to float - %2489 = fmul float %2487, %2488 - %2490 = fadd float %2489, 0.000000e+00 - %2491 = bitcast i32 %807 to float - %2492 = bitcast i32 %807 to float - %2493 = fmul float %2491, %2492 - %2494 = fadd float %2490, %2493 - %2495 = call float @llvm.sqrt.f32.186(float %2494) - %2496 = fneg float %813 - %2497 = fmul float %2495, %2496 - %2498 = bitcast i32 %152 to float - %2499 = fadd float %2498, %2497 - %2500 = bitcast i32 %152 to float - %2501 = bitcast i32 %152 to float - %2502 = fmul float %2500, %2501 - %2503 = fadd float %2502, 0.000000e+00 - %2504 = bitcast i32 %807 to float - %2505 = bitcast i32 %807 to float - %2506 = fmul float %2504, %2505 - %2507 = fadd float %2503, %2506 - %2508 = call float @llvm.sqrt.f32.187(float %2507) - %2509 = fneg float %813 - %2510 = fmul float %2508, %2509 - %2511 = bitcast i32 %152 to float - %2512 = fadd float %2511, %2510 - %2513 = bitcast i32 %152 to float - %2514 = bitcast i32 %152 to float - %2515 = fmul float %2513, %2514 - %2516 = fadd float %2515, 0.000000e+00 - %2517 = bitcast i32 %807 to float - %2518 = bitcast i32 %807 to float - %2519 = fmul float %2517, %2518 - %2520 = fadd float %2516, %2519 - %2521 = call float @llvm.sqrt.f32.188(float %2520) - %2522 = fneg float %813 - %2523 = fmul float %2521, %2522 - %2524 = bitcast i32 %152 to float - %2525 = fadd float %2524, %2523 - %2526 = fmul float %2512, %2525 - %2527 = fadd float %2526, 0.000000e+00 - %2528 = bitcast i32 %152 to float - %2529 = bitcast i32 %152 to float - %2530 = fmul float %2528, %2529 - %2531 = fadd float %2530, 0.000000e+00 - %2532 = bitcast i32 %807 to float - %2533 = bitcast i32 %807 to float - %2534 = fmul float %2532, %2533 - %2535 = fadd float %2531, %2534 - %2536 = call float @llvm.sqrt.f32.189(float %2535) - %2537 = fneg float %813 - %2538 = fmul float %2536, %2537 - %2539 = fmul float %2538, 0.000000e+00 - %2540 = bitcast i32 %807 to float - %2541 = fadd float %2540, %2539 - %2542 = bitcast i32 %152 to float - %2543 = bitcast i32 %152 
to float - %2544 = fmul float %2542, %2543 - %2545 = fadd float %2544, 0.000000e+00 - %2546 = bitcast i32 %807 to float - %2547 = bitcast i32 %807 to float - %2548 = fmul float %2546, %2547 - %2549 = fadd float %2545, %2548 - %2550 = call float @llvm.sqrt.f32.190(float %2549) - %2551 = fneg float %813 - %2552 = fmul float %2550, %2551 - %2553 = fmul float %2552, 0.000000e+00 - %2554 = bitcast i32 %807 to float - %2555 = fadd float %2554, %2553 - %2556 = fmul float %2541, %2555 - %2557 = fadd float %2527, %2556 - %2558 = call float @llvm.sqrt.f32.191(float %2557) - %2559 = fadd float %2558, 0.000000e+00 - %2560 = fdiv float %2499, %2559 - %2561 = fmul float %2486, %2560 - %2562 = fneg float %2561 - %2563 = fmul float %2562, %2404 - %2564 = fadd float %2563, 0.000000e+00 - %2565 = bitcast i32 %152 to float - %2566 = bitcast i32 %152 to float - %2567 = fmul float %2565, %2566 - %2568 = fadd float %2567, 0.000000e+00 - %2569 = bitcast i32 %807 to float - %2570 = bitcast i32 %807 to float - %2571 = fmul float %2569, %2570 - %2572 = fadd float %2568, %2571 - %2573 = call float @llvm.sqrt.f32.192(float %2572) - %2574 = fneg float %813 - %2575 = fmul float %2573, %2574 - %2576 = fmul float %2575, 0.000000e+00 - %2577 = bitcast i32 %807 to float - %2578 = fadd float %2577, %2576 - %2579 = bitcast i32 %152 to float - %2580 = bitcast i32 %152 to float - %2581 = fmul float %2579, %2580 - %2582 = fadd float %2581, 0.000000e+00 - %2583 = bitcast i32 %807 to float - %2584 = bitcast i32 %807 to float - %2585 = fmul float %2583, %2584 - %2586 = fadd float %2582, %2585 - %2587 = call float @llvm.sqrt.f32.193(float %2586) - %2588 = fneg float %813 - %2589 = fmul float %2587, %2588 - %2590 = bitcast i32 %152 to float - %2591 = fadd float %2590, %2589 - %2592 = bitcast i32 %152 to float - %2593 = bitcast i32 %152 to float - %2594 = fmul float %2592, %2593 - %2595 = fadd float %2594, 0.000000e+00 - %2596 = bitcast i32 %807 to float - %2597 = bitcast i32 %807 to float - %2598 = fmul float %2596, %2597 - %2599 = fadd float %2595, %2598 - %2600 = call float @llvm.sqrt.f32.194(float %2599) - %2601 = fneg float %813 - %2602 = fmul float %2600, %2601 - %2603 = bitcast i32 %152 to float - %2604 = fadd float %2603, %2602 - %2605 = fmul float %2591, %2604 - %2606 = fadd float %2605, 0.000000e+00 - %2607 = bitcast i32 %152 to float - %2608 = bitcast i32 %152 to float - %2609 = fmul float %2607, %2608 - %2610 = fadd float %2609, 0.000000e+00 - %2611 = bitcast i32 %807 to float - %2612 = bitcast i32 %807 to float - %2613 = fmul float %2611, %2612 - %2614 = fadd float %2610, %2613 - %2615 = call float @llvm.sqrt.f32.195(float %2614) - %2616 = fneg float %813 - %2617 = fmul float %2615, %2616 - %2618 = fmul float %2617, 0.000000e+00 - %2619 = bitcast i32 %807 to float - %2620 = fadd float %2619, %2618 - %2621 = bitcast i32 %152 to float - %2622 = bitcast i32 %152 to float - %2623 = fmul float %2621, %2622 - %2624 = fadd float %2623, 0.000000e+00 - %2625 = bitcast i32 %807 to float - %2626 = bitcast i32 %807 to float - %2627 = fmul float %2625, %2626 - %2628 = fadd float %2624, %2627 - %2629 = call float @llvm.sqrt.f32.196(float %2628) - %2630 = fneg float %813 - %2631 = fmul float %2629, %2630 - %2632 = fmul float %2631, 0.000000e+00 - %2633 = bitcast i32 %807 to float - %2634 = fadd float %2633, %2632 - %2635 = fmul float %2620, %2634 - %2636 = fadd float %2606, %2635 - %2637 = call float @llvm.sqrt.f32.197(float %2636) - %2638 = fadd float %2637, 0.000000e+00 - %2639 = fdiv float %2578, %2638 - %2640 = fmul float %2639, 
2.000000e+00 - %2641 = bitcast i32 %152 to float - %2642 = bitcast i32 %152 to float - %2643 = fmul float %2641, %2642 - %2644 = fadd float %2643, 0.000000e+00 - %2645 = bitcast i32 %807 to float - %2646 = bitcast i32 %807 to float - %2647 = fmul float %2645, %2646 - %2648 = fadd float %2644, %2647 - %2649 = call float @llvm.sqrt.f32.198(float %2648) - %2650 = fneg float %813 - %2651 = fmul float %2649, %2650 - %2652 = fmul float %2651, 0.000000e+00 - %2653 = bitcast i32 %807 to float - %2654 = fadd float %2653, %2652 - %2655 = bitcast i32 %152 to float - %2656 = bitcast i32 %152 to float - %2657 = fmul float %2655, %2656 - %2658 = fadd float %2657, 0.000000e+00 - %2659 = bitcast i32 %807 to float - %2660 = bitcast i32 %807 to float - %2661 = fmul float %2659, %2660 - %2662 = fadd float %2658, %2661 - %2663 = call float @llvm.sqrt.f32.199(float %2662) - %2664 = fneg float %813 - %2665 = fmul float %2663, %2664 - %2666 = bitcast i32 %152 to float - %2667 = fadd float %2666, %2665 - %2668 = bitcast i32 %152 to float - %2669 = bitcast i32 %152 to float - %2670 = fmul float %2668, %2669 - %2671 = fadd float %2670, 0.000000e+00 - %2672 = bitcast i32 %807 to float - %2673 = bitcast i32 %807 to float - %2674 = fmul float %2672, %2673 - %2675 = fadd float %2671, %2674 - %2676 = call float @llvm.sqrt.f32.200(float %2675) - %2677 = fneg float %813 - %2678 = fmul float %2676, %2677 - %2679 = bitcast i32 %152 to float - %2680 = fadd float %2679, %2678 - %2681 = fmul float %2667, %2680 - %2682 = fadd float %2681, 0.000000e+00 - %2683 = bitcast i32 %152 to float - %2684 = bitcast i32 %152 to float - %2685 = fmul float %2683, %2684 - %2686 = fadd float %2685, 0.000000e+00 - %2687 = bitcast i32 %807 to float - %2688 = bitcast i32 %807 to float - %2689 = fmul float %2687, %2688 - %2690 = fadd float %2686, %2689 - %2691 = call float @llvm.sqrt.f32.201(float %2690) - %2692 = fneg float %813 - %2693 = fmul float %2691, %2692 - %2694 = fmul float %2693, 0.000000e+00 - %2695 = bitcast i32 %807 to float - %2696 = fadd float %2695, %2694 - %2697 = bitcast i32 %152 to float - %2698 = bitcast i32 %152 to float - %2699 = fmul float %2697, %2698 - %2700 = fadd float %2699, 0.000000e+00 - %2701 = bitcast i32 %807 to float - %2702 = bitcast i32 %807 to float - %2703 = fmul float %2701, %2702 - %2704 = fadd float %2700, %2703 - %2705 = call float @llvm.sqrt.f32.202(float %2704) - %2706 = fneg float %813 - %2707 = fmul float %2705, %2706 - %2708 = fmul float %2707, 0.000000e+00 - %2709 = bitcast i32 %807 to float - %2710 = fadd float %2709, %2708 - %2711 = fmul float %2696, %2710 - %2712 = fadd float %2682, %2711 - %2713 = call float @llvm.sqrt.f32.203(float %2712) - %2714 = fadd float %2713, 0.000000e+00 - %2715 = fdiv float %2654, %2714 - %2716 = fmul float %2640, %2715 - %2717 = fsub float 1.000000e+00, %2716 - %2718 = load float, float* %144, align 4 - %2719 = fmul float %2717, %2718 - %2720 = fadd float %2564, %2719 - %2721 = insertelement <4 x float> zeroinitializer, float %2720, i32 0 - %2722 = insertelement <4 x float> %2721, float 0.000000e+00, i32 1 - %2723 = insertelement <4 x float> %2722, float 0.000000e+00, i32 2 - %2724 = insertelement <4 x float> %2723, float 0.000000e+00, i32 3 - %2725 = extractelement <4 x float> %2724, i32 0 - store float %2725, float* %2247, align 4 - %2726 = getelementptr float, float* %1, i32 0 - %2727 = getelementptr inbounds float, float* %2726, i64 2 - %2728 = bitcast float* %2727 to i32* - %2729 = load i32, i32* %2728, align 4 - %2730 = bitcast i32 %2729 to float - %2731 = 
insertelement <4 x float> zeroinitializer, float %2730, i32 0 - %2732 = getelementptr float, float* %1, i32 0 - %2733 = getelementptr inbounds float, float* %2732, i64 1 - %2734 = bitcast float* %2733 to i32* - %2735 = load i32, i32* %2734, align 4 - %2736 = bitcast i32 %2735 to float - %2737 = insertelement <4 x float> %2731, float %2736, i32 1 - %2738 = insertelement <4 x float> %2737, float 0.000000e+00, i32 2 - %2739 = insertelement <4 x float> %2738, float 0.000000e+00, i32 3 - %2740 = extractelement <4 x float> %2739, i32 0 - %2741 = bitcast i32* %95 to float* - %2742 = bitcast i32* %2734 to float* - store float %2740, float* %2742, align 4 - %2743 = extractelement <4 x float> %2739, i32 1 - %2744 = bitcast i32* %98 to float* - %2745 = bitcast i32* %2728 to float* - store float %2743, float* %2745, align 4 - ret void -} - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 - -; Function Attrs: noinline nounwind ssp uwtable -define void @no_opt_naive_fixed_qr_decomp(float* %0, float* %1, float* %2) #1 { -.preheader13: - %3 = bitcast float* %2 to i8* - %4 = bitcast float* %0 to i8* - %5 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false, i1 true, i1 false) - %6 = call i8* @__memcpy_chk(i8* %3, i8* %4, i64 16, i64 %5) #9 - %7 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 - %8 = bitcast i8* %7 to float* - store float 1.000000e+00, float* %8, align 4 - %9 = getelementptr inbounds i8, i8* %7, i64 8 - %10 = getelementptr inbounds i8, i8* %7, i64 12 - %11 = bitcast i8* %10 to float* - store float 1.000000e+00, float* %11, align 4 - %12 = bitcast float* %1 to i8* - %13 = call i64 @llvm.objectsize.i64.p0i8(i8* %12, i1 false, i1 true, i1 false) - %14 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 - %15 = bitcast i8* %14 to float* - %16 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 - %17 = bitcast i8* %16 to float* - %18 = bitcast float* %2 to i32* - %19 = load i32, i32* %18, align 4 - %20 = bitcast i8* %14 to i32* - store i32 %19, i32* %20, align 4 - %21 = bitcast i8* %7 to i32* - %22 = load i32, i32* %21, align 4 - %23 = bitcast i8* %16 to i32* - store i32 %22, i32* %23, align 4 - %24 = getelementptr inbounds float, float* %2, i64 2 - %25 = bitcast float* %24 to i32* - %26 = load i32, i32* %25, align 4 - %27 = getelementptr inbounds i8, i8* %14, i64 4 - %28 = bitcast i8* %27 to i32* - store i32 %26, i32* %28, align 4 - %29 = bitcast i8* %9 to i32* - %30 = load i32, i32* %29, align 4 - %31 = getelementptr inbounds i8, i8* %16, i64 4 - %32 = bitcast i8* %31 to i32* - store i32 %30, i32* %32, align 4 - %33 = load float, float* %15, align 4 - %34 = call float @no_opt_sgn(float %33) - %35 = fneg float %34 - %36 = call float @no_opt_naive_norm(float* nonnull %15, i32 2) - %37 = fmul float %36, %35 - %38 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 - %39 = bitcast i8* %38 to float* - %40 = call dereferenceable_or_null(8) i8* @calloc(i64 4, i64 2) #10 - %41 = load float, float* %15, align 4 - %42 = load float, float* %17, align 4 - %43 = fmul float %37, %42 - %44 = fadd float %41, %43 - store float %44, float* %39, align 4 - %45 = bitcast i8* %27 to float* - %46 = load float, float* %45, align 4 - %47 = bitcast i8* %31 to float* - %48 = load float, float* %47, align 4 - %49 = fmul float %37, %48 - %50 = fadd float %46, %49 - %51 = getelementptr inbounds i8, i8* %38, i64 4 - %52 = bitcast i8* %51 to float* - store float %50, float* %52, 
align 4 - %53 = bitcast i8* %40 to float* - %54 = call float @no_opt_naive_norm(float* nonnull %39, i32 2) - %55 = fadd float %54, 0x3EE4F8B580000000 - %56 = load float, float* %39, align 4 - %57 = fdiv float %56, %55 - store float %57, float* %53, align 4 - %58 = load float, float* %52, align 4 - %59 = fdiv float %58, %55 - %60 = getelementptr inbounds i8, i8* %40, i64 4 - %61 = bitcast i8* %60 to float* - store float %59, float* %61, align 4 - %62 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 - %63 = bitcast i8* %62 to float* - %64 = load float, float* %53, align 4 - %65 = fmul float %64, 2.000000e+00 - %66 = fmul float %65, %64 - %67 = fsub float 1.000000e+00, %66 - store float %67, float* %63, align 4 - %68 = load float, float* %53, align 4 - %69 = fmul float %68, 2.000000e+00 - %70 = load float, float* %61, align 4 - %71 = fmul float %69, %70 - %72 = fsub float 0.000000e+00, %71 - %73 = getelementptr inbounds i8, i8* %62, i64 4 - %74 = bitcast i8* %73 to float* - store float %72, float* %74, align 4 - %75 = load float, float* %61, align 4 - %76 = fmul float %75, 2.000000e+00 - %77 = load float, float* %53, align 4 - %78 = fmul float %76, %77 - %79 = fsub float 0.000000e+00, %78 - %80 = getelementptr inbounds i8, i8* %62, i64 8 - %81 = bitcast i8* %80 to float* - store float %79, float* %81, align 4 - %82 = load float, float* %61, align 4 - %83 = fmul float %82, 2.000000e+00 - %84 = fmul float %83, %82 - %85 = fsub float 1.000000e+00, %84 - %86 = getelementptr inbounds i8, i8* %62, i64 12 - %87 = bitcast i8* %86 to float* - store float %85, float* %87, align 4 - %88 = call dereferenceable_or_null(16) i8* @calloc(i64 4, i64 4) #10 - %89 = bitcast i8* %88 to float* - %90 = bitcast i8* %62 to i32* - %91 = load i32, i32* %90, align 4 - %92 = bitcast i8* %88 to i32* - store i32 %91, i32* %92, align 4 - %93 = bitcast i8* %73 to i32* - %94 = load i32, i32* %93, align 4 - %95 = getelementptr inbounds i8, i8* %88, i64 4 - %96 = bitcast i8* %95 to i32* - store i32 %94, i32* %96, align 4 - %97 = bitcast i8* %80 to i32* - %98 = load i32, i32* %97, align 4 - %99 = getelementptr inbounds i8, i8* %88, i64 8 - %100 = bitcast i8* %99 to i32* - store i32 %98, i32* %100, align 4 - %101 = bitcast i8* %86 to i32* - %102 = load i32, i32* %101, align 4 - %103 = getelementptr inbounds i8, i8* %88, i64 12 - %104 = bitcast i8* %103 to i32* - store i32 %102, i32* %104, align 4 - %105 = call i8* @__memcpy_chk(i8* %12, i8* %88, i64 16, i64 %13) #9 - call void @no_opt_naive_fixed_matrix_multiply(float* %89, float* %0, float* %2) - call void @free(i8* %14) - call void @free(i8* %16) - call void @free(i8* %38) - call void @free(i8* %40) - call void @free(i8* %62) - call void @free(i8* %88) - call void @no_opt_naive_fixed_transpose(float* %1) - ret void -} - -; Function Attrs: nounwind -declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #4 - -; Function Attrs: nounwind readnone speculatable willreturn -declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 - -; Function Attrs: allocsize(0,1) -declare i8* @calloc(i64, i64) #5 - -declare void @free(i8*) #6 - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #1 { -.preheader6: - %0 = alloca i64, align 8 - %1 = alloca [4 x float], align 16 - %2 = alloca [4 x float], align 16 - %3 = alloca [4 x float], align 16 - %4 = alloca [4 x float], align 16 - %5 = alloca [4 x float], align 16 - %6 = call i64 @time(i64* null) #9 - store i64 %6, i64* %0, align 8 - %7 = call i64 @time(i64* nonnull %0) #9 - %8 = trunc i64 %7 to i32 
- call void @srand(i32 %8) #9 - %9 = call i32 @rand() #9 - %10 = sitofp i32 %9 to float - %11 = fdiv float %10, 0x41747AE140000000 - %12 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0 - store float %11, float* %12, align 16 - %13 = call i32 @rand() #9 - %14 = sitofp i32 %13 to float - %15 = fdiv float %14, 0x41747AE140000000 - %16 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1 - store float %15, float* %16, align 4 - %17 = call i32 @rand() #9 - %18 = sitofp i32 %17 to float - %19 = fdiv float %18, 0x41747AE140000000 - %20 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2 - store float %19, float* %20, align 8 - %21 = call i32 @rand() #9 - %22 = sitofp i32 %21 to float - %23 = fdiv float %22, 0x41747AE140000000 - %24 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3 - store float %23, float* %24, align 4 - %25 = bitcast [4 x float]* %2 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %25, i8 0, i64 16, i1 false) - %26 = bitcast [4 x float]* %3 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %26, i8 0, i64 16, i1 false) - %27 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %28 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - call void @naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %27, float* nonnull %28) - %29 = bitcast [4 x float]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %29, i8 0, i64 16, i1 false) - %30 = bitcast [4 x float]* %5 to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %30, i8 0, i64 16, i1 false) - %31 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - %32 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 - call void @no_opt_naive_fixed_qr_decomp(float* nonnull %12, float* nonnull %31, float* nonnull %32) - %33 = load float, float* %27, align 16 - %34 = fpext float %33 to double - %35 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %34) #9 - %36 = load float, float* %31, align 16 - %37 = fpext float %36 to double - %38 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %37) #9 - %39 = load float, float* %31, align 16 - %40 = load float, float* %27, align 16 - %41 = fsub float %39, %40 - %42 = call float @llvm.fabs.f32(float %41) - %43 = fcmp uge float %42, 0x3FB99999A0000000 - br i1 %43, label %58, label %44 - -44: ; preds = %.preheader6 - %45 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1 - %46 = load float, float* %45, align 4 - %47 = fpext float %46 to double - %48 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %47) #9 - %49 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 - %50 = load float, float* %49, align 4 - %51 = fpext float %50 to double - %52 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %51) #9 - %53 = load float, float* %31, align 16 - %54 = load float, float* %27, align 16 - %55 = fsub float %53, %54 - %56 = call float @llvm.fabs.f32(float %55) - %57 = fcmp uge float %56, 0x3FB99999A0000000 - br i1 %57, label %58, label %.preheader6.1 - -58: ; preds = %115, %.preheader6.1, %44, %.preheader6 - call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 300, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.3, i64 0, i64 0)) #11 - unreachable - -59: ; preds = %.preheader5 - %60 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1 - %61 = load float, float* %60, align 4 - %62 = fpext float %61 to double - %63 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %62) #9 - %64 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 1 - %65 = load float, float* %64, align 4 - %66 = fpext float %65 to double - %67 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %66) #9 - %68 = load float, float* %32, align 16 - %69 = load float, float* %28, align 16 - %70 = fsub float %68, %69 - %71 = call float @llvm.fabs.f32(float %70) - %72 = fcmp uge float %71, 0x3FB99999A0000000 - br i1 %72, label %73, label %.preheader.1 - -73: ; preds = %.preheader5, %87, %.preheader.1, %59 - call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @__func__.main, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0), i32 307, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.6, i64 0, i64 0)) #11 - unreachable - -.preheader.1: ; preds = %59 - %74 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2 - %75 = load float, float* %74, align 8 - %76 = fpext float %75 to double - %77 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %76) #9 - %78 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 2 - %79 = load float, float* %78, align 8 - %80 = fpext float %79 to double - %81 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %80) #9 - %82 = load float, float* %64, align 4 - %83 = load float, float* %60, align 4 - %84 = fsub float %82, %83 - %85 = call float @llvm.fabs.f32(float %84) - %86 = fcmp uge float %85, 0x3FB99999A0000000 - br i1 %86, label %73, label %87 - -87: ; preds = %.preheader.1 - %88 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3 - %89 = load float, float* %88, align 4 - %90 = fpext float %89 to double - %91 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %90) #9 - %92 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 3 - %93 = load float, float* %92, align 4 - %94 = fpext float %93 to double - %95 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %94) #9 - %96 = load float, float* %64, align 4 - %97 = load float, float* %60, align 4 - %98 = fsub float %96, %97 - %99 = call float @llvm.fabs.f32(float %98) - %100 = fcmp uge float %99, 0x3FB99999A0000000 - br i1 %100, label %73, label %101 - -101: ; preds = %87 - ret i32 0 - -.preheader6.1: ; preds = %44 - %102 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2 - %103 = load float, float* %102, align 8 - %104 = fpext float %103 to double - %105 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %104) #9 - %106 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 - %107 = load float, float* %106, align 8 - %108 = fpext float %107 to double - %109 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %108) #9 - %110 = load float, float* %49, align 4 - %111 = load float, float* %45, align 4 - %112 = fsub float %110, %111 - %113 = call float @llvm.fabs.f32(float %112) - %114 = fcmp uge float %113, 0x3FB99999A0000000 - br i1 %114, label %58, label %115 - -115: ; preds = %.preheader6.1 - %116 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3 - %117 = load float, float* %116, align 4 - %118 = fpext float %117 to double - %119 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %118) #9 - %120 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 - %121 = load float, float* %120, align 4 - %122 = fpext float %121 to double - %123 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), double %122) #9 - %124 = load float, float* %49, align 4 - %125 = load float, float* %45, align 4 - %126 = fsub float %124, %125 - %127 = call float @llvm.fabs.f32(float %126) - %128 = fcmp uge float %127, 0x3FB99999A0000000 - br i1 %128, label %58, label %.preheader5 - -.preheader5: ; preds = %115 - %129 = load float, float* %28, align 16 - %130 = fpext float %129 to double - %131 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4, i64 0, i64 0), double %130) #9 - %132 = load float, float* %32, align 16 - %133 = fpext float %132 to double - %134 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([23 x i8], [23 x i8]* @.str.5, i64 0, i64 0), double %133) #9 - %135 = load float, float* %32, align 16 - %136 = load float, float* %28, align 16 - %137 = fsub float %135, %136 - %138 = call float @llvm.fabs.f32(float %137) - %139 = fcmp uge float %138, 0x3FB99999A0000000 - br i1 %139, label %73, label %59 -} - -declare i64 @time(i64*) #6 - -declare void @srand(i32) #6 - -declare i32 @rand() #6 - -declare i32 @printf(i8*, ...) 
#6 - -; Function Attrs: nounwind readnone speculatable willreturn -declare double @llvm.fabs.f64(double) #2 - -; Function Attrs: noreturn -declare void @__assert_rtn(i8*, i8*, i32, i8*) #7 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #8 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fabs.f32(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.1(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.2(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.3(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.4(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.5(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.6(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.7(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.8(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.9(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.10(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.11(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.12(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.13(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.14(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.15(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.16(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.17(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.18(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.19(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.20(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.21(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.22(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.23(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.24(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.25(float) #2 - -; Function Attrs: nounwind readnone 
speculatable willreturn -declare float @llvm.sqrt.f32.26(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.27(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.28(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.29(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.30(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.31(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.32(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.33(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.34(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.35(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.36(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.37(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.38(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.39(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.40(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.41(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.42(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.43(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.44(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.45(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.46(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.47(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.48(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.49(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.50(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.51(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.52(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.53(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.54(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.55(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.56(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.57(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.58(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float 
@llvm.sqrt.f32.59(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.60(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.61(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.62(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.63(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.64(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.65(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.66(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.67(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.68(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.69(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.70(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.71(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.72(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.73(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.74(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.75(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.76(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.77(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.78(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.79(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.80(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.81(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.82(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.83(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.84(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.85(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.86(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.87(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.88(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.89(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.90(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.91(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float 
@llvm.sqrt.f32.92(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.93(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.94(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.95(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.96(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.97(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.98(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.99(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.100(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.101(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.102(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.103(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.104(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.105(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.106(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.107(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.108(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.109(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.110(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.111(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.112(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.113(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.114(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.115(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.116(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.117(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.118(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.119(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.120(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.121(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.122(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.123(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.124(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare 
float @llvm.sqrt.f32.125(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.126(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.127(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.128(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.129(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.130(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.131(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.132(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.133(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.134(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.135(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.136(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.137(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.138(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.139(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.140(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.141(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.142(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.143(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.144(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.145(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.146(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.147(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.148(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.149(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.150(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.151(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.152(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.153(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.154(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.155(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.156(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.157(float) #2 - -; Function Attrs: nounwind readnone speculatable 
willreturn -declare float @llvm.sqrt.f32.158(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.159(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.160(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.161(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.162(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.163(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.164(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.165(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.166(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.167(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.168(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.169(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.170(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.171(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.172(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.173(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.174(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.175(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.176(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.177(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.178(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <4 x float> @llvm.fma.f32.179(<4 x float>, <4 x float>, <4 x float>) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.180(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.181(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.182(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.183(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.184(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.185(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.186(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.187(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.188(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.189(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.190(float) #2 - -; Function Attrs: nounwind readnone 
speculatable willreturn -declare float @llvm.sqrt.f32.191(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.192(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.193(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.194(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.195(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.196(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.197(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.198(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.199(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.200(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.201(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.202(float) #2 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32.203(float) #2 - -attributes #0 = { alwaysinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone speculatable willreturn } -attributes #3 = { argmemonly nounwind willreturn writeonly } -attributes #4 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { allocsize(0,1) "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" 
"disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="true" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #8 = { argmemonly nounwind willreturn } -attributes #9 = { nounwind } -attributes #10 = { nounwind allocsize(0,1) } -attributes #11 = { noreturn nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} -!3 = distinct !{!3, !4} -!4 = !{!"llvm.loop.unroll.disable"} -!5 = distinct !{!5, !4} diff --git a/src/dios-egraphs/Diospyros/flaky-outputs/flaky-final b/src/dios-egraphs/Diospyros/flaky-outputs/flaky-final deleted file mode 100755 index 0555e7480c8c0b64524b5228ee2ab242a3b18ae6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 13676 zcmeHOe{fY-oxgzu0}Y<{N`J80AJ1;FFKh{5?Mkb)FPH}x%!5FQB$#js;RP}#FY*1T zV8tdaujTrP#!-}+g)y_*8Fr_;lXaj*#&%yIC845?rBc>1YpN74Q&uxZYuYN=&*z+T zU*3DL-O)dGW}SB@_nhzdd%nNV_ndp*y*Dqu{a;t!V;Ds<4Wny@VHg7EmJ-8A8a_aV z@i#b4oR(EnxiYvZxPC3ExkphhdQFUa{4iiy8-jHka#^IjFPEmx8*oLQv-GfK#lt(} z)4cimw#-2=82;Kk#Y4P!nw>~jHfmY1cw%cTL(13pyIYmM<(k9AU4c@qZ`wcVvv!2r zI>S*g{>lfMqqu;?dT~c1_RM zXWXv*`<^QYJp0J;6$6%)h}eyXqcN)^jbu+ECX^F&=`pjQhsqF7G2!dx^?lkU2WTd^LRf_`mwC{=3iAl zwd~YCeKk<=osWKh-iJW*;e<;_7&ZeOgjFuDpRo|IheUn&;Xd{BvB^4UcNtES8FSIw z7qHBVrhS1w5ABM@!|fHzE8th_RxsiCAWrHZLw+b*u5sq8a1=67$Uq?jf6W=FHSO2M z##5qVP<%6eXOVbjL{yAO@Xd4LxzVB*Cd9(yjVD^f?R#Vz{u2JB>Bp}a;#-4k*PgV~ z_VG6di^RsXY44si?b`7P;SF9ENnY_TMX>oVP1}QJs&?G$K0bxROeFwb+3{4Hqr29H4YFoNi#>}9cMH7%rqec(Avi6d9X(mlvP z%o3SD*9}P6l<^R0Xd)l+ql_%Ie>B*W|6a;--A<#jW40QO>Dd%%0uJA|3gt;J0|=uW6@&8B~Z_` zBdDVAkQ2slQj{YxN|(&W{gm*B##_Yw9~kk7lOx3_RF<|^)W0D7&{FpTDwcJ=sTfUD z>4doH5`e^zC=d1v|4CTv6Mn3kpF(HjL7F+>T){0S)M62MY7c_3OZbliZajeDo-d$~ z;Un<<1f8FN+4V{IgI)0D=0Gt>A(}11IU6p}srAL8yl0CP$6n{D9dh1DM9e}pUoJ7m&Pzl^GfOc2;l z8f2D8MuvBYGL*erl4OJgo!{UtT}LRrq=<~N)Qz$ddG)L+)p1@AlwrzJ_oOwf4;clmD2^pheo=CbLSU4Kwn-sFf@G$eaY?m(6b_%4H}%mjgn z^XykboPa#!x zm;h3|%eex`wg!OYTTO~$ggcUvB=2%g=UOxwjm)`cBbcUDlH^^^FeK-+Qj%7Zbp;w_ zrwp$5a=*v=@8i`XRq_p42nMbQtjsZEr3$?OB;k z2RZP34wHHX$}#zMui%n|9jG2lr4EY>rB#0c;4jf*M(Tbm{AU4zqwW{rBp1k=6=&TA z;r}B>Cnu_~AML_#-5{>RKpI|z55e=5!aoRrlkcs4S@_RjQ^RFCA@34EYdj*#+p*Dj z6Kd~)q&N6=wxM3w6rKc8G);iWOcP&pCZf!mfp#1>G-#}@O z0Er^XKLJOi4a3wjjGamCys{b+(C7{B!#)V+&^{oId(pJ-ao(AoF&$u)DBIx3vaOA5 z!ZdrrdoYBIH4H&MP&^#q69efq6xOAMA8vcXaIpTo@V_Dai9Sr#uR*(?9$Z8q42hH=!N01*B8MP|er-0{3B_vH?;!|9%(wTcHoEhl2u{ySfWc?nQLLF1HH^A;$#sab zFpf;HfV?#*&_p`5h5@Z8wz+`3E3k(?@Q9rD2T{6{)%X2eBoVw}b`SNTYdqo9!nn9j z-#=kJ^>Wn4Me;3mlRJ#+8pn;`0G~DB(&?D?adUVOwx^tVko~^AYqVeFO{g6lbFKpo z9+W&Qdls5E!0l@M-^utKuhig^x4zEL@vO>h`iF2IqrncAI}R^dqSh;#P$ zGW)!btj{vc!G34$HT*fu+gIma<T3;@PKhz6$ViJn8++Q5J<0cD_p@0A{)I`sTp!|Ffo{vs?>5$ zPAU2_^r(Q{sHv03SVgN;`5>`-tL;bsuGqs<)i1UOT*xic+cY!98du$Tf=?V40X#dhQD$BJ0V4rl=@82Qy(gN=z 
z?PPoQV+sSH3VDPLOEcpV6VB*4Rz)xoXjb8zwMZEd~fBir41g6*Q367d2ph{)9dP!wF zK|m=5l(xOlhIso;^;4Htu?>35zIujCLh-1gCN_<87tP{P)s|M_Dml5zGA!7N9Zv^-yrm5Li-5)1EI$Nq0by$4+0E4{rp`Y`O)dq56pSx+n>w~ zDWD5o=7jL0+2=an+C;oF5x?KJpk?yYtr4T8skLoUJRFP17JoLn zXj{0sqrG!cTSs$K+oGmuv}sqYq8U6}WB#yKvhLHZjlM-bcICd0`Kp4|c%ixe+V*nE zIcnA`HSDI(Z|e8T>vi9t z`xf1I>i+Y(-=q64>;8c5|55i(>)zF0=nEMrWT23NLIw&MC}g0JfkFlf87O3+kby!5 z3K=M5;QuZI-aF=hn%2Ali=V3&)Ao0PDoZ>v73f2Q1nY*~XDAfMYJQ14z8JoeJ=Yu7&>}D7M;nSmB-F z=0rS%(Eos554e^`VGD zBQuTO8^ILmh_)N8Ev-Al+bm-b{(&N$p+2gqM?oFOSHCmka4pu^5ewsiVtW(5zu#%K zC)(nzoo%~}i$$M}TIz9J1^-{`FVqS0C7cwxbV(TXCz=TYB7-z0JM-Lv~_iH{h!;QJBc_v!I1*DJh6 z<32sUUE^sydm=yoqk{7NUWI?t#gQbAAJuq3%b(GBC*I6*{G!HtwZ7kK-2MNKKL7_G zS>;ipkPJRA4_}an^M4G~pOO#c;Vbg+>O6cy9^R0LZ_UG7^YD&5oG+yDafJM)pR(akt<1o$YI-;@72az6am!PV=)lJ#$?>Q;rDU!pmK)76?oo-T1s zmg~ccsI~O&+OOuxHgff7wyo^Z9FttLmTW95Yn#G0m$gCVY Date: Mon, 2 May 2022 15:41:34 -0400 Subject: [PATCH 082/143] debugging --- src/dios-egraphs/Diospyros/diospyros.cpp | 65 +- src/dios-egraphs/Diospyros/src/lib.rs | 27 +- src/dios-egraphs/Diospyros/src/llvm_to_egg.rs | 1258 ----------------- 3 files changed, 70 insertions(+), 1280 deletions(-) delete mode 100644 src/dios-egraphs/Diospyros/src/llvm_to_egg.rs diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 8b1ac4bf..87271e20 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -15,6 +15,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" @@ -482,6 +483,30 @@ bool call_is_not_sqrt(CallInst *inst) { // will be done } +/** + * True iff an instruction is "vectorizable" +*/ +bool can_vectorize(Value* value){ + // TODO: + Instruction * instr = dyn_cast(value); + assert(instr != NULL); + if (isa(instr)){ + if (instr->getOpcode() == Instruction::FAdd) { + return true; + } else if (instr -> getOpcode() == Instruction::FSub) { + return true; + } else if (instr -> getOpcode() == Instruction::FDiv) { + return true; + } else if (instr -> getOpcode() == Instruction::FMul) { + return true; + } else if (instr -> getOpcode() == Instruction::FNeg) { + return true; + } + return false; + } + return false; +} + /** * Below is the main DiospyrosPass that activates the Rust lib.rs code, * which calls the Egg vectorizer and rewrites the optimized code in place. 
@@ -501,8 +526,6 @@ struct DiospyrosPass : public FunctionPass { } bool has_changes = false; for (auto &B : F) { - // ------------ Construction Zone --------------- - // TODO: Consider removing as the new procedure can overcome this // We skip over basic blocks without floating point types bool has_float = false; @@ -515,8 +538,6 @@ struct DiospyrosPass : public FunctionPass { continue; } - // ------------ Construction Zone --------------- - // Assumes Alias Analysis Movement Pass has been done previously // Pulls out Instructions into sections of code called "Chunks" // @@ -531,22 +552,16 @@ struct DiospyrosPass : public FunctionPass { chunk_accumulator.push_back(chunk_vector); } chunk_vector = {wrap(op)}; - chunk_accumulator.push_back(chunk_vector); - chunk_vector = {}; } else if (auto *op = dyn_cast(&I)) { if (!chunk_vector.empty()) { chunk_accumulator.push_back(chunk_vector); } chunk_vector = {wrap(op)}; - chunk_accumulator.push_back(chunk_vector); - chunk_vector = {}; } else if (auto *op = dyn_cast(&I)) { if (!chunk_vector.empty()) { chunk_accumulator.push_back(chunk_vector); } chunk_vector = {wrap(op)}; - chunk_accumulator.push_back(chunk_vector); - chunk_vector = {}; } else if (CallInst *call_inst = dyn_cast(&I)) { if (is_memset_variety(call_inst)) { if (!chunk_vector.empty()) { @@ -554,43 +569,58 @@ struct DiospyrosPass : public FunctionPass { } Instruction *memset = dyn_cast(call_inst); chunk_vector = {wrap(memset)}; - chunk_accumulator.push_back(chunk_vector); - chunk_vector = {}; } else if (is_memcopy_variety(call_inst)) { if (!chunk_vector.empty()) { chunk_accumulator.push_back(chunk_vector); } Instruction *memcopy = dyn_cast(call_inst); chunk_vector = {wrap(memcopy)}; - chunk_accumulator.push_back(chunk_vector); - chunk_vector = {}; } else if (is_memmove_variety(call_inst)) { if (!chunk_vector.empty()) { chunk_accumulator.push_back(chunk_vector); } Instruction *memmove = dyn_cast(call_inst); chunk_vector = {wrap(memmove)}; - chunk_accumulator.push_back(chunk_vector); - chunk_vector = {}; } } else if (auto *op = dyn_cast(&I)) { Value *load_loc = op->getOperand(0); if (!chunk_vector.empty()) { chunk_accumulator.push_back(chunk_vector); } - chunk_vector = {}; + chunk_vector = {wrap(op)}; + } else { + Instruction *instr = dyn_cast(&I); + chunk_vector.push_back(wrap(instr)); } } if (!chunk_vector.empty()) { chunk_accumulator.push_back(chunk_vector); } + for (auto &chunk_vector : chunk_accumulator) { + for (auto &instr : chunk_vector) { + errs() << *unwrap(instr) << "\n"; + } + errs() << "---------------------\n"; + } + for (int i = 0; i < chunk_accumulator.size(); ++i) { auto &chunk_vector = chunk_accumulator[i]; if (chunk_vector.empty()) { continue; } + // check if the chunk vector actually has instructions to optimixe on + bool has_vectorizable_instrs = false; + for (auto &instr : chunk_vector) { + if (can_vectorize(unwrap(instr))) { + has_vectorizable_instrs = true; + } + } + if (!has_vectorizable_instrs) { + continue; + } + // If an instruction is used multiple times outside the chunk, // add it to a restricted list. // TODO: only consider future chunks! 
@@ -616,6 +646,7 @@ struct DiospyrosPass : public FunctionPass { } has_changes = has_changes || true; + assert(chunk_vector.size() != 0); Value *last_instr_val = unwrap(chunk_vector.back()); Instruction *last_instr = dyn_cast(last_instr_val); assert(last_instr != NULL); diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 23b7ab5e..86edded6 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -222,11 +222,15 @@ unsafe fn balanced_pad_vector<'a>( let width = config::vector_width(); assert!(is_pow2(width as u32)); let length = binop_vec.len(); + assert!( + length > 0, + "There must be 1 or more operators to vectorize." + ); // Check vector less than width, and then return - if length < width { - enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); - return enode_vec; - } + // if length < width { + // enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); + // return enode_vec; + // } let closest_pow2 = get_pow2(cmp::max(length, width) as u32); let diff = closest_pow2 - (length as u32); for _ in 0..diff { @@ -417,6 +421,7 @@ unsafe fn bop_to_egg( Id::from((left_next_idx - 1) as usize), Id::from((right_next_idx - 1) as usize), ]; + println!("{:?}", right_egg_nodes); right_egg_nodes.push(choose_binop(&llvm_instr, ids)); (right_egg_nodes, right_next_idx + 1) } @@ -699,14 +704,24 @@ unsafe fn num_to_llvm(n: &i32, md: &Egg2LLVMState) -> LLVMValueRef { unsafe fn vec_to_llvm(boxed_ids: &Box<[Id]>, md: &Egg2LLVMState) -> LLVMValueRef { // Convert the Boxed Ids to a Vector, and generate a vector of zeros + // Invariant: idvec must not be empty let idvec = boxed_ids.to_vec(); let idvec_len = idvec.len(); + assert!( + !idvec.is_empty(), + "Id Vec Cannot be empty when converting Vector to an LLVM Vector" + ); let mut zeros = Vec::new(); for _ in 0..idvec_len { zeros.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 0 as f64)); } // Convert the Vector of Zeros to a Mut PTr to construct an LLVM Zero Vector + // Invariant: zeros must not be empty + assert!( + !zeros.is_empty(), + "Zeros Vector Cannot be empty when converting Vector to an LLVM Vector" + ); let zeros_ptr = zeros.as_mut_ptr(); let mut vector = LLVMConstVector(zeros_ptr, idvec.len() as u32); for (idx, &eggid) in idvec.iter().enumerate() { @@ -1007,6 +1022,7 @@ unsafe fn egg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) } } +// TODO: Add non-vectorized version as well! unsafe fn egg_to_llvm_main( expr: RecExpr, llvm2egg_metadata: &LLVM2EggState, @@ -1030,7 +1046,7 @@ unsafe fn egg_to_llvm_main( // HERE, we stitch our work back into the current LLVM code - // NOTE: We Assume Vectorizer will maintain relative positions of elements in vector + // NOTE: We Assume Egg rewriter will maintain relative positions of elements in vector // Extract the elements of the vector, to be assigned back to where they are to be used. let num_extractions = llvm2egg_metadata.start_instructions.len(); for i in 0..num_extractions { @@ -1045,5 +1061,6 @@ unsafe fn egg_to_llvm_main( // Replace all the uses of the old instruction with the new extracted value // Old instruction cannot have been removed. 
LLVMReplaceAllUsesWith(*old_instr, extracted_value); + LLVMInstructionRemoveFromParent(*old_instr); } } diff --git a/src/dios-egraphs/Diospyros/src/llvm_to_egg.rs b/src/dios-egraphs/Diospyros/src/llvm_to_egg.rs deleted file mode 100644 index c088c1a3..00000000 --- a/src/dios-egraphs/Diospyros/src/llvm_to_egg.rs +++ /dev/null @@ -1,1258 +0,0 @@ -extern crate llvm_sys as llvm; -use dioslib::{config, rules, veclang::VecLang}; -use egg::*; -use libc::size_t; -use llvm::{core::*, prelude::*, LLVMOpcode::*, LLVMRealPredicate}; -use std::{ - cmp, - collections::{BTreeMap, BTreeSet}, - ffi::CStr, - mem, - os::raw::c_char, - slice::from_raw_parts, -}; - -extern "C" { - fn llvm_index(val: LLVMValueRef, index: i32) -> i32; - fn llvm_name(val: LLVMValueRef) -> *const c_char; - fn isa_unop(val: LLVMValueRef) -> bool; - fn isa_bop(val: LLVMValueRef) -> bool; - fn isa_constant(val: LLVMValueRef) -> bool; - fn isa_constfp(val: LLVMValueRef) -> bool; - fn isa_gep(val: LLVMValueRef) -> bool; - fn isa_load(val: LLVMValueRef) -> bool; - fn isa_store(val: LLVMValueRef) -> bool; - fn isa_argument(val: LLVMValueRef) -> bool; - fn isa_call(val: LLVMValueRef) -> bool; - fn isa_fptrunc(val: LLVMValueRef) -> bool; - fn isa_fpext(val: LLVMValueRef) -> bool; - fn isa_alloca(val: LLVMValueRef) -> bool; - fn isa_phi(val: LLVMValueRef) -> bool; - fn _isa_sextint(val: LLVMValueRef) -> bool; - fn isa_sitofp(val: LLVMValueRef) -> bool; - fn isa_constaggregatezero(val: LLVMValueRef) -> bool; - fn _isa_constaggregate(val: LLVMValueRef) -> bool; - fn isa_integertype(val: LLVMValueRef) -> bool; - fn _isa_intptr(val: LLVMValueRef) -> bool; - fn isa_floatptr(val: LLVMValueRef) -> bool; - fn _isa_floattype(val: LLVMValueRef) -> bool; - fn isa_bitcast(val: LLVMValueRef) -> bool; - fn isa_sqrt32(val: LLVMValueRef) -> bool; - fn isa_sqrt64(val: LLVMValueRef) -> bool; - fn get_constant_float(val: LLVMValueRef) -> f32; - fn build_constant_float(n: f64, context: LLVMContextRef) -> LLVMValueRef; -} - -// Note: We use BTreeMaps to enforce ordering in the map -// Without ordering, tests become flaky and start failing a lot more often -// We do not use HashMaps for this reason as ordering is not enforced. -// GEPMap : Maps the array name and array offset as symbols to the GEP -// LLVM Value Ref that LLVM Generated -type GEPMap = BTreeMap<(Symbol, Symbol), LLVMValueRef>; -type LLVMPairMap = BTreeMap; - -static mut ARG_IDX: i32 = 0; -static mut CALL_IDX: i32 = 0; -static mut NODE_IDX: u32 = 0; - -unsafe fn gen_node_idx() -> u32 { - NODE_IDX += 1; - return NODE_IDX; -} - -unsafe fn gen_arg_name() -> String { - ARG_IDX += 1; - let string = "ARGUMENT".to_string(); - let result = format!("{}{}", string, ARG_IDX.to_string()); - result -} - -unsafe fn gen_call_name() -> String { - CALL_IDX += 1; - let string = "CALL".to_string(); - let result = format!("{}{}", string, CALL_IDX.to_string()); - result -} - -// Reference Comparison: https://www.reddit.com/r/rust/comments/2r3wjk/is_there_way_to_compare_objects_by_address_in_rust/ -// Compares whether addresses of LLVMValueRefs are the same. 
-// Not the contents of the Value Refs -fn cmp_val_ref_address(a1: &llvm::LLVMValue, a2: &llvm::LLVMValue) -> bool { - a1 as *const _ == a2 as *const _ -} - -fn _cmp_typ(a1: &LLVMTypeRef, a2: &LLVMTypeRef) -> bool { - a1 as *const _ == a2 as *const _ -} - -/// Converts LLVMValueRef binop to equivalent VecLang Binop node -unsafe fn choose_binop(bop: &LLVMValueRef, ids: [Id; 2]) -> VecLang { - match LLVMGetInstructionOpcode(*bop) { - LLVMFAdd => VecLang::Add(ids), - LLVMFMul => VecLang::Mul(ids), - LLVMFSub => VecLang::Minus(ids), - LLVMFDiv => VecLang::Div(ids), - _ => panic!("Choose_Binop: Opcode Match Error"), - } -} - -/// Translates VecLang binop expression node to the corresponding LLVMValueRef -unsafe fn translate_binop( - enode: &VecLang, - left: LLVMValueRef, - right: LLVMValueRef, - builder: LLVMBuilderRef, - name: *const c_char, -) -> LLVMValueRef { - match enode { - VecLang::VecAdd(_) | VecLang::Add(_) => LLVMBuildFAdd(builder, left, right, name), - VecLang::VecMul(_) | VecLang::Mul(_) => LLVMBuildFMul(builder, left, right, name), - VecLang::VecMinus(_) | VecLang::Minus(_) => LLVMBuildFSub(builder, left, right, name), - VecLang::VecDiv(_) | VecLang::Div(_) => LLVMBuildFDiv(builder, left, right, name), - // use binary bitwise operators for or / and - VecLang::Or(_) => LLVMBuildOr(builder, left, right, name), - VecLang::And(_) => LLVMBuildAnd(builder, left, right, name), - VecLang::Lt(_) => LLVMBuildFCmp(builder, LLVMRealPredicate::LLVMRealOLT, left, right, name), - _ => panic!("Not a vector or scalar binop."), - } -} - -/// Translates VecLang unop expression node to the corresponding LLVMValueRef -unsafe fn translate_unop( - enode: &VecLang, - n: LLVMValueRef, - builder: LLVMBuilderRef, - context: LLVMContextRef, - module: LLVMModuleRef, - name: *const c_char, -) -> LLVMValueRef { - match enode { - VecLang::Sgn(_) => { - let one = LLVMConstReal(LLVMFloatTypeInContext(context), 1 as f64); - let param_types = [ - LLVMFloatTypeInContext(context), - LLVMFloatTypeInContext(context), - ] - .as_mut_ptr(); - let fn_type = - LLVMFunctionType(LLVMFloatTypeInContext(context), param_types, 2, 0 as i32); - let func = - LLVMAddFunction(module, b"llvm.copysign.f32\0".as_ptr() as *const _, fn_type); - let args = [one, n].as_mut_ptr(); - LLVMBuildCall(builder, func, args, 2, name) - } - VecLang::Sqrt(_) => { - let param_types = [LLVMFloatTypeInContext(context)].as_mut_ptr(); - let fn_type = - LLVMFunctionType(LLVMFloatTypeInContext(context), param_types, 1, 0 as i32); - let func = LLVMAddFunction(module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); - let args = [n].as_mut_ptr(); - LLVMBuildCall(builder, func, args, 1, name) - } - VecLang::Neg(_) => LLVMBuildFNeg(builder, n, name), - _ => panic!("Not a scalar unop."), - } -} - -/// translate_get converts a VecLang Get Node to the corresponding LLVM Ir array name and -/// LLVM IR offset, as symbols. -unsafe fn translate_get(get: &VecLang, enode_vec: &[VecLang]) -> (Symbol, Symbol) { - match get { - VecLang::Get([sym, i]) => { - match (&enode_vec[usize::from(*sym)], &enode_vec[usize::from(*i)]) { - (VecLang::Symbol(name), VecLang::Symbol(offset)) => { - return (*name, *offset); - } - _ => panic!("Match Error: Expects Pair of Symbol, Symbol."), - } - } - _ => panic!("Match Error in Translate Get: Expects Get Enode."), - } -} - -/// Main function to optimize: Takes in a basic block of instructions, -/// optimizes it, and then translates it to LLVM IR code, in place. 
- -#[repr(C)] -pub struct IntLLVMPair { - node_int: u32, - arg: LLVMValueRef, -} - -#[repr(C)] -pub struct LLVMPair { - original_value: LLVMValueRef, - new_value: LLVMValueRef, -} - -#[repr(C)] -pub struct VectorPointerSize { - llvm_pointer: *const LLVMPair, - llvm_pointer_size: size_t, -} - -#[no_mangle] -pub fn optimize( - module: LLVMModuleRef, - context: LLVMContextRef, - builder: LLVMBuilderRef, - bb: *const LLVMValueRef, - size: size_t, - past_instrs: *const LLVMPair, - past_size: size_t, - run_egg: bool, - print_opt: bool, -) -> VectorPointerSize { - unsafe { - // llvm to egg - let llvm_instrs = from_raw_parts(bb, size); - let past_llvm_instrs = from_raw_parts(past_instrs, past_size); - let mut llvm_arg_pairs = BTreeMap::new(); - for instr_pair in past_llvm_instrs { - let original_value = instr_pair.original_value; - let new_value = instr_pair.new_value; - // assert!(isa_load(original_value) || isa_alloca(original_value)); - // assert!(isa_load(new_value) || isa_alloca(new_value)); - llvm_arg_pairs.insert(original_value, new_value); - } - let mut node_to_arg = Vec::new(); - let (expr, gep_map, store_map, symbol_map) = - llvm_to_egg(llvm_instrs, &mut llvm_arg_pairs, &mut node_to_arg); - - // optimization pass - if print_opt { - eprintln!("{}", expr.pretty(10)); - } - let mut best = expr.clone(); - if run_egg { - let pair = rules::run(&expr, 180, true, !run_egg); - best = pair.1; - } - if print_opt { - eprintln!("{}", best.pretty(10)); - } - - // egg to llvm - egg_to_llvm( - best, - &gep_map, - &store_map, - &symbol_map, - &mut llvm_arg_pairs, // does this work properly?, IDK? Need to return mut value - &node_to_arg, - module, - context, - builder, - ); - - let mut final_llvm_arg_pairs = Vec::new(); - for (unchanged_val, new_val) in llvm_arg_pairs.iter() { - let pair = LLVMPair { - original_value: *unchanged_val, - new_value: *new_val, - }; - // assert!(isa_load(*unchanged_val) || isa_alloca(*unchanged_val)); - // assert!(isa_load(*new_val) || isa_alloca(*new_val)); - final_llvm_arg_pairs.push(pair); - } - - // https://stackoverflow.com/questions/39224904/how-to-expose-a-rust-vect-to-ffi - let mut llvm_arg_pairs_boxed_slice: Box<[LLVMPair]> = - final_llvm_arg_pairs.into_boxed_slice(); - let llvm_arg_pairs_array: *mut LLVMPair = llvm_arg_pairs_boxed_slice.as_mut_ptr(); - let llvm_arg_pairs_array_len: usize = llvm_arg_pairs_boxed_slice.len(); - mem::forget(llvm_arg_pairs_boxed_slice); - - // TODO: FIX THIS - return VectorPointerSize { - llvm_pointer: llvm_arg_pairs_array, - llvm_pointer_size: llvm_arg_pairs_array_len, - }; - } -} - -// ------------ NEW CONVERSION FROM LLVM IR TO EGG EXPRESSIONS ------- - -type StoreMap = BTreeMap; -type IdMap = BTreeSet; -type SymbolMap = BTreeMap; - -enum LLVMOpType { - Argument, - Constant, - Store, - Load, - Gep, - Unop, - Bop, - Call, - FPTrunc, - SIToFP, - Bitcast, - Sqrt32, - Sqrt64, - FPExt, -} - -unsafe fn get_pow2(n: u32) -> u32 { - let mut pow = 1; - while pow < n { - pow *= 2; - } - return pow; -} - -fn is_pow2(n: u32) -> bool { - if n == 1 { - return true; - } else if n % 2 == 1 { - return false; - } - return is_pow2(n / 2); -} - -/// New Pad Vector should round the number of elements up to a power of 2, and then recursive -/// divide each into the lane width. Assumes lane width is also a power of 2 in size. 
-/// Raises assertion error if width is not a power of 2 -/// If the vector has less than the width, we do not pad, and just append that vector to enodevect -unsafe fn balanced_pad_vector<'a>( - binop_vec: &mut Vec, - enode_vec: &'a mut Vec, -) -> &'a mut Vec { - let width = config::vector_width(); - assert!(is_pow2(width as u32)); - let length = binop_vec.len(); - // Check vector less than width, and then return - if length < width { - enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); - return enode_vec; - } - let closest_pow2 = get_pow2(cmp::max(length, width) as u32); - let diff = closest_pow2 - (length as u32); - for _ in 0..diff { - let zero = VecLang::Num(0); - enode_vec.push(zero); - let zero_idx = enode_vec.len() - 1; - binop_vec.push(Id::from(zero_idx)); - } - return build_concat(width, binop_vec, enode_vec); -} - -/// Recursively concatenate vectors together -unsafe fn build_concat<'a>( - lane_width: usize, - binop_vec: &mut Vec, - enode_vec: &'a mut Vec, -) -> &'a mut Vec { - if binop_vec.len() == lane_width { - enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); - return enode_vec; - } - let num_binops = binop_vec.len(); - let halfway = num_binops / 2; - let (mut left, mut right) = (Vec::new(), Vec::new()); - for (i, b) in binop_vec.iter().enumerate() { - if i < halfway { - left.push(*b); - } else { - right.push(*b); - } - } - assert_eq!(left.len(), right.len()); - assert_eq!(left.len() + right.len(), num_binops); - assert_eq!(left.len() % lane_width, 0); - assert_eq!(right.len() % lane_width, 0); - let enode_vec1 = build_concat(lane_width, &mut left, enode_vec); - let idx1 = enode_vec1.len() - 1; - let enode_vec2 = build_concat(lane_width, &mut right, enode_vec1); - let idx2 = enode_vec2.len() - 1; - enode_vec2.push(VecLang::Concat([Id::from(idx1), Id::from(idx2)])); - return enode_vec2; -} - -unsafe fn _llvm_print(inst: LLVMValueRef) -> () { - LLVMDumpValue(inst); - println!(); -} - -unsafe fn _llvm_recursive_print(inst: LLVMValueRef) -> () { - if isa_argument(inst) { - return LLVMDumpValue(inst); - } else if isa_constant(inst) { - return LLVMDumpValue(inst); - } - let num_ops = LLVMGetNumOperands(inst); - for i in 0..num_ops { - let operand = LLVMGetOperand(inst, i as u32); - _llvm_recursive_print(operand); - print!(" "); - } - println!(); - LLVMDumpValue(inst); - println!(); - return; -} - -unsafe fn llvm_recursive_add( - builder: LLVMBuilderRef, - inst: LLVMValueRef, - context: LLVMContextRef, - llvm_arg_pairs: &mut LLVMPairMap, -) -> LLVMValueRef { - let cloned_inst = LLVMInstructionClone(inst); - if isa_argument(inst) { - return inst; - } - let mut matched = false; - let mut ret_value = inst; - for (original_val, new_val) in (&*llvm_arg_pairs).iter() { - if cmp_val_ref_address(&**original_val, &*inst) { - matched = true; - ret_value = *new_val; - break; - } - } - if matched { - return ret_value; - } - if isa_constant(inst) { - return inst; - } else if isa_phi(inst) { - return inst; - } else if isa_alloca(inst) { - // We have this in the base case to stop reconstruction of allocas, - // because allocas are like loads, and should not get reconstructioned - // search the llvm_arg_pairs for allocas that were already created - let mut matched = false; - let mut ret_value = inst; - for (original_val, new_val) in (&*llvm_arg_pairs).iter() { - // let original_llvm = llvm_pair.original_value; - // let new_llvm = llvm_pair.new_value; - if cmp_val_ref_address(&**original_val, &*inst) { - matched = true; - ret_value = *new_val; - break; - } - } - if 
matched { - return ret_value; - } else { - // assert!(isa_load(inst) || isa_alloca(inst)); - // assert!(isa_load(cloned_inst) || isa_alloca(cloned_inst)); - llvm_arg_pairs.insert(inst, cloned_inst); - LLVMInsertIntoBuilder(builder, cloned_inst); - return cloned_inst; - } - } - let num_ops = LLVMGetNumOperands(inst); - for i in 0..num_ops { - let operand = LLVMGetOperand(inst, i as u32); - let new_operand = llvm_recursive_add(builder, operand, context, llvm_arg_pairs); - LLVMSetOperand(cloned_inst, i as u32, new_operand); - } - LLVMInsertIntoBuilder(builder, cloned_inst); - - let mut in_map = false; - for (original_inst, _) in (&*llvm_arg_pairs).iter() { - if cmp_val_ref_address(&**original_inst, &*inst) { - in_map = true; - } - } - if isa_load(inst) { - if !in_map { - // assert!(isa_load(inst) || isa_alloca(inst)); - // assert!(isa_load(cloned_inst) || isa_alloca(cloned_inst)); - llvm_arg_pairs.insert(inst, cloned_inst); - } - } - return cloned_inst; -} - -unsafe fn match_llvm_op(expr: &LLVMValueRef) -> LLVMOpType { - if isa_bop(*expr) { - return LLVMOpType::Bop; - } else if isa_unop(*expr) { - return LLVMOpType::Unop; - } else if isa_constant(*expr) { - return LLVMOpType::Constant; - } else if isa_gep(*expr) { - return LLVMOpType::Gep; - } else if isa_load(*expr) { - return LLVMOpType::Load; - } else if isa_store(*expr) { - return LLVMOpType::Store; - } else if isa_argument(*expr) { - return LLVMOpType::Argument; - } else if isa_call(*expr) { - return LLVMOpType::Call; - } else if isa_fptrunc(*expr) { - return LLVMOpType::FPTrunc; - } else if isa_sitofp(*expr) { - return LLVMOpType::SIToFP; - } else if isa_bitcast(*expr) { - return LLVMOpType::Bitcast; - } else if isa_sqrt32(*expr) { - return LLVMOpType::Sqrt32; - } else if isa_sqrt64(*expr) { - return LLVMOpType::Sqrt64; - } else if isa_fpext(*expr) { - return LLVMOpType::FPExt; - } else { - LLVMDumpValue(*expr); - println!(); - panic!("ref_to_egg: Unmatched case for LLVMValueRef {:?}", *expr); - } -} - -unsafe fn choose_unop(unop: &LLVMValueRef, id: Id) -> VecLang { - match LLVMGetInstructionOpcode(*unop) { - LLVMFNeg => VecLang::Neg([id]), - _ => panic!("Choose_Unop: Opcode Match Error"), - } -} - -unsafe fn arg_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - _gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &LLVMPairMap, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - let sym_name = gen_arg_name(); - let symbol = VecLang::Symbol(Symbol::from(sym_name)); - symbol_map.insert(symbol.clone(), expr); - enode_vec.push(symbol); - return (enode_vec, next_idx + 1); -} - -unsafe fn bop_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - let left = LLVMGetOperand(expr, 0); - let right = LLVMGetOperand(expr, 1); - let (v1, next_idx1) = ref_to_egg( - left, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - let (mut v2, next_idx2) = ref_to_egg( - right, - v1, - next_idx1, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - let ids = [ - Id::from((next_idx1 - 1) as usize), - Id::from((next_idx2 - 1) as usize), - ]; - v2.push(choose_binop(&expr, ids)); - (v2, next_idx2 + 1) -} - -unsafe fn unop_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: 
&mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - let sub_expr = LLVMGetOperand(expr, 0); - let (mut v, next_idx1) = ref_to_egg( - sub_expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - let id = Id::from((next_idx1 - 1) as usize); - v.push(choose_unop(&expr, id)); - (v, next_idx1 + 1) -} - -unsafe fn gep_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &LLVMPairMap, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - // // assert!(isa_argument(expr) || isa_gep(expr) || isa_load(expr)); - // let mut enode_vec = Vec::new(); - let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); - enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); - - let num_gep_operands = LLVMGetNumOperands(expr); - let mut indices = Vec::new(); - for operand_idx in 1..num_gep_operands { - let array_offset = llvm_index(expr, operand_idx); - indices.push(array_offset); - } - let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); - let offsets_symbol = Symbol::from(&offsets_string); - enode_vec.push(VecLang::Symbol(offsets_symbol)); - - let get_node = VecLang::Get([ - Id::from((next_idx) as usize), - Id::from((next_idx + 1) as usize), - ]); - (*gep_map).insert( - (Symbol::from(array_name), Symbol::from(&offsets_string)), - expr, - ); - enode_vec.push(get_node); - - return (enode_vec, next_idx + 3); -} - -unsafe fn _address_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &LLVMPairMap, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); - enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); - - let num_gep_operands = LLVMGetNumOperands(expr); - let mut indices = Vec::new(); - for operand_idx in 1..num_gep_operands { - let array_offset = llvm_index(expr, operand_idx); - indices.push(array_offset); - } - let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); - let offsets_symbol = Symbol::from(&offsets_string); - enode_vec.push(VecLang::Symbol(offsets_symbol)); - - let get_node = VecLang::Get([ - Id::from((next_idx) as usize), - Id::from((next_idx + 1) as usize), - ]); - (*gep_map).insert( - (Symbol::from(array_name), Symbol::from(&offsets_string)), - expr, - ); - enode_vec.push(get_node); - - return (enode_vec, next_idx + 3); -} - -unsafe fn sitofp_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &LLVMPairMap, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); - enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); - - let num_gep_operands = LLVMGetNumOperands(expr); - let mut indices = Vec::new(); - for operand_idx in 1..num_gep_operands { - let array_offset = llvm_index(expr, operand_idx); - indices.push(array_offset); - } - let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); - let offsets_symbol = Symbol::from(&offsets_string); - 
enode_vec.push(VecLang::Symbol(offsets_symbol)); - - let get_node = VecLang::Get([ - Id::from((next_idx) as usize), - Id::from((next_idx + 1) as usize), - ]); - (*gep_map).insert( - (Symbol::from(array_name), Symbol::from(&offsets_string)), - expr, - ); - enode_vec.push(get_node); - - return (enode_vec, next_idx + 3); -} - -unsafe fn load_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - return gep_to_egg( - expr, // we pass the entire instruction and not just the address - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); -} - -unsafe fn store_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - let data = LLVMGetOperand(expr, 0); - let addr = LLVMGetOperand(expr, 1); // expected to be a gep operator or addr in LLVM - let (vec, next_idx1) = ref_to_egg( - data, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - (*store_map).insert(next_idx1 - 1, addr); - (*id_map).insert(Id::from((next_idx1 - 1) as usize)); - return (vec, next_idx1); -} - -unsafe fn const_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - _gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &LLVMPairMap, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - let value = get_constant_float(expr); - enode_vec.push(VecLang::Num(value as i32)); - (enode_vec, next_idx + 1) -} - -unsafe fn _load_arg_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &LLVMPairMap, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - // assert!(isa_argument(expr) || isa_gep(expr)); - let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); - enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); - - let num_gep_operands = LLVMGetNumOperands(expr); - let mut indices = Vec::new(); - for operand_idx in 1..num_gep_operands { - let array_offset = llvm_index(expr, operand_idx); - indices.push(array_offset); - } - let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); - let offsets_symbol = Symbol::from(&offsets_string); - enode_vec.push(VecLang::Symbol(offsets_symbol)); - - let get_node = VecLang::Get([ - Id::from((next_idx) as usize), - Id::from((next_idx + 1) as usize), - ]); - (*gep_map).insert( - (Symbol::from(array_name), Symbol::from(&offsets_string)), - expr, - ); - enode_vec.push(get_node); - - return (enode_vec, next_idx + 3); -} - -unsafe fn load_call_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - if isa_sqrt32(expr) { - return sqrt32_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - } - let call_sym_name = gen_call_name(); - let call_sym = VecLang::Symbol(Symbol::from(call_sym_name)); - 
symbol_map.insert(call_sym.clone(), expr); - enode_vec.push(call_sym); - return (enode_vec, next_idx + 1); -} - -unsafe fn fpext_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - // assert!(isa_fpext(expr)); - let operand = LLVMGetOperand(expr, 0); - return ref_to_egg( - operand, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); -} - -unsafe fn sqrt32_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - // assert!(isa_sqrt32(expr)); - let operand = LLVMGetOperand(expr, 0); - let (mut new_enode_vec, next_idx1) = ref_to_egg( - operand, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - let sqrt_node = VecLang::Sqrt([Id::from((next_idx1 - 1) as usize)]); - new_enode_vec.push(sqrt_node); - return (new_enode_vec, next_idx1 + 1); -} - -unsafe fn sqrt64_to_egg( - _expr: LLVMValueRef, - _enode_vec: Vec, - _next_idx: i32, - _gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &LLVMPairMap, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - // assert!(isa_sqrt64(expr)); - panic!("Currently, we do not handle calls to sqrt.f64 without fpext and fptrunc before and after!. This is the only 'context sensitive' instance in the dispatch matching. ") -} - -unsafe fn fptrunc_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - // assert!(isa_fptrunc(expr)); - let operand = LLVMGetOperand(expr, 0); - if isa_sqrt64(operand) { - return sqrt64_to_egg( - operand, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - } - return ref_to_egg( - operand, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); -} - -unsafe fn bitcast_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - // assert!(isa_bitcast(expr)); - let operand = LLVMGetOperand(expr, 0); - let result = ref_to_egg( - operand, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - return result; -} - -unsafe fn ref_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &LLVMPairMap, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - for (original_val, _) in llvm_arg_pairs.iter() { - if cmp_val_ref_address(&**original_val, &*expr) { - // Here we create a new numbered variable node - let var_idx = gen_node_idx(); - let var_idx_str = var_idx.to_string(); - let special_var_node = VecLang::Symbol(Symbol::from(var_idx_str)); - enode_vec.push(special_var_node); - let node_to_arg_pair = IntLLVMPair { - arg: 
expr, - node_int: var_idx, - }; - node_to_arg.push(node_to_arg_pair); - return (enode_vec, next_idx + 1); - } - } - let (vec, next_idx) = match match_llvm_op(&expr) { - LLVMOpType::Bop => bop_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Unop => unop_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Constant => const_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Gep => gep_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Load => load_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Store => store_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Argument => arg_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Call => load_call_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::FPTrunc => fptrunc_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::FPExt => fpext_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::SIToFP => sitofp_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Bitcast => bitcast_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Sqrt32 => sqrt32_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Sqrt64 => sqrt64_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - }; - return (vec, next_idx); -} - -struct LLVM2EggState { - llvm2egg: &BTreeMap, -} - -unsafe fn ref_to_egg( - llvm_instr: LLVMValueRef, - next_node_idx: i64, - mut egg_nodes: Vec, - mut translation_metadata: LLVM2EggState, -) -> Vec { - if (translation_metadata.llvm2egg.contains_key(llvm_instr)) { - let translated_egg_node = translation_metadata - .llvm2egg - .get(llvm_instr) - .expect("Key must exist"); - egg_nodes.push(translated_egg_node); - return (egg_nodes, next_node_idx + 1); - } - panic!("Unimplemented"); -} - -unsafe fn llvm_to_egg(llvm_instrs_in_chunk: &[LLVMValueRef]) -> RecExpr { - let mut egg_nodes: Vec = Vec::new(); - - // Map from (translated) llvm instructions to egg graph nodes - let mut llvm_instr2egg_node: BTreeMap = BTreeMap::new(); - - // State Variable To Hold Maps During Translation - let mut llvm2egg_state = LLVM2EggState { - llvm2egg: llvm_instr2egg_node, - }; - - // Index of next node to translate - let mut next_node_idx: i64 = 0; - - // for each store, iterate backwards from that store and translate to egg - for llvm_instr in llvm_instrs_in_chunk.iter() { - if isa_store(*llvm_instr) { - ref_to_egg() - } - } - - panic!("Unimplemented"); -} - 
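// A minimal, self-contained sketch of the memoized, store-driven translation
// loop that the deleted skeleton above gestures at; this is not the project's
// own code, and `Instr` / `Node` are made-up stand-ins for LLVMValueRef and
// the VecLang nodes. Translation starts at every store in the chunk, recurses
// backwards through operands, and consults a BTreeMap so an instruction that
// was already translated is reused by index instead of being re-emitted.
use std::collections::BTreeMap;

#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum Instr {
    Const(i32),
    Add(Box<Instr>, Box<Instr>),
    Store(Box<Instr>),
}

#[derive(Clone, Debug)]
enum Node {
    Num(i32),
    Add(usize, usize), // indices of already-emitted child nodes
}

fn translate(
    instr: &Instr,
    nodes: &mut Vec<Node>,
    translated: &mut BTreeMap<Instr, usize>,
) -> usize {
    if let Some(&idx) = translated.get(instr) {
        return idx; // reuse a node translated on an earlier visit
    }
    let idx = match instr {
        Instr::Const(n) => {
            nodes.push(Node::Num(*n));
            nodes.len() - 1
        }
        Instr::Add(a, b) => {
            let ia = translate(a, nodes, translated);
            let ib = translate(b, nodes, translated);
            nodes.push(Node::Add(ia, ib));
            nodes.len() - 1
        }
        Instr::Store(data) => translate(data, nodes, translated),
    };
    translated.insert(instr.clone(), idx);
    idx
}

fn chunk_to_nodes(chunk: &[Instr]) -> Vec<Node> {
    let (mut nodes, mut translated) = (Vec::new(), BTreeMap::new());
    for instr in chunk {
        if matches!(instr, Instr::Store(_)) {
            translate(instr, &mut nodes, &mut translated);
        }
    }
    nodes
}

fn main() {
    // The shared subexpression `1 + 2` is stored twice but emitted only once.
    let shared = Instr::Add(Box::new(Instr::Const(1)), Box::new(Instr::Const(2)));
    let chunk = [
        Instr::Store(Box::new(shared.clone())),
        Instr::Store(Box::new(shared)),
    ];
    println!("{:?}", chunk_to_nodes(&chunk)); // [Num(1), Num(2), Add(0, 1)]
}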
-unsafe fn llvm_to_egg<'a>( - bb_vec: &[LLVMValueRef], - llvm_arg_pairs: &mut LLVMPairMap, - node_to_arg: &mut Vec, -) -> (RecExpr, GEPMap, StoreMap, SymbolMap) { - let mut enode_vec = Vec::new(); - let (mut gep_map, mut store_map, mut id_map, mut symbol_map) = ( - BTreeMap::new(), - BTreeMap::new(), - BTreeSet::new(), - BTreeMap::new(), - ); - let mut next_idx = 0; - for bop in bb_vec.iter() { - if isa_store(*bop) { - let (new_enode_vec, next_idx1) = ref_to_egg( - *bop, - enode_vec, - next_idx, - &mut gep_map, - &mut store_map, - &mut id_map, - &mut symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - next_idx = next_idx1; - enode_vec = new_enode_vec; - } - } - let mut final_vec = Vec::new(); - for id in id_map.iter() { - final_vec.push(*id); - } - balanced_pad_vector(&mut final_vec, &mut enode_vec); - - let rec_expr = RecExpr::from(enode_vec); - (rec_expr, gep_map, store_map, symbol_map) -} From f1bc37783c45ae9d9dcc47e6606d53e43dba1497 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 3 May 2022 20:56:12 -0400 Subject: [PATCH 083/143] add works, mult works --- src/dios-egraphs/Diospyros/diospyros.cpp | 44 ++++++++++++++++-------- src/dios-egraphs/Diospyros/src/lib.rs | 11 +++--- 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 87271e20..ddd6d7ff 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -14,8 +14,8 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" @@ -485,21 +485,21 @@ bool call_is_not_sqrt(CallInst *inst) { /** * True iff an instruction is "vectorizable" -*/ -bool can_vectorize(Value* value){ - // TODO: - Instruction * instr = dyn_cast(value); + */ +bool can_vectorize(Value *value) { + // TODO: + Instruction *instr = dyn_cast(value); assert(instr != NULL); - if (isa(instr)){ + if (isa(instr)) { if (instr->getOpcode() == Instruction::FAdd) { return true; - } else if (instr -> getOpcode() == Instruction::FSub) { + } else if (instr->getOpcode() == Instruction::FSub) { return true; - } else if (instr -> getOpcode() == Instruction::FDiv) { + } else if (instr->getOpcode() == Instruction::FDiv) { return true; - } else if (instr -> getOpcode() == Instruction::FMul) { + } else if (instr->getOpcode() == Instruction::FMul) { return true; - } else if (instr -> getOpcode() == Instruction::FNeg) { + } else if (instr->getOpcode() == Instruction::FNeg) { return true; } return false; @@ -610,7 +610,8 @@ struct DiospyrosPass : public FunctionPass { continue; } - // check if the chunk vector actually has instructions to optimixe on + // check if the chunk vector actually has instructions to + // optimixe on bool has_vectorizable_instrs = false; for (auto &instr : chunk_vector) { if (can_vectorize(unwrap(instr))) { @@ -647,11 +648,24 @@ struct DiospyrosPass : public FunctionPass { has_changes = has_changes || true; assert(chunk_vector.size() != 0); - Value *last_instr_val = unwrap(chunk_vector.back()); - Instruction *last_instr = dyn_cast(last_instr_val); - assert(last_instr != NULL); + + Value *last_instr_val = NULL; + Instruction *last_instr = NULL; + for (int i = chunk_vector.size() - 1; i >= 0; i--) { + last_instr_val = unwrap(chunk_vector[i]); + assert(last_instr_val != NULL); + last_instr = 
dyn_cast(last_instr_val); + assert(last_instr != NULL); + if (!last_instr->isTerminator()) { + break; + } + } + + // Value *last_instr_val = unwrap(chunk_vector.back()); + // Instruction *last_instr = + // dyn_cast(last_instr_val); assert(last_instr != + // NULL); IRBuilder<> builder(last_instr); - builder.SetInsertPoint(&B); Module *mod = F.getParent(); LLVMContext &context = F.getContext(); diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 86edded6..d2bbe589 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -421,7 +421,6 @@ unsafe fn bop_to_egg( Id::from((left_next_idx - 1) as usize), Id::from((right_next_idx - 1) as usize), ]; - println!("{:?}", right_egg_nodes); right_egg_nodes.push(choose_binop(&llvm_instr, ids)); (right_egg_nodes, right_next_idx + 1) } @@ -552,10 +551,11 @@ unsafe fn start_translating_llvm_to_egg( translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { translation_metadata.start_instructions.push(llvm_instr); + let pair_result = llvm_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); translation_metadata .start_ids - .push(Id::from(next_node_idx as usize)); - return llvm_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + .push(Id::from((pair_result.1 - 1) as usize)); + pair_result } unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { @@ -689,7 +689,9 @@ unsafe fn reg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) for (llvm_instr, reg_node) in llvm2reg.iter() { // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. if reg_node == egg_node { - return *llvm_instr; + let new_instr = LLVMInstructionClone(*llvm_instr); + LLVMInsertIntoBuilder(translation_metadata.builder, new_instr); + return new_instr; } } panic!( @@ -981,6 +983,7 @@ unsafe fn vecsgn_to_llvm(vec: &Id, md: &Egg2LLVMState) -> LLVMValueRef { /// Side Effect: Builds and Insert LLVM instructions unsafe fn egg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) -> LLVMValueRef { match egg_node { + VecLang::NoOptVec(..) => panic!("No Opt Vec was found. Egg to LLVM Translation does not currently handle no opt vec nodes."), VecLang::Symbol(..) => { panic!("Symbol was found. Egg to LLVM Translation does not handle symbol nodes.") } From 4258a85f396a2c56afdc96ee1ddf66653d9bdaeb Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 3 May 2022 22:47:22 -0400 Subject: [PATCH 084/143] add no optimization shortcut --- src/dios-egraphs/Diospyros/src/lib.rs | 75 +++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 11 deletions(-) diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index d2bbe589..e098eda2 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -153,7 +153,7 @@ pub fn optimize( // llvm to egg let (egg_expr, llvm2egg_metadata) = - llvm_to_egg_main(chunk_llvm_instrs, restricted_llvm_instrs, true); + llvm_to_egg_main(chunk_llvm_instrs, restricted_llvm_instrs, run_egg); // Bail if no egg Nodes to optimize if egg_expr.as_ref().is_empty() { @@ -175,7 +175,7 @@ pub fn optimize( } // egg to llvm - egg_to_llvm_main(best_egg_expr, &llvm2egg_metadata, module, context, builder); + egg_to_llvm_main(best_egg_expr, &llvm2egg_metadata, module, context, builder, run_egg); } } @@ -227,10 +227,10 @@ unsafe fn balanced_pad_vector<'a>( "There must be 1 or more operators to vectorize." 
); // Check vector less than width, and then return - // if length < width { - // enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); - // return enode_vec; - // } + if length < width { + enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); + return enode_vec; + } let closest_pow2 = get_pow2(cmp::max(length, width) as u32); let diff = closest_pow2 - (length as u32); for _ in 0..diff { @@ -574,7 +574,7 @@ unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { unsafe fn llvm_to_egg_main( llvm_instrs_in_chunk: &[LLVMValueRef], restricted_instrs: &[LLVMValueRef], - _vectorize: bool, + vectorize: bool, // TODO: feed this in as an argument llvm_instr2egg_node: BTreeMap, ) -> (RecExpr, LLVM2EggState) { let mut egg_nodes: Vec = Vec::new(); @@ -645,8 +645,17 @@ unsafe fn llvm_to_egg_main( } } - // If vectorize is true, then generate the vector, with padding - // TODO: Implement a switch to not vectorize + // For testing purposes: Handle no vectorization + if !vectorize { + let mut outer_vec_ids = Vec::new(); + for id in translation_metadata.start_ids.iter() { + outer_vec_ids.push(*id); + } + egg_nodes.push(VecLang::NoOptVec(outer_vec_ids.clone().into_boxed_slice())); + let rec_expr = RecExpr::from(egg_nodes); + return (rec_expr, translation_metadata); + } + // Generate a padded vector let mut outer_vec_ids = Vec::new(); for id in translation_metadata.start_ids.iter() { @@ -978,12 +987,32 @@ unsafe fn vecsgn_to_llvm(vec: &Id, md: &Egg2LLVMState) -> LLVMValueRef { LLVMBuildCall(md.builder, func, args, 2, b"\0".as_ptr() as *const _) } +/** + * Vector representing No Optimization: Egg will not have modified the vector at all. + */ +unsafe fn nooptvec_to_llvm(boxed_ids: &Box<[Id]>, md: &Egg2LLVMState) -> () { + // Convert the Boxed Ids to a Vector, and generate a vector of zeros + // Invariant: idvec must not be empty + let idvec = boxed_ids.to_vec(); + assert!( + !idvec.is_empty(), + "Id Vec Cannot be empty when converting Vector to an LLVM Vector" + ); + for (i, &eggid) in idvec.iter().enumerate() { + let egg_node = &md.egg_nodes_vector[usize::from(eggid)]; + let new_instr = egg_to_llvm(egg_node, md); + let old_instr = md.llvm2egg_metadata.start_instructions.get(i).expect("Index Must Exist In Start Instructions"); + LLVMReplaceAllUsesWith(*old_instr, new_instr); + LLVMInstructionRemoveFromParent(*old_instr); + } +} + /// Egg To LLVM Dispatches translation of VecLanf Egg Nodes to LLVMValueRegs /// /// Side Effect: Builds and Insert LLVM instructions unsafe fn egg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) -> LLVMValueRef { match egg_node { - VecLang::NoOptVec(..) => panic!("No Opt Vec was found. Egg to LLVM Translation does not currently handle no opt vec nodes."), + VecLang::NoOptVec(boxed_ids) => panic!("No Opt Vector was found. Egg to LLVM Translation does not handle No Opt Vector nodes at this location."), VecLang::Symbol(..) => { panic!("Symbol was found. Egg to LLVM Translation does not handle symbol nodes.") } @@ -1025,6 +1054,20 @@ unsafe fn egg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) } } +unsafe fn is_nooptvec(egg_expr: &VecLang) -> bool { + match egg_expr { + VecLang::NoOptVec(..) => true, + _ => false + } +} + +unsafe fn get_noopt_eggnodes(egg_expr: &VecLang) -> &Box<[Id]> { + match egg_expr { + VecLang::NoOptVec(boxed_ids) => boxed_ids, + _ => panic!("Not a NoOptVec!") + } +} + // TODO: Add non-vectorized version as well! 
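// A standalone sketch (with plain usize ids rather than egg Ids) of the
// padding scheme whose short-vector early return this patch restores in
// balanced_pad_vector; it is an illustration under simplified assumptions,
// not the project's implementation. An operator list shorter than the
// hardware width is emitted as a single vector; anything longer is
// zero-padded up to the next power of two and then split recursively into
// width-sized lanes joined by pairwise concatenations.
#[derive(Debug, PartialEq)]
enum Tree {
    Vec(Vec<usize>),              // one width-sized vector of lane ids
    Concat(Box<Tree>, Box<Tree>), // concatenation of two equal halves
}

fn balanced_pad(mut ids: Vec<usize>, width: usize, zero_id: usize) -> Tree {
    assert!(width.is_power_of_two());
    if ids.len() < width {
        return Tree::Vec(ids);
    }
    let target = ids.len().max(width).next_power_of_two();
    ids.resize(target, zero_id); // pad the tail with the id of a zero constant
    build_concat(&ids, width)
}

fn build_concat(ids: &[usize], width: usize) -> Tree {
    if ids.len() == width {
        return Tree::Vec(ids.to_vec());
    }
    let (left, right) = ids.split_at(ids.len() / 2);
    Tree::Concat(
        Box::new(build_concat(left, width)),
        Box::new(build_concat(right, width)),
    )
}

fn main() {
    // Six lane ids with width 4: padded to eight, split into two 4-lane vectors.
    let tree = balanced_pad(vec![10, 11, 12, 13, 14, 15], 4, 0);
    assert_eq!(
        tree,
        Tree::Concat(
            Box::new(Tree::Vec(vec![10, 11, 12, 13])),
            Box::new(Tree::Vec(vec![14, 15, 0, 0])),
        )
    );
}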
unsafe fn egg_to_llvm_main( expr: RecExpr, @@ -1032,6 +1075,7 @@ unsafe fn egg_to_llvm_main( module: LLVMModuleRef, context: LLVMContextRef, builder: LLVMBuilderRef, + vectorize: bool, ) -> () { // Walk the RecExpr of Egg Nodes and translate it in place to LLVM let egg_nodes = expr.as_ref(); @@ -1045,9 +1089,18 @@ unsafe fn egg_to_llvm_main( context: context, module: module, }; + // If vectorize was not true, we are finished, because nooptvectorize_to_llvm will generate the required code. + if !vectorize { + assert!(is_nooptvec(last_egg_node)); + return nooptvec_to_llvm(get_noopt_eggnodes(last_egg_node), &translation_metadata); + } + + // Regular translation from vectorization + + assert!(!is_nooptvec(last_egg_node)); let llvm_vector = egg_to_llvm(last_egg_node, &translation_metadata); - // HERE, we stitch our work back into the current LLVM code + // BELOW HERE, we allow for vectorization output, and we stitch our work back into the current LLVM code // NOTE: We Assume Egg rewriter will maintain relative positions of elements in vector // Extract the elements of the vector, to be assigned back to where they are to be used. From 1e0b9bcb7139eeaa360634189812a71f957825cf Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 4 May 2022 00:13:42 -0400 Subject: [PATCH 085/143] fix issue with argument being caught not in current chunk --- src/dios-egraphs/Diospyros/c-tests/div.c | 28 ++++++++++++++++++++++++ src/dios-egraphs/Diospyros/c-tests/sub.c | 26 ++++++++++++++++++++++ src/dios-egraphs/Diospyros/diospyros.cpp | 12 +++++----- src/dios-egraphs/Diospyros/src/lib.rs | 12 ++++++---- 4 files changed, 68 insertions(+), 10 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/c-tests/div.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/sub.c diff --git a/src/dios-egraphs/Diospyros/c-tests/div.c b/src/dios-egraphs/Diospyros/c-tests/div.c new file mode 100644 index 00000000..90c19534 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/div.c @@ -0,0 +1,28 @@ +#include +#include +#include +#define SIZE 4 +#define DELTA 0.1 + +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = b_in[0] / a_in[0]; + c_out[1] = b_in[1] / a_in[1]; + c_out[2] = b_in[2] / a_in[2]; + c_out[3] = b_in[3] / a_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 6, 8}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(fabs(c_out[0] - 5.0) < DELTA); + assert(fabs(c_out[1] - 3.0) < DELTA); + assert(fabs(c_out[2] - 2.0) < DELTA); + assert(fabs(c_out[3] - 2.0) < DELTA); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/sub.c b/src/dios-egraphs/Diospyros/c-tests/sub.c new file mode 100644 index 00000000..3ef3621e --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/sub.c @@ -0,0 +1,26 @@ +#include +#include +#define SIZE 4 + +void diff(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] - b_in[0]; + c_out[1] = a_in[1] - b_in[1]; + c_out[2] = a_in[2] - b_in[2]; + c_out[3] = a_in[3] - b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 7, 8}; + float b_in[SIZE] = {5, 6, 3, 4}; + float c_out[SIZE]; + diff(a_in, b_in, c_out); + assert(c_out[0] == -4); + assert(c_out[1] == -4); + assert(c_out[2] == 4); + assert(c_out[3] == 4); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + 
printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index ddd6d7ff..c14e9ec7 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -597,12 +597,12 @@ struct DiospyrosPass : public FunctionPass { chunk_accumulator.push_back(chunk_vector); } - for (auto &chunk_vector : chunk_accumulator) { - for (auto &instr : chunk_vector) { - errs() << *unwrap(instr) << "\n"; - } - errs() << "---------------------\n"; - } + // for (auto &chunk_vector : chunk_accumulator) { + // for (auto &instr : chunk_vector) { + // errs() << *unwrap(instr) << "\n"; + // } + // errs() << "---------------------\n"; + // } for (int i = 0; i < chunk_accumulator.size(); ++i) { auto &chunk_vector = chunk_accumulator[i]; diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index e098eda2..87b644eb 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -344,7 +344,9 @@ unsafe fn isa_supported_unop(llvm_instr: LLVMValueRef) -> bool { } unsafe fn match_llvm_op(llvm_instr: &LLVMValueRef) -> LLVMOpType { - if isa_fadd(*llvm_instr) { + if isa_argument(*llvm_instr) { + return LLVMOpType::Argument; + } else if isa_fadd(*llvm_instr) { return LLVMOpType::FAdd; } else if isa_fsub(*llvm_instr) { return LLVMOpType::FSub; @@ -356,9 +358,7 @@ unsafe fn match_llvm_op(llvm_instr: &LLVMValueRef) -> LLVMOpType { return LLVMOpType::FNeg; } else if isa_constant(*llvm_instr) { return LLVMOpType::Constant; - } else if isa_argument(*llvm_instr) { - return LLVMOpType::Argument; - } else if isa_sqrt32(*llvm_instr) { + } else if isa_sqrt32(*llvm_instr) { return LLVMOpType::Sqrt32; } else { return LLVMOpType::UnhandledLLVMOpCode; @@ -520,9 +520,11 @@ unsafe fn llvm_to_egg( return unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); } // If the current llvm instruction is not in the current chunk, we must return a register + // The current llvm instruction must not be a arguments, because arguments will be outside every chunk if !translation_metadata .instructions_in_chunk .contains(&llvm_instr) + && !isa_argument(llvm_instr) { return unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); } @@ -683,6 +685,7 @@ unsafe fn arg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) for (llvm_instr, arg_node) in llvm2arg.iter() { // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. if arg_node == egg_node { + assert!(isa_argument(*llvm_instr)); return *llvm_instr; } } @@ -699,6 +702,7 @@ unsafe fn reg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. 
if reg_node == egg_node { let new_instr = LLVMInstructionClone(*llvm_instr); + assert!(!isa_argument(*llvm_instr)); LLVMInsertIntoBuilder(translation_metadata.builder, new_instr); return new_instr; } From 5856101f6c3801294fccd6f16c9bd73a32ddcca3 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 4 May 2022 02:50:25 -0400 Subject: [PATCH 086/143] fix up some more cases for five-binops --- src/dios-egraphs/Diospyros/diospyros.cpp | 14 ++--- src/dios-egraphs/Diospyros/src/lib.rs | 67 ++++++++++++++++-------- 2 files changed, 51 insertions(+), 30 deletions(-) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index c14e9ec7..fb73c26b 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -597,12 +597,12 @@ struct DiospyrosPass : public FunctionPass { chunk_accumulator.push_back(chunk_vector); } - // for (auto &chunk_vector : chunk_accumulator) { - // for (auto &instr : chunk_vector) { - // errs() << *unwrap(instr) << "\n"; - // } - // errs() << "---------------------\n"; - // } + for (auto &chunk_vector : chunk_accumulator) { + for (auto &instr : chunk_vector) { + errs() << *unwrap(instr) << "\n"; + } + errs() << "---------------------\n"; + } for (int i = 0; i < chunk_accumulator.size(); ++i) { auto &chunk_vector = chunk_accumulator[i]; @@ -631,7 +631,7 @@ struct DiospyrosPass : public FunctionPass { // guaranteed to be a different chunk vector ahead of // the origianl one. bool must_restrict = false; - auto &other_chunk_vector = chunk_accumulator[i]; + auto &other_chunk_vector = chunk_accumulator[j]; for (auto other_chunk_instr : other_chunk_vector) { if (unwrap(chunk_instr) == unwrap(other_chunk_instr)) { diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 87b644eb..ecafd1cb 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -175,7 +175,14 @@ pub fn optimize( } // egg to llvm - egg_to_llvm_main(best_egg_expr, &llvm2egg_metadata, module, context, builder, run_egg); + egg_to_llvm_main( + best_egg_expr, + &llvm2egg_metadata, + module, + context, + builder, + run_egg, + ); } } @@ -358,7 +365,7 @@ unsafe fn match_llvm_op(llvm_instr: &LLVMValueRef) -> LLVMOpType { return LLVMOpType::FNeg; } else if isa_constant(*llvm_instr) { return LLVMOpType::Constant; - } else if isa_sqrt32(*llvm_instr) { + } else if isa_sqrt32(*llvm_instr) { return LLVMOpType::Sqrt32; } else { return LLVMOpType::UnhandledLLVMOpCode; @@ -674,18 +681,19 @@ unsafe fn llvm_to_egg_main( struct Egg2LLVMState<'a> { llvm2egg_metadata: LLVM2EggState, egg_nodes_vector: &'a [VecLang], + prior_translated_nodes: BTreeSet, builder: LLVMBuilderRef, context: LLVMContextRef, module: LLVMModuleRef, } -unsafe fn arg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) -> LLVMValueRef { +unsafe fn arg_to_llvm(egg_node: &VecLang, translation_metadata: &mut Egg2LLVMState) -> LLVMValueRef { // TODO: Make More Efficient with BTREEMAP? let llvm2arg = &translation_metadata.llvm2egg_metadata.llvm2arg; for (llvm_instr, arg_node) in llvm2arg.iter() { // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. 
if arg_node == egg_node { - assert!(isa_argument(*llvm_instr)); + assert!(isa_argument(*llvm_instr)); return *llvm_instr; } } @@ -695,15 +703,19 @@ unsafe fn arg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) ); } -unsafe fn reg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) -> LLVMValueRef { +unsafe fn reg_to_llvm(egg_node: &VecLang, translation_metadata: &mut Egg2LLVMState) -> LLVMValueRef { // TODO: Make More Efficient with BTREEMAP? let llvm2reg = &translation_metadata.llvm2egg_metadata.llvm2reg; for (llvm_instr, reg_node) in llvm2reg.iter() { // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. if reg_node == egg_node { + assert!(!isa_argument(*llvm_instr)); + if translation_metadata.prior_translated_nodes.contains(&*llvm_instr) { + return *llvm_instr; + } let new_instr = LLVMInstructionClone(*llvm_instr); - assert!(!isa_argument(*llvm_instr)); LLVMInsertIntoBuilder(translation_metadata.builder, new_instr); + translation_metadata.prior_translated_nodes.insert(new_instr); return new_instr; } } @@ -713,11 +725,11 @@ unsafe fn reg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) ); } -unsafe fn num_to_llvm(n: &i32, md: &Egg2LLVMState) -> LLVMValueRef { +unsafe fn num_to_llvm(n: &i32, md: &mut Egg2LLVMState) -> LLVMValueRef { LLVMConstReal(LLVMFloatTypeInContext(md.context), *n as f64) } -unsafe fn vec_to_llvm(boxed_ids: &Box<[Id]>, md: &Egg2LLVMState) -> LLVMValueRef { +unsafe fn vec_to_llvm(boxed_ids: &Box<[Id]>, md: &mut Egg2LLVMState) -> LLVMValueRef { // Convert the Boxed Ids to a Vector, and generate a vector of zeros // Invariant: idvec must not be empty let idvec = boxed_ids.to_vec(); @@ -771,7 +783,7 @@ unsafe fn binop_to_llvm( binop_node: &VecLang, left_id: &Id, right_id: &Id, - md: &Egg2LLVMState, + md: &mut Egg2LLVMState, ) -> LLVMValueRef { let left = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_id)], md); let right = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_id)], md); @@ -851,7 +863,7 @@ unsafe fn binop_to_llvm( } } -unsafe fn concat_to_llvm(left_vector: &Id, right_vector: &Id, md: &Egg2LLVMState) -> LLVMValueRef { +unsafe fn concat_to_llvm(left_vector: &Id, right_vector: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { { let trans_v1 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_vector)], md); let mut trans_v2 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_vector)], md); @@ -922,7 +934,7 @@ unsafe fn mac_to_llvm( accumulator_vector: &Id, left_prod_vector: &Id, right_prod_vector: &Id, - md: &Egg2LLVMState, + md: &mut Egg2LLVMState, ) -> LLVMValueRef { let trans_acc = egg_to_llvm(&md.egg_nodes_vector[usize::from(*accumulator_vector)], md); let trans_v1 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_prod_vector)], md); @@ -935,7 +947,7 @@ unsafe fn mac_to_llvm( LLVMBuildCall(md.builder, func, args, 3, b"\0".as_ptr() as *const _) } -unsafe fn scalar_unop_to_llvm(n: &Id, unop_node: &VecLang, md: &Egg2LLVMState) -> LLVMValueRef { +unsafe fn scalar_unop_to_llvm(n: &Id, unop_node: &VecLang, md: &mut Egg2LLVMState) -> LLVMValueRef { let mut number = egg_to_llvm(&md.egg_nodes_vector[usize::from(*n)], md); if isa_integertype(number) { number = LLVMBuildBitCast( @@ -955,12 +967,12 @@ unsafe fn scalar_unop_to_llvm(n: &Id, unop_node: &VecLang, md: &Egg2LLVMState) - ) } -unsafe fn vecneg_to_llvm(vec: &Id, md: &Egg2LLVMState) -> LLVMValueRef { +unsafe fn vecneg_to_llvm(vec: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { let neg_vector = 
egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); LLVMBuildFNeg(md.builder, neg_vector, b"\0".as_ptr() as *const _) } -unsafe fn vecsqrt_to_llvm(vec: &Id, md: &Egg2LLVMState) -> LLVMValueRef { +unsafe fn vecsqrt_to_llvm(vec: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { let sqrt_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); let vec_type = LLVMTypeOf(sqrt_vec); let param_types = [vec_type].as_mut_ptr(); @@ -970,7 +982,7 @@ unsafe fn vecsqrt_to_llvm(vec: &Id, md: &Egg2LLVMState) -> LLVMValueRef { LLVMBuildCall(md.builder, func, args, 1, b"\0".as_ptr() as *const _) } -unsafe fn vecsgn_to_llvm(vec: &Id, md: &Egg2LLVMState) -> LLVMValueRef { +unsafe fn vecsgn_to_llvm(vec: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { let sgn_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); let vec_type = LLVMTypeOf(sgn_vec); let vec_size = LLVMGetVectorSize(vec_type); @@ -994,7 +1006,7 @@ unsafe fn vecsgn_to_llvm(vec: &Id, md: &Egg2LLVMState) -> LLVMValueRef { /** * Vector representing No Optimization: Egg will not have modified the vector at all. */ -unsafe fn nooptvec_to_llvm(boxed_ids: &Box<[Id]>, md: &Egg2LLVMState) -> () { +unsafe fn nooptvec_to_llvm(boxed_ids: &Box<[Id]>, md: &mut Egg2LLVMState) -> () { // Convert the Boxed Ids to a Vector, and generate a vector of zeros // Invariant: idvec must not be empty let idvec = boxed_ids.to_vec(); @@ -1005,7 +1017,11 @@ unsafe fn nooptvec_to_llvm(boxed_ids: &Box<[Id]>, md: &Egg2LLVMState) -> () { for (i, &eggid) in idvec.iter().enumerate() { let egg_node = &md.egg_nodes_vector[usize::from(eggid)]; let new_instr = egg_to_llvm(egg_node, md); - let old_instr = md.llvm2egg_metadata.start_instructions.get(i).expect("Index Must Exist In Start Instructions"); + let old_instr = md + .llvm2egg_metadata + .start_instructions + .get(i) + .expect("Index Must Exist In Start Instructions"); LLVMReplaceAllUsesWith(*old_instr, new_instr); LLVMInstructionRemoveFromParent(*old_instr); } @@ -1014,7 +1030,7 @@ unsafe fn nooptvec_to_llvm(boxed_ids: &Box<[Id]>, md: &Egg2LLVMState) -> () { /// Egg To LLVM Dispatches translation of VecLanf Egg Nodes to LLVMValueRegs /// /// Side Effect: Builds and Insert LLVM instructions -unsafe fn egg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) -> LLVMValueRef { +unsafe fn egg_to_llvm(egg_node: &VecLang, translation_metadata: &mut Egg2LLVMState) -> LLVMValueRef { match egg_node { VecLang::NoOptVec(boxed_ids) => panic!("No Opt Vector was found. Egg to LLVM Translation does not handle No Opt Vector nodes at this location."), VecLang::Symbol(..) => { @@ -1061,14 +1077,14 @@ unsafe fn egg_to_llvm(egg_node: &VecLang, translation_metadata: &Egg2LLVMState) unsafe fn is_nooptvec(egg_expr: &VecLang) -> bool { match egg_expr { VecLang::NoOptVec(..) 
=> true, - _ => false + _ => false, } } unsafe fn get_noopt_eggnodes(egg_expr: &VecLang) -> &Box<[Id]> { match egg_expr { VecLang::NoOptVec(boxed_ids) => boxed_ids, - _ => panic!("Not a NoOptVec!") + _ => panic!("Not a NoOptVec!"), } } @@ -1086,9 +1102,14 @@ unsafe fn egg_to_llvm_main( let last_egg_node = egg_nodes .last() .expect("No match for last element of vector of Egg Terms."); - let translation_metadata = Egg2LLVMState { + + // Nodes converted to llvm already, not to be retranslated + let prior_translated_nodes: BTreeSet = BTreeSet::new(); + + let mut translation_metadata = Egg2LLVMState { egg_nodes_vector: egg_nodes, llvm2egg_metadata: llvm2egg_metadata.clone(), + prior_translated_nodes: prior_translated_nodes, builder: builder, context: context, module: module, @@ -1096,13 +1117,13 @@ unsafe fn egg_to_llvm_main( // If vectorize was not true, we are finished, because nooptvectorize_to_llvm will generate the required code. if !vectorize { assert!(is_nooptvec(last_egg_node)); - return nooptvec_to_llvm(get_noopt_eggnodes(last_egg_node), &translation_metadata); + return nooptvec_to_llvm(get_noopt_eggnodes(last_egg_node), &mut translation_metadata); } // Regular translation from vectorization assert!(!is_nooptvec(last_egg_node)); - let llvm_vector = egg_to_llvm(last_egg_node, &translation_metadata); + let llvm_vector = egg_to_llvm(last_egg_node, &mut translation_metadata); // BELOW HERE, we allow for vectorization output, and we stitch our work back into the current LLVM code From f25c257d8692be84b4aefee37649357cd29f422f Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 4 May 2022 03:04:14 -0400 Subject: [PATCH 087/143] if else fix issue with no cloning in prior basic block --- src/dios-egraphs/Diospyros/src/lib.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index ecafd1cb..e81ae2d8 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -710,9 +710,14 @@ unsafe fn reg_to_llvm(egg_node: &VecLang, translation_metadata: &mut Egg2LLVMSta // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. 
if reg_node == egg_node { assert!(!isa_argument(*llvm_instr)); + // do not clone an instruction translated in a prior chunk if translation_metadata.prior_translated_nodes.contains(&*llvm_instr) { return *llvm_instr; } + // do not clone an instruction translated in a prior basic block + if !translation_metadata.llvm2egg_metadata.instructions_in_chunk.contains(&*llvm_instr) { + return *llvm_instr; + } let new_instr = LLVMInstructionClone(*llvm_instr); LLVMInsertIntoBuilder(translation_metadata.builder, new_instr); translation_metadata.prior_translated_nodes.insert(new_instr); From 53ddaa7c0f4539e9ba48ba128d6091d42a024fee Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 4 May 2022 16:52:16 -0400 Subject: [PATCH 088/143] 12 tests fail --- src/dios-egraphs/Diospyros/diospyros.cpp | 40 ++++++++++++++++-------- src/dios-egraphs/Diospyros/src/lib.rs | 21 +++++++++---- 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index fb73c26b..ec549db2 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -649,23 +649,37 @@ struct DiospyrosPass : public FunctionPass { has_changes = has_changes || true; assert(chunk_vector.size() != 0); - Value *last_instr_val = NULL; - Instruction *last_instr = NULL; - for (int i = chunk_vector.size() - 1; i >= 0; i--) { - last_instr_val = unwrap(chunk_vector[i]); - assert(last_instr_val != NULL); - last_instr = dyn_cast(last_instr_val); - assert(last_instr != NULL); - if (!last_instr->isTerminator()) { + // Place builder at first instruction that is not a "handled + // instruction" + int insert_pos = 0; + for (int i = 0; i < chunk_vector.size(); i++) { + if (can_vectorize(unwrap(chunk_vector[i]))) { + insert_pos++; + } else { break; } } - - // Value *last_instr_val = unwrap(chunk_vector.back()); - // Instruction *last_instr = - // dyn_cast(last_instr_val); assert(last_instr != - // NULL); + Value *last_instr_val = unwrap(chunk_vector[insert_pos]); + assert(last_instr_val != NULL); + Instruction *last_instr = dyn_cast(last_instr_val); + assert(last_instr != NULL); + if (insert_pos >= chunk_vector.size()) { + last_instr = last_instr->getNextNode(); + assert(last_instr != NULL); + } IRBuilder<> builder(last_instr); + // Value *last_instr_val = NULL; + // Instruction *last_instr = NULL; + // for (int i = chunk_vector.size() - 1; i >= 0; i--) { + // last_instr_val = unwrap(chunk_vector[i]); + // assert(last_instr_val != NULL); + // last_instr = dyn_cast(last_instr_val); + // assert(last_instr != NULL); + // if (!last_instr->isTerminator()) { + // break; + // } + // } + // IRBuilder<> builder(last_instr); Module *mod = F.getParent(); LLVMContext &context = F.getContext(); diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index e81ae2d8..6cb4c994 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -386,6 +386,7 @@ struct LLVM2EggState { llvm2arg: BTreeMap, instructions_in_chunk: BTreeSet, restricted_instructions: BTreeSet, + prior_translated_instructions: BTreeSet, start_instructions: Vec, start_ids: Vec, } @@ -509,6 +510,10 @@ unsafe fn llvm_to_egg( next_node_idx: u32, translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { + // Mark instruction as translated, as it will be after it goes through the code below + if !translation_metadata.prior_translated_instructions.contains(&llvm_instr) { + 
translation_metadata.prior_translated_instructions.insert(llvm_instr); + } // If, on a different pass, the instruction was translated already, then // just used the egg node representing the translation if translation_metadata.llvm2reg.contains_key(&llvm_instr) { @@ -627,12 +632,15 @@ unsafe fn llvm_to_egg_main( // Invariant: chunk instructions are not empty in size assert!(!instructions_in_chunk.is_empty()); + let mut prior_translated_instructions: BTreeSet = BTreeSet::new(); + // State Variable To Hold Maps During Translation let mut translation_metadata = LLVM2EggState { llvm2reg: llvm_instr2reg_node, llvm2arg: llvm_instr2arg_node, instructions_in_chunk: instructions_in_chunk, restricted_instructions: restricted_instrs_set, + prior_translated_instructions: prior_translated_instructions, start_instructions: start_instructions, start_ids: start_ids, }; @@ -640,9 +648,10 @@ unsafe fn llvm_to_egg_main( // Index of next node to translate let mut next_node_idx: u32 = 0; - // for each store, iterate backwards from that store and translate to egg - for llvm_instr in llvm_instrs_in_chunk.iter() { - if can_start_translation_instr(*llvm_instr) { + // for each final instruction, iterate backwards from that final instruction and translate to egg + for llvm_instr in llvm_instrs_in_chunk.iter().rev() { + // only start translation back if it is a "translatable instruction" and it was not translated already + if can_start_translation_instr(*llvm_instr) && !translation_metadata.prior_translated_instructions.contains(&llvm_instr) { let (new_egg_nodes, new_next_node_idx) = start_translating_llvm_to_egg( *llvm_instr, egg_nodes, @@ -710,11 +719,11 @@ unsafe fn reg_to_llvm(egg_node: &VecLang, translation_metadata: &mut Egg2LLVMSta // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. if reg_node == egg_node { assert!(!isa_argument(*llvm_instr)); - // do not clone an instruction translated in a prior chunk + // do not clone an instruction translated earlier in the same chunk if translation_metadata.prior_translated_nodes.contains(&*llvm_instr) { return *llvm_instr; } - // do not clone an instruction translated in a prior basic block + // do not clone an instruction translated in a prior basic block / prior chunk if !translation_metadata.llvm2egg_metadata.instructions_in_chunk.contains(&*llvm_instr) { return *llvm_instr; } @@ -1135,7 +1144,7 @@ unsafe fn egg_to_llvm_main( // NOTE: We Assume Egg rewriter will maintain relative positions of elements in vector // Extract the elements of the vector, to be assigned back to where they are to be used. 
let num_extractions = llvm2egg_metadata.start_instructions.len(); - for i in 0..num_extractions { + for i in (0..num_extractions).rev() { let old_instr = llvm2egg_metadata .start_instructions .get(i) From b23f8d504a0bad4dd27049952b75e458bc88b630 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 4 May 2022 17:40:46 -0400 Subject: [PATCH 089/143] loop inline fixed --- src/dios-egraphs/Diospyros/diospyros.cpp | 42 +++++++++++++----------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index ec549db2..1a6b3021 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -501,6 +501,8 @@ bool can_vectorize(Value *value) { return true; } else if (instr->getOpcode() == Instruction::FNeg) { return true; + } else if (isa_sqrt32(wrap(instr))) { + return true; } return false; } @@ -581,6 +583,9 @@ struct DiospyrosPass : public FunctionPass { } Instruction *memmove = dyn_cast(call_inst); chunk_vector = {wrap(memmove)}; + } else { + Instruction *instr = dyn_cast(&I); + chunk_vector.push_back(wrap(instr)); } } else if (auto *op = dyn_cast(&I)) { Value *load_loc = op->getOperand(0); @@ -597,12 +602,12 @@ struct DiospyrosPass : public FunctionPass { chunk_accumulator.push_back(chunk_vector); } - for (auto &chunk_vector : chunk_accumulator) { - for (auto &instr : chunk_vector) { - errs() << *unwrap(instr) << "\n"; - } - errs() << "---------------------\n"; - } + // for (auto &chunk_vector : chunk_accumulator) { + // for (auto &instr : chunk_vector) { + // errs() << *unwrap(instr) << "\n"; + // } + // errs() << "---------------------\n"; + // } for (int i = 0; i < chunk_accumulator.size(); ++i) { auto &chunk_vector = chunk_accumulator[i]; @@ -652,34 +657,31 @@ struct DiospyrosPass : public FunctionPass { // Place builder at first instruction that is not a "handled // instruction" int insert_pos = 0; + bool has_seen_vectorizable = false; for (int i = 0; i < chunk_vector.size(); i++) { if (can_vectorize(unwrap(chunk_vector[i]))) { + has_seen_vectorizable = true; + insert_pos++; + } else if (!has_seen_vectorizable) { insert_pos++; } else { break; } } - Value *last_instr_val = unwrap(chunk_vector[insert_pos]); + Value *last_instr_val = NULL; + if (insert_pos >= chunk_vector.size()) { + last_instr_val = unwrap(chunk_vector[insert_pos - 1]); + } else { + last_instr_val = unwrap(chunk_vector[insert_pos]); + } assert(last_instr_val != NULL); Instruction *last_instr = dyn_cast(last_instr_val); assert(last_instr != NULL); if (insert_pos >= chunk_vector.size()) { last_instr = last_instr->getNextNode(); assert(last_instr != NULL); - } + } IRBuilder<> builder(last_instr); - // Value *last_instr_val = NULL; - // Instruction *last_instr = NULL; - // for (int i = chunk_vector.size() - 1; i >= 0; i--) { - // last_instr_val = unwrap(chunk_vector[i]); - // assert(last_instr_val != NULL); - // last_instr = dyn_cast(last_instr_val); - // assert(last_instr != NULL); - // if (!last_instr->isTerminator()) { - // break; - // } - // } - // IRBuilder<> builder(last_instr); Module *mod = F.getParent(); LLVMContext &context = F.getContext(); From a869f6903c1ee03632cc7cd5837a22bb3e07052b Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 4 May 2022 21:18:59 -0400 Subject: [PATCH 090/143] all c-tests pass, but load store movement had issue Issue with moving over memmmove/memcpy/mem intrinsics --- .../Diospyros/LoadStoreMovement.cpp | 4 +- .../Diospyros/c-tests/qr-decomp-test-0.c | 81 
++++++++++++++ src/dios-egraphs/Diospyros/diospyros.cpp | 103 ++++++------------ src/dios-egraphs/Diospyros/src/lib.rs | 2 +- 4 files changed, 120 insertions(+), 70 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-0.c diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index 6735e435..804004a4 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -260,8 +260,8 @@ struct LoadStoreMovementPass : public FunctionPass { (F.getName().size() > 7 && F.getName().substr(0, 7) == "no_opt_")) { return false; } - rewrite_stores(F); - rewrite_loads(F); + // rewrite_stores(F); + // rewrite_loads(F); return true; } diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-0.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-0.c new file mode 100644 index 00000000..f43090bb --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-0.c @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +// float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; + sample_test(A, x, e); + float expectedA[SIZE] = {0}; + float expectedx[SIZE] = {0}; + float expectede[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedx, expectede); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + printf("X Output: %f\n", x[i]); + printf("Expected X Output: %f\n", expectedx[i]); + printf("E Output: %f\n", e[i]); + printf("Expected E Output: %f\n", expectede[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedx[i] - x[i]) < DELTA); + assert(fabs(expectede[i] - e[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 1a6b3021..eab4e83e 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -490,21 +490,18 @@ bool can_vectorize(Value *value) { // TODO: Instruction *instr = dyn_cast(value); assert(instr != NULL); - if (isa(instr)) { - if (instr->getOpcode() == Instruction::FAdd) { - return true; - } else if (instr->getOpcode() == Instruction::FSub) { - return true; - } else if (instr->getOpcode() == Instruction::FDiv) { - return true; - } else if (instr->getOpcode() == Instruction::FMul) { - return true; - } else 
if (instr->getOpcode() == Instruction::FNeg) { - return true; - } else if (isa_sqrt32(wrap(instr))) { - return true; - } - return false; + if (instr->getOpcode() == Instruction::FAdd) { + return true; + } else if (instr->getOpcode() == Instruction::FSub) { + return true; + } else if (instr->getOpcode() == Instruction::FDiv) { + return true; + } else if (instr->getOpcode() == Instruction::FMul) { + return true; + } else if (instr->getOpcode() == Instruction::FNeg) { + return true; + } else if (isa_sqrt32(wrap(instr))) { + return true; } return false; } @@ -545,69 +542,41 @@ struct DiospyrosPass : public FunctionPass { // std::vector> chunk_accumulator; std::vector chunk_vector = {}; + bool vectorizable_flag = false; for (auto &I : B) { - if (auto *op = dyn_cast(&I)) { - Value *store_loc = op->getOperand(1); - chunk_vector.push_back(wrap(op)); - } else if (auto *op = dyn_cast(&I)) { - if (!chunk_vector.empty()) { - chunk_accumulator.push_back(chunk_vector); - } - chunk_vector = {wrap(op)}; - } else if (auto *op = dyn_cast(&I)) { + Value *val = dyn_cast(&I); + assert(val != NULL); + if (can_vectorize(val) && !vectorizable_flag) { if (!chunk_vector.empty()) { chunk_accumulator.push_back(chunk_vector); - } - chunk_vector = {wrap(op)}; - } else if (auto *op = dyn_cast(&I)) { + } + vectorizable_flag = true; + chunk_vector = {wrap(val)}; + } else if (can_vectorize(val) && vectorizable_flag) { + chunk_vector.push_back(wrap(val)); + } else if (!can_vectorize(val) && !vectorizable_flag) { + chunk_vector.push_back(wrap(val)); + } else if (!can_vectorize(val) && vectorizable_flag) { if (!chunk_vector.empty()) { chunk_accumulator.push_back(chunk_vector); - } - chunk_vector = {wrap(op)}; - } else if (CallInst *call_inst = dyn_cast(&I)) { - if (is_memset_variety(call_inst)) { - if (!chunk_vector.empty()) { - chunk_accumulator.push_back(chunk_vector); - } - Instruction *memset = dyn_cast(call_inst); - chunk_vector = {wrap(memset)}; - } else if (is_memcopy_variety(call_inst)) { - if (!chunk_vector.empty()) { - chunk_accumulator.push_back(chunk_vector); - } - Instruction *memcopy = dyn_cast(call_inst); - chunk_vector = {wrap(memcopy)}; - } else if (is_memmove_variety(call_inst)) { - if (!chunk_vector.empty()) { - chunk_accumulator.push_back(chunk_vector); - } - Instruction *memmove = dyn_cast(call_inst); - chunk_vector = {wrap(memmove)}; - } else { - Instruction *instr = dyn_cast(&I); - chunk_vector.push_back(wrap(instr)); - } - } else if (auto *op = dyn_cast(&I)) { - Value *load_loc = op->getOperand(0); - if (!chunk_vector.empty()) { - chunk_accumulator.push_back(chunk_vector); - } - chunk_vector = {wrap(op)}; + } + vectorizable_flag = false; + chunk_vector = {wrap(val)}; } else { - Instruction *instr = dyn_cast(&I); - chunk_vector.push_back(wrap(instr)); + throw "No other cases possible!"; } } if (!chunk_vector.empty()) { chunk_accumulator.push_back(chunk_vector); } - - // for (auto &chunk_vector : chunk_accumulator) { - // for (auto &instr : chunk_vector) { - // errs() << *unwrap(instr) << "\n"; - // } - // errs() << "---------------------\n"; - // } + + errs() << "New Basic Block\n"; + for (auto &chunk_vector : chunk_accumulator) { + for (auto &instr : chunk_vector) { + errs() << *unwrap(instr) << "\n"; + } + errs() << "---------------------\n"; + } for (int i = 0; i < chunk_accumulator.size(); ++i) { auto &chunk_vector = chunk_accumulator[i]; diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 6cb4c994..762b2c96 100644 --- 
a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -1046,7 +1046,7 @@ unsafe fn nooptvec_to_llvm(boxed_ids: &Box<[Id]>, md: &mut Egg2LLVMState) -> () /// Side Effect: Builds and Insert LLVM instructions unsafe fn egg_to_llvm(egg_node: &VecLang, translation_metadata: &mut Egg2LLVMState) -> LLVMValueRef { match egg_node { - VecLang::NoOptVec(boxed_ids) => panic!("No Opt Vector was found. Egg to LLVM Translation does not handle No Opt Vector nodes at this location."), + VecLang::NoOptVec(..) => panic!("No Opt Vector was found. Egg to LLVM Translation does not handle No Opt Vector nodes at this location."), VecLang::Symbol(..) => { panic!("Symbol was found. Egg to LLVM Translation does not handle symbol nodes.") } From 09fc974073724714038b7e11d5679e215ff84b79 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 4 May 2022 21:31:18 -0400 Subject: [PATCH 091/143] remove prints in c++ --- src/dios-egraphs/Diospyros/diospyros.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index eab4e83e..f1c2a8a2 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -569,14 +569,6 @@ struct DiospyrosPass : public FunctionPass { if (!chunk_vector.empty()) { chunk_accumulator.push_back(chunk_vector); } - - errs() << "New Basic Block\n"; - for (auto &chunk_vector : chunk_accumulator) { - for (auto &instr : chunk_vector) { - errs() << *unwrap(instr) << "\n"; - } - errs() << "---------------------\n"; - } for (int i = 0; i < chunk_accumulator.size(); ++i) { auto &chunk_vector = chunk_accumulator[i]; From 60388260a7f10e3fbb2ff6e0797188c547d93457 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 6 May 2022 01:10:50 -0400 Subject: [PATCH 092/143] start fixing load store movement --- .../Diospyros/LoadStoreMovement.cpp | 1 + .../Diospyros/NewLoadStoreMovement.cpp | 281 ++++++++++++++++++ 2 files changed, 282 insertions(+) create mode 100644 src/dios-egraphs/Diospyros/NewLoadStoreMovement.cpp diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index 804004a4..d5d541c1 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" + using namespace llvm; namespace { diff --git a/src/dios-egraphs/Diospyros/NewLoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/NewLoadStoreMovement.cpp new file mode 100644 index 00000000..d5d541c1 --- /dev/null +++ b/src/dios-egraphs/Diospyros/NewLoadStoreMovement.cpp @@ -0,0 +1,281 @@ +#include + +#include +#include +#include + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +namespace { +struct LoadStoreMovementPass : public FunctionPass { + static char ID; + LoadStoreMovementPass() : FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } + + void rewrite_stores(Function &F) { + AliasAnalysis *AA = &getAnalysis().getAAResults(); + 
for (auto &B : F) { + bool has_float = false; + for (auto &I : B) { + if (I.getType()->isFloatTy()) { + has_float = true; + } + } + if (!has_float) { + continue; + } + // We also skip over all basic blocks without stores + bool has_store = false; + for (auto &I : B) { + if (auto *op = dyn_cast(&I)) { + has_store = true; + } + } + if (!has_store) { + continue; + } + + std::vector reversed_instructions = {}; + std::vector all_instructions = {}; + int head_pointer = + -1; // points to head location in all_instructions + Instruction *first_instr = NULL; + for (BasicBlock::reverse_iterator iter = B.rbegin(); + iter != B.rend(); ++iter) { + Instruction *I = &(*iter); + first_instr = I; + if (auto *store_op = dyn_cast(I)) { + if (head_pointer < 0) { + reversed_instructions.push_back(I); + } else { + int current_counter = head_pointer; + while (current_counter >= 0) { + Instruction *curr_instr = + reversed_instructions[current_counter]; + if (curr_instr->isTerminator()) { + ++current_counter; + break; + } else if (auto *other_store_op = + dyn_cast(curr_instr)) { + if (AA->isNoAlias( + store_op->getOperand(1), + other_store_op->getOperand(1))) { + --current_counter; + } else { + break; + } + } else if (auto *load_op = + dyn_cast(curr_instr)) { + if (AA->isNoAlias(store_op->getOperand(1), + load_op->getOperand(0))) { + --current_counter; + } else { + break; + } + } else { + --current_counter; + } + } + // Do the insertion + reversed_instructions.insert( + reversed_instructions.begin() + current_counter, I); + } + } else { + reversed_instructions.push_back(I); + } + ++head_pointer; + all_instructions.push_back(I); + } + if (first_instr == NULL) { + assert(false); + } + IRBuilder<> builder(first_instr); + // we add the instructions at the end + builder.SetInsertPoint(&B); + // here we are going to add back our instructions + std::reverse(reversed_instructions.begin(), + reversed_instructions.end()); + BasicBlock::InstListType &bb_instrs = B.getInstList(); + std::map original_to_clone_map = {}; + for (auto &I : reversed_instructions) { + // we clone the original instruciton, then insert into builder + Instruction *cloned_instr = I->clone(); + // when adding, need to take caution about the users + original_to_clone_map[I] = cloned_instr; + for (unsigned int i = 0; i < I->getNumOperands(); i++) { + Value *operand = I->getOperand(i); + Instruction *operand_instr = dyn_cast(operand); + if (original_to_clone_map.find(operand_instr) != + original_to_clone_map.end()) { + Instruction *clone_instr = + original_to_clone_map[operand_instr]; + Value *clone_value = dyn_cast(clone_instr); + cloned_instr->setOperand(i, clone_value); + } else { + cloned_instr->setOperand(i, operand); + } + } + bb_instrs.push_back(cloned_instr); + for (auto &U : I->uses()) { + User *user = U.getUser(); + user->setOperand(U.getOperandNo(), cloned_instr); + } + } + // here we need to delete all original instructions, going forwards + // with no reversal as they are in reversed order + for (auto &I : all_instructions) { + I->eraseFromParent(); + } + } + } + + void rewrite_loads(Function &F) { + AliasAnalysis *AA = &getAnalysis().getAAResults(); + std::map original_to_clone_map = {}; + std::vector all_instructions = {}; + for (auto &B : F) { + std::vector instructions = {}; + + int head_pointer = + -1; // points to head location in all_instructions + Instruction *first_instr = NULL; + for (auto &I : B) { + first_instr = &I; + if (auto *load_op = dyn_cast(&I)) { + if (isa(load_op->getOperand(0))) { + if (head_pointer < 0) { + 
instructions.push_back(&I); + } else { + int current_counter = head_pointer; + while (current_counter > 0) { + Instruction *curr_instr = + instructions[current_counter]; + if (auto *op = dyn_cast(&I)) { + ++current_counter; + break; + } + // else if (auto *other_load_op = + // dyn_cast(curr_instr)) + // { + // if (AA->isNoAlias( + // other_load_op->getOperand(0), + // load_op->getOperand(0))) { + // --current_counter; + // } else { + // break; + // } + // } + else if (auto *store_op = + dyn_cast(curr_instr)) { + if (AA->isNoAlias(store_op->getOperand(1), + load_op->getOperand(0))) { + --current_counter; + } else { + break; + } + } else { + --current_counter; + } + } + // Do the insertion + assert(current_counter >= 0); + instructions.insert( + instructions.begin() + current_counter, &I); + } + } else { + instructions.push_back(&I); + } + } else { + instructions.push_back(&I); + } + ++head_pointer; + all_instructions.push_back(&I); + } + if (first_instr == NULL) { + assert(false); + } + IRBuilder<> builder(first_instr); + // we add the instructions at the end + builder.SetInsertPoint(&B); + // here we are going to add back our instructions + BasicBlock::InstListType &bb_instrs = B.getInstList(); + for (auto &I : instructions) { + // we clone the original instruciton, then insert into builder + Instruction *cloned_instr = I->clone(); + // when adding, need to take caution about the users + original_to_clone_map[I] = cloned_instr; + for (unsigned int i = 0; i < I->getNumOperands(); i++) { + Value *operand = I->getOperand(i); + Instruction *operand_instr = dyn_cast(operand); + if (operand_instr != NULL) { + if (original_to_clone_map.find(operand_instr) != + original_to_clone_map.end()) { + Instruction *clone_instr = + original_to_clone_map[operand_instr]; + Value *clone_value = dyn_cast(clone_instr); + cloned_instr->setOperand(i, clone_value); + } else { + cloned_instr->setOperand(i, operand); + } + } + } + bb_instrs.push_back(cloned_instr); + Instruction *instr = &(*I); + for (auto &U : instr->uses()) { + User *user = U.getUser(); + user->setOperand(U.getOperandNo(), cloned_instr); + } + } + } + // here we need to delete all original instructions, going + // forwards with no reversal as they are in reversed order + std::reverse(all_instructions.begin(), all_instructions.end()); + for (auto &I : all_instructions) { + I->eraseFromParent(); + } + } + + virtual bool runOnFunction(Function &F) override { + /** + * In this pass, we walk backwards finding the first load from the + * bottom, and push it up as far as we can. We continue upwards, + * pushing loads upward. + * + * We gr + */ + if (F.getName() == "main" || + (F.getName().size() > 7 && F.getName().substr(0, 7) == "no_opt_")) { + return false; + } + // rewrite_stores(F); + // rewrite_loads(F); + + return true; + } +}; +} // namespace + +char LoadStoreMovementPass::ID = 0; + +// Automatically enable the pass. 
+// http://adriansampson.net/blog/clangpass.html +static void registerLoadStoreMovementPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + PM.add(new LoadStoreMovementPass()); +} +static RegisterStandardPasses RegisterMyPass( + PassManagerBuilder::EP_EarlyAsPossible, registerLoadStoreMovementPass); \ No newline at end of file From ee70097183d4f033c369fb5b0c729e9435bbc45b Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 6 May 2022 01:22:21 -0400 Subject: [PATCH 093/143] remove std namesapace --- .../Diospyros/LoadStoreMovement.cpp | 225 +----------------- .../Diospyros/NewLoadStoreMovement.cpp | 1 + src/dios-egraphs/Diospyros/diospyros.cpp | 25 +- 3 files changed, 21 insertions(+), 230 deletions(-) diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index d5d541c1..b69ab09a 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -17,6 +17,9 @@ using namespace llvm; +const std::string MAIN_FUNCTION_NAME = "main"; +const std::string NO_OPT_PREFIX = "no_opt_"; + namespace { struct LoadStoreMovementPass : public FunctionPass { static char ID; @@ -29,223 +32,12 @@ struct LoadStoreMovementPass : public FunctionPass { void rewrite_stores(Function &F) { AliasAnalysis *AA = &getAnalysis().getAAResults(); for (auto &B : F) { - bool has_float = false; - for (auto &I : B) { - if (I.getType()->isFloatTy()) { - has_float = true; - } - } - if (!has_float) { - continue; - } - // We also skip over all basic blocks without stores - bool has_store = false; - for (auto &I : B) { - if (auto *op = dyn_cast(&I)) { - has_store = true; - } - } - if (!has_store) { - continue; - } - - std::vector reversed_instructions = {}; - std::vector all_instructions = {}; - int head_pointer = - -1; // points to head location in all_instructions - Instruction *first_instr = NULL; - for (BasicBlock::reverse_iterator iter = B.rbegin(); - iter != B.rend(); ++iter) { - Instruction *I = &(*iter); - first_instr = I; - if (auto *store_op = dyn_cast(I)) { - if (head_pointer < 0) { - reversed_instructions.push_back(I); - } else { - int current_counter = head_pointer; - while (current_counter >= 0) { - Instruction *curr_instr = - reversed_instructions[current_counter]; - if (curr_instr->isTerminator()) { - ++current_counter; - break; - } else if (auto *other_store_op = - dyn_cast(curr_instr)) { - if (AA->isNoAlias( - store_op->getOperand(1), - other_store_op->getOperand(1))) { - --current_counter; - } else { - break; - } - } else if (auto *load_op = - dyn_cast(curr_instr)) { - if (AA->isNoAlias(store_op->getOperand(1), - load_op->getOperand(0))) { - --current_counter; - } else { - break; - } - } else { - --current_counter; - } - } - // Do the insertion - reversed_instructions.insert( - reversed_instructions.begin() + current_counter, I); - } - } else { - reversed_instructions.push_back(I); - } - ++head_pointer; - all_instructions.push_back(I); - } - if (first_instr == NULL) { - assert(false); - } - IRBuilder<> builder(first_instr); - // we add the instructions at the end - builder.SetInsertPoint(&B); - // here we are going to add back our instructions - std::reverse(reversed_instructions.begin(), - reversed_instructions.end()); - BasicBlock::InstListType &bb_instrs = B.getInstList(); - std::map original_to_clone_map = {}; - for (auto &I : reversed_instructions) { - // we clone the original instruciton, then insert into builder - Instruction *cloned_instr = I->clone(); - // when adding, need to 
take caution about the users - original_to_clone_map[I] = cloned_instr; - for (unsigned int i = 0; i < I->getNumOperands(); i++) { - Value *operand = I->getOperand(i); - Instruction *operand_instr = dyn_cast(operand); - if (original_to_clone_map.find(operand_instr) != - original_to_clone_map.end()) { - Instruction *clone_instr = - original_to_clone_map[operand_instr]; - Value *clone_value = dyn_cast(clone_instr); - cloned_instr->setOperand(i, clone_value); - } else { - cloned_instr->setOperand(i, operand); - } - } - bb_instrs.push_back(cloned_instr); - for (auto &U : I->uses()) { - User *user = U.getUser(); - user->setOperand(U.getOperandNo(), cloned_instr); - } - } - // here we need to delete all original instructions, going forwards - // with no reversal as they are in reversed order - for (auto &I : all_instructions) { - I->eraseFromParent(); - } } } void rewrite_loads(Function &F) { AliasAnalysis *AA = &getAnalysis().getAAResults(); - std::map original_to_clone_map = {}; - std::vector all_instructions = {}; for (auto &B : F) { - std::vector instructions = {}; - - int head_pointer = - -1; // points to head location in all_instructions - Instruction *first_instr = NULL; - for (auto &I : B) { - first_instr = &I; - if (auto *load_op = dyn_cast(&I)) { - if (isa(load_op->getOperand(0))) { - if (head_pointer < 0) { - instructions.push_back(&I); - } else { - int current_counter = head_pointer; - while (current_counter > 0) { - Instruction *curr_instr = - instructions[current_counter]; - if (auto *op = dyn_cast(&I)) { - ++current_counter; - break; - } - // else if (auto *other_load_op = - // dyn_cast(curr_instr)) - // { - // if (AA->isNoAlias( - // other_load_op->getOperand(0), - // load_op->getOperand(0))) { - // --current_counter; - // } else { - // break; - // } - // } - else if (auto *store_op = - dyn_cast(curr_instr)) { - if (AA->isNoAlias(store_op->getOperand(1), - load_op->getOperand(0))) { - --current_counter; - } else { - break; - } - } else { - --current_counter; - } - } - // Do the insertion - assert(current_counter >= 0); - instructions.insert( - instructions.begin() + current_counter, &I); - } - } else { - instructions.push_back(&I); - } - } else { - instructions.push_back(&I); - } - ++head_pointer; - all_instructions.push_back(&I); - } - if (first_instr == NULL) { - assert(false); - } - IRBuilder<> builder(first_instr); - // we add the instructions at the end - builder.SetInsertPoint(&B); - // here we are going to add back our instructions - BasicBlock::InstListType &bb_instrs = B.getInstList(); - for (auto &I : instructions) { - // we clone the original instruciton, then insert into builder - Instruction *cloned_instr = I->clone(); - // when adding, need to take caution about the users - original_to_clone_map[I] = cloned_instr; - for (unsigned int i = 0; i < I->getNumOperands(); i++) { - Value *operand = I->getOperand(i); - Instruction *operand_instr = dyn_cast(operand); - if (operand_instr != NULL) { - if (original_to_clone_map.find(operand_instr) != - original_to_clone_map.end()) { - Instruction *clone_instr = - original_to_clone_map[operand_instr]; - Value *clone_value = dyn_cast(clone_instr); - cloned_instr->setOperand(i, clone_value); - } else { - cloned_instr->setOperand(i, operand); - } - } - } - bb_instrs.push_back(cloned_instr); - Instruction *instr = &(*I); - for (auto &U : instr->uses()) { - User *user = U.getUser(); - user->setOperand(U.getOperandNo(), cloned_instr); - } - } - } - // here we need to delete all original instructions, going - // forwards with no 
reversal as they are in reversed order - std::reverse(all_instructions.begin(), all_instructions.end()); - for (auto &I : all_instructions) { - I->eraseFromParent(); } } @@ -254,15 +46,14 @@ struct LoadStoreMovementPass : public FunctionPass { * In this pass, we walk backwards finding the first load from the * bottom, and push it up as far as we can. We continue upwards, * pushing loads upward. - * - * We gr */ - if (F.getName() == "main" || - (F.getName().size() > 7 && F.getName().substr(0, 7) == "no_opt_")) { + if (F.getName() == MAIN_FUNCTION_NAME || + (F.getName().size() > NO_OPT_PREFIX.size() && + F.getName().substr(0, NO_OPT_PREFIX.size()) == NO_OPT_PREFIX)) { return false; } - // rewrite_stores(F); - // rewrite_loads(F); + rewrite_stores(F); + rewrite_loads(F); return true; } diff --git a/src/dios-egraphs/Diospyros/NewLoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/NewLoadStoreMovement.cpp index d5d541c1..da11c170 100644 --- a/src/dios-egraphs/Diospyros/NewLoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/NewLoadStoreMovement.cpp @@ -1,3 +1,4 @@ + #include #include diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index f1c2a8a2..c208107a 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -27,7 +27,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; -using namespace std; int main(int argc, char **argv) { llvm::cl::ParseCommandLineOptions(argc, argv); @@ -49,16 +48,16 @@ extern "C" void optimize(LLVMModuleRef mod, LLVMContextRef context, std::size_t restricted_size, bool run_egg, bool print_opt); -const string ARRAY_NAME = "no-array-name"; -const string TEMP_NAME = "no-temp-name"; -const string SQRT64_FUNCTION_NAME = "llvm.sqrt.f64"; -const string SQRT32_FUNCTION_NAME = "llvm.sqrt.f32"; -const string MEMSET_PREFIX = "memset"; -const string LLVM_MEMSET_PREFIX = "llvm.memset"; -const string MEMMOVE_PREFIX = "memmove"; -const string MEMCOPY_PREFIX = "memcopy"; -const string MAIN_FUNCTION_NAME = "main"; -const string NO_OPT_PREFIX = "no_opt_"; +const std::string ARRAY_NAME = "no-array-name"; +const std::string TEMP_NAME = "no-temp-name"; +const std::string SQRT64_FUNCTION_NAME = "llvm.sqrt.f64"; +const std::string SQRT32_FUNCTION_NAME = "llvm.sqrt.f32"; +const std::string MEMSET_PREFIX = "memset"; +const std::string LLVM_MEMSET_PREFIX = "llvm.memset"; +const std::string MEMMOVE_PREFIX = "memmove"; +const std::string MEMCOPY_PREFIX = "memcopy"; +const std::string MAIN_FUNCTION_NAME = "main"; +const std::string NO_OPT_PREFIX = "no_opt_"; const int SQRT_OPERATOR = 3; const int BINARY_OPERATOR = 2; @@ -82,7 +81,7 @@ int gen_fresh_index() { */ const char *gen_fresh_array() { ++FRESH_ARRAY_COUNTER; - string array_str = ARRAY_NAME + to_string(FRESH_ARRAY_COUNTER); + std::string array_str = ARRAY_NAME + std::to_string(FRESH_ARRAY_COUNTER); char *cstr = new char[array_str.length() + 1]; std::strcpy(cstr, array_str.c_str()); return cstr; @@ -93,7 +92,7 @@ const char *gen_fresh_array() { */ const char *gen_fresh_temp() { ++FRESH_TEMP_COUNTER; - string temp_str = TEMP_NAME + to_string(FRESH_TEMP_COUNTER); + std::string temp_str = TEMP_NAME + std::to_string(FRESH_TEMP_COUNTER); char *cstr = new char[temp_str.length() + 1]; std::strcpy(cstr, temp_str.c_str()); return cstr; From 0dfb1c3b9edfa1aab64b8649e65f3fc5b32bd025 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 6 May 2022 13:06:20 -0400 Subject: [PATCH 094/143] add in final steps of store movement --- 
.../Diospyros/LoadStoreMovement.cpp | 58 +++++++++++++++++++ ...eMovement.cpp => OldLoadStoreMovement.cpp} | 0 2 files changed, 58 insertions(+) rename src/dios-egraphs/Diospyros/{NewLoadStoreMovement.cpp => OldLoadStoreMovement.cpp} (100%) diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index b69ab09a..3fc53f31 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -32,6 +32,64 @@ struct LoadStoreMovementPass : public FunctionPass { void rewrite_stores(Function &F) { AliasAnalysis *AA = &getAnalysis().getAAResults(); for (auto &B : F) { + + // Perform Pushing Back of Store Instructions + std::vector final_instrs_list = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + + // Place any non-Store Instructions at the end of the list of instructions + if (!isa(instr)) { + final_instrs_list.push_back(instr); + continue; + } + + // Handle Store Instructions + int insertion_offset = final_instrs_list.size(); + while (true) { + Instruction *store_instr = instr; + // If there is no prior instruction, push back at current offset, and stop. + if (insertion_offset - 1 < 0) { + final_instrs_list.insert(final_instrs_list.begin() + insertion_offset, store_instr); + break; + } + Instruction *prior_instr = final_instrs_list[insertion_offset - 1]; + // If the prior instruction is used in the store's + // arguments, do not push it back + int num_operands = store_instr->getNumOperands(); + bool break_while = false; + for (int i = 0; i < num_operands; i++) { + Value *store_operand = store_instr->getOperand(i); + Instruction *store_operand_instr = + dyn_cast(store_operand); + assert(store_operand_instr != NULL); + if (store_operand_instr == prior_instr) { + final_instrs_list.insert(final_instrs_list.begin() + insertion_offset, store_instr); + break_while = true; + break; + } + } + if (break_while) { + break; + } + // If the prior instruction alias with the store + // instruction, do not push it back + if (!AA->isNoAlias(store_instr, prior_instr)) { + final_instrs_list.insert(final_instrs_list.begin() + insertion_offset, store_instr); + break; + } + // Otherwise, keep pushing back the store instruction + --insertion_offset; + } + + // TODO + // First, insert clone all instructions, and insert them into the basic block at the very beginning + // Next, + // Then, + // Finally, delete all the original instructions in the basic block + + } } } diff --git a/src/dios-egraphs/Diospyros/NewLoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/OldLoadStoreMovement.cpp similarity index 100% rename from src/dios-egraphs/Diospyros/NewLoadStoreMovement.cpp rename to src/dios-egraphs/Diospyros/OldLoadStoreMovement.cpp From e96442c75f02a972f3c0b9adfd1ebb6fc291d965 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 6 May 2022 15:11:41 -0400 Subject: [PATCH 095/143] new version of veclang variant added --- .../Diospyros/LoadStoreMovement.cpp | 77 ++- src/dios-egraphs/src/cost.rs | 8 +- src/dios-egraphs/src/main.rs | 452 ++++++++++-------- src/dios-egraphs/src/veclang-original.rs | 59 +++ src/dios-egraphs/src/veclang.rs | 12 + 5 files changed, 404 insertions(+), 204 deletions(-) create mode 100644 src/dios-egraphs/src/veclang-original.rs diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index 3fc53f31..e275529f 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ 
b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -34,27 +34,27 @@ struct LoadStoreMovementPass : public FunctionPass { for (auto &B : F) { // Perform Pushing Back of Store Instructions - std::vector final_instrs_list = {}; + std::vector final_instrs_vec = {}; for (auto &I : B) { Instruction *instr = dyn_cast(&I); assert(instr != NULL); // Place any non-Store Instructions at the end of the list of instructions if (!isa(instr)) { - final_instrs_list.push_back(instr); + final_instrs_vec.push_back(instr); continue; } // Handle Store Instructions - int insertion_offset = final_instrs_list.size(); + int insertion_offset = final_instrs_vec.size(); while (true) { Instruction *store_instr = instr; // If there is no prior instruction, push back at current offset, and stop. if (insertion_offset - 1 < 0) { - final_instrs_list.insert(final_instrs_list.begin() + insertion_offset, store_instr); + final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, store_instr); break; } - Instruction *prior_instr = final_instrs_list[insertion_offset - 1]; + Instruction *prior_instr = final_instrs_vec[insertion_offset - 1]; // If the prior instruction is used in the store's // arguments, do not push it back int num_operands = store_instr->getNumOperands(); @@ -65,7 +65,7 @@ struct LoadStoreMovementPass : public FunctionPass { dyn_cast(store_operand); assert(store_operand_instr != NULL); if (store_operand_instr == prior_instr) { - final_instrs_list.insert(final_instrs_list.begin() + insertion_offset, store_instr); + final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, store_instr); break_while = true; break; } @@ -76,7 +76,7 @@ struct LoadStoreMovementPass : public FunctionPass { // If the prior instruction alias with the store // instruction, do not push it back if (!AA->isNoAlias(store_instr, prior_instr)) { - final_instrs_list.insert(final_instrs_list.begin() + insertion_offset, store_instr); + final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, store_instr); break; } // Otherwise, keep pushing back the store instruction @@ -85,10 +85,67 @@ struct LoadStoreMovementPass : public FunctionPass { // TODO // First, insert clone all instructions, and insert them into the basic block at the very beginning - // Next, - // Then, + + // build ordered vector of cloned instructions + // build map from original vector to cloned vector + std::vector cloned_instrs = {}; + std::map original_to_clone_map = {}; + std::map clone_to_original_map = {}; + for (Instruction *instr : final_instrs_vec) { + Instruction *cloned_instr = instr->clone(); + cloned_instrs.push_back(cloned_instr); + original_to_clone_map[instr] = cloned_instr; + clone_to_original_map[cloned_instr] = instr; + } + + // Grab first instruction to build before at. + Instruction *first_instr = NULL; + for (auto &I : B) { + first_instr = dyn_cast(&I); + assert(first_instr != NULL); + break; + } + IRBuilder<> builder(first_instr); + + for (Instruction *cloned_instr : cloned_instrs) { + // The cloned instruction has arguments pointing backwards to prior original instructions + // Some of these prior instructions themselves will themselves be cloned. 
+ // We need to replace the prior original instructions with clones instructions + int num_operands = cloned_instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Value *clone_operand = cloned_instr->getOperand(i); + Instruction *clone_operand_instr = dyn_cast(clone_operand); + if (clone_operand_instr != NULL) { + if (original_to_clone_map.count(clone_operand_instr) > 0) { + Instruction *replacement_operand = original_to_clone_map[clone_operand_instr]; + Value *replacement_value = dyn_cast(replacement_operand); + assert(replacement_value != NULL); + cloned_instr->setOperand(i, replacement_value); + } + } + } + + // Furthermore, we need to change all uses of the original instruction to be the new cloned instruction + Instruction *original_instr = clone_to_original_map[cloned_instr]; + for (auto &U : original_instr->uses()) { + User *user = U.getUser(); + user->setOperand(U.getOperandNo(), cloned_instr); + } + + // Finish by inserting cloned instruction + builder.Insert(cloned_instr); + } + // Finally, delete all the original instructions in the basic block - + // Do this in reverse order. + std::reverse(final_instrs_vec.begin(), final_instrs_vec.end()); + for (auto &I : final_instrs_vec) { + if (I != NULL) { + try { + I->eraseFromParent(); + } catch (...) {} + } + } } } } diff --git a/src/dios-egraphs/src/cost.rs b/src/dios-egraphs/src/cost.rs index f9f036d0..f11d5c1f 100644 --- a/src/dios-egraphs/src/cost.rs +++ b/src/dios-egraphs/src/cost.rs @@ -13,15 +13,21 @@ impl CostFunction for VecCostFn<'_> { fn cost(&mut self, enode: &VecLang, mut costs: C) -> Self::Cost where C: FnMut(Id) -> Self::Cost, - { + { + const NO_OPTIMIZATION: f64 = 0.0; const LITERAL: f64 = 0.001; const STRUCTURE: f64 = 0.1; const VEC_OP: f64 = 1.; const OP: f64 = 1.; const BIG: f64 = 100.0; let op_cost = match enode { + // No Optimization case for testing purposes + VecLang::NoOptVec(..) => NO_OPTIMIZATION, + // You get literals for extremely cheap VecLang::Num(..) => LITERAL, + VecLang::Reg(..) => LITERAL, + VecLang::Arg(..) => LITERAL, VecLang::Symbol(..) => LITERAL, VecLang::Get(..) => LITERAL, diff --git a/src/dios-egraphs/src/main.rs b/src/dios-egraphs/src/main.rs index b2d58b76..82bd089e 100644 --- a/src/dios-egraphs/src/main.rs +++ b/src/dios-egraphs/src/main.rs @@ -3,167 +3,233 @@ use clap::{App, Arg}; use dioslib::*; fn main() { - let matches = App::new("Diospyros Rewriter") - .arg( - Arg::with_name("INPUT") - .help("Sets the input file") - .required(true) - .index(1), - ) - .arg( - Arg::with_name("no-ac") - .long("no-ac") - .help("Disable associativity and commutativity rules"), - ) - .arg( - Arg::with_name("no-vec") - .long("no-vec") - .help("Disable vector rules"), - ) - .get_matches(); + let matches = App::new("Diospyros Rewriter") + .arg( + Arg::with_name("INPUT") + .help("Sets the input file") + .required(true) + .index(1), + ) + .arg( + Arg::with_name("no-ac") + .long("no-ac") + .help("Disable associativity and commutativity rules"), + ) + .arg( + Arg::with_name("no-vec") + .long("no-vec") + .help("Disable vector rules"), + ) + .get_matches(); - use std::{env, fs}; + use std::{env, fs}; - // Get a path string to parse a program. - let path = matches.value_of("INPUT").unwrap(); - let timeout = env::var("TIMEOUT") - .ok() - .and_then(|t| t.parse::().ok()) - .unwrap_or(180); - let prog_str = fs::read_to_string(path).expect("Failed to read the input file."); + // Get a path string to parse a program. 
+ let path = matches.value_of("INPUT").unwrap(); + let timeout = env::var("TIMEOUT") + .ok() + .and_then(|t| t.parse::().ok()) + .unwrap_or(180); + let prog_str = fs::read_to_string(path).expect("Failed to read the input file."); - // AST conversion: boxed Rosette terms to Egg syntax - let converted: String = stringconversion::convert_string(&prog_str) - .expect("Failed to convert the input file to egg AST."); + // AST conversion: boxed Rosette terms to Egg syntax + let converted: String = stringconversion::convert_string(&prog_str) + .expect("Failed to convert the input file to egg AST."); - // Rewrite a list of expressions to a concatenation of vectors - let concats = rewriteconcats::list_to_concats(&converted); - let prog = concats.unwrap().parse().unwrap(); + // Rewrite a list of expressions to a concatenation of vectors + let concats = rewriteconcats::list_to_concats(&converted); + let prog = concats.unwrap().parse().unwrap(); - // Rules to disable flags - let no_ac = matches.is_present("no-ac"); - let no_vec = matches.is_present("no-vec"); + // Rules to disable flags + let no_ac = matches.is_present("no-ac"); + let no_vec = matches.is_present("no-vec"); - // Run rewriter - eprintln!( - "Running egg with timeout {:?}s, width: {:?}", - timeout, - config::vector_width() - ); - let (cost, best) = rules::run(&prog, timeout, no_ac, no_vec); + // Run rewriter + eprintln!( + "Running egg with timeout {:?}s, width: {:?}", + timeout, + config::vector_width() + ); + let (cost, best) = rules::run(&prog, timeout, no_ac, no_vec); - println!("{}", best.pretty(80)); /* Pretty print with width 80 */ - eprintln!("\nCost: {}", cost); + println!("{}", best.pretty(80)); /* Pretty print with width 80 */ + eprintln!("\nCost: {}", cost); } #[cfg(test)] mod tests { - use super::{rules::*, veclang::VecLang}; - use assert_approx_eq::assert_approx_eq; - use egg::*; + use super::{rules::*, veclang::VecLang}; + use assert_approx_eq::assert_approx_eq; + use egg::*; - fn run_egpraph_with_start(prog: &str, exp_best: &str, exp_best_cost: f64) { - // AST conversion: boxed Rosette terms to Egg syntax - let converted: String = super::stringconversion::convert_string(&prog.to_string()) - .expect("Failed to convert the input file to egg AST."); + fn run_egpraph_with_start(prog: &str, exp_best: &str, exp_best_cost: f64) { + // AST conversion: boxed Rosette terms to Egg syntax + let converted: String = super::stringconversion::convert_string(&prog.to_string()) + .expect("Failed to convert the input file to egg AST."); - // Rewrite a list of expressions to a concatenation of vectors - let concats = super::rewriteconcats::list_to_concats(&converted); - let start = concats.unwrap().parse().unwrap(); + // Rewrite a list of expressions to a concatenation of vectors + let concats = super::rewriteconcats::list_to_concats(&converted); + let start = concats.unwrap().parse().unwrap(); - // Run with AC off - let (best_cost, best) = run(&start, 60, true, false); + // Run with AC off + let (best_cost, best) = run(&start, 60, true, false); - println!( - "original:\n{}\nbest:\n{}\nbest cost {}", - start.pretty(80), - best.pretty(80), - best_cost, - ); - if best != exp_best.parse().unwrap() { - println!( - "Expected best not equal:{}", - exp_best.parse::>().unwrap().pretty(80) - ); - } - assert_approx_eq!(best_cost, exp_best_cost, 0.000001); + println!( + "original:\n{}\nbest:\n{}\nbest cost {}", + start.pretty(80), + best.pretty(80), + best_cost, + ); + if best != exp_best.parse().unwrap() { + println!( + "Expected best not equal:{}", + 
exp_best.parse::>().unwrap().pretty(80) + ); } + assert_approx_eq!(best_cost, exp_best_cost, 0.000001); + } + + #[test] + fn simple_register_parse() { + let start = "(Vec (+ a b) (+ c d) 0 0)"; + let exp_best = "(VecAdd (Vec a c 0 0) (Vec b d 0 0))"; + let exp_best_cost = 1.208; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn direct_recexpr() { - let expr = RecExpr::from( - [VecLang::Symbol(Symbol::from("a_in")), - VecLang::Symbol(Symbol::from("b_in")), + #[test] + fn simple_register() { + let expr = RecExpr::from( + [ + VecLang::Symbol(Symbol::from("x1")), + VecLang::Num(1), + VecLang::Symbol(Symbol::from("x2")), + VecLang::Num(2), + VecLang::Add([Id::from(1), Id::from(3)]), + VecLang::Symbol(Symbol::from("x3")), + VecLang::Reg(1), + VecLang::Symbol(Symbol::from("x4")), + VecLang::Num(4), + VecLang::Add([Id::from(6), Id::from(8)]), + VecLang::Symbol(Symbol::from("x5")), + VecLang::Num(5), + VecLang::Symbol(Symbol::from("x6")), + VecLang::Reg(2), + VecLang::Add([Id::from(11), Id::from(13)]), + VecLang::Symbol(Symbol::from("x7")), + VecLang::Reg(3), + VecLang::Symbol(Symbol::from("x8")), + VecLang::Reg(4), + VecLang::Add([Id::from(16), Id::from(18)]), + VecLang::Vec(Box::new([ + Id::from(4), + Id::from(9), + Id::from(14), + Id::from(19), + ])), + ] + .to_vec(), + ); + let (_, new_expr) = super::rules::run(&expr, 180, false, false); + println!("Test1!"); + println!("{:?}", expr); + println!("{:?}", new_expr); + } + + #[test] + fn direct_recexpr() { + let expr = RecExpr::from( + [ + VecLang::Symbol(Symbol::from("a_in")), + VecLang::Symbol(Symbol::from("b_in")), VecLang::Num(0), VecLang::Num(1), VecLang::Num(2), VecLang::Num(3), - VecLang::Get([Id::from(0), Id::from(2)]), - VecLang::Get([Id::from(1), Id::from(2)]), - VecLang::Add([Id::from(6), Id::from(7)]), + VecLang::Get([Id::from(0), Id::from(2)]), + VecLang::Get([Id::from(1), Id::from(2)]), + VecLang::Add([Id::from(6), Id::from(7)]), VecLang::Get([Id::from(0), Id::from(3)]), - VecLang::Get([Id::from(1), Id::from(3)]), + VecLang::Get([Id::from(1), Id::from(3)]), VecLang::Add([Id::from(9), Id::from(10)]), VecLang::Get([Id::from(0), Id::from(4)]), - VecLang::Get([Id::from(1), Id::from(4)]), - VecLang::Add([Id::from(12), Id::from(13)]), + VecLang::Get([Id::from(1), Id::from(4)]), + VecLang::Add([Id::from(12), Id::from(13)]), VecLang::Get([Id::from(0), Id::from(5)]), - VecLang::Get([Id::from(1), Id::from(5)]), - VecLang::Add([Id::from(15), Id::from(16)]), - VecLang::Vec(Box::new([Id::from(8), Id::from(11), Id::from(14), Id::from(17)]))].to_vec() - ); - let (cost, _) = super::rules::run(&expr, 180, false, false); - assert_approx_eq!(cost, 1.026, 0.000001); - } + VecLang::Get([Id::from(1), Id::from(5)]), + VecLang::Add([Id::from(15), Id::from(16)]), + VecLang::Vec(Box::new([ + Id::from(8), + Id::from(11), + Id::from(14), + Id::from(17), + ])), + ] + .to_vec(), + ); + let (cost, _) = super::rules::run(&expr, 180, false, false); + assert_approx_eq!(cost, 1.026, 0.000001); + } - #[test] - fn direct_recexpr_2() { - let expr = RecExpr::from( - [ - VecLang::Symbol(Symbol::from("scalar_in")), - VecLang::Num(0), - VecLang::Get([Id::from(0), Id::from(1)]), - VecLang::Vec(Box::new([Id::from(2), Id::from(1), Id::from(1), Id::from(1)])), - VecLang::Symbol(Symbol::from("a_in")), - VecLang::Num(4), - VecLang::Num(5), - VecLang::Num(6), - VecLang::Num(7), - VecLang::Get([Id::from(4), Id::from(5)]), - VecLang::Get([Id::from(4), Id::from(6)]), - VecLang::Get([Id::from(4), Id::from(7)]), - VecLang::Get([Id::from(4), Id::from(8)]), - 
VecLang::Mul([Id::from(9), Id::from(2)]), - VecLang::Mul([Id::from(10), Id::from(2)]), - VecLang::Mul([Id::from(11), Id::from(2)]), - VecLang::Mul([Id::from(12), Id::from(2)]), - VecLang::Vec(Box::new([Id::from(13), Id::from(14), Id::from(15), Id::from(16)])), - VecLang::Concat([Id::from(3), Id::from(17)]) - ].to_vec() - ); + #[test] + fn direct_recexpr_2() { + let expr = RecExpr::from( + [ + VecLang::Symbol(Symbol::from("scalar_in")), + VecLang::Num(0), + VecLang::Get([Id::from(0), Id::from(1)]), + VecLang::Vec(Box::new([ + Id::from(2), + Id::from(1), + Id::from(1), + Id::from(1), + ])), + VecLang::Symbol(Symbol::from("a_in")), + VecLang::Num(4), + VecLang::Num(5), + VecLang::Num(6), + VecLang::Num(7), + VecLang::Get([Id::from(4), Id::from(5)]), + VecLang::Get([Id::from(4), Id::from(6)]), + VecLang::Get([Id::from(4), Id::from(7)]), + VecLang::Get([Id::from(4), Id::from(8)]), + VecLang::Mul([Id::from(9), Id::from(2)]), + VecLang::Mul([Id::from(10), Id::from(2)]), + VecLang::Mul([Id::from(11), Id::from(2)]), + VecLang::Mul([Id::from(12), Id::from(2)]), + VecLang::Vec(Box::new([ + Id::from(13), + Id::from(14), + Id::from(15), + Id::from(16), + ])), + VecLang::Concat([Id::from(3), Id::from(17)]), + ] + .to_vec(), + ); - let (cost, _) = super::rules::run(&expr, 180, false, false); - assert_approx_eq!(cost, 1.133, 0.000001); - } + let (cost, _) = super::rules::run(&expr, 180, false, false); + assert_approx_eq!(cost, 1.133, 0.000001); + } - #[test] - fn simple_vector_add() { - let start = "(Vec (+ a b) (+ c d) 0 0)"; - let exp_best = "(VecAdd (Vec a c 0 0) (Vec b d 0 0))"; - let exp_best_cost = 1.208; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + #[test] + fn simple_vector_add() { + let start = "(Vec (+ a b) (+ c d) 0 0)"; + let exp_best = "(VecAdd (Vec a c 0 0) (Vec b d 0 0))"; + let exp_best_cost = 1.208; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_pairwise_mac() { - let start = " + #[test] + fn vector_pairwise_mac() { + let start = " (Vec (+ (* a b) (+ (* c d) (* e f))) (+ (* aa bb) (+ (* cc dd) (* ee ff))) 0 0)"; - let exp_best = " + let exp_best = " (VecMAC (VecMAC (VecMul (Vec c aa 0 0) (Vec d bb 0 0)) @@ -171,13 +237,13 @@ mod tests { (Vec f ff 0 0)) (Vec a cc 0 0) (Vec b dd 0 0))"; - let exp_best_cost = 3.624; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 3.624; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn qr_decomp_snippet() { - let start = " + #[test] + fn qr_decomp_snippet() { + let start = " (Vec (* (neg (sgn (Get A 0))) @@ -198,7 +264,7 @@ mod tests { (* (Get A 0) (Get A 0)) (* (Get A 2) (Get A 2))))) (Get A 2))"; - let _best_with_ac = " + let _best_with_ac = " (VecMul (VecNeg (Vec @@ -213,21 +279,21 @@ mod tests { (Vec (Get A 0) (Get A 0) (Get A 0) 1)) (Vec (Get A 2) (Get A 2) (Get A 2) 1) (Vec (Get A 2) (Get A 2) (Get A 2) 1))))"; - let exp_best_cost = 121.048; + let exp_best_cost = 121.048; - // No rewrites found with AC off - run_egpraph_with_start(start, start, exp_best_cost); - } + // No rewrites found with AC off + run_egpraph_with_start(start, start, exp_best_cost); + } - #[test] - fn vector_variadic_add_mac() { - let start = " + #[test] + fn vector_variadic_add_mac() { + let start = " (Vec (+ (* a b) (* c d) (* e f)) (+ (* aa bb) (* cc dd) (* ee ff)) 0 0)"; - let exp_best = " + let exp_best = " (VecMAC (VecMAC (VecMul (Vec e ee 0 0) (Vec f ff 0 0)) @@ -235,81 +301,81 @@ mod tests { (Vec d dd 0 0)) (Vec a aa 0 0) (Vec b bb 0 0))"; - let 
exp_best_cost = 3.624; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 3.624; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_mac() { - let start = " + #[test] + fn vector_mac() { + let start = " (Vec (+ ?a0 (* ?b0 ?c0)) (+ ?a1 (* ?b1 ?c1)) (+ ?a2 (* ?b2 ?c2)) (+ ?a3 (* ?b3 ?c3)))"; - let exp_best = " + let exp_best = " (VecMAC (Vec ?a0 ?a1 ?a2 ?a3) (Vec ?b0 ?b1 ?b2 ?b3) (Vec ?c0 ?c1 ?c2 ?c3))"; - let exp_best_cost = 1.312; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 1.312; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_mac_just_mul_or_zero() { - let start = " + #[test] + fn vector_mac_just_mul_or_zero() { + let start = " (Vec (+ ?a0 (* ?b0 ?c0)) (* ?b1 ?c1) 0 (+ ?a3 (* ?b3 ?c3)))"; - let exp_best = " + let exp_best = " (VecMAC (Vec ?a0 0 0 ?a3) (Vec ?b0 ?b1 0 ?b3) (Vec ?c0 ?c1 0 ?c3))"; - let exp_best_cost = 1.312; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 1.312; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_matrix_multiply_2x2_2x2() { - let start = " + #[test] + fn vector_matrix_multiply_2x2_2x2() { + let start = " (Vec (+ (* v0 v4) (* v1 v6)) (+ (* v0 v5) (* v1 v7)) (+ (* v2 v4) (* v3 v6)) (+ (* v2 v5) (* v3 v7)))"; - let exp_best = " + let exp_best = " (VecMAC (VecMul (Vec v1 v1 v6 v7) (Vec v6 v7 v3 v3)) (Vec v4 v5 v2 v2) (Vec v0 v0 v4 v5))"; - let exp_best_cost = 2.416; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 2.416; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_matrix_multiply_2x2_2x2_explicit_get() { - let start = " + #[test] + fn vector_matrix_multiply_2x2_2x2_explicit_get() { + let start = " (Vec (+ (* (Get a 0) (Get b 0)) (* (Get a 1) (Get b 2))) (+ (* (Get a 0) (Get b 1)) (* (Get a 1) (Get b 3))) (+ (* (Get a 2) (Get b 0)) (* (Get a 3) (Get b 2))) (+ (* (Get a 2) (Get b 1)) (* (Get a 3) (Get b 3))))"; - let exp_best = " + let exp_best = " (VecMAC (VecMul (LitVec (Get a 0) (Get a 0) (Get a 2) (Get a 2)) (LitVec (Get b 0) (Get b 1) (Get b 0) (Get b 1))) (LitVec (Get a 1) (Get a 1) (Get a 3) (Get a 3)) (LitVec (Get b 2) (Get b 3) (Get b 2) (Get b 3)))"; - let exp_best_cost = 2.052; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 2.052; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_matrix_multiply_2x3_3x3() { - let start = " + #[test] + fn vector_matrix_multiply_2x3_3x3() { + let start = " (List (+ (* (Get A 0) (Get B 0)) @@ -335,7 +401,7 @@ mod tests { (* (Get A 3) (Get B 2)) (* (Get A 4) (Get B 5)) (* (Get A 5) (Get B 8))))"; - let exp_best = " + let exp_best = " (Concat (VecMAC (VecMAC @@ -353,13 +419,13 @@ mod tests { (LitVec (Get B 7) (Get B 8) 0 0)) (LitVec (Get A 3) (Get A 3) 0 0) (LitVec (Get B 1) (Get B 2) 0 0)))"; - let exp_best_cost = 6.43; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 6.43; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_2d_conv_2x2_2x2() { - let start = " + #[test] + fn vector_2d_conv_2x2_2x2() { + let start = " (List (* v0 v4) (+ (* v0 v5) (* v1 v4)) @@ -370,7 +436,7 @@ mod tests { (* v2 v6) (+ (* v2 v7) (* v3 v6)) (* v3 v7))"; - let exp_best = " + let exp_best = " (Concat (VecMAC (VecMul (Vec 1 v0 1 v0) (Vec 0 v5 0 v6)) @@ -387,13 +453,13 @@ mod tests { (Vec v0 v3 v2 v3) (Vec v7 v5 v6 v6)) (VecMul (Vec 
v3 0 0 0) (Vec v7 0 0 0))))"; - let exp_best_cost = 9.656; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 9.656; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_2d_conv_3x3_3x3() { - let start = "(Concat + #[test] + fn vector_2d_conv_3x3_3x3() { + let start = "(Concat (Vec (* v0 v9) (+ (* v0 v10) (* v1 v9)) @@ -462,7 +528,7 @@ mod tests { (+ (* v7 v17) (* v8 v16))) (List (* v8 v17))))))))"; - let exp_best = " + let exp_best = " (Vec (VecMAC (VecMAC @@ -544,7 +610,7 @@ mod tests { (List (* v8 v17))))))) 0 0)"; - let exp_best_cost = 156.468; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 156.468; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } } diff --git a/src/dios-egraphs/src/veclang-original.rs b/src/dios-egraphs/src/veclang-original.rs new file mode 100644 index 00000000..ee056f17 --- /dev/null +++ b/src/dios-egraphs/src/veclang-original.rs @@ -0,0 +1,59 @@ +use egg::*; + +define_language! { + pub enum VecLang { + Num(i32), + + // Id is a key to identify EClasses within an EGraph, represents + // children nodes + "+" = Add([Id; 2]), + "*" = Mul([Id; 2]), + "-" = Minus([Id; 2]), + "/" = Div([Id; 2]), + + "or" = Or([Id; 2]), + "&&" = And([Id; 2]), + "ite" = Ite([Id; 3]), + "<" = Lt([Id; 2]), + + "sgn" = Sgn([Id; 1]), + "sqrt" = Sqrt([Id; 1]), + "neg" = Neg([Id; 1]), + + // Lists have a variable number of elements + "List" = List(Box<[Id]>), + + // Vectors have width elements + "Vec" = Vec(Box<[Id]>), + + // Vector with all literals + "LitVec" = LitVec(Box<[Id]>), + + "Get" = Get([Id; 2]), + + // Used for partitioning and recombining lists + "Concat" = Concat([Id; 2]), + + // Vector operations that take 2 vectors of inputs + "VecAdd" = VecAdd([Id; 2]), + "VecMinus" = VecMinus([Id; 2]), + "VecMul" = VecMul([Id; 2]), + "VecDiv" = VecDiv([Id; 2]), + // "VecMulSgn" = VecMulSgn([Id; 2]), + + // Vector operations that take 1 vector of inputs + "VecNeg" = VecNeg([Id; 1]), + "VecSqrt" = VecSqrt([Id; 1]), + "VecSgn" = VecSgn([Id; 1]), + + // MAC takes 3 lists: acc, v1, v2 + "VecMAC" = VecMAC([Id; 3]), + + // language items are parsed in order, and we want symbol to + // be a fallback, so we put it last. + // `Symbol` is an egg-provided interned string type + Symbol(egg::Symbol), + } +} + +pub type EGraph = egg::EGraph; diff --git a/src/dios-egraphs/src/veclang.rs b/src/dios-egraphs/src/veclang.rs index ee056f17..739adbce 100644 --- a/src/dios-egraphs/src/veclang.rs +++ b/src/dios-egraphs/src/veclang.rs @@ -4,6 +4,12 @@ define_language! { pub enum VecLang { Num(i32), + // Register points to other computation, denoted by a number + Reg(u32), + + // Argument points to a argument, denoted by a number + Arg(u32), + // Id is a key to identify EClasses within an EGraph, represents // children nodes "+" = Add([Id; 2]), @@ -26,6 +32,9 @@ define_language! { // Vectors have width elements "Vec" = Vec(Box<[Id]>), + // Vectors have width elements, not to be optimized (for testing purposes) + "NoOptVec" = NoOptVec(Box<[Id]>), + // Vector with all literals "LitVec" = LitVec(Box<[Id]>), @@ -49,6 +58,9 @@ define_language! { // MAC takes 3 lists: acc, v1, v2 "VecMAC" = VecMAC([Id; 3]), + // Info specific to register + // RegInfo(egg::Symbol), + // language items are parsed in order, and we want symbol to // be a fallback, so we put it last. 
// `Symbol` is an egg-provided interned string type From ab98a31a50e24058b948a1390a135662d6ac91ce Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 6 May 2022 16:40:15 -0400 Subject: [PATCH 096/143] delete old instructions works now --- .../Diospyros/LoadStoreMovement.cpp | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index e275529f..fc4b0d0d 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -33,6 +33,14 @@ struct LoadStoreMovementPass : public FunctionPass { AliasAnalysis *AA = &getAnalysis().getAAResults(); for (auto &B : F) { + // Grab all instructions + std::vector all_instrs = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + all_instrs.push_back(instr); + } + // Perform Pushing Back of Store Instructions std::vector final_instrs_vec = {}; for (auto &I : B) { @@ -135,16 +143,14 @@ struct LoadStoreMovementPass : public FunctionPass { // Finish by inserting cloned instruction builder.Insert(cloned_instr); } - - // Finally, delete all the original instructions in the basic block - // Do this in reverse order. - std::reverse(final_instrs_vec.begin(), final_instrs_vec.end()); - for (auto &I : final_instrs_vec) { - if (I != NULL) { - try { - I->eraseFromParent(); - } catch (...) {} - } + } + + // Finally, delete all the original instructions in the basic block + // Do this in reverse order. + std::reverse(all_instrs.begin(), all_instrs.end()); + for (Instruction *instr : all_instrs) { + if (!instr->isTerminator()) { + instr->eraseFromParent(); } } } From 991aea94d956c207634c923de538bec072598e41 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sun, 8 May 2022 02:52:52 -0400 Subject: [PATCH 097/143] move fail tests to c-tests Tests that passed are now moved into c-tests --- .../Diospyros/LoadStoreMovement.cpp | 52 ++-- src/dios-egraphs/Diospyros/c-tests/fft.c | 165 +++++++++++ .../{fail-tests => c-tests}/local-array-1.c | 0 .../{fail-tests => c-tests}/local-array-2.c | 0 .../{fail-tests => c-tests}/local-array-3.c | 0 .../{fail-tests => c-tests}/local-array-4.c | 0 .../Diospyros/c-tests/qr-decomp-fixed-size.c | 267 +++++++++++++++++ .../qr-decomp-local-arrays.c | 8 +- .../Diospyros/c-tests/qr-decomp.c | 268 ++++++++++++++++++ .../test1-local-array.c | 6 +- .../test2-local-array.c | 8 +- .../test3-local-array.c | 8 +- .../Diospyros/fail-tests/fft-FAIL.c | 78 ----- .../Diospyros/fail-tests/qr-decomp-FAIL.c | 130 --------- .../fail-tests/qr-decomp-fixed-size-FAIL.c | 162 ----------- 15 files changed, 743 insertions(+), 409 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/c-tests/fft.c rename src/dios-egraphs/Diospyros/{fail-tests => c-tests}/local-array-1.c (100%) rename src/dios-egraphs/Diospyros/{fail-tests => c-tests}/local-array-2.c (100%) rename src/dios-egraphs/Diospyros/{fail-tests => c-tests}/local-array-3.c (100%) rename src/dios-egraphs/Diospyros/{fail-tests => c-tests}/local-array-4.c (100%) create mode 100644 src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size.c rename src/dios-egraphs/Diospyros/{fail-tests => c-tests}/qr-decomp-local-arrays.c (98%) create mode 100644 src/dios-egraphs/Diospyros/c-tests/qr-decomp.c rename src/dios-egraphs/Diospyros/{fail-tests => c-tests}/test1-local-array.c (93%) rename src/dios-egraphs/Diospyros/{fail-tests => c-tests}/test2-local-array.c (97%) rename 
src/dios-egraphs/Diospyros/{fail-tests => c-tests}/test3-local-array.c (98%) delete mode 100644 src/dios-egraphs/Diospyros/fail-tests/fft-FAIL.c delete mode 100644 src/dios-egraphs/Diospyros/fail-tests/qr-decomp-FAIL.c delete mode 100644 src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-FAIL.c diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index fc4b0d0d..fccd0071 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -29,7 +29,10 @@ struct LoadStoreMovementPass : public FunctionPass { AU.addRequired(); } - void rewrite_stores(Function &F) { + /** + * + */ + void rewrite_loads(Function &F) { AliasAnalysis *AA = &getAnalysis().getAAResults(); for (auto &B : F) { @@ -41,53 +44,54 @@ struct LoadStoreMovementPass : public FunctionPass { all_instrs.push_back(instr); } - // Perform Pushing Back of Store Instructions + // Perform Pushing Back of Load Instructions std::vector final_instrs_vec = {}; for (auto &I : B) { Instruction *instr = dyn_cast(&I); assert(instr != NULL); - // Place any non-Store Instructions at the end of the list of instructions - if (!isa(instr)) { + // Place any non-Load Instructions at the end of the list of instructions + if (!isa(instr)) { final_instrs_vec.push_back(instr); continue; } - // Handle Store Instructions + // Handle Load Instructions int insertion_offset = final_instrs_vec.size(); while (true) { - Instruction *store_instr = instr; + Instruction *load_instr = instr; // If there is no prior instruction, push back at current offset, and stop. if (insertion_offset - 1 < 0) { - final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, store_instr); + final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, load_instr); break; } Instruction *prior_instr = final_instrs_vec[insertion_offset - 1]; - // If the prior instruction is used in the store's + // If the prior instruction is used in the load's // arguments, do not push it back - int num_operands = store_instr->getNumOperands(); + int num_operands = load_instr->getNumOperands(); bool break_while = false; for (int i = 0; i < num_operands; i++) { - Value *store_operand = store_instr->getOperand(i); - Instruction *store_operand_instr = - dyn_cast(store_operand); - assert(store_operand_instr != NULL); - if (store_operand_instr == prior_instr) { - final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, store_instr); - break_while = true; - break; + Value *load_operand = load_instr->getOperand(i); + Instruction *load_operand_instr = + dyn_cast(load_operand); + if (load_operand_instr != NULL) { + if (load_operand_instr == prior_instr) { + final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, load_instr); + break_while = true; + break; + } } } if (break_while) { break; } - // If the prior instruction alias with the store + // If the prior instruction alias with the load // instruction, do not push it back - if (!AA->isNoAlias(store_instr, prior_instr)) { - final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, store_instr); + if (!AA->isNoAlias(load_instr, prior_instr)) { + final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, load_instr); break; } - // Otherwise, keep pushing back the store instruction + // Otherwise, keep pushing back the load instruction --insertion_offset; } @@ -156,7 +160,7 @@ struct LoadStoreMovementPass : public FunctionPass { } } - void rewrite_loads(Function &F) { + void 
rewrite_stores(Function &F) { AliasAnalysis *AA = &getAnalysis().getAAResults(); for (auto &B : F) { } @@ -173,8 +177,8 @@ struct LoadStoreMovementPass : public FunctionPass { F.getName().substr(0, NO_OPT_PREFIX.size()) == NO_OPT_PREFIX)) { return false; } - rewrite_stores(F); - rewrite_loads(F); + // rewrite_loads(F); + // rewrite_loads(F); return true; } diff --git a/src/dios-egraphs/Diospyros/c-tests/fft.c b/src/dios-egraphs/Diospyros/c-tests/fft.c new file mode 100644 index 00000000..8ab52066 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/fft.c @@ -0,0 +1,165 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 8 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], + float img_twid_in[SIZE / 2], float real_out[SIZE], + float img_out[SIZE]) { + int even = 0; + int odd = 0; + int log = 0; + int rootindex = 0; + int span = SIZE >> 1; + float temp = 0; + + for (int i = 0; i < SIZE; i++) { + real_out[i] = real_in[i]; + img_out[i] = img_in[i]; + } + + while (span != 0) { + odd = span; + while (odd < SIZE) { + odd = odd | span; + even = odd ^ span; + + temp = real_out[even] + real_out[odd]; + real_out[odd] = real_out[even] - real_out[odd]; + real_out[even] = temp; + + temp = img_out[even] + img_out[odd]; + img_out[odd] = img_out[even] - img_out[odd]; + img_out[even] = temp; + + rootindex = (even << log) & (SIZE - 1); + if (rootindex > 0) { + temp = real_twid_in[rootindex] * real_out[odd] - + img_twid_in[rootindex] * img_out[odd]; + img_out[odd] = real_twid_in[rootindex] * img_out[odd] + + img_twid_in[rootindex] * real_out[odd]; + real_out[odd] = temp; + } + odd += 1; + } + span >>= 1; + log += 1; + } +} + +void no_opt_fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], + float img_twid_in[SIZE / 2], float real_out[SIZE], + float img_out[SIZE]) { + int even = 0; + int odd = 0; + int log = 0; + int rootindex = 0; + int span = SIZE >> 1; + float temp = 0; + + for (int i = 0; i < SIZE; i++) { + real_out[i] = real_in[i]; + img_out[i] = img_in[i]; + } + + while (span != 0) { + odd = span; + while (odd < SIZE) { + odd = odd | span; + even = odd ^ span; + + temp = real_out[even] + real_out[odd]; + real_out[odd] = real_out[even] - real_out[odd]; + real_out[even] = temp; + + temp = img_out[even] + img_out[odd]; + img_out[odd] = img_out[even] - img_out[odd]; + img_out[even] = temp; + + rootindex = (even << log) & (SIZE - 1); + if (rootindex > 0) { + temp = real_twid_in[rootindex] * real_out[odd] - + img_twid_in[rootindex] * img_out[odd]; + img_out[odd] = real_twid_in[rootindex] * img_out[odd] + + img_twid_in[rootindex] * real_out[odd]; + real_out[odd] = temp; + } + odd += 1; + } + span >>= 1; + log += 1; + } +} + + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float real_in[SIZE] = {0.0f}; + float img_in[SIZE] = {0.0f}; + float real_twid_in[SIZE / 2] = {0.0f}; + float img_twid_in[SIZE / 2] = {0.0f}; + float real_out[SIZE] = {0.0f}; + float img_out[SIZE] = {0.0f}; + + float expected_real_in[SIZE] = {0.0f}; + float expected_img_in[SIZE] = {0.0f}; + float expected_real_twid_in[SIZE / 2] = {0.0f}; + float expected_img_twid_in[SIZE / 2] = {0.0f}; + float expected_real_out[SIZE] = {0.0f}; + float expected_img_out[SIZE] = {0.0f}; + + for (int i = 0; i < SIZE; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + real_in[i] = n; + expected_real_in[i] = n; + } + for (int i = 0; i < SIZE; i++) { + float n = 
(float)rand() / (float)(RAND_MAX / MAX_FLOAT); + img_in[i] = n; + expected_img_in[i] = n; + } + for (int i = 0; i < SIZE / 2; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + real_twid_in[i] = n; + expected_real_twid_in[i] = n; + } + for (int i = 0; i < SIZE / 2; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + img_twid_in[i] = n; + expected_img_twid_in[i] = n; + } + for (int i = 0; i < SIZE; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + real_out[i] = n; + expected_real_out[i] = n; + } + for (int i = 0; i < SIZE; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + img_out[i] = n; + expected_img_out[i] = n; + } + + fft(real_in, img_in, real_twid_in, img_twid_in, real_out, img_out); + no_opt_fft(expected_real_in, expected_img_in, expected_real_twid_in, expected_img_twid_in, expected_real_out, expected_img_out); + + for (int i = 0; i < SIZE; i++) { + printf("Real Out Output: %f\n", real_out[i]); + printf("Expected Real Out Output: %f\n", expected_real_out[i]); + assert(fabs(real_out[i] - expected_real_out[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Img Out Output: %f\n", img_out[i]); + printf("Expected Img Out Output: %f\n", expected_img_out[i]); + assert(fabs(img_out[i] - expected_img_out[i]) < DELTA); + } +} diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-1.c b/src/dios-egraphs/Diospyros/c-tests/local-array-1.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/local-array-1.c rename to src/dios-egraphs/Diospyros/c-tests/local-array-1.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-2.c b/src/dios-egraphs/Diospyros/c-tests/local-array-2.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/local-array-2.c rename to src/dios-egraphs/Diospyros/c-tests/local-array-2.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-3.c b/src/dios-egraphs/Diospyros/c-tests/local-array-3.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/local-array-3.c rename to src/dios-egraphs/Diospyros/c-tests/local-array-3.c diff --git a/src/dios-egraphs/Diospyros/fail-tests/local-array-4.c b/src/dios-egraphs/Diospyros/c-tests/local-array-4.c similarity index 100% rename from src/dios-egraphs/Diospyros/fail-tests/local-array-4.c rename to src/dios-egraphs/Diospyros/c-tests/local-array-4.c diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size.c new file mode 100644 index 00000000..69c40b58 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size.c @@ -0,0 +1,267 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 4 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void 
naive_fixed_transpose(float *a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float *a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float expectedQ[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + + naive_fixed_qr_decomp(A, Q, R); + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-local-arrays.c similarity index 98% rename from src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c rename to src/dios-egraphs/Diospyros/c-tests/qr-decomp-local-arrays.c index 2678fc1c..430d8af1 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-local-arrays.c +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-local-arrays.c @@ -278,8 +278,8 @@ void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], } int main(void) { - time_t t = time(NULL); - srand((unsigned)time(&t)); + // time_t t = time(NULL); + // srand((unsigned)time(&t)); float A[SIZE * SIZE] = {0.0f}; for (int i = 0; i < SIZE * SIZE; i++) { @@ -298,14 +298,14 @@ int main(void) { for (int j = 0; j < SIZE; j++) { printf("Q Output: %f\n", Q[i * SIZE + j]); printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); - // assert(fabs(expectedQ[i] - Q[i]) < DELTA); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); } } for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { printf("R Output: %f\n", R[i * SIZE + j]); printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); - // assert(fabs(expectedR[i] - R[i]) < DELTA); + assert(fabs(expectedR[i] - R[i]) < DELTA); } } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp.c new file mode 100644 index 00000000..16d181f0 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp.c @@ -0,0 +1,268 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_transpose(float *a, int n) __attribute__((always_inline)); +void 
naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1, + int col2) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +// Naive implementation +void naive_transpose(float *a, int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} + +void no_opt_naive_transpose(float *a, int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1, + int col2) { + for (int y = 0; y < row1; y++) { + for (int x = 0; x < col2; x++) { + c[col2 * y + x] = 0.0f; + for (int k = 0; k < col1; k++) { + c[col2 * y + x] += a[col1 * y + k] * b[col2 * k + x]; + } + } + } +} + +void no_opt_naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1, + int col2) { + for (int y = 0; y < row1; y++) { + for (int x = 0; x < col2; x++) { + c[col2 * y + x] = 0.0f; + for (int k = 0; k < col1; k++) { + c[col2 * y + x] += a[col1 * y + k] * b[col2 * k + x]; + } + } + } +} + +void naive_qr_decomp(float *A, float *Q, float *R, int n) { + memcpy(R, A, sizeof(float) * n * n); + + // Build identity matrix of size n * n + float *I = (float *)calloc(sizeof(float), n * n); + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + I[i * n + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < n - 1; k++) { + int m = n - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * n + k]; + e[i] = I[row * n + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / norm_u; + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), n * n); + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * n + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * n * n); // Q = q_t + naive_matrix_multiply(q_t, A, R, n, n, n); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), n * n); + naive_matrix_multiply(q_t, Q, res, n, n, n); // R = q_t * A + memcpy(Q, res, sizeof(float) * n * n); + naive_matrix_multiply(q_t, R, res, n, n, n); // R = q_t * A + memcpy(R, res, sizeof(float) * n * n); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_transpose(Q, n); +} + +void no_opt_naive_qr_decomp(float *A, float *Q, float *R, int n) { + memcpy(R, A, sizeof(float) * n * n); + + // Build identity matrix of size n * n + float *I = (float *)calloc(sizeof(float), n * n); + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + I[i * n + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < n - 1; k++) { + int m = n - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * n + k]; + e[i] = I[row * n + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / norm_u; + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), n * n); + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * n + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * n * n); // Q = q_t + no_opt_naive_matrix_multiply(q_t, A, R, n, n, n); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), n * n); + no_opt_naive_matrix_multiply(q_t, Q, res, n, n, n); // R = q_t * A + memcpy(Q, res, sizeof(float) * n * n); + no_opt_naive_matrix_multiply(q_t, R, res, n, n, n); // R = q_t * A + memcpy(R, res, sizeof(float) * n * n); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_transpose(Q, n); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + printf("%f\n", A[i]); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + naive_qr_decomp(A, Q, R, SIZE); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_qr_decomp(A, expectedQ, expectedR, SIZE); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c b/src/dios-egraphs/Diospyros/c-tests/test1-local-array.c similarity index 93% rename from src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c rename to src/dios-egraphs/Diospyros/c-tests/test1-local-array.c index 6423eebc..cb9ba117 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test1-local-array.c +++ b/src/dios-egraphs/Diospyros/c-tests/test1-local-array.c @@ -96,8 +96,8 @@ int main(void) { printf("Expected X Output: %f\n", expectedx[i]); printf("E Output: %f\n", e[i]); printf("Expected E Output: %f\n", expectede[i]); - // assert(fabs(expectedA[i] - A[i]) < DELTA); - // assert(fabs(expectedx[i] - x[i]) < DELTA); - // assert(fabs(expectede[i] - e[i]) < DELTA); + assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedx[i] - x[i]) < DELTA); + assert(fabs(expectede[i] - e[i]) < DELTA); } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test2-local-array.c b/src/dios-egraphs/Diospyros/c-tests/test2-local-array.c similarity index 97% rename from src/dios-egraphs/Diospyros/fail-tests/test2-local-array.c rename to src/dios-egraphs/Diospyros/c-tests/test2-local-array.c index 4339b38b..d39765f3 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test2-local-array.c +++ b/src/dios-egraphs/Diospyros/c-tests/test2-local-array.c @@ -243,8 +243,8 @@ void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], } int main(void) { - time_t t = time(NULL); - srand((unsigned)time(&t)); + // time_t t = time(NULL); + // srand((unsigned)time(&t)); float A[SIZE * SIZE] = {0.0f}; for (int i = 0; i < SIZE * SIZE; i++) { @@ -263,14 +263,14 @@ int main(void) { for (int j = 0; j < SIZE; j++) { printf("Q Output: %f\n", Q[i * SIZE + j]); printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); - // assert(fabs(expectedQ[i] - Q[i]) < DELTA); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); } } for (int 
i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { printf("R Output: %f\n", R[i * SIZE + j]); printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); - // assert(fabs(expectedR[i] - R[i]) < DELTA); + assert(fabs(expectedR[i] - R[i]) < DELTA); } } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/test3-local-array.c b/src/dios-egraphs/Diospyros/c-tests/test3-local-array.c similarity index 98% rename from src/dios-egraphs/Diospyros/fail-tests/test3-local-array.c rename to src/dios-egraphs/Diospyros/c-tests/test3-local-array.c index 7adc9634..7495df9d 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/test3-local-array.c +++ b/src/dios-egraphs/Diospyros/c-tests/test3-local-array.c @@ -281,8 +281,8 @@ void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], } int main(void) { - time_t t = time(NULL); - srand((unsigned)time(&t)); + // time_t t = time(NULL); + // srand((unsigned)time(&t)); float A[SIZE * SIZE] = {0.0f}; for (int i = 0; i < SIZE * SIZE; i++) { @@ -301,14 +301,14 @@ int main(void) { for (int j = 0; j < SIZE; j++) { printf("Q Output: %f\n", Q[i * SIZE + j]); printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); - // assert(fabs(expectedQ[i] - Q[i]) < DELTA); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); } } for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { printf("R Output: %f\n", R[i * SIZE + j]); printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); - // assert(fabs(expectedR[i] - R[i]) < DELTA); + assert(fabs(expectedR[i] - R[i]) < DELTA); } } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/fft-FAIL.c b/src/dios-egraphs/Diospyros/fail-tests/fft-FAIL.c deleted file mode 100644 index 90c1babd..00000000 --- a/src/dios-egraphs/Diospyros/fail-tests/fft-FAIL.c +++ /dev/null @@ -1,78 +0,0 @@ -#include -#define SIZE 8 - -void fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], - float img_twid_in[SIZE / 2], float real_out[SIZE], - float img_out[SIZE]) { - int even = 0; - int odd = 0; - int log = 0; - int rootindex = 0; - int span = SIZE >> 1; - float temp = 0; - - for (int i = 0; i < SIZE; i++) { - real_out[i] = real_in[i]; - img_out[i] = img_in[i]; - } - - while (span != 0) { - odd = span; - while (odd < SIZE) { - odd = odd | span; - even = odd ^ span; - - temp = real_out[even] + real_out[odd]; - real_out[odd] = real_out[even] - real_out[odd]; - real_out[even] = temp; - - temp = img_out[even] + img_out[odd]; - img_out[odd] = img_out[even] - img_out[odd]; - img_out[even] = temp; - - rootindex = (even << log) & (SIZE - 1); - if (rootindex > 0) { - temp = real_twid_in[rootindex] * real_out[odd] - - img_twid_in[rootindex] * img_out[odd]; - img_out[odd] = real_twid_in[rootindex] * img_out[odd] + - img_twid_in[rootindex] * real_out[odd]; - real_out[odd] = temp; - } - odd += 1; - } - span >>= 1; - log += 1; - } -} - -int main(void) { - float real_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; - float img_in[SIZE] = {0, 1, 2, 3, 4, 5, 6, 7}; - float real_twid_in[SIZE / 2] = {4, 3, 2, 1}; - float img_twid_in[SIZE / 2] = {8, 7, 6, 5}; - float real_out[SIZE] = {1, 1, 1, 1, 1, 1, 1, 1}; - float img_out[SIZE] = {2, 3, 4, 5, 6, 7, 8, 9}; - fft(real_in, img_in, real_twid_in, img_twid_in, real_out, img_out); - for (int i = 0; i < SIZE; i++) { - printf("%f\n", real_out[i]); - } - for (int i = 0; i < SIZE; i++) { - printf("%f\n", img_out[i]); - } - // 36.000000 - // -4.000000 - // 12.000000 - // -20.000000 - // 44.000000 - // -20.000000 - // 76.000000 - // -116.000000 - 
// 28.000000 - // -4.000000 - // -36.000000 - // 28.000000 - // -100.000000 - // 28.000000 - // -4.000000 - // 60.000000 -} diff --git a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-FAIL.c b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-FAIL.c deleted file mode 100644 index c19c7117..00000000 --- a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-FAIL.c +++ /dev/null @@ -1,130 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define SIZE 4 - -float sgn(float v) __attribute__((always_inline)); -float naive_norm(float *x, int m) __attribute__((always_inline)); -void naive_transpose(float *a, int n) __attribute__((always_inline)); -void naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1, - int col2) __attribute__((always_inline)); - -float sgn(float v) { return (v > 0) - (v < 0); } - -// Naive implementation -void naive_transpose(float *a, int n) { - for (int i = 0; i < n; i++) { - for (int j = i + 1; j < n; j++) { - float tmp = a[i * n + j]; - a[i * n + j] = a[j * n + i]; - a[j * n + i] = tmp; - } - } -} - -float naive_norm(float *x, int m) { - float sum = 0; - for (int i = 0; i < m; i++) { - sum += pow(x[i], 2); - } - return sqrt(sum); -} - -void naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1, - int col2) { - for (int y = 0; y < row1; y++) { - for (int x = 0; x < col2; x++) { - c[col2 * y + x] = 0; - for (int k = 0; k < col1; k++) { - c[col2 * y + x] += a[col1 * y + k] * b[col2 * k + x]; - } - } - } -} - -void naive_qr_decomp(float *A, float *Q, float *R, int n) { - memcpy(R, A, sizeof(float) * n * n); - - // Build identity matrix of size n * n - float *I = (float *)calloc(sizeof(float), n * n); - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - I[i * n + j] = (i == j); - } - } - - // Householder - for (int k = 0; k < n - 1; k++) { - int m = n - k; - - float *x = (float *)calloc(sizeof(float), m); - float *e = (float *)calloc(sizeof(float), m); - for (int i = 0; i < m; i++) { - int row = k + i; - x[i] = R[row * n + k]; - e[i] = I[row * n + k]; - } - - float alpha = -sgn(x[0]) * naive_norm(x, m); - - float *u = (float *)calloc(sizeof(float), m); - float *v = (float *)calloc(sizeof(float), m); - for (int i = 0; i < m; i++) { - u[i] = x[i] + alpha * e[i]; - } - float norm_u = naive_norm(u, m); - for (int i = 0; i < m; i++) { - v[i] = u[i] / norm_u; - } - - float *q_min = (float *)calloc(sizeof(float), m * m); - for (int i = 0; i < m; i++) { - for (int j = 0; j < m; j++) { - float q_min_i = ((i == j) ? 1.0 : 0.0) - 2 * v[i] * v[j]; - q_min[i * m + j] = q_min_i; - } - } - - float *q_t = (float *)calloc(sizeof(float), n * n); - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - float q_t_i; - if ((i < k) || (j < k)) { - q_t_i = (i == j) ? 
1.0 : 0.0; - } else { - q_t_i = q_min[(i - k) * m + (j - k)]; - } - q_t[i * n + j] = q_t_i; - } - } - - if (k == 0) { - memcpy(Q, q_t, sizeof(float) * n * n); // Q = q_t - naive_matrix_multiply(q_t, A, R, n, n, n); // R = q_t * A - } else { - float *res = (float *)calloc(sizeof(float), n * n); - naive_matrix_multiply(q_t, Q, res, n, n, n); // R = q_t * A - memcpy(Q, res, sizeof(float) * n * n); - naive_matrix_multiply(q_t, R, res, n, n, n); // R = q_t * A - memcpy(R, res, sizeof(float) * n * n); - } - free(x); - free(e); - free(u); - free(v); - free(q_min); - free(q_t); - } - naive_transpose(Q, n); -} - -int main(void) { - float A[SIZE * SIZE] = {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4}; - float Q[SIZE * SIZE] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - float R[SIZE * SIZE] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - naive_qr_decomp(A, Q, R, SIZE); -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-FAIL.c b/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-FAIL.c deleted file mode 100644 index 6b8e4b72..00000000 --- a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-FAIL.c +++ /dev/null @@ -1,162 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#define SIZE 4 - -float sgn(float v) __attribute__((always_inline)); -float naive_norm(float *x, int m) __attribute__((always_inline)); -void naive_fixed_transpose(float *a) __attribute__((always_inline)); -void naive_fixed_matrix_multiply(float *a, float *b, float *c) - __attribute__((always_inline)); - -float sgn(float v) { return (v > 0) - (v < 0); } - -float naive_norm(float *x, int m) { - float sum = 0; - for (int i = 0; i < m; i++) { - sum += x[i] * x[i]; - } - return sqrtf(sum); -} - -// Naive with fixed size -void naive_fixed_transpose(float *a) { - for (int i = 0; i < SIZE; i++) { - for (int j = i + 1; j < SIZE; j++) { - float tmp = a[i * SIZE + j]; - a[i * SIZE + j] = a[j * SIZE + i]; - a[j * SIZE + i] = tmp; - } - } -} - -void naive_fixed_matrix_multiply(float *a, float *b, float *c) { - for (int y = 0; y < SIZE; y++) { - for (int x = 0; x < SIZE; x++) { - c[SIZE * y + x] = 0; - for (int k = 0; k < SIZE; k++) { - c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; - } - } - } -} - -void naive_fixed_qr_decomp(float *A, float *Q, float *R) { - memcpy(R, A, sizeof(float) * SIZE * SIZE); - - // Build identity matrix of size SIZE * SIZE - float *I = (float *)calloc(sizeof(float), SIZE * SIZE); - for (int i = 0; i < SIZE; i++) { - for (int j = 0; j < SIZE; j++) { - I[i * SIZE + j] = (i == j); - } - } - - // Householder - for (int k = 0; k < SIZE - 1; k++) { - int m = SIZE - k; - - float *x = (float *)calloc(sizeof(float), m); - float *e = (float *)calloc(sizeof(float), m); - for (int i = 0; i < m; i++) { - int row = k + i; - x[i] = R[row * SIZE + k]; - e[i] = I[row * SIZE + k]; - } - - float alpha = -sgn(x[0]) * naive_norm(x, m); - - float *u = (float *)calloc(sizeof(float), m); - float *v = (float *)calloc(sizeof(float), m); - for (int i = 0; i < m; i++) { - u[i] = x[i] + alpha * e[i]; - } - float norm_u = naive_norm(u, m); - for (int i = 0; i < m; i++) { - v[i] = u[i] / (norm_u + 0.00001f); - } - - float *q_min = (float *)calloc(sizeof(float), m * m); - for (int i = 0; i < m; i++) { - for (int j = 0; j < m; j++) { - float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; - q_min[i * m + j] = q_min_i; - } - } - - float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); - for (int i = 0; i < SIZE; i++) { - for (int j = 0; j < SIZE; j++) { - float q_t_i; - if ((i < k) || (j < k)) { - q_t_i = (i == j) ? 1.0f : 0.0f; - } else { - q_t_i = q_min[(i - k) * m + (j - k)]; - } - q_t[i * SIZE + j] = q_t_i; - } - } - - if (k == 0) { - memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t - naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A - } else { - float *res = (float *)calloc(sizeof(float), SIZE * SIZE); - naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A - memcpy(Q, res, sizeof(float) * SIZE * SIZE); - naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A - memcpy(R, res, sizeof(float) * SIZE * SIZE); - } - free(x); - free(e); - free(u); - free(v); - free(q_min); - free(q_t); - } - naive_fixed_transpose(Q); -} - -int main(void) { - float A[SIZE * SIZE] = {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4}; - float Q[SIZE * SIZE] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - float R[SIZE * SIZE] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - naive_fixed_qr_decomp(A, Q, R); - float expectedQ[SIZE * SIZE] = { - 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, - }; - for (int i = 0; i < SIZE; i++) { - for (int j = 0; j < SIZE; j++) { - printf("Q Output: %f\n", Q[i * SIZE + j]); - } - } - float expectedR[SIZE * SIZE] = { - 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, - }; - for (int i = 0; i < SIZE; i++) { - for (int j = 0; j < SIZE; j++) { - printf("R Output: %f\n", R[i * SIZE + j]); - } - } - // 1.000000 - // 2.000000 - // 3.000000 - // 4.000000 - // 1.000000 - // 2.000000 - // 3.000000 - // 4.000000 - // 1.000000 - // 2.000000 - // 3.000000 - // 4.000000 - // 1.000000 - // 2.000000 - // 3.000000 - // 4.000000 -} \ No newline at end of file From fba449d9221a41dc1037a44756427c11db1e4a50 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 10 May 2022 02:32:39 -0400 Subject: [PATCH 098/143] calloc test change --- .../Diospyros/LoadStoreMovement.cpp | 139 +++++++++++------- src/dios-egraphs/Diospyros/c-tests/calloc.c | 1 + 2 files changed, 84 insertions(+), 56 deletions(-) diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index fccd0071..0e363c83 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -30,7 +30,7 @@ struct LoadStoreMovementPass : public FunctionPass { } /** - * + * Move Loads as far forward as possible in LLVM IR */ void rewrite_loads(Function &F) { AliasAnalysis *AA = &getAnalysis().getAAResults(); @@ -94,75 +94,102 @@ struct LoadStoreMovementPass : public FunctionPass { // Otherwise, keep pushing back the load instruction --insertion_offset; } + } - // TODO - // First, insert clone all instructions, and insert them into the basic block at the very beginning - - // build ordered vector of cloned instructions - // build map from original vector to cloned vector - std::vector cloned_instrs = {}; - std::map original_to_clone_map = {}; - std::map clone_to_original_map = {}; - for (Instruction *instr : final_instrs_vec) { - Instruction *cloned_instr = instr->clone(); - cloned_instrs.push_back(cloned_instr); - original_to_clone_map[instr] = cloned_instr; - clone_to_original_map[cloned_instr] = instr; - } - - // Grab first instruction to build before at. 
- Instruction *first_instr = NULL; - for (auto &I : B) { - first_instr = dyn_cast(&I); - assert(first_instr != NULL); - break; - } - IRBuilder<> builder(first_instr); + // TODO + // First, insert clone all instructions, and insert them into the basic block at the very beginning + + // build ordered vector of cloned instructions + // build map from original vector to cloned vector + std::vector cloned_instrs = {}; + std::map original_to_clone_map = {}; + std::map clone_to_original_map = {}; + for (Instruction *instr : final_instrs_vec) { + Instruction *cloned_instr = instr->clone(); + cloned_instrs.push_back(cloned_instr); + original_to_clone_map[instr] = cloned_instr; + clone_to_original_map[cloned_instr] = instr; + } - for (Instruction *cloned_instr : cloned_instrs) { - // The cloned instruction has arguments pointing backwards to prior original instructions - // Some of these prior instructions themselves will themselves be cloned. - // We need to replace the prior original instructions with clones instructions - int num_operands = cloned_instr->getNumOperands(); - for (int i = 0; i < num_operands; i++) { - Value *clone_operand = cloned_instr->getOperand(i); - Instruction *clone_operand_instr = dyn_cast(clone_operand); - if (clone_operand_instr != NULL) { - if (original_to_clone_map.count(clone_operand_instr) > 0) { - Instruction *replacement_operand = original_to_clone_map[clone_operand_instr]; - Value *replacement_value = dyn_cast(replacement_operand); - assert(replacement_value != NULL); - cloned_instr->setOperand(i, replacement_value); - } + // Grab first instruction to build before at. + Instruction *first_instr = NULL; + for (auto &I : B) { + first_instr = dyn_cast(&I); + assert(first_instr != NULL); + break; + } + IRBuilder<> builder(first_instr); + builder.SetInsertPoint(&B); + + for (Instruction *cloned_instr : cloned_instrs) { + // The cloned instruction has arguments pointing backwards to prior original instructions + // Some of these prior instructions themselves will themselves be cloned. 
+ // We need to replace the prior original instructions with clones instructions + int num_operands = cloned_instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Value *clone_operand = cloned_instr->getOperand(i); + Instruction *clone_operand_instr = dyn_cast(clone_operand); + if (clone_operand_instr != NULL) { + if (original_to_clone_map.count(clone_operand_instr) > 0) { + Instruction *replacement_operand = original_to_clone_map[clone_operand_instr]; + Value *replacement_value = dyn_cast(replacement_operand); + assert(replacement_value != NULL); + cloned_instr->setOperand(i, replacement_value); } + } else { + Instruction *original_instr = clone_to_original_map[cloned_instr]; + cloned_instr->setOperand(i, original_instr->getOperand(i)); } - - // Furthermore, we need to change all uses of the original instruction to be the new cloned instruction - Instruction *original_instr = clone_to_original_map[cloned_instr]; - for (auto &U : original_instr->uses()) { - User *user = U.getUser(); - user->setOperand(U.getOperandNo(), cloned_instr); - } + } - // Finish by inserting cloned instruction - builder.Insert(cloned_instr); + builder.Insert(cloned_instr); + + // Furthermore, we need to change all uses of the original instruction to be the new cloned instruction + Instruction *original_instr = clone_to_original_map[cloned_instr]; + for (auto &U : original_instr->uses()) { + User *user = U.getUser(); + user->setOperand(U.getOperandNo(), cloned_instr); } + + // Finish by inserting cloned instruction + // builder.Insert(cloned_instr); } + + // To be safe, we also check if any of the new cloned instructions uses an original instruction, and if so, raise an error + // for (Instruction *cloned_instr : cloned_instrs) { + // int num_operands = cloned_instr->getNumOperands(); + // for (int i = 0; i < num_operands; i++) { + // Value *operand = cloned_instr->getOperand(i); + // } + // } + + // for (Instruction *original_instr : all_instrs) { + // Instruction *cloned_instr = original_to_clone_map[original_instr]; + // for (auto &U : original_instr->uses()) { + // User *user = U.getUser(); + // user->setOperand(U.getOperandNo(), cloned_instr); + // } + // } // Finally, delete all the original instructions in the basic block // Do this in reverse order. 
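// --- Illustrative sketch (standalone, not LLVM code): the clone-and-remap
// --- step above in miniature. Every "instruction" is cloned, an
// --- original->clone map is recorded, and each clone's operands that refer
// --- to other instructions in the block are redirected through that map so
// --- the originals can later be deleted. Node, operands, and the names
// --- a/b/c are made-up stand-ins; the use-rewriting and deletion steps are
// --- omitted; only the standard library is assumed.
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Node {
    std::string name;
    std::vector<Node *> operands;  // analogue of an instruction's operands
};

int main() {
    // Original "instructions": c uses a and b.
    Node a{"a", {}};
    Node b{"b", {}};
    Node c{"c", {&a, &b}};
    std::vector<Node *> originals = {&a, &b, &c};

    // Clone every node and remember which clone replaces which original
    // (the analogue of original_to_clone_map).
    std::vector<Node *> clones;
    std::map<Node *, Node *> originalToClone;
    for (Node *n : originals) {
        Node *clone = new Node(*n);  // copied operands still point at originals
        originalToClone[n] = clone;
        clones.push_back(clone);
    }

    // Redirect each clone's operands to the clones of its original operands;
    // operands with no map entry (defined outside the "block") are left alone.
    for (Node *clone : clones) {
        for (Node *&op : clone->operands) {
            auto found = originalToClone.find(op);
            if (found != originalToClone.end()) {
                op = found->second;
            }
        }
    }

    // After remapping, the clone of c no longer uses the original a.
    std::cout << std::boolalpha
              << (originalToClone[&c]->operands[0] == &a) << "\n";  // false

    for (Node *clone : clones) {
        delete clone;
    }
    return 0;
}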
- std::reverse(all_instrs.begin(), all_instrs.end()); - for (Instruction *instr : all_instrs) { - if (!instr->isTerminator()) { - instr->eraseFromParent(); - } - } + // std::reverse(all_instrs.begin(), all_instrs.end()); + // for (Instruction *instr : all_instrs) { + // instr->eraseFromParent(); + // } } } + /** + * Move Stores Back As Far As Possible in the LLVM IR + */ void rewrite_stores(Function &F) { AliasAnalysis *AA = &getAnalysis().getAAResults(); for (auto &B : F) { + // Collect all instructions + + // Collect all stores + // If a store } } @@ -177,8 +204,8 @@ struct LoadStoreMovementPass : public FunctionPass { F.getName().substr(0, NO_OPT_PREFIX.size()) == NO_OPT_PREFIX)) { return false; } - // rewrite_loads(F); - // rewrite_loads(F); + rewrite_loads(F); + // rewrite_stores(F); return true; } diff --git a/src/dios-egraphs/Diospyros/c-tests/calloc.c b/src/dios-egraphs/Diospyros/c-tests/calloc.c index d8e0f10a..b07ebe35 100644 --- a/src/dios-egraphs/Diospyros/c-tests/calloc.c +++ b/src/dios-egraphs/Diospyros/c-tests/calloc.c @@ -1,4 +1,5 @@ #include +#include #include #define SIZE 4 From 27a044ebc1bf9555f907c80476dcaa9d7ec871a1 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 10 May 2022 03:01:14 -0400 Subject: [PATCH 099/143] stores push back fails on many tests works on add.c otherwise same issue with it crashing/hanging/infiniteloop/assertion error/other LLVM ir error --- .../Diospyros/LoadStoreMovement.cpp | 127 ++++++++++++++++-- 1 file changed, 117 insertions(+), 10 deletions(-) diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index 0e363c83..80a9d6f9 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -96,7 +96,6 @@ struct LoadStoreMovementPass : public FunctionPass { } } - // TODO // First, insert clone all instructions, and insert them into the basic block at the very beginning // build ordered vector of cloned instructions @@ -173,10 +172,10 @@ struct LoadStoreMovementPass : public FunctionPass { // Finally, delete all the original instructions in the basic block // Do this in reverse order. - // std::reverse(all_instrs.begin(), all_instrs.end()); - // for (Instruction *instr : all_instrs) { - // instr->eraseFromParent(); - // } + std::reverse(all_instrs.begin(), all_instrs.end()); + for (Instruction *instr : all_instrs) { + instr->eraseFromParent(); + } } } @@ -186,10 +185,118 @@ struct LoadStoreMovementPass : public FunctionPass { void rewrite_stores(Function &F) { AliasAnalysis *AA = &getAnalysis().getAAResults(); for (auto &B : F) { - // Collect all instructions + // Grab all instructions + std::vector all_instrs = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + all_instrs.push_back(instr); + } + + // Perform Pushing Back of Store Instructions + std::vector final_instrs_vec = {}; + for (BasicBlock::reverse_iterator iter = B.rbegin(); + iter != B.rend(); ++iter) { + Instruction *instr = &(*iter); + assert(instr != NULL); + + // Place any non-Load Instructions at the end of the list of instructions + if (!isa(instr)) { + final_instrs_vec.push_back(instr); + continue; + } + + // Handle Load Instructions + int insertion_offset = final_instrs_vec.size(); + while(true) { + Instruction *store_instr = instr; + + // If there is no prior instruction, push back at current offset, and stop. 
+ if (insertion_offset - 1 < 0) { + final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, store_instr); + break; + } + + // If the prior instruction is a terminator, do not push the current instruction back + Instruction *prior_instr = final_instrs_vec[insertion_offset - 1]; + if (prior_instr->isTerminator()) { + final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, store_instr); + break; + } + + // If the prior instruction alias with the store + // instruction, do not push the store back + if (!AA->isNoAlias(store_instr, prior_instr)) { + final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, store_instr); + break; + } + // Otherwise, keep pushing back the str instruction + --insertion_offset; + } + } + + // build ordered vector of cloned instructions + // build map from original vector to cloned vector + std::reverse(final_instrs_vec.begin(), + final_instrs_vec.end()); + std::vector cloned_instrs = {}; + std::map original_to_clone_map = {}; + std::map clone_to_original_map = {}; + for (Instruction *instr : final_instrs_vec) { + Instruction *cloned_instr = instr->clone(); + cloned_instrs.push_back(cloned_instr); + original_to_clone_map[instr] = cloned_instr; + clone_to_original_map[cloned_instr] = instr; + } + + // Grab first instruction to build before at. + Instruction *first_instr = NULL; + for (auto &I : B) { + first_instr = dyn_cast(&I); + assert(first_instr != NULL); + break; + } + IRBuilder<> builder(first_instr); + builder.SetInsertPoint(&B); + + for (Instruction *cloned_instr : cloned_instrs) { + // The cloned instruction has arguments pointing backwards to prior original instructions + // Some of these prior instructions themselves will themselves be cloned. + // We need to replace the prior original instructions with clones instructions + int num_operands = cloned_instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Value *clone_operand = cloned_instr->getOperand(i); + Instruction *clone_operand_instr = dyn_cast(clone_operand); + if (clone_operand_instr != NULL) { + if (original_to_clone_map.count(clone_operand_instr) > 0) { + Instruction *replacement_operand = original_to_clone_map[clone_operand_instr]; + Value *replacement_value = dyn_cast(replacement_operand); + assert(replacement_value != NULL); + cloned_instr->setOperand(i, replacement_value); + } + } else { + Instruction *original_instr = clone_to_original_map[cloned_instr]; + cloned_instr->setOperand(i, original_instr->getOperand(i)); + } + } + + // Furthermore, we need to change all uses of the original instruction to be the new cloned instruction + Instruction *original_instr = clone_to_original_map[cloned_instr]; + for (auto &U : original_instr->uses()) { + User *user = U.getUser(); + user->setOperand(U.getOperandNo(), cloned_instr); + } - // Collect all stores - // If a store + // Finish by inserting cloned instruction + builder.Insert(cloned_instr); + } + + // Finally, delete all the original instructions in the basic block + // Do this in reverse order. 
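// --- Illustrative sketch (standalone, not LLVM code) of the store
// --- "push back" scheduling implemented above: walk the block in reverse,
// --- keep non-stores in their relative order, and let each store sink past
// --- already-placed (i.e. later) instructions until one may conflict with
// --- it. mayConflict() is a made-up stand-in for the pass's alias-analysis
// --- query; the terminator barrier is omitted for brevity.
#include <iostream>
#include <string>
#include <vector>

// Toy "instruction": location stands in for the memory a store or load
// touches; -1 means it does not touch memory at all.
struct Instr {
    std::string name;
    bool isStore;
    int location;
};

// Stand-in for the alias-analysis query used by the real pass.
bool mayConflict(const Instr &a, const Instr &b) {
    return a.location != -1 && b.location != -1 && a.location == b.location;
}

int main() {
    // A pretend basic block, in program order.
    std::vector<Instr> block = {{"store A", true, 0},
                                {"add", false, -1},
                                {"store B", true, 1},
                                {"load A", false, 0}};

    // Build the schedule in reverse program order.
    std::vector<Instr> reversed;
    for (auto it = block.rbegin(); it != block.rend(); ++it) {
        if (!it->isStore) {
            reversed.push_back(*it);
            continue;
        }
        // Sink the store past non-conflicting instructions that come after
        // it in program order (lower indices of the reversed schedule).
        int offset = static_cast<int>(reversed.size());
        while (offset > 0 && !mayConflict(*it, reversed[offset - 1])) {
            --offset;
        }
        reversed.insert(reversed.begin() + offset, *it);
    }

    // Print the schedule forwards: add, store A, load A, store B.
    for (auto it = reversed.rbegin(); it != reversed.rend(); ++it) {
        std::cout << it->name << "\n";
    }
    return 0;
}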
+ std::reverse(all_instrs.begin(), all_instrs.end()); + for (Instruction *instr : all_instrs) { + instr->eraseFromParent(); + } } } @@ -204,8 +311,8 @@ struct LoadStoreMovementPass : public FunctionPass { F.getName().substr(0, NO_OPT_PREFIX.size()) == NO_OPT_PREFIX)) { return false; } - rewrite_loads(F); - // rewrite_stores(F); + // rewrite_loads(F); + rewrite_stores(F); return true; } From 4a98f7dd01391999149627b42f6405e41011fb5d Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 25 May 2022 02:50:56 -0400 Subject: [PATCH 100/143] start report --- .../Diospyros/LoadStoreMovement.cpp | 146 +++++++++++------- src/dios-egraphs/Diospyros/report.md | 9 ++ 2 files changed, 101 insertions(+), 54 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/report.md diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index 80a9d6f9..e9db6512 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -31,11 +31,10 @@ struct LoadStoreMovementPass : public FunctionPass { /** * Move Loads as far forward as possible in LLVM IR - */ + */ void rewrite_loads(Function &F) { AliasAnalysis *AA = &getAnalysis().getAAResults(); for (auto &B : F) { - // Grab all instructions std::vector all_instrs = {}; for (auto &I : B) { @@ -49,8 +48,9 @@ struct LoadStoreMovementPass : public FunctionPass { for (auto &I : B) { Instruction *instr = dyn_cast(&I); assert(instr != NULL); - - // Place any non-Load Instructions at the end of the list of instructions + + // Place any non-Load Instructions at the end of the list of + // instructions if (!isa(instr)) { final_instrs_vec.push_back(instr); continue; @@ -60,12 +60,16 @@ struct LoadStoreMovementPass : public FunctionPass { int insertion_offset = final_instrs_vec.size(); while (true) { Instruction *load_instr = instr; - // If there is no prior instruction, push back at current offset, and stop. + // If there is no prior instruction, push back at current + // offset, and stop. 
if (insertion_offset - 1 < 0) { - final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, load_instr); + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); break; } - Instruction *prior_instr = final_instrs_vec[insertion_offset - 1]; + Instruction *prior_instr = + final_instrs_vec[insertion_offset - 1]; // If the prior instruction is used in the load's // arguments, do not push it back int num_operands = load_instr->getNumOperands(); @@ -76,27 +80,32 @@ struct LoadStoreMovementPass : public FunctionPass { dyn_cast(load_operand); if (load_operand_instr != NULL) { if (load_operand_instr == prior_instr) { - final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, load_instr); + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); break_while = true; break; } } - } + } if (break_while) { break; } // If the prior instruction alias with the load // instruction, do not push it back if (!AA->isNoAlias(load_instr, prior_instr)) { - final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, load_instr); + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); break; } // Otherwise, keep pushing back the load instruction --insertion_offset; } } - - // First, insert clone all instructions, and insert them into the basic block at the very beginning + + // First, insert clone all instructions, and insert them into the + // basic block at the very beginning // build ordered vector of cloned instructions // build map from original vector to cloned vector @@ -121,30 +130,39 @@ struct LoadStoreMovementPass : public FunctionPass { builder.SetInsertPoint(&B); for (Instruction *cloned_instr : cloned_instrs) { - // The cloned instruction has arguments pointing backwards to prior original instructions - // Some of these prior instructions themselves will themselves be cloned. - // We need to replace the prior original instructions with clones instructions + // The cloned instruction has arguments pointing backwards to + // prior original instructions Some of these prior instructions + // themselves will themselves be cloned. 
We need to replace the + // prior original instructions with clones instructions int num_operands = cloned_instr->getNumOperands(); for (int i = 0; i < num_operands; i++) { Value *clone_operand = cloned_instr->getOperand(i); - Instruction *clone_operand_instr = dyn_cast(clone_operand); + Instruction *clone_operand_instr = + dyn_cast(clone_operand); if (clone_operand_instr != NULL) { - if (original_to_clone_map.count(clone_operand_instr) > 0) { - Instruction *replacement_operand = original_to_clone_map[clone_operand_instr]; - Value *replacement_value = dyn_cast(replacement_operand); + if (original_to_clone_map.count(clone_operand_instr) > + 0) { + Instruction *replacement_operand = + original_to_clone_map[clone_operand_instr]; + Value *replacement_value = + dyn_cast(replacement_operand); assert(replacement_value != NULL); cloned_instr->setOperand(i, replacement_value); } } else { - Instruction *original_instr = clone_to_original_map[cloned_instr]; - cloned_instr->setOperand(i, original_instr->getOperand(i)); + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + cloned_instr->setOperand(i, + original_instr->getOperand(i)); } } builder.Insert(cloned_instr); - - // Furthermore, we need to change all uses of the original instruction to be the new cloned instruction - Instruction *original_instr = clone_to_original_map[cloned_instr]; + + // Furthermore, we need to change all uses of the original + // instruction to be the new cloned instruction + Instruction *original_instr = + clone_to_original_map[cloned_instr]; for (auto &U : original_instr->uses()) { User *user = U.getUser(); user->setOperand(U.getOperandNo(), cloned_instr); @@ -154,8 +172,9 @@ struct LoadStoreMovementPass : public FunctionPass { // builder.Insert(cloned_instr); } - // To be safe, we also check if any of the new cloned instructions uses an original instruction, and if so, raise an error - // for (Instruction *cloned_instr : cloned_instrs) { + // To be safe, we also check if any of the new cloned instructions + // uses an original instruction, and if so, raise an error for + // (Instruction *cloned_instr : cloned_instrs) { // int num_operands = cloned_instr->getNumOperands(); // for (int i = 0; i < num_operands; i++) { // Value *operand = cloned_instr->getOperand(i); @@ -163,13 +182,14 @@ struct LoadStoreMovementPass : public FunctionPass { // } // for (Instruction *original_instr : all_instrs) { - // Instruction *cloned_instr = original_to_clone_map[original_instr]; - // for (auto &U : original_instr->uses()) { + // Instruction *cloned_instr = + // original_to_clone_map[original_instr]; for (auto &U : + // original_instr->uses()) { // User *user = U.getUser(); // user->setOperand(U.getOperandNo(), cloned_instr); // } // } - + // Finally, delete all the original instructions in the basic block // Do this in reverse order. 
std::reverse(all_instrs.begin(), all_instrs.end()); @@ -181,7 +201,7 @@ struct LoadStoreMovementPass : public FunctionPass { /** * Move Stores Back As Far As Possible in the LLVM IR - */ + */ void rewrite_stores(Function &F) { AliasAnalysis *AA = &getAnalysis().getAAResults(); for (auto &B : F) { @@ -199,8 +219,9 @@ struct LoadStoreMovementPass : public FunctionPass { iter != B.rend(); ++iter) { Instruction *instr = &(*iter); assert(instr != NULL); - - // Place any non-Load Instructions at the end of the list of instructions + + // Place any non-Load Instructions at the end of the list of + // instructions if (!isa(instr)) { final_instrs_vec.push_back(instr); continue; @@ -208,26 +229,35 @@ struct LoadStoreMovementPass : public FunctionPass { // Handle Load Instructions int insertion_offset = final_instrs_vec.size(); - while(true) { + while (true) { Instruction *store_instr = instr; - // If there is no prior instruction, push back at current offset, and stop. + // If there is no prior instruction, push back at current + // offset, and stop. if (insertion_offset - 1 < 0) { - final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, store_instr); + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + store_instr); break; } - // If the prior instruction is a terminator, do not push the current instruction back - Instruction *prior_instr = final_instrs_vec[insertion_offset - 1]; + // If the prior instruction is a terminator, do not push the + // current instruction back + Instruction *prior_instr = + final_instrs_vec[insertion_offset - 1]; if (prior_instr->isTerminator()) { - final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, store_instr); + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + store_instr); break; } // If the prior instruction alias with the store // instruction, do not push the store back if (!AA->isNoAlias(store_instr, prior_instr)) { - final_instrs_vec.insert(final_instrs_vec.begin() + insertion_offset, store_instr); + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + store_instr); break; } // Otherwise, keep pushing back the str instruction @@ -237,8 +267,7 @@ struct LoadStoreMovementPass : public FunctionPass { // build ordered vector of cloned instructions // build map from original vector to cloned vector - std::reverse(final_instrs_vec.begin(), - final_instrs_vec.end()); + std::reverse(final_instrs_vec.begin(), final_instrs_vec.end()); std::vector cloned_instrs = {}; std::map original_to_clone_map = {}; std::map clone_to_original_map = {}; @@ -260,28 +289,37 @@ struct LoadStoreMovementPass : public FunctionPass { builder.SetInsertPoint(&B); for (Instruction *cloned_instr : cloned_instrs) { - // The cloned instruction has arguments pointing backwards to prior original instructions - // Some of these prior instructions themselves will themselves be cloned. - // We need to replace the prior original instructions with clones instructions + // The cloned instruction has arguments pointing backwards to + // prior original instructions Some of these prior instructions + // themselves will themselves be cloned. 
We need to replace the + // prior original instructions with clones instructions int num_operands = cloned_instr->getNumOperands(); for (int i = 0; i < num_operands; i++) { Value *clone_operand = cloned_instr->getOperand(i); - Instruction *clone_operand_instr = dyn_cast(clone_operand); + Instruction *clone_operand_instr = + dyn_cast(clone_operand); if (clone_operand_instr != NULL) { - if (original_to_clone_map.count(clone_operand_instr) > 0) { - Instruction *replacement_operand = original_to_clone_map[clone_operand_instr]; - Value *replacement_value = dyn_cast(replacement_operand); + if (original_to_clone_map.count(clone_operand_instr) > + 0) { + Instruction *replacement_operand = + original_to_clone_map[clone_operand_instr]; + Value *replacement_value = + dyn_cast(replacement_operand); assert(replacement_value != NULL); cloned_instr->setOperand(i, replacement_value); } } else { - Instruction *original_instr = clone_to_original_map[cloned_instr]; - cloned_instr->setOperand(i, original_instr->getOperand(i)); + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + cloned_instr->setOperand(i, + original_instr->getOperand(i)); } } - - // Furthermore, we need to change all uses of the original instruction to be the new cloned instruction - Instruction *original_instr = clone_to_original_map[cloned_instr]; + + // Furthermore, we need to change all uses of the original + // instruction to be the new cloned instruction + Instruction *original_instr = + clone_to_original_map[cloned_instr]; for (auto &U : original_instr->uses()) { User *user = U.getUser(); user->setOperand(U.getOperandNo(), cloned_instr); @@ -312,7 +350,7 @@ struct LoadStoreMovementPass : public FunctionPass { return false; } // rewrite_loads(F); - rewrite_stores(F); + // rewrite_stores(F); return true; } diff --git a/src/dios-egraphs/Diospyros/report.md b/src/dios-egraphs/Diospyros/report.md new file mode 100644 index 00000000..73ca9a34 --- /dev/null +++ b/src/dios-egraphs/Diospyros/report.md @@ -0,0 +1,9 @@ +# Spring 2022 Report + +## + +## + +## + +## \ No newline at end of file From 6ec12b157aaccf8c5130e337e57a5d22407da027 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 26 May 2022 19:59:02 -0400 Subject: [PATCH 101/143] store movement works --- .../Diospyros/LoadStoreMovement.cpp | 61 ++++++++++++++++--- 1 file changed, 53 insertions(+), 8 deletions(-) diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index e9db6512..60723c60 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -252,14 +252,42 @@ struct LoadStoreMovementPass : public FunctionPass { break; } - // If the prior instruction alias with the store - // instruction, do not push the store back - if (!AA->isNoAlias(store_instr, prior_instr)) { + // If the prior Instruction is a call inst, do not push the + // current instruction back + // A call instruciton could have side effects to memory + // In addition, a call could be to @llvm.memset.p0i8.i64(i8* + // nonnull align 16 dereferenceable(40) %2, i8 0, i64 40, i1 + // false) or @memset_pattern16(i8* nonnull %2, i8* bitcast + // ([4 x float]* @.memset_pattern to i8*), i64 40) #6 which + // require alias analysis as well + + // TODO: discriminate calls to llvm memset + if (isa(prior_instr)) { final_instrs_vec.insert( final_instrs_vec.begin() + insertion_offset, store_instr); break; } + + // If the prior instruction alias with the store + // instruction, do not push the 
store back + if (prior_instr->mayReadOrWriteMemory()) { + Value *prior_addr = NULL; + if (isa(prior_instr)) { + prior_addr = prior_instr->getOperand(0); + } else if (isa(prior_instr)) { + prior_addr = prior_instr->getOperand(1); + } else { + throw "Unmatched Instruction Type"; + } + Value *store_addr = store_instr->getOperand(1); + if (!AA->isNoAlias(store_addr, prior_addr)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + store_instr); + break; + } + } // Otherwise, keep pushing back the str instruction --insertion_offset; } @@ -289,6 +317,16 @@ struct LoadStoreMovementPass : public FunctionPass { builder.SetInsertPoint(&B); for (Instruction *cloned_instr : cloned_instrs) { + // set insert point to be before beginning if inserting phi + // instruction + if (isa(cloned_instr)) { + builder.SetInsertPoint(first_instr); + } + builder.Insert(cloned_instr); + if (isa(cloned_instr)) { + builder.SetInsertPoint(&B); + } + // The cloned instruction has arguments pointing backwards to // prior original instructions Some of these prior instructions // themselves will themselves be cloned. We need to replace the @@ -320,13 +358,20 @@ struct LoadStoreMovementPass : public FunctionPass { // instruction to be the new cloned instruction Instruction *original_instr = clone_to_original_map[cloned_instr]; - for (auto &U : original_instr->uses()) { - User *user = U.getUser(); - user->setOperand(U.getOperandNo(), cloned_instr); + if (Value *original_val = dyn_cast(original_instr)) { + Value *cloned_val = dyn_cast(cloned_instr); + assert(cloned_val != NULL); + original_val->replaceAllUsesWith(cloned_val); } + // for (auto &U : original_instr->uses()) { + // User *user = U.getUser(); + // errs() << "Cloned Instr\n"; + // errs() << *cloned_instr << "\n"; + // user->setOperand(U.getOperandNo(), cloned_instr); + // } // Finish by inserting cloned instruction - builder.Insert(cloned_instr); + // builder.Insert(cloned_instr); } // Finally, delete all the original instructions in the basic block @@ -350,7 +395,7 @@ struct LoadStoreMovementPass : public FunctionPass { return false; } // rewrite_loads(F); - // rewrite_stores(F); + rewrite_stores(F); return true; } From e444222814208af69aff226e1d6466402a37d95f Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 26 May 2022 20:33:35 -0400 Subject: [PATCH 102/143] fix load movement --- .../Diospyros/LoadStoreMovement.cpp | 106 ++++++++++-------- 1 file changed, 62 insertions(+), 44 deletions(-) diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index 60723c60..f6f49c89 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -68,8 +68,36 @@ struct LoadStoreMovementPass : public FunctionPass { load_instr); break; } + Instruction *prior_instr = final_instrs_vec[insertion_offset - 1]; + + // If the prior instruction is a phi node, do not push the + // current instruction back + if (isa(prior_instr)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + // If the prior Instruction is a call inst, do not push the + // current instruction back + // A call instruciton could have side effects to memory + // In addition, a call could be to @llvm.memset.p0i8.i64(i8* + // nonnull align 16 dereferenceable(40) %2, i8 0, i64 40, i1 + // false) or @memset_pattern16(i8* nonnull %2, i8* bitcast + // ([4 x float]* @.memset_pattern to i8*), i64 40) #6 which + // require alias 
analysis as well + + // TODO: discriminate calls to llvm memset + if (isa(prior_instr)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + // If the prior instruction is used in the load's // arguments, do not push it back int num_operands = load_instr->getNumOperands(); @@ -91,13 +119,25 @@ struct LoadStoreMovementPass : public FunctionPass { if (break_while) { break; } + // If the prior instruction alias with the load - // instruction, do not push it back - if (!AA->isNoAlias(load_instr, prior_instr)) { - final_instrs_vec.insert( - final_instrs_vec.begin() + insertion_offset, - load_instr); - break; + // instruction, do not push the store back + if (prior_instr->mayReadOrWriteMemory()) { + Value *prior_addr = NULL; + if (isa(prior_instr)) { + prior_addr = prior_instr->getOperand(0); + } else if (isa(prior_instr)) { + prior_addr = prior_instr->getOperand(1); + } else { + throw "Unmatched Instruction Type"; + } + Value *load_addr = load_instr->getOperand(0); + if (!AA->isNoAlias(load_addr, prior_addr)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } } // Otherwise, keep pushing back the load instruction --insertion_offset; @@ -130,8 +170,17 @@ struct LoadStoreMovementPass : public FunctionPass { builder.SetInsertPoint(&B); for (Instruction *cloned_instr : cloned_instrs) { + // set insert point to be before beginning if inserting phi + // instruction + if (isa(cloned_instr)) { + builder.SetInsertPoint(first_instr); + } + builder.Insert(cloned_instr); + if (isa(cloned_instr)) { + builder.SetInsertPoint(&B); + } // The cloned instruction has arguments pointing backwards to - // prior original instructions Some of these prior instructions + // prior original instructions. Some of these prior instructions // themselves will themselves be cloned. We need to replace the // prior original instructions with clones instructions int num_operands = cloned_instr->getNumOperands(); @@ -157,39 +206,17 @@ struct LoadStoreMovementPass : public FunctionPass { } } - builder.Insert(cloned_instr); - // Furthermore, we need to change all uses of the original // instruction to be the new cloned instruction Instruction *original_instr = clone_to_original_map[cloned_instr]; - for (auto &U : original_instr->uses()) { - User *user = U.getUser(); - user->setOperand(U.getOperandNo(), cloned_instr); + if (Value *original_val = dyn_cast(original_instr)) { + Value *cloned_val = dyn_cast(cloned_instr); + assert(cloned_val != NULL); + original_val->replaceAllUsesWith(cloned_val); } - - // Finish by inserting cloned instruction - // builder.Insert(cloned_instr); } - // To be safe, we also check if any of the new cloned instructions - // uses an original instruction, and if so, raise an error for - // (Instruction *cloned_instr : cloned_instrs) { - // int num_operands = cloned_instr->getNumOperands(); - // for (int i = 0; i < num_operands; i++) { - // Value *operand = cloned_instr->getOperand(i); - // } - // } - - // for (Instruction *original_instr : all_instrs) { - // Instruction *cloned_instr = - // original_to_clone_map[original_instr]; for (auto &U : - // original_instr->uses()) { - // User *user = U.getUser(); - // user->setOperand(U.getOperandNo(), cloned_instr); - // } - // } - // Finally, delete all the original instructions in the basic block // Do this in reverse order. 
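Both rewrite loops now apply the same legality test before an access is moved past a neighbouring instruction: stop at control flow, stop at calls (which may write memory through intrinsics such as llvm.memset or memset_pattern16), keep data dependences, and otherwise ask alias analysis about the two pointers. A compressed sketch of that test follows, folding the load and store variants together; `can_move_past` is an illustrative name, `addr` is getOperand(0) for a load and getOperand(1) for a store, and `AA` is the AAResults the pass fetches from AAResultsWrapperPass.

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // May `instr` (the load being hoisted or the store being sunk) be
    // reordered past `prior`?  `addr` is the pointer operand of `instr`.
    static bool can_move_past(Instruction *instr, Value *addr,
                              Instruction *prior, AliasAnalysis *AA) {
        // Do not cross control flow or PHI nodes.
        if (prior->isTerminator() || isa<PHINode>(prior))
            return false;
        // Calls may have arbitrary memory effects, so stop at them.
        if (isa<CallInst>(prior))
            return false;
        // Keep data dependences: a producer of one of instr's operands
        // has to stay in front of it.
        for (unsigned i = 0; i < instr->getNumOperands(); ++i)
            if (instr->getOperand(i) == prior)
                return false;
        // For other memory operations, compare the two addresses.
        if (prior->mayReadOrWriteMemory()) {
            Value *prior_addr = nullptr;
            if (auto *ld = dyn_cast<LoadInst>(prior))
                prior_addr = ld->getPointerOperand();
            else if (auto *st = dyn_cast<StoreInst>(prior))
                prior_addr = st->getPointerOperand();
            else
                return false;  // unknown memory effect: do not move
            return AA->isNoAlias(addr, prior_addr);
        }
        return true;
    }

The real loops inline these checks and differ slightly between directions (the store loop keys on terminators, the load loop on PHIs), but the per-neighbour decision they compute is the one above.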
std::reverse(all_instrs.begin(), all_instrs.end()); @@ -328,7 +355,7 @@ struct LoadStoreMovementPass : public FunctionPass { } // The cloned instruction has arguments pointing backwards to - // prior original instructions Some of these prior instructions + // prior original instructions. Some of these prior instructions // themselves will themselves be cloned. We need to replace the // prior original instructions with clones instructions int num_operands = cloned_instr->getNumOperands(); @@ -363,15 +390,6 @@ struct LoadStoreMovementPass : public FunctionPass { assert(cloned_val != NULL); original_val->replaceAllUsesWith(cloned_val); } - // for (auto &U : original_instr->uses()) { - // User *user = U.getUser(); - // errs() << "Cloned Instr\n"; - // errs() << *cloned_instr << "\n"; - // user->setOperand(U.getOperandNo(), cloned_instr); - // } - - // Finish by inserting cloned instruction - // builder.Insert(cloned_instr); } // Finally, delete all the original instructions in the basic block @@ -394,7 +412,7 @@ struct LoadStoreMovementPass : public FunctionPass { F.getName().substr(0, NO_OPT_PREFIX.size()) == NO_OPT_PREFIX)) { return false; } - // rewrite_loads(F); + rewrite_loads(F); rewrite_stores(F); return true; From a2239595337c9d67439e9c3ad0c26f9553feae7d Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 26 May 2022 20:35:54 -0400 Subject: [PATCH 103/143] remove old ld st movement file --- .../Diospyros/OldLoadStoreMovement.cpp | 282 ------------------ 1 file changed, 282 deletions(-) delete mode 100644 src/dios-egraphs/Diospyros/OldLoadStoreMovement.cpp diff --git a/src/dios-egraphs/Diospyros/OldLoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/OldLoadStoreMovement.cpp deleted file mode 100644 index da11c170..00000000 --- a/src/dios-egraphs/Diospyros/OldLoadStoreMovement.cpp +++ /dev/null @@ -1,282 +0,0 @@ - -#include - -#include -#include -#include - -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Pass.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" - -using namespace llvm; - -namespace { -struct LoadStoreMovementPass : public FunctionPass { - static char ID; - LoadStoreMovementPass() : FunctionPass(ID) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - } - - void rewrite_stores(Function &F) { - AliasAnalysis *AA = &getAnalysis().getAAResults(); - for (auto &B : F) { - bool has_float = false; - for (auto &I : B) { - if (I.getType()->isFloatTy()) { - has_float = true; - } - } - if (!has_float) { - continue; - } - // We also skip over all basic blocks without stores - bool has_store = false; - for (auto &I : B) { - if (auto *op = dyn_cast(&I)) { - has_store = true; - } - } - if (!has_store) { - continue; - } - - std::vector reversed_instructions = {}; - std::vector all_instructions = {}; - int head_pointer = - -1; // points to head location in all_instructions - Instruction *first_instr = NULL; - for (BasicBlock::reverse_iterator iter = B.rbegin(); - iter != B.rend(); ++iter) { - Instruction *I = &(*iter); - first_instr = I; - if (auto *store_op = dyn_cast(I)) { - if (head_pointer < 0) { - reversed_instructions.push_back(I); - } else { - int current_counter = head_pointer; - while (current_counter >= 0) { - Instruction *curr_instr = - 
reversed_instructions[current_counter]; - if (curr_instr->isTerminator()) { - ++current_counter; - break; - } else if (auto *other_store_op = - dyn_cast(curr_instr)) { - if (AA->isNoAlias( - store_op->getOperand(1), - other_store_op->getOperand(1))) { - --current_counter; - } else { - break; - } - } else if (auto *load_op = - dyn_cast(curr_instr)) { - if (AA->isNoAlias(store_op->getOperand(1), - load_op->getOperand(0))) { - --current_counter; - } else { - break; - } - } else { - --current_counter; - } - } - // Do the insertion - reversed_instructions.insert( - reversed_instructions.begin() + current_counter, I); - } - } else { - reversed_instructions.push_back(I); - } - ++head_pointer; - all_instructions.push_back(I); - } - if (first_instr == NULL) { - assert(false); - } - IRBuilder<> builder(first_instr); - // we add the instructions at the end - builder.SetInsertPoint(&B); - // here we are going to add back our instructions - std::reverse(reversed_instructions.begin(), - reversed_instructions.end()); - BasicBlock::InstListType &bb_instrs = B.getInstList(); - std::map original_to_clone_map = {}; - for (auto &I : reversed_instructions) { - // we clone the original instruciton, then insert into builder - Instruction *cloned_instr = I->clone(); - // when adding, need to take caution about the users - original_to_clone_map[I] = cloned_instr; - for (unsigned int i = 0; i < I->getNumOperands(); i++) { - Value *operand = I->getOperand(i); - Instruction *operand_instr = dyn_cast(operand); - if (original_to_clone_map.find(operand_instr) != - original_to_clone_map.end()) { - Instruction *clone_instr = - original_to_clone_map[operand_instr]; - Value *clone_value = dyn_cast(clone_instr); - cloned_instr->setOperand(i, clone_value); - } else { - cloned_instr->setOperand(i, operand); - } - } - bb_instrs.push_back(cloned_instr); - for (auto &U : I->uses()) { - User *user = U.getUser(); - user->setOperand(U.getOperandNo(), cloned_instr); - } - } - // here we need to delete all original instructions, going forwards - // with no reversal as they are in reversed order - for (auto &I : all_instructions) { - I->eraseFromParent(); - } - } - } - - void rewrite_loads(Function &F) { - AliasAnalysis *AA = &getAnalysis().getAAResults(); - std::map original_to_clone_map = {}; - std::vector all_instructions = {}; - for (auto &B : F) { - std::vector instructions = {}; - - int head_pointer = - -1; // points to head location in all_instructions - Instruction *first_instr = NULL; - for (auto &I : B) { - first_instr = &I; - if (auto *load_op = dyn_cast(&I)) { - if (isa(load_op->getOperand(0))) { - if (head_pointer < 0) { - instructions.push_back(&I); - } else { - int current_counter = head_pointer; - while (current_counter > 0) { - Instruction *curr_instr = - instructions[current_counter]; - if (auto *op = dyn_cast(&I)) { - ++current_counter; - break; - } - // else if (auto *other_load_op = - // dyn_cast(curr_instr)) - // { - // if (AA->isNoAlias( - // other_load_op->getOperand(0), - // load_op->getOperand(0))) { - // --current_counter; - // } else { - // break; - // } - // } - else if (auto *store_op = - dyn_cast(curr_instr)) { - if (AA->isNoAlias(store_op->getOperand(1), - load_op->getOperand(0))) { - --current_counter; - } else { - break; - } - } else { - --current_counter; - } - } - // Do the insertion - assert(current_counter >= 0); - instructions.insert( - instructions.begin() + current_counter, &I); - } - } else { - instructions.push_back(&I); - } - } else { - instructions.push_back(&I); - } - ++head_pointer; - 
all_instructions.push_back(&I); - } - if (first_instr == NULL) { - assert(false); - } - IRBuilder<> builder(first_instr); - // we add the instructions at the end - builder.SetInsertPoint(&B); - // here we are going to add back our instructions - BasicBlock::InstListType &bb_instrs = B.getInstList(); - for (auto &I : instructions) { - // we clone the original instruciton, then insert into builder - Instruction *cloned_instr = I->clone(); - // when adding, need to take caution about the users - original_to_clone_map[I] = cloned_instr; - for (unsigned int i = 0; i < I->getNumOperands(); i++) { - Value *operand = I->getOperand(i); - Instruction *operand_instr = dyn_cast(operand); - if (operand_instr != NULL) { - if (original_to_clone_map.find(operand_instr) != - original_to_clone_map.end()) { - Instruction *clone_instr = - original_to_clone_map[operand_instr]; - Value *clone_value = dyn_cast(clone_instr); - cloned_instr->setOperand(i, clone_value); - } else { - cloned_instr->setOperand(i, operand); - } - } - } - bb_instrs.push_back(cloned_instr); - Instruction *instr = &(*I); - for (auto &U : instr->uses()) { - User *user = U.getUser(); - user->setOperand(U.getOperandNo(), cloned_instr); - } - } - } - // here we need to delete all original instructions, going - // forwards with no reversal as they are in reversed order - std::reverse(all_instructions.begin(), all_instructions.end()); - for (auto &I : all_instructions) { - I->eraseFromParent(); - } - } - - virtual bool runOnFunction(Function &F) override { - /** - * In this pass, we walk backwards finding the first load from the - * bottom, and push it up as far as we can. We continue upwards, - * pushing loads upward. - * - * We gr - */ - if (F.getName() == "main" || - (F.getName().size() > 7 && F.getName().substr(0, 7) == "no_opt_")) { - return false; - } - // rewrite_stores(F); - // rewrite_loads(F); - - return true; - } -}; -} // namespace - -char LoadStoreMovementPass::ID = 0; - -// Automatically enable the pass. 
-// http://adriansampson.net/blog/clangpass.html -static void registerLoadStoreMovementPass(const PassManagerBuilder &, - legacy::PassManagerBase &PM) { - PM.add(new LoadStoreMovementPass()); -} -static RegisterStandardPasses RegisterMyPass( - PassManagerBuilder::EP_EarlyAsPossible, registerLoadStoreMovementPass); \ No newline at end of file From f2a28c0735de784b1737d94911d40aaf32d728a4 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 27 May 2022 03:28:36 -0400 Subject: [PATCH 104/143] update makefile, update tests, delete any cruft files --- src/dios-egraphs/Diospyros/Makefile | 45 +-- src/dios-egraphs/Diospyros/README.md | 52 ++- src/dios-egraphs/Diospyros/diospyros.hpp | 17 - src/dios-egraphs/Diospyros/documentation.md | 9 + .../Diospyros/llvm-output-tests/calloc.c | 1 + .../Diospyros/llvm-output-tests/div.c | 28 ++ .../Diospyros/llvm-output-tests/fft.c | 165 +++++++++ .../llvm-output-tests/local-array-1.c | 35 ++ .../llvm-output-tests/local-array-2.c | 67 ++++ .../llvm-output-tests/local-array-3.c | 59 ++++ .../llvm-output-tests/local-array-4.c | 55 +++ .../llvm-output-tests/qr-decomp-fixed-size.c | 267 +++++++++++++++ .../qr-decomp-local-arrays.c | 311 +++++++++++++++++ .../llvm-output-tests/qr-decomp-test-0.c | 81 +++++ .../Diospyros/llvm-output-tests/qr-decomp.c | 268 +++++++++++++++ .../Diospyros/llvm-output-tests/sub.c | 26 ++ .../llvm-output-tests/test1-local-array.c | 103 ++++++ .../llvm-output-tests/test2-local-array.c | 276 +++++++++++++++ .../llvm-output-tests/test3-local-array.c | 314 ++++++++++++++++++ .../Diospyros/llvm-tests/README.md | 2 - .../Diospyros/llvm-tests/load-prior-chunk.ll | 67 ---- .../Diospyros/llvm-tests/repeat-alloc.ll | 64 ---- .../Diospyros/llvm-tests/repeat-load.ll | 1 - .../Diospyros/llvm-tests/turnt.toml | 1 - .../Diospyros/no-egg-c-tests/calloc.c | 1 + .../Diospyros/no-egg-c-tests/div.c | 28 ++ .../Diospyros/no-egg-c-tests/fft.c | 165 +++++++++ .../Diospyros/no-egg-c-tests/local-array-1.c | 35 ++ .../Diospyros/no-egg-c-tests/local-array-2.c | 67 ++++ .../Diospyros/no-egg-c-tests/local-array-3.c | 59 ++++ .../Diospyros/no-egg-c-tests/local-array-4.c | 55 +++ .../no-egg-c-tests/qr-decomp-fixed-size.c | 267 +++++++++++++++ .../no-egg-c-tests/qr-decomp-local-arrays.c | 311 +++++++++++++++++ .../no-egg-c-tests/qr-decomp-test-0.c | 81 +++++ .../Diospyros/no-egg-c-tests/qr-decomp.c | 268 +++++++++++++++ .../Diospyros/no-egg-c-tests/sub.c | 26 ++ .../no-egg-c-tests/test1-local-array.c | 103 ++++++ .../no-egg-c-tests/test2-local-array.c | 276 +++++++++++++++ .../no-egg-c-tests/test3-local-array.c | 314 ++++++++++++++++++ src/dios-egraphs/Diospyros/reduce.sh | 11 - src/dios-egraphs/Diospyros/report.md | 9 - src/dios-egraphs/Diospyros/run_all.sh | 4 - src/dios-egraphs/Diospyros/runt.sh | 15 - src/dios-egraphs/Diospyros/runt.toml | 6 - 44 files changed, 4148 insertions(+), 267 deletions(-) delete mode 100644 src/dios-egraphs/Diospyros/diospyros.hpp create mode 100644 src/dios-egraphs/Diospyros/documentation.md create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/div.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/fft.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/local-array-1.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/local-array-2.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/local-array-3.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/local-array-4.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-fixed-size.c create 
mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-local-arrays.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-0.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/sub.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/test1-local-array.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/test2-local-array.c create mode 100644 src/dios-egraphs/Diospyros/llvm-output-tests/test3-local-array.c delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/README.md delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/load-prior-chunk.ll delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/repeat-alloc.ll delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/repeat-load.ll delete mode 100644 src/dios-egraphs/Diospyros/llvm-tests/turnt.toml create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/div.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/fft.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-1.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-2.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-3.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-4.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-fixed-size.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-local-arrays.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-0.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/sub.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/test1-local-array.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/test2-local-array.c create mode 100644 src/dios-egraphs/Diospyros/no-egg-c-tests/test3-local-array.c delete mode 100755 src/dios-egraphs/Diospyros/reduce.sh delete mode 100644 src/dios-egraphs/Diospyros/report.md delete mode 100644 src/dios-egraphs/Diospyros/run_all.sh delete mode 100644 src/dios-egraphs/Diospyros/runt.sh delete mode 100644 src/dios-egraphs/Diospyros/runt.toml diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index 00ce628c..72902bc3 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -12,44 +12,6 @@ endif .PHONY: target/debug/libllvmlib.$(EXT) -run: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) $(test) - -run-o2: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -O2 -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) $(test) - -run-out: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) $(test) - ./a.out - -run-out-o2: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -O2 -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) $(test) - ./a.out - -run-all: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - bash run_all.sh - -emit: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -emit-llvm -S -o - $(test) - -emit-o2: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -O2 -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -emit-llvm -S -o - $(test) - -test-opt: set-up-mac 
target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o clang.ll $(test) - opt -S --inline --mem2reg --indvars --loop-simplify --loop-instsimplify --licm --loop-unroll --simplifycfg clang.ll -o opt.ll - $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) opt.ll -o finish.ll - opt -S --adce finish.ll -o final.ll - -min-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) - opt -S -always-inline --inline --mem2reg --scev-aa --unroll-threshold=1000000 --loop-unroll --simplifycfg --mem2reg --dse --adce build/clang.ll -o build/opt.ll - opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll - $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=false build/aa.ll -o build/diospyros.ll - opt -S --adce --dse build/diospyros.ll -o build/dce.ll - $(CLANG) build/dce.ll -o build/final - build/final - run-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp @$(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll @@ -94,9 +56,6 @@ no-opt-stdout: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp @opt -S --adce --dse build/diospyros.ll -o build/dce.ll @cat build/dce.ll -run-diospyros: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt=false -mllvm -print=false $(test) -o build/diospyros.ll - run-polybench: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp $(CLANG) -I polybench-tests/utilities -I polybench-tests/linear-algebra/kernels/atax $(test) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll @@ -106,8 +65,8 @@ run-polybench: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp $(CLANG) -I utilities polybench-tests/utilities/polybench.c build/dce.ll -o build/final build/final -test: set-up-mac runt.sh runt.toml target/debug/libllvmlib.$(EXT) - runt +test: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp + turnt c-tests/*.c target/debug/libllvmlib.$(EXT): $(LIB) @cargo build diff --git a/src/dios-egraphs/Diospyros/README.md b/src/dios-egraphs/Diospyros/README.md index 3f3e537e..cf5b41ae 100644 --- a/src/dios-egraphs/Diospyros/README.md +++ b/src/dios-egraphs/Diospyros/README.md @@ -7,6 +7,8 @@ This directory contains an experimental [LLVM][] pass that optimizes programs us To get started, you will need **LLVM 11.x.x**. Using [Homebrew][] on macOS, for example, try `brew install llvm@11` to get the right version. +You will also need Rust, for the main Diospyros library, and a version of Python3, for testing using [turnt][]. + Because our Rust library relies on [the `llvm-sys` crate][llvm-sys], you will need an existing installation of `llvm-config` on your `$PATH`. 
To use a Homebrew-installed LLVM, for example, you may need something like this: @@ -21,47 +23,43 @@ Add a file `.cargo/config` here, in this directory, with these [contents](https: "-C", "link-arg=dynamic_lookup", ] -Then, build the pass library with: - - $ cargo build +Further, add a build directory in your current directory with the command: -## Run the Pass + $ mkdir build -To build and run the [Clang][] pass on a test file, use this Makefile command: +If you would like, you can build the pass library with: - $ make run test=llvm-tests/a.c + $ cargo build -where `llvm-tests/a.c` is the path to any test file. +Otherwise, running with any of the commands in the next section should also work. -To build and run the Clang pass, with optimization, use the Makefile command: +Finally, note that the code for the Diospyros pass, in directory, `dios-egraphs`, must be in the directory immediately above the current one you are in, for the LLVM pass to build properly. - $ make run-opt test=llvm-tests/a.c - -where `llvm-tests/a.c` is the path to any test file. +## Run the Pass -To build and see emitted LLVM IR code, , with optimization, use the Makefile command: +To build and run the [Clang][] pass, with Diospyros, use the Makefile command: -$ make run-opt test=llvm-tests/a.c + $ make run-opt test=llvm-tests/a.c -where, again, `llvm-tests/a.c` is the path to any test file. +where `llvm-tests/a.c` is the path to any test file, for insstance `c-tests/add.c`. -To emit the generated LLVM IR code, either unoptimized or optimized: +To build and run the [Clang][] pass, with Diospyros printing out the vectorization choices, use the Makefile command: - $ make emit test=llvm-tests/a.c - $ make emit-o2 test=llvm-tests/a.c + $ make print-opt test=llvm-tests/a.c +To build and run the [Clang][] pass, with no usage of Diospyros, use the Makefile command: -To build, run the [Clang][] pass, and the run the associated program `./a.out`, run: + $ make no-opt test=llvm-tests/a.c - $ make run-out test=llvm-tests/a.c +To build and see emitted LLVM IR code, run any of the above build commands for the file you are interested in, then look in the `build` directory and open the `dce.ll` file, which is the final pre-executable IR code file. To run all the tests, run: - $ make test + $ turnt c-tests/*.c -To run all tests and get output, run: +Or alternately: - $ make run-all + $ make test To set up macOS settings, run: @@ -73,19 +71,19 @@ To clean the repository of build files, run: ## Testing -Test files provided in the llvm-tests/ folder can be run with [Runt][]. To install or update Runt: +Test files provided in the `c-tests/` folder can be run with [turnt][]. To install or update Turnt, run the command: - $ cargo install runt + $ pip3 install --user turnt Then, ensure that the test files produce the right output with: - $ runt + $ turnt c-tests/*.c -You can also pass the `--diff` flag to compare your output with the `.expect` files. +You can also pass the `--diff` flag to compare your output with the `.expect` files, and use the `--save` flag to save new `.expect` files. 
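One convention the README relies on but never states: the pass skips `main` and every function whose name starts with `no_opt_`, so each test in `c-tests/` pairs a kernel with an untouched `no_opt_` twin, runs both from `main`, asserts the results agree within `DELTA`, and prints them for turnt to diff against the `.expect` file. A minimal sketch of that shape, with illustrative names and a placeholder kernel body, in the style of the tests added in this series:

    #include <assert.h>
    #include <math.h>
    #include <stdio.h>
    #define SIZE 4
    #define DELTA 0.1f

    // Kernel the pass is allowed to vectorize.
    void scale(float a_in[SIZE], float c_out[SIZE]) {
        for (int i = 0; i < SIZE; i++) c_out[i] = 2.0f * a_in[i];
    }

    // Reference copy: the no_opt_ prefix makes the pass leave it alone.
    void no_opt_scale(float a_in[SIZE], float c_out[SIZE]) {
        for (int i = 0; i < SIZE; i++) c_out[i] = 2.0f * a_in[i];
    }

    int main(void) {
        float a_in[SIZE] = {1, 2, 3, 4};
        float c_out[SIZE], expected[SIZE];
        scale(a_in, c_out);
        no_opt_scale(a_in, expected);
        for (int i = 0; i < SIZE; i++) {
            printf("output: %f\n", c_out[i]);  // turnt diffs this stdout
            assert(fabs(c_out[i] - expected[i]) < DELTA);
        }
        return 0;
    }

A new test then only needs a fresh `.expect` file, which `turnt --save` produces.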
[llvm]: https://llvm.org [clang]: https://clang.llvm.org [llvm-sys]: https://crates.io/crates/llvm-sys [homebrew]: https://brew.sh -[runt]: https://github.com/rachitnigam/runt +[turnt]: https://github.com/cucapra/turnt diff --git a/src/dios-egraphs/Diospyros/diospyros.hpp b/src/dios-egraphs/Diospyros/diospyros.hpp deleted file mode 100644 index 17286add..00000000 --- a/src/dios-egraphs/Diospyros/diospyros.hpp +++ /dev/null @@ -1,17 +0,0 @@ -#include "llvm/IR/Argument.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/User.h" -#include "llvm/Pass.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" -#include "llvm/Transforms/Scalar/LoopUnrollPass.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" - -extern "C" void optimize(LLVMModuleRef mod, LLVMContextRef context, - LLVMBuilderRef builder, LLVMValueRef const *bb, - std::size_t size); \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/documentation.md b/src/dios-egraphs/Diospyros/documentation.md new file mode 100644 index 00000000..716d68c2 --- /dev/null +++ b/src/dios-egraphs/Diospyros/documentation.md @@ -0,0 +1,9 @@ +# Documentation + +## + +## + +## + +## \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/calloc.c b/src/dios-egraphs/Diospyros/llvm-output-tests/calloc.c index d8e0f10a..b07ebe35 100644 --- a/src/dios-egraphs/Diospyros/llvm-output-tests/calloc.c +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/calloc.c @@ -1,4 +1,5 @@ #include +#include #include #define SIZE 4 diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/div.c b/src/dios-egraphs/Diospyros/llvm-output-tests/div.c new file mode 100644 index 00000000..90c19534 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/div.c @@ -0,0 +1,28 @@ +#include +#include +#include +#define SIZE 4 +#define DELTA 0.1 + +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = b_in[0] / a_in[0]; + c_out[1] = b_in[1] / a_in[1]; + c_out[2] = b_in[2] / a_in[2]; + c_out[3] = b_in[3] / a_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 6, 8}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(fabs(c_out[0] - 5.0) < DELTA); + assert(fabs(c_out[1] - 3.0) < DELTA); + assert(fabs(c_out[2] - 2.0) < DELTA); + assert(fabs(c_out[3] - 2.0) < DELTA); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/fft.c b/src/dios-egraphs/Diospyros/llvm-output-tests/fft.c new file mode 100644 index 00000000..8ab52066 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/fft.c @@ -0,0 +1,165 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 8 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], + float img_twid_in[SIZE / 2], float real_out[SIZE], + float img_out[SIZE]) { + int even = 0; + int odd = 0; + int log = 0; + int rootindex = 0; + int span = SIZE >> 1; + float temp = 0; + + for (int i = 0; i < SIZE; i++) { + real_out[i] = real_in[i]; + img_out[i] = img_in[i]; + } + + while (span != 0) { + odd = span; + while (odd < SIZE) 
{ + odd = odd | span; + even = odd ^ span; + + temp = real_out[even] + real_out[odd]; + real_out[odd] = real_out[even] - real_out[odd]; + real_out[even] = temp; + + temp = img_out[even] + img_out[odd]; + img_out[odd] = img_out[even] - img_out[odd]; + img_out[even] = temp; + + rootindex = (even << log) & (SIZE - 1); + if (rootindex > 0) { + temp = real_twid_in[rootindex] * real_out[odd] - + img_twid_in[rootindex] * img_out[odd]; + img_out[odd] = real_twid_in[rootindex] * img_out[odd] + + img_twid_in[rootindex] * real_out[odd]; + real_out[odd] = temp; + } + odd += 1; + } + span >>= 1; + log += 1; + } +} + +void no_opt_fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], + float img_twid_in[SIZE / 2], float real_out[SIZE], + float img_out[SIZE]) { + int even = 0; + int odd = 0; + int log = 0; + int rootindex = 0; + int span = SIZE >> 1; + float temp = 0; + + for (int i = 0; i < SIZE; i++) { + real_out[i] = real_in[i]; + img_out[i] = img_in[i]; + } + + while (span != 0) { + odd = span; + while (odd < SIZE) { + odd = odd | span; + even = odd ^ span; + + temp = real_out[even] + real_out[odd]; + real_out[odd] = real_out[even] - real_out[odd]; + real_out[even] = temp; + + temp = img_out[even] + img_out[odd]; + img_out[odd] = img_out[even] - img_out[odd]; + img_out[even] = temp; + + rootindex = (even << log) & (SIZE - 1); + if (rootindex > 0) { + temp = real_twid_in[rootindex] * real_out[odd] - + img_twid_in[rootindex] * img_out[odd]; + img_out[odd] = real_twid_in[rootindex] * img_out[odd] + + img_twid_in[rootindex] * real_out[odd]; + real_out[odd] = temp; + } + odd += 1; + } + span >>= 1; + log += 1; + } +} + + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float real_in[SIZE] = {0.0f}; + float img_in[SIZE] = {0.0f}; + float real_twid_in[SIZE / 2] = {0.0f}; + float img_twid_in[SIZE / 2] = {0.0f}; + float real_out[SIZE] = {0.0f}; + float img_out[SIZE] = {0.0f}; + + float expected_real_in[SIZE] = {0.0f}; + float expected_img_in[SIZE] = {0.0f}; + float expected_real_twid_in[SIZE / 2] = {0.0f}; + float expected_img_twid_in[SIZE / 2] = {0.0f}; + float expected_real_out[SIZE] = {0.0f}; + float expected_img_out[SIZE] = {0.0f}; + + for (int i = 0; i < SIZE; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + real_in[i] = n; + expected_real_in[i] = n; + } + for (int i = 0; i < SIZE; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + img_in[i] = n; + expected_img_in[i] = n; + } + for (int i = 0; i < SIZE / 2; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + real_twid_in[i] = n; + expected_real_twid_in[i] = n; + } + for (int i = 0; i < SIZE / 2; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + img_twid_in[i] = n; + expected_img_twid_in[i] = n; + } + for (int i = 0; i < SIZE; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + real_out[i] = n; + expected_real_out[i] = n; + } + for (int i = 0; i < SIZE; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + img_out[i] = n; + expected_img_out[i] = n; + } + + fft(real_in, img_in, real_twid_in, img_twid_in, real_out, img_out); + no_opt_fft(expected_real_in, expected_img_in, expected_real_twid_in, expected_img_twid_in, expected_real_out, expected_img_out); + + for (int i = 0; i < SIZE; i++) { + printf("Real Out Output: %f\n", real_out[i]); + printf("Expected Real Out Output: %f\n", expected_real_out[i]); + assert(fabs(real_out[i] - expected_real_out[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + 
printf("Img Out Output: %f\n", img_out[i]); + printf("Expected Img Out Output: %f\n", expected_img_out[i]); + assert(fabs(img_out[i] - expected_img_out[i]) < DELTA); + } +} diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/local-array-1.c b/src/dios-egraphs/Diospyros/llvm-output-tests/local-array-1.c new file mode 100644 index 00000000..1622fb14 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/local-array-1.c @@ -0,0 +1,35 @@ +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +void test(float A[SIZE]) { + float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = x[i]; + } +} + +void no_opt_test(float A[SIZE]) { + float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = x[i]; + } +} + +int main() { + float A[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float expectedA[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + test(A); + no_opt_test(expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("expected: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/local-array-2.c b/src/dios-egraphs/Diospyros/llvm-output-tests/local-array-2.c new file mode 100644 index 00000000..b7be1f82 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/local-array-2.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] += x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] += A[i]; + } + for (int i = 0; i < SIZE; i++) { + B[i] -= x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] += B[i]; + } +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] += x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] += A[i]; + } + for (int i = 0; i < SIZE; i++) { + B[i] -= x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] += B[i]; + } +} + +int main() { + float A[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float B[SIZE] = {[0 ... SIZE - 1] = 2.0f}; + float C[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float expectedA[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float expectedB[SIZE] = {[0 ... SIZE - 1] = 2.0f}; + float expectedC[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("expected: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("expected: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("C Output: %f\n", C[i]); + printf("expected: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/local-array-3.c b/src/dios-egraphs/Diospyros/llvm-output-tests/local-array-3.c new file mode 100644 index 00000000..3c334c5e --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/local-array-3.c @@ -0,0 +1,59 @@ +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +#define SIZE 10 + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... 
SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } + C[i] = A[i] + x[i]; + } + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } + C[i] = B[i] - x[i]; + } +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } + C[i] = A[i] + x[i]; + } + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } + C[i] = B[i] - x[i]; + } +} + +int main() { + float A[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float B[SIZE] = {[0 ... SIZE - 1] = 2.0f}; + float C[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float expectedC[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + test(A, B, C); + no_opt_test(A, B, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("C Output: %f\n", C[i]); + printf("expected: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/local-array-4.c b/src/dios-egraphs/Diospyros/llvm-output-tests/local-array-4.c new file mode 100644 index 00000000..27a65675 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/local-array-4.c @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +void test(float A[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int j = 0; j < SIZE; j++) { + x[j] = 1.0f; + } + float sum = 0.0f; + for (int j = 0; j < SIZE; j++) { + sum += x[j]; + } + A[i] = sum; + } +} + +void no_opt_test(float A[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int j = 0; j < SIZE; j++) { + x[j] = 1.0f; + } + float sum = 0.0f; + for (int j = 0; j < SIZE; j++) { + sum += x[j]; + } + A[i] = sum; + } +} + +int main() { + float A[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = (float)i; + } + float expectedA[SIZE] = {[0 ... 
SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + expectedA[i] = (float)i; + } + test(A); + no_opt_test(expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("expected: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-fixed-size.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-fixed-size.c new file mode 100644 index 00000000..69c40b58 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-fixed-size.c @@ -0,0 +1,267 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 4 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float *a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float *a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float expectedQ[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + + naive_fixed_qr_decomp(A, Q, R); + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-local-arrays.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-local-arrays.c new file mode 100644 index 00000000..430d8af1 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-local-arrays.c @@ -0,0 +1,311 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} 
+ +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + float I[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float e[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + float u[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float v[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + float q_min[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float q_t[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float res[SIZE * SIZE] = {[0 ... 
SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + printf("%f\n", A[i]); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-0.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-0.c new file mode 100644 index 00000000..f43090bb --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp-test-0.c @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +// float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; + sample_test(A, x, e); + float expectedA[SIZE] = {0}; + float expectedx[SIZE] = {0}; + float expectede[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedx, expectede); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + printf("X Output: %f\n", x[i]); + printf("Expected X Output: %f\n", expectedx[i]); + printf("E 
Output: %f\n", e[i]); + printf("Expected E Output: %f\n", expectede[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedx[i] - x[i]) < DELTA); + assert(fabs(expectede[i] - e[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp.c b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp.c new file mode 100644 index 00000000..16d181f0 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/qr-decomp.c @@ -0,0 +1,268 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_transpose(float *a, int n) __attribute__((always_inline)); +void naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1, + int col2) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +// Naive implementation +void naive_transpose(float *a, int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} + +void no_opt_naive_transpose(float *a, int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1, + int col2) { + for (int y = 0; y < row1; y++) { + for (int x = 0; x < col2; x++) { + c[col2 * y + x] = 0.0f; + for (int k = 0; k < col1; k++) { + c[col2 * y + x] += a[col1 * y + k] * b[col2 * k + x]; + } + } + } +} + +void no_opt_naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1, + int col2) { + for (int y = 0; y < row1; y++) { + for (int x = 0; x < col2; x++) { + c[col2 * y + x] = 0.0f; + for (int k = 0; k < col1; k++) { + c[col2 * y + x] += a[col1 * y + k] * b[col2 * k + x]; + } + } + } +} + +void naive_qr_decomp(float *A, float *Q, float *R, int n) { + memcpy(R, A, sizeof(float) * n * n); + + // Build identity matrix of size n * n + float *I = (float *)calloc(sizeof(float), n * n); + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + I[i * n + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < n - 1; k++) { + int m = n - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * n + k]; + e[i] = I[row * n + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / norm_u; + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), n * n); + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * n + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * n * n); // Q = q_t + naive_matrix_multiply(q_t, A, R, n, n, n); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), n * n); + naive_matrix_multiply(q_t, Q, res, n, n, n); // R = q_t * A + memcpy(Q, res, sizeof(float) * n * n); + naive_matrix_multiply(q_t, R, res, n, n, n); // R = q_t * A + memcpy(R, res, sizeof(float) * n * n); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_transpose(Q, n); +} + +void no_opt_naive_qr_decomp(float *A, float *Q, float *R, int n) { + memcpy(R, A, sizeof(float) * n * n); + + // Build identity matrix of size n * n + float *I = (float *)calloc(sizeof(float), n * n); + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + I[i * n + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < n - 1; k++) { + int m = n - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * n + k]; + e[i] = I[row * n + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / norm_u; + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), n * n); + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * n + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * n * n); // Q = q_t + no_opt_naive_matrix_multiply(q_t, A, R, n, n, n); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), n * n); + no_opt_naive_matrix_multiply(q_t, Q, res, n, n, n); // R = q_t * A + memcpy(Q, res, sizeof(float) * n * n); + no_opt_naive_matrix_multiply(q_t, R, res, n, n, n); // R = q_t * A + memcpy(R, res, sizeof(float) * n * n); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_transpose(Q, n); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + printf("%f\n", A[i]); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + naive_qr_decomp(A, Q, R, SIZE); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_qr_decomp(A, expectedQ, expectedR, SIZE); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/sub.c b/src/dios-egraphs/Diospyros/llvm-output-tests/sub.c new file mode 100644 index 00000000..3ef3621e --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/sub.c @@ -0,0 +1,26 @@ +#include +#include +#define SIZE 4 + +void diff(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] - b_in[0]; + c_out[1] = a_in[1] - b_in[1]; + c_out[2] = a_in[2] - b_in[2]; + c_out[3] = a_in[3] - b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 7, 8}; + float b_in[SIZE] = {5, 6, 3, 4}; + float c_out[SIZE]; + diff(a_in, b_in, c_out); + assert(c_out[0] == -4); + assert(c_out[1] == -4); + assert(c_out[2] == 4); + assert(c_out[3] == 4); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/test1-local-array.c b/src/dios-egraphs/Diospyros/llvm-output-tests/test1-local-array.c new file mode 100644 index 00000000..cb9ba117 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/test1-local-array.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +// float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + float 
x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + int m = SIZE - k; + + // float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + // float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + int m = SIZE - k; + + // float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + // float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; + sample_test(A); + float expectedA[SIZE] = {0}; + float expectedx[SIZE] = {0}; + float expectede[SIZE] = {0}; + no_opt_sample_test(expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + printf("X Output: %f\n", x[i]); + printf("Expected X Output: %f\n", expectedx[i]); + printf("E Output: %f\n", e[i]); + printf("Expected E Output: %f\n", expectede[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedx[i] - x[i]) < DELTA); + assert(fabs(expectede[i] - e[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/test2-local-array.c b/src/dios-egraphs/Diospyros/llvm-output-tests/test2-local-array.c new file mode 100644 index 00000000..d39765f3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/test2-local-array.c @@ -0,0 +1,276 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 3 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < 
SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + float I[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float e[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + float u[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float v[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + float q_min[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float q_t[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + printf("%f\n", A[i]); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-output-tests/test3-local-array.c b/src/dios-egraphs/Diospyros/llvm-output-tests/test3-local-array.c new file mode 100644 index 00000000..7495df9d --- /dev/null +++ b/src/dios-egraphs/Diospyros/llvm-output-tests/test3-local-array.c @@ -0,0 +1,314 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 3 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); 
+void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + float I[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float e[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + float u[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float v[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + float q_min[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float q_t[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + // else { + // // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE + // * + // // SIZE); + // float res[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + // for (int i = 0; i < SIZE * SIZE; i++) { + // res[i] = 0.0f; + // } + // naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + // for (int i = 0; i < SIZE * SIZE; i++) { + // Q[i] = res[i]; + // } + // naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + // for (int i = 0; i < SIZE * SIZE; i++) { + // R[i] = res[i]; + // } + // } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + // else { + // float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + // no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // memcpy(Q, res, sizeof(float) * SIZE * SIZE); + // no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // memcpy(R, res, sizeof(float) * SIZE * SIZE); + // } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + printf("%f\n", A[i]); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/README.md b/src/dios-egraphs/Diospyros/llvm-tests/README.md deleted file mode 100644 index d0499362..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/README.md +++ /dev/null @@ -1,2 +0,0 @@ -LLVM Tests contains hand crafted LLVM tests, that test transformation of llvm ir, -using the diospyros pass. 
\ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/load-prior-chunk.ll b/src/dios-egraphs/Diospyros/llvm-tests/load-prior-chunk.ll deleted file mode 100644 index d8888bf8..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/load-prior-chunk.ll +++ /dev/null @@ -1,67 +0,0 @@ -; ModuleID = 'build/opt.ll' -source_filename = "fail-tests/local-array-4.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@.str = private unnamed_addr constant [14 x i8] c"A Output: %f\0A\00", align 1 -@.memset_pattern = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 - -; Function Attrs: noinline nounwind ssp uwtable -define void @test(float* %0) #0 { -.preheader: - %a = alloca i32, align 4 - %b = alloca i32, align 4 - %c = store i32 0, i32* %a - %1 = alloca i64, align 8 - %tmpcast = bitcast i64* %1 to [2 x float]* - %2 = bitcast i64* %1 to i8* - %3 = bitcast i64* %1 to float* - store i64 0, i64* %1, align 8 - call void @memset_pattern16(i8* nonnull %2, i8* bitcast ([4 x float]* @.memset_pattern to i8*), i64 8) #4 - %4 = load float, float* %3, align 8 - %5 = fadd float %4, 0.000000e+00 - %6 = getelementptr inbounds [2 x float], [2 x float]* %tmpcast, i64 0, i64 1 - %7 = load float, float* %6, align 4 - %8 = fadd float %5, %7 - store float %8, float* %0, align 4 - ret void -} - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #1 - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #0 { - %1 = alloca i64, align 8 - %tmpcast = bitcast i64* %1 to [2 x float]* - %2 = bitcast i64* %1 to float* - store float 0.000000e+00, float* %2, align 8 - %3 = getelementptr inbounds [2 x float], [2 x float]* %tmpcast, i64 0, i64 1 - store float 1.000000e+00, float* %3, align 4 - call void @test(float* nonnull %2) - %4 = load float, float* %2, align 8 - %5 = fpext float %4 to double - %6 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %5) #4 - %7 = load float, float* %3, align 4 - %8 = fpext float %7 to double - %9 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %8) #4 - ret i32 0 -} - -declare i32 @printf(i8*, ...) 
#2 - -; Function Attrs: argmemonly nofree -declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) #3 - -attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn writeonly } -attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { argmemonly nofree } -attributes #4 = { nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/llvm-tests/repeat-alloc.ll b/src/dios-egraphs/Diospyros/llvm-tests/repeat-alloc.ll deleted file mode 100644 index 390f7a24..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/repeat-alloc.ll +++ /dev/null @@ -1,64 +0,0 @@ -; ModuleID = 'build/opt.ll' -source_filename = "fail-tests/local-array-4.c" -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.14.0" - -@.str = private unnamed_addr constant [14 x i8] c"A Output: %f\0A\00", align 1 -@.memset_pattern = private unnamed_addr constant [4 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00], align 16 - -; Function Attrs: noinline nounwind ssp uwtable -define void @test(float* %0) #0 { -.preheader: - %1 = alloca i64, align 8 - %tmpcast = bitcast i64* %1 to [2 x float]* - %2 = bitcast i64* %1 to i8* - %3 = bitcast i64* %1 to float* - store i64 0, i64* %1, align 8 - call void @memset_pattern16(i8* nonnull %2, i8* bitcast ([4 x float]* @.memset_pattern to i8*), i64 8) #4 - %4 = load float, float* %3, align 8 - %5 = fadd float %4, 0.000000e+00 - %6 = getelementptr inbounds [2 x float], [2 x float]* %tmpcast, i64 0, i64 1 - %7 = load float, float* %6, align 4 - %8 = fadd float %5, %7 - store float %8, float* %0, align 4 - ret void -} - -; Function Attrs: argmemonly nounwind willreturn writeonly -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #1 - -; Function Attrs: noinline nounwind ssp uwtable -define i32 @main() #0 { - %1 = alloca i64, align 8 - %tmpcast = bitcast i64* %1 to [2 x float]* - %2 = bitcast i64* %1 to float* - store float 0.000000e+00, float* %2, align 8 - %3 = getelementptr inbounds [2 x float], [2 x float]* %tmpcast, i64 0, i64 1 - store float 1.000000e+00, float* %3, align 4 - call void @test(float* nonnull %2) - %4 = load float, float* %2, align 8 - %5 = fpext float %4 to double - %6 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %5) #4 - %7 = load float, float* %3, align 4 - %8 = fpext float %7 to double - %9 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i64 0, i64 0), double %8) #4 - ret i32 0 -} - -declare i32 @printf(i8*, ...) #2 - -; Function Attrs: argmemonly nofree -declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) #3 - -attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn writeonly } -attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { argmemonly nofree } -attributes #4 = { nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{!"clang version 11.0.1"} diff --git a/src/dios-egraphs/Diospyros/llvm-tests/repeat-load.ll b/src/dios-egraphs/Diospyros/llvm-tests/repeat-load.ll deleted file mode 100644 index 2c63e1eb..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/repeat-load.ll +++ /dev/null @@ -1 +0,0 @@ -; repeats loads that cannot be reused \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/turnt.toml b/src/dios-egraphs/Diospyros/llvm-tests/turnt.toml deleted file mode 100644 index 10455d53..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/turnt.toml +++ /dev/null @@ -1 +0,0 @@ -command = "bash ../test-runner.sh llvm-tests/{filename}" diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/calloc.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/calloc.c index d8e0f10a..b07ebe35 100644 --- a/src/dios-egraphs/Diospyros/no-egg-c-tests/calloc.c +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/calloc.c @@ -1,4 +1,5 @@ #include +#include #include #define SIZE 4 diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/div.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/div.c new file mode 100644 index 00000000..90c19534 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/div.c @@ -0,0 +1,28 @@ +#include +#include +#include +#define SIZE 4 +#define DELTA 0.1 + +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = b_in[0] / a_in[0]; + c_out[1] = b_in[1] / a_in[1]; + c_out[2] = b_in[2] / a_in[2]; + c_out[3] = b_in[3] / a_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 6, 8}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(fabs(c_out[0] - 5.0) < DELTA); + assert(fabs(c_out[1] - 3.0) < DELTA); + assert(fabs(c_out[2] - 2.0) < DELTA); + assert(fabs(c_out[3] - 2.0) 
< DELTA); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/fft.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/fft.c new file mode 100644 index 00000000..8ab52066 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/fft.c @@ -0,0 +1,165 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 8 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], + float img_twid_in[SIZE / 2], float real_out[SIZE], + float img_out[SIZE]) { + int even = 0; + int odd = 0; + int log = 0; + int rootindex = 0; + int span = SIZE >> 1; + float temp = 0; + + for (int i = 0; i < SIZE; i++) { + real_out[i] = real_in[i]; + img_out[i] = img_in[i]; + } + + while (span != 0) { + odd = span; + while (odd < SIZE) { + odd = odd | span; + even = odd ^ span; + + temp = real_out[even] + real_out[odd]; + real_out[odd] = real_out[even] - real_out[odd]; + real_out[even] = temp; + + temp = img_out[even] + img_out[odd]; + img_out[odd] = img_out[even] - img_out[odd]; + img_out[even] = temp; + + rootindex = (even << log) & (SIZE - 1); + if (rootindex > 0) { + temp = real_twid_in[rootindex] * real_out[odd] - + img_twid_in[rootindex] * img_out[odd]; + img_out[odd] = real_twid_in[rootindex] * img_out[odd] + + img_twid_in[rootindex] * real_out[odd]; + real_out[odd] = temp; + } + odd += 1; + } + span >>= 1; + log += 1; + } +} + +void no_opt_fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], + float img_twid_in[SIZE / 2], float real_out[SIZE], + float img_out[SIZE]) { + int even = 0; + int odd = 0; + int log = 0; + int rootindex = 0; + int span = SIZE >> 1; + float temp = 0; + + for (int i = 0; i < SIZE; i++) { + real_out[i] = real_in[i]; + img_out[i] = img_in[i]; + } + + while (span != 0) { + odd = span; + while (odd < SIZE) { + odd = odd | span; + even = odd ^ span; + + temp = real_out[even] + real_out[odd]; + real_out[odd] = real_out[even] - real_out[odd]; + real_out[even] = temp; + + temp = img_out[even] + img_out[odd]; + img_out[odd] = img_out[even] - img_out[odd]; + img_out[even] = temp; + + rootindex = (even << log) & (SIZE - 1); + if (rootindex > 0) { + temp = real_twid_in[rootindex] * real_out[odd] - + img_twid_in[rootindex] * img_out[odd]; + img_out[odd] = real_twid_in[rootindex] * img_out[odd] + + img_twid_in[rootindex] * real_out[odd]; + real_out[odd] = temp; + } + odd += 1; + } + span >>= 1; + log += 1; + } +} + + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float real_in[SIZE] = {0.0f}; + float img_in[SIZE] = {0.0f}; + float real_twid_in[SIZE / 2] = {0.0f}; + float img_twid_in[SIZE / 2] = {0.0f}; + float real_out[SIZE] = {0.0f}; + float img_out[SIZE] = {0.0f}; + + float expected_real_in[SIZE] = {0.0f}; + float expected_img_in[SIZE] = {0.0f}; + float expected_real_twid_in[SIZE / 2] = {0.0f}; + float expected_img_twid_in[SIZE / 2] = {0.0f}; + float expected_real_out[SIZE] = {0.0f}; + float expected_img_out[SIZE] = {0.0f}; + + for (int i = 0; i < SIZE; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + real_in[i] = n; + expected_real_in[i] = n; + } + for (int i = 0; i < SIZE; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + img_in[i] = n; + expected_img_in[i] = n; + } + for (int i = 0; i < SIZE / 2; i++) { + float n = 
(float)rand() / (float)(RAND_MAX / MAX_FLOAT); + real_twid_in[i] = n; + expected_real_twid_in[i] = n; + } + for (int i = 0; i < SIZE / 2; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + img_twid_in[i] = n; + expected_img_twid_in[i] = n; + } + for (int i = 0; i < SIZE; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + real_out[i] = n; + expected_real_out[i] = n; + } + for (int i = 0; i < SIZE; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + img_out[i] = n; + expected_img_out[i] = n; + } + + fft(real_in, img_in, real_twid_in, img_twid_in, real_out, img_out); + no_opt_fft(expected_real_in, expected_img_in, expected_real_twid_in, expected_img_twid_in, expected_real_out, expected_img_out); + + for (int i = 0; i < SIZE; i++) { + printf("Real Out Output: %f\n", real_out[i]); + printf("Expected Real Out Output: %f\n", expected_real_out[i]); + assert(fabs(real_out[i] - expected_real_out[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Img Out Output: %f\n", img_out[i]); + printf("Expected Img Out Output: %f\n", expected_img_out[i]); + assert(fabs(img_out[i] - expected_img_out[i]) < DELTA); + } +} diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-1.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-1.c new file mode 100644 index 00000000..1622fb14 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-1.c @@ -0,0 +1,35 @@ +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +void test(float A[SIZE]) { + float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = x[i]; + } +} + +void no_opt_test(float A[SIZE]) { + float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = x[i]; + } +} + +int main() { + float A[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float expectedA[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + test(A); + no_opt_test(expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("expected: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-2.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-2.c new file mode 100644 index 00000000..b7be1f82 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-2.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] += x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] += A[i]; + } + for (int i = 0; i < SIZE; i++) { + B[i] -= x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] += B[i]; + } +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] += x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] += A[i]; + } + for (int i = 0; i < SIZE; i++) { + B[i] -= x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] += B[i]; + } +} + +int main() { + float A[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float B[SIZE] = {[0 ... SIZE - 1] = 2.0f}; + float C[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float expectedA[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float expectedB[SIZE] = {[0 ... SIZE - 1] = 2.0f}; + float expectedC[SIZE] = {[0 ... 
SIZE - 1] = 0.0f}; + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("expected: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("expected: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("C Output: %f\n", C[i]); + printf("expected: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-3.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-3.c new file mode 100644 index 00000000..3c334c5e --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-3.c @@ -0,0 +1,59 @@ +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +#define SIZE 10 + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } + C[i] = A[i] + x[i]; + } + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } + C[i] = B[i] - x[i]; + } +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } + C[i] = A[i] + x[i]; + } + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } + C[i] = B[i] - x[i]; + } +} + +int main() { + float A[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float B[SIZE] = {[0 ... SIZE - 1] = 2.0f}; + float C[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float expectedC[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + test(A, B, C); + no_opt_test(A, B, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("C Output: %f\n", C[i]); + printf("expected: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-4.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-4.c new file mode 100644 index 00000000..27a65675 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/local-array-4.c @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +void test(float A[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int j = 0; j < SIZE; j++) { + x[j] = 1.0f; + } + float sum = 0.0f; + for (int j = 0; j < SIZE; j++) { + sum += x[j]; + } + A[i] = sum; + } +} + +void no_opt_test(float A[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int j = 0; j < SIZE; j++) { + x[j] = 1.0f; + } + float sum = 0.0f; + for (int j = 0; j < SIZE; j++) { + sum += x[j]; + } + A[i] = sum; + } +} + +int main() { + float A[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = (float)i; + } + float expectedA[SIZE] = {[0 ... 
SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + expectedA[i] = (float)i; + } + test(A); + no_opt_test(expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("expected: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-fixed-size.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-fixed-size.c new file mode 100644 index 00000000..69c40b58 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-fixed-size.c @@ -0,0 +1,267 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 4 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float *a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float *a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float expectedQ[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + + naive_fixed_qr_decomp(A, Q, R); + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-local-arrays.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-local-arrays.c new file mode 100644 index 00000000..430d8af1 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-local-arrays.c @@ -0,0 +1,311 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void 
no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + float I[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float e[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + float u[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float v[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + float q_min[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float q_t[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float res[SIZE * SIZE] = {[0 ... 
SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + printf("%f\n", A[i]); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-0.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-0.c new file mode 100644 index 00000000..f43090bb --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp-test-0.c @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +// float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; + sample_test(A, x, e); + float expectedA[SIZE] = {0}; + float expectedx[SIZE] = {0}; + float expectede[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedx, expectede); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + printf("X Output: %f\n", x[i]); + printf("Expected X Output: %f\n", expectedx[i]); + printf("E Output: 
%f\n", e[i]); + printf("Expected E Output: %f\n", expectede[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedx[i] - x[i]) < DELTA); + assert(fabs(expectede[i] - e[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp.c new file mode 100644 index 00000000..16d181f0 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/qr-decomp.c @@ -0,0 +1,268 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_transpose(float *a, int n) __attribute__((always_inline)); +void naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1, + int col2) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +// Naive implementation +void naive_transpose(float *a, int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} + +void no_opt_naive_transpose(float *a, int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1, + int col2) { + for (int y = 0; y < row1; y++) { + for (int x = 0; x < col2; x++) { + c[col2 * y + x] = 0.0f; + for (int k = 0; k < col1; k++) { + c[col2 * y + x] += a[col1 * y + k] * b[col2 * k + x]; + } + } + } +} + +void no_opt_naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1, + int col2) { + for (int y = 0; y < row1; y++) { + for (int x = 0; x < col2; x++) { + c[col2 * y + x] = 0.0f; + for (int k = 0; k < col1; k++) { + c[col2 * y + x] += a[col1 * y + k] * b[col2 * k + x]; + } + } + } +} + +void naive_qr_decomp(float *A, float *Q, float *R, int n) { + memcpy(R, A, sizeof(float) * n * n); + + // Build identity matrix of size n * n + float *I = (float *)calloc(sizeof(float), n * n); + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + I[i * n + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < n - 1; k++) { + int m = n - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * n + k]; + e[i] = I[row * n + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / norm_u; + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), n * n); + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * n + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * n * n); // Q = q_t + naive_matrix_multiply(q_t, A, R, n, n, n); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), n * n); + naive_matrix_multiply(q_t, Q, res, n, n, n); // R = q_t * A + memcpy(Q, res, sizeof(float) * n * n); + naive_matrix_multiply(q_t, R, res, n, n, n); // R = q_t * A + memcpy(R, res, sizeof(float) * n * n); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_transpose(Q, n); +} + +void no_opt_naive_qr_decomp(float *A, float *Q, float *R, int n) { + memcpy(R, A, sizeof(float) * n * n); + + // Build identity matrix of size n * n + float *I = (float *)calloc(sizeof(float), n * n); + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + I[i * n + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < n - 1; k++) { + int m = n - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * n + k]; + e[i] = I[row * n + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / norm_u; + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), n * n); + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * n + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * n * n); // Q = q_t + no_opt_naive_matrix_multiply(q_t, A, R, n, n, n); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), n * n); + no_opt_naive_matrix_multiply(q_t, Q, res, n, n, n); // R = q_t * A + memcpy(Q, res, sizeof(float) * n * n); + no_opt_naive_matrix_multiply(q_t, R, res, n, n, n); // R = q_t * A + memcpy(R, res, sizeof(float) * n * n); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_transpose(Q, n); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + printf("%f\n", A[i]); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + naive_qr_decomp(A, Q, R, SIZE); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_qr_decomp(A, expectedQ, expectedR, SIZE); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/sub.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/sub.c new file mode 100644 index 00000000..3ef3621e --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/sub.c @@ -0,0 +1,26 @@ +#include +#include +#define SIZE 4 + +void diff(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] - b_in[0]; + c_out[1] = a_in[1] - b_in[1]; + c_out[2] = a_in[2] - b_in[2]; + c_out[3] = a_in[3] - b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 7, 8}; + float b_in[SIZE] = {5, 6, 3, 4}; + float c_out[SIZE]; + diff(a_in, b_in, c_out); + assert(c_out[0] == -4); + assert(c_out[1] == -4); + assert(c_out[2] == 4); + assert(c_out[3] == 4); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/test1-local-array.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/test1-local-array.c new file mode 100644 index 00000000..cb9ba117 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/test1-local-array.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +// float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + float x[SIZE] = {0.0f}; + 
float e[SIZE] = {0.0f}; + int m = SIZE - k; + + // float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + // float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + int m = SIZE - k; + + // float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + // float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; + sample_test(A); + float expectedA[SIZE] = {0}; + float expectedx[SIZE] = {0}; + float expectede[SIZE] = {0}; + no_opt_sample_test(expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + printf("X Output: %f\n", x[i]); + printf("Expected X Output: %f\n", expectedx[i]); + printf("E Output: %f\n", e[i]); + printf("Expected E Output: %f\n", expectede[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedx[i] - x[i]) < DELTA); + assert(fabs(expectede[i] - e[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/test2-local-array.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/test2-local-array.c new file mode 100644 index 00000000..d39765f3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/test2-local-array.c @@ -0,0 +1,276 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 3 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x 
< SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + float I[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float e[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + float u[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float v[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + float q_min[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float q_t[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + printf("%f\n", A[i]); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/no-egg-c-tests/test3-local-array.c b/src/dios-egraphs/Diospyros/no-egg-c-tests/test3-local-array.c new file mode 100644 index 00000000..7495df9d --- /dev/null +++ b/src/dios-egraphs/Diospyros/no-egg-c-tests/test3-local-array.c @@ -0,0 +1,314 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 3 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void 
naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + float I[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float e[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + float u[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float v[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + float q_min[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float q_t[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + // else { + // // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE + // * + // // SIZE); + // float res[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + // for (int i = 0; i < SIZE * SIZE; i++) { + // res[i] = 0.0f; + // } + // naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + // for (int i = 0; i < SIZE * SIZE; i++) { + // Q[i] = res[i]; + // } + // naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + // for (int i = 0; i < SIZE * SIZE; i++) { + // R[i] = res[i]; + // } + // } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + // else { + // float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + // no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // memcpy(Q, res, sizeof(float) * SIZE * SIZE); + // no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // memcpy(R, res, sizeof(float) * SIZE * SIZE); + // } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + printf("%f\n", A[i]); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/reduce.sh b/src/dios-egraphs/Diospyros/reduce.sh deleted file mode 100755 index bb961006..00000000 --- a/src/dios-egraphs/Diospyros/reduce.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -/usr/local/opt/llvm/bin/clang -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll inline-float.c -opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll -opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll -/usr/local/opt/llvm/bin/clang -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.dylib -mllvm -opt -mllvm -print=false build/aa.ll -o build/diospyros.ll &> err1.txt -opt -S --adce --dse build/diospyros.ll -o build/dce.ll 2> err2.txt \ -&& $(CLANG) build/dce.ll -o build/final \ -&& build/final -output1=$(grep -c 'Error' err1.txt) -output2=$(grep -c 'Error' err2.txt) -echo $output1 || $output2 \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/report.md b/src/dios-egraphs/Diospyros/report.md deleted file mode 100644 index 73ca9a34..00000000 --- a/src/dios-egraphs/Diospyros/report.md +++ /dev/null @@ -1,9 +0,0 @@ -# Spring 2022 Report - -## - -## - -## - -## \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/run_all.sh b/src/dios-egraphs/Diospyros/run_all.sh deleted file mode 100644 index 241b3ed5..00000000 --- a/src/dios-egraphs/Diospyros/run_all.sh +++ /dev/null @@ -1,4 +0,0 @@ -for file in llvm-tests/*.c -do - make run test="$file" -done \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/runt.sh b/src/dios-egraphs/Diospyros/runt.sh deleted file mode 100644 index c797722d..00000000 --- a/src/dios-egraphs/Diospyros/runt.sh +++ /dev/null @@ 
-1,15 +0,0 @@ -#! /bin/sh -args=("$@") -FILE=target/debug/libllvmlib.so - -if ! [ -f $FILE ]; then - FILE=target/debug/libllvmlib.dylib -fi - -if [[ "$OSTYPE" == "darwin"* ]]; then - CLANG=/usr/local/opt/llvm/bin/clang -else - CLANG=clang -fi - -$CLANG -Xclang -load -Xclang $FILE -emit-llvm -S -o - ${args[0]} | awk '/define/{flag=1; next} /}/{flag=0} flag' \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/runt.toml b/src/dios-egraphs/Diospyros/runt.toml deleted file mode 100644 index 19995b09..00000000 --- a/src/dios-egraphs/Diospyros/runt.toml +++ /dev/null @@ -1,6 +0,0 @@ -ver = "0.3.1" - -[[tests]] -name = "llvm-diospyros tests" -paths = [ "llvm-tests/*.c" ] -cmd = "bash runt.sh {}" \ No newline at end of file From 8987f99bc62e48e44e6a1745a723f4c8e8425b94 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 27 May 2022 03:55:29 -0400 Subject: [PATCH 105/143] move store documentation added --- src/dios-egraphs/Diospyros/documentation.md | 32 ++++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/src/dios-egraphs/Diospyros/documentation.md b/src/dios-egraphs/Diospyros/documentation.md index 716d68c2..69a6189f 100644 --- a/src/dios-egraphs/Diospyros/documentation.md +++ b/src/dios-egraphs/Diospyros/documentation.md @@ -1,9 +1,33 @@ # Documentation -## +This is the documentation for work on Diospyros for LLVM, up until the end of the Spring 2022 semester. Below, documentation and design and decisions are split by file name. -## +## Diospyros.cpp -## +## LoadStoreMovement.cpp -## \ No newline at end of file +Load Store Movement moves loads forward towards the beginning of a basic block, and stores backwards, towards the end of a basic block. Load store movement depends heavily on alias analysis. As a result, alias analysis is required to be run **before** the load store movement pass, as the load store movement pass repeatedly queries the alias analysis. Load store movement only occurs to functions that are not named `main` nor have the prefix `no_opt_` attached to their name. + +Generally, the algorithm works as follows: a load or a store is chosen, and call this instruction `I`. Under certain conditions, `I` may be swapped with its neighbor. If conditions are correct, then the swap occurs. Swapping continues until no more swaps are possible. This occurs to all load and store instructions in the basic block. + +As a technical matter relating to LLVM, and as a note to myself for future implementation issues, replacing all uses in LLVM does not actually fix back pointers for PHI Nodes. Instead, `replaceAllUsesWith()` is the preferred approach, and does not cause crashes when the LLVM Pass is run. + +As a second note, insertion of the cloned instruction must occur before any changes to the cloned instruction are effected. + +As a third note, when doing alias analysis, one must make sure that instructions that are pointed typed are being compared. To make sure of this, I use the `mayReadOrWriteMemory()` function as a guard. I then use the `isNoAlias()` function to help calculate aliasing. + +As a fourth note, several instructions are not handled as optimally as possible. In particular, there may be calls to intrinsics like `@llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(40) %2, i8 0, i64 40, i1 false)` or `@memset_pattern16(i8* nonnull %2, i8* bitcast([4 x float]* @.memset_pattern to i8*), i64 40) #6`. However, I do not actually check pointer aliasing with these instructions. 
Call Instructions are treated as black boxes, which are assumed to always cause aliasing issues. This means that any memory intrinsics are always conservatively assumed to alias with a load or a store instruction, and no swap will occur. I intend to fix this by iterating over call arguments to check whether each argument is a pointer or not, and then checking aliasing with the store or load instruction. This will be more accurate and fine-grained, and will eliminate instances where a swap is not allowed to occur even though the swap does not affect the semantics of the program. + +Finally, as a fifth note, one must insert cloned PHI Nodes **before** any prior PHI nodes in the basic block. Likewise, one must insert any cloned terminator instructions **after** any existing terminator instruction in the basic block (which always exists in LLVM). This means that the builder must shift locations constantly. As part of the implementation, I place the builder at the end of the basic block, move it before the first instruction when inserting cloned PHI Nodes, and then move it to the end of the basic block again when inserting the remainder of the instructions, including terminator instructions. + +### Store Movement + +A store can be moved towards the end of a basic block if the instruction following the store exists, is not a terminator, is not a call instruction of any sort, and is not an instruction using a pointer `p'` that may alias with the pointer `p` in the store instruction. If all of these conditions are met, the store is swapped with the following instruction. The process continues iteratively until no more swaps are possible. + +### Load Movement + +A load can be moved towards the beginning of a basic block if the instruction preceding the load exists, is not a PHI Node, is not a call instruction of any sort, does not define an LLVM register that is used by the load as an argument, and is not an instruction using a pointer `p'` that may alias with the pointer `p` in the load instruction. If all of these conditions are met, the load is swapped with the preceding instruction. The process continues iteratively until no more swaps are possible. + +Note the extra condition regarding the load's argument. This is critical, because the load may use a previously defined value, and we cannot move the load above the point where that value is defined. + +## lib.rs From bf5a41c8c8271a21f015d77cc11b74e8b13487dd Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 27 May 2022 04:05:44 -0400 Subject: [PATCH 106/143] add diospyros.cpp --- src/dios-egraphs/Diospyros/documentation.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/dios-egraphs/Diospyros/documentation.md b/src/dios-egraphs/Diospyros/documentation.md index 69a6189f..b74c5505 100644 --- a/src/dios-egraphs/Diospyros/documentation.md +++ b/src/dios-egraphs/Diospyros/documentation.md @@ -4,6 +4,10 @@ This is the documentation for work on Diospyros for LLVM, up until the end of th ## Diospyros.cpp +Diospyros.cpp is the starting point for the vectorization process. This pass is run per function, and only on functions that are not named `main` nor have the prefix `no_opt_` attached to their name. In addition, there are a multitude of `isa`-style functions in this file, which are used on the Rust side to check instruction type.
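For context, the sketch below is a minimal illustration (assuming the `llvm-sys` crate; the wrapper name `is_load` is invented here, not taken from the codebase) of the null-checking that the raw bindings would otherwise force on the Rust side:

```rust
// Minimal sketch, assuming the `llvm-sys` crate. The C API's `LLVMIsA*`
// functions return an LLVMValueRef that is null on failure, not a bool,
// so a boolean check has to null-test the result.
use llvm_sys::core::LLVMIsALoadInst;
use llvm_sys::prelude::LLVMValueRef;

unsafe fn is_load(val: LLVMValueRef) -> bool {
    // Non-null means `val` really is a load instruction.
    !LLVMIsALoadInst(val).is_null()
}
```

Exporting boolean `isa`-style helpers from the C++ side avoids repeating this pattern for every instruction kind the Rust code needs to test.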
These `isa` functions are used because the Rust LLVM-Core library `isa` functions do not return booleans, instead returning `LLVMValueRefs`, which one cannot branch on. + +The heart of Diospyros.cpp finds __runs__ of vectorizable instructions, which are then sent to the Diospyros rewriter. Vectorizable instructions are instructions with the `FAdd`, `FSub`, `FMul`, `FDiv`, or `FNeg` instruction types. Runs of vectorizable instructions are consecutive vectorizable instructions that occur before a `StoreInst` is detected in the basic block, or before a `LoadInst` is detected in the basic block. The first condition, to be before a `StoreInst`, exists because the store may use the result of the vectorized computation. The second condition, to be before a `LoadInst`, exists because the `load` may alias with a `store`, causing a read-write conflict. After a run is found, it is sent via the `optimize` function to be optimized by the Rust side of the pass. + ## LoadStoreMovement.cpp From 3c4c8a8e39c14d7bc9a7372a25a6773ee2de5143 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 27 May 2022 04:23:45 -0400 Subject: [PATCH 107/143] add llvm2egg docs --- src/dios-egraphs/Diospyros/documentation.md | 40 +++++++++++++++ src/dios-egraphs/Diospyros/src/lib.rs | 55 ++++++++++++++++----- 2 files changed, 83 insertions(+), 12 deletions(-) diff --git a/src/dios-egraphs/Diospyros/documentation.md b/src/dios-egraphs/Diospyros/documentation.md index b74c5505..86963747 100644 --- a/src/dios-egraphs/Diospyros/documentation.md +++ b/src/dios-egraphs/Diospyros/documentation.md @@ -35,3 +35,43 @@ A load can be moved towards the beginning of a basic block, if the instruction p Note the extra condition regarding the load's argument. This is critical, because the load may use a previously defined value, and we cannot move the load above the point where that value is defined. ## lib.rs + +The Diospyros rewriting engine is applied in lib.rs. In particular, this file contains a translation from LLVM to `Egg` VecLang instructions, and a translation from `Egg` VecLang instructions back to LLVM instructions. One can specify whether to print the `Egg` rewriting output, and whether to run the `Egg` rewriter at all, via a series of flags passed in from Diospyros.cpp. + +### New VecLang + +The new VecLang now has a register construct, representing a black-box register computation. A register represents a computed LLVM Value. It may be used if an LLVM register is used across multiple runs or basic blocks. The new argument construct is similar to a register construct, and represents an LLVM argument to a function. + +### LLVM To Egg + +Runs are translated from a sequence of LLVM instructions to a graph of Egg nodes. LLVM instructions are recursively translated backwards to Egg nodes. Working in reverse over the sequence of LLVM instructions, each translatable instruction (Add, Sub, Mul, Div, Neg, Sqrt) is chosen as a starting point and translated backwards from there. If an instruction has been translated already, it is not retranslated to an Egg Node.
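To make the shape of this backward, recursive dispatch concrete, here is a small self-contained sketch. The types and names (`MiniInstr`, `MiniNode`, `translate`) are simplified stand-ins invented for illustration only, not the real `LLVMValueRef`/`VecLang` types used in lib.rs, and memoization, lane padding, and the llvm2reg/llvm2arg maps are omitted:

```rust
// Simplified stand-in for a tiny LLVM instruction DAG (illustrative only).
#[allow(dead_code)]
enum MiniInstr {
    FAdd(Box<MiniInstr>, Box<MiniInstr>),
    FNeg(Box<MiniInstr>),
    Const(f32),
    Arg(u32),
    Restricted(u32), // used across runs/blocks: must stay a black box
}

// Simplified stand-in for the Egg nodes described below.
#[derive(Debug)]
enum MiniNode {
    Add(Box<MiniNode>, Box<MiniNode>),
    Neg(Box<MiniNode>),
    Num(f32),
    Arg(u32),
    Reg(u32),
}

fn translate(instr: &MiniInstr) -> MiniNode {
    match instr {
        // Restricted instructions are not recursed into; they become Register nodes.
        MiniInstr::Restricted(id) => MiniNode::Reg(*id),
        // Binary and unary float ops recurse backwards through their operands.
        MiniInstr::FAdd(a, b) => {
            MiniNode::Add(Box::new(translate(a)), Box::new(translate(b)))
        }
        MiniInstr::FNeg(a) => MiniNode::Neg(Box::new(translate(a))),
        // Constants and function arguments are leaves.
        MiniInstr::Const(c) => MiniNode::Num(*c),
        MiniInstr::Arg(i) => MiniNode::Arg(*i),
    }
}

fn main() {
    // (-arg0) + 2.0, starting from the final FAdd and walking backwards.
    let run = MiniInstr::FAdd(
        Box::new(MiniInstr::FNeg(Box::new(MiniInstr::Arg(0)))),
        Box::new(MiniInstr::Const(2.0)),
    );
    println!("{:?}", translate(&run));
}
```

The real pass additionally records every instruction it has already translated (the `prior_translated_instructions` set visible in the lib.rs diff below), so repeated uses of the same instruction map to a single Egg node rather than being re-expanded.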
+ +Each LLVM instruction is translated to an appropriate Egg Node: + +- Restricted Instructions (instructions that are used in multiple runs/basic blocks): Translated to a Register Node +- Instruction not in the current run: Translated to a register node, because it must have existed already in the basic block +- Binary Operations: Translated to a binary operator node of the correct binary operator, and then each LLVM operand is recursively translated. +- Unary Operations: Similar to Binary Operations +- Sqrt: Translated to a sqrt node, and the operand is recursively translated +- Constants: Translated to a number node +- Arguments: Translated to argument nodes, which are conceptually similar to a register node because they act as black boxes. + +Finally, Egg Nodes are padded to be a multiple of the vector Lane Width, which is usually 4, and the binary operation nodes are added on. + +Useful metadata: + +- llvm2reg: This TreeMap maps an llvm instruction to a register. +- llvm2arg: This treemap maps an llvm argument to a register. +- start_instructions: This is a vector of instructions where llvm2egg translation began +- start_ids: This is the vector of ids corresponding to the start instructions. +- prior_translated_instructions: These are all instructions that had been translated already in the current pass. +- instructions_in_chunk: All instructions in chunk/run (they are synonyms) +- restricted_instructions: All instructions which are not be translated and are to be represented as a register node. + +All metadata from this pass lies in a struct that is passed to all recursive calls in `llvm2egg`. + +### Egg to LLVM + +The graph of Egg nodes is translated back to LLVM instructions, and the LLVM instructions are built and inserted in place. + + diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 762b2c96..2f022d0c 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -511,9 +511,14 @@ unsafe fn llvm_to_egg( translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { // Mark instruction as translated, as it will be after it goes through the code below - if !translation_metadata.prior_translated_instructions.contains(&llvm_instr) { - translation_metadata.prior_translated_instructions.insert(llvm_instr); - } + if !translation_metadata + .prior_translated_instructions + .contains(&llvm_instr) + { + translation_metadata + .prior_translated_instructions + .insert(llvm_instr); + } // If, on a different pass, the instruction was translated already, then // just used the egg node representing the translation if translation_metadata.llvm2reg.contains_key(&llvm_instr) { @@ -632,7 +637,7 @@ unsafe fn llvm_to_egg_main( // Invariant: chunk instructions are not empty in size assert!(!instructions_in_chunk.is_empty()); - let mut prior_translated_instructions: BTreeSet = BTreeSet::new(); + let prior_translated_instructions: BTreeSet = BTreeSet::new(); // State Variable To Hold Maps During Translation let mut translation_metadata = LLVM2EggState { @@ -651,7 +656,11 @@ unsafe fn llvm_to_egg_main( // for each final instruction, iterate backwards from that final instruction and translate to egg for llvm_instr in llvm_instrs_in_chunk.iter().rev() { // only start translation back if it is a "translatable instruction" and it was not translated already - if can_start_translation_instr(*llvm_instr) && !translation_metadata.prior_translated_instructions.contains(&llvm_instr) { + if can_start_translation_instr(*llvm_instr) + && 
!translation_metadata + .prior_translated_instructions + .contains(&llvm_instr) + { let (new_egg_nodes, new_next_node_idx) = start_translating_llvm_to_egg( *llvm_instr, egg_nodes, @@ -696,7 +705,10 @@ struct Egg2LLVMState<'a> { module: LLVMModuleRef, } -unsafe fn arg_to_llvm(egg_node: &VecLang, translation_metadata: &mut Egg2LLVMState) -> LLVMValueRef { +unsafe fn arg_to_llvm( + egg_node: &VecLang, + translation_metadata: &mut Egg2LLVMState, +) -> LLVMValueRef { // TODO: Make More Efficient with BTREEMAP? let llvm2arg = &translation_metadata.llvm2egg_metadata.llvm2arg; for (llvm_instr, arg_node) in llvm2arg.iter() { @@ -712,7 +724,10 @@ unsafe fn arg_to_llvm(egg_node: &VecLang, translation_metadata: &mut Egg2LLVMSta ); } -unsafe fn reg_to_llvm(egg_node: &VecLang, translation_metadata: &mut Egg2LLVMState) -> LLVMValueRef { +unsafe fn reg_to_llvm( + egg_node: &VecLang, + translation_metadata: &mut Egg2LLVMState, +) -> LLVMValueRef { // TODO: Make More Efficient with BTREEMAP? let llvm2reg = &translation_metadata.llvm2egg_metadata.llvm2reg; for (llvm_instr, reg_node) in llvm2reg.iter() { @@ -720,16 +735,25 @@ unsafe fn reg_to_llvm(egg_node: &VecLang, translation_metadata: &mut Egg2LLVMSta if reg_node == egg_node { assert!(!isa_argument(*llvm_instr)); // do not clone an instruction translated earlier in the same chunk - if translation_metadata.prior_translated_nodes.contains(&*llvm_instr) { + if translation_metadata + .prior_translated_nodes + .contains(&*llvm_instr) + { return *llvm_instr; } // do not clone an instruction translated in a prior basic block / prior chunk - if !translation_metadata.llvm2egg_metadata.instructions_in_chunk.contains(&*llvm_instr) { + if !translation_metadata + .llvm2egg_metadata + .instructions_in_chunk + .contains(&*llvm_instr) + { return *llvm_instr; } let new_instr = LLVMInstructionClone(*llvm_instr); LLVMInsertIntoBuilder(translation_metadata.builder, new_instr); - translation_metadata.prior_translated_nodes.insert(new_instr); + translation_metadata + .prior_translated_nodes + .insert(new_instr); return new_instr; } } @@ -877,7 +901,11 @@ unsafe fn binop_to_llvm( } } -unsafe fn concat_to_llvm(left_vector: &Id, right_vector: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { +unsafe fn concat_to_llvm( + left_vector: &Id, + right_vector: &Id, + md: &mut Egg2LLVMState, +) -> LLVMValueRef { { let trans_v1 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_vector)], md); let mut trans_v2 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_vector)], md); @@ -1044,7 +1072,10 @@ unsafe fn nooptvec_to_llvm(boxed_ids: &Box<[Id]>, md: &mut Egg2LLVMState) -> () /// Egg To LLVM Dispatches translation of VecLanf Egg Nodes to LLVMValueRegs /// /// Side Effect: Builds and Insert LLVM instructions -unsafe fn egg_to_llvm(egg_node: &VecLang, translation_metadata: &mut Egg2LLVMState) -> LLVMValueRef { +unsafe fn egg_to_llvm( + egg_node: &VecLang, + translation_metadata: &mut Egg2LLVMState, +) -> LLVMValueRef { match egg_node { VecLang::NoOptVec(..) => panic!("No Opt Vector was found. Egg to LLVM Translation does not handle No Opt Vector nodes at this location."), VecLang::Symbol(..) 
=> {

From 7141454003ce6cd19b8b17b8be6ec4ca4902ad24 Mon Sep 17 00:00:00 2001
From: Jonathan Tran
Date: Fri, 27 May 2022 04:32:52 -0400
Subject: [PATCH 108/143] egg2llvm docs added

---
 src/dios-egraphs/Diospyros/documentation.md | 25 ++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/dios-egraphs/Diospyros/documentation.md b/src/dios-egraphs/Diospyros/documentation.md
index 86963747..91bdda02 100644
--- a/src/dios-egraphs/Diospyros/documentation.md
+++ b/src/dios-egraphs/Diospyros/documentation.md
@@ -74,4 +74,27 @@ All metadata from this pass lies in a struct that is passed to all recursive cal
 
 The graph of Egg nodes is translated back to LLVM instructions, and the LLVM instructions are built and inserted in place.
 
-
+Translation from Egg to LLVM begins at the last Egg node. After this, extracted values are pulled out to replace each of the original start instructions: we replace all uses with the extracted value, then delete the start instruction.
+
+**NOTE: We assume the Egg rewriter maintains the relative positions of elements in the vector.** This means the rewriter must not move instructions, because extraction relies on each instruction being in the expected position.
+
+Egg2LLVM translation proceeds recursively, dispatching on the current Egg node:
+
+- Number: An LLVM float constant is created
+- Arg: The argument is pulled from the llvm2arg metadata
+- Register: The register value is pulled from the llvm2reg metadata
+- LitVec: Members of the LitVec are translated recursively, then a vector is built
+- VecAdd/VecSub...: Translate each argument recursively, then build the vector operation on both vectors
+- VecConcat: Concatenate two vectors by translating the arguments to vectors, then building a shuffle operation in LLVM
+- VecNeg/VecSqrt...: Translate the arguments appropriately, then build the correct LLVM intrinsic
+- VecMac: Translate the arguments appropriately, then build the correct LLVM intrinsic
+- VecSgn/Sgn/Ite/Get/Or/And/Lt/Symbol/NoOptVec: No translation is provided at the current time
+
+Metadata for this pass includes:

+
+- llvm2egg metadata: metadata from the llvm2egg pass
+- egg_nodes_vector: the vector of egg nodes
+- prior_translated_nodes: TreeSet of the instructions that have already been translated (rebuilt) in this chunk
+- builder
+- context
+- module
\ No newline at end of file
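
To make the dispatch described in this patch concrete, the sketch below models the same recursion in isolation. It is a minimal illustration, not the actual implementation: the `Node` enum, the `node_to_ir` helper, and the strings it returns are assumed stand-ins for the real `VecLang` nodes and the `LLVMValueRef`s that `lib.rs` builds through the LLVM C API.

```rust
// Minimal sketch of the recursive Egg -> LLVM dispatch described above.
// `Node` stands in for egg's `VecLang`; the returned strings stand in for
// the values a real builder would produce.
enum Node {
    Num(i32),             // Number: becomes a float constant
    Arg(u32),             // Arg: looked up in the llvm2arg metadata
    Reg(u32),             // Register: looked up in the llvm2reg metadata
    Add(usize, usize),    // VecAdd/Add: translate children, then build the op
    Concat(usize, usize), // VecConcat: translate children, then build a shuffle
    LitVec(Vec<usize>),   // LitVec: translate members, then build a vector
}

fn node_to_ir(nodes: &[Node], idx: usize) -> String {
    match &nodes[idx] {
        Node::Num(n) => format!("const {n}.0"),
        Node::Arg(i) => format!("arg{i}"),
        Node::Reg(i) => format!("reg{i}"),
        Node::Add(l, r) => format!(
            "fadd({}, {})",
            node_to_ir(nodes, *l),
            node_to_ir(nodes, *r)
        ),
        Node::Concat(l, r) => format!(
            "shuffle({}, {})",
            node_to_ir(nodes, *l),
            node_to_ir(nodes, *r)
        ),
        Node::LitVec(members) => {
            let elems: Vec<String> =
                members.iter().map(|m| node_to_ir(nodes, *m)).collect();
            format!("vector[{}]", elems.join(", "))
        }
    }
}

fn main() {
    // (Concat (LitVec (Add arg0 reg1) 0) (LitVec 0 0)):
    // one real lane plus zero padding, concatenated to the full width.
    let nodes = vec![
        Node::Arg(0),             // 0
        Node::Reg(1),             // 1
        Node::Add(0, 1),          // 2
        Node::Num(0),             // 3
        Node::LitVec(vec![2, 3]), // 4
        Node::LitVec(vec![3, 3]), // 5
        Node::Concat(4, 5),       // 6
    ];
    println!("{}", node_to_ir(&nodes, 6));
}
```

In the real pass the recursion bottoms out in calls on the active LLVM builder rather than strings, for example `LLVMBuildFAdd` for `VecAdd` and a shuffle for `VecConcat`, as listed above.
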
From ef7fd6a1b6c7fac9c6a034fb8864e9d3c7523b45 Mon Sep 17 00:00:00 2001
From: Jonathan Tran
Date: Sat, 11 Mar 2023 09:48:14 -0500
Subject: [PATCH 109/143] pre vec load working

---
 .../Diospyros/LoadStoreMovement.cpp           |   84 +-
 src/dios-egraphs/Diospyros/Makefile           |   12 +-
 .../Diospyros/benchmarks/baseline/mat-mul.c   |   77 +
 .../Diospyros/benchmarks/optimized/mat-mul.c  |   77 +
 .../Diospyros/c-tests/2d-2d-conv.c            |    7 +-
 .../Diospyros/c-tests/2d-matrix-multiply.c    |    5 +-
 src/dios-egraphs/Diospyros/c-tests/add.c      |    3 +-
 src/dios-egraphs/Diospyros/diospyros.cpp      |   23 +-
 src/dios-egraphs/Diospyros/src/lib.rs         | 1812 +++++++++--------
 9 files changed, 1190 insertions(+), 910 deletions(-)
 create mode 100644 src/dios-egraphs/Diospyros/benchmarks/baseline/mat-mul.c
 create mode 100644 src/dios-egraphs/Diospyros/benchmarks/optimized/mat-mul.c

diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp
index f6f49c89..ad5c99cd 100644
--- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp
+++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp
@@ -5,11 +5,14 @@
 #include 
 
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
@@ -27,6 +30,7 @@ struct LoadStoreMovementPass : public FunctionPass {
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
         AU.addRequired();
+        AU.addRequired();
     }
 
     /**
@@ -34,6 +38,7 @@
      */
     void rewrite_loads(Function &F) {
         AliasAnalysis *AA = &getAnalysis().getAAResults();
+
         for (auto &B : F) {
             // Grab all instructions
             std::vector all_instrs = {};
@@ -51,7 +56,7 @@
 
             // Place any non-Load Instructions at the end of the list of
             // instructions
-            if (!isa(instr)) {
+            if (!(isa(instr) || isa(instr))) {
                 final_instrs_vec.push_back(instr);
                 continue;
             }
@@ -125,14 +130,38 @@
                     if (prior_instr->mayReadOrWriteMemory()) {
                         Value *prior_addr = NULL;
                         if (isa(prior_instr)) {
-                            prior_addr = prior_instr->getOperand(0);
+                            prior_addr = dyn_cast(prior_instr)
+                                             ->getPointerOperand();
+                            // prior_addr = prior_instr->getOperand(0);
                         } else if (isa(prior_instr)) {
-                            prior_addr = prior_instr->getOperand(1);
+                            prior_addr = dyn_cast(prior_instr)
+                                             ->getPointerOperand();
+                            // prior_addr = prior_instr->getOperand(1);
                         } else {
                             throw "Unmatched Instruction Type";
                         }
-                        Value *load_addr = load_instr->getOperand(0);
-                        if (!AA->isNoAlias(load_addr, prior_addr)) {
+                        Value *load_addr = dyn_cast(load_instr);
+                        if (isa(load_instr)) {
+                            load_addr = dyn_cast(load_instr)
+                                            ->getPointerOperand();
+                            // load_addr = load_instr->getOperand(0);
+                        } 
+ assert(load_addr != NULL); + if (!AA->isNoAlias( + load_addr, + LocationSize::precise( + load_addr->getType() + ->getPrimitiveSizeInBits()), + prior_addr, + LocationSize::precise( + prior_addr->getType() + ->getPrimitiveSizeInBits())) || + AA->isMustAlias( + load_addr, + prior_addr)) { // IDK WTF is happening, but + // apparently, the same pointers + // that mod / ref causes no + // alias?! final_instrs_vec.insert( final_instrs_vec.begin() + insertion_offset, load_instr); @@ -141,6 +170,7 @@ struct LoadStoreMovementPass : public FunctionPass { } // Otherwise, keep pushing back the load instruction --insertion_offset; + assert(insertion_offset >= 0); } } @@ -249,7 +279,7 @@ struct LoadStoreMovementPass : public FunctionPass { // Place any non-Load Instructions at the end of the list of // instructions - if (!isa(instr)) { + if (!(isa(instr) || isa(instr))) { final_instrs_vec.push_back(instr); continue; } @@ -296,6 +326,30 @@ struct LoadStoreMovementPass : public FunctionPass { break; } + // If the prior instruction is used in the store's + // arguments, do not push it back + // int num_operands = store_instr->uses(); + bool break_while = false; + // https://stackoverflow.com/questions/35370195/llvm-difference-between-uses-and-user-in-instruction-or-value-classes + for (auto U : store_instr->users()) { + if (auto use_instr = dyn_cast(U)) { + for (Instruction *older_instr : final_instrs_vec) { + if (use_instr == older_instr) { + final_instrs_vec.insert( + final_instrs_vec.begin() + + insertion_offset, + store_instr); + break_while = true; + break; + } + } + } + } + + if (break_while) { + break; + } + // If the prior instruction alias with the store // instruction, do not push the store back if (prior_instr->mayReadOrWriteMemory()) { @@ -308,7 +362,16 @@ struct LoadStoreMovementPass : public FunctionPass { throw "Unmatched Instruction Type"; } Value *store_addr = store_instr->getOperand(1); - if (!AA->isNoAlias(store_addr, prior_addr)) { + if (!AA->isNoAlias( + store_addr, + LocationSize::precise( + store_addr->getType() + ->getPrimitiveSizeInBits()), + prior_addr, + LocationSize::precise( + prior_addr->getType() + ->getPrimitiveSizeInBits())) || + AA->isMustAlias(store_addr, prior_addr)) { final_instrs_vec.insert( final_instrs_vec.begin() + insertion_offset, store_instr); @@ -317,6 +380,7 @@ struct LoadStoreMovementPass : public FunctionPass { } // Otherwise, keep pushing back the str instruction --insertion_offset; + assert(insertion_offset >= 0); } } @@ -428,5 +492,11 @@ static void registerLoadStoreMovementPass(const PassManagerBuilder &, legacy::PassManagerBase &PM) { PM.add(new LoadStoreMovementPass()); } + +static RegisterPass X("lsmovement", + "Load Store Movement Pass", + false /* Only looks at CFG */, + true /* Analysis Pass */); + static RegisterStandardPasses RegisterMyPass( PassManagerBuilder::EP_EarlyAsPossible, registerLoadStoreMovementPass); \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index 72902bc3..920852ce 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -15,12 +15,20 @@ endif run-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp @$(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn 
--mem2reg --dse --adce build/clang.ll -o build/opt.ll - @opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll - @$(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=false build/aa.ll -o build/diospyros.ll + @opt -S -load=target/debug/libllvmlib.$(EXT) --basic-aa --lsmovement --adce build/opt.ll -o build/aa.ll + @$(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=true build/aa.ll -o build/diospyros.ll @opt -S --adce --dse build/diospyros.ll -o build/dce.ll @$(CLANG) build/dce.ll -o build/final @build/final +run-baseline: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp + @$(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) + @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll + @$(CLANG) -emit-llvm -S -fslp-vectorize -ftree-slp-vectorize build/opt.ll -o build/slp.ll + @opt -S --adce --dse build/slp.ll -o build/dce.ll + @$(CLANG) -o0 build/dce.ll -o build/final + @build/final + print-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll diff --git a/src/dios-egraphs/Diospyros/benchmarks/baseline/mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/baseline/mat-mul.c new file mode 100644 index 00000000..c468aa9a --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/baseline/mat-mul.c @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include +#include + +#define A_ROWS 12 +#define A_COLS 12 +#define B_COLS 12 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void matrix_multiply(float a_in[A_ROWS * A_COLS], float b_in[A_COLS * B_COLS], + float c_out[A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < 1000; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + printf("%ld milliseconds elapsed over 1000 iterations total\n", + (end - start)); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/optimized/mat-mul.c new file mode 100644 index 00000000..c468aa9a --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/optimized/mat-mul.c @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include +#include + +#define A_ROWS 12 +#define A_COLS 12 +#define B_COLS 12 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void matrix_multiply(float a_in[A_ROWS * A_COLS], float b_in[A_COLS * B_COLS], + float c_out[A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < 1000; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + printf("%ld milliseconds elapsed over 1000 iterations total\n", + (end - start)); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/2d-2d-conv.c b/src/dios-egraphs/Diospyros/c-tests/2d-2d-conv.c index 4653f9c1..372be34f 100644 --- a/src/dios-egraphs/Diospyros/c-tests/2d-2d-conv.c +++ b/src/dios-egraphs/Diospyros/c-tests/2d-2d-conv.c @@ -1,5 +1,5 @@ -#include #include +#include #define I_ROWS 2 #define I_COLS 2 @@ -8,8 +8,9 @@ #define O_ROWS ((I_ROWS + F_ROWS) - 1) #define O_COLS ((I_COLS + F_COLS) - 1) -void convolution(float mat_in[I_ROWS][I_COLS], float f_in[F_ROWS][F_COLS], - float mat_out[O_ROWS][O_COLS]) { +void convolution(float mat_in[restrict I_ROWS][I_COLS], + float f_in[restrict F_ROWS][F_COLS], + float mat_out[restrict O_ROWS][O_COLS]) { for (int outRow = 0; outRow < O_ROWS; outRow++) { for (int outCol = 0; outCol < O_COLS; outCol++) { for (int fRow = 0; fRow < F_ROWS; fRow++) { diff --git a/src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply.c index 371b7967..c26410bf 100644 --- a/src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply.c +++ b/src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply.c @@ -4,8 +4,9 @@ #define A_COLS 2 #define B_COLS 2 -void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], - float c_out[A_ROWS][B_COLS]) { +void matrix_multiply(float a_in[restrict A_ROWS][A_COLS], + float b_in[restrict A_COLS][B_COLS], + float c_out[restrict A_ROWS][B_COLS]) { for (int i = 0; i < A_ROWS; i++) { for (int j = 0; j < B_COLS; j++) { float sum = 0.0; diff --git a/src/dios-egraphs/Diospyros/c-tests/add.c b/src/dios-egraphs/Diospyros/c-tests/add.c index ebbcdf35..89d8646d 100644 --- a/src/dios-egraphs/Diospyros/c-tests/add.c +++ b/src/dios-egraphs/Diospyros/c-tests/add.c @@ -2,7 +2,8 @@ #include #define SIZE 4 -void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { +void sum(float a_in[restrict SIZE], float b_in[restrict SIZE], + float c_out[restrict SIZE]) { c_out[0] = a_in[0] + b_in[0]; c_out[1] = a_in[1] + b_in[1]; c_out[2] = a_in[2] + b_in[2]; diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index c208107a..c5cf6db0 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -36,8 +36,8 @@ llvm::cl::opt RunOpt("r", llvm::cl::desc("Enable Egg Optimization.")); llvm::cl::alias RunOptAlias("opt", llvm::cl::desc("Alias for -r"), llvm::cl::aliasopt(RunOpt)); -llvm::cl::opt PrintOpt("p", llvm::cl::desc("Print Egg Optimization.")); -llvm::cl::alias PrintOptAlias("print", llvm::cl::desc("Alias for -p"), +llvm::cl::opt PrintOpt("z", llvm::cl::desc("Print Egg Optimization.")); +llvm::cl::alias PrintOptAlias("print", llvm::cl::desc("Alias for -z"), 
llvm::cl::aliasopt(PrintOpt)); extern "C" void optimize(LLVMModuleRef mod, LLVMContextRef context, @@ -499,9 +499,15 @@ bool can_vectorize(Value *value) { return true; } else if (instr->getOpcode() == Instruction::FNeg) { return true; - } else if (isa_sqrt32(wrap(instr))) { - return true; } + // else if (isa(instr)) { + // return true; + // } else if (isa(instr)) { + // return true; + // } + // else if (isa_sqrt32(wrap(instr))) { + // return true; + // } return false; } @@ -548,7 +554,7 @@ struct DiospyrosPass : public FunctionPass { if (can_vectorize(val) && !vectorizable_flag) { if (!chunk_vector.empty()) { chunk_accumulator.push_back(chunk_vector); - } + } vectorizable_flag = true; chunk_vector = {wrap(val)}; } else if (can_vectorize(val) && vectorizable_flag) { @@ -558,7 +564,7 @@ struct DiospyrosPass : public FunctionPass { } else if (!can_vectorize(val) && vectorizable_flag) { if (!chunk_vector.empty()) { chunk_accumulator.push_back(chunk_vector); - } + } vectorizable_flag = false; chunk_vector = {wrap(val)}; } else { @@ -667,5 +673,10 @@ static void registerDiospyrosPass(const PassManagerBuilder &, legacy::PassManagerBase &PM) { PM.add(new DiospyrosPass()); } + +static RegisterPass X("diospyros", "Diospyros Pass", + false /* Only looks at CFG */, + true /* Analysis Pass */); + static RegisterStandardPasses RegisterMyPass( PassManagerBuilder::EP_EarlyAsPossible, registerDiospyrosPass); diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 2f022d0c..4d337282 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -4,131 +4,134 @@ use egg::*; use libc::size_t; use llvm::{core::*, prelude::*, LLVMOpcode::*, LLVMRealPredicate}; use std::{ - cmp, - collections::{BTreeMap, BTreeSet}, - os::raw::c_char, - slice::from_raw_parts, + cmp, + collections::{BTreeMap, BTreeSet}, + os::raw::c_char, + slice::from_raw_parts, }; extern "C" { - fn _llvm_index(val: LLVMValueRef, index: i32) -> i32; - fn _llvm_name(val: LLVMValueRef) -> *const c_char; - fn _isa_unop(val: LLVMValueRef) -> bool; - fn _isa_bop(val: LLVMValueRef) -> bool; - fn isa_constant(val: LLVMValueRef) -> bool; - fn isa_constfp(val: LLVMValueRef) -> bool; - fn _isa_gep(val: LLVMValueRef) -> bool; - fn _isa_load(val: LLVMValueRef) -> bool; - fn _isa_store(val: LLVMValueRef) -> bool; - fn isa_argument(val: LLVMValueRef) -> bool; - fn _isa_call(val: LLVMValueRef) -> bool; - fn _isa_fptrunc(val: LLVMValueRef) -> bool; - fn _isa_fpext(val: LLVMValueRef) -> bool; - fn _isa_alloca(val: LLVMValueRef) -> bool; - fn _isa_phi(val: LLVMValueRef) -> bool; - fn _isa_sextint(val: LLVMValueRef) -> bool; - fn _isa_sitofp(val: LLVMValueRef) -> bool; - fn isa_constaggregatezero(val: LLVMValueRef) -> bool; - fn _isa_constaggregate(val: LLVMValueRef) -> bool; - fn isa_integertype(val: LLVMValueRef) -> bool; - fn _isa_intptr(val: LLVMValueRef) -> bool; - fn _isa_floatptr(val: LLVMValueRef) -> bool; - fn _isa_floattype(val: LLVMValueRef) -> bool; - fn _isa_bitcast(val: LLVMValueRef) -> bool; - fn isa_sqrt32(val: LLVMValueRef) -> bool; - fn _isa_sqrt64(val: LLVMValueRef) -> bool; - fn get_constant_float(val: LLVMValueRef) -> f32; - fn build_constant_float(n: f64, context: LLVMContextRef) -> LLVMValueRef; + fn _llvm_index(val: LLVMValueRef, index: i32) -> i32; + fn _llvm_name(val: LLVMValueRef) -> *const c_char; + fn _isa_unop(val: LLVMValueRef) -> bool; + fn _isa_bop(val: LLVMValueRef) -> bool; + fn isa_constant(val: LLVMValueRef) -> bool; + fn isa_constfp(val: LLVMValueRef) 
-> bool; + fn _isa_gep(val: LLVMValueRef) -> bool; + fn _isa_load(val: LLVMValueRef) -> bool; + fn _isa_store(val: LLVMValueRef) -> bool; + fn isa_argument(val: LLVMValueRef) -> bool; + fn _isa_call(val: LLVMValueRef) -> bool; + fn _isa_fptrunc(val: LLVMValueRef) -> bool; + fn _isa_fpext(val: LLVMValueRef) -> bool; + fn _isa_alloca(val: LLVMValueRef) -> bool; + fn _isa_phi(val: LLVMValueRef) -> bool; + fn _isa_sextint(val: LLVMValueRef) -> bool; + fn _isa_sitofp(val: LLVMValueRef) -> bool; + fn isa_constaggregatezero(val: LLVMValueRef) -> bool; + fn _isa_constaggregate(val: LLVMValueRef) -> bool; + fn isa_integertype(val: LLVMValueRef) -> bool; + fn _isa_intptr(val: LLVMValueRef) -> bool; + fn _isa_floatptr(val: LLVMValueRef) -> bool; + fn _isa_floattype(val: LLVMValueRef) -> bool; + fn _isa_bitcast(val: LLVMValueRef) -> bool; + fn isa_sqrt32(val: LLVMValueRef) -> bool; + fn _isa_sqrt64(val: LLVMValueRef) -> bool; + fn get_constant_float(val: LLVMValueRef) -> f32; + fn build_constant_float(n: f64, context: LLVMContextRef) -> LLVMValueRef; } static mut ARG_IDX: u32 = 0; static mut REG_IDX: u32 = 0; unsafe fn gen_arg_idx() -> u32 { - ARG_IDX += 1; - return ARG_IDX; + ARG_IDX += 1; + return ARG_IDX; } unsafe fn gen_reg_idx() -> u32 { - REG_IDX += 1; - return REG_IDX; + REG_IDX += 1; + return REG_IDX; } // Reference Comparison: https://www.reddit.com/r/rust/comments/2r3wjk/is_there_way_to_compare_objects_by_address_in_rust/ // Compares whether addresses of LLVMValueRefs are the same. // Not the contents of the Value Refs fn cmp_val_ref_address(a1: &llvm::LLVMValue, a2: &llvm::LLVMValue) -> bool { - a1 as *const _ == a2 as *const _ + a1 as *const _ == a2 as *const _ } fn _cmp_typ(a1: &LLVMTypeRef, a2: &LLVMTypeRef) -> bool { - a1 as *const _ == a2 as *const _ + a1 as *const _ == a2 as *const _ } /// Converts LLVMValueRef binop to equivalent VecLang Binop node unsafe fn choose_binop(bop: &LLVMValueRef, ids: [Id; 2]) -> VecLang { - match LLVMGetInstructionOpcode(*bop) { - LLVMFAdd => VecLang::Add(ids), - LLVMFMul => VecLang::Mul(ids), - LLVMFSub => VecLang::Minus(ids), - LLVMFDiv => VecLang::Div(ids), - _ => panic!("Choose_Binop: Opcode Match Error"), - } + match LLVMGetInstructionOpcode(*bop) { + LLVMFAdd => VecLang::Add(ids), + LLVMFMul => VecLang::Mul(ids), + LLVMFSub => VecLang::Minus(ids), + LLVMFDiv => VecLang::Div(ids), + _ => panic!("Choose_Binop: Opcode Match Error"), + } } /// Translates VecLang binop expression node to the corresponding LLVMValueRef unsafe fn translate_binop( - enode: &VecLang, - left: LLVMValueRef, - right: LLVMValueRef, - builder: LLVMBuilderRef, - name: *const c_char, + enode: &VecLang, + left: LLVMValueRef, + right: LLVMValueRef, + builder: LLVMBuilderRef, + name: *const c_char, ) -> LLVMValueRef { - match enode { - VecLang::VecAdd(_) | VecLang::Add(_) => LLVMBuildFAdd(builder, left, right, name), - VecLang::VecMul(_) | VecLang::Mul(_) => LLVMBuildFMul(builder, left, right, name), - VecLang::VecMinus(_) | VecLang::Minus(_) => LLVMBuildFSub(builder, left, right, name), - VecLang::VecDiv(_) | VecLang::Div(_) => LLVMBuildFDiv(builder, left, right, name), - // use binary bitwise operators for or / and - VecLang::Or(_) => LLVMBuildOr(builder, left, right, name), - VecLang::And(_) => LLVMBuildAnd(builder, left, right, name), - VecLang::Lt(_) => LLVMBuildFCmp(builder, LLVMRealPredicate::LLVMRealOLT, left, right, name), - _ => panic!("Not a vector or scalar binop."), - } + match enode { + VecLang::VecAdd(_) | VecLang::Add(_) => LLVMBuildFAdd(builder, left, right, name), + 
VecLang::VecMul(_) | VecLang::Mul(_) => LLVMBuildFMul(builder, left, right, name), + VecLang::VecMinus(_) | VecLang::Minus(_) => LLVMBuildFSub(builder, left, right, name), + VecLang::VecDiv(_) | VecLang::Div(_) => LLVMBuildFDiv(builder, left, right, name), + // use binary bitwise operators for or / and + VecLang::Or(_) => LLVMBuildOr(builder, left, right, name), + VecLang::And(_) => LLVMBuildAnd(builder, left, right, name), + VecLang::Lt(_) => LLVMBuildFCmp(builder, LLVMRealPredicate::LLVMRealOLT, left, right, name), + _ => panic!("Not a vector or scalar binop."), + } } /// Translates VecLang unop expression node to the corresponding LLVMValueRef unsafe fn translate_unop( - enode: &VecLang, - n: LLVMValueRef, - builder: LLVMBuilderRef, - context: LLVMContextRef, - module: LLVMModuleRef, - name: *const c_char, + enode: &VecLang, + n: LLVMValueRef, + builder: LLVMBuilderRef, + context: LLVMContextRef, + module: LLVMModuleRef, + name: *const c_char, ) -> LLVMValueRef { - match enode { - VecLang::Sgn(_) => { - let one = LLVMConstReal(LLVMFloatTypeInContext(context), 1 as f64); - let param_types = [ - LLVMFloatTypeInContext(context), - LLVMFloatTypeInContext(context), - ] - .as_mut_ptr(); - let fn_type = LLVMFunctionType(LLVMFloatTypeInContext(context), param_types, 2, 0 as i32); - let func = LLVMAddFunction(module, b"llvm.copysign.f32\0".as_ptr() as *const _, fn_type); - let args = [one, n].as_mut_ptr(); - LLVMBuildCall(builder, func, args, 2, name) - } - VecLang::Sqrt(_) => { - let param_types = [LLVMFloatTypeInContext(context)].as_mut_ptr(); - let fn_type = LLVMFunctionType(LLVMFloatTypeInContext(context), param_types, 1, 0 as i32); - let func = LLVMAddFunction(module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); - let args = [n].as_mut_ptr(); - LLVMBuildCall(builder, func, args, 1, name) + match enode { + VecLang::Sgn(_) => { + let one = LLVMConstReal(LLVMFloatTypeInContext(context), 1 as f64); + let param_types = [ + LLVMFloatTypeInContext(context), + LLVMFloatTypeInContext(context), + ] + .as_mut_ptr(); + let fn_type = + LLVMFunctionType(LLVMFloatTypeInContext(context), param_types, 2, 0 as i32); + let func = + LLVMAddFunction(module, b"llvm.copysign.f32\0".as_ptr() as *const _, fn_type); + let args = [one, n].as_mut_ptr(); + LLVMBuildCall(builder, func, args, 2, name) + } + VecLang::Sqrt(_) => { + let param_types = [LLVMFloatTypeInContext(context)].as_mut_ptr(); + let fn_type = + LLVMFunctionType(LLVMFloatTypeInContext(context), param_types, 1, 0 as i32); + let func = LLVMAddFunction(module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); + let args = [n].as_mut_ptr(); + LLVMBuildCall(builder, func, args, 1, name) + } + VecLang::Neg(_) => LLVMBuildFNeg(builder, n, name), + _ => panic!("Not a scalar unop."), } - VecLang::Neg(_) => LLVMBuildFNeg(builder, n, name), - _ => panic!("Not a scalar unop."), - } } /// Main function to optimize: Takes in a basic block of instructions, @@ -136,86 +139,86 @@ unsafe fn translate_unop( #[no_mangle] pub fn optimize( - module: LLVMModuleRef, - context: LLVMContextRef, - builder: LLVMBuilderRef, - chunk_instrs: *const LLVMValueRef, - chunk_size: size_t, - restricted_instrs: *const LLVMValueRef, - restricted_size: size_t, - run_egg: bool, - print_opt: bool, + module: LLVMModuleRef, + context: LLVMContextRef, + builder: LLVMBuilderRef, + chunk_instrs: *const LLVMValueRef, + chunk_size: size_t, + restricted_instrs: *const LLVMValueRef, + restricted_size: size_t, + run_egg: bool, + print_opt: bool, ) -> () { - unsafe { - // preprocessing of 
instructions - let chunk_llvm_instrs = from_raw_parts(chunk_instrs, chunk_size); - let restricted_llvm_instrs = from_raw_parts(restricted_instrs, restricted_size); - - // llvm to egg - let (egg_expr, llvm2egg_metadata) = - llvm_to_egg_main(chunk_llvm_instrs, restricted_llvm_instrs, run_egg); - - // Bail if no egg Nodes to optimize - if egg_expr.as_ref().is_empty() { - eprintln!("No Egg Nodes in Optimization Vector"); - return; - } - - // optimization pass - if print_opt { - eprintln!("{}", egg_expr.pretty(10)); - } - let mut best_egg_expr = egg_expr.clone(); - if run_egg { - let pair = rules::run(&egg_expr, 180, true, !run_egg); - best_egg_expr = pair.1; + unsafe { + // preprocessing of instructions + let chunk_llvm_instrs = from_raw_parts(chunk_instrs, chunk_size); + let restricted_llvm_instrs = from_raw_parts(restricted_instrs, restricted_size); + + // llvm to egg + let (egg_expr, llvm2egg_metadata) = + llvm_to_egg_main(chunk_llvm_instrs, restricted_llvm_instrs, run_egg); + + // Bail if no egg Nodes to optimize + if egg_expr.as_ref().is_empty() { + eprintln!("No Egg Nodes in Optimization Vector"); + return; + } + + // optimization pass + if print_opt { + eprintln!("{}", egg_expr.pretty(10)); + } + let mut best_egg_expr = egg_expr.clone(); + if run_egg { + let pair = rules::run(&egg_expr, 180, true, !run_egg); + best_egg_expr = pair.1; + } + if print_opt { + eprintln!("{}", best_egg_expr.pretty(10)); + } + + // egg to llvm + egg_to_llvm_main( + best_egg_expr, + &llvm2egg_metadata, + module, + context, + builder, + run_egg, + ); } - if print_opt { - eprintln!("{}", best_egg_expr.pretty(10)); - } - - // egg to llvm - egg_to_llvm_main( - best_egg_expr, - &llvm2egg_metadata, - module, - context, - builder, - run_egg, - ); - } } // ------------ NEW CONVERSION FROM LLVM IR TO EGG EXPRESSIONS ------- enum LLVMOpType { - Argument, - Constant, - FNeg, - FAdd, - FSub, - FMul, - FDiv, - Sqrt32, - // TODO: SGN signum - UnhandledLLVMOpCode, + Argument, + Constant, + FNeg, + FAdd, + FSub, + FMul, + FDiv, + Sqrt32, + // TODO: SGN signum + UnhandledLLVMOpCode, } unsafe fn get_pow2(n: u32) -> u32 { - let mut pow = 1; - while pow < n { - pow *= 2; - } - return pow; + let mut pow = 1; + while pow < n { + pow *= 2; + } + return pow; } fn is_pow2(n: u32) -> bool { - if n == 1 { - return true; - } else if n % 2 == 1 { - return false; - } - return is_pow2(n / 2); + if n == 1 { + return true; + } else if n % 2 == 1 { + return false; + } + return is_pow2(n / 2); } /// New Pad Vector should round the number of elements up to a power of 2, and then recursive @@ -223,261 +226,261 @@ fn is_pow2(n: u32) -> bool { /// Raises assertion error if width is not a power of 2 /// If the vector has less than the width, we do not pad, and just append that vector to enodevect unsafe fn balanced_pad_vector<'a>( - binop_vec: &mut Vec, - enode_vec: &'a mut Vec, + binop_vec: &mut Vec, + enode_vec: &'a mut Vec, ) -> &'a mut Vec { - let width = config::vector_width(); - assert!(is_pow2(width as u32)); - let length = binop_vec.len(); - assert!( - length > 0, - "There must be 1 or more operators to vectorize." 
- ); - // Check vector less than width, and then return - if length < width { - enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); - return enode_vec; - } - let closest_pow2 = get_pow2(cmp::max(length, width) as u32); - let diff = closest_pow2 - (length as u32); - for _ in 0..diff { - let zero = VecLang::Num(0); - enode_vec.push(zero); - let zero_idx = enode_vec.len() - 1; - binop_vec.push(Id::from(zero_idx)); - } - return build_concat(width, binop_vec, enode_vec); + let width = config::vector_width(); + assert!(is_pow2(width as u32)); + let length = binop_vec.len(); + assert!( + length > 0, + "There must be 1 or more operators to vectorize." + ); + // Check vector less than width, and then return + if length < width { + enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); + return enode_vec; + } + let closest_pow2 = get_pow2(cmp::max(length, width) as u32); + let diff = closest_pow2 - (length as u32); + for _ in 0..diff { + let zero = VecLang::Num(0); + enode_vec.push(zero); + let zero_idx = enode_vec.len() - 1; + binop_vec.push(Id::from(zero_idx)); + } + return build_concat(width, binop_vec, enode_vec); } /// Recursively concatenate vectors together unsafe fn build_concat<'a>( - lane_width: usize, - binop_vec: &mut Vec, - enode_vec: &'a mut Vec, + lane_width: usize, + binop_vec: &mut Vec, + enode_vec: &'a mut Vec, ) -> &'a mut Vec { - if binop_vec.len() == lane_width { - enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); - return enode_vec; - } - let num_binops = binop_vec.len(); - let halfway = num_binops / 2; - let (mut left, mut right) = (Vec::new(), Vec::new()); - for (i, b) in binop_vec.iter().enumerate() { - if i < halfway { - left.push(*b); - } else { - right.push(*b); + if binop_vec.len() == lane_width { + enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); + return enode_vec; } - } - assert_eq!(left.len(), right.len()); - assert_eq!(left.len() + right.len(), num_binops); - assert_eq!(left.len() % lane_width, 0); - assert_eq!(right.len() % lane_width, 0); - let enode_vec1 = build_concat(lane_width, &mut left, enode_vec); - let idx1 = enode_vec1.len() - 1; - let enode_vec2 = build_concat(lane_width, &mut right, enode_vec1); - let idx2 = enode_vec2.len() - 1; - enode_vec2.push(VecLang::Concat([Id::from(idx1), Id::from(idx2)])); - return enode_vec2; + let num_binops = binop_vec.len(); + let halfway = num_binops / 2; + let (mut left, mut right) = (Vec::new(), Vec::new()); + for (i, b) in binop_vec.iter().enumerate() { + if i < halfway { + left.push(*b); + } else { + right.push(*b); + } + } + assert_eq!(left.len(), right.len()); + assert_eq!(left.len() + right.len(), num_binops); + assert_eq!(left.len() % lane_width, 0); + assert_eq!(right.len() % lane_width, 0); + let enode_vec1 = build_concat(lane_width, &mut left, enode_vec); + let idx1 = enode_vec1.len() - 1; + let enode_vec2 = build_concat(lane_width, &mut right, enode_vec1); + let idx2 = enode_vec2.len() - 1; + enode_vec2.push(VecLang::Concat([Id::from(idx1), Id::from(idx2)])); + return enode_vec2; } unsafe fn _llvm_print(inst: LLVMValueRef) -> () { - LLVMDumpValue(inst); - println!(); + LLVMDumpValue(inst); + println!(); } unsafe fn _llvm_recursive_print(inst: LLVMValueRef) -> () { - if isa_argument(inst) { - return LLVMDumpValue(inst); - } else if isa_constant(inst) { - return LLVMDumpValue(inst); - } - let num_ops = LLVMGetNumOperands(inst); - for i in 0..num_ops { - let operand = LLVMGetOperand(inst, i as u32); - _llvm_recursive_print(operand); - print!(" "); - 
} - println!(); - LLVMDumpValue(inst); - println!(); - return; + if isa_argument(inst) { + return LLVMDumpValue(inst); + } else if isa_constant(inst) { + return LLVMDumpValue(inst); + } + let num_ops = LLVMGetNumOperands(inst); + for i in 0..num_ops { + let operand = LLVMGetOperand(inst, i as u32); + _llvm_recursive_print(operand); + print!(" "); + } + println!(); + LLVMDumpValue(inst); + println!(); + return; } unsafe fn isa_fadd(llvm_instr: LLVMValueRef) -> bool { - match LLVMGetInstructionOpcode(llvm_instr) { - LLVMFAdd => true, - _ => false, - } + match LLVMGetInstructionOpcode(llvm_instr) { + LLVMFAdd => true, + _ => false, + } } unsafe fn isa_fsub(llvm_instr: LLVMValueRef) -> bool { - match LLVMGetInstructionOpcode(llvm_instr) { - LLVMFSub => true, - _ => false, - } + match LLVMGetInstructionOpcode(llvm_instr) { + LLVMFSub => true, + _ => false, + } } unsafe fn isa_fmul(llvm_instr: LLVMValueRef) -> bool { - match LLVMGetInstructionOpcode(llvm_instr) { - LLVMFMul => true, - _ => false, - } + match LLVMGetInstructionOpcode(llvm_instr) { + LLVMFMul => true, + _ => false, + } } unsafe fn isa_fdiv(llvm_instr: LLVMValueRef) -> bool { - match LLVMGetInstructionOpcode(llvm_instr) { - LLVMFDiv => true, - _ => false, - } + match LLVMGetInstructionOpcode(llvm_instr) { + LLVMFDiv => true, + _ => false, + } } unsafe fn isa_fneg(llvm_instr: LLVMValueRef) -> bool { - match LLVMGetInstructionOpcode(llvm_instr) { - LLVMFNeg => true, - _ => false, - } + match LLVMGetInstructionOpcode(llvm_instr) { + LLVMFNeg => true, + _ => false, + } } unsafe fn isa_supported_binop(llvm_instr: LLVMValueRef) -> bool { - return isa_fadd(llvm_instr) - || isa_fmul(llvm_instr) - || isa_fdiv(llvm_instr) - || isa_fsub(llvm_instr); + return isa_fadd(llvm_instr) + || isa_fmul(llvm_instr) + || isa_fdiv(llvm_instr) + || isa_fsub(llvm_instr); } unsafe fn isa_supported_unop(llvm_instr: LLVMValueRef) -> bool { - return isa_fneg(llvm_instr); + return isa_fneg(llvm_instr); } unsafe fn match_llvm_op(llvm_instr: &LLVMValueRef) -> LLVMOpType { - if isa_argument(*llvm_instr) { - return LLVMOpType::Argument; - } else if isa_fadd(*llvm_instr) { - return LLVMOpType::FAdd; - } else if isa_fsub(*llvm_instr) { - return LLVMOpType::FSub; - } else if isa_fmul(*llvm_instr) { - return LLVMOpType::FMul; - } else if isa_fdiv(*llvm_instr) { - return LLVMOpType::FDiv; - } else if isa_fneg(*llvm_instr) { - return LLVMOpType::FNeg; - } else if isa_constant(*llvm_instr) { - return LLVMOpType::Constant; - } else if isa_sqrt32(*llvm_instr) { - return LLVMOpType::Sqrt32; - } else { - return LLVMOpType::UnhandledLLVMOpCode; - } + if isa_argument(*llvm_instr) { + return LLVMOpType::Argument; + } else if isa_fadd(*llvm_instr) { + return LLVMOpType::FAdd; + } else if isa_fsub(*llvm_instr) { + return LLVMOpType::FSub; + } else if isa_fmul(*llvm_instr) { + return LLVMOpType::FMul; + } else if isa_fdiv(*llvm_instr) { + return LLVMOpType::FDiv; + } else if isa_fneg(*llvm_instr) { + return LLVMOpType::FNeg; + } else if isa_constant(*llvm_instr) { + return LLVMOpType::Constant; + } else if isa_sqrt32(*llvm_instr) { + return LLVMOpType::Sqrt32; + } else { + return LLVMOpType::UnhandledLLVMOpCode; + } } unsafe fn choose_unop(unop: &LLVMValueRef, id: Id) -> VecLang { - match LLVMGetInstructionOpcode(*unop) { - LLVMFNeg => VecLang::Neg([id]), - _ => panic!("Choose_Unop: Opcode Match Error"), - } + match LLVMGetInstructionOpcode(*unop) { + LLVMFNeg => VecLang::Neg([id]), + _ => panic!("Choose_Unop: Opcode Match Error"), + } } /// LLVM2EggState Contains Egg to LLVM 
Translation Metadata #[derive(Debug, Clone)] struct LLVM2EggState { - llvm2reg: BTreeMap, - llvm2arg: BTreeMap, - instructions_in_chunk: BTreeSet, - restricted_instructions: BTreeSet, - prior_translated_instructions: BTreeSet, - start_instructions: Vec, - start_ids: Vec, + llvm2reg: BTreeMap, + llvm2arg: BTreeMap, + instructions_in_chunk: BTreeSet, + restricted_instructions: BTreeSet, + prior_translated_instructions: BTreeSet, + start_instructions: Vec, + start_ids: Vec, } /// Translates LLVM Arg to an Egg Argument Node unsafe fn arg_to_egg( - llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, - next_node_idx: u32, - translation_metadata: &mut LLVM2EggState, + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { - assert!(isa_argument(llvm_instr)); - let argument_idx = gen_arg_idx(); - let argument_node = VecLang::Arg(argument_idx); - egg_nodes.push(argument_node.clone()); - assert!(!translation_metadata.llvm2arg.contains_key(&llvm_instr)); - translation_metadata - .llvm2arg - .insert(llvm_instr, argument_node); - return (egg_nodes, next_node_idx + 1); + assert!(isa_argument(llvm_instr)); + let argument_idx = gen_arg_idx(); + let argument_node = VecLang::Arg(argument_idx); + egg_nodes.push(argument_node.clone()); + assert!(!translation_metadata.llvm2arg.contains_key(&llvm_instr)); + translation_metadata + .llvm2arg + .insert(llvm_instr, argument_node); + return (egg_nodes, next_node_idx + 1); } /// Translates Supported Binop Instruction to an Egg Bunary Operator Node /// /// Supported Binary Operators are: FAdd, FSub, FMul, FDiv unsafe fn bop_to_egg( - llvm_instr: LLVMValueRef, - egg_nodes: Vec, - next_node_idx: u32, - translation_metadata: &mut LLVM2EggState, + llvm_instr: LLVMValueRef, + egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { - assert!(isa_supported_binop(llvm_instr)); - let left = LLVMGetOperand(llvm_instr, 0); - let right = LLVMGetOperand(llvm_instr, 1); - let (left_egg_nodes, left_next_idx) = - llvm_to_egg(left, egg_nodes, next_node_idx, translation_metadata); - let (mut right_egg_nodes, right_next_idx) = - llvm_to_egg(right, left_egg_nodes, left_next_idx, translation_metadata); - let ids = [ - Id::from((left_next_idx - 1) as usize), - Id::from((right_next_idx - 1) as usize), - ]; - right_egg_nodes.push(choose_binop(&llvm_instr, ids)); - (right_egg_nodes, right_next_idx + 1) + assert!(isa_supported_binop(llvm_instr)); + let left = LLVMGetOperand(llvm_instr, 0); + let right = LLVMGetOperand(llvm_instr, 1); + let (left_egg_nodes, left_next_idx) = + llvm_to_egg(left, egg_nodes, next_node_idx, translation_metadata); + let (mut right_egg_nodes, right_next_idx) = + llvm_to_egg(right, left_egg_nodes, left_next_idx, translation_metadata); + let ids = [ + Id::from((left_next_idx - 1) as usize), + Id::from((right_next_idx - 1) as usize), + ]; + right_egg_nodes.push(choose_binop(&llvm_instr, ids)); + (right_egg_nodes, right_next_idx + 1) } /// Translates Supported Unop Instruction to an Egg Unary Operator Node /// /// Supported Unary Operators are: FNeg unsafe fn unop_to_egg( - llvm_instr: LLVMValueRef, - egg_nodes: Vec, - next_node_idx: u32, - translation_metadata: &mut LLVM2EggState, + llvm_instr: LLVMValueRef, + egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { - assert!(isa_supported_unop(llvm_instr)); - let neg_expr = LLVMGetOperand(llvm_instr, 0); - let (mut new_egg_nodes, new_next_idx) = - 
llvm_to_egg(neg_expr, egg_nodes, next_node_idx, translation_metadata); - let id = Id::from((new_next_idx - 1) as usize); - new_egg_nodes.push(choose_unop(&llvm_instr, id)); - (new_egg_nodes, new_next_idx + 1) + assert!(isa_supported_unop(llvm_instr)); + let neg_expr = LLVMGetOperand(llvm_instr, 0); + let (mut new_egg_nodes, new_next_idx) = + llvm_to_egg(neg_expr, egg_nodes, next_node_idx, translation_metadata); + let id = Id::from((new_next_idx - 1) as usize); + new_egg_nodes.push(choose_unop(&llvm_instr, id)); + (new_egg_nodes, new_next_idx + 1) } /// Translates Const Instruction to an Egg Number Node unsafe fn const_to_egg( - llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, - next_node_idx: u32, - _translation_metadata: &mut LLVM2EggState, + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: u32, + _translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { - assert!(isa_constant(llvm_instr)); - let value = get_constant_float(llvm_instr); - egg_nodes.push(VecLang::Num(value as i32)); - (egg_nodes, next_node_idx + 1) + assert!(isa_constant(llvm_instr)); + let value = get_constant_float(llvm_instr); + egg_nodes.push(VecLang::Num(value as i32)); + (egg_nodes, next_node_idx + 1) } /// Translates Sqrt 32 Instruction to an Egg Square Root Node unsafe fn sqrt32_to_egg( - llvm_instr: LLVMValueRef, - egg_nodes: Vec, - next_node_idx: u32, - translation_metadata: &mut LLVM2EggState, + llvm_instr: LLVMValueRef, + egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { - assert!(isa_sqrt32(llvm_instr)); - let sqrt_operand = LLVMGetOperand(llvm_instr, 0); - let (mut new_enode_vec, new_next_node_idx) = - llvm_to_egg(sqrt_operand, egg_nodes, next_node_idx, translation_metadata); - let sqrt_node = VecLang::Sqrt([Id::from((new_next_node_idx - 1) as usize)]); - new_enode_vec.push(sqrt_node); - (new_enode_vec, new_next_node_idx + 1) + assert!(isa_sqrt32(llvm_instr)); + let sqrt_operand = LLVMGetOperand(llvm_instr, 0); + let (mut new_enode_vec, new_next_node_idx) = + llvm_to_egg(sqrt_operand, egg_nodes, next_node_idx, translation_metadata); + let sqrt_node = VecLang::Sqrt([Id::from((new_next_node_idx - 1) as usize)]); + new_enode_vec.push(sqrt_node); + (new_enode_vec, new_next_node_idx + 1) } /// Translates an Unhandled OpCode to an Egg Register. @@ -485,19 +488,19 @@ unsafe fn sqrt32_to_egg( /// This represents a blackbox computation that we bail on translating /// Assumes that the OpCode is actually a computation. If not, translation fails. unsafe fn unhandled_opcode_to_egg( - llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, - next_node_idx: u32, - translation_metadata: &mut LLVM2EggState, + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { - let register_idx = gen_reg_idx(); - let register_node = VecLang::Reg(register_idx); - egg_nodes.push(register_node.clone()); - assert!(!translation_metadata.llvm2reg.contains_key(&llvm_instr)); - translation_metadata - .llvm2reg - .insert(llvm_instr, register_node); - (egg_nodes, next_node_idx + 1) + let register_idx = gen_reg_idx(); + let register_node = VecLang::Reg(register_idx); + egg_nodes.push(register_node.clone()); + assert!(!translation_metadata.llvm2reg.contains_key(&llvm_instr)); + translation_metadata + .llvm2reg + .insert(llvm_instr, register_node); + (egg_nodes, next_node_idx + 1) } /// Recursively Translate LLVM Instruction to Egg Nodes. 
@@ -505,578 +508,603 @@ unsafe fn unhandled_opcode_to_egg( /// TODO: Take care of chunk boundaries: translation should never overreach a chunk /// TODO: May need to keep track of llvm instructions across chunks unsafe fn llvm_to_egg( - llvm_instr: LLVMValueRef, - mut egg_nodes: Vec, - next_node_idx: u32, - translation_metadata: &mut LLVM2EggState, + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { - // Mark instruction as translated, as it will be after it goes through the code below - if !translation_metadata - .prior_translated_instructions - .contains(&llvm_instr) - { - translation_metadata - .prior_translated_instructions - .insert(llvm_instr); - } - // If, on a different pass, the instruction was translated already, then - // just used the egg node representing the translation - if translation_metadata.llvm2reg.contains_key(&llvm_instr) { - let translated_egg_node = translation_metadata - .llvm2reg - .get(&llvm_instr) - .expect("Key must exist"); - egg_nodes.push(translated_egg_node.clone()); - return (egg_nodes, next_node_idx + 1); - } - // If the current llvm instruction is a "restricted" instruction, do not translate, but make it a register - if translation_metadata - .restricted_instructions - .contains(&llvm_instr) - { - return unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); - } - // If the current llvm instruction is not in the current chunk, we must return a register - // The current llvm instruction must not be a arguments, because arguments will be outside every chunk - if !translation_metadata - .instructions_in_chunk - .contains(&llvm_instr) - && !isa_argument(llvm_instr) - { - return unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); - } - // Recurse Backwards on the current instruction, translating its children, - // based on the opcode of the parent. 
- return match match_llvm_op(&llvm_instr) { - LLVMOpType::FAdd | LLVMOpType::FSub | LLVMOpType::FMul | LLVMOpType::FDiv => { - bop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + // Mark instruction as translated, as it will be after it goes through the code below + if !translation_metadata + .prior_translated_instructions + .contains(&llvm_instr) + { + translation_metadata + .prior_translated_instructions + .insert(llvm_instr); } - LLVMOpType::FNeg => unop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata), - LLVMOpType::Constant => { - const_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + // If, on a different pass, the instruction was translated already, then + // just used the egg node representing the translation + if translation_metadata.llvm2reg.contains_key(&llvm_instr) { + let translated_egg_node = translation_metadata + .llvm2reg + .get(&llvm_instr) + .expect("Key must exist"); + egg_nodes.push(translated_egg_node.clone()); + return (egg_nodes, next_node_idx + 1); } - LLVMOpType::Argument => arg_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata), - LLVMOpType::Sqrt32 => sqrt32_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata), - LLVMOpType::UnhandledLLVMOpCode => { - unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + // If the current llvm instruction is a "restricted" instruction, do not translate, but make it a register + if translation_metadata + .restricted_instructions + .contains(&llvm_instr) + { + return unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + } + // If the current llvm instruction is not in the current chunk, we must return a register + // The current llvm instruction must not be a arguments, because arguments will be outside every chunk + if !translation_metadata + .instructions_in_chunk + .contains(&llvm_instr) + && !isa_argument(llvm_instr) + { + return unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); } - }; + // Recurse Backwards on the current instruction, translating its children, + // based on the opcode of the parent. 
+ return match match_llvm_op(&llvm_instr) { + LLVMOpType::FAdd | LLVMOpType::FSub | LLVMOpType::FMul | LLVMOpType::FDiv => { + bop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::FNeg => unop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata), + LLVMOpType::Constant => { + const_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::Argument => { + arg_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::Sqrt32 => { + sqrt32_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::UnhandledLLVMOpCode => { + unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + }; } unsafe fn start_translating_llvm_to_egg( - llvm_instr: LLVMValueRef, - egg_nodes: Vec, - next_node_idx: u32, - translation_metadata: &mut LLVM2EggState, + llvm_instr: LLVMValueRef, + egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, ) -> (Vec, u32) { - translation_metadata.start_instructions.push(llvm_instr); - let pair_result = llvm_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); - translation_metadata - .start_ids - .push(Id::from((pair_result.1 - 1) as usize)); - pair_result + translation_metadata.start_instructions.push(llvm_instr); + let pair_result = llvm_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + translation_metadata + .start_ids + .push(Id::from((pair_result.1 - 1) as usize)); + pair_result } unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { - return match match_llvm_op(&llvm_instr) { - LLVMOpType::FAdd - | LLVMOpType::FMul - | LLVMOpType::FDiv - | LLVMOpType::FSub - | LLVMOpType::FNeg - | LLVMOpType::Constant - | LLVMOpType::Sqrt32 => true, - LLVMOpType::Argument | LLVMOpType::UnhandledLLVMOpCode => false, - }; + return match match_llvm_op(&llvm_instr) { + LLVMOpType::FAdd + | LLVMOpType::FMul + | LLVMOpType::FDiv + | LLVMOpType::FSub + | LLVMOpType::FNeg + | LLVMOpType::Constant + | LLVMOpType::Sqrt32 => true, + LLVMOpType::Argument | LLVMOpType::UnhandledLLVMOpCode => false, + }; } unsafe fn llvm_to_egg_main( - llvm_instrs_in_chunk: &[LLVMValueRef], - restricted_instrs: &[LLVMValueRef], - vectorize: bool, - // TODO: feed this in as an argument llvm_instr2egg_node: BTreeMap, + llvm_instrs_in_chunk: &[LLVMValueRef], + restricted_instrs: &[LLVMValueRef], + vectorize: bool, + // TODO: feed this in as an argument llvm_instr2egg_node: BTreeMap, ) -> (RecExpr, LLVM2EggState) { - let mut egg_nodes: Vec = Vec::new(); + let mut egg_nodes: Vec = Vec::new(); - // Map from (translated / opaque) llvm instructions to register egg graph nodes - let llvm_instr2reg_node: BTreeMap = BTreeMap::new(); - // Map from (translated) llvm instructions to argument egg graph nodes - let llvm_instr2arg_node: BTreeMap = BTreeMap::new(); + // Map from (translated / opaque) llvm instructions to register egg graph nodes + let llvm_instr2reg_node: BTreeMap = BTreeMap::new(); + // Map from (translated) llvm instructions to argument egg graph nodes + let llvm_instr2arg_node: BTreeMap = BTreeMap::new(); - // Ordered Vector of Starting LLVM instructions where translation began - let start_instructions: Vec = Vec::new(); + // Ordered Vector of Starting LLVM instructions where translation began + let start_instructions: Vec = Vec::new(); - // Ordered Set of Instructions in Chunk - let mut instructions_in_chunk: BTreeSet = BTreeSet::new(); - for llvm_instr in 
llvm_instrs_in_chunk.iter() { - instructions_in_chunk.insert(*llvm_instr); - } + // Ordered Set of Instructions in Chunk + let mut instructions_in_chunk: BTreeSet = BTreeSet::new(); + for llvm_instr in llvm_instrs_in_chunk.iter() { + instructions_in_chunk.insert(*llvm_instr); + } - // Ordered Set of Ids - let start_ids: Vec = Vec::new(); + // Ordered Set of Ids + let start_ids: Vec = Vec::new(); - // Ordered Set of Instructions NOT TO BE Translated, except as registers - let mut restricted_instrs_set: BTreeSet = BTreeSet::new(); - for llvm_instr in restricted_instrs.iter() { - restricted_instrs_set.insert(*llvm_instr); - } + // Ordered Set of Instructions NOT TO BE Translated, except as registers + let mut restricted_instrs_set: BTreeSet = BTreeSet::new(); + for llvm_instr in restricted_instrs.iter() { + restricted_instrs_set.insert(*llvm_instr); + } - // Invariant: every restricted instruction is in the chunk, using a pointer check - for restr_instr in restricted_instrs.iter() { - let mut found_match = false; - for instr in instructions_in_chunk.iter() { - if cmp_val_ref_address(&**restr_instr, &**instr) { - found_match = true; - break; - } + // Invariant: every restricted instruction is in the chunk, using a pointer check + for restr_instr in restricted_instrs.iter() { + let mut found_match = false; + for instr in instructions_in_chunk.iter() { + if cmp_val_ref_address(&**restr_instr, &**instr) { + found_match = true; + break; + } + } + if found_match { + continue; + } } - if found_match { - continue; + // Invariant: chunk instructions are not empty in size + assert!(!instructions_in_chunk.is_empty()); + + let prior_translated_instructions: BTreeSet = BTreeSet::new(); + + // State Variable To Hold Maps During Translation + let mut translation_metadata = LLVM2EggState { + llvm2reg: llvm_instr2reg_node, + llvm2arg: llvm_instr2arg_node, + instructions_in_chunk: instructions_in_chunk, + restricted_instructions: restricted_instrs_set, + prior_translated_instructions: prior_translated_instructions, + start_instructions: start_instructions, + start_ids: start_ids, + }; + + // Index of next node to translate + let mut next_node_idx: u32 = 0; + + // for each final instruction, iterate backwards from that final instruction and translate to egg + for llvm_instr in llvm_instrs_in_chunk.iter().rev() { + // only start translation back if it is a "translatable instruction" and it was not translated already + if can_start_translation_instr(*llvm_instr) + && !translation_metadata + .prior_translated_instructions + .contains(&llvm_instr) + { + let (new_egg_nodes, new_next_node_idx) = start_translating_llvm_to_egg( + *llvm_instr, + egg_nodes, + next_node_idx, + &mut translation_metadata, + ); + egg_nodes = new_egg_nodes; + next_node_idx = new_next_node_idx; + } } - } - // Invariant: chunk instructions are not empty in size - assert!(!instructions_in_chunk.is_empty()); - - let prior_translated_instructions: BTreeSet = BTreeSet::new(); - - // State Variable To Hold Maps During Translation - let mut translation_metadata = LLVM2EggState { - llvm2reg: llvm_instr2reg_node, - llvm2arg: llvm_instr2arg_node, - instructions_in_chunk: instructions_in_chunk, - restricted_instructions: restricted_instrs_set, - prior_translated_instructions: prior_translated_instructions, - start_instructions: start_instructions, - start_ids: start_ids, - }; - - // Index of next node to translate - let mut next_node_idx: u32 = 0; - - // for each final instruction, iterate backwards from that final instruction and translate to egg - for 
llvm_instr in llvm_instrs_in_chunk.iter().rev() { - // only start translation back if it is a "translatable instruction" and it was not translated already - if can_start_translation_instr(*llvm_instr) - && !translation_metadata - .prior_translated_instructions - .contains(&llvm_instr) - { - let (new_egg_nodes, new_next_node_idx) = start_translating_llvm_to_egg( - *llvm_instr, - egg_nodes, - next_node_idx, - &mut translation_metadata, - ); - egg_nodes = new_egg_nodes; - next_node_idx = new_next_node_idx; + + // For testing purposes: Handle no vectorization + if !vectorize { + let mut outer_vec_ids = Vec::new(); + for id in translation_metadata.start_ids.iter() { + outer_vec_ids.push(*id); + } + egg_nodes.push(VecLang::NoOptVec(outer_vec_ids.clone().into_boxed_slice())); + let rec_expr = RecExpr::from(egg_nodes); + return (rec_expr, translation_metadata); } - } - // For testing purposes: Handle no vectorization - if !vectorize { + // Generate a padded vector let mut outer_vec_ids = Vec::new(); for id in translation_metadata.start_ids.iter() { - outer_vec_ids.push(*id); + outer_vec_ids.push(*id); } - egg_nodes.push(VecLang::NoOptVec(outer_vec_ids.clone().into_boxed_slice())); - let rec_expr = RecExpr::from(egg_nodes); - return (rec_expr, translation_metadata); - } - - // Generate a padded vector - let mut outer_vec_ids = Vec::new(); - for id in translation_metadata.start_ids.iter() { - outer_vec_ids.push(*id); - } - balanced_pad_vector(&mut outer_vec_ids, &mut egg_nodes); + balanced_pad_vector(&mut outer_vec_ids, &mut egg_nodes); - let rec_expr = RecExpr::from(egg_nodes); + let rec_expr = RecExpr::from(egg_nodes); - return (rec_expr, translation_metadata); + return (rec_expr, translation_metadata); } /// Egg2LLVMState represent the state needed to translate from Egg to LLVM struct Egg2LLVMState<'a> { - llvm2egg_metadata: LLVM2EggState, - egg_nodes_vector: &'a [VecLang], - prior_translated_nodes: BTreeSet, - builder: LLVMBuilderRef, - context: LLVMContextRef, - module: LLVMModuleRef, + llvm2egg_metadata: LLVM2EggState, + egg_nodes_vector: &'a [VecLang], + prior_translated_nodes: BTreeSet, + builder: LLVMBuilderRef, + context: LLVMContextRef, + module: LLVMModuleRef, +} + +unsafe fn get_nodes_to_llvm( + egg_node: &VecLang, + translation_metadata: &mut Egg2LLVMState, +) -> LLVMValueRef { + // TODO: Make More Efficient with BTREEMAP? + let llvm2arg = &translation_metadata.llvm2egg_metadata.llvm2arg; + for (llvm_instr, arg_node) in llvm2arg.iter() { + // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. + if arg_node == egg_node { + assert!(isa_argument(*llvm_instr)); + return *llvm_instr; + } + } + panic!( + "Expected a successful lookup in llvm2arg, but cannot find Argument Egg Node: {:?}.", + egg_node + ); } unsafe fn arg_to_llvm( - egg_node: &VecLang, - translation_metadata: &mut Egg2LLVMState, + egg_node: &VecLang, + translation_metadata: &mut Egg2LLVMState, ) -> LLVMValueRef { - // TODO: Make More Efficient with BTREEMAP? - let llvm2arg = &translation_metadata.llvm2egg_metadata.llvm2arg; - for (llvm_instr, arg_node) in llvm2arg.iter() { - // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. - if arg_node == egg_node { - assert!(isa_argument(*llvm_instr)); - return *llvm_instr; + // TODO: Make More Efficient with BTREEMAP? 
+ let llvm2arg = &translation_metadata.llvm2egg_metadata.llvm2arg; + for (llvm_instr, arg_node) in llvm2arg.iter() { + // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. + if arg_node == egg_node { + assert!(isa_argument(*llvm_instr)); + return *llvm_instr; + } } - } - panic!( - "Expected a successful lookup in llvm2arg, but cannot find Argument Egg Node: {:?}.", - egg_node - ); + panic!( + "Expected a successful lookup in llvm2arg, but cannot find Argument Egg Node: {:?}.", + egg_node + ); } unsafe fn reg_to_llvm( - egg_node: &VecLang, - translation_metadata: &mut Egg2LLVMState, + egg_node: &VecLang, + translation_metadata: &mut Egg2LLVMState, ) -> LLVMValueRef { - // TODO: Make More Efficient with BTREEMAP? - let llvm2reg = &translation_metadata.llvm2egg_metadata.llvm2reg; - for (llvm_instr, reg_node) in llvm2reg.iter() { - // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. - if reg_node == egg_node { - assert!(!isa_argument(*llvm_instr)); - // do not clone an instruction translated earlier in the same chunk - if translation_metadata - .prior_translated_nodes - .contains(&*llvm_instr) - { - return *llvm_instr; - } - // do not clone an instruction translated in a prior basic block / prior chunk - if !translation_metadata - .llvm2egg_metadata - .instructions_in_chunk - .contains(&*llvm_instr) - { - return *llvm_instr; - } - let new_instr = LLVMInstructionClone(*llvm_instr); - LLVMInsertIntoBuilder(translation_metadata.builder, new_instr); - translation_metadata - .prior_translated_nodes - .insert(new_instr); - return new_instr; + // TODO: Make More Efficient with BTREEMAP? + let llvm2reg = &translation_metadata.llvm2egg_metadata.llvm2reg; + for (llvm_instr, reg_node) in llvm2reg.iter() { + // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. 
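+        // Three cases follow once the matching register node is found: reuse an
+        // instruction already re-emitted earlier in this chunk, reuse an
+        // out-of-chunk instruction as-is, or clone the in-chunk instruction at the
+        // current builder position and remember the clone for later lookups.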
+ if reg_node == egg_node { + assert!(!isa_argument(*llvm_instr)); + // do not clone an instruction translated earlier in the same chunk + if translation_metadata + .prior_translated_nodes + .contains(&*llvm_instr) + { + return *llvm_instr; + } + // do not clone an instruction translated in a prior basic block / prior chunk + if !translation_metadata + .llvm2egg_metadata + .instructions_in_chunk + .contains(&*llvm_instr) + { + return *llvm_instr; + } + let new_instr = LLVMInstructionClone(*llvm_instr); + LLVMInsertIntoBuilder(translation_metadata.builder, new_instr); + translation_metadata + .prior_translated_nodes + .insert(new_instr); + return new_instr; + } } - } - panic!( - "Expected a successful lookup in llvm2reg, but cannot find Register Egg Node: {:?}.", - egg_node - ); + panic!( + "Expected a successful lookup in llvm2reg, but cannot find Register Egg Node: {:?}.", + egg_node + ); } unsafe fn num_to_llvm(n: &i32, md: &mut Egg2LLVMState) -> LLVMValueRef { - LLVMConstReal(LLVMFloatTypeInContext(md.context), *n as f64) + LLVMConstReal(LLVMFloatTypeInContext(md.context), *n as f64) } unsafe fn vec_to_llvm(boxed_ids: &Box<[Id]>, md: &mut Egg2LLVMState) -> LLVMValueRef { - // Convert the Boxed Ids to a Vector, and generate a vector of zeros - // Invariant: idvec must not be empty - let idvec = boxed_ids.to_vec(); - let idvec_len = idvec.len(); - assert!( - !idvec.is_empty(), - "Id Vec Cannot be empty when converting Vector to an LLVM Vector" - ); - let mut zeros = Vec::new(); - for _ in 0..idvec_len { - zeros.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 0 as f64)); - } - - // Convert the Vector of Zeros to a Mut PTr to construct an LLVM Zero Vector - // Invariant: zeros must not be empty - assert!( - !zeros.is_empty(), - "Zeros Vector Cannot be empty when converting Vector to an LLVM Vector" - ); - let zeros_ptr = zeros.as_mut_ptr(); - let mut vector = LLVMConstVector(zeros_ptr, idvec.len() as u32); - for (idx, &eggid) in idvec.iter().enumerate() { - let elt = &md.egg_nodes_vector[usize::from(eggid)]; - let mut elt_val = egg_to_llvm(elt, md); - // TODO: Can We Eliminate this BitCast in the future?? - // With the new formulation, will we ever have an integer type? 
- // Check if the elt is an int - if isa_integertype(elt_val) { - elt_val = LLVMBuildBitCast( - md.builder, - elt_val, - LLVMFloatTypeInContext(md.context), - b"\0".as_ptr() as *const _, - ); + // Convert the Boxed Ids to a Vector, and generate a vector of zeros + // Invariant: idvec must not be empty + let idvec = boxed_ids.to_vec(); + let idvec_len = idvec.len(); + assert!( + !idvec.is_empty(), + "Id Vec Cannot be empty when converting Vector to an LLVM Vector" + ); + let mut zeros = Vec::new(); + for _ in 0..idvec_len { + zeros.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 0 as f64)); } - // Construct the Vector - vector = LLVMBuildInsertElement( - md.builder, - vector, - elt_val, - LLVMConstInt(LLVMIntTypeInContext(md.context, 32), idx as u64, 0), - b"\0".as_ptr() as *const _, + // Convert the Vector of Zeros to a Mut PTr to construct an LLVM Zero Vector + // Invariant: zeros must not be empty + assert!( + !zeros.is_empty(), + "Zeros Vector Cannot be empty when converting Vector to an LLVM Vector" ); - } - vector + let zeros_ptr = zeros.as_mut_ptr(); + let mut vector = LLVMConstVector(zeros_ptr, idvec.len() as u32); + for (idx, &eggid) in idvec.iter().enumerate() { + let elt = &md.egg_nodes_vector[usize::from(eggid)]; + let mut elt_val = egg_to_llvm(elt, md); + // TODO: Can We Eliminate this BitCast in the future?? + // With the new formulation, will we ever have an integer type? + // Check if the elt is an int + if isa_integertype(elt_val) { + elt_val = LLVMBuildBitCast( + md.builder, + elt_val, + LLVMFloatTypeInContext(md.context), + b"\0".as_ptr() as *const _, + ); + } + + // Construct the Vector + vector = LLVMBuildInsertElement( + md.builder, + vector, + elt_val, + LLVMConstInt(LLVMIntTypeInContext(md.context, 32), idx as u64, 0), + b"\0".as_ptr() as *const _, + ); + } + vector } // TODO: Segregate Vec and Scalar Binops? unsafe fn binop_to_llvm( - binop_node: &VecLang, - left_id: &Id, - right_id: &Id, - md: &mut Egg2LLVMState, + binop_node: &VecLang, + left_id: &Id, + right_id: &Id, + md: &mut Egg2LLVMState, ) -> LLVMValueRef { - let left = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_id)], md); - let right = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_id)], md); - - // TODO: Can We Remove these Casts? - let left = if LLVMTypeOf(left) == LLVMIntTypeInContext(md.context, 32) { - LLVMBuildBitCast( - md.builder, - left, - LLVMFloatTypeInContext(md.context), - b"\0".as_ptr() as *const _, - ) - } else { - left - }; - - // TODO: Can We Remove these Casts? - let right = if LLVMTypeOf(right) == LLVMIntTypeInContext(md.context, 32) { - LLVMBuildBitCast( - md.builder, - right, - LLVMFloatTypeInContext(md.context), - b"\0".as_ptr() as *const _, - ) - } else { - right - }; - - // TODO: Can we eliminate these cases? 
- if isa_constfp(left) - && !isa_constaggregatezero(left) - && isa_constfp(right) - && !isa_constaggregatezero(right) - { - let mut loses_info = 1; - let nright = LLVMConstRealGetDouble(right, &mut loses_info); - let new_right = build_constant_float(nright, md.context); - let nleft = LLVMConstRealGetDouble(left, &mut loses_info); - let new_left = build_constant_float(nleft, md.context); - translate_binop( - binop_node, - new_left, - new_right, - md.builder, - b"\0".as_ptr() as *const _, - ) - } else if isa_constfp(right) && !isa_constaggregatezero(right) { - let mut loses_info = 1; - let n = LLVMConstRealGetDouble(right, &mut loses_info); - let new_right = build_constant_float(n, md.context); - translate_binop( - binop_node, - left, - new_right, - md.builder, - b"\0".as_ptr() as *const _, - ) - } else if isa_constfp(left) && !isa_constaggregatezero(left) { - let mut loses_info = 1; - let n = LLVMConstRealGetDouble(left, &mut loses_info); - let new_left = build_constant_float(n, md.context); - translate_binop( - binop_node, - new_left, - right, - md.builder, - b"\0".as_ptr() as *const _, - ) - } else { - translate_binop( - binop_node, - left, - right, - md.builder, - b"\0".as_ptr() as *const _, - ) - } + let left = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_id)], md); + let right = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_id)], md); + + // TODO: Can We Remove these Casts? + let left = if LLVMTypeOf(left) == LLVMIntTypeInContext(md.context, 32) { + LLVMBuildBitCast( + md.builder, + left, + LLVMFloatTypeInContext(md.context), + b"\0".as_ptr() as *const _, + ) + } else { + left + }; + + // TODO: Can We Remove these Casts? + let right = if LLVMTypeOf(right) == LLVMIntTypeInContext(md.context, 32) { + LLVMBuildBitCast( + md.builder, + right, + LLVMFloatTypeInContext(md.context), + b"\0".as_ptr() as *const _, + ) + } else { + right + }; + + // TODO: Can we eliminate these cases? 
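+    // (Descriptive note: the branches below re-materialize constant-FP scalar
+    // operands via LLVMConstRealGetDouble / build_constant_float before emitting
+    // the binop; non-constant operands are passed through unchanged.)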
+ if isa_constfp(left) + && !isa_constaggregatezero(left) + && isa_constfp(right) + && !isa_constaggregatezero(right) + { + let mut loses_info = 1; + let nright = LLVMConstRealGetDouble(right, &mut loses_info); + let new_right = build_constant_float(nright, md.context); + let nleft = LLVMConstRealGetDouble(left, &mut loses_info); + let new_left = build_constant_float(nleft, md.context); + translate_binop( + binop_node, + new_left, + new_right, + md.builder, + b"\0".as_ptr() as *const _, + ) + } else if isa_constfp(right) && !isa_constaggregatezero(right) { + let mut loses_info = 1; + let n = LLVMConstRealGetDouble(right, &mut loses_info); + let new_right = build_constant_float(n, md.context); + translate_binop( + binop_node, + left, + new_right, + md.builder, + b"\0".as_ptr() as *const _, + ) + } else if isa_constfp(left) && !isa_constaggregatezero(left) { + let mut loses_info = 1; + let n = LLVMConstRealGetDouble(left, &mut loses_info); + let new_left = build_constant_float(n, md.context); + translate_binop( + binop_node, + new_left, + right, + md.builder, + b"\0".as_ptr() as *const _, + ) + } else { + translate_binop( + binop_node, + left, + right, + md.builder, + b"\0".as_ptr() as *const _, + ) + } } unsafe fn concat_to_llvm( - left_vector: &Id, - right_vector: &Id, - md: &mut Egg2LLVMState, + left_vector: &Id, + right_vector: &Id, + md: &mut Egg2LLVMState, ) -> LLVMValueRef { - { - let trans_v1 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_vector)], md); - let mut trans_v2 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_vector)], md); - - // In LLVM, it turns out all vectors need to be length power of 2 - // if the 2 vectors are not the same size, double the length of the smaller vector by padding with 0's in it - // manually concatenate 2 vectors by using a LLVM shuffle operation. - let v1_type = LLVMTypeOf(trans_v1); - let v1_size = LLVMGetVectorSize(v1_type); - let v2_type = LLVMTypeOf(trans_v2); - let v2_size = LLVMGetVectorSize(v2_type); - - // TODO: HACKY FIX FOR NOW - // assume both v1 and v2 are pow of 2 size - // assume v2 size smaller or equal to v1 size - // assume v2 is 1/2 size of v1 - if v1_size != v2_size { - // replicate v2 size - let mut zeros = Vec::new(); - for _ in 0..v2_size { - zeros.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 0 as f64)); - } - let zeros_ptr = zeros.as_mut_ptr(); - let zeros_vector = LLVMConstVector(zeros_ptr, v2_size); - let size = 2 * v2_size; - let mut indices = Vec::new(); - for i in 0..size { - indices.push(LLVMConstInt( - LLVMIntTypeInContext(md.context, 32), - i as u64, - 0, - )); - } - let mask = indices.as_mut_ptr(); - let mask_vector = LLVMConstVector(mask, size); - trans_v2 = LLVMBuildShuffleVector( - md.builder, - trans_v2, - zeros_vector, - mask_vector, - b"\0".as_ptr() as *const _, - ); - } - - let size = v1_size + v2_size; - let mut indices = Vec::new(); - for i in 0..size { - indices.push(LLVMConstInt( - LLVMIntTypeInContext(md.context, 32), - i as u64, - 0, - )); + { + let trans_v1 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_vector)], md); + let mut trans_v2 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_vector)], md); + + // In LLVM, it turns out all vectors need to be length power of 2 + // if the 2 vectors are not the same size, double the length of the smaller vector by padding with 0's in it + // manually concatenate 2 vectors by using a LLVM shuffle operation. 
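+        // Illustration (assumed lowering, not emitted verbatim): to concatenate
+        // %v1 : <4 x float> with %v2 : <2 x float>, %v2 is first widened against a
+        // zero vector,
+        //     %v2w = shufflevector <2 x float> %v2, <2 x float> zeroinitializer,
+        //                          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+        // and the live lanes of both operands are then selected by a second shuffle,
+        //     %cat = shufflevector <4 x float> %v1, <4 x float> %v2w,
+        //                          <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+        // where mask indices of 4 and above address lanes of the second operand.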
+ let v1_type = LLVMTypeOf(trans_v1); + let v1_size = LLVMGetVectorSize(v1_type); + let v2_type = LLVMTypeOf(trans_v2); + let v2_size = LLVMGetVectorSize(v2_type); + + // TODO: HACKY FIX FOR NOW + // assume both v1 and v2 are pow of 2 size + // assume v2 size smaller or equal to v1 size + // assume v2 is 1/2 size of v1 + if v1_size != v2_size { + // replicate v2 size + let mut zeros = Vec::new(); + for _ in 0..v2_size { + zeros.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 0 as f64)); + } + let zeros_ptr = zeros.as_mut_ptr(); + let zeros_vector = LLVMConstVector(zeros_ptr, v2_size); + let size = 2 * v2_size; + let mut indices = Vec::new(); + for i in 0..size { + indices.push(LLVMConstInt( + LLVMIntTypeInContext(md.context, 32), + i as u64, + 0, + )); + } + let mask = indices.as_mut_ptr(); + let mask_vector = LLVMConstVector(mask, size); + trans_v2 = LLVMBuildShuffleVector( + md.builder, + trans_v2, + zeros_vector, + mask_vector, + b"\0".as_ptr() as *const _, + ); + } + + let size = v1_size + v2_size; + let mut indices = Vec::new(); + for i in 0..size { + indices.push(LLVMConstInt( + LLVMIntTypeInContext(md.context, 32), + i as u64, + 0, + )); + } + + let mask = indices.as_mut_ptr(); + let mask_vector = LLVMConstVector(mask, size); + LLVMBuildShuffleVector( + md.builder, + trans_v1, + trans_v2, + mask_vector, + b"\0".as_ptr() as *const _, + ) } - - let mask = indices.as_mut_ptr(); - let mask_vector = LLVMConstVector(mask, size); - LLVMBuildShuffleVector( - md.builder, - trans_v1, - trans_v2, - mask_vector, - b"\0".as_ptr() as *const _, - ) - } } unsafe fn mac_to_llvm( - accumulator_vector: &Id, - left_prod_vector: &Id, - right_prod_vector: &Id, - md: &mut Egg2LLVMState, + accumulator_vector: &Id, + left_prod_vector: &Id, + right_prod_vector: &Id, + md: &mut Egg2LLVMState, ) -> LLVMValueRef { - let trans_acc = egg_to_llvm(&md.egg_nodes_vector[usize::from(*accumulator_vector)], md); - let trans_v1 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_prod_vector)], md); - let trans_v2 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_prod_vector)], md); - let vec_type = LLVMTypeOf(trans_acc); - let param_types = [vec_type, vec_type, vec_type].as_mut_ptr(); - let fn_type = LLVMFunctionType(vec_type, param_types, 3, 0 as i32); - let func = LLVMAddFunction(md.module, b"llvm.fma.f32\0".as_ptr() as *const _, fn_type); - let args = [trans_v1, trans_v2, trans_acc].as_mut_ptr(); - LLVMBuildCall(md.builder, func, args, 3, b"\0".as_ptr() as *const _) + let trans_acc = egg_to_llvm(&md.egg_nodes_vector[usize::from(*accumulator_vector)], md); + let trans_v1 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_prod_vector)], md); + let trans_v2 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_prod_vector)], md); + let vec_type = LLVMTypeOf(trans_acc); + let param_types = [vec_type, vec_type, vec_type].as_mut_ptr(); + let fn_type = LLVMFunctionType(vec_type, param_types, 3, 0 as i32); + // let vector_width = config::vector_width(); + // let fma_intrinsic_name = format!("llvm.fma.v{}f32\0", vector_width).as_bytes(); + let func = LLVMAddFunction(md.module, b"llvm.fma.v4f32\0".as_ptr() as *const _, fn_type); + let args = [trans_v1, trans_v2, trans_acc].as_mut_ptr(); + LLVMBuildCall(md.builder, func, args, 3, b"\0".as_ptr() as *const _) } unsafe fn scalar_unop_to_llvm(n: &Id, unop_node: &VecLang, md: &mut Egg2LLVMState) -> LLVMValueRef { - let mut number = egg_to_llvm(&md.egg_nodes_vector[usize::from(*n)], md); - if isa_integertype(number) { - number = LLVMBuildBitCast( - md.builder, - number, 
- LLVMFloatTypeInContext(md.context), - b"\0".as_ptr() as *const _, - ); - } - translate_unop( - unop_node, - number, - md.builder, - md.context, - md.module, - b"\0".as_ptr() as *const _, - ) + let mut number = egg_to_llvm(&md.egg_nodes_vector[usize::from(*n)], md); + if isa_integertype(number) { + number = LLVMBuildBitCast( + md.builder, + number, + LLVMFloatTypeInContext(md.context), + b"\0".as_ptr() as *const _, + ); + } + translate_unop( + unop_node, + number, + md.builder, + md.context, + md.module, + b"\0".as_ptr() as *const _, + ) } unsafe fn vecneg_to_llvm(vec: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { - let neg_vector = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); - LLVMBuildFNeg(md.builder, neg_vector, b"\0".as_ptr() as *const _) + let neg_vector = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); + LLVMBuildFNeg(md.builder, neg_vector, b"\0".as_ptr() as *const _) } unsafe fn vecsqrt_to_llvm(vec: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { - let sqrt_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); - let vec_type = LLVMTypeOf(sqrt_vec); - let param_types = [vec_type].as_mut_ptr(); - let fn_type = LLVMFunctionType(vec_type, param_types, 1, 0 as i32); - let func = LLVMAddFunction(md.module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); - let args = [sqrt_vec].as_mut_ptr(); - LLVMBuildCall(md.builder, func, args, 1, b"\0".as_ptr() as *const _) + let sqrt_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); + let vec_type = LLVMTypeOf(sqrt_vec); + let param_types = [vec_type].as_mut_ptr(); + let fn_type = LLVMFunctionType(vec_type, param_types, 1, 0 as i32); + let func = LLVMAddFunction(md.module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); + let args = [sqrt_vec].as_mut_ptr(); + LLVMBuildCall(md.builder, func, args, 1, b"\0".as_ptr() as *const _) } unsafe fn vecsgn_to_llvm(vec: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { - let sgn_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); - let vec_type = LLVMTypeOf(sgn_vec); - let vec_size = LLVMGetVectorSize(vec_type); - let mut ones = Vec::new(); - for _ in 0..vec_size { - ones.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 1 as f64)); - } - let ones_ptr = ones.as_mut_ptr(); - let ones_vector = LLVMConstVector(ones_ptr, vec_size); - let param_types = [vec_type, vec_type].as_mut_ptr(); - let fn_type = LLVMFunctionType(vec_type, param_types, 2, 0 as i32); - let func = LLVMAddFunction( - md.module, - b"llvm.copysign.f32\0".as_ptr() as *const _, - fn_type, - ); - let args = [ones_vector, sgn_vec].as_mut_ptr(); - LLVMBuildCall(md.builder, func, args, 2, b"\0".as_ptr() as *const _) + let sgn_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); + let vec_type = LLVMTypeOf(sgn_vec); + let vec_size = LLVMGetVectorSize(vec_type); + let mut ones = Vec::new(); + for _ in 0..vec_size { + ones.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 1 as f64)); + } + let ones_ptr = ones.as_mut_ptr(); + let ones_vector = LLVMConstVector(ones_ptr, vec_size); + let param_types = [vec_type, vec_type].as_mut_ptr(); + let fn_type = LLVMFunctionType(vec_type, param_types, 2, 0 as i32); + let func = LLVMAddFunction( + md.module, + b"llvm.copysign.f32\0".as_ptr() as *const _, + fn_type, + ); + let args = [ones_vector, sgn_vec].as_mut_ptr(); + LLVMBuildCall(md.builder, func, args, 2, b"\0".as_ptr() as *const _) } /** * Vector representing No Optimization: Egg will not have modified the vector at all. 
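 * Each child of the NoOptVec is translated in order and its result replaces the
 * corresponding start instruction recorded during the LLVM-to-egg pass.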
*/ unsafe fn nooptvec_to_llvm(boxed_ids: &Box<[Id]>, md: &mut Egg2LLVMState) -> () { - // Convert the Boxed Ids to a Vector, and generate a vector of zeros - // Invariant: idvec must not be empty - let idvec = boxed_ids.to_vec(); - assert!( - !idvec.is_empty(), - "Id Vec Cannot be empty when converting Vector to an LLVM Vector" - ); - for (i, &eggid) in idvec.iter().enumerate() { - let egg_node = &md.egg_nodes_vector[usize::from(eggid)]; - let new_instr = egg_to_llvm(egg_node, md); - let old_instr = md - .llvm2egg_metadata - .start_instructions - .get(i) - .expect("Index Must Exist In Start Instructions"); - LLVMReplaceAllUsesWith(*old_instr, new_instr); - LLVMInstructionRemoveFromParent(*old_instr); - } + // Convert the Boxed Ids to a Vector, and generate a vector of zeros + // Invariant: idvec must not be empty + let idvec = boxed_ids.to_vec(); + assert!( + !idvec.is_empty(), + "Id Vec Cannot be empty when converting Vector to an LLVM Vector" + ); + for (i, &eggid) in idvec.iter().enumerate() { + let egg_node = &md.egg_nodes_vector[usize::from(eggid)]; + let new_instr = egg_to_llvm(egg_node, md); + let old_instr = md + .llvm2egg_metadata + .start_instructions + .get(i) + .expect("Index Must Exist In Start Instructions"); + LLVMReplaceAllUsesWith(*old_instr, new_instr); + LLVMInstructionEraseFromParent(*old_instr); + } } /// Egg To LLVM Dispatches translation of VecLanf Egg Nodes to LLVMValueRegs /// /// Side Effect: Builds and Insert LLVM instructions unsafe fn egg_to_llvm( - egg_node: &VecLang, - translation_metadata: &mut Egg2LLVMState, + egg_node: &VecLang, + translation_metadata: &mut Egg2LLVMState, ) -> LLVMValueRef { - match egg_node { + match egg_node { VecLang::NoOptVec(..) => panic!("No Opt Vector was found. Egg to LLVM Translation does not handle No Opt Vector nodes at this location."), VecLang::Symbol(..) => { panic!("Symbol was found. Egg to LLVM Translation does not handle symbol nodes.") @@ -1084,6 +1112,9 @@ unsafe fn egg_to_llvm( VecLang::Get(..) => { panic!("Get was found. Egg to LLVM Translation does not handle get nodes.") } + VecLang::Set(..) => { + panic!("Set was found. Egg to LLVM Translation does not handle set nodes.") + } VecLang::Ite(..) => panic!("Ite was found. Egg to LLVM Translation does not handle ite nodes."), VecLang::Or(..) => panic!("Or was found. Egg to LLVM Translation does not handle or nodes."), VecLang::And(..) => panic!("And was found. Egg to LLVM Translation does not handle and nodes."), @@ -1116,77 +1147,80 @@ unsafe fn egg_to_llvm( // VecSgn compliant with c++ LibMath copysign function, which differs with sgn at x = 0. VecLang::VecSgn([v]) => vecsgn_to_llvm(v, translation_metadata), VecLang::Sgn([n]) | VecLang::Sqrt([n]) | VecLang::Neg([n]) => scalar_unop_to_llvm(n, egg_node, translation_metadata), + VecLang::VecLoad(..) => panic!("VecLoad is not yet implemented"), + VecLang::VecStore(..) => panic!("VecStore is not yet implemented"), } } unsafe fn is_nooptvec(egg_expr: &VecLang) -> bool { - match egg_expr { - VecLang::NoOptVec(..) => true, - _ => false, - } + match egg_expr { + VecLang::NoOptVec(..) => true, + _ => false, + } } unsafe fn get_noopt_eggnodes(egg_expr: &VecLang) -> &Box<[Id]> { - match egg_expr { - VecLang::NoOptVec(boxed_ids) => boxed_ids, - _ => panic!("Not a NoOptVec!"), - } + match egg_expr { + VecLang::NoOptVec(boxed_ids) => boxed_ids, + _ => panic!("Not a NoOptVec!"), + } } // TODO: Add non-vectorized version as well! 
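// Shape of the RecExpr handed to egg_to_llvm_main (indices illustrative only):
//   no-opt path:  [Arg(0), Num(1), Add([0, 1]), ..., NoOptVec([..start ids..])]
//   vectorized:   [..scalar leaves.., ..Vec/VecAdd/VecMAC nodes.., root vector]
// In both cases the last node is the root that gets translated; child/lane i of
// that root corresponds to start_instructions[i] recorded during LLVM-to-egg
// translation.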
unsafe fn egg_to_llvm_main( - expr: RecExpr, - llvm2egg_metadata: &LLVM2EggState, - module: LLVMModuleRef, - context: LLVMContextRef, - builder: LLVMBuilderRef, - vectorize: bool, + expr: RecExpr, + llvm2egg_metadata: &LLVM2EggState, + module: LLVMModuleRef, + context: LLVMContextRef, + builder: LLVMBuilderRef, + vectorize: bool, ) -> () { - // Walk the RecExpr of Egg Nodes and translate it in place to LLVM - let egg_nodes = expr.as_ref(); - let last_egg_node = egg_nodes - .last() - .expect("No match for last element of vector of Egg Terms."); - - // Nodes converted to llvm already, not to be retranslated - let prior_translated_nodes: BTreeSet = BTreeSet::new(); - - let mut translation_metadata = Egg2LLVMState { - egg_nodes_vector: egg_nodes, - llvm2egg_metadata: llvm2egg_metadata.clone(), - prior_translated_nodes: prior_translated_nodes, - builder: builder, - context: context, - module: module, - }; - // If vectorize was not true, we are finished, because nooptvectorize_to_llvm will generate the required code. - if !vectorize { - assert!(is_nooptvec(last_egg_node)); - return nooptvec_to_llvm(get_noopt_eggnodes(last_egg_node), &mut translation_metadata); - } + // Walk the RecExpr of Egg Nodes and translate it in place to LLVM + let egg_nodes = expr.as_ref(); + let last_egg_node = egg_nodes + .last() + .expect("No match for last element of vector of Egg Terms."); + + // Nodes converted to llvm already, not to be retranslated + let prior_translated_nodes: BTreeSet = BTreeSet::new(); + + let mut translation_metadata = Egg2LLVMState { + egg_nodes_vector: egg_nodes, + llvm2egg_metadata: llvm2egg_metadata.clone(), + prior_translated_nodes: prior_translated_nodes, + builder: builder, + context: context, + module: module, + }; + // If vectorize was not true, we are finished, because nooptvectorize_to_llvm will generate the required code. + if !vectorize { + assert!(is_nooptvec(last_egg_node)); + return nooptvec_to_llvm(get_noopt_eggnodes(last_egg_node), &mut translation_metadata); + } - // Regular translation from vectorization - - assert!(!is_nooptvec(last_egg_node)); - let llvm_vector = egg_to_llvm(last_egg_node, &mut translation_metadata); - - // BELOW HERE, we allow for vectorization output, and we stitch our work back into the current LLVM code - - // NOTE: We Assume Egg rewriter will maintain relative positions of elements in vector - // Extract the elements of the vector, to be assigned back to where they are to be used. - let num_extractions = llvm2egg_metadata.start_instructions.len(); - for i in (0..num_extractions).rev() { - let old_instr = llvm2egg_metadata - .start_instructions - .get(i) - .expect("Index should be in vector."); - // Build the extracted value - let index = LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0); - let extracted_value = - LLVMBuildExtractElement(builder, llvm_vector, index, b"\0".as_ptr() as *const _); - // Replace all the uses of the old instruction with the new extracted value - // Old instruction cannot have been removed. - LLVMReplaceAllUsesWith(*old_instr, extracted_value); - LLVMInstructionRemoveFromParent(*old_instr); - } + // Regular translation from vectorization + + assert!(!is_nooptvec(last_egg_node)); + let llvm_vector = egg_to_llvm(last_egg_node, &mut translation_metadata); + + // BELOW HERE, we allow for vectorization output, and we stitch our work back into the current LLVM code + + // NOTE: We Assume Egg rewriter will maintain relative positions of elements in vector + // REVIEW ASSUMPTION! 
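+    // Concretely, the assumption is that lane i of the rewritten vector still
+    // computes the value of start_instructions[i]; the extraction loop below
+    // depends on that ordering when it replaces each original instruction.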
+ // Extract the elements of the vector, to be assigned back to where they are to be used. + let num_extractions = llvm2egg_metadata.start_instructions.len(); + for i in (0..num_extractions).rev() { + let old_instr = llvm2egg_metadata + .start_instructions + .get(i) + .expect("Index should be in vector."); + // Build the extracted value + let index = LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0); + let extracted_value = + LLVMBuildExtractElement(builder, llvm_vector, index, b"\0".as_ptr() as *const _); + // Replace all the uses of the old instruction with the new extracted value + // Old instruction cannot have been removed. + LLVMReplaceAllUsesWith(*old_instr, extracted_value); + LLVMInstructionEraseFromParent(*old_instr); + } } From 7bd80fb064248ed64281d16c0e8a79f0ad36ef99 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sat, 11 Mar 2023 19:22:55 -0500 Subject: [PATCH 110/143] add changes to veclang in src folder --- src/dios-egraphs/src/cost.rs | 12 ++++++++++-- src/dios-egraphs/src/rules.rs | 4 ++++ src/dios-egraphs/src/veclang.rs | 10 ++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/dios-egraphs/src/cost.rs b/src/dios-egraphs/src/cost.rs index f11d5c1f..849c6626 100644 --- a/src/dios-egraphs/src/cost.rs +++ b/src/dios-egraphs/src/cost.rs @@ -13,8 +13,9 @@ impl CostFunction for VecCostFn<'_> { fn cost(&mut self, enode: &VecLang, mut costs: C) -> Self::Cost where C: FnMut(Id) -> Self::Cost, - { + { const NO_OPTIMIZATION: f64 = 0.0; + const VECTORIZED_MEMORY_ACCESS: f64 = 0.0001; const LITERAL: f64 = 0.001; const STRUCTURE: f64 = 0.1; const VEC_OP: f64 = 1.; @@ -22,7 +23,13 @@ impl CostFunction for VecCostFn<'_> { const BIG: f64 = 100.0; let op_cost = match enode { // No Optimization case for testing purposes - VecLang::NoOptVec(..) => NO_OPTIMIZATION, + VecLang::NoOptVec(..) => NO_OPTIMIZATION, + + // Vectorized Memory Accesses are cheaper than individual memory loads and stores + // Note: This assumes that masked-gathers or masked-scattters to vectors or memory + // are implemented on the target, and are cheap, according to the LLVM cost model + VecLang::VecLoad(..) => VECTORIZED_MEMORY_ACCESS, + VecLang::VecStore(..) => VECTORIZED_MEMORY_ACCESS, // You get literals for extremely cheap VecLang::Num(..) => LITERAL, @@ -30,6 +37,7 @@ impl CostFunction for VecCostFn<'_> { VecLang::Arg(..) => LITERAL, VecLang::Symbol(..) => LITERAL, VecLang::Get(..) => LITERAL, + VecLang::Set(..) => LITERAL, // And list structures for quite cheap VecLang::List(..) => STRUCTURE, diff --git a/src/dios-egraphs/src/rules.rs b/src/dios-egraphs/src/rules.rs index a037f0f9..87a1fb12 100644 --- a/src/dios-egraphs/src/rules.rs +++ b/src/dios-egraphs/src/rules.rs @@ -158,6 +158,10 @@ pub fn rules(no_ac: bool, no_vec: bool) -> Vec> { // Vector rules if !no_vec { rules.extend(vec![ + // Get load fusion rule + rw!("vec-load-gets"; "(Vec (Get ?a0 ?b0) (Get ?a1 ?b1) (Get ?a2 ?b2) (Get ?a3 ?b3))" => "(VecLoad (Vec ?a0 ?a1 ?a2 ?a3) (Vec ?b0 ?b1 ?b2 ?b3))"), + // Set store fusion rule + rw!("vec-store-sets"; "(Vec (Set ?a0 ?b0 ?c0) (Set ?a1 ?b1 ?c1) (Set ?a2 ?b2 ?c2) (Set ?a3 ?b3 ?c3))" => "(VecStore (Vec ?a0 ?a1 ?a2 ?a3) (Vec ?b0 ?b1 ?b2 ?b3) (Vec ?c0 ?c1 ?c2 ?c3))"), // Special MAC fusion rule rw!("vec-mac-add-mul"; "(VecAdd ?v0 (VecMul ?v1 ?v2))" diff --git a/src/dios-egraphs/src/veclang.rs b/src/dios-egraphs/src/veclang.rs index 739adbce..21ab97e2 100644 --- a/src/dios-egraphs/src/veclang.rs +++ b/src/dios-egraphs/src/veclang.rs @@ -38,8 +38,12 @@ define_language! 
{ // Vector with all literals "LitVec" = LitVec(Box<[Id]>), + // Get is a read of memory "Get" = Get([Id; 2]), + // Set is a modification of memory + "Set" = Set([Id; 3]), + // Used for partitioning and recombining lists "Concat" = Concat([Id; 2]), @@ -58,6 +62,12 @@ define_language! { // MAC takes 3 lists: acc, v1, v2 "VecMAC" = VecMAC([Id; 3]), + // VecLoad takes 2 lists: base address vector and offset vector + "VecLoad" = VecLoad([Id; 2]), + + // VecStore takes 2 lists: base address vector and offset vector + "VecStore" = VecStore([Id; 3]), + // Info specific to register // RegInfo(egg::Symbol), From 25c03d1c0b07dadb9d2640def2aa8413b179d205 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 17 Mar 2023 21:09:53 -0400 Subject: [PATCH 111/143] get vecload to work --- .gitignore | 3 +- .../Diospyros/LoadStoreMovement.cpp | 12 + .../Diospyros/c-tests/qr-decomp-fixed-size.c | 38 +-- src/dios-egraphs/Diospyros/diospyros.cpp | 16 +- src/dios-egraphs/Diospyros/src/lib.rs | 228 ++++++++++++++++-- src/dios-egraphs/src/cost.rs | 2 + src/dios-egraphs/src/rules.rs | 4 +- src/dios-egraphs/src/veclang.rs | 10 +- 8 files changed, 260 insertions(+), 53 deletions(-) diff --git a/.gitignore b/.gitignore index 09c7ec69..64bc7fc8 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,5 @@ Cargo.lock .clang-format* .cargo .vscode -polybench-tests/ \ No newline at end of file +polybench-tests/ +notes/ \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index ad5c99cd..903241a3 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -465,6 +465,16 @@ struct LoadStoreMovementPass : public FunctionPass { } } + // Move All Bitcasts as early as possible, avoiding moving instructions + // by removing dependencies. The idea behind this is to move bitcasts + // out of the way so that vectorization can occur properlu. 
+ void rewrite_bitcasts(Function &F) { + for (auto &B : F) { + for (auto &I : B) { + } + } + } + virtual bool runOnFunction(Function &F) override { /** * In this pass, we walk backwards finding the first load from the @@ -476,6 +486,8 @@ struct LoadStoreMovementPass : public FunctionPass { F.getName().substr(0, NO_OPT_PREFIX.size()) == NO_OPT_PREFIX)) { return false; } + // Might want to iterate to convergence + // first move bitcasts rewrite_loads(F); rewrite_stores(F); diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size.c index 69c40b58..199a5729 100644 --- a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size.c +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size.c @@ -84,7 +84,7 @@ void naive_fixed_qr_decomp(float *A, float *Q, float *R) { memcpy(R, A, sizeof(float) * SIZE * SIZE); // Build identity matrix of size SIZE * SIZE - float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + float *I = (float *)calloc(SIZE * SIZE, sizeof(float)); for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { I[i * SIZE + j] = (i == j); @@ -95,8 +95,8 @@ void naive_fixed_qr_decomp(float *A, float *Q, float *R) { for (int k = 0; k < SIZE - 1; k++) { int m = SIZE - k; - float *x = (float *)calloc(sizeof(float), m); - float *e = (float *)calloc(sizeof(float), m); + float *x = (float *)calloc(m, sizeof(float)); + float *e = (float *)calloc(m, sizeof(float)); for (int i = 0; i < m; i++) { int row = k + i; x[i] = R[row * SIZE + k]; @@ -105,8 +105,8 @@ void naive_fixed_qr_decomp(float *A, float *Q, float *R) { float alpha = -sgn(x[0]) * naive_norm(x, m); - float *u = (float *)calloc(sizeof(float), m); - float *v = (float *)calloc(sizeof(float), m); + float *u = (float *)calloc(m, sizeof(float)); + float *v = (float *)calloc(m, sizeof(float)); for (int i = 0; i < m; i++) { u[i] = x[i] + alpha * e[i]; } @@ -115,7 +115,7 @@ void naive_fixed_qr_decomp(float *A, float *Q, float *R) { v[i] = u[i] / (norm_u + 0.00001f); } - float *q_min = (float *)calloc(sizeof(float), m * m); + float *q_min = (float *)calloc(m * m, sizeof(float)); for (int i = 0; i < m; i++) { for (int j = 0; j < m; j++) { float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; @@ -123,7 +123,7 @@ void naive_fixed_qr_decomp(float *A, float *Q, float *R) { } } - float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + float *q_t = (float *)calloc(SIZE * SIZE, sizeof(float)); for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { float q_t_i; @@ -140,7 +140,7 @@ void naive_fixed_qr_decomp(float *A, float *Q, float *R) { memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A } else { - float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + float *res = (float *)calloc(SIZE * SIZE, sizeof(float)); naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A memcpy(Q, res, sizeof(float) * SIZE * SIZE); naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A @@ -160,7 +160,7 @@ void no_opt_naive_fixed_qr_decomp(float *A, float *Q, float *R) { memcpy(R, A, sizeof(float) * SIZE * SIZE); // Build identity matrix of size SIZE * SIZE - float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + float *I = (float *)calloc(SIZE * SIZE, sizeof(float)); for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { I[i * SIZE + j] = (i == j); @@ -171,8 +171,8 @@ void no_opt_naive_fixed_qr_decomp(float *A, float *Q, float *R) { for (int k = 0; k < SIZE - 1; k++) { int m = SIZE - k; - float *x = (float *)calloc(sizeof(float), m); - float *e = (float *)calloc(sizeof(float), m); + float *x = (float *)calloc(m, sizeof(float)); + float *e = (float *)calloc(m, sizeof(float)); for (int i = 0; i < m; i++) { int row = k + i; x[i] = R[row * SIZE + k]; @@ -181,8 +181,8 @@ void no_opt_naive_fixed_qr_decomp(float *A, float *Q, float *R) { float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); - float *u = (float *)calloc(sizeof(float), m); - float *v = (float *)calloc(sizeof(float), m); + float *u = (float *)calloc(m, sizeof(float)); + float *v = (float *)calloc(m, sizeof(float)); for (int i = 0; i < m; i++) { u[i] = x[i] + alpha * e[i]; } @@ -191,7 +191,7 @@ void no_opt_naive_fixed_qr_decomp(float *A, float *Q, float *R) { v[i] = u[i] / (norm_u + 0.00001f); } - float *q_min = (float *)calloc(sizeof(float), m * m); + float *q_min = (float *)calloc(m * m, sizeof(float)); for (int i = 0; i < m; i++) { for (int j = 0; j < m; j++) { float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; @@ -199,7 +199,7 @@ void no_opt_naive_fixed_qr_decomp(float *A, float *Q, float *R) { } } - float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + float *q_t = (float *)calloc(SIZE * SIZE, sizeof(float)); for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { float q_t_i; @@ -213,10 +213,10 @@ void no_opt_naive_fixed_qr_decomp(float *A, float *Q, float *R) { } if (k == 0) { - memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t - no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A } else { - float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + float *res = (float *)calloc(SIZE * SIZE, sizeof(float)); no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A memcpy(Q, res, sizeof(float) * SIZE * SIZE); no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A @@ -256,7 +256,7 @@ int main(void) { assert(fabs(expectedQ[i] - Q[i]) < DELTA); } } - + for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { printf("R Output: %f\n", R[i * SIZE + j]); diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index c5cf6db0..80ec377b 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -499,10 +499,12 @@ bool can_vectorize(Value *value) { return true; } else if (instr->getOpcode() == Instruction::FNeg) { return true; + } else if (isa(instr)) { + return true; + } else if (isa(instr)) { + return true; } - // else if (isa(instr)) { - // return true; - // } else if (isa(instr)) { + // else if (isa(instr)) { // return true; // } // else if (isa_sqrt32(wrap(instr))) { @@ -585,7 +587,10 @@ struct DiospyrosPass : public FunctionPass { // optimixe on bool has_vectorizable_instrs = false; for (auto &instr : chunk_vector) { - if (can_vectorize(unwrap(instr))) { + if (can_vectorize(unwrap(instr)) && + !isa(unwrap(instr)) && + !isa(unwrap(instr)) && + !isa(unwrap(instr))) { has_vectorizable_instrs = true; } } @@ -624,6 +629,9 @@ struct DiospyrosPass : public FunctionPass { // instruction" int insert_pos = 0; bool has_seen_vectorizable = false; + for (auto chunk_instr : chunk_vector) { + errs() << *unwrap(chunk_instr) << "\n"; + } for (int i = 0; i < chunk_vector.size(); i++) { if (can_vectorize(unwrap(chunk_vector[i]))) { has_seen_vectorizable = true; diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 4d337282..5ccc4a51 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -17,8 +17,8 @@ extern "C" { fn _isa_bop(val: LLVMValueRef) -> bool; fn isa_constant(val: LLVMValueRef) -> bool; fn isa_constfp(val: LLVMValueRef) -> bool; - fn _isa_gep(val: LLVMValueRef) -> bool; - fn _isa_load(val: LLVMValueRef) -> bool; + fn isa_gep(val: LLVMValueRef) -> bool; + fn isa_load(val: LLVMValueRef) -> bool; fn _isa_store(val: LLVMValueRef) -> bool; fn isa_argument(val: LLVMValueRef) -> bool; fn _isa_call(val: LLVMValueRef) -> bool; @@ -43,6 +43,7 @@ extern "C" { static mut ARG_IDX: u32 = 0; static mut REG_IDX: u32 = 0; +static mut GET_IDX: u32 = 0; unsafe fn gen_arg_idx() -> u32 { ARG_IDX += 1; @@ -54,6 +55,11 @@ unsafe fn gen_reg_idx() -> u32 { return REG_IDX; } +unsafe fn gen_get_idx() -> u32 { + GET_IDX += 1; + return GET_IDX; +} + // Reference Comparison: 
https://www.reddit.com/r/rust/comments/2r3wjk/is_there_way_to_compare_objects_by_address_in_rust/ // Compares whether addresses of LLVMValueRefs are the same. // Not the contents of the Value Refs @@ -202,6 +208,7 @@ enum LLVMOpType { Sqrt32, // TODO: SGN signum UnhandledLLVMOpCode, + Load, } unsafe fn get_pow2(n: u32) -> u32 { @@ -370,6 +377,8 @@ unsafe fn match_llvm_op(llvm_instr: &LLVMValueRef) -> LLVMOpType { return LLVMOpType::Constant; } else if isa_sqrt32(*llvm_instr) { return LLVMOpType::Sqrt32; + } else if isa_load(*llvm_instr) { + return LLVMOpType::Load; } else { return LLVMOpType::UnhandledLLVMOpCode; } @@ -387,6 +396,7 @@ unsafe fn choose_unop(unop: &LLVMValueRef, id: Id) -> VecLang { struct LLVM2EggState { llvm2reg: BTreeMap, llvm2arg: BTreeMap, + get2gep: BTreeMap, instructions_in_chunk: BTreeSet, restricted_instructions: BTreeSet, prior_translated_instructions: BTreeSet, @@ -483,6 +493,34 @@ unsafe fn sqrt32_to_egg( (new_enode_vec, new_next_node_idx + 1) } +/// Translates a Load to an Egg Get Node +/// +/// The translation of a load is a Get Node, which can then possibly be vectorized +/// Adds the gep address of the load to the translation metadata so that it can +/// be referenced when translating from Egg to LLVM +/// +/// Fails if the llvm instruction under translation is not a load +unsafe fn load_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, +) -> (Vec, u32) { + assert!(isa_load(llvm_instr)); + let gep_id = gen_get_idx(); + let gep_node = VecLang::Gep(gep_id); + egg_nodes.push(gep_node.clone()); + let load_node = VecLang::Load([Id::from(next_node_idx as usize)]); + let llvm_gep_instr = LLVMGetOperand(llvm_instr, 0); + _llvm_print(llvm_gep_instr); + // assert!(isa_gep(llvm_gep_instr) || isa_argument(llvm_gep_instr)); + translation_metadata.get2gep.insert(gep_id, llvm_gep_instr); + egg_nodes.push(load_node.clone()); + assert!(!translation_metadata.llvm2reg.contains_key(&llvm_instr)); + translation_metadata.llvm2reg.insert(llvm_instr, load_node); + (egg_nodes, next_node_idx + 2) +} + /// Translates an Unhandled OpCode to an Egg Register. 
/// /// This represents a blackbox computation that we bail on translating @@ -564,6 +602,7 @@ unsafe fn llvm_to_egg( LLVMOpType::Sqrt32 => { sqrt32_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) } + LLVMOpType::Load => load_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata), LLVMOpType::UnhandledLLVMOpCode => { unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) } @@ -592,7 +631,8 @@ unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { | LLVMOpType::FSub | LLVMOpType::FNeg | LLVMOpType::Constant - | LLVMOpType::Sqrt32 => true, + | LLVMOpType::Sqrt32 + | LLVMOpType::Load => true, LLVMOpType::Argument | LLVMOpType::UnhandledLLVMOpCode => false, }; } @@ -610,6 +650,9 @@ unsafe fn llvm_to_egg_main( // Map from (translated) llvm instructions to argument egg graph nodes let llvm_instr2arg_node: BTreeMap = BTreeMap::new(); + // Map from (translated) Egg get ID to an original LLVM get node + let getid2gep: BTreeMap = BTreeMap::new(); + // Ordered Vector of Starting LLVM instructions where translation began let start_instructions: Vec = Vec::new(); @@ -650,6 +693,7 @@ unsafe fn llvm_to_egg_main( let mut translation_metadata = LLVM2EggState { llvm2reg: llvm_instr2reg_node, llvm2arg: llvm_instr2arg_node, + get2gep: getid2gep, instructions_in_chunk: instructions_in_chunk, restricted_instructions: restricted_instrs_set, prior_translated_instructions: prior_translated_instructions, @@ -663,7 +707,7 @@ unsafe fn llvm_to_egg_main( // for each final instruction, iterate backwards from that final instruction and translate to egg for llvm_instr in llvm_instrs_in_chunk.iter().rev() { // only start translation back if it is a "translatable instruction" and it was not translated already - if can_start_translation_instr(*llvm_instr) + if can_start_translation_instr(*llvm_instr) // TODO: Need to DFS back from this instruction and make sure invariants for translation hold, e.g. no bitcasts somewhere down the translation tree. && !translation_metadata .prior_translated_instructions .contains(&llvm_instr) @@ -674,8 +718,10 @@ unsafe fn llvm_to_egg_main( next_node_idx, &mut translation_metadata, ); + println!("{}", new_next_node_idx); egg_nodes = new_egg_nodes; next_node_idx = new_next_node_idx; + println!("{}", egg_nodes.len()); } } @@ -712,31 +758,159 @@ struct Egg2LLVMState<'a> { module: LLVMModuleRef, } -unsafe fn get_nodes_to_llvm( - egg_node: &VecLang, - translation_metadata: &mut Egg2LLVMState, -) -> LLVMValueRef { - // TODO: Make More Efficient with BTREEMAP? - let llvm2arg = &translation_metadata.llvm2egg_metadata.llvm2arg; - for (llvm_instr, arg_node) in llvm2arg.iter() { - // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. - if arg_node == egg_node { - assert!(isa_argument(*llvm_instr)); - return *llvm_instr; +/// Translates a Gep node to an ID that the node holds. 
This ID is matche dto +/// a gep instruction in the get2gep map +/// +/// Used in conjunction with Load to LLVM and VecLoad to LLVM +unsafe fn gep_to_llvm(egg_node: &VecLang, _md: &mut Egg2LLVMState) -> u32 { + match *egg_node { + VecLang::Gep(gep_id) => gep_id, + _ => panic!("Non Gep nodes cannot be translated in gep_to_llvm."), + } +} + +/// Translates a Load Egg Node back to an LLVM Load INstruction +/// +/// Assumes that every load is implicitly from a Float * / Single Level Float Pointer +unsafe fn load_to_llvm(gep_id: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { + let original_gep_id = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep_id)], md); + let get2gep = &md.llvm2egg_metadata.get2gep; + for (gep_id, gep_instr) in get2gep.iter() { + if original_gep_id == *gep_id { + // assert!(isa_gep(*gep_instr) || isa_argument(*gep_instr)); + let new_load_instr = LLVMBuildLoad(md.builder, *gep_instr, b"\0".as_ptr() as *const _); + return new_load_instr; } } - panic!( - "Expected a successful lookup in llvm2arg, but cannot find Argument Egg Node: {:?}.", - egg_node - ); + panic!("Load2LLVM: Expected a successful lookup in get2gep, but cannot find Gep ID."); } -unsafe fn arg_to_llvm( - egg_node: &VecLang, - translation_metadata: &mut Egg2LLVMState, +unsafe fn loadvec_to_llvm( + gep1_id: &Id, + gep2_id: &Id, + gep3_id: &Id, + gep4_id: &Id, + md: &mut Egg2LLVMState, ) -> LLVMValueRef { + // Set Opaque Pointer ness + let gep1_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); + let gep2_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep2_id)], md); + let gep3_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep3_id)], md); + let gep4_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep4_id)], md); + + let gep1_llvm_instr = md + .llvm2egg_metadata + .get2gep + .get(&gep1_id_val) + .expect("Value of gep1 id should exist in get2gep"); + let gep2_llvm_instr = md + .llvm2egg_metadata + .get2gep + .get(&gep2_id_val) + .expect("Value of gep2 id should exist in get2gep"); + let gep3_llvm_instr = md + .llvm2egg_metadata + .get2gep + .get(&gep3_id_val) + .expect("Value of gep3 id should exist in get2gep"); + let gep4_llvm_instr = md + .llvm2egg_metadata + .get2gep + .get(&gep4_id_val) + .expect("Value of gep4 id should exist in get2gep"); + + let vector_width = 4; + let floatptr_type = LLVMTypeOf(*gep1_llvm_instr); + let vec4ptr_type = LLVMVectorType(floatptr_type, vector_width); + let vec4f_type = LLVMVectorType(LLVMFloatTypeInContext(md.context), vector_width); + let vec4b_type = LLVMVectorType(LLVMInt1TypeInContext(md.context), vector_width); + let int_type = LLVMIntTypeInContext(md.context, 32); + + // Parameter Types are:: vector of pointers, offset int, mask vector booleans and pass through vector + // Pasthru is poison according to LLVM + let param_types = [vec4ptr_type, int_type, vec4b_type, vec4f_type].as_mut_ptr(); + // Output type is a 4 length vector + let fn_type = LLVMFunctionType(vec4f_type, param_types, 4, 0 as i32); + // Build the Vector Load Intrinsic + let func = LLVMAddFunction( + md.module, + b"llvm.masked.gather.v4f32.v4p0\0".as_ptr() as *const _, + fn_type, + ); + + // Build Arguments + + let mut zeros = Vec::new(); + for _ in 0..4 { + zeros.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 0 as f64)); + } + let zeros_ptr = zeros.as_mut_ptr(); + let zero_vector = LLVMConstVector(zeros_ptr, 4); + + let pointer_to_int_value = LLVMBuildPtrToInt( + md.builder, + LLVMConstInt(LLVMIntTypeInContext(md.context, 32), 0 as u64, 0), + 
LLVMIntTypeInContext(md.context, 32), + b"pointer-to-int\0".as_ptr() as *const _, + ); + let pointer_to_float_value = LLVMBuildBitCast( + md.builder, + pointer_to_int_value, + floatptr_type, + b"pointer-to-float-bit-cast\0".as_ptr() as *const _, + ); + let mut pointer_to_floats = Vec::new(); + for _ in 0..4 { + pointer_to_floats.push(pointer_to_float_value); + } + let pointer_to_floats_ptr = pointer_to_floats.as_mut_ptr(); + let mut pointer_vector = LLVMConstVector(pointer_to_floats_ptr, 4); + + let mut llvm_ptrs = vec![ + *gep1_llvm_instr, + *gep2_llvm_instr, + *gep3_llvm_instr, + *gep4_llvm_instr, + ]; + for idx in 0..4 { + // Grow the Vector + pointer_vector = LLVMBuildInsertElement( + md.builder, + pointer_vector, + *llvm_ptrs.get(idx).expect("Index must be in vector"), + LLVMConstInt(LLVMIntTypeInContext(md.context, 32), idx as u64, 0), + b"\0".as_ptr() as *const _, + ); + } + + let offset = LLVMConstInt(LLVMIntTypeInContext(md.context, 32), 0 as u64, 0); + + let mut mask_values = vec![ + LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), + LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), + LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), + LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), + ]; + let ptr_to_mask_values = mask_values.as_mut_ptr(); + let mask_vector = LLVMConstVector(ptr_to_mask_values, 4); + + // let mut poison_values = vec![ + // *gep1_llvm_instr, + // *gep2_llvm_instr, + // *gep3_llvm_instr, + // *gep4_llvm_instr, + // ]; + // let ptr_to_poison_values = poison_values.as_mut_ptr(); + // let pass_thru_vector = LLVMConstVector(ptr_to_poison_values, 4); + + // let panic!("LoadVec2LLVM is unimplemented"); + let args = [pointer_vector, offset, mask_vector, zero_vector].as_mut_ptr(); + LLVMBuildCall(md.builder, func, args, 4, b"\0".as_ptr() as *const _) +} + +unsafe fn arg_to_llvm(egg_node: &VecLang, md: &mut Egg2LLVMState) -> LLVMValueRef { // TODO: Make More Efficient with BTREEMAP? - let llvm2arg = &translation_metadata.llvm2egg_metadata.llvm2arg; + let llvm2arg = &md.llvm2egg_metadata.llvm2arg; for (llvm_instr, arg_node) in llvm2arg.iter() { // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. if arg_node == egg_node { @@ -1112,6 +1286,12 @@ unsafe fn egg_to_llvm( VecLang::Get(..) => { panic!("Get was found. Egg to LLVM Translation does not handle get nodes.") } + VecLang::Gep(..) => { + panic!("Gep was found. Egg to LLVM Translation does not handle gep nodes.") + } + VecLang::Load([gep_id]) => { + load_to_llvm(gep_id, translation_metadata) + } VecLang::Set(..) => { panic!("Set was found. Egg to LLVM Translation does not handle set nodes.") } @@ -1147,7 +1327,7 @@ unsafe fn egg_to_llvm( // VecSgn compliant with c++ LibMath copysign function, which differs with sgn at x = 0. VecLang::VecSgn([v]) => vecsgn_to_llvm(v, translation_metadata), VecLang::Sgn([n]) | VecLang::Sqrt([n]) | VecLang::Neg([n]) => scalar_unop_to_llvm(n, egg_node, translation_metadata), - VecLang::VecLoad(..) => panic!("VecLoad is not yet implemented"), + VecLang::VecLoad([gep1_id, gep2_id, gep3_id, gep4_id]) => loadvec_to_llvm(gep1_id, gep2_id, gep3_id, gep4_id, translation_metadata), VecLang::VecStore(..) 
=> panic!("VecStore is not yet implemented"), } } diff --git a/src/dios-egraphs/src/cost.rs b/src/dios-egraphs/src/cost.rs index 849c6626..f9adace3 100644 --- a/src/dios-egraphs/src/cost.rs +++ b/src/dios-egraphs/src/cost.rs @@ -38,6 +38,8 @@ impl CostFunction for VecCostFn<'_> { VecLang::Symbol(..) => LITERAL, VecLang::Get(..) => LITERAL, VecLang::Set(..) => LITERAL, + VecLang::Load(..) => LITERAL, + VecLang::Gep(..) => LITERAL, // And list structures for quite cheap VecLang::List(..) => STRUCTURE, diff --git a/src/dios-egraphs/src/rules.rs b/src/dios-egraphs/src/rules.rs index 87a1fb12..2ed10f7d 100644 --- a/src/dios-egraphs/src/rules.rs +++ b/src/dios-egraphs/src/rules.rs @@ -158,8 +158,8 @@ pub fn rules(no_ac: bool, no_vec: bool) -> Vec> { // Vector rules if !no_vec { rules.extend(vec![ - // Get load fusion rule - rw!("vec-load-gets"; "(Vec (Get ?a0 ?b0) (Get ?a1 ?b1) (Get ?a2 ?b2) (Get ?a3 ?b3))" => "(VecLoad (Vec ?a0 ?a1 ?a2 ?a3) (Vec ?b0 ?b1 ?b2 ?b3))"), + // Load load fusion rule + rw!("vec-load-Loads"; "(Vec (Load ?a0) (Load ?a1) (Load ?a2) (Load ?a3))" => "(VecLoad ?a0 ?a1 ?a2 ?a3)"), // Set store fusion rule rw!("vec-store-sets"; "(Vec (Set ?a0 ?b0 ?c0) (Set ?a1 ?b1 ?c1) (Set ?a2 ?b2 ?c2) (Set ?a3 ?b3 ?c3))" => "(VecStore (Vec ?a0 ?a1 ?a2 ?a3) (Vec ?b0 ?b1 ?b2 ?b3) (Vec ?c0 ?c1 ?c2 ?c3))"), // Special MAC fusion rule diff --git a/src/dios-egraphs/src/veclang.rs b/src/dios-egraphs/src/veclang.rs index 21ab97e2..53344a76 100644 --- a/src/dios-egraphs/src/veclang.rs +++ b/src/dios-egraphs/src/veclang.rs @@ -10,6 +10,12 @@ define_language! { // Argument points to a argument, denoted by a number Arg(u32), + Gep(u32), + + // Load is a read of memory + "Load" = Load([Id; 1]), + + // Id is a key to identify EClasses within an EGraph, represents // children nodes "+" = Add([Id; 2]), @@ -38,7 +44,6 @@ define_language! { // Vector with all literals "LitVec" = LitVec(Box<[Id]>), - // Get is a read of memory "Get" = Get([Id; 2]), // Set is a modification of memory @@ -62,8 +67,7 @@ define_language! 
{ // MAC takes 3 lists: acc, v1, v2 "VecMAC" = VecMAC([Id; 3]), - // VecLoad takes 2 lists: base address vector and offset vector - "VecLoad" = VecLoad([Id; 2]), + "VecLoad" = VecLoad([Id; 4]), // VecStore takes 2 lists: base address vector and offset vector "VecStore" = VecStore([Id; 3]), From f88713735c4bbff327db75e82430becb1c0d98b3 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 23 Mar 2023 19:59:35 -0400 Subject: [PATCH 112/143] vectorization occurs succesfully on all test cases Changed up the chunking procedure Uses alias analysis nlow looks at trees of handled instructions Also, stores and loads now use vector scatters and gathers Problem is this is much slower on some architectures than aligned and conseuctie loads and stores Not sure how to fix that, will think about it for now --- src/dios-egraphs/Diospyros/c-tests/2d-conv.c | 2 +- src/dios-egraphs/Diospyros/c-tests/fft.c | 10 +- src/dios-egraphs/Diospyros/diospyros.cpp | 474 ++++++++++++++++- src/dios-egraphs/Diospyros/src/lib.rs | 512 +++++++++++++++++-- src/dios-egraphs/src/cost.rs | 1 + src/dios-egraphs/src/rules.rs | 4 +- src/dios-egraphs/src/veclang.rs | 5 +- 7 files changed, 946 insertions(+), 62 deletions(-) diff --git a/src/dios-egraphs/Diospyros/c-tests/2d-conv.c b/src/dios-egraphs/Diospyros/c-tests/2d-conv.c index 46d4d15c..3d1be0ea 100644 --- a/src/dios-egraphs/Diospyros/c-tests/2d-conv.c +++ b/src/dios-egraphs/Diospyros/c-tests/2d-conv.c @@ -1,5 +1,5 @@ -#include #include +#include #define I_ROWS 2 #define I_COLS 2 diff --git a/src/dios-egraphs/Diospyros/c-tests/fft.c b/src/dios-egraphs/Diospyros/c-tests/fft.c index 8ab52066..a3ba2fbd 100644 --- a/src/dios-egraphs/Diospyros/c-tests/fft.c +++ b/src/dios-egraphs/Diospyros/c-tests/fft.c @@ -55,9 +55,9 @@ void fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], } } -void no_opt_fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], - float img_twid_in[SIZE / 2], float real_out[SIZE], - float img_out[SIZE]) { +void no_opt_fft(float real_in[SIZE], float img_in[SIZE], + float real_twid_in[SIZE / 2], float img_twid_in[SIZE / 2], + float real_out[SIZE], float img_out[SIZE]) { int even = 0; int odd = 0; int log = 0; @@ -99,7 +99,6 @@ void no_opt_fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE } } - int main(void) { // time_t t = time(NULL); // srand((unsigned)time(&t)); @@ -150,7 +149,8 @@ int main(void) { } fft(real_in, img_in, real_twid_in, img_twid_in, real_out, img_out); - no_opt_fft(expected_real_in, expected_img_in, expected_real_twid_in, expected_img_twid_in, expected_real_out, expected_img_out); + no_opt_fft(expected_real_in, expected_img_in, expected_real_twid_in, + expected_img_twid_in, expected_real_out, expected_img_out); for (int i = 0; i < SIZE; i++) { printf("Real Out Output: %f\n", real_out[i]); diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 80ec377b..41d6a4dd 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -9,9 +9,13 @@ #include #include +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" @@ -139,6 +143,13 @@ extern "C" int llvm_index(LLVMValueRef val, int index) { return 
gen_fresh_index(); } +/** + * Generates an LLVM Opaque Pointer Type wrapped as an LLVMType Ref + */ +extern "C" LLVMTypeRef generate_opaque_pointer(LLVMTypeRef element_type) { + return wrap(PointerType::getUnqual(unwrap(element_type))); +} + /** * True iff a value is an LLVM Unary Operation */ @@ -503,16 +514,411 @@ bool can_vectorize(Value *value) { return true; } else if (isa(instr)) { return true; + } else if (isa(instr)) { + return true; } - // else if (isa(instr)) { - // return true; - // } // else if (isa_sqrt32(wrap(instr))) { // return true; // } return false; } +/** + * True iff an instruction is a mem intrinsic. + */ +bool isa_mem_intrinsic(Instruction *instr) { + if (isa(instr)) { + return true; + } else if (isa(instr)) { + return true; + } else if (isa(instr)) { + return true; + } else if (isa(instr)) { + // hopefully this covers all memory intrinsics + return true; + } + return false; +} + +/** + * True iff 2 addresses MIGHT alias. + * + * LLVM has a edge case when comparing the same pointer, which is why there is a + * MustAlias check + */ +bool may_alias(Value *addr1, Value *addr2, AliasAnalysis *AA) { + // IDK why I have to check both, but + // something about comparing a address + // to itself causes this?!~, problem + // first found in LSMovement + return (!AA->isNoAlias(addr1, + LocationSize::precise( + addr1->getType()->getPrimitiveSizeInBits()), + addr2, + LocationSize::precise( + addr2->getType()->getPrimitiveSizeInBits())) || + AA->isMustAlias(addr1, addr2)); +} + +using chunk_t = std::vector; +using chunks_t = std::vector>; + +/** + * True iff is a special type of instruction for chunking + * + */ +bool isa_special_chunk_instr(Instruction *instr) { + return isa_mem_intrinsic(instr) || isa(instr) || + isa(instr) || isa(instr); +} + +/* +Build chunks of instructions + +A chunk is the longest contiguous section of instructions that ends in a +sequence of stores. + +A chunk does not need to contain a store instruction. 
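+
+As a rough illustration (a hypothetical straight-line block, not one of the
+checked-in test cases), a sequence such as
+
+    %a = load float, float* %p
+    %b = fadd float %a, %a
+    store float %b, float* %q
+    %c = call float @llvm.sqrt.f32(float %b)
+
+is split after the store: the load/fadd/store form one chunk, and the call,
+being a specially handled instruction, is placed in a chunk of its own.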
+ +Assumes: LoadStoreMovement pass is run before the Diospyros pass +**/ +std::vector> build_chunks(BasicBlock *B, + AliasAnalysis *AA) { + std::vector> chunks = {}; + + bool has_seen_store = false; + bool stores_alias_in_chunk = false; + std::vector curr_chunk = {}; + + // Track Last Stores seen + std::vector last_stores = {}; + for (auto &I : *B) { + // the first two cases are meant to create chunks with non-handled + // instructions + if (has_seen_store && isa_special_chunk_instr(&I)) { + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + has_seen_store = false; + stores_alias_in_chunk = false; + curr_chunk = {}; + last_stores = {}; + curr_chunk.push_back(&I); + chunks.push_back(curr_chunk); + curr_chunk = {}; + } else if (!has_seen_store && isa_special_chunk_instr(&I)) { + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + has_seen_store = false; + stores_alias_in_chunk = false; + curr_chunk = {}; + last_stores = {}; + curr_chunk.push_back(&I); + chunks.push_back(curr_chunk); + curr_chunk = {}; + } else if (!has_seen_store && isa(I) && + !isa_special_chunk_instr(&I)) { + has_seen_store = true; + curr_chunk.push_back(&I); + last_stores.push_back(&I); + } else if (!has_seen_store && !isa(I) && + !isa_special_chunk_instr(&I)) { + curr_chunk.push_back(&I); + } else if (has_seen_store && !isa(I) && + !isa_special_chunk_instr(&I)) { + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + has_seen_store = false; + stores_alias_in_chunk = false; + curr_chunk = {}; + last_stores = {}; + curr_chunk.push_back(&I); + } else { // has seen store and is a store instruction + Value *curr_store_addr = I.getOperand(1); + for (auto other_store : last_stores) { + if (other_store != &I) { + Value *other_store_addr = other_store->getOperand(1); + if (may_alias(curr_store_addr, other_store_addr, AA)) { + stores_alias_in_chunk = true; + } + } + } + curr_chunk.push_back(&I); + last_stores.push_back(&I); + } + } + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + + // Filter to make sure no chunks are empty + chunks_t final_chunks = {}; + for (auto chunk : chunks) { + if (!chunk.empty()) { + final_chunks.push_back(chunk); + } + } + + for (std::size_t i = 0; i < final_chunks.size(); ++i) { + errs() << "This is chunk " << i << "\n"; + for (auto instr : final_chunks[i]) { + errs() << *instr << "\n"; + } + } + + return final_chunks; +} + +using ad_tree_t = std::vector; +using ad_trees_t = std::vector; + +/** + * Recurse LLVM starts at an LLVM instruction and finds + * all of its arguments, and recursively so on, until + * either a load / number / arg is reached + * + * Non handled instructions are bailed out of by returning a failure + * Instructions with a load or arg that leaks into another chunk + * also leads to a failure bailout. 
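+ *
+ * For example (hypothetical, not taken from a test case): starting from
+ *   store float %t, float* %q   where %t = fadd float %x, 2.0
+ *                               and   %x = load float, float* %p
+ * the recursion accumulates [load, fadd, store]; if any operand in that tree
+ * is an unsupported instruction (say an integer add), the whole tree fails.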
+ * + * Returns a Tuple (Success/Failure , Instructions accumulated) + */ +std::pair recurse_llvm(Value *value, + std::set chunk_instrs) { + // Constants + if (isa(value)) { + // DO not add constant, if i recall, constants are not llvm + // instructions + return std::make_pair(true, std::vector{}); + } + if (Instruction *instr = dyn_cast(value)) { + // No Longer in Chunk + if (chunk_instrs.count(instr) == 0) { + errs() << "Instruction has left the chunk\n" << *instr << "\n"; + return std::make_pair(false, {}); + } + + // Base case instructions + if (isa(instr) || isa(instr)) { + return std::make_pair(true, std::vector{instr}); + } + + // Recurse on Store Instructions + if (isa(instr) && + instr->getOperand(0)->getType()->isFloatTy()) { + auto [child_b, child_tree] = + recurse_llvm(instr->getOperand(0), chunk_instrs); + if (child_b) { + child_tree.push_back(instr); + return std::make_pair(true, child_tree); + } + } + + // Recurse on supported unary operators OR Store Instructions + if (instr->getOpcode() == Instruction::FNeg) { + auto [child_b, child_tree] = + recurse_llvm(instr->getOperand(0), chunk_instrs); + if (child_b) { + child_tree.push_back(instr); + return std::make_pair(true, child_tree); + } + } + + // Recurse on supported binary operators + if (instr->getOpcode() == Instruction::FAdd || + instr->getOpcode() == Instruction::FSub || + instr->getOpcode() == Instruction::FDiv || + instr->getOpcode() == Instruction::FMul) { + auto [left_b, left_tree] = + recurse_llvm(instr->getOperand(0), chunk_instrs); + auto [right_b, right_tree] = + recurse_llvm(instr->getOperand(1), chunk_instrs); + if (left_b && right_b) { + left_tree.insert(left_tree.end(), right_tree.begin(), + right_tree.end()); + left_tree.push_back(instr); + return std::make_pair(true, left_tree); + } + } + } + + // Unhandled Instruction + errs() << "Unhandled Instruction\n" << *value << "\n"; + return std::make_pair(false, std::vector{}); +} + +/** + * An AD Tree is just a vector of instructions reachable from a unique store + * instruction + * + */ +ad_trees_t build_ad_trees(chunk_t chunk) { + ad_trees_t ad_trees = {}; + std::set chunk_instrs = {}; + for (auto instr : chunk) { + chunk_instrs.insert(instr); + } + for (auto instr : chunk) { + if (isa(instr)) { + // ad_tree_t new_tree = {}; + auto [success_b, ad_tree] = recurse_llvm(instr, chunk_instrs); + if (success_b) { + assert(ad_tree.size() != 0); + } + if (success_b) { + ad_trees.push_back(ad_tree); + } + } + } + for (auto ad_tree : ad_trees) { + errs() << "New AD Tree\n"; + for (auto instr : ad_tree) { + errs() << *instr << "\n"; + } + } + return ad_trees; +} + +/** + * Joins adtrees together into vecotrs of instructions + * + */ +std::vector join_trees( + std::vector> trees_to_join) { + std::vector final_vector = {}; + for (auto tree : trees_to_join) { + final_vector.insert(final_vector.end(), tree.begin(), tree.end()); + } + return final_vector; +} + +/** + * True iff there is some load in a joined section of adtrees that MIGHT alias a + * store in the same tree. 
+ * + * Load-store aliasing causes problems in some situation where you have stores + * as functions of the same loads, but no vectoriszation occurs, so the code is + * rewritten linearly, and a memory dependency is introduced + * + * From a bug in FFT.c + */ +chunks_t remove_load_store_alias(chunks_t chunks, AliasAnalysis *AA) { + chunks_t final_chunks = {}; + + std::vector load_addresses = {}; + std::vector store_addresses = {}; + for (auto chunk : chunks) { + for (auto instr : chunk) { + if (isa(instr)) { + Value *load_address = + dyn_cast(instr)->getPointerOperand(); + load_addresses.push_back(load_address); + } else if (isa(instr)) { + Value *store_address = + dyn_cast(instr)->getPointerOperand(); + store_addresses.push_back(store_address); + } + } + bool can_add_to_final_chunks = true; + for (auto load_address : load_addresses) { + for (auto store_address : store_addresses) { + if (may_alias(load_address, store_address, AA)) { + can_add_to_final_chunks = false; + } + } + } + if (can_add_to_final_chunks) { + final_chunks.push_back(chunk); + } + } + return final_chunks; +} + +/** + * Converts chunks into vectors, representing joined AD Trees + * + */ +std::vector> chunks_into_joined_trees( + chunks_t chunks, AliasAnalysis *AA) { + std::vector> trees = {}; + for (auto chunk : chunks) { + ad_trees_t ad_trees = build_ad_trees(chunk); + + // Join trees if the store instructions in the trees + // do not alias each other + std::vector> joinable_trees = {}; + for (auto tree : ad_trees) { + // check if stores alias in the trees + assert(tree.size() > 0); + Instruction *curr_store = tree.back(); + Value *curr_store_addr = curr_store->getOperand(1); + bool can_add_tree = true; + for (auto other_tree : joinable_trees) { + assert(other_tree.size() > 0); + Instruction *other_store = other_tree.back(); + Value *other_store_addr = other_store->getOperand(1); + if (may_alias(curr_store_addr, other_store_addr, AA)) { + can_add_tree = false; + break; + } + } + if (can_add_tree) { + joinable_trees.push_back(tree); + } else { + assert(joinable_trees.size() > 0); + auto joined_trees = join_trees(joinable_trees); + trees.push_back(joined_trees); + joinable_trees = {tree}; + } + } + if (joinable_trees.size() > 0) { + auto joined_trees = join_trees(joinable_trees); + trees.push_back(joined_trees); + } + } + // Do final removal of any sequences with store-load aliasing + return remove_load_store_alias(trees, AA); +} + +/* +Build AD Trees for each Chunk +**/ + +/// Map instr2ref over a vector +std::vector> instr2ref(chunks_t chunks) { + std::vector> mapped_instrs = {}; + for (auto chunk : chunks) { + std::vector mapped_chunk = {}; + for (auto instr : chunk) { + mapped_chunk.push_back(wrap(instr)); + } + mapped_instrs.push_back(mapped_chunk); + } + return mapped_instrs; +} + +/** + * Run Optimization Procedure on Vector representing concatenated ad trees + * + */ +void optimize(std::vector chunk, Function &F) { + assert(chunk.size() != 0); + // Place the builder at the last instruction in the entire chunk. 
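+    // (Anchoring the IRBuilder at the chunk's final instruction means every
+    // value the rewritten vector code reads is already defined at the
+    // insertion point; the scalar instructions being replaced are left for
+    // the later --adce/--dse passes to clean up.)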
+ Value *last_value = unwrap(chunk.back()); + Instruction *last_instr = dyn_cast(last_value); + assert(last_instr != NULL); + IRBuilder<> builder(last_instr); + + Module *mod = F.getParent(); + LLVMContext &context = F.getContext(); + std::vector restricted_instrs = {}; + optimize(wrap(mod), wrap(&context), wrap(&builder), chunk.data(), + chunk.size(), restricted_instrs.data(), restricted_instrs.size(), + RunOpt, PrintOpt); +} + /** * Below is the main DiospyrosPass that activates the Rust lib.rs code, * which calls the Egg vectorizer and rewrites the optimized code in place. @@ -523,7 +929,16 @@ struct DiospyrosPass : public FunctionPass { static char ID; DiospyrosPass() : FunctionPass(ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + } + virtual bool runOnFunction(Function &F) override { + // We need Alias Analysis still, because it is possible groups of + // stores can addresses that alias. + AliasAnalysis *AA = &getAnalysis().getAAResults(); + // do not optimize on main function or no_opt functions. if (F.getName() == MAIN_FUNCTION_NAME || (F.getName().size() > NO_OPT_PREFIX.size() && @@ -532,8 +947,20 @@ struct DiospyrosPass : public FunctionPass { } bool has_changes = false; for (auto &B : F) { - // TODO: Consider removing as the new procedure can overcome this - // We skip over basic blocks without floating point types + auto chunks = build_chunks(&B, AA); + auto trees = chunks_into_joined_trees(chunks, AA); + auto treerefs = instr2ref(trees); + + for (auto tree_chunk : treerefs) { + if (tree_chunk.size() != 0) { + optimize(tree_chunk, F); + } + } + } + return false; + for (auto &B : F) { + // TODO: Consider removing as the new procedure can overcome + // this We skip over basic blocks without floating point types bool has_float = false; for (auto &I : B) { if (I.getType()->isFloatTy()) { @@ -550,9 +977,19 @@ struct DiospyrosPass : public FunctionPass { std::vector> chunk_accumulator; std::vector chunk_vector = {}; bool vectorizable_flag = false; + bool has_seen_store = false; for (auto &I : B) { Value *val = dyn_cast(&I); assert(val != NULL); + // When you finish seeing stores, and see some other + // instruction afterwards, stop the current chunk to + // vectorize + if (isa(val)) { + has_seen_store = true; + } + if (has_seen_store && !isa(val)) { + vectorizable_flag = false; + } if (can_vectorize(val) && !vectorizable_flag) { if (!chunk_vector.empty()) { chunk_accumulator.push_back(chunk_vector); @@ -583,6 +1020,11 @@ struct DiospyrosPass : public FunctionPass { continue; } + errs() << "Here is a chunk: \n"; + for (auto chunk_instr : chunk_vector) { + errs() << *unwrap(chunk_instr) << "\n"; + } + // check if the chunk vector actually has instructions to // optimixe on bool has_vectorizable_instrs = false; @@ -598,14 +1040,25 @@ struct DiospyrosPass : public FunctionPass { continue; } - // If an instruction is used multiple times outside the chunk, - // add it to a restricted list. + // check if the chunk vector has at least one store + bool has_store = false; + for (auto &instr : chunk_vector) { + if (isa(unwrap(instr))) { + has_store = true; + } + } + if (!has_store) { + continue; + } + + // If an instruction is used multiple times outside the + // chunk, add it to a restricted list. // TODO: only consider future chunks! 
std::vector restricted_instrs = {}; for (auto chunk_instr : chunk_vector) { for (auto j = i + 1; j < chunk_accumulator.size(); ++j) { - // guaranteed to be a different chunk vector ahead of - // the origianl one. + // guaranteed to be a different chunk vector ahead + // of the origianl one. bool must_restrict = false; auto &other_chunk_vector = chunk_accumulator[j]; for (auto other_chunk_instr : other_chunk_vector) { @@ -629,9 +1082,6 @@ struct DiospyrosPass : public FunctionPass { // instruction" int insert_pos = 0; bool has_seen_vectorizable = false; - for (auto chunk_instr : chunk_vector) { - errs() << *unwrap(chunk_instr) << "\n"; - } for (int i = 0; i < chunk_vector.size(); i++) { if (can_vectorize(unwrap(chunk_vector[i]))) { has_seen_vectorizable = true; diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 5ccc4a51..82b47409 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -17,9 +17,9 @@ extern "C" { fn _isa_bop(val: LLVMValueRef) -> bool; fn isa_constant(val: LLVMValueRef) -> bool; fn isa_constfp(val: LLVMValueRef) -> bool; - fn isa_gep(val: LLVMValueRef) -> bool; + fn _isa_gep(val: LLVMValueRef) -> bool; fn isa_load(val: LLVMValueRef) -> bool; - fn _isa_store(val: LLVMValueRef) -> bool; + fn isa_store(val: LLVMValueRef) -> bool; fn isa_argument(val: LLVMValueRef) -> bool; fn _isa_call(val: LLVMValueRef) -> bool; fn _isa_fptrunc(val: LLVMValueRef) -> bool; @@ -39,6 +39,7 @@ extern "C" { fn _isa_sqrt64(val: LLVMValueRef) -> bool; fn get_constant_float(val: LLVMValueRef) -> f32; fn build_constant_float(n: f64, context: LLVMContextRef) -> LLVMValueRef; + fn generate_opaque_pointer(element_type: LLVMTypeRef) -> LLVMTypeRef; } static mut ARG_IDX: u32 = 0; @@ -209,6 +210,7 @@ enum LLVMOpType { // TODO: SGN signum UnhandledLLVMOpCode, Load, + Store, } unsafe fn get_pow2(n: u32) -> u32 { @@ -379,6 +381,8 @@ unsafe fn match_llvm_op(llvm_instr: &LLVMValueRef) -> LLVMOpType { return LLVMOpType::Sqrt32; } else if isa_load(*llvm_instr) { return LLVMOpType::Load; + } else if isa_store(*llvm_instr) { + return LLVMOpType::Store; } else { return LLVMOpType::UnhandledLLVMOpCode; } @@ -510,17 +514,49 @@ unsafe fn load_to_egg( let gep_id = gen_get_idx(); let gep_node = VecLang::Gep(gep_id); egg_nodes.push(gep_node.clone()); - let load_node = VecLang::Load([Id::from(next_node_idx as usize)]); let llvm_gep_instr = LLVMGetOperand(llvm_instr, 0); - _llvm_print(llvm_gep_instr); // assert!(isa_gep(llvm_gep_instr) || isa_argument(llvm_gep_instr)); translation_metadata.get2gep.insert(gep_id, llvm_gep_instr); + + let load_node = VecLang::Load([Id::from(next_node_idx as usize)]); egg_nodes.push(load_node.clone()); assert!(!translation_metadata.llvm2reg.contains_key(&llvm_instr)); translation_metadata.llvm2reg.insert(llvm_instr, load_node); (egg_nodes, next_node_idx + 2) } +unsafe fn store_to_egg( + llvm_instr: LLVMValueRef, + egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, +) -> (Vec, u32) { + assert!(isa_store(llvm_instr)); + let llvm_val_instr = LLVMGetOperand(llvm_instr, 0); + let llvm_gep_instr = LLVMGetOperand(llvm_instr, 1); + let (mut new_egg_nodes, new_next_idx) = llvm_to_egg( + llvm_val_instr, + egg_nodes, + next_node_idx, + translation_metadata, + ); + + let gep_id = gen_get_idx(); + let gep_node = VecLang::Gep(gep_id); + new_egg_nodes.push(gep_node.clone()); + translation_metadata.get2gep.insert(gep_id, llvm_gep_instr); + + let store_node = VecLang::Store([ + 
Id::from((new_next_idx - 1) as usize), // reference to the recursive translation + Id::from(new_next_idx as usize), // reference to a GEP node + ]); + new_egg_nodes.push(store_node.clone()); + assert!(!translation_metadata.llvm2reg.contains_key(&llvm_instr)); + translation_metadata.llvm2reg.insert(llvm_instr, store_node); + + (new_egg_nodes, new_next_idx + 2) // Add 2 because we built a gep, then also a store node +} + /// Translates an Unhandled OpCode to an Egg Register. /// /// This represents a blackbox computation that we bail on translating @@ -603,6 +639,9 @@ unsafe fn llvm_to_egg( sqrt32_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) } LLVMOpType::Load => load_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata), + LLVMOpType::Store => { + store_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } LLVMOpType::UnhandledLLVMOpCode => { unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) } @@ -632,7 +671,8 @@ unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { | LLVMOpType::FNeg | LLVMOpType::Constant | LLVMOpType::Sqrt32 - | LLVMOpType::Load => true, + | LLVMOpType::Load + | LLVMOpType::Store => true, LLVMOpType::Argument | LLVMOpType::UnhandledLLVMOpCode => false, }; } @@ -718,10 +758,8 @@ unsafe fn llvm_to_egg_main( next_node_idx, &mut translation_metadata, ); - println!("{}", new_next_node_idx); egg_nodes = new_egg_nodes; next_node_idx = new_next_node_idx; - println!("{}", egg_nodes.len()); } } @@ -785,6 +823,20 @@ unsafe fn load_to_llvm(gep_id: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { panic!("Load2LLVM: Expected a successful lookup in get2gep, but cannot find Gep ID."); } +unsafe fn store_to_llvm(val_id: &Id, gep_id: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { + let original_gep_id = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep_id)], md); + let llvm_val_instr = egg_to_llvm(&md.egg_nodes_vector[usize::from(*val_id)], md); + let get2gep = &md.llvm2egg_metadata.get2gep; + for (gep_id, gep_instr) in get2gep.iter() { + if original_gep_id == *gep_id { + // assert!(isa_gep(*gep_instr) || isa_argument(*gep_instr)); + let new_store_instr = LLVMBuildStore(md.builder, llvm_val_instr, *gep_instr); + return new_store_instr; + } + } + panic!("Store2LLVM: Expected a successful lookup in get2gep, but cannot find Gep ID."); +} + unsafe fn loadvec_to_llvm( gep1_id: &Id, gep2_id: &Id, @@ -834,7 +886,7 @@ unsafe fn loadvec_to_llvm( // Build the Vector Load Intrinsic let func = LLVMAddFunction( md.module, - b"llvm.masked.gather.v4f32.v4p0\0".as_ptr() as *const _, + b"llvm.masked.gather.v4f32.v4p0f32\0".as_ptr() as *const _, fn_type, ); @@ -856,7 +908,7 @@ unsafe fn loadvec_to_llvm( let pointer_to_float_value = LLVMBuildBitCast( md.builder, pointer_to_int_value, - floatptr_type, + generate_opaque_pointer(LLVMFloatTypeInContext(md.context)), b"pointer-to-float-bit-cast\0".as_ptr() as *const _, ); let mut pointer_to_floats = Vec::new(); @@ -866,7 +918,7 @@ unsafe fn loadvec_to_llvm( let pointer_to_floats_ptr = pointer_to_floats.as_mut_ptr(); let mut pointer_vector = LLVMConstVector(pointer_to_floats_ptr, 4); - let mut llvm_ptrs = vec![ + let llvm_ptrs = vec![ *gep1_llvm_instr, *gep2_llvm_instr, *gep3_llvm_instr, @@ -894,20 +946,120 @@ unsafe fn loadvec_to_llvm( let ptr_to_mask_values = mask_values.as_mut_ptr(); let mask_vector = LLVMConstVector(ptr_to_mask_values, 4); - // let mut poison_values = vec![ - // *gep1_llvm_instr, - // *gep2_llvm_instr, - // *gep3_llvm_instr, - // 
*gep4_llvm_instr, - // ]; - // let ptr_to_poison_values = poison_values.as_mut_ptr(); - // let pass_thru_vector = LLVMConstVector(ptr_to_poison_values, 4); - - // let panic!("LoadVec2LLVM is unimplemented"); let args = [pointer_vector, offset, mask_vector, zero_vector].as_mut_ptr(); LLVMBuildCall(md.builder, func, args, 4, b"\0".as_ptr() as *const _) } +unsafe fn storevec_to_llvm( + val_vec_id: &Id, + gep1_id: &Id, + gep2_id: &Id, + gep3_id: &Id, + gep4_id: &Id, + md: &mut Egg2LLVMState, +) -> LLVMValueRef { + // Recursively translate val_vec_id to an LLVM Vector Instr + let llvm_val_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*val_vec_id)], md); + + // Set Opaque Pointer ness + let gep1_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); + let gep2_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep2_id)], md); + let gep3_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep3_id)], md); + let gep4_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep4_id)], md); + + let gep1_llvm_instr = md + .llvm2egg_metadata + .get2gep + .get(&gep1_id_val) + .expect("Value of gep1 id should exist in get2gep"); + let gep2_llvm_instr = md + .llvm2egg_metadata + .get2gep + .get(&gep2_id_val) + .expect("Value of gep2 id should exist in get2gep"); + let gep3_llvm_instr = md + .llvm2egg_metadata + .get2gep + .get(&gep3_id_val) + .expect("Value of gep3 id should exist in get2gep"); + let gep4_llvm_instr = md + .llvm2egg_metadata + .get2gep + .get(&gep4_id_val) + .expect("Value of gep4 id should exist in get2gep"); + + let vector_width = 4; + let floatptr_type = LLVMTypeOf(*gep1_llvm_instr); + let vec4ptr_type = LLVMVectorType(floatptr_type, vector_width); + let vec4f_type = LLVMVectorType(LLVMFloatTypeInContext(md.context), vector_width); + let vec4b_type = LLVMVectorType(LLVMInt1TypeInContext(md.context), vector_width); + let int_type = LLVMIntTypeInContext(md.context, 32); + let void_type = LLVMVoidTypeInContext(md.context); + + // Parameter Types are: vector of values, vector of pointers, offset int, mask vector booleans + let param_types = [vec4f_type, vec4ptr_type, int_type, vec4b_type].as_mut_ptr(); + // Output type is a void_type + let fn_type = LLVMFunctionType(void_type, param_types, 4, 0 as i32); + // Build the Vector Load Intrinsic + let func = LLVMAddFunction( + md.module, + b"llvm.masked.scatter.v4f32.v4p0f32\0".as_ptr() as *const _, + fn_type, + ); + + // Build Arguments + + let pointer_to_int_value = LLVMBuildPtrToInt( + md.builder, + LLVMConstInt(LLVMIntTypeInContext(md.context, 32), 0 as u64, 0), + LLVMIntTypeInContext(md.context, 32), + b"pointer-to-int\0".as_ptr() as *const _, + ); + let pointer_to_float_value = LLVMBuildBitCast( + md.builder, + pointer_to_int_value, + floatptr_type, + b"pointer-to-float-bit-cast\0".as_ptr() as *const _, + ); + let mut pointer_to_floats = Vec::new(); + for _ in 0..4 { + pointer_to_floats.push(pointer_to_float_value); + } + let pointer_to_floats_ptr = pointer_to_floats.as_mut_ptr(); + let mut pointer_vector = LLVMConstVector(pointer_to_floats_ptr, 4); + + let llvm_ptrs = vec![ + *gep1_llvm_instr, + *gep2_llvm_instr, + *gep3_llvm_instr, + *gep4_llvm_instr, + ]; + for idx in 0..4 { + // Grow the Vector + pointer_vector = LLVMBuildInsertElement( + md.builder, + pointer_vector, + *llvm_ptrs.get(idx).expect("Index must be in vector"), + LLVMConstInt(LLVMIntTypeInContext(md.context, 32), idx as u64, 0), + b"\0".as_ptr() as *const _, + ); + } + + let offset = LLVMConstInt(LLVMIntTypeInContext(md.context, 32), 0 as u64, 0); 
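+
+    // Sketch of the call assembled below (argument order follows the upstream
+    // llvm.masked.scatter intrinsic; the i32 operand is its alignment
+    // immediate, which this code names `offset` and leaves at 0; register
+    // names are illustrative):
+    //   call void @llvm.masked.scatter.v4f32.v4p0f32(
+    //       <4 x float> %values, <4 x float*> %ptrs, i32 0, <4 x i1> <all-true mask>)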
+ + let mut mask_values = vec![ + LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), + LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), + LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), + LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), + ]; + let ptr_to_mask_values = mask_values.as_mut_ptr(); + let mask_vector = LLVMConstVector(ptr_to_mask_values, 4); + + let args = [llvm_val_vec, pointer_vector, offset, mask_vector].as_mut_ptr(); + LLVMBuildCall(md.builder, func, args, 4, b"\0".as_ptr() as *const _) +} + unsafe fn arg_to_llvm(egg_node: &VecLang, md: &mut Egg2LLVMState) -> LLVMValueRef { // TODO: Make More Efficient with BTREEMAP? let llvm2arg = &md.llvm2egg_metadata.llvm2arg; @@ -1016,6 +1168,18 @@ unsafe fn vec_to_llvm(boxed_ids: &Box<[Id]>, md: &mut Egg2LLVMState) -> LLVMValu vector } +unsafe fn nooptvector_to_llvm(boxed_ids: &Box<[Id]>, md: &mut Egg2LLVMState) -> LLVMValueRef { + // Convert the Boxed Ids to a Vector, and generate a vector of zeros + // Invariant: idvec must not be empty + let idvec = boxed_ids.to_vec(); + let mut elt_val = LLVMConstInt(LLVMIntTypeInContext(md.context, 32), 0 as u64, 0); + for eggid in idvec { + let elt = &md.egg_nodes_vector[usize::from(eggid)]; + elt_val = egg_to_llvm(elt, md); + } + elt_val +} + // TODO: Segregate Vec and Scalar Binops? unsafe fn binop_to_llvm( binop_node: &VecLang, @@ -1101,6 +1265,7 @@ unsafe fn binop_to_llvm( } } +// TODO: fix up concat errors due to having vecstores. unsafe fn concat_to_llvm( left_vector: &Id, right_vector: &Id, @@ -1109,6 +1274,7 @@ unsafe fn concat_to_llvm( { let trans_v1 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_vector)], md); let mut trans_v2 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_vector)], md); + return trans_v2; // In LLVM, it turns out all vectors need to be length power of 2 // if the 2 vectors are not the same size, double the length of the smaller vector by padding with 0's in it @@ -1279,7 +1445,6 @@ unsafe fn egg_to_llvm( translation_metadata: &mut Egg2LLVMState, ) -> LLVMValueRef { match egg_node { - VecLang::NoOptVec(..) => panic!("No Opt Vector was found. Egg to LLVM Translation does not handle No Opt Vector nodes at this location."), VecLang::Symbol(..) => { panic!("Symbol was found. Egg to LLVM Translation does not handle symbol nodes.") } @@ -1288,10 +1453,13 @@ unsafe fn egg_to_llvm( } VecLang::Gep(..) => { panic!("Gep was found. Egg to LLVM Translation does not handle gep nodes.") - } + } VecLang::Load([gep_id]) => { load_to_llvm(gep_id, translation_metadata) } + VecLang::Store([val_id, gep_id]) => { + store_to_llvm(val_id, gep_id, translation_metadata) + } VecLang::Set(..) => { panic!("Set was found. Egg to LLVM Translation does not handle set nodes.") } @@ -1307,6 +1475,7 @@ unsafe fn egg_to_llvm( VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { vec_to_llvm(&*boxed_ids, translation_metadata) } + VecLang::NoOptVec(boxed_ids) => nooptvector_to_llvm(boxed_ids, translation_metadata), VecLang::VecAdd([l, r]) | VecLang::VecMinus([l, r]) | VecLang::VecMul([l, r]) @@ -1328,7 +1497,252 @@ unsafe fn egg_to_llvm( VecLang::VecSgn([v]) => vecsgn_to_llvm(v, translation_metadata), VecLang::Sgn([n]) | VecLang::Sqrt([n]) | VecLang::Neg([n]) => scalar_unop_to_llvm(n, egg_node, translation_metadata), VecLang::VecLoad([gep1_id, gep2_id, gep3_id, gep4_id]) => loadvec_to_llvm(gep1_id, gep2_id, gep3_id, gep4_id, translation_metadata), - VecLang::VecStore(..) 
=> panic!("VecStore is not yet implemented"), + VecLang::VecStore([val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id]) => storevec_to_llvm(val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id, translation_metadata), + } +} + +// Function types for constructor anonymous functions +type VecLangSingleConstructor = fn([Id; 1]) -> VecLang; +type VecLangPairConstructor = fn([Id; 2]) -> VecLang; +type VecLangTripleConstructor = fn([Id; 3]) -> VecLang; +type VecLangQuadConstructor = fn([Id; 4]) -> VecLang; +type VecLangQuintConstructor = fn([Id; 5]) -> VecLang; +type VecLangBoxedConstructor = fn(bool, Box<[Id]>) -> VecLang; + +/// Canonicalizes a Enode with a single inpit constructor +unsafe fn canonicalize_single( + can_change_vector: bool, + constructor: VecLangSingleConstructor, + single_vector: &Id, + old_egg_nodes: &[VecLang], +) -> Vec { + let mut trans_v1 = canonicalize_egg( + false, + &old_egg_nodes[usize::from(*single_vector)], + old_egg_nodes, + ); + trans_v1.push(constructor([*single_vector])); + trans_v1 +} + +/// Canonicalizes a Enode with a pair constructor +unsafe fn canonicalize_pair( + is_concat: bool, + can_change_vector: bool, + constructor: VecLangPairConstructor, + left_vector: &Id, + right_vector: &Id, + old_egg_nodes: &[VecLang], +) -> Vec { + let trans_v1 = canonicalize_egg( + if !is_concat { false } else { can_change_vector }, + &old_egg_nodes[usize::from(*left_vector)], + old_egg_nodes, + ); + let trans_v2 = canonicalize_egg( + if !is_concat { false } else { can_change_vector }, + &old_egg_nodes[usize::from(*right_vector)], + old_egg_nodes, + ); + let mut whole_vector = [trans_v1, trans_v2].concat(); + whole_vector.push(constructor([*left_vector, *right_vector])); + whole_vector +} + +/// Canonicalizes a Enode with a triple input constructor +unsafe fn canonicalize_triple( + can_change_vector: bool, + constructor: VecLangTripleConstructor, + first_vector: &Id, + second_vector: &Id, + third_vector: &Id, + old_egg_nodes: &[VecLang], +) -> Vec { + let trans_v1 = canonicalize_egg( + false, + &old_egg_nodes[usize::from(*first_vector)], + old_egg_nodes, + ); + let trans_v2 = canonicalize_egg( + false, + &old_egg_nodes[usize::from(*second_vector)], + old_egg_nodes, + ); + let trans_v3 = canonicalize_egg( + false, + &old_egg_nodes[usize::from(*third_vector)], + old_egg_nodes, + ); + let mut whole_vector = [trans_v1, trans_v2, trans_v3].concat(); + whole_vector.push(constructor([*first_vector, *second_vector, *third_vector])); + whole_vector +} + +/// Canonicalizes a Enode with a quadruple input constructor +unsafe fn canonicalize_quadruple( + can_change_vector: bool, + constructor: VecLangQuadConstructor, + first_vector: &Id, + second_vector: &Id, + third_vector: &Id, + fourth_vector: &Id, + old_egg_nodes: &[VecLang], +) -> Vec { + let trans_v1 = canonicalize_egg( + false, + &old_egg_nodes[usize::from(*first_vector)], + old_egg_nodes, + ); + let trans_v2 = canonicalize_egg( + false, + &old_egg_nodes[usize::from(*second_vector)], + old_egg_nodes, + ); + let trans_v3 = canonicalize_egg( + false, + &old_egg_nodes[usize::from(*third_vector)], + old_egg_nodes, + ); + let trans_v4 = canonicalize_egg( + false, + &old_egg_nodes[usize::from(*fourth_vector)], + old_egg_nodes, + ); + let mut whole_vector = [trans_v1, trans_v2, trans_v3, trans_v4].concat(); + whole_vector.push(constructor([ + *first_vector, + *second_vector, + *third_vector, + *fourth_vector, + ])); + whole_vector +} + +/// Canonicalizes a Enode with a quintuple input constructor +unsafe fn canonicalize_quintuple( + 
can_change_vector: bool, + constructor: VecLangQuintConstructor, + first_vector: &Id, + second_vector: &Id, + third_vector: &Id, + fourth_vector: &Id, + fifth_vector: &Id, + old_egg_nodes: &[VecLang], +) -> Vec { + let trans_v1 = canonicalize_egg( + false, + &old_egg_nodes[usize::from(*first_vector)], + old_egg_nodes, + ); + let trans_v2 = canonicalize_egg( + false, + &old_egg_nodes[usize::from(*second_vector)], + old_egg_nodes, + ); + let trans_v3 = canonicalize_egg( + false, + &old_egg_nodes[usize::from(*third_vector)], + old_egg_nodes, + ); + let trans_v4 = canonicalize_egg( + false, + &old_egg_nodes[usize::from(*fourth_vector)], + old_egg_nodes, + ); + let trans_v5 = canonicalize_egg( + false, + &old_egg_nodes[usize::from(*fifth_vector)], + old_egg_nodes, + ); + let mut whole_vector = [trans_v1, trans_v2, trans_v3, trans_v4, trans_v5].concat(); + whole_vector.push(constructor([ + *first_vector, + *second_vector, + *third_vector, + *fourth_vector, + *fifth_vector, + ])); + whole_vector +} + +unsafe fn canonicalize_vec_type( + can_change_vector: bool, + constructor: VecLangBoxedConstructor, + boxed_ids: &Box<[Id]>, + old_egg_nodes: &[VecLang], +) -> Vec { + let mut whole_vector: Vec = Vec::new(); + let mut new_boxed_ids: Vec = Vec::new(); + for id in boxed_ids.iter() { + new_boxed_ids.push(*id); + let trans_vec = canonicalize_egg(false, &old_egg_nodes[usize::from(*id)], old_egg_nodes); + for elt in trans_vec { + whole_vector.push(elt); + } + } + let boxed = new_boxed_ids.into_boxed_slice(); + whole_vector.push(constructor(can_change_vector, boxed)); + whole_vector +} + +/// Modify the Egg expression so that the first instance of a Vector operation is replaced by a NoOpVector expression node +/// The reason is that in this version of Diospyros, stores and vecstores explictly mark where a store is to be done. +/// The outermost vectors encountered will not store anything. Replacing them with NoOps will allow translation to occur properly. +unsafe fn canonicalize_egg( + can_change_vector: bool, + curr_egg_node: &VecLang, + old_egg_nodes: &[VecLang], +) -> Vec { + match curr_egg_node { + VecLang::NoOptVec(..) => panic!("No Opt Vector was found. Egg canonicalization does not handle No Opt Vector nodes at this location."), + VecLang::Symbol(..) => { + panic!("Symbol was found. Egg canonicalization does not handle symbol nodes.") + } + VecLang::Get(..) => { + panic!("Get was found. Egg canonicalization does not handle get nodes.") + } + VecLang::Gep(g) => vec![VecLang::Gep(*g)], + VecLang::Load([gep_id]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::Load(single)}, gep_id, old_egg_nodes ), + VecLang::Store([val_id, gep_id]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Store(pair)}, val_id, gep_id, old_egg_nodes), + VecLang::Set(..) => { + panic!("Set was found. Egg canonicalization does not handle set nodes.") + } + VecLang::Ite(..) => panic!("Ite was found. Egg canonicalization does not handle ite nodes."), + VecLang::Or(..) => panic!("Or was found. Egg canonicalization does not handle or nodes."), + VecLang::And(..) => panic!("And was found. Egg canonicalization does not handle and nodes."), + VecLang::Lt(..) => panic!("Lt was found. Egg canonicalizationdoes not handle lt nodes."), + VecLang::Sgn(..) => panic!("Sgn was found. Egg canonicalization does not handle sgn nodes. TODO: In the future, tis node will be handled alongside sqrt and neg scalar nodes."), + VecLang::VecSgn(..) => panic!("VecSgn was found. 
Egg canonicalization does not handle vecsgn nodes. TODO: In the future, this node will be handled alongside VecSqrt and VecNeg vector nodes."), + VecLang::Arg(a) => vec![VecLang::Arg(*a)], + VecLang::Reg(r) => vec![VecLang::Reg(*r)], + VecLang::Num(n) => vec![VecLang::Num(*n)], + VecLang::List(_) => panic!("List was found. Egg canonicalization does not handle list nodes."), + VecLang::LitVec(boxed_ids) => canonicalize_vec_type(can_change_vector, |change_vec_type, boxed| -> VecLang {if change_vec_type {VecLang::NoOptVec(boxed)} else {VecLang::LitVec(boxed)}}, boxed_ids, old_egg_nodes), + VecLang::Vec(boxed_ids) => canonicalize_vec_type(can_change_vector, |change_vec_type, boxed| -> VecLang {if change_vec_type {VecLang::NoOptVec(boxed)} else {VecLang::Vec(boxed)}}, boxed_ids, old_egg_nodes), + VecLang::VecAdd([l, r])=> canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::VecAdd(pair)}, l, r, old_egg_nodes), + VecLang::VecMinus([l, r])=> canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::VecMinus(pair)},l, r, old_egg_nodes), + VecLang::VecMul([l, r])=> canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::VecMul(pair)}, l, r, old_egg_nodes), + VecLang::VecDiv([l, r])=> canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::VecDiv(pair)}, l, r, old_egg_nodes), + VecLang::Add([l, r]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Add(pair)}, l, r, old_egg_nodes), + VecLang::Minus([l, r]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Minus(pair)}, l, r, old_egg_nodes), + VecLang::Mul([l, r]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Mul(pair)}, l, r, old_egg_nodes), + VecLang::Div([l, r]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Div(pair)}, l, r, old_egg_nodes), + VecLang::Concat([l, r]) => canonicalize_pair(true, can_change_vector, |pair| -> VecLang {VecLang::Concat(pair)},l, r, old_egg_nodes), + VecLang::VecMAC([acc, v1, v2]) => canonicalize_triple(can_change_vector, |triple| -> VecLang {VecLang::VecMAC(triple)},acc, v1, v2, old_egg_nodes), + + + // TODO: VecNeg, VecSqrt, VecSgn all have not been tested, need test cases. + // TODO: LLVM actually supports many more vector intrinsics, including + // vector sine/cosine instructions for floats. + VecLang::VecNeg([v]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::VecNeg(single)}, v, old_egg_nodes ), + VecLang::VecSqrt([v]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::VecSqrt(single)}, v, old_egg_nodes ), + // VecSgn compliant with c++ LibMath copysign function, which differs with sgn at x = 0. 
+ VecLang::VecSgn([v]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::VecSgn(single)}, v, old_egg_nodes ), + VecLang::Sgn([n]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::Sgn(single)}, n, old_egg_nodes ), + VecLang::Sqrt([n]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::Sqrt(single)}, n, old_egg_nodes ), + VecLang::Neg([n]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::Neg(single)}, n, old_egg_nodes ), + VecLang::VecLoad([gep1_id, gep2_id, gep3_id, gep4_id]) => canonicalize_quadruple(can_change_vector,|quad| -> VecLang {VecLang::VecLoad(quad)}, gep1_id, gep2_id, gep3_id, gep4_id, old_egg_nodes ), + VecLang::VecStore([val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id]) => canonicalize_quintuple(can_change_vector,|quint| -> VecLang {VecLang::VecStore(quint)}, val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id, old_egg_nodes ), } } @@ -1364,43 +1778,61 @@ unsafe fn egg_to_llvm_main( // Nodes converted to llvm already, not to be retranslated let prior_translated_nodes: BTreeSet = BTreeSet::new(); + // Regular translation from vectorization + + assert!(!is_nooptvec(last_egg_node)); + + let canonicalized_egg_nodes = canonicalize_egg(true, last_egg_node, egg_nodes); + let canonicalized_last_node = canonicalized_egg_nodes + .last() + .expect("No match for last element of vector of Canonicalized Egg Terms."); + let mut translation_metadata = Egg2LLVMState { - egg_nodes_vector: egg_nodes, + egg_nodes_vector: &canonicalized_egg_nodes, llvm2egg_metadata: llvm2egg_metadata.clone(), prior_translated_nodes: prior_translated_nodes, builder: builder, context: context, module: module, }; + // If vectorize was not true, we are finished, because nooptvectorize_to_llvm will generate the required code. if !vectorize { assert!(is_nooptvec(last_egg_node)); return nooptvec_to_llvm(get_noopt_eggnodes(last_egg_node), &mut translation_metadata); } - // Regular translation from vectorization + // let llvm_vector = + egg_to_llvm(canonicalized_last_node, &mut translation_metadata); - assert!(!is_nooptvec(last_egg_node)); - let llvm_vector = egg_to_llvm(last_egg_node, &mut translation_metadata); - - // BELOW HERE, we allow for vectorization output, and we stitch our work back into the current LLVM code - - // NOTE: We Assume Egg rewriter will maintain relative positions of elements in vector - // REVIEW ASSUMPTION! - // Extract the elements of the vector, to be assigned back to where they are to be used. + // remove starting stores let num_extractions = llvm2egg_metadata.start_instructions.len(); for i in (0..num_extractions).rev() { let old_instr = llvm2egg_metadata .start_instructions .get(i) .expect("Index should be in vector."); - // Build the extracted value - let index = LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0); - let extracted_value = - LLVMBuildExtractElement(builder, llvm_vector, index, b"\0".as_ptr() as *const _); - // Replace all the uses of the old instruction with the new extracted value - // Old instruction cannot have been removed. - LLVMReplaceAllUsesWith(*old_instr, extracted_value); LLVMInstructionEraseFromParent(*old_instr); } + + // BELOW HERE, we allow for vectorization output, and we stitch our work back into the current LLVM code + + // NOTE: We Assume Egg rewriter will maintain relative positions of elements in vector + // REVIEW ASSUMPTION! + // Extract the elements of the vector, to be assigned back to where they are to be used. 
+ // let num_extractions = llvm2egg_metadata.start_instructions.len(); + // for i in (0..num_extractions).rev() { + // let old_instr = llvm2egg_metadata + // .start_instructions + // .get(i) + // .expect("Index should be in vector."); + // // Build the extracted value + // let index = LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0); + // let extracted_value = + // LLVMBuildExtractElement(builder, llvm_vector, index, b"\0".as_ptr() as *const _); + // // Replace all the uses of the old instruction with the new extracted value + // // Old instruction cannot have been removed. + // LLVMReplaceAllUsesWith(*old_instr, extracted_value); + // LLVMInstructionEraseFromParent(*old_instr); + // } } diff --git a/src/dios-egraphs/src/cost.rs b/src/dios-egraphs/src/cost.rs index f9adace3..08ee28e4 100644 --- a/src/dios-egraphs/src/cost.rs +++ b/src/dios-egraphs/src/cost.rs @@ -39,6 +39,7 @@ impl CostFunction for VecCostFn<'_> { VecLang::Get(..) => LITERAL, VecLang::Set(..) => LITERAL, VecLang::Load(..) => LITERAL, + VecLang::Store(..) => LITERAL, VecLang::Gep(..) => LITERAL, // And list structures for quite cheap diff --git a/src/dios-egraphs/src/rules.rs b/src/dios-egraphs/src/rules.rs index 2ed10f7d..821c9fb6 100644 --- a/src/dios-egraphs/src/rules.rs +++ b/src/dios-egraphs/src/rules.rs @@ -67,7 +67,7 @@ pub fn run( .with_iter_limit(10_000) .run(&rules); - // print reason to STDERR. + // print reason to STDERR eprintln!( "Stopped after {} iterations, reason: {:?}", runner.iterations.len(), @@ -161,7 +161,7 @@ pub fn rules(no_ac: bool, no_vec: bool) -> Vec> { // Load load fusion rule rw!("vec-load-Loads"; "(Vec (Load ?a0) (Load ?a1) (Load ?a2) (Load ?a3))" => "(VecLoad ?a0 ?a1 ?a2 ?a3)"), // Set store fusion rule - rw!("vec-store-sets"; "(Vec (Set ?a0 ?b0 ?c0) (Set ?a1 ?b1 ?c1) (Set ?a2 ?b2 ?c2) (Set ?a3 ?b3 ?c3))" => "(VecStore (Vec ?a0 ?a1 ?a2 ?a3) (Vec ?b0 ?b1 ?b2 ?b3) (Vec ?c0 ?c1 ?c2 ?c3))"), + rw!("vec-store-sets"; "(Vec (Store ?a0 ?b0) (Store ?a1 ?b1) (Store ?a2 ?b2) (Store ?a3 ?b3))" => "(VecStore (Vec ?a0 ?a1 ?a2 ?a3) ?b0 ?b1 ?b2 ?b3)"), // Special MAC fusion rule rw!("vec-mac-add-mul"; "(VecAdd ?v0 (VecMul ?v1 ?v2))" diff --git a/src/dios-egraphs/src/veclang.rs b/src/dios-egraphs/src/veclang.rs index 53344a76..6955bda7 100644 --- a/src/dios-egraphs/src/veclang.rs +++ b/src/dios-egraphs/src/veclang.rs @@ -15,6 +15,8 @@ define_language! { // Load is a read of memory "Load" = Load([Id; 1]), + // Store is a write to memory + "Store" = Store([Id; 2]), // Id is a key to identify EClasses within an EGraph, represents // children nodes @@ -69,8 +71,7 @@ define_language! 
{ "VecLoad" = VecLoad([Id; 4]), - // VecStore takes 2 lists: base address vector and offset vector - "VecStore" = VecStore([Id; 3]), + "VecStore" = VecStore([Id; 5]), // Info specific to register // RegInfo(egg::Symbol), From bcb4528bcad19748822301209d63316210142404 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 24 Mar 2023 17:34:17 -0400 Subject: [PATCH 113/143] change mat-mul benchmark to 10000 multiplications of matricies Modify makefile and use restrict keyword to get slp vectorizer to work for llvm, slp is still 4 times faster at least Guess problem is with strided loads and stores --- src/dios-egraphs/Diospyros/Makefile | 2 +- .../Diospyros/benchmarks/optimized/mat-mul.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index 920852ce..817a2bf8 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -24,7 +24,7 @@ run-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp run-baseline: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp @$(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll - @$(CLANG) -emit-llvm -S -fslp-vectorize -ftree-slp-vectorize build/opt.ll -o build/slp.ll + @opt -S --slp-vectorizer build/opt.ll -o build/slp.ll @opt -S --adce --dse build/slp.ll -o build/dce.ll @$(CLANG) -o0 build/dce.ll -o build/final @build/final diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/optimized/mat-mul.c index c468aa9a..898b75ea 100644 --- a/src/dios-egraphs/Diospyros/benchmarks/optimized/mat-mul.c +++ b/src/dios-egraphs/Diospyros/benchmarks/optimized/mat-mul.c @@ -11,8 +11,9 @@ #define MAX_FLOAT 100.00f #define DELTA 0.1f -void matrix_multiply(float a_in[A_ROWS * A_COLS], float b_in[A_COLS * B_COLS], - float c_out[A_ROWS * B_COLS]) { +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { for (int y = 0; y < A_ROWS; y++) { for (int x = 0; x < B_COLS; x++) { c_out[B_COLS * y + x] = 0; @@ -60,7 +61,7 @@ int main(void) { start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; // calculate up c_out - for (int i = 0; i < 1000; i++) { + for (int i = 0; i < 10000; i++) { matrix_multiply(a_in, b_in, c_out); } @@ -70,7 +71,7 @@ int main(void) { // report difference in runtime double diff = difftime(end, start); - printf("%ld milliseconds elapsed over 1000 iterations total\n", + printf("%ld milliseconds elapsed over 10000 iterations total\n", (end - start)); return 0; From e67827e1b3bfce47b881372f4558f460a6f73084 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sat, 25 Mar 2023 00:12:47 -0400 Subject: [PATCH 114/143] add functions only 1 in rust llvm this allows opt to run the diospyros pass correctly rather than crashing not sure why clang does not crash but instead silently patches the errors --- src/dios-egraphs/Diospyros/Makefile | 2 +- src/dios-egraphs/Diospyros/src/lib.rs | 72 +++++++++++++++++++++++---- 2 files changed, 62 insertions(+), 12 deletions(-) diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index 
817a2bf8..695656c7 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -16,7 +16,7 @@ run-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp @$(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll @opt -S -load=target/debug/libllvmlib.$(EXT) --basic-aa --lsmovement --adce build/opt.ll -o build/aa.ll - @$(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=true build/aa.ll -o build/diospyros.ll + @opt -S -load=target/debug/libllvmlib.$(EXT) --diospyros -opt -print=true build/aa.ll -o build/diospyros.ll @opt -S --adce --dse build/diospyros.ll -o build/dce.ll @$(CLANG) build/dce.ll -o build/final @build/final diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 82b47409..68ee513f 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -61,6 +61,24 @@ unsafe fn gen_get_idx() -> u32 { return GET_IDX; } +// Map from Func Name to LLVM FUnc +// Have to use Vec because BTReeMap is unstable at constant in Rust 1.58. New versions +// of Rust break LLVM 11.0 so I cannot upgrade. +static mut FUNC_NAME2LLVM_FUNC: Vec<(&str, LLVMValueRef)> = Vec::new(); + +static FMA_NAME: &str = "llvm.fma.v4f32"; +static SCATTER: &str = "llvm.masked.scatter.v4f32.v4p0f32"; +static GATHER: &str = "llvm.masked.gather.v4f32.v4p0f32"; + +unsafe fn get_func_llvm_value(name: &str) -> Option { + for (func_name, value) in FUNC_NAME2LLVM_FUNC.clone() { + if func_name == name { + return Some(value); + } + } + return None; +} + // Reference Comparison: https://www.reddit.com/r/rust/comments/2r3wjk/is_there_way_to_compare_objects_by_address_in_rust/ // Compares whether addresses of LLVMValueRefs are the same. 
// Not the contents of the Value Refs @@ -884,11 +902,21 @@ unsafe fn loadvec_to_llvm( // Output type is a 4 length vector let fn_type = LLVMFunctionType(vec4f_type, param_types, 4, 0 as i32); // Build the Vector Load Intrinsic - let func = LLVMAddFunction( - md.module, - b"llvm.masked.gather.v4f32.v4p0f32\0".as_ptr() as *const _, - fn_type, - ); + let func_name = &GATHER; + let llvm_masked_gather_func = get_func_llvm_value(&func_name); + + let func = match llvm_masked_gather_func { + Some(value) => value, + None => { + let new_func = LLVMAddFunction( + md.module, + b"llvm.masked.gather.v4f32.v4p0f32\0".as_ptr() as *const _, + fn_type, + ); + FUNC_NAME2LLVM_FUNC.push((&func_name, new_func)); + new_func + } + }; // Build Arguments @@ -1001,11 +1029,21 @@ unsafe fn storevec_to_llvm( // Output type is a void_type let fn_type = LLVMFunctionType(void_type, param_types, 4, 0 as i32); // Build the Vector Load Intrinsic - let func = LLVMAddFunction( - md.module, - b"llvm.masked.scatter.v4f32.v4p0f32\0".as_ptr() as *const _, - fn_type, - ); + let func_name = &SCATTER; + let llvm_masked_scatter_func = get_func_llvm_value(&func_name); + + let func = match llvm_masked_scatter_func { + Some(value) => value, + None => { + let new_func = LLVMAddFunction( + md.module, + b"llvm.masked.scatter.v4f32.v4p0f32\0".as_ptr() as *const _, + fn_type, + ); + FUNC_NAME2LLVM_FUNC.push((&func_name, new_func)); + new_func + } + }; // Build Arguments @@ -1352,7 +1390,19 @@ unsafe fn mac_to_llvm( let fn_type = LLVMFunctionType(vec_type, param_types, 3, 0 as i32); // let vector_width = config::vector_width(); // let fma_intrinsic_name = format!("llvm.fma.v{}f32\0", vector_width).as_bytes(); - let func = LLVMAddFunction(md.module, b"llvm.fma.v4f32\0".as_ptr() as *const _, fn_type); + + let func_name = &FMA_NAME; + let llvm_fma_func = get_func_llvm_value(&func_name); + + let func = match llvm_fma_func { + Some(value) => value, + None => { + let new_func = + LLVMAddFunction(md.module, b"llvm.fma.v4f32\0".as_ptr() as *const _, fn_type); + FUNC_NAME2LLVM_FUNC.push((&func_name, new_func)); + new_func + } + }; let args = [trans_v1, trans_v2, trans_acc].as_mut_ptr(); LLVMBuildCall(md.builder, func, args, 3, b"\0".as_ptr() as *const _) } From a6e4b1eb7862e5f7e8562c6913e04008a3711f2c Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Sun, 7 May 2023 19:19:09 -0400 Subject: [PATCH 115/143] get aligned and consec stores to work --- src/dios-egraphs/Diospyros/Makefile | 4 +- .../Diospyros/benchmarks/baseline/vvadd.c | 65 +++ .../Diospyros/benchmarks/optimized/vvadd.c | 65 +++ src/dios-egraphs/Diospyros/c-tests/add.c | 8 +- src/dios-egraphs/Diospyros/diospyros.cpp | 521 ++++++++++++------ src/dios-egraphs/Diospyros/src/lib.rs | 47 +- src/dios-egraphs/src/alignconsecsearcher.rs | 3 + 7 files changed, 534 insertions(+), 179 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/benchmarks/baseline/vvadd.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/optimized/vvadd.c create mode 100644 src/dios-egraphs/src/alignconsecsearcher.rs diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index 695656c7..20156de3 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -15,9 +15,9 @@ endif run-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp @$(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify 
--loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll - @opt -S -load=target/debug/libllvmlib.$(EXT) --basic-aa --lsmovement --adce build/opt.ll -o build/aa.ll + @opt -S -load=target/debug/libllvmlib.$(EXT) --basic-aa --lsmovement --dse --adce build/opt.ll -o build/aa.ll @opt -S -load=target/debug/libllvmlib.$(EXT) --diospyros -opt -print=true build/aa.ll -o build/diospyros.ll - @opt -S --adce --dse build/diospyros.ll -o build/dce.ll + @opt -S --adce --dse --gvn build/diospyros.ll -o build/dce.ll @$(CLANG) build/dce.ll -o build/final @build/final diff --git a/src/dios-egraphs/Diospyros/benchmarks/baseline/vvadd.c b/src/dios-egraphs/Diospyros/benchmarks/baseline/vvadd.c new file mode 100644 index 00000000..06490264 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/baseline/vvadd.c @@ -0,0 +1,65 @@ +#include +#include +#include +#include +#include +#include + +#define A_ROWS 12 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f +#define NITER 1000000000 + +void vvadd(float a_in[restrict A_ROWS], float b_in[restrict A_ROWS], + float c_out[restrict A_ROWS]) { + for (int i = 0; i < A_ROWS; i++) { + c_out[i] = a_in[i] + b_in[i]; + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS]; + for (int i = 0; i < A_ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS]; + for (int i = 0; i < A_ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS]; + for (int i = 0; i < A_ROWS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. 
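The same wall-clock measurement idea, sketched in Rust rather than C purely as an illustration (std::time::Instant in place of gettimeofday; the inner loop is a stand-in for the vvadd call):

use std::time::Instant;

fn main() {
    const NITER: u32 = 1_000_000;
    let a = [1.0f32; 12];
    let b = [2.0f32; 12];
    let mut c = [0.0f32; 12];

    let start = Instant::now(); // wall-clock time, not CPU time
    for _ in 0..NITER {
        for i in 0..12 {
            c[i] = a[i] + b[i];
        }
    }
    let elapsed = start.elapsed();
    // a real benchmark would also keep the compiler from optimizing the loop
    // away, e.g. with std::hint::black_box
    println!("{} ms elapsed over {} iterations", elapsed.as_millis(), NITER);
    assert!((c[0] - 3.0).abs() < 1e-6);
}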
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + vvadd(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/vvadd.c b/src/dios-egraphs/Diospyros/benchmarks/optimized/vvadd.c new file mode 100644 index 00000000..06490264 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/optimized/vvadd.c @@ -0,0 +1,65 @@ +#include +#include +#include +#include +#include +#include + +#define A_ROWS 12 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f +#define NITER 1000000000 + +void vvadd(float a_in[restrict A_ROWS], float b_in[restrict A_ROWS], + float c_out[restrict A_ROWS]) { + for (int i = 0; i < A_ROWS; i++) { + c_out[i] = a_in[i] + b_in[i]; + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS]; + for (int i = 0; i < A_ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS]; + for (int i = 0; i < A_ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS]; + for (int i = 0; i < A_ROWS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. 
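With A_ROWS = 12, the vvadd loop in this benchmark is precisely the shape the later patches try to lower to aligned 4-wide vector loads, adds, and stores. A rough Rust sketch of that decomposition, assuming a fixed vector width of 4 and a row count that divides evenly:

const WIDTH: usize = 4;

fn vvadd_by_lane_groups(a: &[f32], b: &[f32], c: &mut [f32]) {
    assert!(a.len() % WIDTH == 0 && a.len() == b.len() && a.len() == c.len());
    for base in (0..a.len()).step_by(WIDTH) {
        // one aligned, consecutive 4-wide group: load a[base..base+4] and
        // b[base..base+4], add lane-wise, store to c[base..base+4]
        for lane in 0..WIDTH {
            c[base + lane] = a[base + lane] + b[base + lane];
        }
    }
}

fn main() {
    let a = [1.0f32; 12];
    let b = [2.0f32; 12];
    let mut c = [0.0f32; 12];
    vvadd_by_lane_groups(&a, &b, &mut c);
    assert!(c.iter().all(|&x| x == 3.0));
}

Each of the three groups here corresponds to one AlignedConsecVecLoad/VecStore pair in the vectorized output that the rest of this series works toward.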
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + vvadd(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/add.c b/src/dios-egraphs/Diospyros/c-tests/add.c index 89d8646d..21b3f656 100644 --- a/src/dios-egraphs/Diospyros/c-tests/add.c +++ b/src/dios-egraphs/Diospyros/c-tests/add.c @@ -15,14 +15,14 @@ int main(int argc, char **argv) { float b_in[SIZE] = {5, 6, 7, 8}; float c_out[SIZE]; sum(a_in, b_in, c_out); - assert(c_out[0] == 6); - assert(c_out[1] == 8); - assert(c_out[2] == 10); - assert(c_out[3] == 12); printf("first: %f\n", c_out[0]); printf("second: %f\n", c_out[1]); printf("third: %f\n", c_out[2]); printf("fourth: %f\n", c_out[3]); + assert(c_out[0] == 6); + assert(c_out[1] == 8); + assert(c_out[2] == 10); + assert(c_out[3] == 12); // expected: 6, 8, 10, 12 return 0; } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 41d6a4dd..d1babfec 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -11,7 +12,9 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -24,11 +27,13 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/Pass.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Scalar/LoopUnrollPass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Vectorize/SLPVectorizer.h" using namespace llvm; @@ -65,6 +70,9 @@ const std::string NO_OPT_PREFIX = "no_opt_"; const int SQRT_OPERATOR = 3; const int BINARY_OPERATOR = 2; +const uint32_t VECTOR_WIDTH = 4; +const uint32_t FLOAT_SIZE_IN_BYTES = 4; + /** * Fresh counters for temps and array generation */ @@ -836,12 +844,321 @@ chunks_t remove_load_store_alias(chunks_t chunks, AliasAnalysis *AA) { return final_chunks; } +/** + * return the index to in baseOfArrayVec that store is an offset from, or + * NULLOPT if not matching + */ +int get_base_reference(StoreInst *store, std::vector base_of_array_vec, + ScalarEvolution *SE) { + for (int i = 0; i < base_of_array_vec.size(); i++) { + Value *base_array_ptr = base_of_array_vec[i]; + assert(base_array_ptr->getType()->isPointerTy()); + Value *store_ptr = store->getPointerOperand(); + const SCEV *store_ptr_se = SE->getSCEV(store_ptr); + const SCEV *base_ptr_se = SE->getSCEV(base_array_ptr); + 
const SCEV *diff = SE->getMinusSCEV(store_ptr_se, base_ptr_se); + APInt min_val = SE->getSignedRangeMin(diff); + APInt max_val = SE->getSignedRangeMax(diff); + if (min_val == max_val) { + return i; + } + } + return -1; +} + +// Check Alignment +inline bool is_aligned(int diff_from_base) { return diff_from_base % 16 == 0; } + +/** + * Given a group of stores trees, greedily assign the store trees into + * new sets of store trees such that each set is all consecutive and aligned + * Returns a the sets of groups of stores trees with this property. + */ +std::vector group_trees( + ad_trees_t group_of_trees, std::map store_to_offset) { + std::vector trees_used = {}; + for (auto _ : group_of_trees) { + trees_used.push_back(false); + } + + std::vector result = {}; + uint32_t start_offset = 0; + while (std::any_of(trees_used.begin(), trees_used.end(), + [](bool b) { return !b; })) { + // get the smallest starting offset + uint32_t min_offset = UINT32_MAX; + for (int i = 0; i < group_of_trees.size(); i++) { + if (trees_used[i]) { + continue; + } + auto tree = group_of_trees[i]; + StoreInst *store = dyn_cast(tree.back()); + int offset = store_to_offset[store] / FLOAT_SIZE_IN_BYTES; + if (offset < min_offset) { + min_offset = offset; + } + } + min_offset = min_offset - (min_offset % VECTOR_WIDTH); + std::set required_offsets = {}; + for (int i = min_offset; i < min_offset + VECTOR_WIDTH; i++) { + required_offsets.emplace(i); + } + ad_trees_t current_group = {}; + for (int i = 0; i < VECTOR_WIDTH; i++) { + current_group.push_back({}); + } + std::set current_offsets = {}; + for (int i = 0; i < group_of_trees.size(); i++) { + if (trees_used[i]) { + continue; + } + auto tree = group_of_trees[i]; + StoreInst *store = dyn_cast(tree.back()); + int offset = store_to_offset[store] / FLOAT_SIZE_IN_BYTES; + int rounded_offset = offset % 4; + if (required_offsets.count(offset) != 0 && + current_offsets.count(offset) == 0) { + current_offsets.emplace(offset); + trees_used[i] = true; + current_group[rounded_offset] = tree; + } + } + bool can_add_result = true; + for (auto curr_tree : current_group) { + if (curr_tree.empty()) { + can_add_result = false; + } + } + if (can_add_result) { + result.push_back(current_group); + } + } + + return result; +} + +/** + * Sort the stores in the ad_trees so that an aligned store + * is first, followed by consecutive stores + */ +ad_trees_t sort_ad_trees(ad_trees_t ad_trees, + std::vector base_of_array_vec, + ScalarEvolution *SE) { + // First, group ad_trees according to the base array they belong to. 
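group_trees above is the heart of the aligned-store grouping: offsets are measured in floats, a slot is the aligned range [base, base+3] with base a multiple of VECTOR_WIDTH, and a group is kept only when every lane of the slot is covered by exactly one store tree. A self-contained sketch of that greedy pass over plain offsets (tree contents are elided; only the offsets matter for the grouping decision):

const WIDTH: u32 = 4;

// Returns, for each complete aligned slot, the indices of the trees that fill its lanes.
fn group_by_aligned_slot(offsets: &[u32]) -> Vec<[usize; WIDTH as usize]> {
    let mut used = vec![false; offsets.len()];
    let mut groups = Vec::new();
    while used.iter().any(|u| !u) {
        // smallest offset among trees not yet placed, rounded down to its slot base
        let mut min = u32::MAX;
        for (i, &o) in offsets.iter().enumerate() {
            if !used[i] && o < min {
                min = o;
            }
        }
        let base = min - (min % WIDTH);
        let mut lanes = [usize::MAX; WIDTH as usize];
        for (i, &o) in offsets.iter().enumerate() {
            if !used[i] && o >= base && o < base + WIDTH {
                let lane = (o - base) as usize;
                if lanes[lane] == usize::MAX {
                    lanes[lane] = i;
                    used[i] = true;
                }
            }
        }
        // keep the slot only if all four lanes were filled
        if lanes.iter().all(|&l| l != usize::MAX) {
            groups.push(lanes);
        }
    }
    groups
}

fn main() {
    // offsets 0..3 form one aligned slot; 5 is dropped because its slot 4..7 is incomplete
    let groups = group_by_aligned_slot(&[2, 0, 3, 1, 5]);
    assert_eq!(groups.len(), 1);
}

As in the C++ version, trees whose slot never fills are consumed but produce no group, which is what keeps every emitted group both 16-byte aligned and fully consecutive.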
+ // If a tree does not reference a base array, exclude that tree entirely + std::vector> groups_of_trees = {}; + for (int i = 0; i < base_of_array_vec.size(); i++) { + groups_of_trees.push_back({}); + } + + std::map store_to_base_map = {}; + for (ad_tree_t ad_tree : ad_trees) { + if (ad_tree.size() != 0) { + if (StoreInst *store = dyn_cast(ad_tree.back())) { + int base_ref = get_base_reference(store, base_of_array_vec, SE); + if (base_ref >= 0) { + groups_of_trees[base_ref].push_back(ad_tree); + store_to_base_map[store] = base_ref; + } + } + } + } + + auto store_sorter = [=](const ad_tree_t &a, const ad_tree_t &b) { + StoreInst *store_a = dyn_cast(a.back()); + StoreInst *store_b = dyn_cast(b.back()); + + // get the base references + Value *ref_a = base_of_array_vec[store_to_base_map.at(store_a)]; + Value *ref_b = base_of_array_vec[store_to_base_map.at(store_b)]; + + // get the difference from the store to its reference + Value *store_a_ptr = store_a->getPointerOperand(); + const SCEV *store_a_ptr_se = SE->getSCEV(store_a_ptr); + const SCEV *ref_a_ptr_se = SE->getSCEV(ref_a); + const SCEV *diff_a = SE->getMinusSCEV(store_a_ptr_se, ref_a_ptr_se); + APInt min_val_a = SE->getSignedRangeMin(diff_a); + APInt max_val_a = SE->getSignedRangeMax(diff_a); + assert(min_val_a == max_val_a); + int val_a = (int)max_val_a.roundToDouble(); + + Value *store_b_ptr = store_b->getPointerOperand(); + const SCEV *store_b_ptr_se = SE->getSCEV(store_b_ptr); + const SCEV *ref_b_ptr_se = SE->getSCEV(ref_b); + const SCEV *diff_b = SE->getMinusSCEV(store_b_ptr_se, ref_b_ptr_se); + APInt min_val_b = SE->getSignedRangeMin(diff_b); + APInt max_val_b = SE->getSignedRangeMax(diff_b); + assert(min_val_b == max_val_b); + int val_b = (int)max_val_b.roundToDouble(); + + return val_a < val_b; + }; + + // Sort each group of ad_trees by the stores in each group + for (int i = 0; i < groups_of_trees.size(); i++) { + // NO IDEA WHY THIS WORKS, BUT ITERATING OVER ELEMENTS I SORTS PROPERLY + // BUT ITERATING OVER USING COLON DOES NOT! + std::sort(groups_of_trees[i].begin(), groups_of_trees[i].end(), + store_sorter); + } + + // Build a map mapping stores to their respective offsets + std::map store_to_offset = {}; + for (auto group : groups_of_trees) { + // skip empty groups + if (group.empty()) { + continue; + } + for (auto tree : group) { + // Grab basic information about the tree + StoreInst *store = dyn_cast(tree.back()); + // Get base ref for the first store + Value *base_ref = base_of_array_vec[store_to_base_map.at(store)]; + // get the difference from the store to its reference + Value *store_ptr = store->getPointerOperand(); + const SCEV *store_ptr_se = SE->getSCEV(store_ptr); + const SCEV *ref_ptr_se = SE->getSCEV(base_ref); + const SCEV *diff = SE->getMinusSCEV(store_ptr_se, ref_ptr_se); + APInt min_val = SE->getSignedRangeMin(diff); + APInt max_val = SE->getSignedRangeMax(diff); + assert(min_val == max_val); + int offset = (int)max_val.roundToDouble(); + store_to_offset[store] = offset; + } + } + + // Grab only ad_trees that contain a 16 byte aligned reference at the + // beginning + // Also the trees must be consecutive stores, e.g. 
the stores must differ by + // 4 bytes each time + // Finally, split the trees into smaller subtrees of size 4 + + // We do this by accumulating a running sequence of ad_trees that satisfy + // the prerequisite conditions above + + std::vector> pruned_groups_of_trees = {}; + // std::vector running_collection = {}; + // int current_offset = -4; + for (auto group : groups_of_trees) { + // skip empty groups + if (group.empty()) { + continue; + } + + std::vector new_groups_of_trees = + group_trees(group, store_to_offset); + for (auto new_group : new_groups_of_trees) { + pruned_groups_of_trees.push_back(new_group); + } + + // for (auto tree : group) { + // // Grab basic information about the tree + // StoreInst *store = dyn_cast(tree.back()); + // // Get base ref for the first store + // Value *ref_a = base_of_array_vec[store_to_base_map.at(store)]; + // // get the difference from the store to its reference + // Value *store_a_ptr = store->getPointerOperand(); + // const SCEV *store_a_ptr_se = SE->getSCEV(store_a_ptr); + // const SCEV *ref_a_ptr_se = SE->getSCEV(ref_a); + // const SCEV *diff_a = SE->getMinusSCEV(store_a_ptr_se, + // ref_a_ptr_se); APInt min_val_a = SE->getSignedRangeMin(diff_a); + // APInt max_val_a = SE->getSignedRangeMax(diff_a); + // assert(min_val_a == max_val_a); + // int val_a = (int)max_val_a.roundToDouble(); + + // // If the running collection is empty + // // If the current tree is rooted at an aligned address, add + // to + // // the collection, and set the current offset + // // If the current tree is not rooted at an + // // aligned address, skip this tree. Set offset as -4 + // // Else if the running collection is not empty, and the length is + // // not the VECTOR WIDTH + // // Check the current offset of the tree, and if it s 4 off + // the + // // past offset, add on the tree and set its offset + // // Otherwise skip this tree and clear the running + // collection. + // // Set offset as -4. + // // Otherwise the running collection is not empty and the length + // is + // // the Vector Width + // // Remove the running collection and add it to a pruned + // group + // // If the current tree is at an aligned address, add to + // // collection. Set current offset + // // Otherwise, the tree is not rooted at an aligned + // // address and skip. 
Set offset as -4 + // if (running_collection.empty()) { + // if (is_aligned(val_a)) { + // running_collection.push_back(tree); + // current_offset = val_a; + // } else { + // current_offset = -4; + // } + // } else if (!running_collection.empty() && + // running_collection.size() < VECTOR_WIDTH) { + // if (current_offset + FLOAT_SIZE_IN_BYTES == val_a) { + // running_collection.push_back(tree); + // current_offset = val_a; + // } else { + // running_collection = {}; + // current_offset = -4; + // } + // } else if (!running_collection.empty() && + // running_collection.size() == VECTOR_WIDTH) { + // pruned_groups_of_trees.push_back(running_collection); + // running_collection = {}; + // if (is_aligned(val_a)) { + // running_collection.push_back(tree); + // current_offset = val_a; + // } else { + // current_offset = -4; + // } + // } else { + // throw "sort_ad_trees: Impossible case: Cannot have the size + // greater than VECTOR_WIDTH"; + // } + // } + + // if (!running_collection.empty() && + // running_collection.size() == VECTOR_WIDTH) { + // pruned_groups_of_trees.push_back(running_collection); + // } + } + errs() << "Pruned Group of Trees\n"; + for (auto group_of_trees : pruned_groups_of_trees) { + for (auto tree : group_of_trees) { + for (auto instr : tree) { + errs() << *instr << "\n"; + } + } + } + + // Compress group of trees back into 1 ad_tree + ad_trees_t result = {}; + for (auto group_of_trees : pruned_groups_of_trees) { + chunk_t combined_chunk = join_trees(group_of_trees); + int num_stores = 0; + for (auto instr : combined_chunk) { + if (isa(instr)) { + num_stores++; + } + } + assert(num_stores == VECTOR_WIDTH); + result.push_back(combined_chunk); + } + + return result; +} + /** * Converts chunks into vectors, representing joined AD Trees * */ std::vector> chunks_into_joined_trees( - chunks_t chunks, AliasAnalysis *AA) { + chunks_t chunks, AliasAnalysis *AA, std::vector base_of_array_vec, + ScalarEvolution *SE) { std::vector> trees = {}; for (auto chunk : chunks) { ad_trees_t ad_trees = build_ad_trees(chunk); @@ -849,6 +1166,7 @@ std::vector> chunks_into_joined_trees( // Join trees if the store instructions in the trees // do not alias each other std::vector> joinable_trees = {}; + std::vector>> tree_groups = {}; for (auto tree : ad_trees) { // check if stores alias in the trees assert(tree.size() > 0); @@ -868,14 +1186,22 @@ std::vector> chunks_into_joined_trees( joinable_trees.push_back(tree); } else { assert(joinable_trees.size() > 0); - auto joined_trees = join_trees(joinable_trees); - trees.push_back(joined_trees); + tree_groups.push_back(joinable_trees); joinable_trees = {tree}; } } if (joinable_trees.size() > 0) { - auto joined_trees = join_trees(joinable_trees); - trees.push_back(joined_trees); + tree_groups.push_back(joinable_trees); + } + + // Rearrange the joinable trees by changing their store ordering + // Then Merge Joinable trees into trees + for (auto tree_group : tree_groups) { + ad_trees_t new_ad_trees = + sort_ad_trees(tree_group, base_of_array_vec, SE); + for (auto chunk : new_ad_trees) { + trees.push_back(chunk); + } } } // Do final removal of any sequences with store-load aliasing @@ -903,7 +1229,7 @@ std::vector> instr2ref(chunks_t chunks) { * Run Optimization Procedure on Vector representing concatenated ad trees * */ -void optimize(std::vector chunk, Function &F) { +void run_optimization(std::vector chunk, Function &F) { assert(chunk.size() != 0); // Place the builder at the last instruction in the entire chunk. 
Value *last_value = unwrap(chunk.back()); @@ -932,12 +1258,15 @@ struct DiospyrosPass : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addRequired(); } virtual bool runOnFunction(Function &F) override { // We need Alias Analysis still, because it is possible groups of // stores can addresses that alias. AliasAnalysis *AA = &getAnalysis().getAAResults(); + ScalarEvolution *SE = + &getAnalysis().getSE(); // do not optimize on main function or no_opt functions. if (F.getName() == MAIN_FUNCTION_NAME || @@ -945,178 +1274,30 @@ struct DiospyrosPass : public FunctionPass { F.getName().substr(0, NO_OPT_PREFIX.size()) == NO_OPT_PREFIX)) { return false; } - bool has_changes = false; - for (auto &B : F) { - auto chunks = build_chunks(&B, AA); - auto trees = chunks_into_joined_trees(chunks, AA); - auto treerefs = instr2ref(trees); - for (auto tree_chunk : treerefs) { - if (tree_chunk.size() != 0) { - optimize(tree_chunk, F); + // get all "Base" Arrays on which vectorization can occur. These are + // defined as argument inputs with a pointer type + std::vector base_of_array_vec = {}; + for (auto &a : F.args()) { + if (a.getType()->isPointerTy()) { + if (Value *arg_val = dyn_cast(&a)) { + base_of_array_vec.push_back(arg_val); } } } - return false; - for (auto &B : F) { - // TODO: Consider removing as the new procedure can overcome - // this We skip over basic blocks without floating point types - bool has_float = false; - for (auto &I : B) { - if (I.getType()->isFloatTy()) { - has_float = true; - } - } - if (!has_float) { - continue; - } - // Assumes Alias Analysis Movement Pass has been done previously - // Pulls out Instructions into sections of code called "Chunks" - // - std::vector> chunk_accumulator; - std::vector chunk_vector = {}; - bool vectorizable_flag = false; - bool has_seen_store = false; - for (auto &I : B) { - Value *val = dyn_cast(&I); - assert(val != NULL); - // When you finish seeing stores, and see some other - // instruction afterwards, stop the current chunk to - // vectorize - if (isa(val)) { - has_seen_store = true; - } - if (has_seen_store && !isa(val)) { - vectorizable_flag = false; - } - if (can_vectorize(val) && !vectorizable_flag) { - if (!chunk_vector.empty()) { - chunk_accumulator.push_back(chunk_vector); - } - vectorizable_flag = true; - chunk_vector = {wrap(val)}; - } else if (can_vectorize(val) && vectorizable_flag) { - chunk_vector.push_back(wrap(val)); - } else if (!can_vectorize(val) && !vectorizable_flag) { - chunk_vector.push_back(wrap(val)); - } else if (!can_vectorize(val) && vectorizable_flag) { - if (!chunk_vector.empty()) { - chunk_accumulator.push_back(chunk_vector); - } - vectorizable_flag = false; - chunk_vector = {wrap(val)}; - } else { - throw "No other cases possible!"; - } - } - if (!chunk_vector.empty()) { - chunk_accumulator.push_back(chunk_vector); - } - - for (int i = 0; i < chunk_accumulator.size(); ++i) { - auto &chunk_vector = chunk_accumulator[i]; - if (chunk_vector.empty()) { - continue; - } - - errs() << "Here is a chunk: \n"; - for (auto chunk_instr : chunk_vector) { - errs() << *unwrap(chunk_instr) << "\n"; - } - - // check if the chunk vector actually has instructions to - // optimixe on - bool has_vectorizable_instrs = false; - for (auto &instr : chunk_vector) { - if (can_vectorize(unwrap(instr)) && - !isa(unwrap(instr)) && - !isa(unwrap(instr)) && - !isa(unwrap(instr))) { - has_vectorizable_instrs = true; - } - } - if (!has_vectorizable_instrs) { - continue; - } - - 
// check if the chunk vector has at least one store - bool has_store = false; - for (auto &instr : chunk_vector) { - if (isa(unwrap(instr))) { - has_store = true; - } - } - if (!has_store) { - continue; - } - - // If an instruction is used multiple times outside the - // chunk, add it to a restricted list. - // TODO: only consider future chunks! - std::vector restricted_instrs = {}; - for (auto chunk_instr : chunk_vector) { - for (auto j = i + 1; j < chunk_accumulator.size(); ++j) { - // guaranteed to be a different chunk vector ahead - // of the origianl one. - bool must_restrict = false; - auto &other_chunk_vector = chunk_accumulator[j]; - for (auto other_chunk_instr : other_chunk_vector) { - if (unwrap(chunk_instr) == - unwrap(other_chunk_instr)) { - restricted_instrs.push_back(chunk_instr); - must_restrict = true; - break; - } - } - if (must_restrict) { - break; - } - } - } + bool has_changes = true; + for (auto &B : F) { + auto chunks = build_chunks(&B, AA); + auto trees = + chunks_into_joined_trees(chunks, AA, base_of_array_vec, SE); + auto treerefs = instr2ref(trees); - has_changes = has_changes || true; - assert(chunk_vector.size() != 0); - - // Place builder at first instruction that is not a "handled - // instruction" - int insert_pos = 0; - bool has_seen_vectorizable = false; - for (int i = 0; i < chunk_vector.size(); i++) { - if (can_vectorize(unwrap(chunk_vector[i]))) { - has_seen_vectorizable = true; - insert_pos++; - } else if (!has_seen_vectorizable) { - insert_pos++; - } else { - break; - } - } - Value *last_instr_val = NULL; - if (insert_pos >= chunk_vector.size()) { - last_instr_val = unwrap(chunk_vector[insert_pos - 1]); - } else { - last_instr_val = unwrap(chunk_vector[insert_pos]); - } - assert(last_instr_val != NULL); - Instruction *last_instr = dyn_cast(last_instr_val); - assert(last_instr != NULL); - if (insert_pos >= chunk_vector.size()) { - last_instr = last_instr->getNextNode(); - assert(last_instr != NULL); + for (auto tree_chunk : treerefs) { + if (tree_chunk.size() != 0) { + run_optimization(tree_chunk, F); } - IRBuilder<> builder(last_instr); - - Module *mod = F.getParent(); - LLVMContext &context = F.getContext(); - optimize(wrap(mod), wrap(&context), wrap(&builder), - chunk_vector.data(), chunk_vector.size(), - restricted_instrs.data(), restricted_instrs.size(), - RunOpt, PrintOpt); } - - // TODO: delete old instructions that are memory related; adce - // will handle the remainder } return has_changes; }; diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 68ee513f..590f29ca 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -159,6 +159,13 @@ unsafe fn translate_unop( } } +/// Calculate cost for any Egg expression. +/// Uses a custom model that I developed, to see if an optimization should go through +/// or not. +pub fn calculate_cost() -> u32 { + return 0; +} + /// Main function to optimize: Takes in a basic block of instructions, /// optimizes it, and then translates it to LLVM IR code, in place. 
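calculate_cost above is still a stub; the selection pressure in this series actually comes from the per-node costs in src/cost.rs, which the later hunks adjust so that vectorized memory accesses are cheap and aligned-consecutive loads are cheaper still. A toy sketch of that shape: the -2.0 and 0.0001 constants mirror the values visible in cost.rs below, while the generic vector-op cost of 1.0 is purely illustrative.

#[derive(Clone, Copy)]
enum Node {
    AlignedConsecVecLoad,
    VecLoad,
    VecOp,
}

// Lower is better; a negative cost actively rewards a pattern during extraction.
fn node_cost(node: Node) -> f64 {
    match node {
        Node::AlignedConsecVecLoad => -2.0, // a plain aligned vector load
        Node::VecLoad => 0.0001,            // a masked gather: still beats scalar loads
        Node::VecOp => 1.0,                 // illustrative cost for any vector ALU op
    }
}

fn expr_cost(nodes: &[Node]) -> f64 {
    nodes.iter().map(|&n| node_cost(n)).sum()
}

fn main() {
    let gathered = [Node::VecLoad, Node::VecLoad, Node::VecOp];
    let aligned = [Node::AlignedConsecVecLoad, Node::AlignedConsecVecLoad, Node::VecOp];
    assert!(expr_cost(&aligned) < expr_cost(&gathered));
}

The ordering is what matters: any extraction that can prove a load group aligned and consecutive will be preferred over the masked-gather encoding of the same loads.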
@@ -682,6 +689,7 @@ unsafe fn start_translating_llvm_to_egg( unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { return match match_llvm_op(&llvm_instr) { + LLVMOpType::Store => true, LLVMOpType::FAdd | LLVMOpType::FMul | LLVMOpType::FDiv @@ -690,8 +698,8 @@ unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { | LLVMOpType::Constant | LLVMOpType::Sqrt32 | LLVMOpType::Load - | LLVMOpType::Store => true, - LLVMOpType::Argument | LLVMOpType::UnhandledLLVMOpCode => false, + | LLVMOpType::Argument + | LLVMOpType::UnhandledLLVMOpCode => false, }; } @@ -763,7 +771,7 @@ unsafe fn llvm_to_egg_main( let mut next_node_idx: u32 = 0; // for each final instruction, iterate backwards from that final instruction and translate to egg - for llvm_instr in llvm_instrs_in_chunk.iter().rev() { + for llvm_instr in llvm_instrs_in_chunk.iter() { // only start translation back if it is a "translatable instruction" and it was not translated already if can_start_translation_instr(*llvm_instr) // TODO: Need to DFS back from this instruction and make sure invariants for translation hold, e.g. no bitcasts somewhere down the translation tree. && !translation_metadata @@ -889,6 +897,24 @@ unsafe fn loadvec_to_llvm( .get(&gep4_id_val) .expect("Value of gep4 id should exist in get2gep"); + // New code to handle a vector load + // let address_space = LLVMGetPointerAddressSpace(LLVMTypeOf(*gep1_llvm_instr)); + // let bitcase_scalar_to_vector_type = LLVMBuildBitCast( + // md.builder, + // *gep1_llvm_instr, + // LLVMPointerType( + // LLVMVectorType(LLVMFloatTypeInContext(md.context), 4), + // address_space, + // ), + // b"scalar-to-vector-type-bit-cast\0".as_ptr() as *const _, + // ); + // let load = LLVMBuildLoad( + // md.builder, + // bitcase_scalar_to_vector_type, + // b"vector-load\0".as_ptr() as *const _, + // ); + // return load; + let vector_width = 4; let floatptr_type = LLVMTypeOf(*gep1_llvm_instr); let vec4ptr_type = LLVMVectorType(floatptr_type, vector_width); @@ -1016,6 +1042,21 @@ unsafe fn storevec_to_llvm( .get(&gep4_id_val) .expect("Value of gep4 id should exist in get2gep"); + // New code to handle a vector store + // Currently, this is the only type of store that can be generated because stores are not split. 
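The hunk above narrows can_start_translation_instr so that only Store instructions seed a translation, and the chunk is now walked in program order; every other node is reached as an operand of some store. A small stand-alone sketch of that store-rooted walk, where the Instr enum is an illustration rather than the real LLVM instruction types:

// Each instruction refers to earlier instructions in the chunk by index.
enum Instr {
    Load,
    FAdd(usize, usize),
    Store(usize),
}

// Start one tree per Store and collect its operands depth-first.
fn trees_rooted_at_stores(chunk: &[Instr]) -> Vec<Vec<usize>> {
    let mut trees = Vec::new();
    for (root, instr) in chunk.iter().enumerate() {
        if let Instr::Store(value) = instr {
            let mut tree = vec![root];
            let mut stack = vec![*value];
            while let Some(idx) = stack.pop() {
                tree.push(idx);
                if let Instr::FAdd(lhs, rhs) = &chunk[idx] {
                    stack.push(*lhs);
                    stack.push(*rhs);
                }
            }
            trees.push(tree);
        }
    }
    trees
}

fn main() {
    // load, load, add, store  ->  one tree rooted at the store
    let chunk = [Instr::Load, Instr::Load, Instr::FAdd(0, 1), Instr::Store(2)];
    assert_eq!(trees_rooted_at_stores(&chunk).len(), 1);
}

Rooting everything at stores is what lets the later store-sorting and store-grouping code reorder whole trees by the address each store writes to.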
+ let address_space = LLVMGetPointerAddressSpace(LLVMTypeOf(*gep1_llvm_instr)); + let bitcase_scalar_to_vector_type = LLVMBuildBitCast( + md.builder, + *gep1_llvm_instr, + LLVMPointerType( + LLVMVectorType(LLVMFloatTypeInContext(md.context), 4), + address_space, + ), + b"scalar-to-vector-type-bit-cast\0".as_ptr() as *const _, + ); + let store = LLVMBuildStore(md.builder, llvm_val_vec, bitcase_scalar_to_vector_type); + return store; + let vector_width = 4; let floatptr_type = LLVMTypeOf(*gep1_llvm_instr); let vec4ptr_type = LLVMVectorType(floatptr_type, vector_width); diff --git a/src/dios-egraphs/src/alignconsecsearcher.rs b/src/dios-egraphs/src/alignconsecsearcher.rs new file mode 100644 index 00000000..dd53b7fd --- /dev/null +++ b/src/dios-egraphs/src/alignconsecsearcher.rs @@ -0,0 +1,3 @@ +/// Search for sequences of Loads and Stores that are Aligned and Consecutive +/// +/// This helps LLVM generate more efficient code for non-DSP processors \ No newline at end of file From ca683bf3e3df4eb868ea5cdbd2085a631a689ac5 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Mon, 8 May 2023 03:32:51 -0400 Subject: [PATCH 116/143] gather load information like offset and base of array --- src/dios-egraphs/Diospyros/diospyros.cpp | 134 +++++++++++++++++++---- src/dios-egraphs/Diospyros/src/lib.rs | 17 ++- 2 files changed, 129 insertions(+), 22 deletions(-) diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index d1babfec..bad74e87 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -13,8 +13,10 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -49,13 +51,20 @@ llvm::cl::opt PrintOpt("z", llvm::cl::desc("Print Egg Optimization.")); llvm::cl::alias PrintOptAlias("print", llvm::cl::desc("Alias for -z"), llvm::cl::aliasopt(PrintOpt)); -extern "C" void optimize(LLVMModuleRef mod, LLVMContextRef context, - LLVMBuilderRef builder, - LLVMValueRef const *chunk_instrs, - std::size_t chunk_size, - LLVMValueRef const *restricted_instrs, - std::size_t restricted_size, bool run_egg, - bool print_opt); +/// Struct representing load info, same as on Rust side +typedef struct load_info { + LLVMValueRef load; + int32_t base_id; + int32_t offset; +} load_info_t; + +/// Forward Declaration of Optimize function +extern "C" bool optimize( + LLVMModuleRef mod, LLVMContextRef context, LLVMBuilderRef builder, + LLVMValueRef const *chunk_instrs, std::size_t chunk_size, + LLVMValueRef const *restricted_instrs, std::size_t restricted_size, + load_info_t const *load_info, std::size_t load_info_size, bool run_egg, + bool print_opt); const std::string ARRAY_NAME = "no-array-name"; const std::string TEMP_NAME = "no-temp-name"; @@ -848,22 +857,29 @@ chunks_t remove_load_store_alias(chunks_t chunks, AliasAnalysis *AA) { * return the index to in baseOfArrayVec that store is an offset from, or * NULLOPT if not matching */ -int get_base_reference(StoreInst *store, std::vector base_of_array_vec, - ScalarEvolution *SE) { +std::pair get_base_reference(Instruction *mem_instr, + std::vector base_of_array_vec, + ScalarEvolution *SE) { for (int i = 0; i < base_of_array_vec.size(); i++) { Value 
*base_array_ptr = base_of_array_vec[i]; assert(base_array_ptr->getType()->isPointerTy()); - Value *store_ptr = store->getPointerOperand(); - const SCEV *store_ptr_se = SE->getSCEV(store_ptr); + Value *mem_instr_ptr = NULL; + if (StoreInst *store_instr = dyn_cast(mem_instr)) { + mem_instr_ptr = store_instr->getPointerOperand(); + } else if (LoadInst *load_instr = dyn_cast(mem_instr)) { + mem_instr_ptr = load_instr->getPointerOperand(); + } + const SCEV *mem_instr_ptr_se = SE->getSCEV(mem_instr_ptr); const SCEV *base_ptr_se = SE->getSCEV(base_array_ptr); - const SCEV *diff = SE->getMinusSCEV(store_ptr_se, base_ptr_se); + const SCEV *diff = SE->getMinusSCEV(mem_instr_ptr_se, base_ptr_se); APInt min_val = SE->getSignedRangeMin(diff); APInt max_val = SE->getSignedRangeMax(diff); if (min_val == max_val) { - return i; + int val = (int)max_val.roundToDouble(); + return {i, val}; } } - return -1; + return {-1, -1}; } // Check Alignment @@ -955,7 +971,8 @@ ad_trees_t sort_ad_trees(ad_trees_t ad_trees, for (ad_tree_t ad_tree : ad_trees) { if (ad_tree.size() != 0) { if (StoreInst *store = dyn_cast(ad_tree.back())) { - int base_ref = get_base_reference(store, base_of_array_vec, SE); + auto [base_ref, _] = + get_base_reference(store, base_of_array_vec, SE); if (base_ref >= 0) { groups_of_trees[base_ref].push_back(ad_tree); store_to_base_map[store] = base_ref; @@ -1229,7 +1246,8 @@ std::vector> instr2ref(chunks_t chunks) { * Run Optimization Procedure on Vector representing concatenated ad trees * */ -void run_optimization(std::vector chunk, Function &F) { +bool run_optimization(std::vector chunk, Function &F, + std::vector load_info) { assert(chunk.size() != 0); // Place the builder at the last instruction in the entire chunk. Value *last_value = unwrap(chunk.back()); @@ -1240,9 +1258,43 @@ void run_optimization(std::vector chunk, Function &F) { Module *mod = F.getParent(); LLVMContext &context = F.getContext(); std::vector restricted_instrs = {}; - optimize(wrap(mod), wrap(&context), wrap(&builder), chunk.data(), - chunk.size(), restricted_instrs.data(), restricted_instrs.size(), - RunOpt, PrintOpt); + + return optimize(wrap(mod), wrap(&context), wrap(&builder), chunk.data(), + chunk.size(), restricted_instrs.data(), + restricted_instrs.size(), load_info.data(), + load_info.size(), RunOpt, PrintOpt); +} + +/** + * Match each load with a pair of base id and offset + * + * NOTE: A load might be associated with more than 1 base, we choose the first. + * THIS COULD BE A BUG in the future! 
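A compact sketch of that first-match policy, with the byte offset divided by FLOAT_SIZE_IN_BYTES exactly as match_loads does below; the Option inputs stand in for the SCEV "signed range min equals max" constant-difference check on the C++ side:

const FLOAT_SIZE_IN_BYTES: i32 = 4;

// For one load, `byte_offsets[i]` is Some(d) when the SCEV difference from base i
// is the compile-time constant d, and None when it is not constant.
fn match_load_to_base(byte_offsets: &[Option<i32>]) -> Option<(i32, i32)> {
    for (base_id, byte_offset) in byte_offsets.iter().enumerate() {
        if let Some(bytes) = byte_offset {
            // first matching base wins, which is the caveat flagged above
            return Some((base_id as i32, bytes / FLOAT_SIZE_IN_BYTES));
        }
    }
    None
}

fn main() {
    // constant offset of 8 bytes from base 1  ->  float offset 2
    assert_eq!(match_load_to_base(&[None, Some(8)]), Some((1, 2)));
}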
+ */ +std::vector match_loads(std::vector loads, + std::vector base_load_locations, + ScalarEvolution *SE) { + std::vector results = {}; + for (LoadInst *load : loads) { + bool continue_iteration = false; + for (Value *base_loc : base_load_locations) { + auto [load_base, load_offset] = + get_base_reference(load, base_load_locations, SE); + if (load_base >= 0) { + load_info_t new_load = {.load = wrap(load), + .base_id = load_base, + .offset = static_cast( + load_offset / FLOAT_SIZE_IN_BYTES)}; + results.push_back(new_load); + continue_iteration = true; + break; + } + } + if (continue_iteration) { + continue; + } + } + return results; } /** @@ -1259,6 +1311,7 @@ struct DiospyrosPass : public FunctionPass { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); } virtual bool runOnFunction(Function &F) override { @@ -1267,6 +1320,8 @@ struct DiospyrosPass : public FunctionPass { AliasAnalysis *AA = &getAnalysis().getAAResults(); ScalarEvolution *SE = &getAnalysis().getSE(); + TargetLibraryInfo *TLI = + &getAnalysis().getTLI(F); // do not optimize on main function or no_opt functions. if (F.getName() == MAIN_FUNCTION_NAME || @@ -1286,6 +1341,45 @@ struct DiospyrosPass : public FunctionPass { } } + // Grab information on load base locations + std::vector base_load_locations = {}; + for (auto &a : F.args()) { + if (a.getType()->isPointerTy()) { + if (Value *arg_val = dyn_cast(&a)) { + base_load_locations.push_back(arg_val); + } + } + } + for (auto &B : F) { + for (auto &I : B) { + if (Value *V = dyn_cast(&I)) { + if (isMallocOrCallocLikeFn(V, TLI)) { + base_load_locations.push_back(V); + } + } + } + } + std::map base_load_to_id = {}; + int count = 0; + for (auto instr : base_load_locations) { + base_load_to_id[instr] = count++; + } + + // Grab information on loads + std::vector loads = {}; + for (auto &B : F) { + for (auto &I : B) { + if (LoadInst *load_instr = dyn_cast(&I)) { + if (std::find(loads.begin(), loads.end(), load_instr) == + loads.end()) { + loads.push_back(load_instr); + } + } + } + } + std::vector load_info = + match_loads(loads, base_load_locations, SE); + bool has_changes = true; for (auto &B : F) { auto chunks = build_chunks(&B, AA); @@ -1295,7 +1389,7 @@ struct DiospyrosPass : public FunctionPass { for (auto tree_chunk : treerefs) { if (tree_chunk.size() != 0) { - run_optimization(tree_chunk, F); + has_changes = run_optimization(tree_chunk, F, load_info); } } } diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 590f29ca..352d937b 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -166,6 +166,14 @@ pub fn calculate_cost() -> u32 { return 0; } +/// Struct representing load info, same as on C++ side +#[repr(C)] +pub struct load_info_t { + pub load: LLVMValueRef, + pub base_id: i32, + pub offset: i32, +} + /// Main function to optimize: Takes in a basic block of instructions, /// optimizes it, and then translates it to LLVM IR code, in place. 
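The hunk that follows threads the per-load information gathered above through the C ABI. Because the pieces are spread over several files (the C++ load_info struct, the Rust repr(C) mirror, from_raw_parts, and the map built during translation), here is the whole chain in one runnable sketch; Handle stands in for LLVMValueRef so the sketch has no llvm-sys dependency:

use std::collections::BTreeMap;
use std::slice::from_raw_parts;

// Mirrors the C++ `load_info` record: which load, which base array, which float offset.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
struct LoadInfo {
    load: usize, // stand-in for LLVMValueRef
    base_id: i32,
    offset: i32,
}

unsafe fn load_info_map(ptr: *const LoadInfo, len: usize) -> BTreeMap<usize, (i32, i32)> {
    let infos = from_raw_parts(ptr, len);
    let mut map = BTreeMap::new();
    for info in infos {
        map.insert(info.load, (info.base_id, info.offset));
    }
    map
}

fn main() {
    let infos = [LoadInfo { load: 7, base_id: 0, offset: 2 }];
    let map = unsafe { load_info_map(infos.as_ptr(), infos.len()) };
    assert_eq!(map[&7], (0, 2));
}

Keeping the layout identical on both sides is the whole contract here; any field reordering in the C++ struct would silently corrupt every (base, offset) pair read on the Rust side.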
@@ -178,13 +186,16 @@ pub fn optimize( chunk_size: size_t, restricted_instrs: *const LLVMValueRef, restricted_size: size_t, + load_info: *const load_info_t, + load_info_size: size_t, run_egg: bool, print_opt: bool, -) -> () { +) -> bool { unsafe { // preprocessing of instructions let chunk_llvm_instrs = from_raw_parts(chunk_instrs, chunk_size); let restricted_llvm_instrs = from_raw_parts(restricted_instrs, restricted_size); + let load_info = from_raw_parts(load_info, load_info_size); // llvm to egg let (egg_expr, llvm2egg_metadata) = @@ -193,7 +204,7 @@ pub fn optimize( // Bail if no egg Nodes to optimize if egg_expr.as_ref().is_empty() { eprintln!("No Egg Nodes in Optimization Vector"); - return; + return false; } // optimization pass @@ -218,6 +229,8 @@ pub fn optimize( builder, run_egg, ); + + return true; } } From 24584f875d958ce787b56efcddffbea27ebd5d6d Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Mon, 8 May 2023 21:11:25 -0400 Subject: [PATCH 117/143] get aligned generation to start working qr decomp and simple tests' --- src/dios-egraphs/Diospyros/src/lib.rs | 90 ++++++++++++++++------- src/dios-egraphs/src/cost.rs | 1 + src/dios-egraphs/src/rules.rs | 100 +++++++++++++++++++++++++- src/dios-egraphs/src/veclang.rs | 7 +- 4 files changed, 172 insertions(+), 26 deletions(-) diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 352d937b..44340ab5 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -168,6 +168,7 @@ pub fn calculate_cost() -> u32 { /// Struct representing load info, same as on C++ side #[repr(C)] +#[derive(Debug, Clone)] pub struct load_info_t { pub load: LLVMValueRef, pub base_id: i32, @@ -198,8 +199,12 @@ pub fn optimize( let load_info = from_raw_parts(load_info, load_info_size); // llvm to egg - let (egg_expr, llvm2egg_metadata) = - llvm_to_egg_main(chunk_llvm_instrs, restricted_llvm_instrs, run_egg); + let (egg_expr, llvm2egg_metadata) = llvm_to_egg_main( + chunk_llvm_instrs, + restricted_llvm_instrs, + run_egg, + load_info, + ); // Bail if no egg Nodes to optimize if egg_expr.as_ref().is_empty() { @@ -444,6 +449,7 @@ struct LLVM2EggState { prior_translated_instructions: BTreeSet, start_instructions: Vec, start_ids: Vec, + load_info: BTreeMap, } /// Translates LLVM Arg to an Egg Argument Node @@ -535,6 +541,11 @@ unsafe fn sqrt32_to_egg( (new_enode_vec, new_next_node_idx + 1) } +/// Grab the associated index of load in the load_info vector, otherwise u32::max +unsafe fn get_load_idx(load: LLVMValueRef, load_info: &[load_info_t]) -> u32 { + return u32::MAX; +} + /// Translates a Load to an Egg Get Node /// /// The translation of a load is a Get Node, which can then possibly be vectorized @@ -556,11 +567,24 @@ unsafe fn load_to_egg( // assert!(isa_gep(llvm_gep_instr) || isa_argument(llvm_gep_instr)); translation_metadata.get2gep.insert(gep_id, llvm_gep_instr); - let load_node = VecLang::Load([Id::from(next_node_idx as usize)]); + let result = translation_metadata.load_info.get(&llvm_instr); + let (base_id, offset) = match result { + None => (-1, -1), + Some(n) => *n, + }; + let base_node = VecLang::Num(base_id); + egg_nodes.push(base_node.clone()); + let offset_node = VecLang::Num(offset); + egg_nodes.push(offset_node.clone()); + let load_node = VecLang::Load([ + Id::from(next_node_idx as usize), + Id::from((next_node_idx + 1) as usize), + Id::from((next_node_idx + 2) as usize), + ]); egg_nodes.push(load_node.clone()); 
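With Load nodes now carrying (base, offset) children as above, the later hunks add two consumers of that information: the memory_is_aligned_and_consec condition on the AlignedConsecVecLoad rewrite, and the PermuteLoad applier that accepts any permutation of an aligned slot and emits a Shuffle mask. A condensed sketch of both checks over plain (base, offset) pairs, detached from the e-graph machinery:

// Four lanes of a candidate vector load, each as (base array id, float offset).
type Lane = (i32, i32);

// Already aligned and consecutive: same base, offsets o, o+1, o+2, o+3 with o % 4 == 0.
fn aligned_and_consec(lanes: &[Lane; 4]) -> bool {
    let (base, first) = lanes[0];
    lanes.iter().all(|&(b, _)| b == base)
        && first % 4 == 0
        && (0..4).all(|i| lanes[i].1 == first + i as i32)
}

// A permutation of an aligned slot: return the shuffle mask that restores source order,
// i.e. mask[i] is the lane of the sorted (consecutive) load that feeds result position i.
fn permuted_mask(lanes: &[Lane; 4]) -> Option<[u32; 4]> {
    let mut sorted = *lanes;
    sorted.sort_by_key(|&(_, o)| o);
    if !aligned_and_consec(&sorted) {
        return None;
    }
    let mut mask = [0u32; 4];
    for (i, lane) in lanes.iter().enumerate() {
        mask[i] = sorted.iter().position(|s| s == lane).unwrap() as u32;
    }
    if mask == [0, 1, 2, 3] {
        return None; // the identity permutation is handled by the plain aligned rule
    }
    Some(mask)
}

fn main() {
    assert!(aligned_and_consec(&[(0, 4), (0, 5), (0, 6), (0, 7)]));
    // offsets 2,1,3,0 are a permutation of the aligned slot 0..3
    assert_eq!(permuted_mask(&[(0, 2), (0, 1), (0, 3), (0, 0)]), Some([2, 1, 3, 0]));
}

The mask produced in the second case is what eventually becomes the constant vector handed to the shufflevector built on the LLVM side: one aligned vector load plus one shuffle replaces four scattered scalar loads.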
assert!(!translation_metadata.llvm2reg.contains_key(&llvm_instr)); translation_metadata.llvm2reg.insert(llvm_instr, load_node); - (egg_nodes, next_node_idx + 2) + (egg_nodes, next_node_idx + 4) } unsafe fn store_to_egg( @@ -720,6 +744,7 @@ unsafe fn llvm_to_egg_main( llvm_instrs_in_chunk: &[LLVMValueRef], restricted_instrs: &[LLVMValueRef], vectorize: bool, + load_info: &[load_info_t], // TODO: feed this in as an argument llvm_instr2egg_node: BTreeMap, ) -> (RecExpr, LLVM2EggState) { let mut egg_nodes: Vec = Vec::new(); @@ -750,6 +775,12 @@ unsafe fn llvm_to_egg_main( restricted_instrs_set.insert(*llvm_instr); } + // Load Info map + let mut load_info_map: BTreeMap = BTreeMap::new(); + for triple in load_info.iter() { + load_info_map.insert(triple.load, (triple.base_id, triple.offset)); + } + // Invariant: every restricted instruction is in the chunk, using a pointer check for restr_instr in restricted_instrs.iter() { let mut found_match = false; @@ -778,6 +809,7 @@ unsafe fn llvm_to_egg_main( prior_translated_instructions: prior_translated_instructions, start_instructions: start_instructions, start_ids: start_ids, + load_info: load_info_map, }; // Index of next node to translate @@ -876,6 +908,32 @@ unsafe fn store_to_llvm(val_id: &Id, gep_id: &Id, md: &mut Egg2LLVMState) -> LLV panic!("Store2LLVM: Expected a successful lookup in get2gep, but cannot find Gep ID."); } +unsafe fn aligned_consec_loadvec_to_llvm(gep1_id: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { + // New code to handle an aligned and consecutive vector load + let gep1_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); + let gep1_llvm_instr = md + .llvm2egg_metadata + .get2gep + .get(&gep1_id_val) + .expect("Value of gep1 id should exist in get2gep"); + let address_space = LLVMGetPointerAddressSpace(LLVMTypeOf(*gep1_llvm_instr)); + let bitcase_scalar_to_vector_type = LLVMBuildBitCast( + md.builder, + *gep1_llvm_instr, + LLVMPointerType( + LLVMVectorType(LLVMFloatTypeInContext(md.context), 4), + address_space, + ), + b"scalar-to-vector-type-bit-cast\0".as_ptr() as *const _, + ); + let load = LLVMBuildLoad( + md.builder, + bitcase_scalar_to_vector_type, + b"vector-load\0".as_ptr() as *const _, + ); + return load; +} + unsafe fn loadvec_to_llvm( gep1_id: &Id, gep2_id: &Id, @@ -910,24 +968,6 @@ unsafe fn loadvec_to_llvm( .get(&gep4_id_val) .expect("Value of gep4 id should exist in get2gep"); - // New code to handle a vector load - // let address_space = LLVMGetPointerAddressSpace(LLVMTypeOf(*gep1_llvm_instr)); - // let bitcase_scalar_to_vector_type = LLVMBuildBitCast( - // md.builder, - // *gep1_llvm_instr, - // LLVMPointerType( - // LLVMVectorType(LLVMFloatTypeInContext(md.context), 4), - // address_space, - // ), - // b"scalar-to-vector-type-bit-cast\0".as_ptr() as *const _, - // ); - // let load = LLVMBuildLoad( - // md.builder, - // bitcase_scalar_to_vector_type, - // b"vector-load\0".as_ptr() as *const _, - // ); - // return load; - let vector_width = 4; let floatptr_type = LLVMTypeOf(*gep1_llvm_instr); let vec4ptr_type = LLVMVectorType(floatptr_type, vector_width); @@ -1558,7 +1598,7 @@ unsafe fn egg_to_llvm( VecLang::Gep(..) => { panic!("Gep was found. 
Egg to LLVM Translation does not handle gep nodes.") } - VecLang::Load([gep_id]) => { + VecLang::Load([gep_id, _, _]) => { load_to_llvm(gep_id, translation_metadata) } VecLang::Store([val_id, gep_id]) => { @@ -1602,6 +1642,7 @@ unsafe fn egg_to_llvm( VecLang::Sgn([n]) | VecLang::Sqrt([n]) | VecLang::Neg([n]) => scalar_unop_to_llvm(n, egg_node, translation_metadata), VecLang::VecLoad([gep1_id, gep2_id, gep3_id, gep4_id]) => loadvec_to_llvm(gep1_id, gep2_id, gep3_id, gep4_id, translation_metadata), VecLang::VecStore([val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id]) => storevec_to_llvm(val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id, translation_metadata), + VecLang::AlignedConsecVecLoad([gep_id]) => aligned_consec_loadvec_to_llvm(gep_id, translation_metadata), } } @@ -1806,7 +1847,7 @@ unsafe fn canonicalize_egg( panic!("Get was found. Egg canonicalization does not handle get nodes.") } VecLang::Gep(g) => vec![VecLang::Gep(*g)], - VecLang::Load([gep_id]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::Load(single)}, gep_id, old_egg_nodes ), + VecLang::Load([gep_id, base_id, offset]) => canonicalize_triple(can_change_vector,|triple| -> VecLang {VecLang::Load(triple)}, gep_id, base_id, offset, old_egg_nodes ), VecLang::Store([val_id, gep_id]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Store(pair)}, val_id, gep_id, old_egg_nodes), VecLang::Set(..) => { panic!("Set was found. Egg canonicalization does not handle set nodes.") @@ -1847,6 +1888,7 @@ unsafe fn canonicalize_egg( VecLang::Neg([n]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::Neg(single)}, n, old_egg_nodes ), VecLang::VecLoad([gep1_id, gep2_id, gep3_id, gep4_id]) => canonicalize_quadruple(can_change_vector,|quad| -> VecLang {VecLang::VecLoad(quad)}, gep1_id, gep2_id, gep3_id, gep4_id, old_egg_nodes ), VecLang::VecStore([val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id]) => canonicalize_quintuple(can_change_vector,|quint| -> VecLang {VecLang::VecStore(quint)}, val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id, old_egg_nodes ), + VecLang::AlignedConsecVecLoad([get_id]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::AlignedConsecVecLoad(single)}, get_id, old_egg_nodes ), } } diff --git a/src/dios-egraphs/src/cost.rs b/src/dios-egraphs/src/cost.rs index 08ee28e4..4c0bd9dc 100644 --- a/src/dios-egraphs/src/cost.rs +++ b/src/dios-egraphs/src/cost.rs @@ -28,6 +28,7 @@ impl CostFunction for VecCostFn<'_> { // Vectorized Memory Accesses are cheaper than individual memory loads and stores // Note: This assumes that masked-gathers or masked-scattters to vectors or memory // are implemented on the target, and are cheap, according to the LLVM cost model + VecLang::AlignedConsecVecLoad(..) => VECTORIZED_MEMORY_ACCESS, VecLang::VecLoad(..) => VECTORIZED_MEMORY_ACCESS, VecLang::VecStore(..) 
=> VECTORIZED_MEMORY_ACCESS, diff --git a/src/dios-egraphs/src/rules.rs b/src/dios-egraphs/src/rules.rs index 821c9fb6..f046a326 100644 --- a/src/dios-egraphs/src/rules.rs +++ b/src/dios-egraphs/src/rules.rs @@ -126,6 +126,102 @@ pub fn build_litvec_rule() -> Rewrite { if is_all_same_memory_or_zero(&mem_vars)) } +// This returns a function that implements Condition +fn is_true( + var1: &'static str, + var2: &'static str, + var3: &'static str, + var4: &'static str, + var5: &'static str, + var6: &'static str, + var7: &'static str, + var8: &'static str, +) -> impl Fn(&mut EGraph, Id, &Subst) -> bool { + let var1: Var = var1.parse().unwrap(); + let var2: Var = var2.parse().unwrap(); + let var3: Var = var3.parse().unwrap(); + let var4: Var = var4.parse().unwrap(); + let var5: Var = var5.parse().unwrap(); + let var6: Var = var6.parse().unwrap(); + let var7: Var = var7.parse().unwrap(); + let var8: Var = var8.parse().unwrap(); + move |egraph, _, subst| unsafe { + let mut first_base = -10; + for e in egraph[subst[var1]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + first_base = n; + } + } + assert!(first_base != -10); + let mut second_base = -10; + for e in egraph[subst[var2]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + second_base = n; + } + } + assert!(second_base != -10); + let mut third_base = -10; + for e in egraph[subst[var3]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + third_base = n; + } + } + assert!(third_base != -10); + let mut fourth_base = -10; + for e in egraph[subst[var4]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + fourth_base = n; + } + } + assert!(fourth_base != -10); + + if !(first_base == second_base && first_base == third_base && first_base == fourth_base) { + return false; + } + + let mut first_offset = -10; + for e in egraph[subst[var5]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + first_offset = n; + } + } + assert!(first_offset != -10); + let mut second_offset = -10; + for e in egraph[subst[var6]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + second_offset = n; + } + } + assert!(second_offset != -10); + let mut third_offset = -10; + for e in egraph[subst[var7]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + third_offset = n; + } + } + assert!(third_offset != -10); + let mut fourth_offset = -10; + for e in egraph[subst[var8]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + fourth_offset = n; + } + } + assert!(fourth_offset != -10); + + if !(first_offset + 1 == second_offset + && first_offset + 2 == third_offset + && first_offset + 3 == fourth_offset) + { + return false; + } + if !(first_offset % 4 == 0) { + return false; + } + + return true; + } +} + pub fn rules(no_ac: bool, no_vec: bool) -> Vec> { let mut rules: Vec> = vec![ rw!("add-0"; "(+ 0 ?a)" => "?a"), @@ -158,8 +254,10 @@ pub fn rules(no_ac: bool, no_vec: bool) -> Vec> { // Vector rules if !no_vec { rules.extend(vec![ + // Aligned Consec Load rule + rw!("vec-load-aligned-consec"; "(Vec (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1) (Load ?a2 ?b2 ?o2) (Load ?a3 ?b3 ?o3))" => "(AlignedConsecVecLoad ?a0)" if is_true("?b0", "?b1", "?b2", "?b3", "?o0", "?o1", "?o2", "?o3")), // Load load fusion rule - rw!("vec-load-Loads"; "(Vec (Load ?a0) (Load ?a1) (Load ?a2) (Load ?a3))" => "(VecLoad ?a0 ?a1 ?a2 ?a3)"), + rw!("vec-load-Loads"; "(Vec (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1) (Load ?a2 ?b2 ?o2) (Load ?a3 ?b3 ?o3))" => "(VecLoad ?a0 ?a1 ?a2 ?a3)"), // Set store fusion rule 
rw!("vec-store-sets"; "(Vec (Store ?a0 ?b0) (Store ?a1 ?b1) (Store ?a2 ?b2) (Store ?a3 ?b3))" => "(VecStore (Vec ?a0 ?a1 ?a2 ?a3) ?b0 ?b1 ?b2 ?b3)"), // Special MAC fusion rule diff --git a/src/dios-egraphs/src/veclang.rs b/src/dios-egraphs/src/veclang.rs index 6955bda7..4a86e797 100644 --- a/src/dios-egraphs/src/veclang.rs +++ b/src/dios-egraphs/src/veclang.rs @@ -13,7 +13,10 @@ define_language! { Gep(u32), // Load is a read of memory - "Load" = Load([Id; 1]), + // The FIRST subelement is the ID of the parent of this load + // The SECOND subelement is a ID of the base of the array where the load occurs + // The THIRD subelement is the offset from the base of the array. Offsets are in number of floats away from base. + "Load" = Load([Id; 3]), // Store is a write to memory "Store" = Store([Id; 2]), @@ -71,6 +74,8 @@ define_language! { "VecLoad" = VecLoad([Id; 4]), + "AlignedConsecVecLoad" = AlignedConsecVecLoad([Id; 1]), + "VecStore" = VecStore([Id; 5]), // Info specific to register From 59bb456eb4e94da74d53f4c0790f99e36207668b Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 9 May 2023 17:30:03 -0400 Subject: [PATCH 118/143] add and test load permute --- src/dios-egraphs/Diospyros/c-tests/permuted.c | 27 +++ src/dios-egraphs/Diospyros/src/lib.rs | 2 + src/dios-egraphs/src/alignconsecsearcher.rs | 160 +++++++++++++++++- src/dios-egraphs/src/cost.rs | 6 +- src/dios-egraphs/src/lib.rs | 1 + src/dios-egraphs/src/rules.rs | 29 +++- src/dios-egraphs/src/veclang.rs | 2 + 7 files changed, 220 insertions(+), 7 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/c-tests/permuted.c diff --git a/src/dios-egraphs/Diospyros/c-tests/permuted.c b/src/dios-egraphs/Diospyros/c-tests/permuted.c new file mode 100644 index 00000000..ca9f879e --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/permuted.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void permuted(float a_in[restrict SIZE], float b_in[restrict SIZE], + float c_out[restrict SIZE]) { + c_out[1] = a_in[2] + b_in[1]; + c_out[0] = a_in[1] + b_in[0]; + c_out[3] = a_in[3] + b_in[2]; + c_out[2] = a_in[0] + b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + permuted(a_in, b_in, c_out); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + assert(c_out[0] == 7); + assert(c_out[1] == 9); + assert(c_out[2] == 9); + assert(c_out[3] == 11); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 44340ab5..4465f51f 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -1643,6 +1643,7 @@ unsafe fn egg_to_llvm( VecLang::VecLoad([gep1_id, gep2_id, gep3_id, gep4_id]) => loadvec_to_llvm(gep1_id, gep2_id, gep3_id, gep4_id, translation_metadata), VecLang::VecStore([val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id]) => storevec_to_llvm(val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id, translation_metadata), VecLang::AlignedConsecVecLoad([gep_id]) => aligned_consec_loadvec_to_llvm(gep_id, translation_metadata), + VecLang::Shuffle(..) 
=> panic!("Shuffle to LLVM Unimplemented"), } } @@ -1889,6 +1890,7 @@ unsafe fn canonicalize_egg( VecLang::VecLoad([gep1_id, gep2_id, gep3_id, gep4_id]) => canonicalize_quadruple(can_change_vector,|quad| -> VecLang {VecLang::VecLoad(quad)}, gep1_id, gep2_id, gep3_id, gep4_id, old_egg_nodes ), VecLang::VecStore([val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id]) => canonicalize_quintuple(can_change_vector,|quint| -> VecLang {VecLang::VecStore(quint)}, val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id, old_egg_nodes ), VecLang::AlignedConsecVecLoad([get_id]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::AlignedConsecVecLoad(single)}, get_id, old_egg_nodes ), + VecLang::Shuffle(..) => panic!("Shuffle Normalization Unimplemented"), } } diff --git a/src/dios-egraphs/src/alignconsecsearcher.rs b/src/dios-egraphs/src/alignconsecsearcher.rs index dd53b7fd..babefa3b 100644 --- a/src/dios-egraphs/src/alignconsecsearcher.rs +++ b/src/dios-egraphs/src/alignconsecsearcher.rs @@ -1,3 +1,157 @@ -/// Search for sequences of Loads and Stores that are Aligned and Consecutive -/// -/// This helps LLVM generate more efficient code for non-DSP processors \ No newline at end of file +use crate::veclang::VecLang; +use egg::*; + +/// Search for permutations of sequences of Loads and Stores that are Aligned and Consecutive +/// +/// This module creates an Applier, which attempts to find successful permutations of loads ands stores to be aligned and consecutive + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PermuteLoad { + pub a0: Var, + pub a1: Var, + pub a2: Var, + pub a3: Var, + pub b0: Var, + pub b1: Var, + pub b2: Var, + pub b3: Var, + pub o0: Var, + pub o1: Var, + pub o2: Var, + pub o3: Var, +} + +impl> Applier for PermuteLoad { + /// We are going to look for permutations of the four offsets that could + /// allow for consecutive and aligned loading to occur with a shuffle operation + fn apply_one(&self, egraph: &mut EGraph, matched_id: Id, subst: &Subst) -> Vec { + let mut first_base = -10; + for e in egraph[subst[self.b0]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + first_base = n; + } + } + assert!(first_base != -10); + let mut second_base = -10; + for e in egraph[subst[self.b1]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + second_base = n; + } + } + assert!(second_base != -10); + let mut third_base = -10; + for e in egraph[subst[self.b2]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + third_base = n; + } + } + assert!(third_base != -10); + let mut fourth_base = -10; + for e in egraph[subst[self.b3]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + fourth_base = n; + } + } + assert!(fourth_base != -10); + + if !(first_base == second_base + && first_base == third_base + && first_base == fourth_base + && first_base >= 0) + { + return vec![]; + } + + let mut first_offset = -10; + for e in egraph[subst[self.o0]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + first_offset = n; + } + } + assert!(first_offset != -10); + let mut second_offset = -10; + for e in egraph[subst[self.o1]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + second_offset = n; + } + } + assert!(second_offset != -10); + let mut third_offset = -10; + for e in egraph[subst[self.o2]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + third_offset = n; + } + } + assert!(third_offset != -10); + let mut fourth_offset = -10; + for e in egraph[subst[self.o3]].nodes.as_slice().into_iter() { + if let 
VecLang::Num(n) = *e { + fourth_offset = n; + } + } + assert!(fourth_offset != -10); + + let off0_id: Id = subst[self.o0]; + let off1_id: Id = subst[self.o1]; + let off2_id: Id = subst[self.o2]; + let off3_id: Id = subst[self.o3]; + let base0_id: Id = subst[self.a0]; + let base1_id: Id = subst[self.a1]; + let base2_id: Id = subst[self.a2]; + let base3_id: Id = subst[self.a3]; + + // deduplicate + let mut undedup_offsets = vec![first_offset, second_offset, third_offset, fourth_offset]; + undedup_offsets.dedup(); + if undedup_offsets.len() < 4 { + return vec![]; + } + + let mut offsets: Vec<(i32, Id, Id)> = Vec::new(); + offsets.push((first_offset, off0_id, base0_id)); + offsets.push((second_offset, off1_id, base1_id)); + offsets.push((third_offset, off2_id, base2_id)); + offsets.push((fourth_offset, off3_id, base3_id)); + offsets.sort_by(|o1, o2| o1.0.partial_cmp(&o2.0).unwrap()); + + if offsets[0].0 % 4 != 0 { + return vec![]; + } + + if !(offsets[0].0 + 1 == offsets[1].0 + && offsets[0].0 + 2 == offsets[2].0 + && offsets[0].0 + 3 == offsets[3].0) + { + return vec![]; + } + + let mut shuffle_vec: Vec = Vec::new(); + let offset_ids_vec: Vec = vec![off0_id, off1_id, off2_id, off3_id]; + for off_id in offset_ids_vec { + for (i, (_, other_off_id, _)) in offsets.iter().enumerate() { + if off_id == *other_off_id { + shuffle_vec.push(i as u32); + } + } + } + // the identity permutation does not count, as it gets handled elsewhere + if shuffle_vec == vec![0, 1, 2, 3] { + return vec![]; + } + + let mut shuffle_ids_vec: Vec = Vec::new(); + for elt in shuffle_vec { + let new_shuf_id = egraph.add(VecLang::Num(elt as i32)); + shuffle_ids_vec.push(new_shuf_id); + } + let (_, _, first_base_id) = offsets[0]; + let aligned_consec_load_vec = egraph.add(VecLang::AlignedConsecVecLoad([first_base_id])); + let shuffle_shuf_arg = egraph.add(VecLang::Vec(shuffle_ids_vec.into_boxed_slice())); + let shuffle_vec_op = egraph.add(VecLang::Shuffle([ + aligned_consec_load_vec, + shuffle_shuf_arg, + ])); + + vec![shuffle_vec_op] + } +} diff --git a/src/dios-egraphs/src/cost.rs b/src/dios-egraphs/src/cost.rs index 4c0bd9dc..5ce004f4 100644 --- a/src/dios-egraphs/src/cost.rs +++ b/src/dios-egraphs/src/cost.rs @@ -15,6 +15,7 @@ impl CostFunction for VecCostFn<'_> { C: FnMut(Id) -> Self::Cost, { const NO_OPTIMIZATION: f64 = 0.0; + const ALIGNED_CONSEC_VECTORIZED_MEMORY_ACCESS: f64 = -2.0; const VECTORIZED_MEMORY_ACCESS: f64 = 0.0001; const LITERAL: f64 = 0.001; const STRUCTURE: f64 = 0.1; @@ -28,9 +29,9 @@ impl CostFunction for VecCostFn<'_> { // Vectorized Memory Accesses are cheaper than individual memory loads and stores // Note: This assumes that masked-gathers or masked-scattters to vectors or memory // are implemented on the target, and are cheap, according to the LLVM cost model - VecLang::AlignedConsecVecLoad(..) => VECTORIZED_MEMORY_ACCESS, + VecLang::AlignedConsecVecLoad(..) => ALIGNED_CONSEC_VECTORIZED_MEMORY_ACCESS, VecLang::VecLoad(..) => VECTORIZED_MEMORY_ACCESS, - VecLang::VecStore(..) => VECTORIZED_MEMORY_ACCESS, + VecLang::VecStore(..) => ALIGNED_CONSEC_VECTORIZED_MEMORY_ACCESS, // You get literals for extremely cheap VecLang::Num(..) => LITERAL, @@ -78,6 +79,7 @@ impl CostFunction for VecCostFn<'_> { VecLang::VecNeg(..) => VEC_OP, VecLang::VecSqrt(..) => VEC_OP, VecLang::VecSgn(..) => VEC_OP, + VecLang::Shuffle(..) 
=> VEC_OP, _ => VEC_OP, }; enode.fold(op_cost, |sum, id| sum + costs(id)) diff --git a/src/dios-egraphs/src/lib.rs b/src/dios-egraphs/src/lib.rs index 95b589d0..99887dc2 100644 --- a/src/dios-egraphs/src/lib.rs +++ b/src/dios-egraphs/src/lib.rs @@ -1,3 +1,4 @@ +pub mod alignconsecsearcher; pub mod binopsearcher; pub mod config; pub mod cost; diff --git a/src/dios-egraphs/src/rules.rs b/src/dios-egraphs/src/rules.rs index f046a326..4c7cc3bc 100644 --- a/src/dios-egraphs/src/rules.rs +++ b/src/dios-egraphs/src/rules.rs @@ -3,6 +3,7 @@ use egg::{rewrite as rw, *}; use itertools::Itertools; use crate::{ + alignconsecsearcher::*, binopsearcher::build_binop_or_zero_rule, config::*, cost::VecCostFn, @@ -127,7 +128,7 @@ pub fn build_litvec_rule() -> Rewrite { } // This returns a function that implements Condition -fn is_true( +fn memory_is_aligned_and_consec( var1: &'static str, var2: &'static str, var3: &'static str, @@ -255,7 +256,7 @@ pub fn rules(no_ac: bool, no_vec: bool) -> Vec> { if !no_vec { rules.extend(vec![ // Aligned Consec Load rule - rw!("vec-load-aligned-consec"; "(Vec (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1) (Load ?a2 ?b2 ?o2) (Load ?a3 ?b3 ?o3))" => "(AlignedConsecVecLoad ?a0)" if is_true("?b0", "?b1", "?b2", "?b3", "?o0", "?o1", "?o2", "?o3")), + rw!("vec-load-aligned-consec"; "(Vec (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1) (Load ?a2 ?b2 ?o2) (Load ?a3 ?b3 ?o3))" => "(AlignedConsecVecLoad ?a0)" if memory_is_aligned_and_consec("?b0", "?b1", "?b2", "?b3", "?o0", "?o1", "?o2", "?o3")), // Load load fusion rule rw!("vec-load-Loads"; "(Vec (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1) (Load ?a2 ?b2 ?o2) (Load ?a3 ?b3 ?o3))" => "(VecLoad ?a0 ?a1 ?a2 ?a3)"), // Set store fusion rule @@ -287,5 +288,29 @@ pub fn rules(no_ac: bool, no_vec: bool) -> Vec> { rw!("assoc-mul"; "(* (* ?a ?b) ?c)" => "(* ?a (* ?b ?c))"), ]); } + + // Data Movement Rules + // shuffle rules + rules.extend(vec![ + // Basic associativity/commutativity/identities + // rw!("shuffle-op"; "(+ ?a ?b)" => "(+ ?b ?a)"), + rw!("shuffle-load-vec"; "(Vec (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1) (Load ?a2 ?b2 ?o2) (Load ?a3 ?b3 ?o3))" => { PermuteLoad { + a0: "?a0".parse().unwrap(), + a1: "?a1".parse().unwrap(), + a2: "?a2".parse().unwrap(), + a3: "?a3".parse().unwrap(), + b0: "?b0".parse().unwrap(), + b1: "?b1".parse().unwrap(), + b2: "?b2".parse().unwrap(), + b3: "?b3".parse().unwrap(), + o0: "?o0".parse().unwrap(), + o1: "?o1".parse().unwrap(), + o2: "?o2".parse().unwrap(), + o3: "?o3".parse().unwrap(), + }}), + ]); + + // split vec rules + rules } diff --git a/src/dios-egraphs/src/veclang.rs b/src/dios-egraphs/src/veclang.rs index 4a86e797..94e8730b 100644 --- a/src/dios-egraphs/src/veclang.rs +++ b/src/dios-egraphs/src/veclang.rs @@ -78,6 +78,8 @@ define_language! 
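+    // The new "Shuffle" node pairs a data vector with a literal index vector of
+    // Num entries (e.g. a permutation of 0..3 produced by PermuteLoad);
+    // Diospyros/src/lib.rs lowers it to an LLVM shufflevector via shuffle_to_llvm.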
{ "VecStore" = VecStore([Id; 5]), + "Shuffle" = Shuffle([Id; 2]), + // Info specific to register // RegInfo(egg::Symbol), From 730aa01d3216e7005c6c8c7a20d07faa189f10bf Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Tue, 9 May 2023 18:09:50 -0400 Subject: [PATCH 119/143] add in llvm shuffle op --- src/dios-egraphs/Diospyros/src/lib.rs | 59 +++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 4465f51f..8773a60f 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -1,5 +1,9 @@ extern crate llvm_sys as llvm; -use dioslib::{config, rules, veclang::VecLang}; +use dioslib::{ + config::{self, vector_width}, + rules, + veclang::VecLang, +}; use egg::*; use libc::size_t; use llvm::{core::*, prelude::*, LLVMOpcode::*, LLVMRealPredicate}; @@ -1557,6 +1561,55 @@ unsafe fn vecsgn_to_llvm(vec: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { LLVMBuildCall(md.builder, func, args, 2, b"\0".as_ptr() as *const _) } +unsafe fn get_shuf_vec_data(shuf_vec_id: &Id, md: &mut Egg2LLVMState) -> Vec { + let mut results_vec = Vec::new(); + match &md.egg_nodes_vector[usize::from(*shuf_vec_id)] { + VecLang::Vec(boxed_ids) => { + let idvec = boxed_ids.to_vec(); + let idvec_len = idvec.len(); + for (idx, &eggid) in idvec.iter().enumerate() { + match &md.egg_nodes_vector[usize::from(eggid)] { + VecLang::Num(n) => results_vec.push(*n), + _ => panic!("Each element of a shuf vec needs to be a num"), + } + } + } + _ => panic!("Shuf Vec Id should point to a vector of numbers"), + } + return results_vec; +} + +/** + * Shuffle Node to an LLVM Shuffle Op + */ +unsafe fn shuffle_to_llvm( + data_vec_id: &Id, + shuf_vec_id: &Id, + md: &mut Egg2LLVMState, +) -> LLVMValueRef { + let data_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*data_vec_id)], md); + let shuf_data = get_shuf_vec_data(shuf_vec_id, md); + + // Build up shuf mask + let mut mask = Vec::new(); + for val in shuf_data { + mask.push(LLVMConstInt( + LLVMInt32TypeInContext(md.context), + val as u64, + 0 as i32, + )); + } + + let shuf_mask = LLVMConstVector(mask.as_mut_ptr(), vector_width() as u32); + LLVMBuildShuffleVector( + md.builder, + data_vec, + data_vec, + shuf_mask, + b"\0".as_ptr() as *const _, + ) +} + /** * Vector representing No Optimization: Egg will not have modified the vector at all. */ @@ -1643,7 +1696,7 @@ unsafe fn egg_to_llvm( VecLang::VecLoad([gep1_id, gep2_id, gep3_id, gep4_id]) => loadvec_to_llvm(gep1_id, gep2_id, gep3_id, gep4_id, translation_metadata), VecLang::VecStore([val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id]) => storevec_to_llvm(val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id, translation_metadata), VecLang::AlignedConsecVecLoad([gep_id]) => aligned_consec_loadvec_to_llvm(gep_id, translation_metadata), - VecLang::Shuffle(..) 
=> panic!("Shuffle to LLVM Unimplemented"), + VecLang::Shuffle([data_vec_id, shuf_vec_id]) => shuffle_to_llvm(data_vec_id, shuf_vec_id, translation_metadata), } } @@ -1890,7 +1943,7 @@ unsafe fn canonicalize_egg( VecLang::VecLoad([gep1_id, gep2_id, gep3_id, gep4_id]) => canonicalize_quadruple(can_change_vector,|quad| -> VecLang {VecLang::VecLoad(quad)}, gep1_id, gep2_id, gep3_id, gep4_id, old_egg_nodes ), VecLang::VecStore([val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id]) => canonicalize_quintuple(can_change_vector,|quint| -> VecLang {VecLang::VecStore(quint)}, val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id, old_egg_nodes ), VecLang::AlignedConsecVecLoad([get_id]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::AlignedConsecVecLoad(single)}, get_id, old_egg_nodes ), - VecLang::Shuffle(..) => panic!("Shuffle Normalization Unimplemented"), + VecLang::Shuffle([data_vec_id, shuf_vec_id]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Shuffle(pair)}, data_vec_id, shuf_vec_id, old_egg_nodes), } } From fe0893ff8f147b374552bb6ab01de9ddc3fbfdf8 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Mon, 12 Jun 2023 21:43:40 -0400 Subject: [PATCH 120/143] add more benchmarks --- src/dios-egraphs/Diospyros/Makefile | 8 +- .../Diospyros/benchmarks/optimized/conv-2d.c | 89 ++++++ .../Diospyros/benchmarks/optimized/mat-mul.c | 13 +- .../Diospyros/benchmarks/optimized/qprod.c | 117 +++++++ .../benchmarks/optimized/qr-decomp.c | 292 ++++++++++++++++++ .../benchmarks/optimized/stencil-2d.c | 71 +++++ .../Diospyros/c-tests/add-mul-interleave.c | 27 ++ .../Diospyros/plot-utilities/plot.py | 6 + src/dios-egraphs/Diospyros/src/lib.rs | 135 +++++++- 9 files changed, 737 insertions(+), 21 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/benchmarks/optimized/conv-2d.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/optimized/qprod.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/optimized/qr-decomp.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/optimized/stencil-2d.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/add-mul-interleave.c create mode 100644 src/dios-egraphs/Diospyros/plot-utilities/plot.py diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index 20156de3..6eb6d0ec 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -13,20 +13,20 @@ endif .PHONY: target/debug/libllvmlib.$(EXT) run-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - @$(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) + @$(CLANG) -target arm64-apple-macos11 -mmacosx-version-min=11.0 -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll @opt -S -load=target/debug/libllvmlib.$(EXT) --basic-aa --lsmovement --dse --adce build/opt.ll -o build/aa.ll @opt -S -load=target/debug/libllvmlib.$(EXT) --diospyros -opt -print=true build/aa.ll -o build/diospyros.ll @opt -S --adce --dse --gvn build/diospyros.ll -o build/dce.ll - @$(CLANG) build/dce.ll -o build/final + @$(CLANG) -target arm64-apple-macos11 -mmacosx-version-min=11.0 build/dce.ll -o build/final @build/final run-baseline: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - 
@$(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) + @$(CLANG) -target arm64-apple-macos11 -mmacosx-version-min=11.0 -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll @opt -S --slp-vectorizer build/opt.ll -o build/slp.ll @opt -S --adce --dse build/slp.ll -o build/dce.ll - @$(CLANG) -o0 build/dce.ll -o build/final + @$(CLANG) -target arm64-apple-macos11 -mmacosx-version-min=11.0 -o0 build/dce.ll -o build/final @build/final print-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/conv-2d.c b/src/dios-egraphs/Diospyros/benchmarks/optimized/conv-2d.c new file mode 100644 index 00000000..a47953a0 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/optimized/conv-2d.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include + +#define I_ROWS 5 +#define I_COLS 5 +#define F_ROWS 3 +#define F_COLS 3 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = 0; + } + convolution(mat_in, f_in, mat_out); + // calculate expected + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + expected[outRow * O_COLS + outCol] += v; + } + } + } + } + } + for (int i = 0; i < O_ROWS * O_COLS; i++) { + printf("--------------------------\n"); + printf("calculated: %f\n", mat_out[i]); + printf("expected: %f\n", expected[i]); + printf("difference: %f\n", expected[i] - mat_out[i]); + } + for (int i = 0; i < O_ROWS * O_COLS; i++) { + assert(fabs(expected[i] - mat_out[i]) < DELTA); + } + return 0; +} \ No 
newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/optimized/mat-mul.c index 898b75ea..e5437869 100644 --- a/src/dios-egraphs/Diospyros/benchmarks/optimized/mat-mul.c +++ b/src/dios-egraphs/Diospyros/benchmarks/optimized/mat-mul.c @@ -5,11 +5,12 @@ #include #include -#define A_ROWS 12 -#define A_COLS 12 -#define B_COLS 12 +#define A_ROWS 5 +#define A_COLS 5 +#define B_COLS 5 #define MAX_FLOAT 100.00f #define DELTA 0.1f +#define NITER 1000000000 void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], float b_in[restrict A_COLS * B_COLS], @@ -61,7 +62,7 @@ int main(void) { start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; // calculate up c_out - for (int i = 0; i < 10000; i++) { + for (int i = 0; i < NITER; i++) { matrix_multiply(a_in, b_in, c_out); } @@ -71,8 +72,8 @@ int main(void) { // report difference in runtime double diff = difftime(end, start); - printf("%ld milliseconds elapsed over 10000 iterations total\n", - (end - start)); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); return 0; } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/qprod.c b/src/dios-egraphs/Diospyros/benchmarks/optimized/qprod.c new file mode 100644 index 00000000..403efae3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/optimized/qprod.c @@ -0,0 +1,117 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 4 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +__attribute__((always_inline)) void naive_cross_product(float *lhs, float *rhs, + float *result) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +__attribute__((always_inline)) void naive_point_product(float *q, float *p, + float *result) { + float qvec[3] = {q[0], q[1], q[2]}; + float uv[3]; + naive_cross_product(qvec, p, uv); + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + float qxuv[3]; + naive_cross_product(qvec, uv, qxuv); + + for (int i = 0; i < 3; i++) { + result[i] = p[i] + q[3] * uv[i] + qxuv[i]; + } +} + +void naive_quaternion_product(float *a_q, float *a_t, float *b_q, float *b_t, + float *r_q, float *r_t) { + r_q[3] = + a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * b_q[2]; + r_q[0] = + a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - a_q[2] * b_q[1]; + r_q[1] = + a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * b_q[2]; + r_q[2] = + a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * b_q[0]; + + naive_point_product(a_q, b_t, r_t); + for (int i = 0; i < 3; i++) { + r_t[i] += a_t[i]; + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float a_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + a_q[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float a_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + a_t[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float b_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + b_q[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float b_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + b_t[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float r_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + r_q[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float r_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + r_t[i] = (float)rand() / 
(float)(RAND_MAX / MAX_FLOAT); + } + float expectedq[SIZE]; + for (int i = 0; i < SIZE; i++) { + expectedq[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expectedt[SIZE]; + for (int i = 0; i < SIZE; i++) { + expectedt[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + naive_quaternion_product(a_q, a_t, b_q, b_t, r_q, r_t); + expectedq[3] = + a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * b_q[2]; + expectedq[0] = + a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - a_q[2] * b_q[1]; + expectedq[1] = + a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * b_q[2]; + expectedq[2] = + a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * b_q[0]; + + naive_point_product(a_q, b_t, expectedt); + for (int i = 0; i < 3; i++) { + expectedt[i] += a_t[i]; + } + for (int i = 0; i < SIZE; i++) { + printf("Calculated q: %f\n", r_q[i]); + printf("Expected q: %f\n", expectedq[i]); + assert(fabs(expectedq[i] - r_q[i]) < DELTA); + } + for (int i = 0; i < 3; i++) { + printf("Calculated t: %f\n", r_t[i]); + printf("Expected t: %f\n", expectedt[i]); + assert(fabs(expectedt[i] - r_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/qr-decomp.c b/src/dios-egraphs/Diospyros/benchmarks/optimized/qr-decomp.c new file mode 100644 index 00000000..f454cdb0 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/optimized/qr-decomp.c @@ -0,0 +1,292 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 4 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f +#define NITER 100000 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float *a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float *a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(SIZE * SIZE, sizeof(float)); + for 
(int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(m, sizeof(float)); + float *e = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(m, sizeof(float)); + float *v = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(m * m, sizeof(float)); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(SIZE * SIZE, sizeof(float)); + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(m, sizeof(float)); + float *e = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(m, sizeof(float)); + float *v = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(m * m, sizeof(float)); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(SIZE * SIZE, sizeof(float)); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float expectedQ[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + naive_fixed_qr_decomp(A, Q, R); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/stencil-2d.c b/src/dios-egraphs/Diospyros/benchmarks/optimized/stencil-2d.c new file mode 100644 index 00000000..8432ebd7 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/optimized/stencil-2d.c @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 15 +#define COL_SIZE 16 +#define F_SIZE 9 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float 
sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + stencil(orig_in, sol_out, filter_in); + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + expected[(r * COL_SIZE) + c] = temp; + } + } + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("calculated: %f\n", sol_out[i]); + printf("expected: %f\n", expected[i]); + printf("difference: %f\n", expected[i] - sol_out[i]); + } + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + assert(fabs(expected[i] - sol_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/add-mul-interleave.c b/src/dios-egraphs/Diospyros/c-tests/add-mul-interleave.c new file mode 100644 index 00000000..f200b105 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/add-mul-interleave.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void interleave(float a_in[restrict SIZE], float b_in[restrict SIZE], + float c_out[restrict SIZE]) { + c_out[0] = a_in[0] + b_in[0]; + c_out[1] = a_in[1] * b_in[1]; + c_out[2] = a_in[2] + b_in[2]; + c_out[3] = a_in[3] * b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + interleave(a_in, b_in, c_out); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + assert(c_out[0] == 6); + assert(c_out[1] == 12); + assert(c_out[2] == 10); + assert(c_out[3] == 32); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/plot-utilities/plot.py b/src/dios-egraphs/Diospyros/plot-utilities/plot.py new file mode 100644 index 00000000..5392f961 --- /dev/null +++ b/src/dios-egraphs/Diospyros/plot-utilities/plot.py @@ -0,0 +1,6 @@ +import matplotlib as plt +import sys + + +def plot(): + csv_file = sys.argv[1] diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 8773a60f..0ba591ee 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -912,7 +912,11 @@ unsafe fn store_to_llvm(val_id: &Id, gep_id: &Id, md: &mut Egg2LLVMState) -> LLV panic!("Store2LLVM: Expected a successful lookup in get2gep, but cannot find Gep ID."); } -unsafe fn aligned_consec_loadvec_to_llvm(gep1_id: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { +unsafe fn aligned_consec_loadvec_to_llvm( + gep1_id: &Id, + load_vector_width: u32, + md: &mut Egg2LLVMState, +) -> LLVMValueRef { // New code to handle an aligned and consecutive vector load let gep1_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); let gep1_llvm_instr = md @@ -925,7 +929,7 @@ unsafe fn aligned_consec_loadvec_to_llvm(gep1_id: &Id, md: &mut Egg2LLVMState) - md.builder, *gep1_llvm_instr, LLVMPointerType( - LLVMVectorType(LLVMFloatTypeInContext(md.context), 4), + LLVMVectorType(LLVMFloatTypeInContext(md.context), load_vector_width), address_space, ), b"scalar-to-vector-type-bit-cast\0".as_ptr() as *const _, @@ -943,6 +947,8 @@ unsafe fn loadvec_to_llvm( gep2_id: 
&Id, gep3_id: &Id, gep4_id: &Id, + base_ids_vec: &Id, + offsets_id_vec: &Id, md: &mut Egg2LLVMState, ) -> LLVMValueRef { // Set Opaque Pointer ness @@ -951,6 +957,12 @@ unsafe fn loadvec_to_llvm( let gep3_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep3_id)], md); let gep4_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep4_id)], md); + let mut base_data = get_shuf_vec_data(base_ids_vec, md); + let mut offsets_data = get_shuf_vec_data(offsets_id_vec, md); + + base_data.dedup(); + offsets_data.dedup(); + let gep1_llvm_instr = md .llvm2egg_metadata .get2gep @@ -972,6 +984,52 @@ unsafe fn loadvec_to_llvm( .get(&gep4_id_val) .expect("Value of gep4 id should exist in get2gep"); + // special case: when all the bases and offsets are the same, do a shuffle, whcih can represent a splat. Splats are fast operations + // Build up shuf mask + if base_data.len() == 1 && offsets_data.len() == 1 { + let mut mask = Vec::new(); + for _ in 0..4 { + mask.push(LLVMConstInt( + LLVMInt32TypeInContext(md.context), + 0 as u64, + 0 as i32, + )); + } + + let single_load = LLVMBuildLoad( + md.builder, + *gep1_llvm_instr, + b"splat-load\0".as_ptr() as *const _, + ); + + let mut zeros = Vec::new(); + for _ in 0..4 { + zeros.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 0 as f64)); + } + let zeros_ptr = zeros.as_mut_ptr(); + let mut zero_vector = LLVMConstVector(zeros_ptr, 4); + for (idx, llvm_instr) in vec![single_load].iter().enumerate() { + // Construct the Vector + zero_vector = LLVMBuildInsertElement( + md.builder, + zero_vector, + *llvm_instr, + LLVMConstInt(LLVMIntTypeInContext(md.context, 32), idx as u64, 0), + b"\0".as_ptr() as *const _, + ); + } + + let shuf_mask = LLVMConstVector(mask.as_mut_ptr(), vector_width() as u32); + let shuffle_vec = LLVMBuildShuffleVector( + md.builder, + zero_vector, + zero_vector, + shuf_mask, + b"\0".as_ptr() as *const _, + ); + return shuffle_vec; + } + let vector_width = 4; let floatptr_type = LLVMTypeOf(*gep1_llvm_instr); let vec4ptr_type = LLVMVectorType(floatptr_type, vector_width); @@ -1563,18 +1621,21 @@ unsafe fn vecsgn_to_llvm(vec: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { unsafe fn get_shuf_vec_data(shuf_vec_id: &Id, md: &mut Egg2LLVMState) -> Vec { let mut results_vec = Vec::new(); - match &md.egg_nodes_vector[usize::from(*shuf_vec_id)] { - VecLang::Vec(boxed_ids) => { + let match_expr = &md.egg_nodes_vector[usize::from(*shuf_vec_id)]; + match match_expr { + VecLang::DataVec(boxed_ids) => { let idvec = boxed_ids.to_vec(); - let idvec_len = idvec.len(); - for (idx, &eggid) in idvec.iter().enumerate() { + for eggid in idvec { match &md.egg_nodes_vector[usize::from(eggid)] { VecLang::Num(n) => results_vec.push(*n), _ => panic!("Each element of a shuf vec needs to be a num"), } } } - _ => panic!("Shuf Vec Id should point to a vector of numbers"), + _ => { + println!("{:?}", match_expr); + panic!("Shuf Vec Id should point to a vector of numbers") + } } return results_vec; } @@ -1610,6 +1671,34 @@ unsafe fn shuffle_to_llvm( ) } +unsafe fn join_to_llvm( + left_vec_id: &Id, + right_vec_id: &Id, + md: &mut Egg2LLVMState, +) -> LLVMValueRef { + let left_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_vec_id)], md); + let right_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_vec_id)], md); + + // Build up shuf mask + let mut mask = Vec::new(); + for i in 0..4 { + mask.push(LLVMConstInt( + LLVMInt32TypeInContext(md.context), + i as u64, + 0 as i32, + )); + } + let shuf_mask = LLVMConstVector(mask.as_mut_ptr(), vector_width() as 
u32); + + LLVMBuildShuffleVector( + md.builder, + left_vec, + right_vec, + shuf_mask, + b"\0".as_ptr() as *const _, + ) +} + /** * Vector representing No Optimization: Egg will not have modified the vector at all. */ @@ -1672,6 +1761,7 @@ unsafe fn egg_to_llvm( VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { vec_to_llvm(&*boxed_ids, translation_metadata) } + VecLang::DataVec(boxed_ids) => panic!("Cannot translate a datavec"), VecLang::NoOptVec(boxed_ids) => nooptvector_to_llvm(boxed_ids, translation_metadata), VecLang::VecAdd([l, r]) | VecLang::VecMinus([l, r]) @@ -1693,10 +1783,13 @@ unsafe fn egg_to_llvm( // VecSgn compliant with c++ LibMath copysign function, which differs with sgn at x = 0. VecLang::VecSgn([v]) => vecsgn_to_llvm(v, translation_metadata), VecLang::Sgn([n]) | VecLang::Sqrt([n]) | VecLang::Neg([n]) => scalar_unop_to_llvm(n, egg_node, translation_metadata), - VecLang::VecLoad([gep1_id, gep2_id, gep3_id, gep4_id]) => loadvec_to_llvm(gep1_id, gep2_id, gep3_id, gep4_id, translation_metadata), + VecLang::VecLoad([gep1_id, gep2_id, gep3_id, gep4_id, base_ids_vec, offsets_id_vec]) => loadvec_to_llvm(gep1_id, gep2_id, gep3_id, gep4_id, base_ids_vec, offsets_id_vec, translation_metadata), VecLang::VecStore([val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id]) => storevec_to_llvm(val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id, translation_metadata), - VecLang::AlignedConsecVecLoad([gep_id]) => aligned_consec_loadvec_to_llvm(gep_id, translation_metadata), + VecLang::AlignedConsecVecLoad([gep_id]) => aligned_consec_loadvec_to_llvm(gep_id, 4, translation_metadata), VecLang::Shuffle([data_vec_id, shuf_vec_id]) => shuffle_to_llvm(data_vec_id, shuf_vec_id, translation_metadata), + VecLang::Join([left, right]) => join_to_llvm(left, right, translation_metadata), + VecLang::VecTwo(boxed_ids) => vec_to_llvm(&*boxed_ids, translation_metadata), + VecLang::AlignedConsecVecLoad2([gep_id]) => aligned_consec_loadvec_to_llvm(gep_id, 2, translation_metadata), } } @@ -1864,6 +1957,22 @@ unsafe fn canonicalize_quintuple( whole_vector } +unsafe fn canoncalize_ntuple( + can_change_vector: bool, + vector_elements: &[Id], + final_element: VecLang, + old_egg_nodes: &[VecLang], +) -> Vec { + let mut whole_vector = Vec::new(); + for vec_elt in vector_elements { + let mut trans_val = + canonicalize_egg(false, &old_egg_nodes[usize::from(*vec_elt)], old_egg_nodes); + whole_vector.append(&mut trans_val); + } + whole_vector.push(final_element); + whole_vector +} + unsafe fn canonicalize_vec_type( can_change_vector: bool, constructor: VecLangBoxedConstructor, @@ -1918,6 +2027,7 @@ unsafe fn canonicalize_egg( VecLang::List(_) => panic!("List was found. 
Egg canonicalization does not handle list nodes."), VecLang::LitVec(boxed_ids) => canonicalize_vec_type(can_change_vector, |change_vec_type, boxed| -> VecLang {if change_vec_type {VecLang::NoOptVec(boxed)} else {VecLang::LitVec(boxed)}}, boxed_ids, old_egg_nodes), VecLang::Vec(boxed_ids) => canonicalize_vec_type(can_change_vector, |change_vec_type, boxed| -> VecLang {if change_vec_type {VecLang::NoOptVec(boxed)} else {VecLang::Vec(boxed)}}, boxed_ids, old_egg_nodes), + VecLang::DataVec(boxed_ids) => canonicalize_vec_type(can_change_vector, |change_vec_type, boxed| -> VecLang {VecLang::DataVec(boxed)}, boxed_ids, old_egg_nodes), VecLang::VecAdd([l, r])=> canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::VecAdd(pair)}, l, r, old_egg_nodes), VecLang::VecMinus([l, r])=> canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::VecMinus(pair)},l, r, old_egg_nodes), VecLang::VecMul([l, r])=> canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::VecMul(pair)}, l, r, old_egg_nodes), @@ -1940,10 +2050,13 @@ unsafe fn canonicalize_egg( VecLang::Sgn([n]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::Sgn(single)}, n, old_egg_nodes ), VecLang::Sqrt([n]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::Sqrt(single)}, n, old_egg_nodes ), VecLang::Neg([n]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::Neg(single)}, n, old_egg_nodes ), - VecLang::VecLoad([gep1_id, gep2_id, gep3_id, gep4_id]) => canonicalize_quadruple(can_change_vector,|quad| -> VecLang {VecLang::VecLoad(quad)}, gep1_id, gep2_id, gep3_id, gep4_id, old_egg_nodes ), + VecLang::VecLoad([gep1_id, gep2_id, gep3_id, gep4_id, base_ids_vec, offsets_id_vec]) => canoncalize_ntuple(can_change_vector, &[*gep1_id, *gep2_id, *gep3_id, *gep4_id, *base_ids_vec, *offsets_id_vec], VecLang::VecLoad([*gep1_id, *gep2_id, *gep3_id, *gep4_id, *base_ids_vec, *offsets_id_vec]), old_egg_nodes), VecLang::VecStore([val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id]) => canonicalize_quintuple(can_change_vector,|quint| -> VecLang {VecLang::VecStore(quint)}, val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id, old_egg_nodes ), - VecLang::AlignedConsecVecLoad([get_id]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::AlignedConsecVecLoad(single)}, get_id, old_egg_nodes ), + VecLang::AlignedConsecVecLoad([gep_id]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::AlignedConsecVecLoad(single)}, gep_id, old_egg_nodes ), VecLang::Shuffle([data_vec_id, shuf_vec_id]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Shuffle(pair)}, data_vec_id, shuf_vec_id, old_egg_nodes), + VecLang::Join([left, right]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Join(pair)}, left, right, old_egg_nodes), + VecLang::VecTwo(boxed_ids) => canonicalize_vec_type(can_change_vector, |change_vec_type, boxed| -> VecLang {VecLang::VecTwo(boxed)}, boxed_ids, old_egg_nodes), + VecLang::AlignedConsecVecLoad2([gep_id]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::AlignedConsecVecLoad2(single)}, gep_id, old_egg_nodes ), } } From 44c75b742165bd5bc58286e90eb3d47dcd811bb6 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 5 Jul 2023 21:43:25 -0400 Subject: [PATCH 121/143] all tests pass for the memory checks on geps and for optimization of the compilation time with the templates, bugs fixed --- .../Diospyros/benchmarks/optimized/conv-2d.c | 85 ++-- 
.../Diospyros/benchmarks/optimized/qprod.c | 77 ++- .../benchmarks/optimized/stencil-2d.c | 30 ++ .../c-tests/4-by-4-matrix-multiply.c | 53 +++ src/dios-egraphs/Diospyros/c-tests/fft.c | 69 ++- src/dios-egraphs/Diospyros/diospyros.cpp | 309 ++++++------ src/dios-egraphs/Diospyros/src/lib.rs | 442 ++++++++++++++---- .../Diospyros/stencil-2d-results.txt | 8 + 8 files changed, 784 insertions(+), 289 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/c-tests/4-by-4-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/stencil-2d-results.txt diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/conv-2d.c b/src/dios-egraphs/Diospyros/benchmarks/optimized/conv-2d.c index a47953a0..08e9d403 100644 --- a/src/dios-egraphs/Diospyros/benchmarks/optimized/conv-2d.c +++ b/src/dios-egraphs/Diospyros/benchmarks/optimized/conv-2d.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #define I_ROWS 5 @@ -12,6 +13,7 @@ #define O_COLS ((I_COLS + F_COLS) - 1) #define MAX_FLOAT 100.00f #define DELTA 0.1f +#define NITER 1000000000 void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], float mat_out[O_ROWS * O_COLS]) { @@ -55,35 +57,62 @@ int main(void) { for (int i = 0; i < O_ROWS * O_COLS; i++) { expected[i] = 0; } - convolution(mat_in, f_in, mat_out); - // calculate expected - for (int outRow = 0; outRow < O_ROWS; outRow++) { - for (int outCol = 0; outCol < O_COLS; outCol++) { - for (int fRow = 0; fRow < F_ROWS; fRow++) { - for (int fCol = 0; fCol < F_COLS; fCol++) { - int fRowTrans = F_ROWS - 1 - fRow; - int fColTrans = F_COLS - 1 - fCol; - int iRow = outRow - fRowTrans; - int iCol = outCol - fColTrans; - if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && - iCol < I_COLS) { - float v = mat_in[iRow * I_COLS + iCol] * - f_in[fRowTrans * F_COLS + fColTrans]; - expected[outRow * O_COLS + outCol] += v; - } - } - } - } - } - for (int i = 0; i < O_ROWS * O_COLS; i++) { - printf("--------------------------\n"); - printf("calculated: %f\n", mat_out[i]); - printf("expected: %f\n", expected[i]); - printf("difference: %f\n", expected[i] - mat_out[i]); - } - for (int i = 0; i < O_ROWS * O_COLS; i++) { - assert(fabs(expected[i] - mat_out[i]) < DELTA); + // This stackoverflow post explains how to calculate walk clock time. 
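+    // A minimal sketch of the pattern used below (gettimeofday from <sys/time.h>):
+    //     gettimeofday(&timecheck, NULL);
+    //     start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000;
+    // i.e. both readings are converted to milliseconds and then subtracted.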
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; + + // // calculate expected + // for (int outRow = 0; outRow < O_ROWS; outRow++) { + // for (int outCol = 0; outCol < O_COLS; outCol++) { + // for (int fRow = 0; fRow < F_ROWS; fRow++) { + // for (int fCol = 0; fCol < F_COLS; fCol++) { + // int fRowTrans = F_ROWS - 1 - fRow; + // int fColTrans = F_COLS - 1 - fCol; + // int iRow = outRow - fRowTrans; + // int iCol = outCol - fColTrans; + + // if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + // iCol < I_COLS) { + // float v = mat_in[iRow * I_COLS + iCol] * + // f_in[fRowTrans * F_COLS + fColTrans]; + // expected[outRow * O_COLS + outCol] += v; + // } + // } + // } + // } + // } + // for (int i = 0; i < O_ROWS * O_COLS; i++) { + // printf("--------------------------\n"); + // printf("calculated: %f\n", mat_out[i]); + // printf("expected: %f\n", expected[i]); + // printf("difference: %f\n", expected[i] - mat_out[i]); + // } + // for (int i = 0; i < O_ROWS * O_COLS; i++) { + // assert(fabs(expected[i] - mat_out[i]) < DELTA); + // } + // return 0; } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/qprod.c b/src/dios-egraphs/Diospyros/benchmarks/optimized/qprod.c index 403efae3..00675f1b 100644 --- a/src/dios-egraphs/Diospyros/benchmarks/optimized/qprod.c +++ b/src/dios-egraphs/Diospyros/benchmarks/optimized/qprod.c @@ -4,12 +4,14 @@ #include #include #include +#include #include #define SIZE 4 #define MAX_FLOAT 100.00f #define DELTA 0.1f +#define NITER 1000000000 __attribute__((always_inline)) void naive_cross_product(float *lhs, float *rhs, float *result) { @@ -90,28 +92,59 @@ int main(void) { for (int i = 0; i < SIZE; i++) { expectedt[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); } - naive_quaternion_product(a_q, a_t, b_q, b_t, r_q, r_t); - expectedq[3] = - a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * b_q[2]; - expectedq[0] = - a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - a_q[2] * b_q[1]; - expectedq[1] = - a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * b_q[2]; - expectedq[2] = - a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * b_q[0]; - naive_point_product(a_q, b_t, expectedt); - for (int i = 0; i < 3; i++) { - expectedt[i] += a_t[i]; - } - for (int i = 0; i < SIZE; i++) { - printf("Calculated q: %f\n", r_q[i]); - printf("Expected q: %f\n", expectedq[i]); - assert(fabs(expectedq[i] - r_q[i]) < DELTA); - } - for (int i = 0; i < 3; i++) { - printf("Calculated t: %f\n", r_t[i]); - printf("Expected t: %f\n", expectedt[i]); - assert(fabs(expectedt[i] - r_t[i]) < DELTA); + // This stackoverflow post explains how to calculate walk clock time. 
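+    // What the loop below times: NITER calls to naive_quaternion_product, i.e. the
+    // Hamilton product r_q = a_q * b_q (scalar part stored in index 3) followed by
+    // r_t = a_t + naive_point_product(a_q, b_t).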
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + naive_quaternion_product(a_q, a_t, b_q, b_t, r_q, r_t); } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; + + // expectedq[3] = + // a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * + // b_q[2]; + // expectedq[0] = + // a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - a_q[2] * + // b_q[1]; + // expectedq[1] = + // a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * + // b_q[2]; + // expectedq[2] = + // a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * + // b_q[0]; + + // naive_point_product(a_q, b_t, expectedt); + // for (int i = 0; i < 3; i++) { + // expectedt[i] += a_t[i]; + // } + // for (int i = 0; i < SIZE; i++) { + // printf("Calculated q: %f\n", r_q[i]); + // printf("Expected q: %f\n", expectedq[i]); + // assert(fabs(expectedq[i] - r_q[i]) < DELTA); + // } + // for (int i = 0; i < 3; i++) { + // printf("Calculated t: %f\n", r_t[i]); + // printf("Expected t: %f\n", expectedt[i]); + // assert(fabs(expectedt[i] - r_t[i]) < DELTA); + // } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/stencil-2d.c b/src/dios-egraphs/Diospyros/benchmarks/optimized/stencil-2d.c index 8432ebd7..a9331ba3 100644 --- a/src/dios-egraphs/Diospyros/benchmarks/optimized/stencil-2d.c +++ b/src/dios-egraphs/Diospyros/benchmarks/optimized/stencil-2d.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #define ROW_SIZE 15 @@ -10,6 +11,7 @@ #define MAX_FLOAT 100.00f #define DELTA 0.1f +#define NITER 1000000000 void stencil(float orig_in[ROW_SIZE * COL_SIZE], float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { @@ -46,6 +48,34 @@ int main(void) { for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { expected[i] = 1; } + + // This stackoverflow post explains how to calculate walk clock time. 
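+    // The report below prints total milliseconds over NITER iterations, so the
+    // average cost of one stencil() call is (end - start) / NITER milliseconds.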
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; + stencil(orig_in, sol_out, filter_in); for (int r = 0; r < ROW_SIZE - 2; r++) { for (int c = 0; c < COL_SIZE - 2; c++) { diff --git a/src/dios-egraphs/Diospyros/c-tests/4-by-4-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/4-by-4-matrix-multiply.c new file mode 100644 index 00000000..4e9f294a --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/4-by-4-matrix-multiply.c @@ -0,0 +1,53 @@ +#include +#include + +#define A_ROWS 4 +#define A_COLS 4 +#define B_COLS 4 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + float a_in[A_ROWS][A_COLS] = { + {1, 2, 3, 1}, {1, 2, 3, 1}, {4, 5, 6, 1}, {7, 8, 9, 1}}; + float b_in[A_COLS][B_COLS] = { + {1, 2, 3, 1}, {1, 2, 3, 1}, {4, 5, 6, 1}, {7, 8, 9, 1}}; + float c_out[A_ROWS][B_COLS] = { + {1, 2, 3, 1}, {1, 2, 3, 1}, {4, 5, 6, 1}, {7, 8, 9, 1}}; + float expected_c_out[A_ROWS][B_COLS] = { + {1, 2, 3, 1}, {1, 2, 3, 1}, {4, 5, 6, 1}, {7, 8, 9, 1}}; + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected_c_out); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + assert(expected_c_out[i][j] == c_out[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/fft.c b/src/dios-egraphs/Diospyros/c-tests/fft.c index a3ba2fbd..cb0cb66d 100644 --- a/src/dios-egraphs/Diospyros/c-tests/fft.c +++ b/src/dios-egraphs/Diospyros/c-tests/fft.c @@ -11,9 +11,12 @@ #define MAX_FLOAT 100.00f #define DELTA 0.1f -void fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], - float img_twid_in[SIZE / 2], float real_out[SIZE], - float img_out[SIZE]) { +#define MAX_FOR_LOOP_ITERATIONS 1000 + +void fft_for_loop_version(float real_in[SIZE], float img_in[SIZE], + float real_twid_in[SIZE / 2], + float img_twid_in[SIZE / 2], float real_out[SIZE], + float img_out[SIZE]) { int even = 0; int odd = 0; int log = 0; @@ -26,9 +29,17 @@ void fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], img_out[i] = img_in[i]; } - while (span != 0) { +#pragma unroll + for (int i = 0; i < MAX_FOR_LOOP_ITERATIONS; i++) { + if 
(span == 0) { + break; + } odd = span; - while (odd < SIZE) { +#pragma unroll + for (int j = 0; j < MAX_FOR_LOOP_ITERATIONS; j++) { + if (odd >= SIZE) { + break; + } odd = odd | span; even = odd ^ span; @@ -55,6 +66,51 @@ void fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], } } +// void fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / +// 2], +// float img_twid_in[SIZE / 2], float real_out[SIZE], +// float img_out[SIZE]) { +// int even = 0; +// int odd = 0; +// int log = 0; +// int rootindex = 0; +// int span = SIZE >> 1; +// float temp = 0; + +// for (int i = 0; i < SIZE; i++) { +// real_out[i] = real_in[i]; +// img_out[i] = img_in[i]; +// } + +// while (span != 0) { +// odd = span; +// while (odd < SIZE) { +// odd = odd | span; +// even = odd ^ span; + +// temp = real_out[even] + real_out[odd]; +// real_out[odd] = real_out[even] - real_out[odd]; +// real_out[even] = temp; + +// temp = img_out[even] + img_out[odd]; +// img_out[odd] = img_out[even] - img_out[odd]; +// img_out[even] = temp; + +// rootindex = (even << log) & (SIZE - 1); +// if (rootindex > 0) { +// temp = real_twid_in[rootindex] * real_out[odd] - +// img_twid_in[rootindex] * img_out[odd]; +// img_out[odd] = real_twid_in[rootindex] * img_out[odd] + +// img_twid_in[rootindex] * real_out[odd]; +// real_out[odd] = temp; +// } +// odd += 1; +// } +// span >>= 1; +// log += 1; +// } +// } + void no_opt_fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], float img_twid_in[SIZE / 2], float real_out[SIZE], float img_out[SIZE]) { @@ -148,7 +204,8 @@ int main(void) { expected_img_out[i] = n; } - fft(real_in, img_in, real_twid_in, img_twid_in, real_out, img_out); + fft_for_loop_version(real_in, img_in, real_twid_in, img_twid_in, real_out, + img_out); no_opt_fft(expected_real_in, expected_img_in, expected_real_twid_in, expected_img_twid_in, expected_real_out, expected_img_out); diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index bad74e87..7bc1a7cc 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -678,13 +678,6 @@ std::vector> build_chunks(BasicBlock *B, } } - for (std::size_t i = 0; i < final_chunks.size(); ++i) { - errs() << "This is chunk " << i << "\n"; - for (auto instr : final_chunks[i]) { - errs() << *instr << "\n"; - } - } - return final_chunks; } @@ -702,8 +695,10 @@ using ad_trees_t = std::vector; * * Returns a Tuple (Success/Failure , Instructions accumulated) */ -std::pair recurse_llvm(Value *value, - std::set chunk_instrs) { +std::pair recurse_llvm( + Value *value, std::set chunk_instrs, + std::set basic_block_instrs, bool not_for_mem_constraint) { + errs() << *value << "\n"; // Constants if (isa(value)) { // DO not add constant, if i recall, constants are not llvm @@ -711,22 +706,44 @@ std::pair recurse_llvm(Value *value, return std::make_pair(true, std::vector{}); } if (Instruction *instr = dyn_cast(value)) { - // No Longer in Chunk - if (chunk_instrs.count(instr) == 0) { - errs() << "Instruction has left the chunk\n" << *instr << "\n"; - return std::make_pair(false, {}); + if (not_for_mem_constraint) { + // No Longer in Chunk + if (chunk_instrs.count(instr) == 0) { + return std::make_pair(false, {}); + } + } else { + // No Longer in Basic Block + if (basic_block_instrs.count(instr) == 0) { + return std::make_pair(false, {}); + } } // Base case instructions if (isa(instr) || isa(instr)) { + // there should not be a load isntr when checking memory 
instrs + if (!not_for_mem_constraint && isa(instr)) { + return std::make_pair(false, std::vector{instr}); + } return std::make_pair(true, std::vector{instr}); } + // allow for alloca in mem constraint checking + if (!not_for_mem_constraint && isa(instr)) { + return std::make_pair(true, std::vector{instr}); + } + + // Phi is trouble, stop at Phis - previously caused recursion to fill + // stack, and also change results. + if (isa(instr)) { + return std::make_pair(false, std::vector{instr}); + } + // Recurse on Store Instructions if (isa(instr) && instr->getOperand(0)->getType()->isFloatTy()) { auto [child_b, child_tree] = - recurse_llvm(instr->getOperand(0), chunk_instrs); + recurse_llvm(instr->getOperand(0), chunk_instrs, + basic_block_instrs, not_for_mem_constraint); if (child_b) { child_tree.push_back(instr); return std::make_pair(true, child_tree); @@ -736,7 +753,8 @@ std::pair recurse_llvm(Value *value, // Recurse on supported unary operators OR Store Instructions if (instr->getOpcode() == Instruction::FNeg) { auto [child_b, child_tree] = - recurse_llvm(instr->getOperand(0), chunk_instrs); + recurse_llvm(instr->getOperand(0), chunk_instrs, + basic_block_instrs, not_for_mem_constraint); if (child_b) { child_tree.push_back(instr); return std::make_pair(true, child_tree); @@ -749,9 +767,11 @@ std::pair recurse_llvm(Value *value, instr->getOpcode() == Instruction::FDiv || instr->getOpcode() == Instruction::FMul) { auto [left_b, left_tree] = - recurse_llvm(instr->getOperand(0), chunk_instrs); + recurse_llvm(instr->getOperand(0), chunk_instrs, + basic_block_instrs, not_for_mem_constraint); auto [right_b, right_tree] = - recurse_llvm(instr->getOperand(1), chunk_instrs); + recurse_llvm(instr->getOperand(1), chunk_instrs, + basic_block_instrs, not_for_mem_constraint); if (left_b && right_b) { left_tree.insert(left_tree.end(), right_tree.begin(), right_tree.end()); @@ -761,9 +781,83 @@ std::pair recurse_llvm(Value *value, } } - // Unhandled Instruction - errs() << "Unhandled Instruction\n" << *value << "\n"; - return std::make_pair(false, std::vector{}); + if (not_for_mem_constraint) { + // Unhandled Instruction + return std::make_pair(false, std::vector{}); + } + + if (Instruction *value_as_instr = dyn_cast(value)) { + std::vector combined_instrs = {}; + bool combined_result = true; + for (auto &operand : value_as_instr->operands()) { + auto [child_result, child_tree] = + recurse_llvm(operand, chunk_instrs, basic_block_instrs, + not_for_mem_constraint); + combined_result = combined_result && child_result; + combined_instrs.insert(combined_instrs.end(), child_tree.begin(), + child_tree.end()); + } + return std::make_pair(combined_result, combined_instrs); + } + return std::make_pair(true, std::vector{}); +} + +bool check_single_memory_address_constraint( + Value *memory_address_value, ad_trees_t prior_ad_trees, + std::set basic_block_instrs) { + auto [success, accumulated_instrs] = + recurse_llvm(memory_address_value, {}, basic_block_instrs, false); + // success only if all instructions are inside the same basic block + // success also only if instructions tree has no memory operaitons + // except for alloc/argument + if (!success) { + return false; + } + bool contained_in_prior_ad_tree = false; + for (auto instr : accumulated_instrs) { + for (auto prior_ad_tree : prior_ad_trees) { + for (auto prior_instr : prior_ad_tree) { + if (instr == prior_instr) { + contained_in_prior_ad_tree = true; + } + } + } + } + return !contained_in_prior_ad_tree; +} + +/** + Check each memory address for each memory 
operation in each ad tree + satisfies the following constraints: + 1. address computation tree contains no memory operations except for + alloc / argument + 2. each address computation instruction is not contained in a prior ad + tree + 3. each address computation only exists within 1 single basic block +*/ +bool check_memory_constraints(ad_tree_t curr_ad_tree, ad_trees_t prior_ad_trees, + std::set basic_block_instrs) { + bool constraint_success = true; + for (auto instr : curr_ad_tree) { + if (StoreInst *store = dyn_cast(instr)) { + Value *store_pointer = store->getPointerOperand(); + if (!check_single_memory_address_constraint( + store_pointer, prior_ad_trees, basic_block_instrs)) { + constraint_success = false; + if (!constraint_success) { + } + break; + } + } else if (LoadInst *load = dyn_cast(instr)) { + Value *load_pointer = load->getPointerOperand(); + if (!check_single_memory_address_constraint( + load_pointer, prior_ad_trees, basic_block_instrs)) { + constraint_success = false; + break; + } + } + } + return constraint_success; } /** @@ -771,7 +865,8 @@ std::pair recurse_llvm(Value *value, * instruction * */ -ad_trees_t build_ad_trees(chunk_t chunk) { +ad_trees_t build_ad_trees(chunk_t chunk, + std::set basic_block_instrs) { ad_trees_t ad_trees = {}; std::set chunk_instrs = {}; for (auto instr : chunk) { @@ -780,21 +875,24 @@ ad_trees_t build_ad_trees(chunk_t chunk) { for (auto instr : chunk) { if (isa(instr)) { // ad_tree_t new_tree = {}; - auto [success_b, ad_tree] = recurse_llvm(instr, chunk_instrs); + auto [success_b, ad_tree] = + recurse_llvm(instr, chunk_instrs, {}, true); if (success_b) { assert(ad_tree.size() != 0); + } else { + continue; } - if (success_b) { + + // Check each memory address for each memory operation in each + // ad tree + bool mem_constraint_result = + check_memory_constraints(ad_tree, ad_trees, basic_block_instrs); + + if (mem_constraint_result) { ad_trees.push_back(ad_tree); } } } - for (auto ad_tree : ad_trees) { - errs() << "New AD Tree\n"; - for (auto instr : ad_tree) { - errs() << *instr << "\n"; - } - } return ad_trees; } @@ -812,36 +910,45 @@ std::vector join_trees( } /** - * True iff there is some load in a joined section of adtrees that MIGHT alias a - * store in the same tree. + * True iff there is some load in a joined section of adtrees that MIGHT + * alias a store in the same tree. 
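+ * ("MIGHT" is deliberate: the check below only asks alias analysis whether the
+ * load and store addresses can be proven disjoint; anything it cannot disprove
+ * is treated as a possible dependence. This is a conservative reading of the
+ * intent, not a claim about AA precision.)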
* - * Load-store aliasing causes problems in some situation where you have stores - * as functions of the same loads, but no vectoriszation occurs, so the code is - * rewritten linearly, and a memory dependency is introduced + * Load-store aliasing causes problems in some situation where you have + * stores as functions of the same loads, but no vectoriszation occurs, so + * the code is rewritten linearly, and a memory dependency is introduced * * From a bug in FFT.c */ chunks_t remove_load_store_alias(chunks_t chunks, AliasAnalysis *AA) { chunks_t final_chunks = {}; - std::vector load_addresses = {}; - std::vector store_addresses = {}; + std::vector> load_addresses = {}; + std::vector> store_addresses = {}; + int chunk_idx = 0; for (auto chunk : chunks) { + chunk_idx++; + for (auto instr : chunk) { if (isa(instr)) { Value *load_address = dyn_cast(instr)->getPointerOperand(); - load_addresses.push_back(load_address); + load_addresses.push_back({load_address, chunk_idx}); } else if (isa(instr)) { Value *store_address = dyn_cast(instr)->getPointerOperand(); - store_addresses.push_back(store_address); + store_addresses.push_back({store_address, chunk_idx}); } } bool can_add_to_final_chunks = true; - for (auto load_address : load_addresses) { - for (auto store_address : store_addresses) { - if (may_alias(load_address, store_address, AA)) { + for (auto [load_address, chunk_idx_load] : load_addresses) { + for (auto [store_address, chunk_idx_store] : store_addresses) { + if ((chunk_idx_load != + chunk_idx_store) && // if thhe load and store come + // from the same chunk, they + // cannot alias in a problem from + // the vectorizaiton as the loads + // will still come before stores + may_alias(load_address, store_address, AA)) { can_add_to_final_chunks = false; } } @@ -1013,8 +1120,8 @@ ad_trees_t sort_ad_trees(ad_trees_t ad_trees, // Sort each group of ad_trees by the stores in each group for (int i = 0; i < groups_of_trees.size(); i++) { - // NO IDEA WHY THIS WORKS, BUT ITERATING OVER ELEMENTS I SORTS PROPERLY - // BUT ITERATING OVER USING COLON DOES NOT! + // NO IDEA WHY THIS WORKS, BUT ITERATING OVER ELEMENTS I SORTS + // PROPERLY BUT ITERATING OVER USING COLON DOES NOT! std::sort(groups_of_trees[i].begin(), groups_of_trees[i].end(), store_sorter); } @@ -1046,16 +1153,14 @@ ad_trees_t sort_ad_trees(ad_trees_t ad_trees, // Grab only ad_trees that contain a 16 byte aligned reference at the // beginning - // Also the trees must be consecutive stores, e.g. the stores must differ by - // 4 bytes each time - // Finally, split the trees into smaller subtrees of size 4 + // Also the trees must be consecutive stores, e.g. 
the stores must + // differ by 4 bytes each time Finally, split the trees into smaller + // subtrees of size 4 - // We do this by accumulating a running sequence of ad_trees that satisfy - // the prerequisite conditions above + // We do this by accumulating a running sequence of ad_trees that + // satisfy the prerequisite conditions above std::vector> pruned_groups_of_trees = {}; - // std::vector running_collection = {}; - // int current_offset = -4; for (auto group : groups_of_trees) { // skip empty groups if (group.empty()) { @@ -1067,89 +1172,6 @@ ad_trees_t sort_ad_trees(ad_trees_t ad_trees, for (auto new_group : new_groups_of_trees) { pruned_groups_of_trees.push_back(new_group); } - - // for (auto tree : group) { - // // Grab basic information about the tree - // StoreInst *store = dyn_cast(tree.back()); - // // Get base ref for the first store - // Value *ref_a = base_of_array_vec[store_to_base_map.at(store)]; - // // get the difference from the store to its reference - // Value *store_a_ptr = store->getPointerOperand(); - // const SCEV *store_a_ptr_se = SE->getSCEV(store_a_ptr); - // const SCEV *ref_a_ptr_se = SE->getSCEV(ref_a); - // const SCEV *diff_a = SE->getMinusSCEV(store_a_ptr_se, - // ref_a_ptr_se); APInt min_val_a = SE->getSignedRangeMin(diff_a); - // APInt max_val_a = SE->getSignedRangeMax(diff_a); - // assert(min_val_a == max_val_a); - // int val_a = (int)max_val_a.roundToDouble(); - - // // If the running collection is empty - // // If the current tree is rooted at an aligned address, add - // to - // // the collection, and set the current offset - // // If the current tree is not rooted at an - // // aligned address, skip this tree. Set offset as -4 - // // Else if the running collection is not empty, and the length is - // // not the VECTOR WIDTH - // // Check the current offset of the tree, and if it s 4 off - // the - // // past offset, add on the tree and set its offset - // // Otherwise skip this tree and clear the running - // collection. - // // Set offset as -4. - // // Otherwise the running collection is not empty and the length - // is - // // the Vector Width - // // Remove the running collection and add it to a pruned - // group - // // If the current tree is at an aligned address, add to - // // collection. Set current offset - // // Otherwise, the tree is not rooted at an aligned - // // address and skip. 
Set offset as -4 - // if (running_collection.empty()) { - // if (is_aligned(val_a)) { - // running_collection.push_back(tree); - // current_offset = val_a; - // } else { - // current_offset = -4; - // } - // } else if (!running_collection.empty() && - // running_collection.size() < VECTOR_WIDTH) { - // if (current_offset + FLOAT_SIZE_IN_BYTES == val_a) { - // running_collection.push_back(tree); - // current_offset = val_a; - // } else { - // running_collection = {}; - // current_offset = -4; - // } - // } else if (!running_collection.empty() && - // running_collection.size() == VECTOR_WIDTH) { - // pruned_groups_of_trees.push_back(running_collection); - // running_collection = {}; - // if (is_aligned(val_a)) { - // running_collection.push_back(tree); - // current_offset = val_a; - // } else { - // current_offset = -4; - // } - // } else { - // throw "sort_ad_trees: Impossible case: Cannot have the size - // greater than VECTOR_WIDTH"; - // } - // } - - // if (!running_collection.empty() && - // running_collection.size() == VECTOR_WIDTH) { - // pruned_groups_of_trees.push_back(running_collection); - // } - } - errs() << "Pruned Group of Trees\n"; - for (auto group_of_trees : pruned_groups_of_trees) { - for (auto tree : group_of_trees) { - for (auto instr : tree) { - errs() << *instr << "\n"; - } - } } // Compress group of trees back into 1 ad_tree @@ -1175,10 +1197,10 @@ ad_trees_t sort_ad_trees(ad_trees_t ad_trees, */ std::vector> chunks_into_joined_trees( chunks_t chunks, AliasAnalysis *AA, std::vector base_of_array_vec, - ScalarEvolution *SE) { + ScalarEvolution *SE, std::set basic_block_instrs) { std::vector> trees = {}; for (auto chunk : chunks) { - ad_trees_t ad_trees = build_ad_trees(chunk); + ad_trees_t ad_trees = build_ad_trees(chunk, basic_block_instrs); // Join trees if the store instructions in the trees // do not alias each other @@ -1268,8 +1290,8 @@ bool run_optimization(std::vector chunk, Function &F, /** * Match each load with a pair of base id and offset * - * NOTE: A load might be associated with more than 1 base, we choose the first. - * THIS COULD BE A BUG in the future! + * NOTE: A load might be associated with more than 1 base, we choose the + * first. THIS COULD BE A BUG in the future! 
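+ * (If the first base is not the one a given load is actually addressed from,
+ * the recorded (base, offset) pair could be wrong; that is presumably the
+ * situation the warning above refers to.)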
*/ std::vector match_loads(std::vector loads, std::vector base_load_locations, @@ -1382,9 +1404,15 @@ struct DiospyrosPass : public FunctionPass { bool has_changes = true; for (auto &B : F) { + // Grab instructions in basic block + std::set basic_block_instrs = {}; + for (auto &I : B) { + basic_block_instrs.insert(&I); + } + auto chunks = build_chunks(&B, AA); - auto trees = - chunks_into_joined_trees(chunks, AA, base_of_array_vec, SE); + auto trees = chunks_into_joined_trees(chunks, AA, base_of_array_vec, + SE, basic_block_instrs); auto treerefs = instr2ref(trees); for (auto tree_chunk : treerefs) { @@ -1413,3 +1441,6 @@ static RegisterPass X("diospyros", "Diospyros Pass", static RegisterStandardPasses RegisterMyPass( PassManagerBuilder::EP_EarlyAsPossible, registerDiospyrosPass); + +// TODO check that no gep have a load in the calculation chain or some +// memory address \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 0ba591ee..8d1dd5e1 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -9,9 +9,9 @@ use libc::size_t; use llvm::{core::*, prelude::*, LLVMOpcode::*, LLVMRealPredicate}; use std::{ cmp, - collections::{BTreeMap, BTreeSet}, + collections::{BTreeMap, BTreeSet, HashMap}, os::raw::c_char, - slice::from_raw_parts, + slice::from_raw_parts, convert::TryInto, }; extern "C" { @@ -36,7 +36,7 @@ extern "C" { fn _isa_constaggregate(val: LLVMValueRef) -> bool; fn isa_integertype(val: LLVMValueRef) -> bool; fn _isa_intptr(val: LLVMValueRef) -> bool; - fn _isa_floatptr(val: LLVMValueRef) -> bool; + fn isa_floatptr(val: LLVMValueRef) -> bool; fn _isa_floattype(val: LLVMValueRef) -> bool; fn _isa_bitcast(val: LLVMValueRef) -> bool; fn isa_sqrt32(val: LLVMValueRef) -> bool; @@ -83,6 +83,17 @@ unsafe fn get_func_llvm_value(name: &str) -> Option { return None; } +static mut VECTORIZATION_MAP : Vec<(Vec, RecExpr, HashMap)> = Vec::new(); + +unsafe fn get_vectorization(vec: Vec) -> Option<(RecExpr, HashMap)> { + for (stringvec, vectorization, map) in VECTORIZATION_MAP.clone() { + if stringvec == vec { + return Some((vectorization, map)); + } + } + return None; +} + // Reference Comparison: https://www.reddit.com/r/rust/comments/2r3wjk/is_there_way_to_compare_objects_by_address_in_rust/ // Compares whether addresses of LLVMValueRefs are the same. 
// Not the contents of the Value Refs @@ -179,6 +190,152 @@ pub struct load_info_t { pub offset: i32, } +/// Value Numbering Metadata +#[derive(Debug, Clone)] +struct ValueNumberingState<'a> { + egg_nodes_vector: &'a [VecLang], +} + +/// Value Numbering Mutable Metadata +#[derive(Debug, Clone)] +struct ValueNumberingMutableState { + node2value: HashMap, + currvalue: u32, +} + + +unsafe fn value_number_args(op_name: String, args: &[Id], immd: &ValueNumberingState, md: &mut ValueNumberingMutableState) -> Vec { + let mut vec0 = vec![]; + for arg in args.into_iter() { + let node = &immd.egg_nodes_vector[usize::from(*arg)].clone(); + let vec = value_number_store_tree(node, immd, md); + vec0.extend(vec); + } + let mut final_vec1 = vec![op_name, "(".to_string()]; + let final_vec2 = vec![")".to_string()]; + final_vec1.extend(vec0); + final_vec1.extend(final_vec2); + return final_vec1; +} + +unsafe fn value_number_boxed_args(op_name: String, args: &Box<[Id]>, immd: &ValueNumberingState, md: &mut ValueNumberingMutableState) -> Vec { + let mut vec0 = vec![]; + for arg in args.into_iter() { + let vec = value_number_store_tree(&immd.egg_nodes_vector[usize::from(*arg)], immd, md); + vec0.extend(vec); + } + let mut final_vec1 = vec![op_name, "(".to_string()]; + let final_vec2 = vec![")".to_string()]; + final_vec1.extend(vec0); + final_vec1.extend(final_vec2); + return final_vec1; +} + +unsafe fn op_to_vec_string(op_name:String, val:i32) -> Vec { + let str0 = op_name; + let str1 = "("; + let str2 = val.to_string(); + let str3 = ")"; + return vec![format!("{}{}{}{}", str0, str1, str2, str3).to_string()]; +} + +unsafe fn value_number_check_node(op_name: String, key: VecLang, _immd: &ValueNumberingState, md: &mut ValueNumberingMutableState) -> Vec { + if md.node2value.contains_key(&key) { + let val = *md.node2value.get(&key).expect("Key should exist"); + return op_to_vec_string(op_name, val.try_into().unwrap()); + } else { + let val = md.currvalue; + md.node2value.insert(key, md.currvalue); + md.currvalue += 1; + return op_to_vec_string(op_name, val.try_into().unwrap()); + } +} + +unsafe fn get_array_offset_or_base(egg_node: &VecLang) -> i32 { + match egg_node { + VecLang::Num(n) => *n, + _ => panic!("Array Offset or Base must be in a Num construct"), + } +} + +unsafe fn load_to_tree(arg1: &Id, arg2: &Id, arg3: &Id, immd: &ValueNumberingState, md: &mut ValueNumberingMutableState) -> Vec { + let mut load_vec = vec![]; + let node1 = &immd.egg_nodes_vector[usize::from(*arg1)].clone(); + let vec1 = value_number_store_tree(node1, immd, md); + let node2 = &immd.egg_nodes_vector[usize::from(*arg2)].clone(); + let array_base = get_array_offset_or_base(node2); // ignore array base + let node3 = &immd.egg_nodes_vector[usize::from(*arg3)].clone(); + let array_offset = get_array_offset_or_base(node3); + let mut final_vec1 = vec![String::from("Load"), "(".to_string()]; + load_vec.extend(vec1.clone()); + load_vec.extend(vec1); // ignore base, repeat vec1 + let final_vec2 = vec![(array_offset % vector_width() as i32).to_string()]; + let final_vec3 = vec![")".to_string()]; + load_vec.extend(final_vec2); + load_vec.extend(final_vec3); + return load_vec; +} + +unsafe fn value_number_store_tree(egg_node: &VecLang, immd: &ValueNumberingState, md: &mut ValueNumberingMutableState) -> Vec { + match egg_node { + VecLang::Symbol(..) => { + panic!("Symbol was found. Value Numbering does not handle symbol nodes.") + } + VecLang::Get(..) => { + panic!("Get was found. 
Value Numbering does not handle get nodes.") + } + VecLang::Gep(i) => return value_number_check_node(String::from("Gep"), VecLang::Gep(*i), immd, md), + VecLang::Load([a1, a2, a3]) => return load_to_tree(a1, a2, a3, immd, md), // value_number_args(String::from("Load"), &[*a1, *a1, *a1], immd, md), + VecLang::Store(args) => return value_number_args(String::from("Store"), args, immd, md), + VecLang::Set(..) => { + panic!("Set was found. Value Numbering does not handle set nodes.") + } + VecLang::Ite(..) => panic!("Ite was found. Value Numbering does not handle ite nodes."), + VecLang::Or(..) => panic!("Or was found. Value Numbering does not handle or nodes."), + VecLang::And(..) => panic!("And was found. Value Numbering does not handle and nodes."), + VecLang::Lt(..) => panic!("Lt was found. Value Numbering does not handle lt nodes."), + VecLang::Sgn(..) => panic!("Sgn was found. Value Numbering does not handle sgn nodes. TODO: In the future, tis node will be handled alongside sqrt and neg scalar nodes."), + VecLang::VecSgn(..) => panic!("VecSgn was found. Value Numbering does not handle vecsgn nodes. TODO: In the future, this node will be handled alongside VecSqrt and VecNeg vector nodes."), + VecLang::Arg(a) => return value_number_check_node(String::from("Arg"), VecLang::Arg(*a), immd, md), + VecLang::Reg(r) => return value_number_check_node(String::from("Reg"), VecLang::Reg(*r), immd, md), + VecLang::Num(n) => return op_to_vec_string(String::from("Num"), *n), + VecLang::LitVec(boxed_ids) => return value_number_boxed_args(String::from("LitVec"), boxed_ids, immd, md), + VecLang::Vec(boxed_ids) => return value_number_boxed_args(String::from("Vec"), boxed_ids, immd, md), + VecLang::List(boxed_ids) => return value_number_boxed_args(String::from("List"), boxed_ids, immd, md), + VecLang::DataVec(boxed_ids) => return value_number_boxed_args(String::from("DataVec"), boxed_ids, immd, md), + VecLang::NoOptVec(boxed_ids) => return value_number_boxed_args(String::from("NoOptVec"), boxed_ids, immd, md), + VecLang::VecAdd(args) => return value_number_args(String::from("VecAdd"), args, immd, md), + VecLang::VecMinus(args) => return value_number_args(String::from("VecMinus"), args, immd, md), + VecLang::VecMul(args) => return value_number_args(String::from("VecMul"), args, immd, md), + VecLang::VecDiv(args) => return value_number_args(String::from("VecDiv"), args, immd, md), + VecLang::Add(args) => return value_number_args(String::from("Add"), args, immd, md), + VecLang::Minus(args) => return value_number_args(String::from("Minus"), args, immd, md), + VecLang::Mul(args) => return value_number_args(String::from("Mul"), args, immd, md), + VecLang::Div(args) => return value_number_args(String::from("Div"), args,immd, md), + VecLang::Concat(args) => return value_number_args(String::from("Concat"), args,immd, md), + VecLang::VecMAC(args) => return value_number_args(String::from("VecMac"), args, immd, md), + + + // TODO: VecNeg, VecSqrt, VecSgn all have not been tested, need test cases. + // TODO: LLVM actually supports many more vector intrinsics, including + // vector sine/cosine instructions for floats. + VecLang::VecNeg(args) => return value_number_args(String::from("VecNeg"), args, immd, md), + VecLang::VecSqrt(args) => return value_number_args(String::from("VecSqrt"), args, immd, md), + // VecSgn compliant with c++ LibMath copysign function, which differs with sgn at x = 0. 
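+        // (For example, copysignf(1.0f, 0.0f) is 1.0f, whereas sgn(0.0f) is
+        // conventionally 0.0f.)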
+ VecLang::VecSgn(args) => return value_number_args(String::from("VecSgn"), args, immd, md), + VecLang::Sgn(args) => return value_number_args(String::from("Sgn"), args, immd, md), + VecLang::Sqrt(args) => return value_number_args(String::from("Sqrt"), args, immd, md), + VecLang::Neg(args) => return value_number_args(String::from("Neg"), args, immd, md), + VecLang::VecLoad(args) => return value_number_args(String::from("VecLoad"), args, immd, md), + VecLang::VecStore(args) => return value_number_args(String::from("VecStore"), args,immd, md), + VecLang::AlignedConsecVecLoad(args) => return value_number_args(String::from("AlignedConsecVecLoad"), args, immd, md), + VecLang::Shuffle(args) => return value_number_args(String::from("Shuffle"), args, immd, md), + VecLang::Join(args) => return value_number_args(String::from("Join"), args, immd, md), + VecLang::VecTwo(boxed_ids) => return value_number_boxed_args(String::from("VecTwo"), boxed_ids, immd, md), + VecLang::AlignedConsecVecLoad2(args) => return value_number_args(String::from("AlignedConsecVecLoad2"), args, immd, md), + } +} + /// Main function to optimize: Takes in a basic block of instructions, /// optimizes it, and then translates it to LLVM IR code, in place. @@ -203,7 +360,7 @@ pub fn optimize( let load_info = from_raw_parts(load_info, load_info_size); // llvm to egg - let (egg_expr, llvm2egg_metadata) = llvm_to_egg_main( + let (egg_expr, mut llvm2egg_metadata) = llvm_to_egg_main( chunk_llvm_instrs, restricted_llvm_instrs, run_egg, @@ -216,6 +373,48 @@ pub fn optimize( return false; } + let root_of_tree = egg_expr.as_ref().last().expect("There should be a root egg node"); + let immd = &ValueNumberingState { egg_nodes_vector: egg_expr.as_ref() }; + let md = &mut ValueNumberingMutableState { node2value: HashMap::new(), currvalue: 0 }; + let value_numbered_tree = value_number_store_tree(root_of_tree, immd, md); + let veclang2val = &md.node2value; + let mut val2veclang = HashMap::new(); + for (key, val) in veclang2val.iter() { + val2veclang.insert(val, key); + } + if print_opt { + eprintln!("This is the value tree"); + eprintln!("{:?}", value_numbered_tree); + } + if let Some((vectorization, old_veclang2val)) = get_vectorization(value_numbered_tree.clone()) { + let mut old_val2veclang = HashMap::new(); + for (old_k, old_v) in old_veclang2val.iter() { + old_val2veclang.insert(old_v, old_k); + } + let mut oldveclang2newveclang = HashMap::new(); + for (old_val, old_veclang) in old_val2veclang.iter() { + if val2veclang.contains_key(old_val) { + oldveclang2newveclang.insert((**old_veclang).clone(), (**val2veclang.get(old_val).expect("Key must exist")).clone()); + } + } + llvm2egg_metadata.template_enode2actual_enode = oldveclang2newveclang; + + if print_opt { + eprintln!("Current tree matches old tree: Using old vectorization. 
"); + } + // egg to llvm + egg_to_llvm_main( + vectorization, + &llvm2egg_metadata, + module, + context, + builder, + run_egg, + ); + + return true; + } + // optimization pass if print_opt { eprintln!("{}", egg_expr.pretty(10)); @@ -229,6 +428,14 @@ pub fn optimize( eprintln!("{}", best_egg_expr.pretty(10)); } + VECTORIZATION_MAP.push((value_numbered_tree, best_egg_expr.clone(), veclang2val.clone())); + // build identity map + let mut oldveclang2newveclang = HashMap::new(); + for (key, _) in veclang2val.iter() { + oldveclang2newveclang.insert((*key).clone(), (*key).clone()); + } + llvm2egg_metadata.template_enode2actual_enode = oldveclang2newveclang; + // egg to llvm egg_to_llvm_main( best_egg_expr, @@ -454,6 +661,7 @@ struct LLVM2EggState { start_instructions: Vec, start_ids: Vec, load_info: BTreeMap, + template_enode2actual_enode: HashMap, } /// Translates LLVM Arg to an Egg Argument Node @@ -803,6 +1011,8 @@ unsafe fn llvm_to_egg_main( let prior_translated_instructions: BTreeSet = BTreeSet::new(); + let template_enode2actual_enode = HashMap::new(); + // State Variable To Hold Maps During Translation let mut translation_metadata = LLVM2EggState { llvm2reg: llvm_instr2reg_node, @@ -814,6 +1024,7 @@ unsafe fn llvm_to_egg_main( start_instructions: start_instructions, start_ids: start_ids, load_info: load_info_map, + template_enode2actual_enode: template_enode2actual_enode, }; // Index of next node to translate @@ -871,45 +1082,75 @@ struct Egg2LLVMState<'a> { module: LLVMModuleRef, } +// unsafe fn gep_to_llvm(original_egg_node: &VecLang, md: &mut Egg2LLVMState) -> u32 { +// let egg_node = md.llvm2egg_metadata.template_enode2actual_enode.get(original_egg_node).expect("Reg to LLVM expects egg node in template2actual map."); +// match *egg_node { +// VecLang::Gep(gep_id) => gep_id, +// _ => { +// println!("{:?}", *egg_node); +// panic!("Non Gep nodes cannot be translated in gep_to_llvm.") +// } +// } +// } + /// Translates a Gep node to an ID that the node holds. 
This ID is matche dto /// a gep instruction in the get2gep map /// /// Used in conjunction with Load to LLVM and VecLoad to LLVM -unsafe fn gep_to_llvm(egg_node: &VecLang, _md: &mut Egg2LLVMState) -> u32 { - match *egg_node { +unsafe fn gep_to_llvm(original_egg_node: &VecLang, md: &mut Egg2LLVMState) -> LLVMValueRef { + let egg_node = md.llvm2egg_metadata.template_enode2actual_enode.get(original_egg_node).expect("Reg to LLVM expects egg node in template2actual map."); + let new_gep_id = match *egg_node { VecLang::Gep(gep_id) => gep_id, - _ => panic!("Non Gep nodes cannot be translated in gep_to_llvm."), - } + _ => { + println!("{:?}", *egg_node); + panic!("Non Gep nodes cannot be translated in gep_to_llvm.") + } + }; + let gep_instr = *md + .llvm2egg_metadata + .get2gep + .get(&new_gep_id) + .expect("Value of gep1 id should exist in get2gep"); + assert!(isa_floatptr(gep_instr)); + gep_instr } /// Translates a Load Egg Node back to an LLVM Load INstruction /// /// Assumes that every load is implicitly from a Float * / Single Level Float Pointer unsafe fn load_to_llvm(gep_id: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { - let original_gep_id = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep_id)], md); - let get2gep = &md.llvm2egg_metadata.get2gep; - for (gep_id, gep_instr) in get2gep.iter() { - if original_gep_id == *gep_id { - // assert!(isa_gep(*gep_instr) || isa_argument(*gep_instr)); - let new_load_instr = LLVMBuildLoad(md.builder, *gep_instr, b"\0".as_ptr() as *const _); - return new_load_instr; - } - } - panic!("Load2LLVM: Expected a successful lookup in get2gep, but cannot find Gep ID."); + // let original_gep_id = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep_id)], md); + // let get2gep = &md.llvm2egg_metadata.get2gep; + // for (gep_id, gep_instr) in get2gep.iter() { + // if original_gep_id == *gep_id { + // // assert!(isa_gep(*gep_instr) || isa_argument(*gep_instr)); + // let new_load_instr = LLVMBuildLoad(md.builder, *gep_instr, b"\0".as_ptr() as *const _); + // return new_load_instr; + // } + // } + // panic!("Load2LLVM: Expected a successful lookup in get2gep, but cannot find Gep ID."); + + let gep_instr = egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep_id)], md); + let new_load_instr = LLVMBuildLoad(md.builder, gep_instr, b"\0".as_ptr() as *const _); + return new_load_instr; } unsafe fn store_to_llvm(val_id: &Id, gep_id: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { - let original_gep_id = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep_id)], md); + // let original_gep_id = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep_id)], md); let llvm_val_instr = egg_to_llvm(&md.egg_nodes_vector[usize::from(*val_id)], md); - let get2gep = &md.llvm2egg_metadata.get2gep; - for (gep_id, gep_instr) in get2gep.iter() { - if original_gep_id == *gep_id { - // assert!(isa_gep(*gep_instr) || isa_argument(*gep_instr)); - let new_store_instr = LLVMBuildStore(md.builder, llvm_val_instr, *gep_instr); - return new_store_instr; - } - } - panic!("Store2LLVM: Expected a successful lookup in get2gep, but cannot find Gep ID."); + // let get2gep = &md.llvm2egg_metadata.get2gep; + // for (gep_id, gep_instr) in get2gep.iter() { + // if original_gep_id == *gep_id { + // // assert!(isa_gep(*gep_instr) || isa_argument(*gep_instr)) + // let new_store_instr = LLVMBuildStore(md.builder, llvm_val_instr, *gep_instr); + // return new_store_instr; + // } + // } + // panic!("Store2LLVM: Expected a successful lookup in get2gep, but cannot find Gep ID."); + + let gep_instr = 
egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep_id)], md); + let new_store_instr = LLVMBuildStore(md.builder, llvm_val_instr, gep_instr); + return new_store_instr; } unsafe fn aligned_consec_loadvec_to_llvm( @@ -918,16 +1159,17 @@ unsafe fn aligned_consec_loadvec_to_llvm( md: &mut Egg2LLVMState, ) -> LLVMValueRef { // New code to handle an aligned and consecutive vector load - let gep1_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); - let gep1_llvm_instr = md - .llvm2egg_metadata - .get2gep - .get(&gep1_id_val) - .expect("Value of gep1 id should exist in get2gep"); - let address_space = LLVMGetPointerAddressSpace(LLVMTypeOf(*gep1_llvm_instr)); + // let gep1_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); + // let gep1_llvm_instr = md + // .llvm2egg_metadata + // .get2gep + // .get(&gep1_id_val) + // .expect("Value of gep1 id should exist in get2gep"); + let gep1_llvm_instr = egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); + let address_space = LLVMGetPointerAddressSpace(LLVMTypeOf(gep1_llvm_instr)); let bitcase_scalar_to_vector_type = LLVMBuildBitCast( md.builder, - *gep1_llvm_instr, + gep1_llvm_instr, LLVMPointerType( LLVMVectorType(LLVMFloatTypeInContext(md.context), load_vector_width), address_space, @@ -952,10 +1194,10 @@ unsafe fn loadvec_to_llvm( md: &mut Egg2LLVMState, ) -> LLVMValueRef { // Set Opaque Pointer ness - let gep1_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); - let gep2_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep2_id)], md); - let gep3_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep3_id)], md); - let gep4_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep4_id)], md); + // let gep1_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); + // let gep2_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep2_id)], md); + // let gep3_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep3_id)], md); + // let gep4_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep4_id)], md); let mut base_data = get_shuf_vec_data(base_ids_vec, md); let mut offsets_data = get_shuf_vec_data(offsets_id_vec, md); @@ -963,26 +1205,31 @@ unsafe fn loadvec_to_llvm( base_data.dedup(); offsets_data.dedup(); - let gep1_llvm_instr = md - .llvm2egg_metadata - .get2gep - .get(&gep1_id_val) - .expect("Value of gep1 id should exist in get2gep"); - let gep2_llvm_instr = md - .llvm2egg_metadata - .get2gep - .get(&gep2_id_val) - .expect("Value of gep2 id should exist in get2gep"); - let gep3_llvm_instr = md - .llvm2egg_metadata - .get2gep - .get(&gep3_id_val) - .expect("Value of gep3 id should exist in get2gep"); - let gep4_llvm_instr = md - .llvm2egg_metadata - .get2gep - .get(&gep4_id_val) - .expect("Value of gep4 id should exist in get2gep"); + // let gep1_llvm_instr = md + // .llvm2egg_metadata + // .get2gep + // .get(&gep1_id_val) + // .expect("Value of gep1 id should exist in get2gep"); + // let gep2_llvm_instr = md + // .llvm2egg_metadata + // .get2gep + // .get(&gep2_id_val) + // .expect("Value of gep2 id should exist in get2gep"); + // let gep3_llvm_instr = md + // .llvm2egg_metadata + // .get2gep + // .get(&gep3_id_val) + // .expect("Value of gep3 id should exist in get2gep"); + // let gep4_llvm_instr = md + // .llvm2egg_metadata + // .get2gep + // .get(&gep4_id_val) + // .expect("Value of gep4 id should exist in get2gep"); + + let gep1_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); + let gep2_llvm_instr = 
&egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep2_id)], md); + let gep3_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep3_id)], md); + let gep4_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep4_id)], md); // special case: when all the bases and offsets are the same, do a shuffle, whcih can represent a splat. Splats are fast operations // Build up shuf mask @@ -1131,31 +1378,36 @@ unsafe fn storevec_to_llvm( let llvm_val_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*val_vec_id)], md); // Set Opaque Pointer ness - let gep1_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); - let gep2_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep2_id)], md); - let gep3_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep3_id)], md); - let gep4_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep4_id)], md); - - let gep1_llvm_instr = md - .llvm2egg_metadata - .get2gep - .get(&gep1_id_val) - .expect("Value of gep1 id should exist in get2gep"); - let gep2_llvm_instr = md - .llvm2egg_metadata - .get2gep - .get(&gep2_id_val) - .expect("Value of gep2 id should exist in get2gep"); - let gep3_llvm_instr = md - .llvm2egg_metadata - .get2gep - .get(&gep3_id_val) - .expect("Value of gep3 id should exist in get2gep"); - let gep4_llvm_instr = md - .llvm2egg_metadata - .get2gep - .get(&gep4_id_val) - .expect("Value of gep4 id should exist in get2gep"); + // let gep1_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); + // let gep2_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep2_id)], md); + // let gep3_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep3_id)], md); + // let gep4_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep4_id)], md); + + // let gep1_llvm_instr = md + // .llvm2egg_metadata + // .get2gep + // .get(&gep1_id_val) + // .expect("Value of gep1 id should exist in get2gep"); + // let gep2_llvm_instr = md + // .llvm2egg_metadata + // .get2gep + // .get(&gep2_id_val) + // .expect("Value of gep2 id should exist in get2gep"); + // let gep3_llvm_instr = md + // .llvm2egg_metadata + // .get2gep + // .get(&gep3_id_val) + // .expect("Value of gep3 id should exist in get2gep"); + // let gep4_llvm_instr = md + // .llvm2egg_metadata + // .get2gep + // .get(&gep4_id_val) + // .expect("Value of gep4 id should exist in get2gep"); + + let gep1_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); + let gep2_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep2_id)], md); + let gep3_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep3_id)], md); + let gep4_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep4_id)], md); // New code to handle a vector store // Currently, this is the only type of store that can be generated because stores are not split. @@ -1254,9 +1506,10 @@ unsafe fn storevec_to_llvm( LLVMBuildCall(md.builder, func, args, 4, b"\0".as_ptr() as *const _) } -unsafe fn arg_to_llvm(egg_node: &VecLang, md: &mut Egg2LLVMState) -> LLVMValueRef { +unsafe fn arg_to_llvm(original_egg_node: &VecLang, md: &mut Egg2LLVMState) -> LLVMValueRef { // TODO: Make More Efficient with BTREEMAP? let llvm2arg = &md.llvm2egg_metadata.llvm2arg; + let egg_node = md.llvm2egg_metadata.template_enode2actual_enode.get(original_egg_node).expect("Arg to LLVM expects egg node in template2actual map."); for (llvm_instr, arg_node) in llvm2arg.iter() { // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. 
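+        // Note: `egg_node` is the actual enode obtained from
+        // template_enode2actual_enode above, so when a cached (template)
+        // vectorization is reused the comparison still targets this chunk's
+        // own Arg node; when nothing is reused the map is the identity.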
if arg_node == egg_node { @@ -1271,24 +1524,25 @@ unsafe fn arg_to_llvm(egg_node: &VecLang, md: &mut Egg2LLVMState) -> LLVMValueRe } unsafe fn reg_to_llvm( - egg_node: &VecLang, - translation_metadata: &mut Egg2LLVMState, + original_egg_node: &VecLang, + md: &mut Egg2LLVMState, ) -> LLVMValueRef { // TODO: Make More Efficient with BTREEMAP? - let llvm2reg = &translation_metadata.llvm2egg_metadata.llvm2reg; + let llvm2reg = &md.llvm2egg_metadata.llvm2reg; + let egg_node = md.llvm2egg_metadata.template_enode2actual_enode.get(original_egg_node).expect("Reg to LLVM expects egg node in template2actual map."); for (llvm_instr, reg_node) in llvm2reg.iter() { // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. if reg_node == egg_node { assert!(!isa_argument(*llvm_instr)); // do not clone an instruction translated earlier in the same chunk - if translation_metadata + if md .prior_translated_nodes .contains(&*llvm_instr) { return *llvm_instr; } // do not clone an instruction translated in a prior basic block / prior chunk - if !translation_metadata + if !md .llvm2egg_metadata .instructions_in_chunk .contains(&*llvm_instr) @@ -1296,8 +1550,8 @@ unsafe fn reg_to_llvm( return *llvm_instr; } let new_instr = LLVMInstructionClone(*llvm_instr); - LLVMInsertIntoBuilder(translation_metadata.builder, new_instr); - translation_metadata + LLVMInsertIntoBuilder(md.builder, new_instr); + md .prior_translated_nodes .insert(new_instr); return new_instr; @@ -1738,7 +1992,7 @@ unsafe fn egg_to_llvm( panic!("Get was found. Egg to LLVM Translation does not handle get nodes.") } VecLang::Gep(..) => { - panic!("Gep was found. Egg to LLVM Translation does not handle gep nodes.") + gep_to_llvm(egg_node, translation_metadata) } VecLang::Load([gep_id, _, _]) => { load_to_llvm(gep_id, translation_metadata) diff --git a/src/dios-egraphs/Diospyros/stencil-2d-results.txt b/src/dios-egraphs/Diospyros/stencil-2d-results.txt new file mode 100644 index 00000000..2e53f347 --- /dev/null +++ b/src/dios-egraphs/Diospyros/stencil-2d-results.txt @@ -0,0 +1,8 @@ +stencil2d opt: 288115 milliseconds elapsed over 1000000000 iterations total +stencil2d baseline: 355675 milliseconds elapsed over 1000000000 iterations total + +conv2d opt: 43573 milliseconds elapsed over 1000000000 iterations total +conv2d baseline: 66823 milliseconds elapsed over 1000000000 iterations total + +qprod opt: 5684 milliseconds elapsed over 1000000000 iterations total +qprod baselibe: 5974 milliseconds elapsed over 1000000000 iterations total From 1c5d84ad05a6772e2ab5d9817469b6acf0492ea5 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 5 Jul 2023 21:44:20 -0400 Subject: [PATCH 122/143] add new constructs in veclang, though permutestore and alignedconsecsearcher have bugs and NEED TO BE FIXED --- src/dios-egraphs/src/alignconsecsearcher.rs | 2 +- src/dios-egraphs/src/cost.rs | 19 ++- src/dios-egraphs/src/lib.rs | 1 + src/dios-egraphs/src/permutestore.rs | 55 ++++++++ src/dios-egraphs/src/rules.rs | 137 +++++++++++++++++--- src/dios-egraphs/src/veclang.rs | 7 +- 6 files changed, 196 insertions(+), 25 deletions(-) create mode 100644 src/dios-egraphs/src/permutestore.rs diff --git a/src/dios-egraphs/src/alignconsecsearcher.rs b/src/dios-egraphs/src/alignconsecsearcher.rs index babefa3b..e3188450 100644 --- a/src/dios-egraphs/src/alignconsecsearcher.rs +++ b/src/dios-egraphs/src/alignconsecsearcher.rs @@ -146,7 +146,7 @@ impl> Applier for PermuteLoad { } let (_, _, first_base_id) = offsets[0]; let 
aligned_consec_load_vec = egraph.add(VecLang::AlignedConsecVecLoad([first_base_id])); - let shuffle_shuf_arg = egraph.add(VecLang::Vec(shuffle_ids_vec.into_boxed_slice())); + let shuffle_shuf_arg = egraph.add(VecLang::DataVec(shuffle_ids_vec.into_boxed_slice())); let shuffle_vec_op = egraph.add(VecLang::Shuffle([ aligned_consec_load_vec, shuffle_shuf_arg, diff --git a/src/dios-egraphs/src/cost.rs b/src/dios-egraphs/src/cost.rs index 5ce004f4..bb4f08af 100644 --- a/src/dios-egraphs/src/cost.rs +++ b/src/dios-egraphs/src/cost.rs @@ -14,8 +14,20 @@ impl CostFunction for VecCostFn<'_> { where C: FnMut(Id) -> Self::Cost, { + // const NO_OPTIMIZATION: f64 = 0.0; + // const NO_COST: f64 = 0.0; + // const ALIGNED_CONSEC_VECTORIZED_MEMORY_ACCESS: f64 = -1.0; + // const VECTORIZED_MEMORY_ACCESS: f64 = 0.0001; + // const LITERAL: f64 = 0.001; + // const STRUCTURE: f64 = 0.1; + // const VEC_OP: f64 = 1.; + // const OP: f64 = 1.; + // const BIG: f64 = 100.0; + + // New cost model const NO_OPTIMIZATION: f64 = 0.0; - const ALIGNED_CONSEC_VECTORIZED_MEMORY_ACCESS: f64 = -2.0; + const NO_COST: f64 = 0.0; + const ALIGNED_CONSEC_VECTORIZED_MEMORY_ACCESS: f64 = -1.0; const VECTORIZED_MEMORY_ACCESS: f64 = 0.0001; const LITERAL: f64 = 0.001; const STRUCTURE: f64 = 0.1; @@ -29,6 +41,7 @@ impl CostFunction for VecCostFn<'_> { // Vectorized Memory Accesses are cheaper than individual memory loads and stores // Note: This assumes that masked-gathers or masked-scattters to vectors or memory // are implemented on the target, and are cheap, according to the LLVM cost model + VecLang::AlignedConsecVecLoad2(..) => ALIGNED_CONSEC_VECTORIZED_MEMORY_ACCESS * 0.5, VecLang::AlignedConsecVecLoad(..) => ALIGNED_CONSEC_VECTORIZED_MEMORY_ACCESS, VecLang::VecLoad(..) => VECTORIZED_MEMORY_ACCESS, VecLang::VecStore(..) => ALIGNED_CONSEC_VECTORIZED_MEMORY_ACCESS, @@ -49,7 +62,7 @@ impl CostFunction for VecCostFn<'_> { VecLang::Concat(..) => STRUCTURE, // Vectors are cheap if they have literal values - VecLang::Vec(vals) => { + VecLang::Vec(vals) | VecLang::VecTwo(vals) => { // For now, workaround to determine if children are num, symbol, // or get let non_literals = vals.iter().any(|&x| costs(x) > 3. * LITERAL); @@ -59,6 +72,7 @@ impl CostFunction for VecCostFn<'_> { STRUCTURE } } + VecLang::DataVec(..) => NO_COST, VecLang::LitVec(..) => LITERAL, // But scalar and vector ops cost something @@ -80,6 +94,7 @@ impl CostFunction for VecCostFn<'_> { VecLang::VecSqrt(..) => VEC_OP, VecLang::VecSgn(..) => VEC_OP, VecLang::Shuffle(..) => VEC_OP, + VecLang::Join(..) 
=> VEC_OP, _ => VEC_OP, }; enode.fold(op_cost, |sum, id| sum + costs(id)) diff --git a/src/dios-egraphs/src/lib.rs b/src/dios-egraphs/src/lib.rs index 99887dc2..211263fb 100644 --- a/src/dios-egraphs/src/lib.rs +++ b/src/dios-egraphs/src/lib.rs @@ -3,6 +3,7 @@ pub mod binopsearcher; pub mod config; pub mod cost; pub mod macsearcher; +pub mod permutestore; pub mod rewriteconcats; pub mod rules; pub mod searchutils; diff --git a/src/dios-egraphs/src/permutestore.rs b/src/dios-egraphs/src/permutestore.rs new file mode 100644 index 00000000..0d6d5bad --- /dev/null +++ b/src/dios-egraphs/src/permutestore.rs @@ -0,0 +1,55 @@ +use crate::veclang::VecLang; +use egg::*; +use itertools::Itertools; + +/// Search for permutations of sequences of Loads and Stores that are Aligned and Consecutive +/// +/// This module creates an Applier, which attempts to find successful permutations of loads ands stores to be aligned and consecutive + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PermuteStore { + pub a0: Var, + pub a1: Var, + pub a2: Var, + pub a3: Var, + pub b0: Var, + pub b1: Var, + pub b2: Var, + pub b3: Var, +} + +impl> Applier for PermuteStore { + /// We generate all permutations of the vecstore + fn apply_one(&self, egraph: &mut EGraph, matched_id: Id, subst: &Subst) -> Vec { + let a0_id: Id = subst[self.a0]; + let a1_id: Id = subst[self.a1]; + let a2_id: Id = subst[self.a2]; + let a3_id: Id = subst[self.a3]; + let base0_id: Id = subst[self.a0]; + let base1_id: Id = subst[self.a1]; + let base2_id: Id = subst[self.a2]; + let base3_id: Id = subst[self.a3]; + + let original_list = vec![ + (a0_id, base0_id), + (a1_id, base1_id), + (a2_id, base2_id), + (a3_id, base3_id), + ]; + let perms = original_list.iter().permutations(4); + let mut new_vec_stores = vec![]; + for perm in perms { + let gep_vec_id = egraph.add(VecLang::Vec( + vec![perm[0].0, perm[1].0, perm[2].0, perm[3].0].into_boxed_slice(), + )); + let vec_store_node = + VecLang::VecStore([gep_vec_id, perm[0].1, perm[1].1, perm[2].1, perm[3].1]); + let vec_store_id = egraph.add(vec_store_node); + + // add in the shuffle + new_vec_stores.push(vec_store_id); + } + + new_vec_stores + } +} diff --git a/src/dios-egraphs/src/rules.rs b/src/dios-egraphs/src/rules.rs index 4c7cc3bc..cc0d9356 100644 --- a/src/dios-egraphs/src/rules.rs +++ b/src/dios-egraphs/src/rules.rs @@ -8,6 +8,7 @@ use crate::{ config::*, cost::VecCostFn, macsearcher::build_mac_rule, + permutestore::*, searchutils::*, veclang::{EGraph, VecLang}, }; @@ -127,6 +128,62 @@ pub fn build_litvec_rule() -> Rewrite { if is_all_same_memory_or_zero(&mem_vars)) } +fn memory_is_aligned_and_consec2( + var1: &'static str, + var2: &'static str, + var3: &'static str, + var4: &'static str, +) -> impl Fn(&mut EGraph, Id, &Subst) -> bool { + let var1: Var = var1.parse().unwrap(); + let var2: Var = var2.parse().unwrap(); + let var3: Var = var3.parse().unwrap(); + let var4: Var = var4.parse().unwrap(); + move |egraph, _, subst| unsafe { + let mut first_base = -10; + for e in egraph[subst[var1]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + first_base = n; + } + } + assert!(first_base != -10); + let mut second_base = -10; + for e in egraph[subst[var2]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + second_base = n; + } + } + assert!(second_base != -10); + + if !(first_base == second_base) { + return false; + } + + let mut first_offset = -10; + for e in egraph[subst[var3]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + first_offset = n; + } + } + 
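+        // -10 is a sentinel meaning "no literal Num was found in this
+        // e-class"; the assert below requires the offset to be a compile-time
+        // constant (the bases were checked the same way above). The pair is
+        // then accepted only if the offsets are consecutive
+        // (first + 1 == second) and the first offset is divisible by 4.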
assert!(first_offset != -10); + let mut second_offset = -10; + for e in egraph[subst[var4]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + second_offset = n; + } + } + assert!(second_offset != -10); + + if !(first_offset + 1 == second_offset) { + return false; + } + if !(first_offset % 4 == 0) { + return false; + } + + return true; + } +} + // This returns a function that implements Condition fn memory_is_aligned_and_consec( var1: &'static str, @@ -258,13 +315,27 @@ pub fn rules(no_ac: bool, no_vec: bool) -> Vec> { // Aligned Consec Load rule rw!("vec-load-aligned-consec"; "(Vec (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1) (Load ?a2 ?b2 ?o2) (Load ?a3 ?b3 ?o3))" => "(AlignedConsecVecLoad ?a0)" if memory_is_aligned_and_consec("?b0", "?b1", "?b2", "?b3", "?o0", "?o1", "?o2", "?o3")), // Load load fusion rule - rw!("vec-load-Loads"; "(Vec (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1) (Load ?a2 ?b2 ?o2) (Load ?a3 ?b3 ?o3))" => "(VecLoad ?a0 ?a1 ?a2 ?a3)"), + rw!("vec-load-loads"; "(Vec (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1) (Load ?a2 ?b2 ?o2) (Load ?a3 ?b3 ?o3))" => "(VecLoad ?a0 ?a1 ?a2 ?a3 (DataVec ?b0 ?b1 ?b2 ?b3) (DataVec ?o0 ?o1 ?o2 ?o3))"), // Set store fusion rule rw!("vec-store-sets"; "(Vec (Store ?a0 ?b0) (Store ?a1 ?b1) (Store ?a2 ?b2) (Store ?a3 ?b3))" => "(VecStore (Vec ?a0 ?a1 ?a2 ?a3) ?b0 ?b1 ?b2 ?b3)"), + + // rw!("vec-store-permutations"; "(VecStore (Vec ?a0 ?a1 ?a2 ?a3) ?b0 ?b1 ?b2 ?b3)" => { PermuteStore { + // a0: "?a0".parse().unwrap(), + // a1: "?a1".parse().unwrap(), + // a2: "?a2".parse().unwrap(), + // a3: "?a3".parse().unwrap(), + // b0: "?b0".parse().unwrap(), + // b1: "?b1".parse().unwrap(), + // b2: "?b2".parse().unwrap(), + // b3: "?b3".parse().unwrap(), + // }}), + + // also should have split for into vecstore2 as well + // Special MAC fusion rule - rw!("vec-mac-add-mul"; - "(VecAdd ?v0 (VecMul ?v1 ?v2))" - => "(VecMAC ?v0 ?v1 ?v2)"), + // rw!("vec-mac-add-mul"; + // "(VecAdd ?v0 (VecMul ?v1 ?v2))" + // => "(VecMAC ?v0 ?v1 ?v2)"), // Custom searchers build_unop_rule("neg", "VecNeg"), build_unop_rule("sqrt", "VecSqrt"), @@ -273,7 +344,10 @@ pub fn rules(no_ac: bool, no_vec: bool) -> Vec> { build_binop_or_zero_rule("+", "VecAdd"), build_binop_or_zero_rule("*", "VecMul"), build_binop_or_zero_rule("-", "VecMinus"), - build_mac_rule(), + // build_mac_rule(), + + // rw!("intros-join"; "(Vec ?a ?b ?c ?d)" => "(Join (VecTwo ?a ?b) (VecTwo ?c ?d))"), + rw!("intros-aligned-vec-load2"; "(VecTwo (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1))" => "(AlignedConsecVecLoad2 ?a0)" if memory_is_aligned_and_consec2("?b0", "?b1", "?o0", "?o1")), ]); } else { eprintln!("Skipping vector rules") @@ -289,25 +363,46 @@ pub fn rules(no_ac: bool, no_vec: bool) -> Vec> { ]); } + // // Context Rules + // rules.extend(vec![ + // // rw!("commute-add-context"; "(Vec (+ ?a0 ?b0) (- ?a1 ?b1) (+ ?a2 ?b2) (- ?a3 ?b3))" => "(Shuffle (Vec (+ ?a0 ?b0) (+ ?a2 ?b2) (- ?a1 ?b1) (- ?a3 ?b3)) (DataVec 0 2 1 3))"), + // // rw!("commute-add-context"; "(Vec (- ?a0 ?b0) (+ ?a1 ?b1) (- ?a2 ?b2) (+ ?a3 ?b3))" => "(Shuffle (Vec (- ?a0 ?b0) (- ?a2 ?b2) (+ ?a1 ?b1) (+ ?a3 ?b3)) (DataVec 0 2 1 3))"), + + // rw!("commute-add-context"; "(Vec (+ ?a0 ?b0) (* ?a1 ?b1) (+ ?a2 ?b2) (* ?a3 ?b3))" => "(Shuffle (Vec (+ ?a0 ?b0) (+ ?a2 ?b2) (* ?a1 ?b1) (* ?a3 ?b3)) (DataVec 0 2 1 3))"), + // rw!("commute-add-context"; "(Vec (* ?a0 ?b0) (+ ?a1 ?b1) (* ?a2 ?b2) (+ ?a3 ?b3))" => "(Shuffle (Vec (* ?a0 ?b0) (* ?a2 ?b2) (+ ?a1 ?b1) (+ ?a3 ?b3)) (DataVec 0 2 1 3))"), + // ]); + // Data Movement Rules // shuffle rules rules.extend(vec![ - // 
Basic associativity/commutativity/identities - // rw!("shuffle-op"; "(+ ?a ?b)" => "(+ ?b ?a)"), - rw!("shuffle-load-vec"; "(Vec (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1) (Load ?a2 ?b2 ?o2) (Load ?a3 ?b3 ?o3))" => { PermuteLoad { - a0: "?a0".parse().unwrap(), - a1: "?a1".parse().unwrap(), - a2: "?a2".parse().unwrap(), - a3: "?a3".parse().unwrap(), - b0: "?b0".parse().unwrap(), - b1: "?b1".parse().unwrap(), - b2: "?b2".parse().unwrap(), - b3: "?b3".parse().unwrap(), - o0: "?o0".parse().unwrap(), - o1: "?o1".parse().unwrap(), - o2: "?o2".parse().unwrap(), - o3: "?o3".parse().unwrap(), - }}), + // rw!("vec2-permutation"; "(VecTwo ?a ?b)" => "(VecTwo ?b ?a)"), + + // The below commented out rules are completely wrong and should never occur or be used. + // rw!("shuffle-op1"; "(Vec (VecAdd ?a ?b) (VecMinus ?c ?d) (VecAdd ?e ?f) (VecMinus ?g ?h))" => "(Shuffle (Vec (VecAdd ?a ?b) (VecAdd ?e ?f) (VecMinus ?c ?d) (VecMinus ?g ?h)) (DataVec 0 2 1 3))"), + + + // rw!("shuffle-op-A1M1A2M2-A1A2M1M2"; "(Vec (VecAdd ?a ?b) (VecMul ?c ?d) (VecAdd ?e ?f) (VecMul ?g ?h))" => "(Shuffle (Vec (VecAdd ?a ?b) (VecAdd ?e ?f) (VecMul ?c ?d) (VecMul ?g ?h)) (DataVec 0 2 1 3))"), + // rw!("shuffle-op-A1M1A2M2-A2A1M1M2"; "(Vec (VecAdd ?a ?b) (VecMul ?c ?d) (VecAdd ?e ?f) (VecMul ?g ?h))" => "(Shuffle (Vec (VecAdd ?e ?f) (VecAdd ?a ?b) (VecMul ?c ?d) (VecMul ?g ?h)) (DataVec 2 0 1 3))"), + // rw!("shuffle-op-A1M1A2M2-A1A2M2M1"; "(Vec (VecAdd ?a ?b) (VecMul ?c ?d) (VecAdd ?e ?f) (VecMul ?g ?h))" => "(Shuffle (Vec (VecAdd ?a ?b) (VecAdd ?e ?f) (VecMul ?g ?h) (VecMul ?c ?d)) (DataVec 0 2 3 1))"), + // rw!("shuffle-op-A1M1A2M2-A2A1M2M1"; "(Vec (VecAdd ?a ?b) (VecMul ?c ?d) (VecAdd ?e ?f) (VecMul ?g ?h))" => "(Shuffle (Vec (VecAdd ?e ?f) (VecAdd ?a ?b) (VecMul ?g ?h) (VecMul ?c ?d)) (DataVec 2 0 3 1))"), + // rw!("shuffle-op4"; "(Vec (VecAdd ?a ?b) (VecMul ?c ?d) (VecAdd ?e ?f) (VecMul ?g ?h))" => "(Shuffle (Vec (VecAdd ?e ?f) (VecAdd ?a ?b) (VecMul ?c ?d) (VecMul ?g ?h)) (DataVec 2 0 1 3))"), + // rw!("shuffle-op5"; "(Vec (VecMul ?a ?b) (VecAdd ?c ?d) (VecMul ?e ?f) (VecAdd ?g ?h))" => "(Shuffle (Vec (VecMul ?a ?b) (VecMul ?e ?f) (VecAdd ?c ?d) (VecAdd ?g ?h)) (DataVec 0 2 1 3))"), + + + // rw!("shuffle-load-vec"; "(Vec (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1) (Load ?a2 ?b2 ?o2) (Load ?a3 ?b3 ?o3))" => { PermuteLoad { + // a0: "?a0".parse().unwrap(), + // a1: "?a1".parse().unwrap(), + // a2: "?a2".parse().unwrap(), + // a3: "?a3".parse().unwrap(), + // b0: "?b0".parse().unwrap(), + // b1: "?b1".parse().unwrap(), + // b2: "?b2".parse().unwrap(), + // b3: "?b3".parse().unwrap(), + // o0: "?o0".parse().unwrap(), + // o1: "?o1".parse().unwrap(), + // o2: "?o2".parse().unwrap(), + // o3: "?o3".parse().unwrap(), + // }}), ]); // split vec rules diff --git a/src/dios-egraphs/src/veclang.rs b/src/dios-egraphs/src/veclang.rs index 94e8730b..9bbfe116 100644 --- a/src/dios-egraphs/src/veclang.rs +++ b/src/dios-egraphs/src/veclang.rs @@ -41,7 +41,9 @@ define_language! { "List" = List(Box<[Id]>), // Vectors have width elements + "VecTwo" = VecTwo(Box<[Id]>), "Vec" = Vec(Box<[Id]>), + "DataVec" = DataVec(Box<[Id]>), // Vectors have width elements, not to be optimized (for testing purposes) "NoOptVec" = NoOptVec(Box<[Id]>), @@ -72,14 +74,17 @@ define_language! 
{ // MAC takes 3 lists: acc, v1, v2 "VecMAC" = VecMAC([Id; 3]), - "VecLoad" = VecLoad([Id; 4]), + "VecLoad" = VecLoad([Id; 6]), + "AlignedConsecVecLoad2" = AlignedConsecVecLoad2([Id; 1]), "AlignedConsecVecLoad" = AlignedConsecVecLoad([Id; 1]), "VecStore" = VecStore([Id; 5]), "Shuffle" = Shuffle([Id; 2]), + "Join" = Join([Id; 2]), + // Info specific to register // RegInfo(egg::Symbol), From 277f63e779fa6411738bc1e7bfe00cef1befa2ec Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Wed, 5 Jul 2023 21:46:44 -0400 Subject: [PATCH 123/143] revert fft back to while loops, remove some dead code for the gep_to_llvm function --- src/dios-egraphs/Diospyros/c-tests/fft.c | 67 +++--------------------- src/dios-egraphs/Diospyros/src/lib.rs | 11 ---- 2 files changed, 6 insertions(+), 72 deletions(-) diff --git a/src/dios-egraphs/Diospyros/c-tests/fft.c b/src/dios-egraphs/Diospyros/c-tests/fft.c index cb0cb66d..aee3b33d 100644 --- a/src/dios-egraphs/Diospyros/c-tests/fft.c +++ b/src/dios-egraphs/Diospyros/c-tests/fft.c @@ -13,10 +13,9 @@ #define MAX_FOR_LOOP_ITERATIONS 1000 -void fft_for_loop_version(float real_in[SIZE], float img_in[SIZE], - float real_twid_in[SIZE / 2], - float img_twid_in[SIZE / 2], float real_out[SIZE], - float img_out[SIZE]) { +void fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], + float img_twid_in[SIZE / 2], float real_out[SIZE], + float img_out[SIZE]) { int even = 0; int odd = 0; int log = 0; @@ -29,17 +28,9 @@ void fft_for_loop_version(float real_in[SIZE], float img_in[SIZE], img_out[i] = img_in[i]; } -#pragma unroll - for (int i = 0; i < MAX_FOR_LOOP_ITERATIONS; i++) { - if (span == 0) { - break; - } + while (span != 0) { odd = span; -#pragma unroll - for (int j = 0; j < MAX_FOR_LOOP_ITERATIONS; j++) { - if (odd >= SIZE) { - break; - } + while (odd < SIZE) { odd = odd | span; even = odd ^ span; @@ -66,51 +57,6 @@ void fft_for_loop_version(float real_in[SIZE], float img_in[SIZE], } } -// void fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / -// 2], -// float img_twid_in[SIZE / 2], float real_out[SIZE], -// float img_out[SIZE]) { -// int even = 0; -// int odd = 0; -// int log = 0; -// int rootindex = 0; -// int span = SIZE >> 1; -// float temp = 0; - -// for (int i = 0; i < SIZE; i++) { -// real_out[i] = real_in[i]; -// img_out[i] = img_in[i]; -// } - -// while (span != 0) { -// odd = span; -// while (odd < SIZE) { -// odd = odd | span; -// even = odd ^ span; - -// temp = real_out[even] + real_out[odd]; -// real_out[odd] = real_out[even] - real_out[odd]; -// real_out[even] = temp; - -// temp = img_out[even] + img_out[odd]; -// img_out[odd] = img_out[even] - img_out[odd]; -// img_out[even] = temp; - -// rootindex = (even << log) & (SIZE - 1); -// if (rootindex > 0) { -// temp = real_twid_in[rootindex] * real_out[odd] - -// img_twid_in[rootindex] * img_out[odd]; -// img_out[odd] = real_twid_in[rootindex] * img_out[odd] + -// img_twid_in[rootindex] * real_out[odd]; -// real_out[odd] = temp; -// } -// odd += 1; -// } -// span >>= 1; -// log += 1; -// } -// } - void no_opt_fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], float img_twid_in[SIZE / 2], float real_out[SIZE], float img_out[SIZE]) { @@ -204,8 +150,7 @@ int main(void) { expected_img_out[i] = n; } - fft_for_loop_version(real_in, img_in, real_twid_in, img_twid_in, real_out, - img_out); + fft(real_in, img_in, real_twid_in, img_twid_in, real_out, img_out); no_opt_fft(expected_real_in, expected_img_in, expected_real_twid_in, expected_img_twid_in, 
expected_real_out, expected_img_out); diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 8d1dd5e1..8e951622 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -1082,17 +1082,6 @@ struct Egg2LLVMState<'a> { module: LLVMModuleRef, } -// unsafe fn gep_to_llvm(original_egg_node: &VecLang, md: &mut Egg2LLVMState) -> u32 { -// let egg_node = md.llvm2egg_metadata.template_enode2actual_enode.get(original_egg_node).expect("Reg to LLVM expects egg node in template2actual map."); -// match *egg_node { -// VecLang::Gep(gep_id) => gep_id, -// _ => { -// println!("{:?}", *egg_node); -// panic!("Non Gep nodes cannot be translated in gep_to_llvm.") -// } -// } -// } - /// Translates a Gep node to an ID that the node holds. This ID is matche dto /// a gep instruction in the get2gep map /// From 2c843fac00e95315aa4398ba14a49d5e00a57b4c Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 6 Jul 2023 01:58:06 -0400 Subject: [PATCH 124/143] add the other template test --- .../qr-decomp-fixed-size-debug-template.c | 2 ++ src/dios-egraphs/Diospyros/diospyros.cpp | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) rename src/dios-egraphs/Diospyros/{fail-tests => c-tests}/qr-decomp-fixed-size-debug-template.c (98%) diff --git a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-debug-template.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size-debug-template.c similarity index 98% rename from src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-debug-template.c rename to src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size-debug-template.c index 889076ac..a5c7f2f4 100644 --- a/src/dios-egraphs/Diospyros/fail-tests/qr-decomp-fixed-size-debug-template.c +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size-debug-template.c @@ -243,12 +243,14 @@ int main(void) { for (int j = 0; j < SIZE; j++) { printf("Q Output: %f\n", Q[i * SIZE + j]); printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(Q[i * SIZE + j] == expectedQ[i * SIZE + j]); } } for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { printf("R Output: %f\n", R[i * SIZE + j]); printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(R[i * SIZE + j] == expectedR[i * SIZE + j]); } } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index 7bc1a7cc..9fb422c7 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -698,7 +698,6 @@ using ad_trees_t = std::vector; std::pair recurse_llvm( Value *value, std::set chunk_instrs, std::set basic_block_instrs, bool not_for_mem_constraint) { - errs() << *value << "\n"; // Constants if (isa(value)) { // DO not add constant, if i recall, constants are not llvm From 042f36448dc6a7a2fb5118a596225eae1d647d0c Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 6 Jul 2023 02:32:32 -0400 Subject: [PATCH 125/143] add 12 more basic tests --- .../Diospyros/c-tests/all-arith-ops.c | 74 ++++++++++++++++ .../Diospyros/c-tests/branching-add.c | 74 ++++++++++++++++ .../Diospyros/c-tests/branching-mul.c | 74 ++++++++++++++++ .../Diospyros/c-tests/branching-sub.c | 74 ++++++++++++++++ .../Diospyros/c-tests/multi-loops.c | 88 +++++++++++++++++++ .../Diospyros/c-tests/rar-dependency.c | 70 +++++++++++++++ .../Diospyros/c-tests/raw-dependency.c | 70 +++++++++++++++ .../Diospyros/c-tests/reduction-add.c | 86 ++++++++++++++++++ 
.../Diospyros/c-tests/reduction-mul.c | 86 ++++++++++++++++++ .../Diospyros/c-tests/reduction-sub.c | 86 ++++++++++++++++++ .../Diospyros/c-tests/war-dependency.c | 70 +++++++++++++++ .../Diospyros/c-tests/waw-dependency.c | 70 +++++++++++++++ 12 files changed, 922 insertions(+) create mode 100644 src/dios-egraphs/Diospyros/c-tests/all-arith-ops.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/branching-add.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/branching-mul.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/branching-sub.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/multi-loops.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/rar-dependency.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/raw-dependency.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/reduction-add.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/reduction-mul.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/reduction-sub.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/war-dependency.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/waw-dependency.c diff --git a/src/dios-egraphs/Diospyros/c-tests/all-arith-ops.c b/src/dios-egraphs/Diospyros/c-tests/all-arith-ops.c new file mode 100644 index 00000000..a3b81c76 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/all-arith-ops.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] * b_in[i] + (c_in[i] - a_in[i]); + } else { + d_out[i] = a_in[i] * c_in[i] + (c_in[i] - a_in[i]); + } + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] * b_in[i] + (c_in[i] - a_in[i]); + } else { + d_out[i] = a_in[i] * c_in[i] + (c_in[i] - a_in[i]); + } + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/branching-add.c b/src/dios-egraphs/Diospyros/c-tests/branching-add.c new file mode 100644 index 00000000..229453aa --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/branching-add.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 
0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] + b_in[i]; + } else { + d_out[i] = a_in[i] + c_in[i]; + } + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] + b_in[i]; + } else { + d_out[i] = a_in[i] + c_in[i]; + } + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/branching-mul.c b/src/dios-egraphs/Diospyros/c-tests/branching-mul.c new file mode 100644 index 00000000..6c42c7ed --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/branching-mul.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] * b_in[i]; + } else { + d_out[i] = a_in[i] * c_in[i]; + } + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] * b_in[i]; + } else { + d_out[i] = a_in[i] * c_in[i]; + } + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + 
} + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/branching-sub.c b/src/dios-egraphs/Diospyros/c-tests/branching-sub.c new file mode 100644 index 00000000..bf7914ca --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/branching-sub.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] + b_in[i] - c_in[i]; + } else { + d_out[i] = a_in[i] + c_in[i] - b_in[i]; + } + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] + b_in[i] - c_in[i]; + } else { + d_out[i] = a_in[i] + c_in[i] - b_in[i]; + } + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/multi-loops.c b/src/dios-egraphs/Diospyros/c-tests/multi-loops.c new file mode 100644 index 00000000..e4790b9d --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/multi-loops.c @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] * b_in[i]; + } else { + d_out[i] = a_in[i] * c_in[i]; + } + } + for (int i = 0; i < ROWS; i++) { + if (i % 5 == 0) { + d_out[i] = a_in[i] - b_in[i]; + } else { + d_out[i] = a_in[i] + c_in[i]; + } + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] * b_in[i]; + } else { + d_out[i] = a_in[i] * c_in[i]; + } + } + for (int i = 0; i < ROWS; i++) { + if (i % 5 == 0) { + d_out[i] = a_in[i] - b_in[i]; + } else { + d_out[i] = a_in[i] + c_in[i]; + } + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for 
(int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/rar-dependency.c b/src/dios-egraphs/Diospyros/c-tests/rar-dependency.c new file mode 100644 index 00000000..f43e02b3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/rar-dependency.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + d_out[0] = a_in[0] + a_in[0] * c_in[0]; + d_out[1] = d_out[0] - c_in[0]; + d_out[2] = a_in[0] + a_in[2] * c_in[2]; + d_out[3] = d_out[2] - c_in[3]; + d_out[4] = a_in[2] + b_in[2] * c_in[2]; +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + d_out[0] = a_in[0] + a_in[0] * c_in[0]; + d_out[1] = d_out[0] - c_in[0]; + d_out[2] = a_in[0] + a_in[2] * c_in[2]; + d_out[3] = d_out[2] - c_in[3]; + d_out[4] = a_in[2] + b_in[2] * c_in[2]; +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = d_out[i]; + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/raw-dependency.c b/src/dios-egraphs/Diospyros/c-tests/raw-dependency.c new file mode 100644 index 00000000..208582cf --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/raw-dependency.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + d_out[0] = a_in[0] + b_in[0] * c_in[0]; + d_out[1] = d_out[0] - c_in[0]; + d_out[2] = a_in[2] + b_in[2] * c_in[2]; + d_out[3] = d_out[3] - c_in[3]; + d_out[4] = a_in[2] + b_in[2] * c_in[2]; +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], 
float c_in[ROWS], + float d_out[ROWS]) { + d_out[0] = a_in[0] + b_in[0] * c_in[0]; + d_out[1] = d_out[0] - c_in[0]; + d_out[2] = a_in[2] + b_in[2] * c_in[2]; + d_out[3] = d_out[3] - c_in[3]; + d_out[4] = a_in[2] + b_in[2] * c_in[2]; +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = d_out[i]; + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/reduction-add.c b/src/dios-egraphs/Diospyros/c-tests/reduction-add.c new file mode 100644 index 00000000..a8d74233 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/reduction-add.c @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float result = 0.0f; + for (int i = 0; i < ROWS; i++) { + result += a_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result += b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result += c_in[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = result; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float result = 0.0f; + for (int i = 0; i < ROWS; i++) { + result += a_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result += b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result += c_in[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = result; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file 
diff --git a/src/dios-egraphs/Diospyros/c-tests/reduction-mul.c b/src/dios-egraphs/Diospyros/c-tests/reduction-mul.c new file mode 100644 index 00000000..933cd7ab --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/reduction-mul.c @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float result = 1.0f; + for (int i = 0; i < ROWS; i++) { + result *= a_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result *= b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result *= c_in[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = result; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float result = 1.0f; + for (int i = 0; i < ROWS; i++) { + result *= a_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result *= b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result *= c_in[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = result; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/reduction-sub.c b/src/dios-egraphs/Diospyros/c-tests/reduction-sub.c new file mode 100644 index 00000000..2eac3e09 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/reduction-sub.c @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float result = 100.0f; + for (int i = 0; i < ROWS; i++) { + result -= a_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result -= b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result -= c_in[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = result; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float result = 100.0f; + for (int i = 0; i < ROWS; i++) { + result -= a_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result -= b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result -= c_in[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = result; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float 
b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/war-dependency.c b/src/dios-egraphs/Diospyros/c-tests/war-dependency.c new file mode 100644 index 00000000..e5401f12 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/war-dependency.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + d_out[0] = a_in[0] + b_in[0] * c_in[0]; + d_out[0] = d_out[0] - c_in[0]; + d_out[2] = a_in[2] + b_in[2] * c_in[2]; + d_out[3] = d_out[3] - c_in[3]; + d_out[4] = a_in[2] + b_in[2] * c_in[2]; +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + d_out[0] = a_in[0] + b_in[0] * c_in[0]; + d_out[0] = d_out[0] - c_in[0]; + d_out[2] = a_in[2] + b_in[2] * c_in[2]; + d_out[3] = d_out[3] - c_in[3]; + d_out[4] = a_in[2] + b_in[2] * c_in[2]; +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = d_out[i]; + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/waw-dependency.c b/src/dios-egraphs/Diospyros/c-tests/waw-dependency.c new file mode 100644 index 00000000..6cb8fe16 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/waw-dependency.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + d_out[0] = a_in[0] + b_in[0] * c_in[0]; + d_out[0] = d_out[0] - c_in[0]; + d_out[0] = a_in[2] + b_in[2] * 
c_in[2]; + d_out[0] = d_out[3] - c_in[3]; + d_out[0] = a_in[2] + b_in[2] * c_in[2]; +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + d_out[0] = a_in[0] + b_in[0] * c_in[0]; + d_out[0] = d_out[0] - c_in[0]; + d_out[0] = a_in[2] + b_in[2] * c_in[2]; + d_out[0] = d_out[3] - c_in[3]; + d_out[0] = a_in[2] + b_in[2] * c_in[2]; +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = d_out[i]; + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file From 2a99cced390fc97897b84f3ca0c6c530f2bad941 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 6 Jul 2023 17:19:15 -0400 Subject: [PATCH 126/143] add 13 more tests with some having small randomly populated matrices --- .../Diospyros/c-tests/array-reversal.c | 74 +++++++++++++ .../Diospyros/c-tests/array-symmetric.c | 66 +++++++++++ .../Diospyros/c-tests/dot-product.c | 76 +++++++++++++ .../Diospyros/c-tests/local-var.c | 78 +++++++++++++ .../Diospyros/c-tests/matrix-scalar-product.c | 65 +++++++++++ .../Diospyros/c-tests/matrix-vector-product.c | 79 ++++++++++++++ .../Diospyros/c-tests/nested-loops.c | 70 ++++++++++++ .../Diospyros/c-tests/overwrite.c | 89 +++++++++++++++ .../c-tests/random-2-by-2-matrix-multiply.c | 79 ++++++++++++++ .../Diospyros/c-tests/random-2d-2d-conv.c | 103 ++++++++++++++++++ .../Diospyros/c-tests/random-2d-conv.c | 92 ++++++++++++++++ .../c-tests/random-3-by-3-matrix-multiply.c | 79 ++++++++++++++ .../Diospyros/c-tests/random-stencil-2d-v2.c | 74 +++++++++++++ .../Diospyros/c-tests/random-stencil-2d.c | 74 +++++++++++++ 14 files changed, 1098 insertions(+) create mode 100644 src/dios-egraphs/Diospyros/c-tests/array-reversal.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/array-symmetric.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/dot-product.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/local-var.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/matrix-scalar-product.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/matrix-vector-product.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/nested-loops.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/overwrite.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/random-2-by-2-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/random-2d-2d-conv.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/random-2d-conv.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/random-3-by-3-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v2.c create 
mode 100644 src/dios-egraphs/Diospyros/c-tests/random-stencil-2d.c diff --git a/src/dios-egraphs/Diospyros/c-tests/array-reversal.c b/src/dios-egraphs/Diospyros/c-tests/array-reversal.c new file mode 100644 index 00000000..e6b9acd6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/array-reversal.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float temp_array[ROWS]; + for (int i = 0; i < ROWS; i++) { + temp_array[i] = d_out[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = temp_array[ROWS - i - 1]; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float temp_array[ROWS]; + for (int i = 0; i < ROWS; i++) { + temp_array[i] = d_out[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = temp_array[ROWS - i - 1]; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = d_out[i]; + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/array-symmetric.c b/src/dios-egraphs/Diospyros/c-tests/array-symmetric.c new file mode 100644 index 00000000..61c8f47a --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/array-symmetric.c @@ -0,0 +1,66 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + d_out[i] = d_out[ROWS - i - 1]; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + d_out[i] = d_out[ROWS - i - 1]; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = d_out[i]; + } + + // calculate up c_out + 
branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/dot-product.c b/src/dios-egraphs/Diospyros/c-tests/dot-product.c new file mode 100644 index 00000000..e61d8cbb --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/dot-product.c @@ -0,0 +1,76 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float sum = 0.0f; + for (int i = 0; i < ROWS; i++) { + sum += a_in[i] * b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + float temp = (float)i + sum; + d_out[i] = temp; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float sum = 0.0f; + for (int i = 0; i < ROWS; i++) { + sum += a_in[i] * b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + float temp = (float)i + sum; + d_out[i] = temp; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/local-var.c b/src/dios-egraphs/Diospyros/c-tests/local-var.c new file mode 100644 index 00000000..45e2c951 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/local-var.c @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + float local; + if (i % 3 == 0) { + local = a_in[i] + b_in[i]; + } else { + local = a_in[i] + c_in[i]; + } + d_out[i] = local * 2.0f + 3.5f; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + float local; + if (i % 3 == 0) { + local = a_in[i] + b_in[i]; + } else { + local = a_in[i] + c_in[i]; + } + d_out[i] = local * 2.0f + 3.5f; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float 
b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/matrix-scalar-product.c b/src/dios-egraphs/Diospyros/c-tests/matrix-scalar-product.c new file mode 100644 index 00000000..3fa98354 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/matrix-scalar-product.c @@ -0,0 +1,65 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 3 +#define A_COLS 3 +#define B_COLS 3 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b, + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = a_in[i][j] * b; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], float b, + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = a_in[i][j] * b; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + float c_out[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b, c_out); + no_opt_matrix_multiply(a_in, b, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/matrix-vector-product.c b/src/dios-egraphs/Diospyros/c-tests/matrix-vector-product.c new file mode 100644 index 00000000..1a257e74 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/matrix-vector-product.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 3 +#define A_COLS 3 +#define B_COLS 1 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float 
b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_COLS][B_COLS]; + for (int i = 0; i < A_COLS; i++) { + for (int j = 0; j < B_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/nested-loops.c b/src/dios-egraphs/Diospyros/c-tests/nested-loops.c new file mode 100644 index 00000000..a58a4d7e --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/nested-loops.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + for (int j = 0; j < ROWS; j++) { + d_out[i] = a_in[j] * b_in[j] * c_in[i]; + } + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + for (int j = 0; j < ROWS; j++) { + d_out[i] = a_in[j] * b_in[j] * c_in[i]; + } + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/overwrite.c b/src/dios-egraphs/Diospyros/c-tests/overwrite.c new file mode 100644 index 00000000..98236fbf --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/overwrite.c @@ -0,0 +1,89 @@ +#include +#include +#include 
+#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 3 +#define A_COLS 3 +#define B_COLS 3 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = 3.5f; + } + } + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = 3.5f; + } + } + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-2-by-2-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/random-2-by-2-matrix-multiply.c new file mode 100644 index 00000000..f1ef604a --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-2-by-2-matrix-multiply.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 2 +#define A_COLS 2 +#define B_COLS 2 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float 
c_out[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-2d-2d-conv.c b/src/dios-egraphs/Diospyros/c-tests/random-2d-2d-conv.c new file mode 100644 index 00000000..75605001 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-2d-2d-conv.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define I_ROWS 2 +#define I_COLS 2 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[restrict I_ROWS][I_COLS], + float f_in[restrict F_ROWS][F_COLS], + float mat_out[restrict O_ROWS][O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = + mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + mat_out[outRow][outCol] += v; + } + } + } + } + } +} + +void no_opt_convolution(float mat_in[restrict I_ROWS][I_COLS], + float f_in[restrict F_ROWS][F_COLS], + float mat_out[restrict O_ROWS][O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = + mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + mat_out[outRow][outCol] += v; + } + } + } + } + } +} + +int main(void) { + srand(1); // set seed + + float mat_in[I_ROWS][I_COLS]; + for (int i = 0; i < I_ROWS; i++) { + for (int j = 0; j < I_COLS; j++) { + mat_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float f_in[F_ROWS][F_COLS]; + for (int i = 0; i < F_ROWS; i++) { + for (int j = 0; j < F_COLS; j++) { + f_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float mat_out[O_ROWS][O_COLS]; + for (int i = 0; i < O_COLS; i++) { + for (int j = 0; j < O_COLS; j++) { + mat_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[O_ROWS][O_COLS]; + for (int i = 0; i < O_COLS; i++) { + for (int j = 0; j < O_COLS; j++) { + expected[i][j] = mat_out[i][j]; + } + } + + convolution(mat_in, f_in, mat_out); + no_opt_convolution(mat_in, f_in, expected); + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + printf("output: %f\n", mat_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(mat_out[i][j] == 
expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-2d-conv.c b/src/dios-egraphs/Diospyros/c-tests/random-2d-conv.c new file mode 100644 index 00000000..e706c392 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-2d-conv.c @@ -0,0 +1,92 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define I_ROWS 2 +#define I_COLS 2 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +void no_opt_convolution(float mat_in[I_ROWS * I_COLS], + float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + srand(1); // set seed + + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + convolution(mat_in, f_in, mat_out); + no_opt_convolution(mat_in, f_in, expected); + for (int i = 0; i < O_ROWS * O_COLS; i++) { + printf("output: %f\n", mat_out[i]); + printf("expected: %f\n", expected[i]); + assert(mat_out[i] == expected[i]); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-3-by-3-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/random-3-by-3-matrix-multiply.c new file mode 100644 index 00000000..c83c6491 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-3-by-3-matrix-multiply.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 3 +#define A_COLS 3 +#define B_COLS 3 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + 
c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v2.c b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v2.c new file mode 100644 index 00000000..ae105af4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v2.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 8 +#define COL_SIZE 7 +#define F_SIZE 4 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 2; k1++) { + for (int k2 = 0; k2 < 2; k2++) { + temp += filter_in[k1 * 2 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +void no_opt_stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], + float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 2; k1++) { + for (int k2 = 0; k2 < 2; k2++) { + temp += filter_in[k1 * 2 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + srand(1); // set seed + + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = sol_out[i]; + } + stencil(orig_in, sol_out, filter_in); + no_opt_stencil(orig_in, expected, filter_in); + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("%f\n", sol_out[i]); + printf("%f\n", expected[i]); + 
assert(expected[i] == sol_out[i]); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d.c b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d.c new file mode 100644 index 00000000..279dfca1 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 5 +#define COL_SIZE 7 +#define F_SIZE 9 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +void no_opt_stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], + float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + srand(1); // set seed + + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = sol_out[i]; + } + stencil(orig_in, sol_out, filter_in); + no_opt_stencil(orig_in, expected, filter_in); + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("%f\n", sol_out[i]); + printf("%f\n", expected[i]); + assert(expected[i] == sol_out[i]); + } + return 0; +} \ No newline at end of file From a269f357429b03bedfb2457db2d8e4078699eb08 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Thu, 6 Jul 2023 20:14:21 -0400 Subject: [PATCH 127/143] refactor large matching functions with more generic function, delete dead code, clean up code base --- src/dios-egraphs/Diospyros/src/lib.rs | 533 +++----------------------- 1 file changed, 60 insertions(+), 473 deletions(-) diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index 8e951622..31c7e53e 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -71,7 +71,7 @@ unsafe fn gen_get_idx() -> u32 { static mut FUNC_NAME2LLVM_FUNC: Vec<(&str, LLVMValueRef)> = Vec::new(); static FMA_NAME: &str = "llvm.fma.v4f32"; -static SCATTER: &str = "llvm.masked.scatter.v4f32.v4p0f32"; +static _SCATTER: &str = "llvm.masked.scatter.v4f32.v4p0f32"; static GATHER: &str = "llvm.masked.gather.v4f32.v4p0f32"; unsafe fn get_func_llvm_value(name: &str) -> Option { @@ -258,15 +258,12 @@ unsafe fn get_array_offset_or_base(egg_node: &VecLang) -> i32 { } } -unsafe fn load_to_tree(arg1: &Id, arg2: &Id, arg3: &Id, immd: &ValueNumberingState, md: &mut ValueNumberingMutableState) -> Vec { +unsafe fn load_to_tree(arg1: &Id, _arg2: &Id, arg3: &Id, immd: &ValueNumberingState, md: 
&mut ValueNumberingMutableState) -> Vec { let mut load_vec = vec![]; let node1 = &immd.egg_nodes_vector[usize::from(*arg1)].clone(); let vec1 = value_number_store_tree(node1, immd, md); - let node2 = &immd.egg_nodes_vector[usize::from(*arg2)].clone(); - let array_base = get_array_offset_or_base(node2); // ignore array base let node3 = &immd.egg_nodes_vector[usize::from(*arg3)].clone(); let array_offset = get_array_offset_or_base(node3); - let mut final_vec1 = vec![String::from("Load"), "(".to_string()]; load_vec.extend(vec1.clone()); load_vec.extend(vec1); // ignore base, repeat vec1 let final_vec2 = vec![(array_offset % vector_width() as i32).to_string()]; @@ -753,11 +750,6 @@ unsafe fn sqrt32_to_egg( (new_enode_vec, new_next_node_idx + 1) } -/// Grab the associated index of load in the load_info vector, otherwise u32::max -unsafe fn get_load_idx(load: LLVMValueRef, load_info: &[load_info_t]) -> u32 { - return u32::MAX; -} - /// Translates a Load to an Egg Get Node /// /// The translation of a load is a Get Node, which can then possibly be vectorized @@ -1108,35 +1100,13 @@ unsafe fn gep_to_llvm(original_egg_node: &VecLang, md: &mut Egg2LLVMState) -> LL /// /// Assumes that every load is implicitly from a Float * / Single Level Float Pointer unsafe fn load_to_llvm(gep_id: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { - // let original_gep_id = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep_id)], md); - // let get2gep = &md.llvm2egg_metadata.get2gep; - // for (gep_id, gep_instr) in get2gep.iter() { - // if original_gep_id == *gep_id { - // // assert!(isa_gep(*gep_instr) || isa_argument(*gep_instr)); - // let new_load_instr = LLVMBuildLoad(md.builder, *gep_instr, b"\0".as_ptr() as *const _); - // return new_load_instr; - // } - // } - // panic!("Load2LLVM: Expected a successful lookup in get2gep, but cannot find Gep ID."); - let gep_instr = egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep_id)], md); let new_load_instr = LLVMBuildLoad(md.builder, gep_instr, b"\0".as_ptr() as *const _); return new_load_instr; } unsafe fn store_to_llvm(val_id: &Id, gep_id: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { - // let original_gep_id = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep_id)], md); let llvm_val_instr = egg_to_llvm(&md.egg_nodes_vector[usize::from(*val_id)], md); - // let get2gep = &md.llvm2egg_metadata.get2gep; - // for (gep_id, gep_instr) in get2gep.iter() { - // if original_gep_id == *gep_id { - // // assert!(isa_gep(*gep_instr) || isa_argument(*gep_instr)) - // let new_store_instr = LLVMBuildStore(md.builder, llvm_val_instr, *gep_instr); - // return new_store_instr; - // } - // } - // panic!("Store2LLVM: Expected a successful lookup in get2gep, but cannot find Gep ID."); - let gep_instr = egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep_id)], md); let new_store_instr = LLVMBuildStore(md.builder, llvm_val_instr, gep_instr); return new_store_instr; @@ -1148,12 +1118,6 @@ unsafe fn aligned_consec_loadvec_to_llvm( md: &mut Egg2LLVMState, ) -> LLVMValueRef { // New code to handle an aligned and consecutive vector load - // let gep1_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); - // let gep1_llvm_instr = md - // .llvm2egg_metadata - // .get2gep - // .get(&gep1_id_val) - // .expect("Value of gep1 id should exist in get2gep"); let gep1_llvm_instr = egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); let address_space = LLVMGetPointerAddressSpace(LLVMTypeOf(gep1_llvm_instr)); let bitcase_scalar_to_vector_type = LLVMBuildBitCast( @@ -1183,38 +1147,12 @@ 
unsafe fn loadvec_to_llvm( md: &mut Egg2LLVMState, ) -> LLVMValueRef { // Set Opaque Pointer ness - // let gep1_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); - // let gep2_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep2_id)], md); - // let gep3_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep3_id)], md); - // let gep4_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep4_id)], md); - let mut base_data = get_shuf_vec_data(base_ids_vec, md); let mut offsets_data = get_shuf_vec_data(offsets_id_vec, md); base_data.dedup(); offsets_data.dedup(); - // let gep1_llvm_instr = md - // .llvm2egg_metadata - // .get2gep - // .get(&gep1_id_val) - // .expect("Value of gep1 id should exist in get2gep"); - // let gep2_llvm_instr = md - // .llvm2egg_metadata - // .get2gep - // .get(&gep2_id_val) - // .expect("Value of gep2 id should exist in get2gep"); - // let gep3_llvm_instr = md - // .llvm2egg_metadata - // .get2gep - // .get(&gep3_id_val) - // .expect("Value of gep3 id should exist in get2gep"); - // let gep4_llvm_instr = md - // .llvm2egg_metadata - // .get2gep - // .get(&gep4_id_val) - // .expect("Value of gep4 id should exist in get2gep"); - let gep1_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); let gep2_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep2_id)], md); let gep3_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep3_id)], md); @@ -1358,45 +1296,15 @@ unsafe fn loadvec_to_llvm( unsafe fn storevec_to_llvm( val_vec_id: &Id, gep1_id: &Id, - gep2_id: &Id, - gep3_id: &Id, - gep4_id: &Id, + _gep2_id: &Id, + _gep3_id: &Id, + _gep4_id: &Id, md: &mut Egg2LLVMState, ) -> LLVMValueRef { // Recursively translate val_vec_id to an LLVM Vector Instr let llvm_val_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*val_vec_id)], md); - // Set Opaque Pointer ness - // let gep1_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); - // let gep2_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep2_id)], md); - // let gep3_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep3_id)], md); - // let gep4_id_val = gep_to_llvm(&md.egg_nodes_vector[usize::from(*gep4_id)], md); - - // let gep1_llvm_instr = md - // .llvm2egg_metadata - // .get2gep - // .get(&gep1_id_val) - // .expect("Value of gep1 id should exist in get2gep"); - // let gep2_llvm_instr = md - // .llvm2egg_metadata - // .get2gep - // .get(&gep2_id_val) - // .expect("Value of gep2 id should exist in get2gep"); - // let gep3_llvm_instr = md - // .llvm2egg_metadata - // .get2gep - // .get(&gep3_id_val) - // .expect("Value of gep3 id should exist in get2gep"); - // let gep4_llvm_instr = md - // .llvm2egg_metadata - // .get2gep - // .get(&gep4_id_val) - // .expect("Value of gep4 id should exist in get2gep"); - let gep1_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); - let gep2_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep2_id)], md); - let gep3_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep3_id)], md); - let gep4_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep4_id)], md); // New code to handle a vector store // Currently, this is the only type of store that can be generated because stores are not split. 
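The simplified load and store paths above share one pattern: bitcast the scalar element pointer to a vector pointer in the same address space, then emit a single vector load or store, instead of going through the masked gather/scatter intrinsics. Below is a minimal llvm-sys sketch of the store side of that pattern; it assumes a 4-wide float vector and uses a hypothetical helper name, emit_consecutive_vec_store, so it is an illustration of the lowering rather than code taken from this patch.

// Sketch only. Assumptions: the llvm-sys crate is available, vectors are
// 4 x float, and `emit_consecutive_vec_store` is a made-up helper name.
use llvm_sys::core::*;
use llvm_sys::prelude::*;

/// Bitcast a float* to <4 x float>* (preserving its address space) and store
/// one 4-wide vector through it, mirroring the simplified storevec lowering.
unsafe fn emit_consecutive_vec_store(
    context: LLVMContextRef,
    builder: LLVMBuilderRef,
    vec_val: LLVMValueRef,  // value of type <4 x float>
    elem_ptr: LLVMValueRef, // pointer to the first destination float
) -> LLVMValueRef {
    let vec_ty = LLVMVectorType(LLVMFloatTypeInContext(context), 4);
    let addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(elem_ptr));
    let vec_ptr = LLVMBuildBitCast(
        builder,
        elem_ptr,
        LLVMPointerType(vec_ty, addr_space),
        b"vec-store-ptr\0".as_ptr() as *const _,
    );
    LLVMBuildStore(builder, vec_val, vec_ptr)
}

The helper presumes a context and a builder already positioned inside a function, as in the surrounding lowering code; the load side is the same bitcast followed by LLVMBuildLoad instead of LLVMBuildStore.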
@@ -1412,87 +1320,6 @@ unsafe fn storevec_to_llvm( ); let store = LLVMBuildStore(md.builder, llvm_val_vec, bitcase_scalar_to_vector_type); return store; - - let vector_width = 4; - let floatptr_type = LLVMTypeOf(*gep1_llvm_instr); - let vec4ptr_type = LLVMVectorType(floatptr_type, vector_width); - let vec4f_type = LLVMVectorType(LLVMFloatTypeInContext(md.context), vector_width); - let vec4b_type = LLVMVectorType(LLVMInt1TypeInContext(md.context), vector_width); - let int_type = LLVMIntTypeInContext(md.context, 32); - let void_type = LLVMVoidTypeInContext(md.context); - - // Parameter Types are: vector of values, vector of pointers, offset int, mask vector booleans - let param_types = [vec4f_type, vec4ptr_type, int_type, vec4b_type].as_mut_ptr(); - // Output type is a void_type - let fn_type = LLVMFunctionType(void_type, param_types, 4, 0 as i32); - // Build the Vector Load Intrinsic - let func_name = &SCATTER; - let llvm_masked_scatter_func = get_func_llvm_value(&func_name); - - let func = match llvm_masked_scatter_func { - Some(value) => value, - None => { - let new_func = LLVMAddFunction( - md.module, - b"llvm.masked.scatter.v4f32.v4p0f32\0".as_ptr() as *const _, - fn_type, - ); - FUNC_NAME2LLVM_FUNC.push((&func_name, new_func)); - new_func - } - }; - - // Build Arguments - - let pointer_to_int_value = LLVMBuildPtrToInt( - md.builder, - LLVMConstInt(LLVMIntTypeInContext(md.context, 32), 0 as u64, 0), - LLVMIntTypeInContext(md.context, 32), - b"pointer-to-int\0".as_ptr() as *const _, - ); - let pointer_to_float_value = LLVMBuildBitCast( - md.builder, - pointer_to_int_value, - floatptr_type, - b"pointer-to-float-bit-cast\0".as_ptr() as *const _, - ); - let mut pointer_to_floats = Vec::new(); - for _ in 0..4 { - pointer_to_floats.push(pointer_to_float_value); - } - let pointer_to_floats_ptr = pointer_to_floats.as_mut_ptr(); - let mut pointer_vector = LLVMConstVector(pointer_to_floats_ptr, 4); - - let llvm_ptrs = vec![ - *gep1_llvm_instr, - *gep2_llvm_instr, - *gep3_llvm_instr, - *gep4_llvm_instr, - ]; - for idx in 0..4 { - // Grow the Vector - pointer_vector = LLVMBuildInsertElement( - md.builder, - pointer_vector, - *llvm_ptrs.get(idx).expect("Index must be in vector"), - LLVMConstInt(LLVMIntTypeInContext(md.context, 32), idx as u64, 0), - b"\0".as_ptr() as *const _, - ); - } - - let offset = LLVMConstInt(LLVMIntTypeInContext(md.context, 32), 0 as u64, 0); - - let mut mask_values = vec![ - LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), - LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), - LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), - LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), - ]; - let ptr_to_mask_values = mask_values.as_mut_ptr(); - let mask_vector = LLVMConstVector(ptr_to_mask_values, 4); - - let args = [llvm_val_vec, pointer_vector, offset, mask_vector].as_mut_ptr(); - LLVMBuildCall(md.builder, func, args, 4, b"\0".as_ptr() as *const _) } unsafe fn arg_to_llvm(original_egg_node: &VecLang, md: &mut Egg2LLVMState) -> LLVMValueRef { @@ -1702,77 +1529,14 @@ unsafe fn binop_to_llvm( } } -// TODO: fix up concat errors due to having vecstores. 
unsafe fn concat_to_llvm( left_vector: &Id, right_vector: &Id, md: &mut Egg2LLVMState, ) -> LLVMValueRef { - { - let trans_v1 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_vector)], md); - let mut trans_v2 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_vector)], md); - return trans_v2; - - // In LLVM, it turns out all vectors need to be length power of 2 - // if the 2 vectors are not the same size, double the length of the smaller vector by padding with 0's in it - // manually concatenate 2 vectors by using a LLVM shuffle operation. - let v1_type = LLVMTypeOf(trans_v1); - let v1_size = LLVMGetVectorSize(v1_type); - let v2_type = LLVMTypeOf(trans_v2); - let v2_size = LLVMGetVectorSize(v2_type); - - // TODO: HACKY FIX FOR NOW - // assume both v1 and v2 are pow of 2 size - // assume v2 size smaller or equal to v1 size - // assume v2 is 1/2 size of v1 - if v1_size != v2_size { - // replicate v2 size - let mut zeros = Vec::new(); - for _ in 0..v2_size { - zeros.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 0 as f64)); - } - let zeros_ptr = zeros.as_mut_ptr(); - let zeros_vector = LLVMConstVector(zeros_ptr, v2_size); - let size = 2 * v2_size; - let mut indices = Vec::new(); - for i in 0..size { - indices.push(LLVMConstInt( - LLVMIntTypeInContext(md.context, 32), - i as u64, - 0, - )); - } - let mask = indices.as_mut_ptr(); - let mask_vector = LLVMConstVector(mask, size); - trans_v2 = LLVMBuildShuffleVector( - md.builder, - trans_v2, - zeros_vector, - mask_vector, - b"\0".as_ptr() as *const _, - ); - } - - let size = v1_size + v2_size; - let mut indices = Vec::new(); - for i in 0..size { - indices.push(LLVMConstInt( - LLVMIntTypeInContext(md.context, 32), - i as u64, - 0, - )); - } - - let mask = indices.as_mut_ptr(); - let mask_vector = LLVMConstVector(mask, size); - LLVMBuildShuffleVector( - md.builder, - trans_v1, - trans_v2, - mask_vector, - b"\0".as_ptr() as *const _, - ) - } + let _ = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_vector)], md); + let trans_v2 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_vector)], md); + return trans_v2; } unsafe fn mac_to_llvm( @@ -1787,8 +1551,6 @@ unsafe fn mac_to_llvm( let vec_type = LLVMTypeOf(trans_acc); let param_types = [vec_type, vec_type, vec_type].as_mut_ptr(); let fn_type = LLVMFunctionType(vec_type, param_types, 3, 0 as i32); - // let vector_width = config::vector_width(); - // let fma_intrinsic_name = format!("llvm.fma.v{}f32\0", vector_width).as_bytes(); let func_name = &FMA_NAME; let llvm_fma_func = get_func_llvm_value(&func_name); @@ -2004,7 +1766,7 @@ unsafe fn egg_to_llvm( VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { vec_to_llvm(&*boxed_ids, translation_metadata) } - VecLang::DataVec(boxed_ids) => panic!("Cannot translate a datavec"), + VecLang::DataVec(_) => panic!("Cannot translate a datavec"), VecLang::NoOptVec(boxed_ids) => nooptvector_to_llvm(boxed_ids, translation_metadata), VecLang::VecAdd([l, r]) | VecLang::VecMinus([l, r]) @@ -2037,203 +1799,49 @@ unsafe fn egg_to_llvm( } // Function types for constructor anonymous functions -type VecLangSingleConstructor = fn([Id; 1]) -> VecLang; -type VecLangPairConstructor = fn([Id; 2]) -> VecLang; -type VecLangTripleConstructor = fn([Id; 3]) -> VecLang; -type VecLangQuadConstructor = fn([Id; 4]) -> VecLang; -type VecLangQuintConstructor = fn([Id; 5]) -> VecLang; type VecLangBoxedConstructor = fn(bool, Box<[Id]>) -> VecLang; -/// Canonicalizes a Enode with a single inpit constructor -unsafe fn canonicalize_single( - 
can_change_vector: bool, - constructor: VecLangSingleConstructor, - single_vector: &Id, - old_egg_nodes: &[VecLang], -) -> Vec { - let mut trans_v1 = canonicalize_egg( - false, - &old_egg_nodes[usize::from(*single_vector)], - old_egg_nodes, - ); - trans_v1.push(constructor([*single_vector])); - trans_v1 -} - -/// Canonicalizes a Enode with a pair constructor -unsafe fn canonicalize_pair( - is_concat: bool, - can_change_vector: bool, - constructor: VecLangPairConstructor, - left_vector: &Id, - right_vector: &Id, - old_egg_nodes: &[VecLang], -) -> Vec { - let trans_v1 = canonicalize_egg( - if !is_concat { false } else { can_change_vector }, - &old_egg_nodes[usize::from(*left_vector)], - old_egg_nodes, - ); - let trans_v2 = canonicalize_egg( - if !is_concat { false } else { can_change_vector }, - &old_egg_nodes[usize::from(*right_vector)], - old_egg_nodes, - ); - let mut whole_vector = [trans_v1, trans_v2].concat(); - whole_vector.push(constructor([*left_vector, *right_vector])); - whole_vector -} - -/// Canonicalizes a Enode with a triple input constructor -unsafe fn canonicalize_triple( - can_change_vector: bool, - constructor: VecLangTripleConstructor, - first_vector: &Id, - second_vector: &Id, - third_vector: &Id, - old_egg_nodes: &[VecLang], -) -> Vec { - let trans_v1 = canonicalize_egg( - false, - &old_egg_nodes[usize::from(*first_vector)], - old_egg_nodes, - ); - let trans_v2 = canonicalize_egg( - false, - &old_egg_nodes[usize::from(*second_vector)], - old_egg_nodes, - ); - let trans_v3 = canonicalize_egg( - false, - &old_egg_nodes[usize::from(*third_vector)], - old_egg_nodes, - ); - let mut whole_vector = [trans_v1, trans_v2, trans_v3].concat(); - whole_vector.push(constructor([*first_vector, *second_vector, *third_vector])); - whole_vector -} - -/// Canonicalizes a Enode with a quadruple input constructor -unsafe fn canonicalize_quadruple( - can_change_vector: bool, - constructor: VecLangQuadConstructor, - first_vector: &Id, - second_vector: &Id, - third_vector: &Id, - fourth_vector: &Id, - old_egg_nodes: &[VecLang], -) -> Vec { - let trans_v1 = canonicalize_egg( - false, - &old_egg_nodes[usize::from(*first_vector)], - old_egg_nodes, - ); - let trans_v2 = canonicalize_egg( - false, - &old_egg_nodes[usize::from(*second_vector)], - old_egg_nodes, - ); - let trans_v3 = canonicalize_egg( - false, - &old_egg_nodes[usize::from(*third_vector)], - old_egg_nodes, - ); - let trans_v4 = canonicalize_egg( - false, - &old_egg_nodes[usize::from(*fourth_vector)], - old_egg_nodes, - ); - let mut whole_vector = [trans_v1, trans_v2, trans_v3, trans_v4].concat(); - whole_vector.push(constructor([ - *first_vector, - *second_vector, - *third_vector, - *fourth_vector, - ])); - whole_vector -} - -/// Canonicalizes a Enode with a quintuple input constructor -unsafe fn canonicalize_quintuple( +// Canonicalize the Concat Constructor +unsafe fn canonicalize_concat( can_change_vector: bool, - constructor: VecLangQuintConstructor, - first_vector: &Id, - second_vector: &Id, - third_vector: &Id, - fourth_vector: &Id, - fifth_vector: &Id, + vector_elements: Box<[Id]>, old_egg_nodes: &[VecLang], ) -> Vec { - let trans_v1 = canonicalize_egg( - false, - &old_egg_nodes[usize::from(*first_vector)], - old_egg_nodes, - ); - let trans_v2 = canonicalize_egg( - false, - &old_egg_nodes[usize::from(*second_vector)], - old_egg_nodes, - ); - let trans_v3 = canonicalize_egg( - false, - &old_egg_nodes[usize::from(*third_vector)], - old_egg_nodes, - ); - let trans_v4 = canonicalize_egg( - false, - 
&old_egg_nodes[usize::from(*fourth_vector)], - old_egg_nodes, - ); - let trans_v5 = canonicalize_egg( - false, - &old_egg_nodes[usize::from(*fifth_vector)], - old_egg_nodes, - ); - let mut whole_vector = [trans_v1, trans_v2, trans_v3, trans_v4, trans_v5].concat(); - whole_vector.push(constructor([ - *first_vector, - *second_vector, - *third_vector, - *fourth_vector, - *fifth_vector, - ])); + let mut whole_vector = Vec::new(); + for vec_elt in vector_elements.iter() { + // note the first argument is to can_change_vector, which is unlike any other constructor + let mut trans_val = + canonicalize_egg(can_change_vector, &old_egg_nodes[usize::from(*vec_elt)], old_egg_nodes); + whole_vector.append(&mut trans_val); + } + // note specialization to concat constructor + whole_vector.push(VecLang::Concat(box2arr(vector_elements))); whole_vector } unsafe fn canoncalize_ntuple( can_change_vector: bool, - vector_elements: &[Id], - final_element: VecLang, + constructor: VecLangBoxedConstructor, + vector_elements: Box<[Id]>, old_egg_nodes: &[VecLang], ) -> Vec { let mut whole_vector = Vec::new(); - for vec_elt in vector_elements { + for vec_elt in vector_elements.iter() { let mut trans_val = canonicalize_egg(false, &old_egg_nodes[usize::from(*vec_elt)], old_egg_nodes); whole_vector.append(&mut trans_val); } - whole_vector.push(final_element); + whole_vector.push(constructor(can_change_vector, vector_elements)); whole_vector } -unsafe fn canonicalize_vec_type( - can_change_vector: bool, - constructor: VecLangBoxedConstructor, - boxed_ids: &Box<[Id]>, - old_egg_nodes: &[VecLang], -) -> Vec { - let mut whole_vector: Vec = Vec::new(); - let mut new_boxed_ids: Vec = Vec::new(); - for id in boxed_ids.iter() { - new_boxed_ids.push(*id); - let trans_vec = canonicalize_egg(false, &old_egg_nodes[usize::from(*id)], old_egg_nodes); - for elt in trans_vec { - whole_vector.push(elt); - } - } - let boxed = new_boxed_ids.into_boxed_slice(); - whole_vector.push(constructor(can_change_vector, boxed)); - whole_vector +// Solution to convert by: +// https://stackoverflow.com/questions/29570607/is-there-a-good-way-to-convert-a-vect-to-an-array +// https://stackoverflow.com/questions/35751927/how-to-convert-a-boxed-array-into-a-vec-in-rust +unsafe fn box2arr(b: Box<[T]>) -> [T; N] { + let v = b.into_vec(); + v.try_into() + .unwrap_or_else(|v: Vec| panic!("Expected a Vec of length {} but it was {}", N, v.len())) } /// Modify the Egg expression so that the first instance of a Vector operation is replaced by a NoOpVector expression node @@ -2253,8 +1861,8 @@ unsafe fn canonicalize_egg( panic!("Get was found. Egg canonicalization does not handle get nodes.") } VecLang::Gep(g) => vec![VecLang::Gep(*g)], - VecLang::Load([gep_id, base_id, offset]) => canonicalize_triple(can_change_vector,|triple| -> VecLang {VecLang::Load(triple)}, gep_id, base_id, offset, old_egg_nodes ), - VecLang::Store([val_id, gep_id]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Store(pair)}, val_id, gep_id, old_egg_nodes), + VecLang::Load(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Load(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Store(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Store(box2arr(b))}, Box::new(*args), old_egg_nodes), VecLang::Set(..) => { panic!("Set was found. 
Egg canonicalization does not handle set nodes.") } @@ -2268,38 +1876,38 @@ unsafe fn canonicalize_egg( VecLang::Reg(r) => vec![VecLang::Reg(*r)], VecLang::Num(n) => vec![VecLang::Num(*n)], VecLang::List(_) => panic!("List was found. Egg canonicalization does not handle list nodes."), - VecLang::LitVec(boxed_ids) => canonicalize_vec_type(can_change_vector, |change_vec_type, boxed| -> VecLang {if change_vec_type {VecLang::NoOptVec(boxed)} else {VecLang::LitVec(boxed)}}, boxed_ids, old_egg_nodes), - VecLang::Vec(boxed_ids) => canonicalize_vec_type(can_change_vector, |change_vec_type, boxed| -> VecLang {if change_vec_type {VecLang::NoOptVec(boxed)} else {VecLang::Vec(boxed)}}, boxed_ids, old_egg_nodes), - VecLang::DataVec(boxed_ids) => canonicalize_vec_type(can_change_vector, |change_vec_type, boxed| -> VecLang {VecLang::DataVec(boxed)}, boxed_ids, old_egg_nodes), - VecLang::VecAdd([l, r])=> canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::VecAdd(pair)}, l, r, old_egg_nodes), - VecLang::VecMinus([l, r])=> canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::VecMinus(pair)},l, r, old_egg_nodes), - VecLang::VecMul([l, r])=> canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::VecMul(pair)}, l, r, old_egg_nodes), - VecLang::VecDiv([l, r])=> canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::VecDiv(pair)}, l, r, old_egg_nodes), - VecLang::Add([l, r]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Add(pair)}, l, r, old_egg_nodes), - VecLang::Minus([l, r]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Minus(pair)}, l, r, old_egg_nodes), - VecLang::Mul([l, r]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Mul(pair)}, l, r, old_egg_nodes), - VecLang::Div([l, r]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Div(pair)}, l, r, old_egg_nodes), - VecLang::Concat([l, r]) => canonicalize_pair(true, can_change_vector, |pair| -> VecLang {VecLang::Concat(pair)},l, r, old_egg_nodes), - VecLang::VecMAC([acc, v1, v2]) => canonicalize_triple(can_change_vector, |triple| -> VecLang {VecLang::VecMAC(triple)},acc, v1, v2, old_egg_nodes), + VecLang::LitVec(args) => canoncalize_ntuple(can_change_vector, |can_change_vector, b| -> VecLang {if can_change_vector {VecLang::NoOptVec(b)} else {VecLang::LitVec(b)}}, args.clone(), old_egg_nodes), + VecLang::Vec(args) => canoncalize_ntuple(can_change_vector, |can_change_vector, b| -> VecLang {if can_change_vector {VecLang::NoOptVec(b)} else {VecLang::Vec(b)}}, args.clone(), old_egg_nodes), + VecLang::DataVec(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::DataVec(b)}, args.clone(), old_egg_nodes), + VecLang::VecAdd(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecAdd(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::VecMinus(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecMinus(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::VecMul(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecMul(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::VecDiv(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecDiv(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Add(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Add(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Minus(args) => 
canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Minus(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Mul(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Mul(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Div(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Div(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Concat(args) => canonicalize_concat(can_change_vector, Box::new(*args), old_egg_nodes), + VecLang::VecMAC(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecMAC(box2arr(b))}, Box::new(*args), old_egg_nodes), // TODO: VecNeg, VecSqrt, VecSgn all have not been tested, need test cases. // TODO: LLVM actually supports many more vector intrinsics, including // vector sine/cosine instructions for floats. - VecLang::VecNeg([v]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::VecNeg(single)}, v, old_egg_nodes ), - VecLang::VecSqrt([v]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::VecSqrt(single)}, v, old_egg_nodes ), + VecLang::VecNeg(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecNeg(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::VecSqrt(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecSqrt(box2arr(b))}, Box::new(*args), old_egg_nodes), // VecSgn compliant with c++ LibMath copysign function, which differs with sgn at x = 0. - VecLang::VecSgn([v]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::VecSgn(single)}, v, old_egg_nodes ), - VecLang::Sgn([n]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::Sgn(single)}, n, old_egg_nodes ), - VecLang::Sqrt([n]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::Sqrt(single)}, n, old_egg_nodes ), - VecLang::Neg([n]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::Neg(single)}, n, old_egg_nodes ), - VecLang::VecLoad([gep1_id, gep2_id, gep3_id, gep4_id, base_ids_vec, offsets_id_vec]) => canoncalize_ntuple(can_change_vector, &[*gep1_id, *gep2_id, *gep3_id, *gep4_id, *base_ids_vec, *offsets_id_vec], VecLang::VecLoad([*gep1_id, *gep2_id, *gep3_id, *gep4_id, *base_ids_vec, *offsets_id_vec]), old_egg_nodes), - VecLang::VecStore([val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id]) => canonicalize_quintuple(can_change_vector,|quint| -> VecLang {VecLang::VecStore(quint)}, val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id, old_egg_nodes ), - VecLang::AlignedConsecVecLoad([gep_id]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::AlignedConsecVecLoad(single)}, gep_id, old_egg_nodes ), - VecLang::Shuffle([data_vec_id, shuf_vec_id]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Shuffle(pair)}, data_vec_id, shuf_vec_id, old_egg_nodes), - VecLang::Join([left, right]) => canonicalize_pair(false, can_change_vector, |pair| -> VecLang {VecLang::Join(pair)}, left, right, old_egg_nodes), - VecLang::VecTwo(boxed_ids) => canonicalize_vec_type(can_change_vector, |change_vec_type, boxed| -> VecLang {VecLang::VecTwo(boxed)}, boxed_ids, old_egg_nodes), - VecLang::AlignedConsecVecLoad2([gep_id]) => canonicalize_single(can_change_vector,|single| -> VecLang {VecLang::AlignedConsecVecLoad2(single)}, gep_id, old_egg_nodes ), + VecLang::VecSgn(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecSgn(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Sgn(args) => 
canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Sgn(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Sqrt(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Sqrt(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Neg(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Neg(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::VecLoad(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecLoad(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::VecStore(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecStore(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::AlignedConsecVecLoad(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::AlignedConsecVecLoad(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Shuffle(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Shuffle(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Join(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Join(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::VecTwo(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecTwo(b)}, args.clone(), old_egg_nodes), + VecLang::AlignedConsecVecLoad2(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::AlignedConsecVecLoad2(box2arr(b))}, Box::new(*args), old_egg_nodes), } } @@ -2371,25 +1979,4 @@ unsafe fn egg_to_llvm_main( .expect("Index should be in vector."); LLVMInstructionEraseFromParent(*old_instr); } - - // BELOW HERE, we allow for vectorization output, and we stitch our work back into the current LLVM code - - // NOTE: We Assume Egg rewriter will maintain relative positions of elements in vector - // REVIEW ASSUMPTION! - // Extract the elements of the vector, to be assigned back to where they are to be used. - // let num_extractions = llvm2egg_metadata.start_instructions.len(); - // for i in (0..num_extractions).rev() { - // let old_instr = llvm2egg_metadata - // .start_instructions - // .get(i) - // .expect("Index should be in vector."); - // // Build the extracted value - // let index = LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0); - // let extracted_value = - // LLVMBuildExtractElement(builder, llvm_vector, index, b"\0".as_ptr() as *const _); - // // Replace all the uses of the old instruction with the new extracted value - // // Old instruction cannot have been removed. 
- // LLVMReplaceAllUsesWith(*old_instr, extracted_value); - // LLVMInstructionEraseFromParent(*old_instr); - // } } From 0c41dc6d3214995a953d5c591c00a366b20f9958 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Fri, 7 Jul 2023 19:04:34 -0400 Subject: [PATCH 128/143] bring tests to 120 total, all pass --- .../c-tests/cols-greater-matrix-multiply.c | 79 ++++++++++ .../c-tests/random-1-by-1-matrix-multiply.c | 79 ++++++++++ .../c-tests/random-matrix-multiply-1.c | 79 ++++++++++ .../c-tests/random-matrix-multiply-2.c | 79 ++++++++++ .../Diospyros/c-tests/random-point-product.c | 107 +++++++++++++ .../Diospyros/c-tests/random-q-prod.c | 149 ++++++++++++++++++ .../Diospyros/c-tests/random-stencil-2d-v3.c | 74 +++++++++ .../Diospyros/c-tests/random-stencil-2d-v4.c | 74 +++++++++ .../Diospyros/c-tests/random-stencil-2d-v5.c | 74 +++++++++ .../Diospyros/c-tests/repeat-thrice.c | 79 ++++++++++ .../Diospyros/c-tests/repeat-twice.c | 73 +++++++++ .../c-tests/rows-greater-matrix-multiply.c | 79 ++++++++++ src/dios-egraphs/Diospyros/c-tests/skip.c | 70 ++++++++ 13 files changed, 1095 insertions(+) create mode 100644 src/dios-egraphs/Diospyros/c-tests/cols-greater-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/random-1-by-1-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/random-matrix-multiply-1.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/random-matrix-multiply-2.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/random-point-product.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/random-q-prod.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v3.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v4.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v5.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/repeat-thrice.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/repeat-twice.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/rows-greater-matrix-multiply.c create mode 100644 src/dios-egraphs/Diospyros/c-tests/skip.c diff --git a/src/dios-egraphs/Diospyros/c-tests/cols-greater-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/cols-greater-matrix-multiply.c new file mode 100644 index 00000000..0e3a93d8 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/cols-greater-matrix-multiply.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 5 +#define A_COLS 4 +#define B_COLS 7 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_COLS][B_COLS]; + for (int i = 0; i < A_COLS; i++) { + for (int j = 0; j < B_COLS; j++) { + b_in[i][j] = (float)rand() / 
(float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-1-by-1-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/random-1-by-1-matrix-multiply.c new file mode 100644 index 00000000..10ede436 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-1-by-1-matrix-multiply.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 1 +#define A_COLS 1 +#define B_COLS 1 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-matrix-multiply-1.c b/src/dios-egraphs/Diospyros/c-tests/random-matrix-multiply-1.c new file mode 100644 index 00000000..d199fe28 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-matrix-multiply-1.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 4 +#define A_COLS 2 +#define B_COLS 4 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 
0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_COLS][B_COLS]; + for (int i = 0; i < A_COLS; i++) { + for (int j = 0; j < B_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-matrix-multiply-2.c b/src/dios-egraphs/Diospyros/c-tests/random-matrix-multiply-2.c new file mode 100644 index 00000000..f2d33210 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-matrix-multiply-2.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 6 +#define A_COLS 1 +#define B_COLS 5 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_COLS][B_COLS]; + for (int i = 0; i < A_COLS; i++) { + for (int j = 0; j < B_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + 
printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-point-product.c b/src/dios-egraphs/Diospyros/c-tests/random-point-product.c new file mode 100644 index 00000000..8d386a23 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-point-product.c @@ -0,0 +1,107 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define SIZE 4 + +void cross_product(float lhs[3], float rhs[3], float result[3]) + __attribute__((always_inline)); + +void cross_product(float lhs[3], float rhs[3], float result[3]) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +void point_product(float q_in[4], float p_in[4], float result_out[4]) { + float qvec[3] = {q_in[0], q_in[1], q_in[2]}; + // qvec = {0, 1, 2} + + float uv[3]; + cross_product(qvec, p_in, uv); + // uv = {1 * 2 - 2 * 1, 2 * 0 - 0 * 2, 0 * 1 - 1 * 0} = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + // uv = {0, 0 , 0} + float qxuv[3]; + cross_product(qvec, uv, qxuv); + // qxuv = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + result_out[i] = p_in[i] + q_in[3] * uv[i] + qxuv[i]; + } +} + +// --- NO OPTS --- + +void no_opt_cross_product(float lhs[3], float rhs[3], float result[3]) + __attribute__((always_inline)); + +void no_opt_cross_product(float lhs[3], float rhs[3], float result[3]) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +void no_opt_point_product(float q_in[4], float p_in[4], float result_out[4]) { + float qvec[3] = {q_in[0], q_in[1], q_in[2]}; + // qvec = {0, 1, 2} + + float uv[3]; + no_opt_cross_product(qvec, p_in, uv); + // uv = {1 * 2 - 2 * 1, 2 * 0 - 0 * 2, 0 * 1 - 1 * 0} = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + // uv = {0, 0 , 0} + float qxuv[3]; + no_opt_cross_product(qvec, uv, qxuv); + // qxuv = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + result_out[i] = p_in[i] + q_in[3] * uv[i] + qxuv[i]; + } +} + +int main(void) { + srand(100); // set seed + + float q_in[SIZE]; + for (int i = 0; i < SIZE; i++) { + q_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float p_in[SIZE]; + for (int i = 0; i < SIZE; i++) { + p_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float result_out[SIZE]; + for (int i = 0; i < SIZE; i++) { + result_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[SIZE]; + for (int i = 0; i < SIZE; i++) { + expected[i] = result_out[i]; + } + point_product(q_in, p_in, result_out); + no_opt_point_product(q_in, p_in, expected); + for (int i = 0; i < SIZE; i++) { + printf("%f\n", result_out[i]); + printf("%f\n", expected[i]); + assert(expected[i] == result_out[i]); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-q-prod.c b/src/dios-egraphs/Diospyros/c-tests/random-q-prod.c new file mode 100644 index 00000000..b5c2fc0e --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-q-prod.c @@ -0,0 +1,149 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define SIZE 4 + +__attribute__((always_inline)) void naive_cross_product(float *lhs, float *rhs, + 
float *result) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +__attribute__((always_inline)) void naive_point_product(float *q, float *p, + float *result) { + float qvec[3] = {q[0], q[1], q[2]}; + float uv[3]; + naive_cross_product(qvec, p, uv); + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + float qxuv[3]; + naive_cross_product(qvec, uv, qxuv); + + for (int i = 0; i < 3; i++) { + result[i] = p[i] + q[3] * uv[i] + qxuv[i]; + } +} + +void naive_quaternion_product(float *a_q, float *a_t, float *b_q, float *b_t, + float *r_q, float *r_t) { + r_q[3] = + a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * b_q[2]; + r_q[0] = + a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - a_q[2] * b_q[1]; + r_q[1] = + a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * b_q[2]; + r_q[2] = + a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * b_q[0]; + + naive_point_product(a_q, b_t, r_t); + for (int i = 0; i < 3; i++) { + r_t[i] += a_t[i]; + } +} + +// --- NO OPTS --- + +__attribute__((always_inline)) void no_opt_naive_cross_product(float *lhs, + float *rhs, + float *result) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +__attribute__((always_inline)) void no_opt_naive_point_product(float *q, + float *p, + float *result) { + float qvec[3] = {q[0], q[1], q[2]}; + float uv[3]; + no_opt_naive_cross_product(qvec, p, uv); + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + float qxuv[3]; + no_opt_naive_cross_product(qvec, uv, qxuv); + + for (int i = 0; i < 3; i++) { + result[i] = p[i] + q[3] * uv[i] + qxuv[i]; + } +} + +void no_opt_naive_quaternion_product(float *a_q, float *a_t, float *b_q, + float *b_t, float *r_q, float *r_t) { + r_q[3] = + a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * b_q[2]; + r_q[0] = + a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - a_q[2] * b_q[1]; + r_q[1] = + a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * b_q[2]; + r_q[2] = + a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * b_q[0]; + + no_opt_naive_point_product(a_q, b_t, r_t); + for (int i = 0; i < 3; i++) { + r_t[i] += a_t[i]; + } +} + +int main(void) { + srand(1); // set seed + + float a_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + a_q[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float a_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + a_t[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float b_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + b_q[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float b_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + b_t[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float r_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + r_q[i] = 0.0f; + } + float r_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + r_t[i] = 0.0f; + } + float expectedq[SIZE]; + for (int i = 0; i < SIZE; i++) { + expectedq[i] = 0.0f; + } + float expectedt[SIZE]; + for (int i = 0; i < SIZE; i++) { + expectedt[i] = 0.0f; + } + naive_quaternion_product(a_q, a_t, b_q, b_t, r_q, r_t); + no_opt_naive_quaternion_product(a_q, a_t, b_q, b_t, expectedq, expectedt); + for (int i = 0; i < SIZE; i++) { + printf("%f\n", r_q[i]); + assert(expectedq[i] == r_q[i]); + } + for (int i = 0; i < SIZE; i++) { + printf("%f\n", r_t[i]); 
+ assert(expectedt[i] == r_t[i]); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v3.c b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v3.c new file mode 100644 index 00000000..e8f15af3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v3.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 5 +#define COL_SIZE 5 +#define F_SIZE 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 2; k1++) { + for (int k2 = 0; k2 < 2; k2++) { + temp += filter_in[k1 * 2 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +void no_opt_stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], + float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 2; k1++) { + for (int k2 = 0; k2 < 2; k2++) { + temp += filter_in[k1 * 2 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + srand(1); // set seed + + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = sol_out[i]; + } + stencil(orig_in, sol_out, filter_in); + no_opt_stencil(orig_in, expected, filter_in); + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("%f\n", sol_out[i]); + printf("%f\n", expected[i]); + assert(expected[i] == sol_out[i]); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v4.c b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v4.c new file mode 100644 index 00000000..b8d2bae8 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v4.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 2 +#define COL_SIZE 3 +#define F_SIZE 1 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 2; k1++) { + for (int k2 = 0; k2 < 2; k2++) { + temp += filter_in[k1 * 2 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +void no_opt_stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], + float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 2; k1++) { + for (int k2 = 0; k2 < 2; k2++) { + temp += filter_in[k1 * 2 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { 
+ srand(1); // set seed + + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = sol_out[i]; + } + stencil(orig_in, sol_out, filter_in); + no_opt_stencil(orig_in, expected, filter_in); + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("%f\n", sol_out[i]); + printf("%f\n", expected[i]); + assert(expected[i] == sol_out[i]); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v5.c b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v5.c new file mode 100644 index 00000000..335eb8e3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v5.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 1 +#define COL_SIZE 1 +#define F_SIZE 1 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +void no_opt_stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], + float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + srand(1); // set seed + + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = sol_out[i]; + } + stencil(orig_in, sol_out, filter_in); + no_opt_stencil(orig_in, expected, filter_in); + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("%f\n", sol_out[i]); + printf("%f\n", expected[i]); + assert(expected[i] == sol_out[i]); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/repeat-thrice.c b/src/dios-egraphs/Diospyros/c-tests/repeat-thrice.c new file mode 100644 index 00000000..298a4126 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/repeat-thrice.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define ROWS 12 +#define THIRD 4 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i 
< THIRD; i++) { + d_out[i] = a_in[i] + b_in[i]; + } + for (int i = THIRD; i < 2 * THIRD; i++) { + d_out[i] = c_in[i] + b_in[i]; + } + for (int i = 2 * THIRD; i < 3 * THIRD; i++) { + d_out[i] = c_in[i] + b_in[i]; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < THIRD; i++) { + d_out[i] = a_in[i] + b_in[i]; + } + for (int i = THIRD; i < 2 * THIRD; i++) { + d_out[i] = c_in[i] + b_in[i]; + } + for (int i = 2 * THIRD; i < 3 * THIRD; i++) { + d_out[i] = c_in[i] + b_in[i]; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/repeat-twice.c b/src/dios-egraphs/Diospyros/c-tests/repeat-twice.c new file mode 100644 index 00000000..e60b3534 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/repeat-twice.c @@ -0,0 +1,73 @@ +#include +#include +#include +#include +#include + +#define ROWS 8 +#define HALF 4 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < HALF; i++) { + d_out[i] = a_in[i] + b_in[i]; + } + for (int i = HALF; i < ROWS; i++) { + d_out[i] = c_in[i] + b_in[i]; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < HALF; i++) { + d_out[i] = a_in[i] + b_in[i]; + } + for (int i = HALF; i < ROWS; i++) { + d_out[i] = c_in[i] + b_in[i]; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", 
d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/rows-greater-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/rows-greater-matrix-multiply.c new file mode 100644 index 00000000..422f0e10 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/rows-greater-matrix-multiply.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 5 +#define A_COLS 3 +#define B_COLS 2 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_COLS][B_COLS]; + for (int i = 0; i < A_COLS; i++) { + for (int j = 0; j < B_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/skip.c b/src/dios-egraphs/Diospyros/c-tests/skip.c new file mode 100644 index 00000000..485518ea --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/skip.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +#define ROWS 8 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i != 5) { + d_out[i] = a_in[i] + b_in[i]; + } + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i != 5) { + d_out[i] = a_in[i] + b_in[i]; + } + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for 
(int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = d_out[i]; + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file From 5dbd34bffffeb410b3bd4912390c7774371b9c7f Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Mon, 24 Jul 2023 03:10:35 -0400 Subject: [PATCH 129/143] address rewriting was not optimal found that it slowed down code by 2x sometimes data gathered automatically using scriot script also builds plots and tables and csvs --- .../Diospyros/AddressRewriting.cpp | 531 +++++++++++ .../Diospyros/LoadStoreMovement.cpp | 849 +++++++++++++++++- src/dios-egraphs/Diospyros/Makefile | 53 +- .../Diospyros/VectorizationUtilities.cpp | 252 ++++++ .../benchmarks/conv/3-by-3-and-2-by-2-conv.c | 94 ++ .../benchmarks/conv/3-by-3-and-3-by-3-conv.c | 94 ++ .../benchmarks/conv/3-by-3-and-4-by-4-conv.c | 94 ++ .../benchmarks/conv/4-by-4-and-2-by-2-conv.c | 94 ++ .../benchmarks/conv/4-by-4-and-4-by-4-conv.c | 94 ++ .../benchmarks/conv/5-by-5-and-2-by-2-conv.c | 94 ++ .../5-by-5-and-3-by-3-conv.c} | 50 +- .../benchmarks/conv/5-by-5-and-4-by-4-conv.c | 94 ++ .../benchmarks/conv/6-by-6-and-2-by-2-conv.c | 94 ++ .../benchmarks/conv/6-by-6-and-3-by-3-conv.c | 94 ++ .../benchmarks/conv/6-by-6-and-4-by-4-conv.c | 94 ++ .../benchmarks/conv/8-by-8-and-2-by-2-conv.c | 94 ++ .../benchmarks/conv/8-by-8-and-3-by-3-conv.c | 94 ++ .../benchmarks/conv/8-by-8-and-4-by-4-conv.c | 94 ++ .../Diospyros/benchmarks/conv/test-utils.h | 6 + .../benchmarks/mat-mul/10-by-10-mat-mul.c | 85 ++ .../benchmarks/mat-mul/11-by-11-mat-mul.c | 85 ++ .../mat-mul.c => mat-mul/12-by-12-mat-mul.c} | 22 +- .../benchmarks/mat-mul/15-by-15-mat-mul.c | 85 ++ .../benchmarks/mat-mul/16-by-16-mat-mul.c | 85 ++ .../vvadd.c => mat-mul/2-by-2-mat-mul.c} | 50 +- .../vvadd.c => mat-mul/3-by-3-mat-mul.c} | 50 +- .../benchmarks/mat-mul/4-by-4-mat-mul.c | 85 ++ .../mat-mul.c => mat-mul/5-by-5-mat-mul.c} | 12 +- .../benchmarks/mat-mul/6-by-6-mat-mul.c | 85 ++ .../benchmarks/mat-mul/7-by-7-mat-mul.c | 85 ++ .../benchmarks/mat-mul/8-by-8-mat-mul.c | 85 ++ .../benchmarks/mat-mul/9-by-9-mat-mul.c | 85 ++ .../Diospyros/benchmarks/mat-mul/test-utils.h | 6 + .../benchmarks/{optimized => q-prod}/qprod.c | 42 +- .../Diospyros/benchmarks/q-prod/test-utils.h | 6 + .../benchmarks/qr-decomp/3-qr-decomp.c | 176 ++++ .../qr-decomp.c => qr-decomp/4-qr-decomp.c} | 142 +-- .../benchmarks/qr-decomp/5-qr-decomp.c | 176 ++++ .../benchmarks/qr-decomp/6-qr-decomp.c | 176 ++++ .../benchmarks/qr-decomp/test-utils.h | 6 + .../stencil/12-by-12-and-2-by-2-stencil.c | 85 ++ .../stencil/12-by-12-and-3-by-3-stencil.c | 84 ++ .../stencil/16-by-16-and-2-by-2-stencil.c | 85 ++ .../16-by-16-and-3-by-3-stencil.c} | 43 +- .../stencil/4-by-4-and-2-by-2-stencil.c | 85 ++ .../stencil/4-by-4-and-3-by-3-stencil.c | 84 ++ .../stencil/5-by-5-and-2-by-2-stencil.c | 85 ++ .../stencil/5-by-5-and-3-by-3-stencil.c | 84 ++ .../stencil/6-by-6-and-2-by-2-stencil.c | 85 ++ .../stencil/6-by-6-and-3-by-3-stencil.c | 84 ++ .../stencil/8-by-8-and-2-by-2-stencil.c | 85 ++ .../stencil/8-by-8-and-3-by-3-stencil.c | 84 ++ 
.../Diospyros/benchmarks/stencil/test-utils.h | 6 + .../Diospyros/benchmarks/test-utils.h | 6 + src/dios-egraphs/Diospyros/build.rs | 12 + .../Diospyros/c-tests/2d-matrix-multiply.c | 5 + .../data-plots/data/all-data/all-data.csv | 45 + .../data/individual-data/conv-data.csv | 15 + .../data/individual-data/mat-mul-data.csv | 14 + .../data/individual-data/q-prod-data.csv | 2 + .../data/individual-data/qr-decomp-data.csv | 5 + .../data/individual-data/stencil-data.csv | 13 + .../data/original-data/all-data.csv | 27 + .../plots/baseline-diospyros-peedup.png | Bin 0 -> 44382 bytes .../plots/baseline-slp-diospyros-speedup.png | Bin 0 -> 48867 bytes .../Diospyros/data-plots/plots/conv.png.png | Bin 0 -> 32212 bytes .../Diospyros/data-plots/plots/mat-mul.png | Bin 0 -> 37124 bytes .../Diospyros/data-plots/plots/q-prod.png | Bin 0 -> 22755 bytes .../plots/slp-diospyros-speedup.png | Bin 0 -> 45270 bytes .../Diospyros/data-plots/plots/speedup.png | Bin 0 -> 44382 bytes .../Diospyros/data-plots/plots/stencil.png | Bin 0 -> 41939 bytes .../Diospyros/data-plots/table/table.txt | 52 ++ .../data-plots/utils/gen-latex-table.py | 51 ++ .../data-plots/utils/merge-all-data.py | 35 + .../Diospyros/data-plots/utils/order.py | 35 + .../data-plots/utils/plot-base-slp-dios.py | 69 ++ .../data-plots/utils/run-all-benchmarks.sh | 5 + .../data-plots/utils/run-benchmarks.py | 54 ++ .../Diospyros/plot-utilities/plot.py | 6 - .../Diospyros/stencil-2d-results.txt | 8 - 80 files changed, 5695 insertions(+), 335 deletions(-) create mode 100644 src/dios-egraphs/Diospyros/AddressRewriting.cpp create mode 100644 src/dios-egraphs/Diospyros/VectorizationUtilities.cpp create mode 100644 src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-2-by-2-conv.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-3-by-3-conv.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-4-by-4-conv.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/conv/4-by-4-and-2-by-2-conv.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/conv/4-by-4-and-4-by-4-conv.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-2-by-2-conv.c rename src/dios-egraphs/Diospyros/benchmarks/{optimized/conv-2d.c => conv/5-by-5-and-3-by-3-conv.c} (65%) create mode 100644 src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-4-by-4-conv.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-2-by-2-conv.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-3-by-3-conv.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-4-by-4-conv.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-2-by-2-conv.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-3-by-3-conv.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-4-by-4-conv.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/conv/test-utils.h create mode 100644 src/dios-egraphs/Diospyros/benchmarks/mat-mul/10-by-10-mat-mul.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/mat-mul/11-by-11-mat-mul.c rename src/dios-egraphs/Diospyros/benchmarks/{baseline/mat-mul.c => mat-mul/12-by-12-mat-mul.c} (79%) create mode 100644 src/dios-egraphs/Diospyros/benchmarks/mat-mul/15-by-15-mat-mul.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/mat-mul/16-by-16-mat-mul.c rename src/dios-egraphs/Diospyros/benchmarks/{baseline/vvadd.c => mat-mul/2-by-2-mat-mul.c} (53%) rename 
src/dios-egraphs/Diospyros/benchmarks/{optimized/vvadd.c => mat-mul/3-by-3-mat-mul.c} (53%) create mode 100644 src/dios-egraphs/Diospyros/benchmarks/mat-mul/4-by-4-mat-mul.c rename src/dios-egraphs/Diospyros/benchmarks/{optimized/mat-mul.c => mat-mul/5-by-5-mat-mul.c} (91%) create mode 100644 src/dios-egraphs/Diospyros/benchmarks/mat-mul/6-by-6-mat-mul.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/mat-mul/7-by-7-mat-mul.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/mat-mul/8-by-8-mat-mul.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/mat-mul/9-by-9-mat-mul.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/mat-mul/test-utils.h rename src/dios-egraphs/Diospyros/benchmarks/{optimized => q-prod}/qprod.c (77%) create mode 100644 src/dios-egraphs/Diospyros/benchmarks/q-prod/test-utils.h create mode 100644 src/dios-egraphs/Diospyros/benchmarks/qr-decomp/3-qr-decomp.c rename src/dios-egraphs/Diospyros/benchmarks/{optimized/qr-decomp.c => qr-decomp/4-qr-decomp.c} (55%) create mode 100644 src/dios-egraphs/Diospyros/benchmarks/qr-decomp/5-qr-decomp.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/qr-decomp/6-qr-decomp.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/qr-decomp/test-utils.h create mode 100644 src/dios-egraphs/Diospyros/benchmarks/stencil/12-by-12-and-2-by-2-stencil.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/stencil/12-by-12-and-3-by-3-stencil.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/stencil/16-by-16-and-2-by-2-stencil.c rename src/dios-egraphs/Diospyros/benchmarks/{optimized/stencil-2d.c => stencil/16-by-16-and-3-by-3-stencil.c} (70%) create mode 100644 src/dios-egraphs/Diospyros/benchmarks/stencil/4-by-4-and-2-by-2-stencil.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/stencil/4-by-4-and-3-by-3-stencil.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/stencil/5-by-5-and-2-by-2-stencil.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/stencil/5-by-5-and-3-by-3-stencil.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/stencil/6-by-6-and-2-by-2-stencil.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/stencil/6-by-6-and-3-by-3-stencil.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/stencil/8-by-8-and-2-by-2-stencil.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/stencil/8-by-8-and-3-by-3-stencil.c create mode 100644 src/dios-egraphs/Diospyros/benchmarks/stencil/test-utils.h create mode 100644 src/dios-egraphs/Diospyros/benchmarks/test-utils.h create mode 100644 src/dios-egraphs/Diospyros/data-plots/data/all-data/all-data.csv create mode 100644 src/dios-egraphs/Diospyros/data-plots/data/individual-data/conv-data.csv create mode 100644 src/dios-egraphs/Diospyros/data-plots/data/individual-data/mat-mul-data.csv create mode 100644 src/dios-egraphs/Diospyros/data-plots/data/individual-data/q-prod-data.csv create mode 100644 src/dios-egraphs/Diospyros/data-plots/data/individual-data/qr-decomp-data.csv create mode 100644 src/dios-egraphs/Diospyros/data-plots/data/individual-data/stencil-data.csv create mode 100644 src/dios-egraphs/Diospyros/data-plots/data/original-data/all-data.csv create mode 100644 src/dios-egraphs/Diospyros/data-plots/plots/baseline-diospyros-peedup.png create mode 100644 src/dios-egraphs/Diospyros/data-plots/plots/baseline-slp-diospyros-speedup.png create mode 100644 src/dios-egraphs/Diospyros/data-plots/plots/conv.png.png create mode 100644 src/dios-egraphs/Diospyros/data-plots/plots/mat-mul.png create 
mode 100644 src/dios-egraphs/Diospyros/data-plots/plots/q-prod.png create mode 100644 src/dios-egraphs/Diospyros/data-plots/plots/slp-diospyros-speedup.png create mode 100644 src/dios-egraphs/Diospyros/data-plots/plots/speedup.png create mode 100644 src/dios-egraphs/Diospyros/data-plots/plots/stencil.png create mode 100644 src/dios-egraphs/Diospyros/data-plots/table/table.txt create mode 100644 src/dios-egraphs/Diospyros/data-plots/utils/gen-latex-table.py create mode 100644 src/dios-egraphs/Diospyros/data-plots/utils/merge-all-data.py create mode 100644 src/dios-egraphs/Diospyros/data-plots/utils/order.py create mode 100644 src/dios-egraphs/Diospyros/data-plots/utils/plot-base-slp-dios.py create mode 100644 src/dios-egraphs/Diospyros/data-plots/utils/run-all-benchmarks.sh create mode 100644 src/dios-egraphs/Diospyros/data-plots/utils/run-benchmarks.py delete mode 100644 src/dios-egraphs/Diospyros/plot-utilities/plot.py delete mode 100644 src/dios-egraphs/Diospyros/stencil-2d-results.txt diff --git a/src/dios-egraphs/Diospyros/AddressRewriting.cpp b/src/dios-egraphs/Diospyros/AddressRewriting.cpp new file mode 100644 index 00000000..fcfce044 --- /dev/null +++ b/src/dios-egraphs/Diospyros/AddressRewriting.cpp @@ -0,0 +1,531 @@ +#include + +#include +#include +#include +#include +#include + +#include "VectorizationUtilities.cpp" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +const std::string MAIN_FUNCTION_NAME = "main"; +const std::string NO_OPT_PREFIX = "no_opt_"; +const std::string GATHER_NAME = "llvm.masked.gather.v4f32.v4p0f32"; +const uint32_t VECTOR_WIDTH = 4; +const uint32_t FLOAT_WIDTH = 4; + +namespace { +struct AddressRewritingPass : public FunctionPass { + static char ID; + AddressRewritingPass() : FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } + + using chunks_t = Chunking::chunks_t; + using chunk_t = Chunking::chunk_t; + + std::vector get_gather_calls(chunk_t chunk) { + std::vector gather_calls = {}; + for (auto instr : chunk) { + if (Gather::isa_gather_instruction(instr)) { + gather_calls.push_back(instr); + } + } + return gather_calls; + } + + Instruction *get_first_non_phi_instr(chunk_t chunk) { + assert(!chunk.empty()); + for (auto instr : chunk) { + if (!isa(instr)) { + return instr; + } + } + return chunk.back(); + } + + std::map> + get_gather_addresses_from_chunk(chunk_t chunk) { + std::map> result = {}; + for (auto instr : chunk) { + if (Gather::isa_gather_instruction(instr)) { + std::vector addresses = + Gather::get_gather_addresses(instr); + result.emplace(instr, addresses); + } + } + return result; + } + + std::vector join_all_addresses( + std::map> gather_map) { + std::vector result = {}; + for (auto [_, addresses] : gather_map) { + result.insert(result.end(), addresses.begin(), addresses.end()); 
+ } + return result; + } + + std::map> get_base_offsets( + std::vector addresses, std::vector array_bases, + ScalarEvolution *SE) { + std::map> result = {}; + for (auto address : addresses) { + result.emplace(address, + Array::get_base_reference(address, array_bases, SE)); + } + return result; + } + + std::vector find_minimum_cover_for_offsets( + std::set offsets) { + std::set minimum_cover = {}; + for (auto offset : offsets) { + assert(offset >= 0); + int remainder = + ((offset / FLOAT_WIDTH) % VECTOR_WIDTH) * FLOAT_WIDTH; + int aligned_addr = offset - remainder; + minimum_cover.insert(aligned_addr); + } + std::vector result = {}; + for (auto offset : minimum_cover) { + assert(offset >= 0); + result.push_back(offset); + } + std::sort(result.begin(), result.end()); + return result; + } + + /** + * Rewrites Load-Gather Addresses in a Chunk + * + * Grabs Chunks + * For each chunk: + * Gets Gather Addresses + * Gets Gather call instruction + * Builds a map from Gather Address to Gather call instruction [many to + * one] + * + * Maps each address to pair of an array base and an offset from the + * array base + * Remove addresses with unknown array base or array offset + * Build a map between gather call instruction and the array base(s) + * and the correspoding offsets + * Use the Gather Map from address to call and + * map from address to pair of base / offset + * + * For each gather represented + * Generate a "cover" for all the unique array offsets using + * aligned & consecutive load operations + * Build a map from each load to the array offsets and base + * it corresponds to + * Generate appropriate shuffles + * Stitch in the shuffles to the old gather call instruction + * by inserting at the beginning of the chunk + * Remove the old gather call instruction + * + * + * E.g. suppose we do a gather from A[3], A[5], B[2], C[7] + * We build a load(A[0-3]), load(A[4-7]), load(B[0-3]), load(c[4-7]) + * We then build the appropriate shuffles, here 3 shuffles are needed + * Then we stitch in the instruction + * + * We would have gathered + */ + void rewrite_addresses(BasicBlock &B, std::vector array_bases, + AliasAnalysis *AA, ScalarEvolution *SE, + LLVMContext &C, unsigned address_space) { + chunks_t chunks = Chunking::build_chunks(&B, AA); + + // Define floating point 4 wide vector pointer type, e.g. 
<4 x float> * + Type *float_ty = Type::getFloatTy(C); + VectorType *vector_4_float_ty = VectorType::get(float_ty, VECTOR_WIDTH); + PointerType *pointer_to_vector_4_float_ty = + PointerType::get(vector_4_float_ty, address_space); + + for (chunk_t chunk : chunks) { + if (chunk.empty()) { + continue; + } + errs() << "This is the chunk\n"; + for (auto instr : chunk) { + errs() << *instr << "\n"; + } + std::vector gather_calls = get_gather_calls(chunk); + std::map> gather2addresses = + get_gather_addresses_from_chunk(chunk); + std::vector all_addresses = + join_all_addresses(gather2addresses); + // get offsets and base arrays for all addresses + std::map> addresses2base_and_offset = + get_base_offsets(all_addresses, array_bases, SE); + + // start construction after the first element that is non-phi in the + // chunk + Instruction *first_instr = get_first_non_phi_instr(chunk); + assert(first_instr != NULL); + IRBuilder<> builder(first_instr); + for (auto gather : gather_calls) { + assert(Gather::isa_gather_instruction(gather)); + errs() << "Gather Instruction\n"; + errs() << *gather << "\n"; + // get all base arrays and offsets required for the gather + std::set> all_base_offset_pairs = {}; + for (auto address : gather2addresses[gather]) { + all_base_offset_pairs.insert( + addresses2base_and_offset[address]); + } + // if any of the base array or offsets are negative, skip this + // gather + // Also get all base arrays required for the cover + // And for each base array, get all the offsets needed + std::set base_arrays = {}; + std::map> base2offsets = {}; + bool continue_loop = false; + for (auto [base, offset] : all_base_offset_pairs) { + base_arrays.insert(base); + if (base2offsets.count(base) == 1) { + base2offsets[base].insert(offset); + } else { + base2offsets[base] = {offset}; + } + if (base < 0 || offset < 0) { + continue_loop = true; + break; + } + } + if (continue_loop) { + continue; + } + + std::map>, + std::vector> + vector_load_addresses2usage_flags = {}; + std::map>, Value *> + vector_load_addresses2load_instr = {}; + // find minimium covers for each offsets + for (auto [base, offsets] : base2offsets) { + assert(base >= 0); + std::vector minimum_cover = + find_minimum_cover_for_offsets(offsets); + // build the minimum covers + for (auto aligned_offset : minimum_cover) { + Value *bitcast_instr = builder.CreateBitOrPointerCast( + array_bases[base], pointer_to_vector_4_float_ty, + "bitcast-for-alignment"); + Value *aligned_gep_instr = builder.CreateConstGEP1_32( + vector_4_float_ty, bitcast_instr, + (aligned_offset / (FLOAT_WIDTH * VECTOR_WIDTH)), + "gep-for-aligned-addr"); + Value *aligned_load = builder.CreateLoad( + vector_4_float_ty, aligned_gep_instr, false, + "load-aligned-addr"); + std::vector load_addresses = {}; + for (int i = 0; i < VECTOR_WIDTH; i++) { + load_addresses.push_back(aligned_offset + + i * FLOAT_WIDTH); + } + if (vector_load_addresses2usage_flags.count( + {base, load_addresses}) == 0) { + std::vector all_false = {}; + for (int i = 0; i < VECTOR_WIDTH; i++) { + all_false.push_back(false); + } + vector_load_addresses2usage_flags[{ + base, load_addresses}] = all_false; + } + for (auto offset : offsets) { + std::vector::iterator itr = + std::find(load_addresses.begin(), + load_addresses.end(), offset); + if (itr != load_addresses.cend()) { + int index = + std::distance(load_addresses.begin(), itr); + vector_load_addresses2usage_flags[{ + base, load_addresses}][index] = true; + } + } + vector_load_addresses2load_instr[{ + base, load_addresses}] = aligned_load; + } + } 
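+            // For illustration (with FLOAT_WIDTH == 4 bytes and
+            // VECTOR_WIDTH == 4 lanes as defined above): gathering floats at
+            // byte offsets {12, 20, 36} of a single base array produces the
+            // aligned cover {0, 16, 32}, i.e. three <4 x float> loads at
+            // vector indices 0, 1 and 2, with usage flags marking lanes
+            // 3, 1 and 1 of those loads as the elements actually gathered.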
+ // the correct order of the base/offset pairs for the gather + std::vector> + actual_ordered_base_offsets = {}; + std::vector addresses = gather2addresses[gather]; + std::reverse(addresses.begin(), addresses.end()); + for (auto address : addresses) { + actual_ordered_base_offsets.push_back( + addresses2base_and_offset[address]); + } + errs() << "Actual Gather Ordered Base and Offsets\n"; + for (auto [base, offset] : actual_ordered_base_offsets) { + errs() << base << ", " << offset << "\n"; + } + + std::vector< + std::pair>, + std::vector>> + to_merge = {}; + for (auto [pair, usage_flags] : + vector_load_addresses2usage_flags) { + to_merge.push_back({pair, usage_flags}); + } + assert(to_merge.size() != 0); + auto [first_base, first_load_addresses] = + to_merge.front().first; + std::vector first_usage = to_merge.front().second; + Value *final_shuffle = vector_load_addresses2load_instr[{ + first_base, first_load_addresses}]; + Value *initial_load = final_shuffle; + + // merge size of 1: + if (to_merge.size() == 1) { + std::vector shuffle_indices = {}; + for (auto [actual_base, actual_offset] : + actual_ordered_base_offsets) { + bool found_and_added = false; + for (int i = 0; i < first_load_addresses.size(); i++) { + auto first_load_address = first_load_addresses[i]; + if ((first_base == actual_base) && + (first_load_address == actual_offset)) { + shuffle_indices.push_back(i); + found_and_added = true; + } + } + if (!found_and_added) { + shuffle_indices.push_back(0); + } + } + assert(shuffle_indices.size() == VECTOR_WIDTH); + const std::vector mask_vector = shuffle_indices; + // build the shuffles back to the gather + ArrayRef mask = ArrayRef(mask_vector); + final_shuffle = builder.CreateShuffleVector( + final_shuffle, final_shuffle, mask, "one-shuffle-only"); + + // if the shuffle indices is the identity, just use the load + std::vector identity{0, 1, 2, 3}; + if (shuffle_indices == identity) { + final_shuffle = initial_load; + } + } else { + // merge size of 2 or more + auto [second_base, second_load_addresses] = + to_merge[1].first; + + // do the first 2 together + std::vector shuffle_indices = {}; + for (auto [actual_base, actual_offset] : + actual_ordered_base_offsets) { + bool found_and_added = false; + for (int i = 0; i < first_load_addresses.size(); i++) { + auto first_load_address = first_load_addresses[i]; + if ((first_base == actual_base) && + (first_load_address == actual_offset)) { + shuffle_indices.push_back(i); + found_and_added = true; + } + } + for (int i = 0; i < second_load_addresses.size(); i++) { + auto second_load_address = second_load_addresses[i]; + if ((second_base == actual_base) && + (second_load_address == actual_offset)) { + shuffle_indices.push_back(i + VECTOR_WIDTH); + found_and_added = true; + } + } + if (!found_and_added) { + shuffle_indices.push_back(0); + } + } + assert(shuffle_indices.size() == VECTOR_WIDTH); + const std::vector mask_vector = shuffle_indices; + // build the shuffles back to the gather + ArrayRef mask = ArrayRef(mask_vector); + Value *second_load = vector_load_addresses2load_instr[{ + second_base, second_load_addresses}]; + final_shuffle = builder.CreateShuffleVector( + final_shuffle, second_load, mask, "one-shuffle-only"); + + // do the remainder + // finish after first merge + for (int i = 2; i < to_merge.size(); i++) { + auto [remaining_pair, _] = to_merge[i]; + uint32_t remaining_base = remaining_pair.first; + std::vector remaining_offsets = + remaining_pair.second; + // do a shuffle into the correct positions + std::vector 
shuffle_indices = {}; + int j = 0; + for (auto [actual_base, actual_offset] : + actual_ordered_base_offsets) { + bool found_and_added = false; + for (int i = 0; i < remaining_offsets.size(); i++) { + uint32_t remaining_offset = + remaining_offsets[i]; + if ((actual_base == remaining_base) && + (actual_offset == remaining_offset)) { + shuffle_indices.push_back(i); + found_and_added = true; + } + } + if (!found_and_added) { + shuffle_indices.push_back(j + VECTOR_WIDTH); + } + ++j; + } + assert(shuffle_indices.size() == VECTOR_WIDTH); + const std::vector mask_vector = shuffle_indices; + // build the shuffles back to the gather + ArrayRef mask = ArrayRef(mask_vector); + final_shuffle = builder.CreateShuffleVector( + vector_load_addresses2load_instr[remaining_pair], + final_shuffle, mask, "one-shuffle-only"); + } + } + + // // for the first pair, do the shuffling into correct position + // // do a shuffle into the correct positions + + // std::vector shuffle_indices = {}; + // for (auto [actual_base, actual_offset] : + // actual_ordered_base_offsets) { + // bool found_and_added = false; + // for (int i = 0; i < first_load_addresses.size(); i++) { + // auto first_load_address = first_load_addresses[i]; + // if ((first_base == actual_base) && + // (first_load_address == actual_offset)) { + // shuffle_indices.push_back(i); + // found_and_added = true; + // } + // } + // if (!found_and_added) { + // shuffle_indices.push_back(0); + // } + // } + // assert(shuffle_indices.size() == VECTOR_WIDTH); + // const std::vector mask_vector = shuffle_indices; + // // build the shuffles back to the gather + // ArrayRef mask = ArrayRef(mask_vector); + // final_shuffle = builder.CreateShuffleVector( + // final_shuffle, final_shuffle, mask, "one-shuffle-only"); + + // // finish after first merge + // for (int i = 1; i < to_merge.size(); i++) { + // auto [remaining_pair, _] = to_merge[i]; + // uint32_t remaining_base = remaining_pair.first; + // std::vector remaining_offsets = + // remaining_pair.second; + // // do a shuffle into the correct positions + // std::vector shuffle_indices = {}; + // int j = 0; + // for (auto [actual_base, actual_offset] : + // actual_ordered_base_offsets) { + // bool found_and_added = false; + // for (int i = 0; i < remaining_offsets.size(); i++) { + // uint32_t remaining_offset = remaining_offsets[i]; + // if ((actual_base == remaining_base) && + // (actual_offset == remaining_offset)) { + // shuffle_indices.push_back(i); + // found_and_added = true; + // } + // } + // if (!found_and_added) { + // shuffle_indices.push_back(j + VECTOR_WIDTH); + // } + // ++j; + // } + // assert(shuffle_indices.size() == VECTOR_WIDTH); + // const std::vector mask_vector = shuffle_indices; + // // build the shuffles back to the gather + // ArrayRef mask = ArrayRef(mask_vector); + // final_shuffle = builder.CreateShuffleVector( + // vector_load_addresses2load_instr[remaining_pair], + // final_shuffle, mask, "one-shuffle-only"); + // } + + // replace all uses of gather with the final shuffle + // then remove gather + gather->replaceAllUsesWith(final_shuffle); + gather->eraseFromParent(); + } + } + } + + virtual bool runOnFunction(Function &F) override { + if (F.getName() == MAIN_FUNCTION_NAME || + (F.getName().size() > NO_OPT_PREFIX.size() && + F.getName().substr(0, NO_OPT_PREFIX.size()) == NO_OPT_PREFIX)) { + return false; + } + AliasAnalysis *AA = &getAnalysis().getAAResults(); + ScalarEvolution *SE = + &getAnalysis().getSE(); + TargetLibraryInfo *TLI = + &getAnalysis().getTLI(F); + std::vector 
array_bases = Array::get_array_bases(F, TLI); + LLVMContext &C = F.getContext(); + unsigned address_space = 0; + bool found_address_space = false; + for (auto &B : F) { + for (auto &I : B) { + if (GetElementPtrInst *gep = dyn_cast(&I)) { + address_space = gep->getAddressSpace(); + found_address_space = true; + break; + } + } + } + if (!found_address_space) { + return false; + } + for (auto &B : F) { + rewrite_addresses(B, array_bases, AA, SE, C, address_space); + } + return true; + } +}; +} // namespace + +char AddressRewritingPass::ID = 0; + +// Automatically enable the pass. +// http://adriansampson.net/blog/clangpass.html +static void registerAddressRewritingPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + PM.add(new AddressRewritingPass()); +} + +static RegisterPass X("addressrw", + "Address Rewriting Pass", + false /* Only looks at CFG */, + true /* Analysis Pass */); + +static RegisterStandardPasses RegisterMyPass( + PassManagerBuilder::EP_EarlyAsPossible, registerAddressRewritingPass); \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index 903241a3..25fa2b89 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -22,6 +22,8 @@ using namespace llvm; const std::string MAIN_FUNCTION_NAME = "main"; const std::string NO_OPT_PREFIX = "no_opt_"; +const std::string GATHER_NAME = "llvm.masked.gather.v4f32.v4p0f32"; +const uint32_t VECTOR_WIDTH = 4; namespace { struct LoadStoreMovementPass : public FunctionPass { @@ -33,6 +35,778 @@ struct LoadStoreMovementPass : public FunctionPass { AU.addRequired(); } + bool isa_gather_instruction(Instruction *instr) { + if (CallInst *call_instr = dyn_cast(instr)) { + Function *fun = call_instr->getCalledFunction(); + // Source: + // https://stackoverflow.com/questions/11686951/how-can-i-get-function-name-from-callinst-in-llvm + // Fun Could be NULL, in which case indirect call occurs, i cannot + // get name. 
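+            // For reference, the matched call is the 4-wide masked gather
+            // intrinsic named by GATHER_NAME; it looks roughly like
+            //   %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(
+            //            <4 x float*> %ptrs, i32 4, <4 x i1> %mask,
+            //            <4 x float> %passthru)
+            // (the exact alignment, mask and passthru operands depend on how
+            // the vectorizer emitted the call).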
+ if (fun) { + if (fun->getName() == GATHER_NAME) { + return true; + } + } + } + return false; + } + + bool may_alias(Value *mem_addr1, Value *mem_addr2, AliasAnalysis *AA) { + return (!AA->isNoAlias( + mem_addr1, + LocationSize::precise( + mem_addr1->getType()->getPrimitiveSizeInBits()), + mem_addr2, + LocationSize::precise( + mem_addr2->getType()->getPrimitiveSizeInBits())) || + AA->isMustAlias(mem_addr1, mem_addr2)); + } + + std::vector get_gather_addresses(Instruction *call_to_gather) { + assert(isa_gather_instruction(call_to_gather)); + + Instruction *insert_element_instr = + dyn_cast(call_to_gather->getOperand(0)); + if (insert_element_instr == NULL) { + throw "Gather Arguments Pointer Vector was NULL"; + } + + std::vector gather_addresses = {}; + // hardcode to gathers of length 4 only + for (int i = 0; i < VECTOR_WIDTH; i++) { + Value *pointer = insert_element_instr->getOperand(1); + gather_addresses.push_back(pointer); + Instruction *new_insert_element_instr = + dyn_cast(insert_element_instr->getOperand(0)); + insert_element_instr = new_insert_element_instr; + } + return gather_addresses; + } + + std::vector get_gather_insert_instrs( + Instruction *call_to_gather) { + assert(isa_gather_instruction(call_to_gather)); + + Instruction *insert_element_instr = + dyn_cast(call_to_gather->getOperand(0)); + if (insert_element_instr == NULL) { + throw "Gather Arguments Pointer Vector was NULL"; + } + + std::vector insert_instrs = {}; + // hardcode to gathers of length 4 only + for (int i = 0; i < VECTOR_WIDTH; i++) { + Value *pointer = insert_element_instr->getOperand(1); + insert_instrs.push_back(insert_element_instr); + Instruction *new_insert_element_instr = + dyn_cast(insert_element_instr->getOperand(0)); + insert_element_instr = new_insert_element_instr; + } + return insert_instrs; + } + + /** + * True iff the gather instr can be moved before prior_instr + */ + bool can_move_gather_instruction_before(Instruction *gather_instr, + Instruction *prior_instr, + AliasAnalysis *AA) { + // If the prior instruction is a phi node, you cannot move the + // instrution back + if (isa(prior_instr)) { + return false; + } + + // If the prior Instruction is a call inst [which is not to a gather + // intrinsic], do not push the current instruction back A call + // instruciton could have side effects to memory In addition, a call + // could be to @llvm.memset.p0i8.i64(i8* nonnull align 16 + // dereferenceable(40) %2, i8 0, i64 40, i1 false) or + // @memset_pattern16(i8* nonnull %2, i8* bitcast + // ([4 x float]* @.memset_pattern to i8*), i64 40) #6 which + // require alias analysis as well + + if (isa(prior_instr) && + !isa_gather_instruction(prior_instr)) { + return false; + } + + // If the prior instruction is a gather instruction, do comparisons of + // the addresses + + std::vector current_instr_addrs = + get_gather_addresses(gather_instr); + if (isa(prior_instr) && isa_gather_instruction(prior_instr)) { + std::vector prior_instr_addrs = + get_gather_addresses(prior_instr); + for (auto curr_addr : current_instr_addrs) { + for (auto prior_addr : prior_instr_addrs) { + assert(curr_addr->getType()->isPointerTy()); + assert(prior_addr->getType()->isPointerTy()); + if (may_alias(curr_addr, prior_addr, AA)) { + return false; + } + } + } + } + + // If the prior instruction is used in the load's + // arguments, do not push it back + + // In this case, the prior instruction could only be the geps in + // the current instruction addreses accessed + std::vector current_uses = + 
get_gather_insert_instrs(gather_instr); + for (auto current_use : current_uses) { + if (current_use == prior_instr) { + return false; + } + } + + // If the prior instruction alias with the load + // instruction, do not push the store back + // We do not rehandle gather instructions, which were already handled. + if (prior_instr->mayReadOrWriteMemory() && + !isa_gather_instruction(prior_instr)) { + Value *prior_addr = NULL; + if (isa(prior_instr)) { + prior_addr = + dyn_cast(prior_instr)->getPointerOperand(); + } else if (isa(prior_instr)) { + prior_addr = + dyn_cast(prior_instr)->getPointerOperand(); + } else { + errs() << *prior_instr << "\n"; + throw "Unmatched Instruction Type"; + } + for (auto curr_addr : current_instr_addrs) { + assert(curr_addr->getType()->isPointerTy()); + assert(prior_addr->getType()->isPointerTy()); + if (may_alias(curr_addr, prior_addr, AA)) { + return false; + } + } + } + return true; + } + + void move_forward_gather_instrs(Function &F) { + AliasAnalysis *AA = &getAnalysis().getAAResults(); + + for (auto &B : F) { + // Grab all instructions + std::vector all_instrs = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + all_instrs.push_back(instr); + } + + // Perform Pushing Back of Gather Instructions + std::vector final_instrs_vec = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + + // Place any non-Gather Instructions at the end of the list of + // instructions + if (!(isa_gather_instruction(instr))) { + final_instrs_vec.push_back(instr); + continue; + } + + // Handle Load Instructions + int insertion_offset = final_instrs_vec.size(); + while (true) { + Instruction *gather_instr = instr; + // If there is no prior instruction, push back at current + // offset, and stop. + if (insertion_offset - 1 < 0) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + gather_instr); + break; + } + + Instruction *prior_instr = + final_instrs_vec[insertion_offset - 1]; + + if (!can_move_gather_instruction_before(gather_instr, + prior_instr, AA)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + gather_instr); + break; + } + + // Otherwise, keep pushing back the load instruction + --insertion_offset; + assert(insertion_offset >= 0); + } + } + + // First, insert clone all instructions, and insert them into the + // basic block at the very beginning + + // build ordered vector of cloned instructions + // build map from original vector to cloned vector + std::vector cloned_instrs = {}; + std::map original_to_clone_map = {}; + std::map clone_to_original_map = {}; + for (Instruction *instr : final_instrs_vec) { + Instruction *cloned_instr = instr->clone(); + cloned_instrs.push_back(cloned_instr); + original_to_clone_map[instr] = cloned_instr; + clone_to_original_map[cloned_instr] = instr; + } + + // Grab first instruction to build before at. + Instruction *first_instr = NULL; + for (auto &I : B) { + first_instr = dyn_cast(&I); + assert(first_instr != NULL); + break; + } + IRBuilder<> builder(first_instr); + builder.SetInsertPoint(&B); + + for (Instruction *cloned_instr : cloned_instrs) { + // set insert point to be before beginning if inserting phi + // instruction + if (isa(cloned_instr)) { + builder.SetInsertPoint(first_instr); + } + builder.Insert(cloned_instr); + if (isa(cloned_instr)) { + builder.SetInsertPoint(&B); + } + // The cloned instruction has arguments pointing backwards to + // prior original instructions. 
Some of these prior instructions + // themselves will themselves be cloned. We need to replace the + // prior original instructions with clones instructions + int num_operands = cloned_instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Value *clone_operand = cloned_instr->getOperand(i); + Instruction *clone_operand_instr = + dyn_cast(clone_operand); + if (clone_operand_instr != NULL) { + if (original_to_clone_map.count(clone_operand_instr) > + 0) { + Instruction *replacement_operand = + original_to_clone_map[clone_operand_instr]; + Value *replacement_value = + dyn_cast(replacement_operand); + assert(replacement_value != NULL); + cloned_instr->setOperand(i, replacement_value); + } + } else { + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + cloned_instr->setOperand(i, + original_instr->getOperand(i)); + } + } + + // Furthermore, we need to change all uses of the original + // instruction to be the new cloned instruction + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + if (Value *original_val = dyn_cast(original_instr)) { + Value *cloned_val = dyn_cast(cloned_instr); + assert(cloned_val != NULL); + original_val->replaceAllUsesWith(cloned_val); + } + } + + // Finally, delete all the original instructions in the basic block + // Do this in reverse order. + std::reverse(all_instrs.begin(), all_instrs.end()); + for (Instruction *instr : all_instrs) { + instr->eraseFromParent(); + } + } + } + + void move_forward_gep_instrs(Function &F) { + AliasAnalysis *AA = &getAnalysis().getAAResults(); + + for (auto &B : F) { + // Grab all instructions + std::vector all_instrs = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + all_instrs.push_back(instr); + } + + // Perform Pushing Back of Gep Instructions + std::vector final_instrs_vec = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + + // Place any non-Gep Instructions at the end of the list of + // instructions + if (!(isa(instr))) { + final_instrs_vec.push_back(instr); + continue; + } + + // Handle Gep Instructions + int insertion_offset = final_instrs_vec.size(); + while (true) { + Instruction *load_instr = instr; + // If there is no prior instruction, push back at current + // offset, and stop. 
+ if (insertion_offset - 1 < 0) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + Instruction *prior_instr = + final_instrs_vec[insertion_offset - 1]; + + // If the prior instruction is a phi node, do not push the + // current instruction back + if (isa(prior_instr)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + // If the prior instruction is used in the load's + // arguments, do not push it back + int num_operands = load_instr->getNumOperands(); + bool break_while = false; + for (int i = 0; i < num_operands; i++) { + Value *load_operand = load_instr->getOperand(i); + Instruction *load_operand_instr = + dyn_cast(load_operand); + if (load_operand_instr != NULL) { + if (load_operand_instr == prior_instr) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break_while = true; + break; + } + } + } + if (break_while) { + break; + } + + // Otherwise, keep pushing back the load instruction + --insertion_offset; + assert(insertion_offset >= 0); + } + } + + // First, insert clone all instructions, and insert them into the + // basic block at the very beginning + + // build ordered vector of cloned instructions + // build map from original vector to cloned vector + std::vector cloned_instrs = {}; + std::map original_to_clone_map = {}; + std::map clone_to_original_map = {}; + for (Instruction *instr : final_instrs_vec) { + Instruction *cloned_instr = instr->clone(); + cloned_instrs.push_back(cloned_instr); + original_to_clone_map[instr] = cloned_instr; + clone_to_original_map[cloned_instr] = instr; + } + + // Grab first instruction to build before at. + Instruction *first_instr = NULL; + for (auto &I : B) { + first_instr = dyn_cast(&I); + assert(first_instr != NULL); + break; + } + IRBuilder<> builder(first_instr); + builder.SetInsertPoint(&B); + + for (Instruction *cloned_instr : cloned_instrs) { + // set insert point to be before beginning if inserting phi + // instruction + if (isa(cloned_instr)) { + builder.SetInsertPoint(first_instr); + } + builder.Insert(cloned_instr); + if (isa(cloned_instr)) { + builder.SetInsertPoint(&B); + } + // The cloned instruction has arguments pointing backwards to + // prior original instructions. Some of these prior instructions + // themselves will themselves be cloned. 
We need to replace the + // prior original instructions with clones instructions + int num_operands = cloned_instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Value *clone_operand = cloned_instr->getOperand(i); + Instruction *clone_operand_instr = + dyn_cast(clone_operand); + if (clone_operand_instr != NULL) { + if (original_to_clone_map.count(clone_operand_instr) > + 0) { + Instruction *replacement_operand = + original_to_clone_map[clone_operand_instr]; + Value *replacement_value = + dyn_cast(replacement_operand); + assert(replacement_value != NULL); + cloned_instr->setOperand(i, replacement_value); + } + } else { + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + cloned_instr->setOperand(i, + original_instr->getOperand(i)); + } + } + + // Furthermore, we need to change all uses of the original + // instruction to be the new cloned instruction + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + if (Value *original_val = dyn_cast(original_instr)) { + Value *cloned_val = dyn_cast(cloned_instr); + assert(cloned_val != NULL); + original_val->replaceAllUsesWith(cloned_val); + } + } + + // Finally, delete all the original instructions in the basic block + // Do this in reverse order. + std::reverse(all_instrs.begin(), all_instrs.end()); + for (Instruction *instr : all_instrs) { + instr->eraseFromParent(); + } + } + } + + void move_forward_bitcast_instrs(Function &F) { + AliasAnalysis *AA = &getAnalysis().getAAResults(); + + for (auto &B : F) { + // Grab all instructions + std::vector all_instrs = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + all_instrs.push_back(instr); + } + + // Perform Pushing Back of Bitcast Instructions + std::vector final_instrs_vec = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + + // Place any non-Bitcast Instructions at the end of the list of + // instructions + if (!(isa(instr))) { + final_instrs_vec.push_back(instr); + continue; + } + + // Handle Gep Instructions + int insertion_offset = final_instrs_vec.size(); + while (true) { + Instruction *load_instr = instr; + // If there is no prior instruction, push back at current + // offset, and stop. 
+ if (insertion_offset - 1 < 0) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + Instruction *prior_instr = + final_instrs_vec[insertion_offset - 1]; + + // If the prior instruction is a phi node, do not push the + // current instruction back + if (isa(prior_instr)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + // If the prior instruction is used in the load's + // arguments, do not push it back + int num_operands = load_instr->getNumOperands(); + bool break_while = false; + for (int i = 0; i < num_operands; i++) { + Value *load_operand = load_instr->getOperand(i); + Instruction *load_operand_instr = + dyn_cast(load_operand); + if (load_operand_instr != NULL) { + if (load_operand_instr == prior_instr) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break_while = true; + break; + } + } + } + if (break_while) { + break; + } + + // Otherwise, keep pushing back the load instruction + --insertion_offset; + assert(insertion_offset >= 0); + } + } + + // First, insert clone all instructions, and insert them into the + // basic block at the very beginning + + // build ordered vector of cloned instructions + // build map from original vector to cloned vector + std::vector cloned_instrs = {}; + std::map original_to_clone_map = {}; + std::map clone_to_original_map = {}; + for (Instruction *instr : final_instrs_vec) { + Instruction *cloned_instr = instr->clone(); + cloned_instrs.push_back(cloned_instr); + original_to_clone_map[instr] = cloned_instr; + clone_to_original_map[cloned_instr] = instr; + } + + // Grab first instruction to build before at. + Instruction *first_instr = NULL; + for (auto &I : B) { + first_instr = dyn_cast(&I); + assert(first_instr != NULL); + break; + } + IRBuilder<> builder(first_instr); + builder.SetInsertPoint(&B); + + for (Instruction *cloned_instr : cloned_instrs) { + // set insert point to be before beginning if inserting phi + // instruction + if (isa(cloned_instr)) { + builder.SetInsertPoint(first_instr); + } + builder.Insert(cloned_instr); + if (isa(cloned_instr)) { + builder.SetInsertPoint(&B); + } + // The cloned instruction has arguments pointing backwards to + // prior original instructions. Some of these prior instructions + // themselves will themselves be cloned. 
We need to replace the + // prior original instructions with clones instructions + int num_operands = cloned_instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Value *clone_operand = cloned_instr->getOperand(i); + Instruction *clone_operand_instr = + dyn_cast(clone_operand); + if (clone_operand_instr != NULL) { + if (original_to_clone_map.count(clone_operand_instr) > + 0) { + Instruction *replacement_operand = + original_to_clone_map[clone_operand_instr]; + Value *replacement_value = + dyn_cast(replacement_operand); + assert(replacement_value != NULL); + cloned_instr->setOperand(i, replacement_value); + } + } else { + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + cloned_instr->setOperand(i, + original_instr->getOperand(i)); + } + } + + // Furthermore, we need to change all uses of the original + // instruction to be the new cloned instruction + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + if (Value *original_val = dyn_cast(original_instr)) { + Value *cloned_val = dyn_cast(cloned_instr); + assert(cloned_val != NULL); + original_val->replaceAllUsesWith(cloned_val); + } + } + + // Finally, delete all the original instructions in the basic block + // Do this in reverse order. + std::reverse(all_instrs.begin(), all_instrs.end()); + for (Instruction *instr : all_instrs) { + instr->eraseFromParent(); + } + } + } + + void move_forward_insert_element_instrs(Function &F) { + AliasAnalysis *AA = &getAnalysis().getAAResults(); + + for (auto &B : F) { + // Grab all instructions + std::vector all_instrs = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + all_instrs.push_back(instr); + } + + // Perform Pushing Back of Gep Instructions + std::vector final_instrs_vec = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + + // Place any non-Gep Instructions at the end of the list of + // instructions + if (!(isa(instr))) { + final_instrs_vec.push_back(instr); + continue; + } + + // Handle Gep Instructions + int insertion_offset = final_instrs_vec.size(); + while (true) { + Instruction *load_instr = instr; + // If there is no prior instruction, push back at current + // offset, and stop. 
+ if (insertion_offset - 1 < 0) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + Instruction *prior_instr = + final_instrs_vec[insertion_offset - 1]; + + // If the prior instruction is a phi node, do not push the + // current instruction back + if (isa(prior_instr)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + // If the prior instruction is used in the load's + // arguments, do not push it back + int num_operands = load_instr->getNumOperands(); + bool break_while = false; + for (int i = 0; i < num_operands; i++) { + Value *load_operand = load_instr->getOperand(i); + Instruction *load_operand_instr = + dyn_cast(load_operand); + if (load_operand_instr != NULL) { + if (load_operand_instr == prior_instr) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break_while = true; + break; + } + } + } + if (break_while) { + break; + } + + // Otherwise, keep pushing back the load instruction + --insertion_offset; + assert(insertion_offset >= 0); + } + } + + // First, insert clone all instructions, and insert them into the + // basic block at the very beginning + + // build ordered vector of cloned instructions + // build map from original vector to cloned vector + std::vector cloned_instrs = {}; + std::map original_to_clone_map = {}; + std::map clone_to_original_map = {}; + for (Instruction *instr : final_instrs_vec) { + Instruction *cloned_instr = instr->clone(); + cloned_instrs.push_back(cloned_instr); + original_to_clone_map[instr] = cloned_instr; + clone_to_original_map[cloned_instr] = instr; + } + + // Grab first instruction to build before at. + Instruction *first_instr = NULL; + for (auto &I : B) { + first_instr = dyn_cast(&I); + assert(first_instr != NULL); + break; + } + IRBuilder<> builder(first_instr); + builder.SetInsertPoint(&B); + + for (Instruction *cloned_instr : cloned_instrs) { + // set insert point to be before beginning if inserting phi + // instruction + if (isa(cloned_instr)) { + builder.SetInsertPoint(first_instr); + } + builder.Insert(cloned_instr); + if (isa(cloned_instr)) { + builder.SetInsertPoint(&B); + } + // The cloned instruction has arguments pointing backwards to + // prior original instructions. Some of these prior instructions + // themselves will themselves be cloned. 
We need to replace the + // prior original instructions with clones instructions + int num_operands = cloned_instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Value *clone_operand = cloned_instr->getOperand(i); + Instruction *clone_operand_instr = + dyn_cast(clone_operand); + if (clone_operand_instr != NULL) { + if (original_to_clone_map.count(clone_operand_instr) > + 0) { + Instruction *replacement_operand = + original_to_clone_map[clone_operand_instr]; + Value *replacement_value = + dyn_cast(replacement_operand); + assert(replacement_value != NULL); + cloned_instr->setOperand(i, replacement_value); + } + } else { + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + cloned_instr->setOperand(i, + original_instr->getOperand(i)); + } + } + + // Furthermore, we need to change all uses of the original + // instruction to be the new cloned instruction + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + if (Value *original_val = dyn_cast(original_instr)) { + Value *cloned_val = dyn_cast(cloned_instr); + assert(cloned_val != NULL); + original_val->replaceAllUsesWith(cloned_val); + } + } + + // Finally, delete all the original instructions in the basic block + // Do this in reverse order. + std::reverse(all_instrs.begin(), all_instrs.end()); + for (Instruction *instr : all_instrs) { + instr->eraseFromParent(); + } + } + } + /** * Move Loads as far forward as possible in LLVM IR */ @@ -168,6 +942,31 @@ struct LoadStoreMovementPass : public FunctionPass { break; } } + + // if (isa_gather_instruction(prior_instr)) { + // std::vector gather_addresses = + // get_gather_addresses(prior_instr); + // Value *load_addr = dyn_cast(load_instr); + // if (isa(load_instr)) { + // load_addr = dyn_cast(load_instr) + // ->getPointerOperand(); + // } + // assert(load_addr != NULL); + // bool gather_break = false; + // for (auto gather_address : gather_addresses) { + // if (may_alias(gather_address, load_addr, AA)) { + // gather_break = true; + // break; + // } + // } + // if (gather_break) { + // final_instrs_vec.insert( + // final_instrs_vec.begin() + insertion_offset, + // load_instr); + // break; + // } + // } + // Otherwise, keep pushing back the load instruction --insertion_offset; assert(insertion_offset >= 0); @@ -378,6 +1177,31 @@ struct LoadStoreMovementPass : public FunctionPass { break; } } + + // if (isa_gather_instruction(prior_instr)) { + // std::vector gather_addresses = + // get_gather_addresses(prior_instr); + // Value *store_addr = dyn_cast(store_instr); + // if (isa(store_instr)) { + // store_addr = dyn_cast(store_instr) + // ->getPointerOperand(); + // } + // assert(store_addr != NULL); + // bool gather_break = false; + // for (auto gather_address : gather_addresses) { + // if (may_alias(gather_address, store_addr, AA)) { + // gather_break = true; + // break; + // } + // } + // if (gather_break) { + // final_instrs_vec.insert( + // final_instrs_vec.begin() + insertion_offset, + // store_instr); + // break; + // } + // } + // Otherwise, keep pushing back the str instruction --insertion_offset; assert(insertion_offset >= 0); @@ -465,14 +1289,14 @@ struct LoadStoreMovementPass : public FunctionPass { } } - // Move All Bitcasts as early as possible, avoiding moving instructions - // by removing dependencies. The idea behind this is to move bitcasts - // out of the way so that vectorization can occur properlu. 
- void rewrite_bitcasts(Function &F) { + std::vector func_to_vec(Function &F) { + std::vector result = {}; for (auto &B : F) { for (auto &I : B) { + result.push_back(&I); } } + return result; } virtual bool runOnFunction(Function &F) override { @@ -487,9 +1311,20 @@ struct LoadStoreMovementPass : public FunctionPass { return false; } // Might want to iterate to convergence - // first move bitcasts - rewrite_loads(F); - rewrite_stores(F); + const int N_ITER = 1; + for (int i = 0; i < N_ITER; i++) { + std::vector original_func = func_to_vec(F); + // move_forward_gep_instrs(F); + // move_forward_insert_element_instrs(F); + // move_forward_bitcast_instrs(F); + // move_forward_gather_instrs(F); + rewrite_loads(F); + rewrite_stores(F); + std::vector rewritten_func = func_to_vec(F); + if (original_func == rewritten_func) { + break; + } + } return true; } diff --git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index 6eb6d0ec..1a6a0a94 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -2,12 +2,12 @@ ifeq ($(shell uname),Darwin) EXT := dylib CLANG := /usr/local/opt/llvm/bin/clang SETUP := bash set_up_mac.sh - LIB := src/lib.rs Cargo.toml .cargo/config + LIB := src/lib.rs Cargo.toml .cargo/config else EXT := so CLANG = clang SETUP := : - LIB := src/lib.rs Cargo.toml + LIB := src/lib.rs Cargo.toml endif .PHONY: target/debug/libllvmlib.$(EXT) @@ -21,7 +21,7 @@ run-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp @$(CLANG) -target arm64-apple-macos11 -mmacosx-version-min=11.0 build/dce.ll -o build/final @build/final -run-baseline: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp +run-slp: @$(CLANG) -target arm64-apple-macos11 -mmacosx-version-min=11.0 -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll @opt -S --slp-vectorizer build/opt.ll -o build/slp.ll @@ -29,49 +29,10 @@ run-baseline: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp @$(CLANG) -target arm64-apple-macos11 -mmacosx-version-min=11.0 -o0 build/dce.ll -o build/final @build/final -print-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) - opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll - opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll - $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=true build/aa.ll -o build/diospyros.ll - opt -S --adce --dse build/diospyros.ll -o build/dce.ll - $(CLANG) build/dce.ll -o build/final - build/final - -no-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) - opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o 
build/opt.ll - opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll - $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt=false -mllvm -print=false build/aa.ll -o build/diospyros.ll - opt -S --adce --dse build/diospyros.ll -o build/dce.ll - $(CLANG) build/dce.ll -o build/final - build/final - -run-opt-stdout: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - @$(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) - @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll - @opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll - @$(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=false build/aa.ll -o build/diospyros.ll - @opt -S --adce --dse build/diospyros.ll -o build/dce.ll - @cat build/dce.ll - -no-opt-stdout: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - @$(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) - @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll - @opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll - @$(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt=false -mllvm -print=false build/aa.ll -o build/diospyros.ll - @rm build/dce.ll - @opt -S --adce --dse build/diospyros.ll -o build/dce.ll - @cat build/dce.ll - -run-polybench: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -I polybench-tests/utilities -I polybench-tests/linear-algebra/kernels/atax $(test) -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll - opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll - opt -S --cfl-steens-aa build/opt.ll -o build/aa.ll - $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -mllvm -opt -mllvm -print=true build/aa.ll -o build/diospyros.ll - opt -S --adce --dse build/diospyros.ll -o build/dce.ll - $(CLANG) -I utilities polybench-tests/utilities/polybench.c build/dce.ll -o build/final - build/final +run-baseline: + @$(CLANG) -target arm64-apple-macos11 -mmacosx-version-min=11.0 -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) + @$(CLANG) -target arm64-apple-macos11 -mmacosx-version-min=11.0 -O3 -fno-vectorize -fno-tree-vectorize -fno-slp-vectorize -o build/final build/clang.ll + @build/final test: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp turnt c-tests/*.c diff --git a/src/dios-egraphs/Diospyros/VectorizationUtilities.cpp b/src/dios-egraphs/Diospyros/VectorizationUtilities.cpp new file mode 100644 index 00000000..7f8a770b --- /dev/null +++ b/src/dios-egraphs/Diospyros/VectorizationUtilities.cpp @@ -0,0 +1,252 @@ +#include +#include + +#include +#include +#include + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemoryLocation.h" +#include 
"llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" + +using namespace llvm; + +namespace VectorConstants { +const std::string GATHER_NAME = "llvm.masked.gather.v4f32.v4p0f32"; +const uint32_t VECTOR_WIDTH = 4; +} // namespace VectorConstants + +namespace Array { +// get all "Base" Arrays on which vectorization can occur. These are +// defined as argument inputs with a pointer type +std::vector get_array_bases(Function &F, TargetLibraryInfo *TLI) { + std::vector base_of_array_vec = {}; + for (auto &a : F.args()) { + if (a.getType()->isPointerTy()) { + if (Value *arg_val = dyn_cast(&a)) { + base_of_array_vec.push_back(arg_val); + } + } + } + for (auto &B : F) { + for (auto &I : B) { + if (Value *V = dyn_cast(&I)) { + if (isMallocOrCallocLikeFn(V, TLI)) { + base_of_array_vec.push_back(V); + } + } + } + } + return base_of_array_vec; +} + +/** + * return the index to in baseOfArrayVec that store is an offset from, or + * NULLOPT if not matching + */ +std::pair get_base_reference(Value *mem_instr_ptr, + std::vector base_of_array_vec, + ScalarEvolution *SE) { + for (int i = 0; i < base_of_array_vec.size(); i++) { + Value *base_array_ptr = base_of_array_vec[i]; + assert(base_array_ptr->getType()->isPointerTy()); + const SCEV *mem_instr_ptr_se = SE->getSCEV(mem_instr_ptr); + const SCEV *base_ptr_se = SE->getSCEV(base_array_ptr); + const SCEV *diff = SE->getMinusSCEV(mem_instr_ptr_se, base_ptr_se); + APInt min_val = SE->getSignedRangeMin(diff); + APInt max_val = SE->getSignedRangeMax(diff); + if (min_val == max_val) { + int val = (int)max_val.roundToDouble(); + return {i, val}; + } + } + return {-1, -1}; +} +} // namespace Array + +namespace Alias { +bool may_alias(Value *addr1, Value *addr2, AliasAnalysis *AA) { + // Both isNoALias and isMustAlias have to be checked for unknown reasons + return (!AA->isNoAlias(addr1, + LocationSize::precise( + addr1->getType()->getPrimitiveSizeInBits()), + addr2, + LocationSize::precise( + addr2->getType()->getPrimitiveSizeInBits())) || + AA->isMustAlias(addr1, addr2)); +} +} // namespace Alias + +namespace Gather { +bool isa_gather_instruction(Instruction *instr) { + if (CallInst *call_instr = dyn_cast(instr)) { + Function *fun = call_instr->getCalledFunction(); + // Source: + // https://stackoverflow.com/questions/11686951/how-can-i-get-function-name-from-callinst-in-llvm + // Fun Could be NULL, in which case indirect call occurs, i cannot + // get name. 
+ if (fun) { + if (fun->getName() == VectorConstants::GATHER_NAME) { + return true; + } + } + } + return false; +} + +std::vector get_gather_addresses(Instruction *call_to_gather) { + assert(isa_gather_instruction(call_to_gather)); + + Instruction *insert_element_instr = + dyn_cast(call_to_gather->getOperand(0)); + if (insert_element_instr == NULL) { + throw "Gather Arguments Pointer Vector was NULL"; + } + + std::vector gather_addresses = {}; + // hardcode to gathers of length 4 only + for (int i = 0; i < VectorConstants::VECTOR_WIDTH; i++) { + Value *pointer = insert_element_instr->getOperand(1); + gather_addresses.push_back(pointer); + Instruction *new_insert_element_instr = + dyn_cast(insert_element_instr->getOperand(0)); + insert_element_instr = new_insert_element_instr; + } + return gather_addresses; +} + +} // namespace Gather + +namespace Chunking { +using chunk_t = std::vector; +using chunks_t = std::vector>; + +/** + * True iff an instruction is a mem intrinsic. + */ +bool isa_mem_intrinsic(Instruction *instr) { + if (isa(instr)) { + return true; + } else if (isa(instr)) { + return true; + } else if (isa(instr)) { + return true; + } else if (isa(instr)) { + return true; + } + return false; +} + +/** + * True iff is a special type of instruction for chunking + * + */ +bool isa_special_chunk_instr(Instruction *instr) { + return isa_mem_intrinsic(instr) || isa(instr) || + isa(instr) || + (isa(instr) && !Gather::isa_gather_instruction(instr)); +} + +/* +Build chunks of instructions + +A chunk is the longest contiguous section of instructions that ends in a +sequence of stores. + +A chunk does not need to contain a store instruction. + +Assumes: LoadStoreMovement pass is run before the Diospyros pass +**/ +std::vector> build_chunks(BasicBlock *B, + AliasAnalysis *AA) { + std::vector> chunks = {}; + + bool has_seen_store = false; + bool stores_alias_in_chunk = false; + std::vector curr_chunk = {}; + + // Track Last Stores seen + std::vector last_stores = {}; + for (auto &I : *B) { + // the first two cases are meant to create chunks with non-handled + // instructions + if (has_seen_store && isa_special_chunk_instr(&I)) { + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + has_seen_store = false; + stores_alias_in_chunk = false; + curr_chunk = {}; + last_stores = {}; + curr_chunk.push_back(&I); + chunks.push_back(curr_chunk); + curr_chunk = {}; + } else if (!has_seen_store && isa_special_chunk_instr(&I)) { + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + has_seen_store = false; + stores_alias_in_chunk = false; + curr_chunk = {}; + last_stores = {}; + curr_chunk.push_back(&I); + chunks.push_back(curr_chunk); + curr_chunk = {}; + } else if (!has_seen_store && isa(I) && + !isa_special_chunk_instr(&I)) { + has_seen_store = true; + curr_chunk.push_back(&I); + last_stores.push_back(&I); + } else if (!has_seen_store && !isa(I) && + !isa_special_chunk_instr(&I)) { + curr_chunk.push_back(&I); + } else if (has_seen_store && !isa(I) && + !isa_special_chunk_instr(&I)) { + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + has_seen_store = false; + stores_alias_in_chunk = false; + curr_chunk = {}; + last_stores = {}; + curr_chunk.push_back(&I); + } else { // has seen store and is a store instruction + Value *curr_store_addr = I.getOperand(1); + for (auto other_store : last_stores) { + if (other_store != &I) { + Value *other_store_addr = other_store->getOperand(1); + if 
(Alias::may_alias(curr_store_addr, other_store_addr, + AA)) { + stores_alias_in_chunk = true; + } + } + } + curr_chunk.push_back(&I); + last_stores.push_back(&I); + } + } + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + + // Filter to make sure no chunks are empty + chunks_t final_chunks = {}; + for (auto chunk : chunks) { + if (!chunk.empty()) { + final_chunks.push_back(chunk); + } + } + + return final_chunks; +} +} // namespace Chunking \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-2-by-2-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-2-by-2-conv.c new file mode 100644 index 00000000..96a530ec --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-2-by-2-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 3 +#define I_COLS 3 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
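+    // gettimeofday() reports seconds and microseconds separately; the code
+    // below folds both into a single millisecond count before and after the
+    // timed loop.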
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-3-by-3-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-3-by-3-conv.c new file mode 100644 index 00000000..693ac896 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-3-by-3-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 3 +#define I_COLS 3 +#define F_ROWS 3 +#define F_COLS 3 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-4-by-4-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-4-by-4-conv.c new file mode 100644 index 00000000..f29588ba --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-4-by-4-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 3 +#define I_COLS 3 +#define F_ROWS 4 +#define F_COLS 4 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/4-by-4-and-2-by-2-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/4-by-4-and-2-by-2-conv.c new file mode 100644 index 00000000..a7ec8612 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/4-by-4-and-2-by-2-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 4 +#define I_COLS 4 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/4-by-4-and-4-by-4-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/4-by-4-and-4-by-4-conv.c new file mode 100644 index 00000000..af54c8eb --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/4-by-4-and-4-by-4-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 4 +#define I_COLS 4 +#define F_ROWS 4 +#define F_COLS 4 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-2-by-2-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-2-by-2-conv.c new file mode 100644 index 00000000..8c1089b5 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-2-by-2-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 5 +#define I_COLS 5 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/conv-2d.c b/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-3-by-3-conv.c similarity index 65% rename from src/dios-egraphs/Diospyros/benchmarks/optimized/conv-2d.c rename to src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-3-by-3-conv.c index 08e9d403..907cff88 100644 --- a/src/dios-egraphs/Diospyros/benchmarks/optimized/conv-2d.c +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-3-by-3-conv.c @@ -5,15 +5,14 @@ #include #include +#include "test-utils.h" + #define I_ROWS 5 #define I_COLS 5 #define F_ROWS 3 #define F_COLS 3 #define O_ROWS ((I_ROWS + F_ROWS) - 1) #define O_COLS ((I_COLS + F_COLS) - 1) -#define MAX_FLOAT 100.00f -#define DELTA 0.1f -#define NITER 1000000000 void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], float mat_out[O_ROWS * O_COLS]) { @@ -51,11 +50,11 @@ int main(void) { } float mat_out[O_ROWS * O_COLS]; for (int i = 0; i < O_ROWS * O_COLS; i++) { - mat_out[i] = 0; + mat_out[i] = 0.0f; } float expected[O_ROWS * O_COLS]; for (int i = 0; i < O_ROWS * O_COLS; i++) { - expected[i] = 0; + expected[i] = mat_out[i]; } // This stackoverflow post explains how to calculate walk clock time. 
@@ -80,39 +79,16 @@ int main(void) { // report difference in runtime double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), NITER); - return 0; - - // // calculate expected - // for (int outRow = 0; outRow < O_ROWS; outRow++) { - // for (int outCol = 0; outCol < O_COLS; outCol++) { - // for (int fRow = 0; fRow < F_ROWS; fRow++) { - // for (int fCol = 0; fCol < F_COLS; fCol++) { - // int fRowTrans = F_ROWS - 1 - fRow; - // int fColTrans = F_COLS - 1 - fCol; - // int iRow = outRow - fRowTrans; - // int iCol = outCol - fColTrans; - - // if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && - // iCol < I_COLS) { - // float v = mat_in[iRow * I_COLS + iCol] * - // f_in[fRowTrans * F_COLS + fColTrans]; - // expected[outRow * O_COLS + outCol] += v; - // } - // } - // } - // } - // } - // for (int i = 0; i < O_ROWS * O_COLS; i++) { - // printf("--------------------------\n"); - // printf("calculated: %f\n", mat_out[i]); - // printf("expected: %f\n", expected[i]); - // printf("difference: %f\n", expected[i] - mat_out[i]); - // } - // for (int i = 0; i < O_ROWS * O_COLS; i++) { - // assert(fabs(expected[i] - mat_out[i]) < DELTA); - // } - // return 0; } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-4-by-4-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-4-by-4-conv.c new file mode 100644 index 00000000..47d28610 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-4-by-4-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 5 +#define I_COLS 5 +#define F_ROWS 4 +#define F_COLS 4 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-2-by-2-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-2-by-2-conv.c new file mode 100644 index 00000000..45029b62 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-2-by-2-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 6 +#define I_COLS 6 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-3-by-3-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-3-by-3-conv.c new file mode 100644 index 00000000..42e82adf --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-3-by-3-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 6 +#define I_COLS 6 +#define F_ROWS 3 +#define F_COLS 3 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-4-by-4-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-4-by-4-conv.c new file mode 100644 index 00000000..521f343d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-4-by-4-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 6 +#define I_COLS 6 +#define F_ROWS 4 +#define F_COLS 4 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-2-by-2-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-2-by-2-conv.c new file mode 100644 index 00000000..29a4d1fc --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-2-by-2-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 8 +#define I_COLS 8 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-3-by-3-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-3-by-3-conv.c new file mode 100644 index 00000000..ec9e557d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-3-by-3-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 8 +#define I_COLS 8 +#define F_ROWS 3 +#define F_COLS 3 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-4-by-4-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-4-by-4-conv.c new file mode 100644 index 00000000..465f73b6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-4-by-4-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 8 +#define I_COLS 8 +#define F_ROWS 4 +#define F_COLS 4 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/test-utils.h b/src/dios-egraphs/Diospyros/benchmarks/conv/test-utils.h new file mode 100644 index 00000000..8b4a8d5d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/test-utils.h @@ -0,0 +1,6 @@ +#define NITER 10000000 + +#define MAX_FLOAT 100.00f +#define DELTA 0.01f + +#define FILE_PATH "data.txt" diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/10-by-10-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/10-by-10-mat-mul.c new file mode 100644 index 00000000..7cc2ca55 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/10-by-10-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 10 +#define A_COLS 10 +#define B_COLS 10 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. 
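+    // matrix_multiply() re-zeroes each entry of c_out before accumulating,
+    // so every timed iteration below recomputes the same product.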
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/11-by-11-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/11-by-11-mat-mul.c new file mode 100644 index 00000000..77b9ee13 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/11-by-11-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 11 +#define A_COLS 11 +#define B_COLS 11 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/baseline/mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/12-by-12-mat-mul.c similarity index 79% rename from src/dios-egraphs/Diospyros/benchmarks/baseline/mat-mul.c rename to src/dios-egraphs/Diospyros/benchmarks/mat-mul/12-by-12-mat-mul.c index c468aa9a..886ce9b3 100644 --- a/src/dios-egraphs/Diospyros/benchmarks/baseline/mat-mul.c +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/12-by-12-mat-mul.c @@ -5,14 +5,15 @@ #include #include +#include "test-utils.h" + #define A_ROWS 12 #define A_COLS 12 #define B_COLS 12 -#define MAX_FLOAT 100.00f -#define DELTA 0.1f -void matrix_multiply(float a_in[A_ROWS * A_COLS], float b_in[A_COLS * B_COLS], - float c_out[A_ROWS * B_COLS]) { +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { for (int y = 0; y < A_ROWS; y++) { for (int x = 0; x < B_COLS; x++) { c_out[B_COLS * y + x] = 0; @@ -60,7 +61,7 @@ int main(void) { start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; // calculate up c_out - for (int i = 0; i < 1000; i++) { + for (int i = 0; i < NITER; i++) { matrix_multiply(a_in, b_in, c_out); } @@ -70,8 +71,15 @@ int main(void) { // report difference in runtime double diff = difftime(end, start); - printf("%ld milliseconds elapsed over 1000 iterations total\n", - (end - start)); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); return 0; } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/15-by-15-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/15-by-15-mat-mul.c new file mode 100644 index 00000000..35e5c95e --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/15-by-15-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 15 +#define A_COLS 15 +#define B_COLS 15 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + 
time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/16-by-16-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/16-by-16-mat-mul.c new file mode 100644 index 00000000..1323e38d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/16-by-16-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 16 +#define A_COLS 16 +#define B_COLS 16 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/baseline/vvadd.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/2-by-2-mat-mul.c similarity index 53% rename from src/dios-egraphs/Diospyros/benchmarks/baseline/vvadd.c rename to src/dios-egraphs/Diospyros/benchmarks/mat-mul/2-by-2-mat-mul.c index 06490264..517e4ae6 100644 --- a/src/dios-egraphs/Diospyros/benchmarks/baseline/vvadd.c +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/2-by-2-mat-mul.c @@ -5,15 +5,23 @@ #include #include -#define A_ROWS 12 -#define MAX_FLOAT 100.00f -#define DELTA 0.1f -#define NITER 1000000000 +#include "test-utils.h" -void vvadd(float a_in[restrict A_ROWS], float b_in[restrict A_ROWS], - float c_out[restrict A_ROWS]) { - for (int i = 0; i < A_ROWS; i++) { - c_out[i] = a_in[i] + b_in[i]; +#define A_ROWS 2 +#define A_COLS 2 +#define B_COLS 2 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } } } @@ -21,20 +29,25 @@ int main(void) { time_t t = time(NULL); srand((unsigned)time(&t)); // load in a_in - float a_in[A_ROWS]; - for (int i = 0; i < A_ROWS; i++) { + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); } // load in b_in - float b_in[A_ROWS]; - for (int i = 0; i < A_ROWS; i++) { + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); } // set up c_out - float c_out[A_ROWS]; - for (int i = 0; i < A_ROWS; i++) { + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } // This stackoverflow post explains how to calculate walk clock time. 
// https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c @@ -49,7 +62,7 @@ int main(void) { // calculate up c_out for (int i = 0; i < NITER; i++) { - vvadd(a_in, b_in, c_out); + matrix_multiply(a_in, b_in, c_out); } // end timer @@ -58,6 +71,13 @@ int main(void) { // report difference in runtime double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), NITER); diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/vvadd.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/3-by-3-mat-mul.c similarity index 53% rename from src/dios-egraphs/Diospyros/benchmarks/optimized/vvadd.c rename to src/dios-egraphs/Diospyros/benchmarks/mat-mul/3-by-3-mat-mul.c index 06490264..fd58b5ed 100644 --- a/src/dios-egraphs/Diospyros/benchmarks/optimized/vvadd.c +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/3-by-3-mat-mul.c @@ -5,15 +5,23 @@ #include #include -#define A_ROWS 12 -#define MAX_FLOAT 100.00f -#define DELTA 0.1f -#define NITER 1000000000 +#include "test-utils.h" -void vvadd(float a_in[restrict A_ROWS], float b_in[restrict A_ROWS], - float c_out[restrict A_ROWS]) { - for (int i = 0; i < A_ROWS; i++) { - c_out[i] = a_in[i] + b_in[i]; +#define A_ROWS 3 +#define A_COLS 3 +#define B_COLS 3 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } } } @@ -21,20 +29,25 @@ int main(void) { time_t t = time(NULL); srand((unsigned)time(&t)); // load in a_in - float a_in[A_ROWS]; - for (int i = 0; i < A_ROWS; i++) { + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); } // load in b_in - float b_in[A_ROWS]; - for (int i = 0; i < A_ROWS; i++) { + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); } // set up c_out - float c_out[A_ROWS]; - for (int i = 0; i < A_ROWS; i++) { + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } // This stackoverflow post explains how to calculate walk clock time. 
// https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c @@ -49,7 +62,7 @@ int main(void) { // calculate up c_out for (int i = 0; i < NITER; i++) { - vvadd(a_in, b_in, c_out); + matrix_multiply(a_in, b_in, c_out); } // end timer @@ -58,6 +71,13 @@ int main(void) { // report difference in runtime double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), NITER); diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/4-by-4-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/4-by-4-mat-mul.c new file mode 100644 index 00000000..67c745b0 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/4-by-4-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 4 +#define A_COLS 4 +#define B_COLS 4 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/5-by-5-mat-mul.c similarity index 91% rename from src/dios-egraphs/Diospyros/benchmarks/optimized/mat-mul.c rename to src/dios-egraphs/Diospyros/benchmarks/mat-mul/5-by-5-mat-mul.c index e5437869..48d0b2ce 100644 --- a/src/dios-egraphs/Diospyros/benchmarks/optimized/mat-mul.c +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/5-by-5-mat-mul.c @@ -5,12 +5,11 @@ #include #include +#include "test-utils.h" + #define A_ROWS 5 #define A_COLS 5 #define B_COLS 5 -#define MAX_FLOAT 100.00f -#define DELTA 0.1f -#define NITER 1000000000 void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], float b_in[restrict A_COLS * B_COLS], @@ -72,6 +71,13 @@ int main(void) { // report difference in runtime double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), NITER); diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/6-by-6-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/6-by-6-mat-mul.c new file mode 100644 index 00000000..824b8c5a --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/6-by-6-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 6 +#define A_COLS 6 +#define B_COLS 6 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * 
B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/7-by-7-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/7-by-7-mat-mul.c new file mode 100644 index 00000000..637e6d8c --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/7-by-7-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 7 +#define A_COLS 7 +#define B_COLS 7 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/8-by-8-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/8-by-8-mat-mul.c new file mode 100644 index 00000000..7eb44ae2 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/8-by-8-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 8 +#define A_COLS 8 +#define B_COLS 8 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/9-by-9-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/9-by-9-mat-mul.c new file mode 100644 index 00000000..1d0538f6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/9-by-9-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 9 +#define A_COLS 9 +#define B_COLS 9 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/test-utils.h b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/test-utils.h new file mode 100644 index 00000000..8b4a8d5d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/test-utils.h @@ -0,0 +1,6 @@ +#define NITER 10000000 + +#define MAX_FLOAT 100.00f +#define DELTA 0.01f + +#define FILE_PATH "data.txt" diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/qprod.c b/src/dios-egraphs/Diospyros/benchmarks/q-prod/qprod.c similarity index 77% rename from src/dios-egraphs/Diospyros/benchmarks/optimized/qprod.c rename to src/dios-egraphs/Diospyros/benchmarks/q-prod/qprod.c index 00675f1b..7185a9ad 100644 --- a/src/dios-egraphs/Diospyros/benchmarks/optimized/qprod.c +++ b/src/dios-egraphs/Diospyros/benchmarks/q-prod/qprod.c @@ -7,11 +7,9 @@ #include #include -#define SIZE 4 +#include "test-utils.h" -#define MAX_FLOAT 100.00f -#define DELTA 0.1f -#define NITER 1000000000 +#define SIZE 4 __attribute__((always_inline)) void naive_cross_product(float *lhs, float *rhs, float *result) { @@ -115,36 +113,16 @@ int main(void) { // report difference in runtime double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), NITER); return 0; - - // expectedq[3] = - // a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * - // b_q[2]; - // expectedq[0] = - // a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - a_q[2] * - // b_q[1]; - // expectedq[1] = - // a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * - // b_q[2]; - // expectedq[2] = - // a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * - // b_q[0]; - - // naive_point_product(a_q, b_t, expectedt); - // for (int i = 0; i < 3; i++) { - // expectedt[i] += a_t[i]; - // } - // for (int i = 0; i < SIZE; i++) { - // printf("Calculated q: %f\n", r_q[i]); - // printf("Expected q: %f\n", expectedq[i]); - // assert(fabs(expectedq[i] - r_q[i]) < DELTA); - // } - // for (int i = 0; i < 3; i++) { - // printf("Calculated t: %f\n", r_t[i]); - // printf("Expected t: %f\n", expectedt[i]); - // assert(fabs(expectedt[i] - r_t[i]) < DELTA); - // } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/q-prod/test-utils.h b/src/dios-egraphs/Diospyros/benchmarks/q-prod/test-utils.h new file mode 100644 index 00000000..8b4a8d5d --- 
/dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/q-prod/test-utils.h @@ -0,0 +1,6 @@ +#define NITER 10000000 + +#define MAX_FLOAT 100.00f +#define DELTA 0.01f + +#define FILE_PATH "data.txt" diff --git a/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/3-qr-decomp.c b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/3-qr-decomp.c new file mode 100644 index 00000000..66aabfb1 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/3-qr-decomp.c @@ -0,0 +1,176 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define SIZE 3 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float *a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(m, sizeof(float)); + float *e = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(m, sizeof(float)); + float *v = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(m * m, sizeof(float)); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(SIZE * SIZE, sizeof(float)); + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_fixed_transpose(Q); +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float expectedQ[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + naive_fixed_qr_decomp(A, Q, R); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/qr-decomp.c b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/4-qr-decomp.c similarity index 55% rename from src/dios-egraphs/Diospyros/benchmarks/optimized/qr-decomp.c rename to src/dios-egraphs/Diospyros/benchmarks/qr-decomp/4-qr-decomp.c index f454cdb0..bf1e8ef1 100644 --- a/src/dios-egraphs/Diospyros/benchmarks/optimized/qr-decomp.c +++ b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/4-qr-decomp.c @@ -7,10 +7,9 @@ #include #include +#include "test-utils.h" + #define SIZE 4 -#define MAX_FLOAT 100.00f -#define DELTA 0.1f -#define NITER 100000 float sgn(float v) __attribute__((always_inline)); float naive_norm(float *x, int m) __attribute__((always_inline)); @@ -20,8 +19,6 @@ void naive_fixed_matrix_multiply(float *a, float *b, float *c) float sgn(float v) { return (v > 0) - (v < 0); } -float no_opt_sgn(float v) { return (v > 0) - (v < 0); } - float naive_norm(float *x, int m) { float sum = 0; for (int i = 0; i < m; i++) { @@ -30,14 +27,6 @@ float naive_norm(float *x, int m) { return sqrtf(sum); } -float no_opt_naive_norm(float *x, int m) { - float sum = 0; - for (int i = 0; i < m; i++) { - sum += x[i] * x[i]; - } - return sqrtf(sum); -} - // Naive with fixed size void naive_fixed_transpose(float *a) { for (int i = 0; i < SIZE; i++) { @@ -49,16 +38,6 @@ void naive_fixed_transpose(float *a) { } } -void 
no_opt_naive_fixed_transpose(float *a) { - for (int i = 0; i < SIZE; i++) { - for (int j = i + 1; j < SIZE; j++) { - float tmp = a[i * SIZE + j]; - a[i * SIZE + j] = a[j * SIZE + i]; - a[j * SIZE + i] = tmp; - } - } -} - void naive_fixed_matrix_multiply(float *a, float *b, float *c) { for (int y = 0; y < SIZE; y++) { for (int x = 0; x < SIZE; x++) { @@ -70,17 +49,6 @@ void naive_fixed_matrix_multiply(float *a, float *b, float *c) { } } -void no_opt_naive_fixed_matrix_multiply(float *a, float *b, float *c) { - for (int y = 0; y < SIZE; y++) { - for (int x = 0; x < SIZE; x++) { - c[SIZE * y + x] = 0.0f; - for (int k = 0; k < SIZE; k++) { - c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; - } - } - } -} - void naive_fixed_qr_decomp(float *A, float *Q, float *R) { memcpy(R, A, sizeof(float) * SIZE * SIZE); @@ -157,83 +125,7 @@ void naive_fixed_qr_decomp(float *A, float *Q, float *R) { naive_fixed_transpose(Q); } -void no_opt_naive_fixed_qr_decomp(float *A, float *Q, float *R) { - memcpy(R, A, sizeof(float) * SIZE * SIZE); - - // Build identity matrix of size SIZE * SIZE - float *I = (float *)calloc(SIZE * SIZE, sizeof(float)); - for (int i = 0; i < SIZE; i++) { - for (int j = 0; j < SIZE; j++) { - I[i * SIZE + j] = (i == j); - } - } - - // Householder - for (int k = 0; k < SIZE - 1; k++) { - int m = SIZE - k; - - float *x = (float *)calloc(m, sizeof(float)); - float *e = (float *)calloc(m, sizeof(float)); - for (int i = 0; i < m; i++) { - int row = k + i; - x[i] = R[row * SIZE + k]; - e[i] = I[row * SIZE + k]; - } - - float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); - - float *u = (float *)calloc(m, sizeof(float)); - float *v = (float *)calloc(m, sizeof(float)); - for (int i = 0; i < m; i++) { - u[i] = x[i] + alpha * e[i]; - } - float norm_u = no_opt_naive_norm(u, m); - for (int i = 0; i < m; i++) { - v[i] = u[i] / (norm_u + 0.00001f); - } - - float *q_min = (float *)calloc(m * m, sizeof(float)); - for (int i = 0; i < m; i++) { - for (int j = 0; j < m; j++) { - float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; - q_min[i * m + j] = q_min_i; - } - } - - float *q_t = (float *)calloc(SIZE * SIZE, sizeof(float)); - for (int i = 0; i < SIZE; i++) { - for (int j = 0; j < SIZE; j++) { - float q_t_i; - if ((i < k) || (j < k)) { - q_t_i = (i == j) ? 
1.0f : 0.0f; - } else { - q_t_i = q_min[(i - k) * m + (j - k)]; - } - q_t[i * SIZE + j] = q_t_i; - } - } - - if (k == 0) { - memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t - no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A - } else { - float *res = (float *)calloc(SIZE * SIZE, sizeof(float)); - no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A - memcpy(Q, res, sizeof(float) * SIZE * SIZE); - no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A - memcpy(R, res, sizeof(float) * SIZE * SIZE); - } - free(x); - free(e); - free(u); - free(v); - free(q_min); - free(q_t); - } - no_opt_naive_fixed_transpose(Q); -} - -int main(void) { +int main(void) __attribute__((optimize("no-unroll-loops"))) { // time_t t = time(NULL); // srand((unsigned)time(&t)); @@ -258,7 +150,8 @@ int main(void) { gettimeofday(&timecheck, NULL); start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; - // calculate up c_out +// calculate up c_out +#pragma nounroll for (int i = 0; i < NITER; i++) { naive_fixed_qr_decomp(A, Q, R); } @@ -269,24 +162,15 @@ int main(void) { // report difference in runtime double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), NITER); - no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); - - for (int i = 0; i < SIZE; i++) { - for (int j = 0; j < SIZE; j++) { - printf("Q Output: %f\n", Q[i * SIZE + j]); - printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); - assert(fabs(expectedQ[i] - Q[i]) < DELTA); - } - } - - for (int i = 0; i < SIZE; i++) { - for (int j = 0; j < SIZE; j++) { - printf("R Output: %f\n", R[i * SIZE + j]); - printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); - assert(fabs(expectedR[i] - R[i]) < DELTA); - } - } + return 0; } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/5-qr-decomp.c b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/5-qr-decomp.c new file mode 100644 index 00000000..69eb4448 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/5-qr-decomp.c @@ -0,0 +1,176 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define SIZE 5 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float *a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE 
+ float *I = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(m, sizeof(float)); + float *e = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(m, sizeof(float)); + float *v = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(m * m, sizeof(float)); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(SIZE * SIZE, sizeof(float)); + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_fixed_transpose(Q); +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float expectedQ[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + naive_fixed_qr_decomp(A, Q, R); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/6-qr-decomp.c b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/6-qr-decomp.c new file mode 100644 index 00000000..6716741a --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/6-qr-decomp.c @@ -0,0 +1,176 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define SIZE 6 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float *a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(m, sizeof(float)); + float *e = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(m, sizeof(float)); + float *v = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(m * m, sizeof(float)); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(SIZE * SIZE, sizeof(float)); + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_fixed_transpose(Q); +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float expectedQ[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + naive_fixed_qr_decomp(A, Q, R); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/test-utils.h b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/test-utils.h new file mode 100644 index 00000000..8b4a8d5d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/test-utils.h @@ -0,0 +1,6 @@ +#define NITER 10000000 + +#define MAX_FLOAT 100.00f +#define DELTA 0.01f + +#define FILE_PATH "data.txt" diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/12-by-12-and-2-by-2-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/12-by-12-and-2-by-2-stencil.c new file mode 100644 index 00000000..feee7103 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/12-by-12-and-2-by-2-stencil.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 12 +#define COL_SIZE 12 +#define F_SIZE 4 +#define STENCIL_DIM 2 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < STENCIL_DIM; k1++) 
{ + for (int k2 = 0; k2 < STENCIL_DIM; k2++) { + temp += filter_in[k1 * STENCIL_DIM + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/12-by-12-and-3-by-3-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/12-by-12-and-3-by-3-stencil.c new file mode 100644 index 00000000..6bba3f8d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/12-by-12-and-3-by-3-stencil.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 12 +#define COL_SIZE 12 +#define F_SIZE 9 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/16-by-16-and-2-by-2-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/16-by-16-and-2-by-2-stencil.c new file mode 100644 index 00000000..b4dfbe46 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/16-by-16-and-2-by-2-stencil.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 16 +#define COL_SIZE 16 +#define F_SIZE 4 +#define STENCIL_DIM 2 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < STENCIL_DIM; k1++) { + for (int k2 = 0; k2 < STENCIL_DIM; k2++) { + temp += filter_in[k1 * STENCIL_DIM + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/optimized/stencil-2d.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/16-by-16-and-3-by-3-stencil.c similarity index 70% rename from src/dios-egraphs/Diospyros/benchmarks/optimized/stencil-2d.c rename to src/dios-egraphs/Diospyros/benchmarks/stencil/16-by-16-and-3-by-3-stencil.c index a9331ba3..1a784419 100644 --- a/src/dios-egraphs/Diospyros/benchmarks/optimized/stencil-2d.c +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/16-by-16-and-3-by-3-stencil.c @@ -5,14 +5,12 @@ #include #include -#define ROW_SIZE 15 +#include "test-utils.h" + +#define ROW_SIZE 16 #define COL_SIZE 16 #define F_SIZE 9 -#define MAX_FLOAT 100.00f -#define DELTA 0.1f -#define NITER 1000000000 - void stencil(float orig_in[ROW_SIZE * COL_SIZE], float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { for (int r = 0; r < ROW_SIZE - 2; r++) { @@ -29,7 +27,7 @@ void stencil(float orig_in[ROW_SIZE * COL_SIZE], } } -int main(void) { +int main(void) __attribute__((optimize("no-unroll-loops"))) { time_t t = time(NULL); srand((unsigned)time(&t)); float orig_in[ROW_SIZE * COL_SIZE]; @@ -60,7 +58,8 @@ int main(void) { gettimeofday(&timecheck, NULL); start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; - // calculate up c_out +// calculate up c_out +#pragma nounroll for (int i = 0; i < NITER; i++) { stencil(orig_in, sol_out, filter_in); } @@ -71,31 +70,15 @@ int main(void) { // report difference in runtime double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), NITER); return 0; - - stencil(orig_in, sol_out, filter_in); - for (int r = 0; r < ROW_SIZE - 2; r++) { - for (int c = 0; c < COL_SIZE - 2; c++) { - float temp = 0; - for (int k1 = 0; k1 < 3; k1++) { - for (int k2 = 0; k2 < 3; k2++) { - temp += filter_in[k1 * 3 + k2] * - orig_in[(r + k1) * COL_SIZE + c + k2]; - } - } - expected[(r * COL_SIZE) + c] = temp; - } - } - for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { - printf("calculated: %f\n", sol_out[i]); - printf("expected: %f\n", expected[i]); - printf("difference: %f\n", expected[i] - sol_out[i]); - } - for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { - assert(fabs(expected[i] - sol_out[i]) < DELTA); - } - return 0; } \ No newline at end of file diff --git 
a/src/dios-egraphs/Diospyros/benchmarks/stencil/4-by-4-and-2-by-2-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/4-by-4-and-2-by-2-stencil.c new file mode 100644 index 00000000..1a5a123c --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/4-by-4-and-2-by-2-stencil.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 4 +#define COL_SIZE 4 +#define F_SIZE 4 +#define STENCIL_DIM 2 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < STENCIL_DIM; k1++) { + for (int k2 = 0; k2 < STENCIL_DIM; k2++) { + temp += filter_in[k1 * STENCIL_DIM + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/4-by-4-and-3-by-3-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/4-by-4-and-3-by-3-stencil.c new file mode 100644 index 00000000..5238d224 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/4-by-4-and-3-by-3-stencil.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 4 +#define COL_SIZE 4 +#define F_SIZE 9 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + 
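/*
 * For the flattened row-major layout used above (COL_SIZE floats per row),
 * it can help to see one output element of the 3-by-3 stencil written out by
 * hand. The expansion below is a sketch for r = 0, c = 0 with COL_SIZE = 4,
 * matching the loops above term for term; the helper name stencil_at_0_0 is
 * hypothetical and only illustrates the index arithmetic.
 */
static float stencil_at_0_0(const float *orig_in, const float *filter_in) {
    return filter_in[0] * orig_in[0] + filter_in[1] * orig_in[1] + filter_in[2] * orig_in[2]    /* k1 = 0: row 0, cols 0..2 */
         + filter_in[3] * orig_in[4] + filter_in[4] * orig_in[5] + filter_in[5] * orig_in[6]    /* k1 = 1: row 1, cols 0..2 */
         + filter_in[6] * orig_in[8] + filter_in[7] * orig_in[9] + filter_in[8] * orig_in[10];  /* k1 = 2: row 2, cols 0..2 */
}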
+int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/5-by-5-and-2-by-2-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/5-by-5-and-2-by-2-stencil.c new file mode 100644 index 00000000..72d69d35 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/5-by-5-and-2-by-2-stencil.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 5 +#define COL_SIZE 5 +#define F_SIZE 4 +#define STENCIL_DIM 2 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < STENCIL_DIM; k1++) { + for (int k2 = 0; k2 < STENCIL_DIM; k2++) { + temp += filter_in[k1 * STENCIL_DIM + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/5-by-5-and-3-by-3-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/5-by-5-and-3-by-3-stencil.c new file mode 100644 index 00000000..a36889ec --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/5-by-5-and-3-by-3-stencil.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 5 +#define COL_SIZE 5 +#define F_SIZE 9 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/6-by-6-and-2-by-2-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/6-by-6-and-2-by-2-stencil.c new file mode 100644 index 00000000..aa2cfda6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/6-by-6-and-2-by-2-stencil.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 6 +#define COL_SIZE 6 +#define F_SIZE 4 +#define STENCIL_DIM 2 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < STENCIL_DIM; k1++) { + for (int k2 = 0; k2 < STENCIL_DIM; k2++) { + temp += filter_in[k1 * STENCIL_DIM + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/6-by-6-and-3-by-3-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/6-by-6-and-3-by-3-stencil.c new file mode 100644 index 00000000..0e048be4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/6-by-6-and-3-by-3-stencil.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 6 +#define COL_SIZE 6 +#define F_SIZE 9 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/8-by-8-and-2-by-2-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/8-by-8-and-2-by-2-stencil.c new file mode 100644 index 00000000..76b38531 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/8-by-8-and-2-by-2-stencil.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 8 +#define COL_SIZE 8 +#define F_SIZE 4 +#define STENCIL_DIM 2 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < STENCIL_DIM; k1++) { + for (int k2 = 0; k2 < STENCIL_DIM; k2++) { + temp += filter_in[k1 * STENCIL_DIM + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/8-by-8-and-3-by-3-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/8-by-8-and-3-by-3-stencil.c new file mode 100644 index 00000000..188adc54 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/8-by-8-and-3-by-3-stencil.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 8 +#define COL_SIZE 8 +#define F_SIZE 9 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. 
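/*
 * A short note on the initialisation loops above, which this patch repeats in
 * every benchmark: (float)rand() / (float)(RAND_MAX / MAX_FLOAT) scales
 * rand()'s 0..RAND_MAX output into approximately 0..MAX_FLOAT (100.00f in
 * test-utils.h). A standalone sketch of the same idiom, assuming <stdlib.h>
 * for rand() and RAND_MAX; the helper name random_float is hypothetical.
 */
#include <stdlib.h>

static float random_float(float max_value) {
    /* RAND_MAX / max_value is a floating-point divisor, so the result lands
       in [0, max_value] up to float rounding. */
    return (float)rand() / (float)(RAND_MAX / max_value);
}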
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/test-utils.h b/src/dios-egraphs/Diospyros/benchmarks/stencil/test-utils.h new file mode 100644 index 00000000..8b4a8d5d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/test-utils.h @@ -0,0 +1,6 @@ +#define NITER 10000000 + +#define MAX_FLOAT 100.00f +#define DELTA 0.01f + +#define FILE_PATH "data.txt" diff --git a/src/dios-egraphs/Diospyros/benchmarks/test-utils.h b/src/dios-egraphs/Diospyros/benchmarks/test-utils.h new file mode 100644 index 00000000..8b4a8d5d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/test-utils.h @@ -0,0 +1,6 @@ +#define NITER 10000000 + +#define MAX_FLOAT 100.00f +#define DELTA 0.01f + +#define FILE_PATH "data.txt" diff --git a/src/dios-egraphs/Diospyros/build.rs b/src/dios-egraphs/Diospyros/build.rs index 5aed4b51..2b73e6db 100644 --- a/src/dios-egraphs/Diospyros/build.rs +++ b/src/dios-egraphs/Diospyros/build.rs @@ -31,4 +31,16 @@ fn main() { } build_diospyros.flag("-fexceptions"); build_diospyros.compile("libdiospass.a"); + + // Build the AddressRewriting C++ file. + let mut build_address_rewriting = cc::Build::new(); + build_address_rewriting + .cpp(true) + .warnings(false) // LLVM headers have lots of spurious warnings. 
+ .file("AddressRewriting.cpp"); + for flag in cxxflags.split_ascii_whitespace() { + build_address_rewriting.flag(&flag); + } + build_address_rewriting.flag("-fexceptions"); + build_address_rewriting.compile("libadrwpass.a"); } diff --git a/src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply.c index c26410bf..4b7f7961 100644 --- a/src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply.c +++ b/src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply.c @@ -1,3 +1,4 @@ +#include #include #define A_ROWS 2 @@ -27,6 +28,10 @@ int main(void) { printf("second: %f\n", c_out[0][1]); printf("third: %f\n", c_out[1][0]); printf("fourth: %f\n", c_out[1][1]); + assert(c_out[0][0] == 7); + assert(c_out[0][1] == 10); + assert(c_out[1][0] == 15); + assert(c_out[1][1] == 22); // expected (7, 10, 15, 22) return 0; } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/data-plots/data/all-data/all-data.csv b/src/dios-egraphs/Diospyros/data-plots/data/all-data/all-data.csv new file mode 100644 index 00000000..7f3293b5 --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/data/all-data/all-data.csv @@ -0,0 +1,45 @@ +Group,Benchmark,Baseline,SLP,Diospyros +qr-decomp-data,3-qr-decomp,6218,6710,6932 +qr-decomp-data,4-qr-decomp,10648,11612,11410 +qr-decomp-data,5-qr-decomp,15842,18499,18311 +qr-decomp-data,6-qr-decomp,22060,25880,26048 +mat-mul-data,2-by-2-mat-mul,15,17,23 +mat-mul-data,3-by-3-mat-mul,46,73,68 +mat-mul-data,4-by-4-mat-mul,111,125,78 +mat-mul-data,5-by-5-mat-mul,219,194,246 +mat-mul-data,6-by-6-mat-mul,385,399,373 +mat-mul-data,7-by-7-mat-mul,643,283,672 +mat-mul-data,8-by-8-mat-mul,944,321,371 +mat-mul-data,9-by-9-mat-mul,1264,896,1287 +mat-mul-data,10-by-10-mat-mul,1747,1252,1381 +mat-mul-data,11-by-11-mat-mul,2383,1733,2341 +mat-mul-data,12-by-12-mat-mul,3032,1928,1626 +mat-mul-data,15-by-15-mat-mul,6224,3461,5812 +mat-mul-data,16-by-16-mat-mul,7734,3652,3580 +stencil-data,4-by-4-and-2-by-2-stencil,30,39,33 +stencil-data,5-by-5-and-2-by-2-stencil,64,79,75 +stencil-data,6-by-6-and-2-by-2-stencil,111,143,129 +stencil-data,8-by-8-and-2-by-2-stencil,257,310,227 +stencil-data,12-by-12-and-2-by-2-stencil,695,849,608 +stencil-data,16-by-16-and-2-by-2-stencil,1733,1655,1193 +stencil-data,4-by-4-and-3-by-3-stencil,65,80,77 +stencil-data,5-by-5-and-3-by-3-stencil,165,174,166 +stencil-data,6-by-6-and-3-by-3-stencil,263,313,251 +stencil-data,8-by-8-and-3-by-3-stencil,578,687,498 +stencil-data,12-by-12-and-3-by-3-stencil,1685,1908,1569 +stencil-data,16-by-16-and-3-by-3-stencil,3276,3711,3072 +conv-data,3-by-3-and-2-by-2-conv,99.0,111.0,90.0 +conv-data,3-by-3-and-3-by-3-conv,340.0,243.0,174.0 +conv-data,3-by-3-and-4-by-4-conv,650.0,428.0,278.0 +conv-data,4-by-4-and-2-by-2-conv,257.0,194.0,139.0 +conv-data,4-by-4-and-4-by-4-conv,2652.0,774.0,505.0 +conv-data,5-by-5-and-2-by-2-conv,358.0,295.0,212.0 +conv-data,5-by-5-and-3-by-3-conv,885.0,666.0,432.0 +conv-data,5-by-5-and-4-by-4-conv,3726.0,1230.0,705.0 +conv-data,6-by-6-and-2-by-2-conv,509.0,422.0,277.0 +conv-data,6-by-6-and-3-by-3-conv,2546.0,973.0,603.0 +conv-data,6-by-6-and-4-by-4-conv,5261.0,1795.0,1105.0 +conv-data,8-by-8-and-2-by-2-conv,1554.0,754.0,464.0 +conv-data,8-by-8-and-3-by-3-conv,4185.0,1692.0,925.0 +conv-data,8-by-8-and-4-by-4-conv,9788.0,3259.0,1787.0 +q-prod-data,qprod,54.0,62.0,58.0 diff --git a/src/dios-egraphs/Diospyros/data-plots/data/individual-data/conv-data.csv b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/conv-data.csv new file mode 100644 index 
00000000..325909f9 --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/conv-data.csv @@ -0,0 +1,15 @@ +Benchmark,Baseline,SLP,Diospyros +3-by-3-and-2-by-2-conv,99.0,111.0,90.0 +3-by-3-and-3-by-3-conv,340.0,243.0,174.0 +3-by-3-and-4-by-4-conv,650.0,428.0,278.0 +4-by-4-and-2-by-2-conv,257.0,194.0,139.0 +4-by-4-and-4-by-4-conv,2652.0,774.0,505.0 +5-by-5-and-2-by-2-conv,358.0,295.0,212.0 +5-by-5-and-3-by-3-conv,885.0,666.0,432.0 +5-by-5-and-4-by-4-conv,3726.0,1230.0,705.0 +6-by-6-and-2-by-2-conv,509.0,422.0,277.0 +6-by-6-and-3-by-3-conv,2546.0,973.0,603.0 +6-by-6-and-4-by-4-conv,5261.0,1795.0,1105.0 +8-by-8-and-2-by-2-conv,1554.0,754.0,464.0 +8-by-8-and-3-by-3-conv,4185.0,1692.0,925.0 +8-by-8-and-4-by-4-conv,9788.0,3259.0,1787.0 diff --git a/src/dios-egraphs/Diospyros/data-plots/data/individual-data/mat-mul-data.csv b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/mat-mul-data.csv new file mode 100644 index 00000000..4903f5a5 --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/mat-mul-data.csv @@ -0,0 +1,14 @@ +Benchmark,Baseline,SLP,Diospyros +2-by-2-mat-mul,15,17,23 +3-by-3-mat-mul,46,73,68 +4-by-4-mat-mul,111,125,78 +5-by-5-mat-mul,219,194,246 +6-by-6-mat-mul,385,399,373 +7-by-7-mat-mul,643,283,672 +8-by-8-mat-mul,944,321,371 +9-by-9-mat-mul,1264,896,1287 +10-by-10-mat-mul,1747,1252,1381 +11-by-11-mat-mul,2383,1733,2341 +12-by-12-mat-mul,3032,1928,1626 +15-by-15-mat-mul,6224,3461,5812 +16-by-16-mat-mul,7734,3652,3580 diff --git a/src/dios-egraphs/Diospyros/data-plots/data/individual-data/q-prod-data.csv b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/q-prod-data.csv new file mode 100644 index 00000000..e95cb4a4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/q-prod-data.csv @@ -0,0 +1,2 @@ +Benchmark,Baseline,SLP,Diospyros +qprod,54.0,62.0,58.0 diff --git a/src/dios-egraphs/Diospyros/data-plots/data/individual-data/qr-decomp-data.csv b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/qr-decomp-data.csv new file mode 100644 index 00000000..600db937 --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/qr-decomp-data.csv @@ -0,0 +1,5 @@ +Benchmark,Baseline,SLP,Diospyros +3-qr-decomp,6218,6710,6932 +4-qr-decomp,10648,11612,11410 +5-qr-decomp,15842,18499,18311 +6-qr-decomp,22060,25880,26048 diff --git a/src/dios-egraphs/Diospyros/data-plots/data/individual-data/stencil-data.csv b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/stencil-data.csv new file mode 100644 index 00000000..b75a34e7 --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/stencil-data.csv @@ -0,0 +1,13 @@ +Benchmark,Baseline,SLP,Diospyros +4-by-4-and-2-by-2-stencil,30,39,33 +5-by-5-and-2-by-2-stencil,64,79,75 +6-by-6-and-2-by-2-stencil,111,143,129 +8-by-8-and-2-by-2-stencil,257,310,227 +12-by-12-and-2-by-2-stencil,695,849,608 +16-by-16-and-2-by-2-stencil,1733,1655,1193 +4-by-4-and-3-by-3-stencil,65,80,77 +5-by-5-and-3-by-3-stencil,165,174,166 +6-by-6-and-3-by-3-stencil,263,313,251 +8-by-8-and-3-by-3-stencil,578,687,498 +12-by-12-and-3-by-3-stencil,1685,1908,1569 +16-by-16-and-3-by-3-stencil,3276,3711,3072 diff --git a/src/dios-egraphs/Diospyros/data-plots/data/original-data/all-data.csv b/src/dios-egraphs/Diospyros/data-plots/data/original-data/all-data.csv new file mode 100644 index 00000000..e7672edd --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/data/original-data/all-data.csv @@ -0,0 +1,27 @@ +Benchmark,Baseline,SLP,Diospyros 
+2-by-2-mat-mul,15,17,23 +3-by-3-mat-mul,46,73,68 +4-by-4-mat-mul,111,125,78 +5-by-5-mat-mul,219,194,246 +6-by-6-mat-mul,385,399,373 +7-by-7-mat-mul,643,283,672 +8-by-8-mat-mul,944,321,371 +9-by-9-mat-mul,1264,896,1287 +10-by-10-mat-mul,1747,1252,1381 +11-by-11-mat-mul,2383,1733,2341 +12-by-12-mat-mul,3032,1928,1626 +15-by-15-mat-mul,6224,3461,5812 +16-by-16-mat-mul,7734,3652,3580 +3-qr-decomp,6218,6710,6932 +4-qr-decomp,10648,11612,11410 +5-qr-decomp,15842,18499,18311 +6-qr-decomp,22060,25880,26048 +4-by-4-stencil2d,65,80,77 +5-by-5-stencil2d,165,174,166 +6-by-6-stencil2d,263,313,251 +8-by-8-stencil2d,578,687,498 +12-by-12-stencil2d,1685,1908,1569 +15-by-16-stencil2d,3100,3606,2883 +16-by-16-stencil2d,3276,3711,3072 +conv2d,866,657,424 +qprod,55,63,57 diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/baseline-diospyros-peedup.png b/src/dios-egraphs/Diospyros/data-plots/plots/baseline-diospyros-peedup.png new file mode 100644 index 0000000000000000000000000000000000000000..274651c991488003f1b2e86d7f1c50f99ef9a194 GIT binary patch literal 44382 zcmeFZcU06{w=Gy;RtzXAD2gCak|3fe!B8SmvPcrif&@X39Lz+K45Fe0$)ZS*Aecyn zq99R{5|p5TNKW?LoZDaDd&ld3_l@p5-amcD_`aiuqUu+B@3q!kbI!H>R8O5;yL#Jd z3Wc&(UXH3xq0swKD0E*}F2{e_U)$D$e@Qr=IOC{cXYS~1^kr0c=6I9VPTtpy+Fv$!9rNZLvs(_WYtAEol6wTdL!~bx)hmY zO9~~UTAq4T)8*dJc4tkdrq&fB7Dtm-@}4=ejPcnnai*n584q9fQ#0{f=Ki|FByhh- zUW(t#$>#WHbMfVOn~jYtI#la@wr$>!xToyhjI71w3$@g_QPL(wdVhDv8G9faVu{Ol1J&VsvV=HY9yQ1CCYEy zvgPq$yy2~%Bs$@X*Hc5?G?3d-aGy67Mtea$&)7) zA4twsCAM7~Hm7YanV-<(Q+?<&_vfQbYUu4`mAHY&s^RJQ3a3iuMiqPQOTFefg@jIB z{@KhfA|hgJZJqJ9F3H@~G;xr_LsKD~_mSH?m1_Pzn*IJEr%;>%g_dlyB=5fqv`cKQ zgE2bzols#z=?6>iTpeCY+dS0ql7BP*X{G07spo6s(<-(F$Rj5u>#6ntKwvmG6p$&@WrAm%gbXyKr%yCxn<>_w7t3~^wAEq=lHEN!@k=A}#*G_QpB^1A!-L`Z z5|v^^MNP^X^7i34r#Q5RsiP?P7 z#PLhiRcU2IJY|ml%J1~g%*lRUaM@Objn>N`yJ4ba_1eZP+ z7&bOG=D@k!%FoX~J<)2n%x6?z*OMZw|6-SNtoY_NYu1SD+ZVQc?Y2ro@A)KM9%<>b z+$s+Q%&H?dW4-t^Q|{D1G0&`aB#s+n4Mxan_qu#9PS zq@bwF$T<;F(a;kCTabv9dxlH@p3d_g?eVM6(9w{8>d(Z-fnyQ&K+2tb7Q>E1n$*zH z+c?Tqew%m_RpO-8A01n^ih&^6t#m3ED z8;lE?DgKb(>BW!JP#(y#8*dQd`scg++nZ~9yUGH{`f_lbyv-W&SUJ`|Jw09N-a$16 z1_smdfd=(bFHe#^UKAB6#){kYG-m3JBGjte3N9sG{Pu=}Lp@QBk(?Cd8hJeHnKNfr zQY2hQ6PcNrD}Q%akdWdC4d2Vc%q*9wn@gpV^w-thy*rpqhMZ#sWVYV=R1Hzvwu7ry ztqQ?o@s0Pj=4WN-JeQrDpEn!%`g$W16X%*O0(aJI6(X_4W!F(mD|8-e3qhh%`1n97 zNiW|v+p1v?N!c$73l+-zH+QypTtG(9m0!+xYU{#}?IE<%t&_>=59OS*b{xsKmj7lDH;pME;?9gU{&^XE@dPlnKaINl$nXO&Jr z(dWbaDNeq;I6k0-9om9GVUcpn>G=?=9Vuk62SFGivoLjBQc{x2*`IiRW~{$1U0ayM zov-go3HOPNdotct@#V&TsVlhwU+=3j%OXU;1xDy-SOkcnYV3w^ZE0pgpy@w z->*Mgp3|~dtl#;nxtW=yp6m>Zj}LfHXE0;wWEv)R9CAEzg?`z{%=iV$tC{gZqjTqc zkWSJY^|DP@QJ3QLmQwVKT^Kp8(k)q@(l}V3x{11U6VEBlNUY1+iHV6+nzVfvHF2)l zJV)A6!flL~x|B;H{0NSCwg=MJ2FCYsGHjGP(b2xyX4P++nzs7+`O(~P9By8_wuG#j zK(=T4d%V&Yh?RYQ1%M)Ii> z3v<)3+1b7Q@Ljnu_eaI7<#V!% zk*Vo*#@({TTRC^vFw1n~s1%sU(d8oUEiD~KX-Mdk!PUt-L6h57zN{7v*jZXjhah$q=nRz9}Hz446ZTyLE zU0w0Ib|~;07*QLGb0xo+)E2w`ag2*Q>?}lGT6FoBdSudE;0zwyu*Bnq`{a}CpPk-+ zZxj?r4AiG42&C)g4(OTC$;UY}zD8h$n_->LcN9xt8E;|7=_p9fs~BG+Y_wYQtr}J* zDMYBBJ^AaIfI(r-zX0kH!ynq`EYbN`pz^yt`F{$PMIexxl8n#b?169t~Ld5#11@(*OZpVW)&*`u&c?EEh8 zg=vel>Suh&r^@f&zYkaZToKH+RZmZkya$QYv!z~wl%98fo4S7e%Cts`6DL8hsHoWR z+ot*HBTi0E-dTK1EEBKl5pq6tKQ=UIk#}l3THVs3vpHEVgo6O80-TD&!BM_)IOV-x zUte<=`FgY^_d-xrow(4aZfwPsD&?Hz~H1}x5>=f!bP9f(r%oEcMd;vYf9xPE=9D_#`&ZojCg`pkVUPR`0d zKR%Ydf3HM~63+6t8Tw~(GRk9mG?liSgEr_e@l%In;<3RdmE5zFL+we3?_nnfbF*OBSc z3EFXe{d=s39ObB?GxyzGTq+S}5#wizUD!$a1tdux*jbmUcc5Wb#r@}U2IhwYpPP$a zFWEadg#Mi$_T~tEHo4n##${EsLlBm<65o~JUUHBQI}efKtbi3KCARD5&x{PcJnI(C 
ztgoZF!}#>=fmUy0_Qn>bp~3UItUGc77qjLjie5f%v%~JC^!VDdp^J~%ZOqjYFt8sK z8s|J4mizei`8U#^&2t!Y>Wx>O{-$=?+n;%#@#UPh4_j}S)zrLu;qB!$9=GkWlcuJ+ zCjO(WA@fLK;s0@k(QRO5WxXD5OKIaTlI=OE)%fRLPR7869}NEA{cFc1SG9tcvI#rr zHotuN(y2am#Yj&z6aHP(qa#OjRrA7rw?0ccJ=~e$7&kR_XZzW;$B!Sch~KtFk$Z*o z!KXB_sfwD&>38+I!=2@|FV0SQG;Q%~GR?AFj8B-E80^=4YR%0UXc1CUqSl=6%&>0V zI!Gv}Gx(L!pS28L zUS6__iWj1z*Botpcy+kz?Ssz`jM7$$8{faySuDKw#@*@6t5>huo;VR9Xa8n{qN3vV zu9lT$~!tD=|7 z9D1%@pP#lj?mW*nujF1;1zppcvsx9`o4D?N+UnWIuA-u%pJ^^P+FQ?LYHE6V^e5~7 z1l6lsPkp!fcy~u!e7Vf$>jJw!Chl79JJISL7awih|0P#|d&_ZVYRS^2FR|3JP9Ha@ zoF1a;)AT-ce-#=Z|8=0d;`(rG)HCnF&X++<=7la?J`=y=fA_XNImOhFVG?xxI;+q4 zKxk)qn1HP8Dh_F@(og9|awj`q>opH1?h3Q9vpYXD-i`Vy`4$5 z$tT6?%xV*Nb@cZyr$RzPO5?YA=eV|P&OGpV<*(*~vZtQ?*A$NIcl>+c@$nsc5uU7K zyOre(YCt`Lb#X`|{7qY<6uft*(~XH?f>L_|#*^ zrIkE3Ha6G3&oD-QIO~&k%S4uX$L~MgD{><)u4AC-QN^t-*?lk4^b!v5Pt6O#pTf$A zbwWcz)PH|{Qm>|wuAid2#`o`7V^u#L17m4<`QrY@?0J3$bq$TOy*g%Rf2V!_voDKQ z6J+t|@J1Dt^|8_ib&cyMx3-nxIW`2$%~FB>f2Z$x_J96T=(-NOvBJ!(QSaM{!?Fqr zM)A+6p+6tg?1$Q4nisikyv!jJo^F&TKIk~c*YWeG)9W{H_{|<#^~PxZ`TqX8h#qey z0=SZL`%x2*1yjc#^-?U(Ix{@=jg6Ozi~WPs5-fWColmz!@A~@UEJsgutlr3_hc++I zXEbtOW&HN-+vKmuZThKuxo{p{RaPzynE&gY=$fE%<~(-(RbAa`Mn*tYqtO!Mnb zFDfduukfky3hjxxaPAykW>%J*d$+cxmX@&Pi7z>}qWX8Kk?zVML}?d-J#;+Q>K6Ym zfm}T)J>O<8UjFlom#-C#NbM|{XtWNnEe)g-HP6{_d9752%QLr*oirJ1Uh-vDMY4=5 z*M~G_TR#}j_IvH!Q?;h(*)tOM_}bSoQU@q1;O`iRm6er^gTn=R$Ex;^_cX-5ZxO=# z#Kgpe9&Pga{WVm0-_5&j9j|0%Wdq~mdGJrPp9aIfS{~Ov^*m;7Ve#_g$5p$+*6u(*!9yT3g@V|(~;Fz58gci=zJa7h9dV70Q>c|84YMu1c_Ve`o71$kq7s-d@r;+dP!`4b!w!gW$F2m31 z&zZl|o*sRlmVW>K{XU{2&-oiR1rM@sMDEn9!Hyr3eGjrxM(F4&AF{1kbMa)?o0r&> z!-o%(&pO(WaWQFkM8@a(%@UJ6vDP^ry@p?!nqHQcQaE*G$dqiqyx)fN+}FQH&%9!V z!M&Y}aW4Kos!qzzj(BqF$E$DOc>nzPU>5JKQ$Y9Jdyu!e&~-Wf!?|u1wZ+7aC1b^_?ee$=D}xcNkYR8MLj>o zD{Dh7V;2oX)NJJEr&s^F%c=8q=zX(n-sy?K2;@v#-uV=e{ z{W_h#da2_1PBWp5QZozTflu)m((A z5oyRPIM%qbu&{7wY^?m^a*iC|DR*{BbA1zr(ez&bKkv6JY!6Bx%2ASK!O@)j{6*Ay zWsfCSxi{?$T`eSQ`)bj1znQhjnHu<4NDLP>xA3{P6z)nlOp$f0Vh_>I>&eZ{eTgt7 zt1P6whsAsF+u}fWz8@E{SEwXEQ5C00dxLUvWM`-Qvxi1UU#rc}GVI>HyI~q(vfJF8 zvv}(JIv|C;w>GcHvMhK77(zQWU3!kesrBimfnUE+)xtPr4&B8{X532BOTnQG=hgkbr+KH9Rm&!a97(b6jp&=(3a|rIwL}_X_HWC|zr` zgqhp-cU;IPIt3rFRQIq{WPy6?)8xE=TRry~TXyVoCf%wD?Hs)X)zF98IF;ok^K)T9 z8~4W_yZ7imKRdA!+jkMk9BI$cuV$>jk>x<4%elvox8R8Zrk3AUI!V9f_*eC4ag)gH z{wTxECBJu6XdUF;kBcV;5V{%yFdy!+HajG-jQop?$-B7 zY;tmPgRHE6!_tRl*@w&vj;`U}bo2s3TR>Tv`)vf@TRg8p!BGNN2|5;~k*w}ml+kG)oXir8=yjlS7 zqazxH39eUxTb8qLx?x{aNGJb9I__+aD8`*yyE33sbudJWFy_KkmCjC-} zo|{t@l~E$QG`s6k_Ry;{GIQRw%*IKhSAP@1mo3@4R7KOPAN`yuR$E(JH3DFw`_Sr1@{S~bAM;>7gkT?%bznYS|r-DZ?dr@ARzSWRgFQvOWb}*`LUsY|=?kLGyNl8iMm>zn5`kv%7->E#Tf==VZgKRyI5HMn? zNuFbUCUWxftEtYPKSecynXd!F5`K~B+1lEg;yX1W_FM1PT-Jfd-6(FWSFPHxCq|N$ zfq@}6KYy=#i6vHHH5XU9?OM);?6? 
zpC!4?tv!&^9+{cWb^KQB_l&v*C@fm-;^LBw3|ZBW46ux97!#J1-0R}%nt~%$HIau3 zovvDpkTR3vleGz?u2sj%goRY#+?Rj+sIPUEJH_$@09)iX@8e5ZSU9fF^XxG)GCDIo zntrtP)16a@gwTh((^+|Vq%JO7v)atetik=4=jr*mzx@32&UJ#7@!KBH&J31hjA=#i zN=;8sj|_Eau(D!_L#S&=X(GB3bDqN9zrSXD*j-&+y~n+$N(kx6Buhho>&wU=ntj&5z`ziAU<&xcxH_u(>?;qoo~I>; z-*Y=!I5;E&5$PcAq9otOUXBQS6f!;5U-sbx3#m;&jB>8cil=>i2wnP8aLg=m?BKzJ z9o^l@&!tT>IW6;@&LIN`2n#dvpT|xm0@kczRSB50n))+3s;R9VGNNsVyen$y+LjZCP%Za+GG+dqN35n-nY9)Bog8Z4v0ml$D(qu9(r&Ip zY6lNQNys^GBEx-Q)|tr#CVzLAkUPu0R0*zKQQsD9p>- ziNdgYyXoF6l)>~vjTt6{XX*PCrS5)u?%cT|kKWZ!f&2iT9M6u3~TS>OcalEk%ubThuZu@}?K-c$u zvVJM1ZRdFR?j2!%vvV_px+~oF?q%M5{Ob}KnQTqho%n{U^oy5?W~+U_U5($axxF=L z9zPc}6|QG(%?HpBn2?arSKEVFv2}J11;C;YdA6`p0Jj-~#z01^kd+kUo1S_~D01&h z{MJvYei;@;pfIgm9~c|Up-U6JCn;lmf0eRl-vu_2{epevcZMuyHpYiS+xGLir+Jf;J$mcRpLt1<=$9~iXj4TKtGF(} zgc>_j`AY?XbQy2h`6buB=+uvu4-dV_c-o;;gCcK`YqtxmkRLUkFuS?gKOZz*x1D1N zSliqc^T}IEs<@0=#ml>r65AYf8b>cLA%5JC;@otU9Y?i&VBm^jimq*Y={cn{BMy-> zd(DNnZQF(dkWQce@ZhsfphPAAS*5_DyBn00I00gH@=HD)+LAvN9#FFn2ntZ)VnM+s zY_FW{s|&_VE1UQ8HhYCf@Tt8)9j!$s5Go>zG5)J1_UDXo_Mz?6{If%Cpi{tyri(Mv z$FRlEamt%U0N;g=md(hu$x)mNhnb1q0GuN-y|3xT*)724D>XDU%+1a3s-L4b&v!D2 z{NZ;rx!kku>sOVA#tE6lE!5#1PuzQU0*2`0Jkq2Oa&Pw;Ed?8jdLFeWyijfkboT1; z)~7;G_9GWWd-AMbFAI($*tmytAX&@Z`v%wcnuD|3D)p{^<#Bbd7P>g@$Y=QY_`!Dx zYJA|^Th^~%KQujdjIdmUfZE(-lDu>)-a_{1CsC82K`w>Ech64z{_^CM$jbFffv5)( zzwUuHUq+n<+#CR2u6%y%*m2d*m%=rCQf6{hdsIGx{evRmliU0Hma(v~%#OF5sABo` z_C6{_-EyUZ&shg9K~gKLu3j#nb%k)Ud)e7606QrwY+ua^_nfM!j3CvS(*9J_Z$y|0 z0_7EcBlkp`s!QwBHOo1qUnBlhmDcMSHuCA|>6u$v+JUR!3pV4!nIt402&#Sf@Z@+t zkUN&{6*lT*dnw)C1XUh@o$^B07S%8Q@4#+>p2`PH=EL#dL+QC!+Ge(Z<6(o|B1)sa zUK)S1J<0E#TuZJ!Q|kWs%Q{iQLfntXfQ_8KJ>N=nHLO&k{=JR$yyCXcrj$2+xRwK)(CRY_qgXGS&%3x5w%Glxi5cqiR-YaoJYXU7~j47`hd8neW`CocsG5J_E)9b0Epn!R=mCW@rj80_eJxMHY_6oo3F30u?AafvEKm` z2G1WgJb1wjWrp7V{v>P!!Q*6IiFi1j+w=C$DL|B0fpiSAHl>t`-!!{PhVe_FK3c}g zMFjmLPn?7#18m8QEJO|kZQ|F!020BSjQ(lBOQ@mM?8oS$o_^;*nZs&V#+d2p4MvGG z1{U@EAc{~AFJiA%YiF>&7w_HMI6l~x&ArnJSu4HjtYL9?zrRnXK)Yn;&)OuNZwH-P zVgxgvj{o^%RB(x?mqguRT){1BzoFoGD>pvEVVxrLFEaODifGFXb)3sX{?WWA3WYFp&T#wg<}&tx;gr z`O~yGAG^8p;GRIEH4gd$6l-}>NH||7E&wjYQ6)HlB zUbFRV*V=i5W}I!dO>$G_sDOk59{*Z&4*v>SL9xkm-In850_LW6n?U7QB6;vBJ3`5< zSwL823*g}5vuDqSXeWMf!a>exY;KBL&1i+3`V!AD^nOd*28h75r-LhXer@GoV~hGJ z=SW1DBDW4rjiW-l543>bDOo-B5Uc$XfWjA)kidsMNpNa`2q)2a9^tN`8JQ_{HDG^F zP5idn_fLz9Rk;eK#fkMf|#6-duUA`8vF1*|?XOsK-ba(FD z$*?t|?iUY$a4UrKZePa1!9nEg@$vD~QzJssRz-mtmsWNFsX|b>d~37o&JzE>Zlga9 zKA3Qr=Ehd7sx>e9r z*(DZx;A)~U1u?FmC^Ri(w`|#sLm+#4$Oh{I&b!%QgC`^h(CH*#7tw=H6%&(^a=m_8 z_6SoU%PYX8nfdtmzLo^=9XfQVB1(h>2%q4h(O)fF4jwkt$@Tyb3M@P!MaKYjyF(c;v9 ze|r(Ol1JI5E?GzHxSG$X69JV(fP-Ga44j;K=mj@E^~+Pw2X)#dpsYCn+0bHafE^Lq zHb{L=_}#?K!T2eX+OfP#ps+dK&$wVeZ}QNJYmK05sQ9qOZ41KcqJ z*_{s!U5anN09a-B<_cH3?%U6wNsEXSO-fKXLhB0WIpl|X&h@E3zrEOY`}XZstgLha z0Rb*ed5eM5cjN0jIy%k)y4yhHF9NV2+lvajUd?|!FE4MEzQX?f`;|O?t~y+EJM`7X z-A5&8G@^r+ZcN%KG=VTiLER-N5xUFE4(=4c*ZKP`!}9}Q})+mz>6{`$2h z;e8N8X;?iSH@BRl2WRJVL z4w%JAAIz|n5f|4*!WYMJ;Q*DE+p>iNvS*yq$wZt6(f${HGp5)wNl0E%U!R=X^XTyV zZjkxV;H9jg>kcI%w?#^rNsAe!UO|vcH`&@49=Mb2^f*Fq8*?dwD(dTDETv(gi$!Ie zLK-lEE?9040C|RIj2RBAXGG7+TkcX!?|CVdVa!MhXIVOrB$P z6_{TSj%1ZGp91qc#Dj6a(3Sy>qjmdq-OEZtLZAw2qY!V@($ca!dQ==IA__GA!Dk)m zgvzaFMv{rpf(9K?Zz9oa5;M)b;W7B_;HLG#74yMi+I`sodI>atUyw!g9@z1RK`|S; z0Q*O$oIa#O$`WbbZP-=~Caeq4_`0-I9f!yf`fBCM7MlJm`CB(`XoFjJ_h*;HFfgqY ze6sJAt&OH8j*oQTtySN~Tc^!Zrp+5WB97S9hAR%2S zDm+m#H!T4njBD1Q@a<;wsgvvZ`T28>HA=7F?!ykDJ+wK2)p0j76&j+QwjE?JUfFXe zx*x+(Rfy^u7cZ3PvG9xNga^Cg$3j9pm>JaZA^bdLn*$Rz0H@CXo!|qWi|R{1jK2=4 zyBxAXQj(bjFQfK6Co`q?5}0$M^3QtGE>DSu_IuB%Hj(cI4@0Tm-T1?RIJ*-R+cx4a 
zYkw?Ru>zFGGcldTD2%RM83T}Fc=TXV4BL>n)@l!&f*nK;v^y07syMg{Xw%)oXx3JP9`R}TsK z8MX=%ZD1e;RC?IURP<{1`&%4nRO@(+PJT5PtZt;<0I~$35(}WGf&8iSuqo*dWLUnMCeWm1j*VP$8h4a^A$2(Ut2e7*DS+xWnV%D`soUFM66 zi%ExfPH8#z(N6~;o}!Q=s=9CMTh=lDftx_@kKd6nN)`=XQHwl5RH=4UUfpPMTc|HO z505!Ik{&@r9{vZ4ozSxY00WY=1E5^!*3Xh>TwJ4<@FMxgk2op%(hGL>_9hW7U8g)f z%duW2)hJXqTyE{4(1p_vMyHz(QY)>&af=_CB z-9E<*zjyIkcnyBzA&C0W0WfulcsDk?a^v%Ed~VtkjU;>H)~zGD&0J#DOo2G|$1V@@ zZS#_>%>$%K1gh$#A$a(8vru#GKr^ULtv>RNH#fUp#$z1T^*8KeyB6;-wYyZ4mizUG zrqI-G%~3J5@@yO(7fVP;C@Cx3!O%hI2g)(yg2(|-xB|(ICTc(rG23M9@(Zv?RHkdt zV{hENNt)@TN$^eWWalEF&c%o)^fO6cA9+H1v)V8pT^m3hy4ENTL6q84cT`E=T>*Y zz@oxAA|~L0=qQvL!_O0Fpx~)UFSq9f=|H!(@}M1l{@y*?jFCzF>OIhTDiHn8M2P)F ze}Y7O>#(zf;%DM7GDt<~C?#j9w@wK1lN!2inrP8r4H>9Xya%e!3ewF-yV3s56@vnn z_0Y`VMY)QE(*XtW4PHHZLfX3c3NSE!N^?PzhQ106Z5aP{lE*N=KF>MLdCvnydG3YG zUZse~)Iy8N0K%;C1Je~y`gdz;E`slX5opxo_gAi~>$bc>Z9X~NxodW)j3wmqwDq$t zSeM$-OCsP7`Vu|p7+UW z)zJ)MUBCVcBu>Io2&+O6ohAMb|H%%fPmtaD=^-2DLv(sSlECVm%yc!xVL-N{i8P;~ zUW3%XI&)6vhv1g)ZPps@j&A!ywEMB?5M~0ivVL*2^wU%i=_d65c*F0zXfK}gzstE+ zvlyR7+b6We)81!_CblsF9c%R6J3gq*Yog;1JVws)m33QIZaVr&9S4P+0cIX$#o9v9 zjZ0ZsStHG_UeSY7+)(xu7dLx2yLDIP_`lXN6~`TYZ+=pqiDdQ6>(@?zlOZHn9kTo@FVrX++?Rp#0g%s}Q?af4?n^@Y^G?g?Sh29uB zE>%^_N@hov6Fo}dNKI*dy~6gqc&Qx5YsgIU7eS>+Wwd|063(8~O4X}Zu27VL0oTmr zwg>;%|Kj-3+$SlBJRmA!A|7HAbPvK%v|e)?@U$GdS~qUo@b!>NeE6^5Aulxv-G=}$ z;*LP!)&O(ML2fi2ECUNi!vf@(G@Nozc4>h+q*|vwqFXQ+($b0915K7IJlocS*6y76 z-74olJ868P1V3t3?8}vX=*26N=O9UXymK8drO3M!i$WqG*>-ID^XJbvcN)M0^)g0H z%xCFPEDci8-CGP3Oc;`SqFwoA!mbh7mw>A8q2*WQgG@_Gxp7PLmoE-(#~*S0^E6^a z;O^`^55=PcnE;Kv?Z6zhX?lFV0DDzFWO4ncdN>*iQTuk`$SEjSH!+g@lIYTBbohIP zHjGZ_)L+cbK9ScCYOhxA7oyGmLw8HgivK>FrL`i=kC62oyf){p*}oWBjJEH|=#{FE zqcxe6Md?4#zzPR3W1!r-E9k(`fg@S*8HeCDXEC8TVDKu_I1SH+F0#d2sTd(tDAA*X6r_KP0wt zVd{g_&4C^Y{k2Kf95^|Pr~4#0xwtk0xspy7gs13<2%c?>M~)onu8Cg<2)_#g!#~C1 z!cw;OpFcyuo=cmXxuHrDC)3WIJ7FdR?s5PAF7)o*4JNAagIxx=pbH5TBF-&H1}|Zn z!fu_O9NLHu|3$PeUVZ(#o_qkb4XDal16@#xoIYoA;mltLu_8V;fNq_LvgsWb%G4}K zjx4D7ZB;*RCKle;kily2?qDFPs3;z;2B?wIYQJ=LkQ_SrAiH=c3McgALe0TlnwmJ8 zZ0M#D&(7u(->yKNdfC#lj#>q|Sw=<%4TG+#Xo;aee>9MM;Xb4JA(t~j0DA@b@*IpE z9mp7*a0O9!VGki{GJLHMGQr5tl70ZRoV2_FAZwC>NT&(WdJon>qL7edK;9l*VE*%{ zv|Ye$RNy%VCVsT*$&$_f{VV6$XN*IhTrD?yP4e%O(|CxCM(oc{@X3=Pg^<9vHT_cL ze$QbYk8GJ-jp7~#30Bau7~(bYp1_{619A%;1(|9$lm=ORV)wf^wv8J%IyL8uLA9kw zGt20g#xuX`{JSWM7()A|{c)Rr5PC~q17FSoAd*%TvFJe7r6@G=w5v774e~rz z*z9cXJFt9t7<=P#7M4p_udX0x9Nkh_Ql!eq2t9zcX&S&Xaa4dm!*W{|WURYTVswX? 
zD3bI6GoTeL#>s+!zylSN1PNiQfU6Cvm)A(7E()N~p2=`wdF{C$uwx~$f^K+t9^HJe z48>DDRTuF?86(jGyH^$!6^T|8M-cHlVmm0(1|p4w`vysju#F@*1l}^p+ee`z5C#VW zNyu;k?#%0t7Bga$FwHE*e&!x;RR+m=g;a|!8oJHs_JD|q1t2%}8Y}{4m=1stOzNhH z>od56fJ9&7hw)mPy>%(kHqx7xt(6Rc2xyBvAqO9wcaw+!y3&dY(MoEQ5a{O{{P$zZ z9*)VG86vgbfVjK+mq3=oEy@j@z#^Y9@y#w@u66o=f&FYQkbs}M1AZZd`#sWp1LVqU zA#Nc-LCvYN5O6JzBp`jywZWm>J~$WwSx3Yt8`1+BHhCE@<{=l|OG_hbB3g|SN_5iO z%ATNl#-(7c(G$F?z9HyY>z(m~b4#-YiESNT<~4wn?U5sw;6h5i@dNy8BYa3lYHl0E zyf{5f4{i4Rix)5Q-XTqZn3N$vhG3Dk1E@_`5qFk2uqY6&Snk3@yEgPO%KPwqD6~JLCpO9{n1{fBfY9 zM?`YjO`FQ0L_kr9V2MdGPB#SAUra1(Xsx#+b|zt4Gf38E&3CG$SGUsZB(9AO8z^GW z#_zBd=)Ds~zG5dzwEfKZFC~-(5tB7ynHj-sP z@Y>nFJsgzjx(OV0HElOgr?fDrtC}Ua(r&<^up|!1Q^Zj!* zF9PT_5dhHXYmppjEFkXf;L*$Q7#$t`_~NWL4EKUuA;OMWWVm#9)~D|4KwFcQxYLOt zoH{A@7P23~ssve}V(+IN*QyN(anP-Dwt*~bUgEzUeWUb@!c7Va#GbxDYl&&l*UyiR zLd#VEePDv2JAtK$<&oqDB&7!#PDZP_xtA#@D1d0^Y`*3ZK65*n$=Ba|>y)d~Dn)~7 zuniut37ccbs&ie{Y+hYh3fRR29eMPn=h?Boi}2jp0_FB5C{WsJrat!c@M=Xb8 zzvX#QH*Qu{gMFqS`hfWZBv9S7_26j+rj5ka0mAgNF`KWiF*~8DNrwZQD@YDGC!6rz zQq)63D-qis120<2wsR1}S!>^tDi4g{(BU6XyD&Pd!bLckbsT=dw;Hon3p_SY31|di zvk@W3!mCm-Frd#}37`E@ugLt@1g(j#tAL#Y)xP{~X_F=dkNTAFdJAJG2Agrp-^Qtn zntag?ByjGn?82MW$t`^He-h;uP~U&~*KWeA5?xCD_Kz5;{oN#J#`yh0NJt+20^|H| zoM^$4UJviED#enfJQ>~VFFl|jp~SJ-!0;cC|34EEqA-f?aZ8BuJ)p)vt&~YecI*G? zJ4U52fLmD;=6YtNdj~2p5%8e6`nf$rrB4D#BO@vB20*EaqI)8Q@^z)XPC>j$?J|D1 z6ynKp$kMMNX%Rcs^MHBY72JyIKqq-EB)1ZP2(0Y}@_YKsnU}B+pa#D$y$v|c>Uc+T zJ6i?NpmDZYmgPm*{X0SIUmS0*XiW<> z7p0$~P<0&|hkDXNZV1ZRpU;T{;PAi!dtE;X}ID_Id&M2Fj2T zV_|8@FDfe9#&;wEy_cOi#ah_ipdhqr$@de-sg0 zA@aI~ODGh8+_MwMiB3$+`6mxT16sty^dM$26H{4Lm3WgYL~=4XFbgM$=)RGCd|fs{>kkq_yU@4sJiF66OU1FuS>VdhYXD2OS>*QN|qky1Wd@Rz^}A|CORb zL7+u{O@i7IKRQK<g!-=eGS4!iU+1_{*+-wNqgJ|0#;n~CMbkL6I&};B>pt6 zS}CD1no^Zdxgj@y0YQW5HeC{i#@A>ueax{fdsnrVB-+AQ=ZL^Rm{e0xc%)hV9=mq@ z_?@K-Pgl1hZY_n)V#M0-*4Mw5Cn+kr1oBuZ>Vv|+Ki78pL)b82;2`kNd$4RS2&1ns z^?;UF84423Q^u<3nne>y23212Qq<_kNL6O?LNmfLb#Wkw{)d14Bli({`2LYH;!GLt zr=?+fE^1%~9F6P7IbUSQ?Y~n0g{WIaL;9Iu;@&6K9z%T4~;26 zQx!D)Er2Ew^Y>P)A@4dqQC>iYHm>qYKGoYzO&iDq6&By8hTxaGvCUf=aT?iVL7t$h zQ2rNd4y4@-wvdH>2h8 zjN2bYl7Fmo=HjLf0pU0FV*p8x=M)Lkr3s4k@a|A@a843AC^+bJIV%#0N_@tMU-Er#?PFgv{aKds8grY0`DK-r0szrm=r z^b~RO048jNu_rEC3i82Azx|&snc>-^o#{4Egyw?gy?-Z3DtPbhy~h}qQ2){#VDGRc zLZ13p_LJMZf4_U@f*)aZJcb39w#;8bqLN6dFjhLYA31gE)Vn*|w>`=O>4fm^Xh|uZ z_@)BODl_y)2KVmCwy2*lIJ|0WQ)`%+f>pUT(?Wq=+KLnJ0KH!O-A(Uj>u-T|n&6n01_-Bw`w75V`7vOJ$Xj#b8CUKva zzD9Q)V{?Y@SJG3!qBHkTQtL1@09xbcF^w@YO$a$^KHKntiJ8*%^rfJ1_DG_ntE0IH zb2lvhm}8rqo+gtUy?2U}k_1o=$UxNI8?tmDXWCw^m}P^$C7uoQU;Xv~ytZbq5IzZ< zrmUi33FPFL*nyAfMk3@jVdo=LgLP8G++zzl@4jV$I6g8LgD4nHVgh~GK_(==zCq^< zczf?-m4($#-pj*;al{(ZN+G?dp`jsW=Sybr0Mo>bw9fqc{yiAIV>=uMI5b@&3Hh;^ z9O|g*N0SWQg0!=T$U(2uNDXJ;PT+GkbCj6iDiv5? 
z8Yr~J=0YFRHABFy;^YiLhemvG_pV(XEhkFKF(_)|;&KH9p%Xf2hTlS<9lRlZ>cV-5UO$K|uwwBg7xJ~ek#k^91> zOZKyq!yq3g|Klujm!i%R{e`%U5LU#a4EiO2@y$szh>^iZ7rXazMGu@kV%n`cz8xQ{ zOY$*}H(9q_+cHKF)x|KC{A_r@q3r!TlX3|f!g{XHEL5JMiMVzpZt+;8b)BCwZ}jUL=R8lo+}}uemMsPSpCFE zHO0Ix@Y=q7B3o%;NLF~E_-k)*u%yfZDd2`tsU1Op6DM}uH4-uWt=CWkaaO|7jp-nO z4<&+XMU>}8xCd!v3} z6f`WaLXDzAqf8OD?AQc@jjaFdY5W}~d6z>63OaXght`p+__e$cnomRhCCAeu*Pwf` zf^8Wcnt=cTWJ-qLFjH|A437crSFZSf694rMx_00uebl6$F*F5`wr}kc9W}&3eH(s@hqR8k18pJeP z8;UpqAPv0xPd^3u;R?uh7Y+cLs^{PyIokdM6tJ|WMmp&cNxrBw&pC#Yb)r7PTnL4a zjMAY7Ydy4@zr}D0$IS_2+geA$LPS|(?^I66t2{b?YGSp)=jaYP;-MNwRuB*rTm&&4 zY~1Wc$(cWgq0u>b6p{t@&`Mpg$o25ym~MAKZ04G!NEFwPR}K+(1Qz|tf9q7gaQ%0B z;VQ}BrxGLm2D3Llk(U+0m&64vbSdDd_%E&uc!B<-=)t1Vy~S_;#;qARnSJcEs;|ZW zi9Fqq6#PYwF` z=!hOnvVB4QOej;p)Pyek&-~f~z|ILJOAGT{7-)^7+bV2#fD?Jc%0iWLf$j_Gh}Jg- zV8M)B2X`+6r4Q|ajLLz7+buXYlsBRk%~uWm6e!C`iap;MGf6st~I!+krFp(G z?GcXu$wdGfB5X0kL>!G6_BWWrWHXJ`f*e&{4rMcjpYW8x7;`^{2U9hW>?)7jqrxyS z1dY~kv!<+WsfLZ=4t5~;(j`)Ws`_E$ARWTBYuAcmGzc7t2w#~Nj~JLsDw*=(zVB8lDn$7a5WCn;@2x zQr6>#WhYZJq`OEIRmgCp(yylv&dbX5If+a&8r8#4ikz)3I2C+L=lzHQl9v@6jXbBw zAsg+fd$NB6iTS6}aF{^mNJ*hJ|DM|F8(=?DBA+TEPUapYF$XZ_l?t6Lopae0djaO zw`tQZ8=Jc$6CmHr@8qyK2H&d?U63P4yN+Daf%;CYu5e#qT=lZU<60+0SW^w>U<6W- zeM@2_PGS#JAV9rRz?=%^V$vk*>+6X!(GF2!zzY>VCpY&T`Uzwd4&XcG#(t>2ILsys zsM5kDW+jEu%y6V(%}u9KNYItnb-xAG+6L*II8*_GFTrXGr(FkLqB6A$_g_G9?D^2# zth}$9w1#NcAuddcVG^|#6L_$r5GMt=xduFVIWUGeJRTk%-a&@#CZHkz>rI=ha0$rd zdMK~B2}1RJP>@M=P*9L9dUF|mq{{*a0kJ(t9()VBi5^!%G>8RE2)iI7yd$Gkw{N>m z4%s@Afi!SFX53O?{EH0w!8->lh`x_w6e-$akq`#x2yFfNIcr^pY1T3{;J_v@`O1oi zVfrt%R$i6Ufq>md4$F}mCBHX@g#yxwAj&EB7OQn0_#MPG&4c+2+Hj+zb7r+u;4Q=h zzGKG@G>tDsMX`Y(z2~`0wPDM&-P=VC2>!S1cUoR4sfih z`^VJx|5niW|HHBU-%~>W_kJ60IyVT6qM#9I&vyVrA)Skqk3sjn0-59$?$}tla^)8k zS;ih3c2@KctY&WHo&@A{G_4|tuMm)x%+f*M0jeHg}V2k1bQ*3}sne8rV0c)z{wx9;A(E7-__ zqr5r(->D?&C&Rs)aC6M~44ZUiF~{HEev#GoiIHvAPEHRM_lTtANACGyRBsojdV6j4 zXm4*a4S#gxtETJ~ODUK4^HE;-Ln-UZY^(P2W=>7PDc>~hyc~2cmr%IOhjw%d0_y9g zrmdB(=vj+FgfNpjkYjTW{dCzT53H#SHThO;Za5-fwu^7R_fVM$B`okrpt%ldhDJ*=0rjpIkqgDCE z9|1O`V#JOmm&-d|%{NcYybgR374vVl*SX|hkIN>o7QleiCu< zY=lx59v=SX@ri`G+Ae4dAM0nVj~v;uWeXX5BGLd*b_62V7D6fMvjW19@YFzjucBag zit_6|3#*f6H8Gb!ib_58P(BuyERas^&)QA((|L836$^{^Z?C0g*_6B2d6&RQ`rk_= z^h719>KJwqJ>*|~ng7ALWN{^gn2$jm$iYz%d~yFi`}a>eNs4&~;_`dp8DB(fzt{5f z?BnFcv#zjXIIGE`BJcD-UNB~m>``{$_Huoi=81tkat}*u{TWbP7*gxD$6t9($%df_ zlEDRX5P^gFA(FM$Jq?&wg~oRtB1FmoZuN0VwpHG&x8>{^@q)p?!pR4e@eW*ZF(K`Q zsY4{CHTJ>`D+Z^8!R1pCX&0}E8+oaCjZVSJ9RZI`2YPnBH)~*vs|D8~Gcy>+-T&Bw zbT+ZL9f(5Qb99)@@!|Lp2Qn!z#8^WtGp%*vNIdwNOQ2hw=s@+x><7|CtwNt(4tG|e z2W<~Atv7}MV6D`}6iV+6wE$Hh${W)4+m13o{^IU{SfGSioTrV#$=+D3_;*TPTw>c# zoL0D5D2X@Sg*GIyo-ATyOx;5+9KifcDTsLvE`vZnMEqbKT_C!g8x(0)A-+OV@(Q8coYigqDQLnPX^$DCi3nqNXddX+FhaEM^gI`hc z8val+O0!b#Ju_Q}b;uNthf<~u`;jAT3(j{;Jv{RJlz()03*tj*Fi501E91QXb4SG# zNR$xVq3$T-iu(i<?Fn$x(QER%an*8VrU~@bf+y4S2~8<~<^k{aF|P zPe}0hxo-!!dOwjMyMpyPq>D;Kj|0)}-2cR|I$jX_6dW~qn#{I#tuSJV%!BAcw(G{FCwpQ&VDT)K?8Z)vG?-aeq9{Z= z!un8`w3}t=(xp1}`4}J)#IkU(UcYwjn#Y5<>XnYVlLBM71f};&ZW6A_F}C;&Z$r9u zNinv*?@^tu=r)>|LWDKO4Z!_&YIYu72TUe`qUU@w*o*k!V!?=5S%=5Q)<9w1dUo6e z8*AWLwg0WO|N91N803=KvHksar2eD)f z2f9T!IzN2zrn+!&2n7cR*8u>F;VL>#PRnM{wW49$fk(fFAx@ z*6Yys!yumEVle*0Q*7*KMd-!x@KMKVd-r#Vq4MEi-cfT1#9>&&DS43=m9-C zpxrkD;HKAnWZ?)#e_Gsvn}YDT_ZD#=W6+?vR|B%|>aIVU7pZ#$p&ALj(W1~LF7F*i zg`Rr-62~^q{Zus6l}M_hHAEcTiHIeNrE^>H3s@*jFi9nbz9KuWUQ;X?#?WLAGq<8P zZl4o_cMswQ*8k+GQ&F%#RiaZ;FBqRMX*B!c4^F*13>ZsStT4$^zSb=C0f?d9#2KBT2+qXmhf5*%mo8lCXkU<)G90tW_ya!+Sm|!4V z92p=60bdE}F&Tba+`e(`p`NAuGKNwab0??!xMN};JY*@7+k7W;i2jQ~oz(pK416Vo 
z9+Bxt-zz{84dvbld_&+Fi(I>ld6_^qb%>M?8FAI&jy`|BUwwCbs}UT%_avYB%@mT$ z4KS&8KYt?^G>M|8PpbfRRkvmhn5SOZD{&8AU0q}_M{ESHh!Mczb#`@;&{)mOJ7+Po z|McvwO~-_B0S*m#>vBj1>}{GD+(8HCuSKsQ?T%yBeUG}CFGA1beCB4K!2N823c!z( zXV!2RrP>N;UmUY$eDV9;3pBC799Ojjh(eb22xg;`o}SS#-1&M~av2E2Jblu`gB@mG zJj8Vzp9)Z&=uvWBpbxIOJ|F!TA&vMI04Rt-1vF0Eg!w8ltD+pxoy!+5UPL!y?@}dL zm!$FhRY{Jnmf&EKXp#AQxXr)~D;Nu5;Q%~i#wBL3VJ48c#y;1laP8Exqb4tE_w6J% znPA{8R&9RPsNgyD!Oq{mr2&!FuwI9NsSDGABXrivI~Bu1&lS{gQ4E>loKlVd(iriu%cAzlx3a8x80$*IhW{3*{;^gduLqb&iXU!pI8+-qK zYPzr$2#nC!*jO~+cWKUf`=Yxcjv2HnfpX~EN5@Do=cPYP|`Y+4xrFHQ=)MtOiuEHUJyklY0UJQhU; zh7L0a6o11Un;j{YJIu{*V7SL@DXm~jC<2pACRSl!(rC@sH!>6Wz#gRJ4Bbme|Dkbl zafT~h-*=)6B*VtkTX#V~0J(`i`IGizlv|RsQ+7NLsX$_vfZ`rKSUj`;iMAI?6I_@^ zv$kOK`$od%pcUojzo_l&NMnED&|YerHdfJZZpNOD>8tOw&ZuNU!2;;6uVJ==bD zzgD!^Hr{6!;Br+%)8Pt`=)p5IZy+@1+Qf?_QB>aYDuE5-8p$3<+lEV?f%cLcB#42A zUCPp!nB`N}9>ZQFW@l$B|Jc<@--5gwV0@-7X)jn74H*fi8~MorK?UW39p>1phHoIU z=^~RyyTZZr`s-IKO!qyMgxiq=H%5@zKXS=sMst2LZq?C;<|#;Aa#Ave$su*m+u^pB zcljbqDoEd>Mn)HMbxE^U+8g9_GNXyv98$vMkJPMRx_o&Q1UwzYFm3adW_#k)ysVXU z9SGvVapOE(%xWD1-K5gY+D`~=r)acqBurYcwa?H3s|HLmR=a`nZg=8D_E!U3M0E&- znwF7|sRwD8=`hUrTQuURu-={9-Mb#cl8w8h?tfCAnwi;yvuxC$CG?CY=Ff{89)-}M zVn255m~%^E7P;O_Qxk?uDY&U(VJ=9Q)&_!MgRPTG)wd|*^+9#amESdTtjg>XS#5TT zy<~egXbYLbB_)S{ekS)D=mELb3-E3u|H&714~*0K;r!l{QHSphBj@~%s$YiW1H+Wd zq*4KP-q&qIf`?o6-d9dU`Mxt(riG%#o~?g&;oNulVT5302JT%WW{`LR>`83U^-InU za%WOs{we?(T^;uyLl?Z^(@#=($o2du~K%-;#(B^KFOPfS%91_=dVS%pgtO?P#Xap zVy~{-QiXPQOrJfTSed*U^k6M4xDdP$*U-nS!Wq{Te3*3%V!Ltt9zaDgJX8!Sgy2!8 zxM)bLiRWj9UqVVI_FDaabXmmzXlr98H({wZ)KuzW0 zXSaBKJYLc1qdGV;?X5UU#v?RZNjTERF0EK6j9ZpaJswsCg-j>LE`W8R%uRT-fby3w zcc2#`37#sCdk1@JYhbn$1Z30wv6%Hb8WW)~YZk%K+mE6qR2A?lw+7lAnR_bt0k471v~fG;76KgfQS?EmfGMRDM;ZN_SYg&oE|8fIen;xSqo*@Ed17I1Zpx` ziAb$W@ud+kNJuu>bIgA|J2`X@ms}X54I1V2`W)2JINWYW98EBulX>zw7!b%K;Wzfe z`zuLYV1Eq6+{~mwtLBGxs zY8IK)A08d0fk&l2&2{qIwcn+y$PK&U;LMoAGdzjeC9;uc z)I>$KagVADBGxqH9n!A1b8~(=yUd{l5DZDYYI>=R|8PLy#6Xh(&PEh`=-P;_6et~4 z5xuZIC&Ok$sHa#z3pff?it>gAlaU`EWPoFJP1Iv}sz-6qfihQvV5M6;O2wsA#HB!U zwa4nCjfmAx`{aPgF$-qq;>RI~MNt-gv6d}nX*!opkq9sW3X$wtYgIDutU32z?+Qy;}0%^V0^fEp$UUGV$9I0F5X2@nbV( z8_1P=kOLgi^3+wX^vjmd*xTGZjsu{J=T8Qc4gI{7nVE*NDMp9M;2Y#_R=(CpO8(PO zg<`N*!j`ID1Lh5E$&i_AIyGK^FbZKnYcK#o= zy?Ip6dmH{6+px_<#xkTdNr}udr8#Lt<{=cBMI=+G49z8^Xs&HanRi4<#iq!RS%oN+ zsff<&_Wb@h|D3hXT4$YSt-YS-u~XkZpZEJ7uIsw5+h$joUAt4DYz0KSc&g$WWY`7m z#?<#_v?P#v_L9Y!KWp{nfU5k_`Yg%P1t3A@#$#MIuS?fTO zUp^n-g_e9CxXu&D-Pto2ljc)rEY5f}yUE&Pg1VW_n0}Yv*`MF6UW!%pa$pBeU+*x= z;g25J8dKktQ5PPaU!Az8+aMDO9v5mgzL#!fQfD}=4lQT_<9P$9wB($*EsxvuJj!0l5Q`=U31sfbQT;9QwI}FlU$P$s2;u7{zFNIrTBFKE2W{Z!%Z}p>tBHJNAM_<7RM;Z zsWsv$41Q!t9x&{_Wk>+VFlpR<&u^8HI3ACa5jD+zp>_)H%a#q`*KyR@h-s=z{aAP1 z^-yYOzTDk`t}X>HcnTj*R2Gyj?Mo7t?1 zv4xre8WUgj^yCssZgF_rYxD*Od=!IA?^ZZR!uS&r|KvmrR%K^E>(eXiYgf{9JMPT9 zBPJ^}UK8s2{M?_O?s;#>!!CY3EMhtNQlk}LU=l5qN~WJhgC&^kaE}(7D?9$ykI=Lo zpW={w)UC@!)X#;ai?5lRXcXiuLbn|0Kb;}*G($URp2SK*E(LsI{~lw!FA*nGcb;p> zMWi{CfZ=d$_o0#NCKXqx<2FO269lrGQ$nq)w8h;_EJntpdH9A=22?PdfTl8~rttFS z&3_7CeqG$SgGUb~EQGs&ZSSIo)d~ zU;ADQiKvcl-qt!$rF(a6kp3V%Fq1BXMh{ZcIIY{~bZXDo?8`!?&m{k;zpe#YNA?qZ=`RqH-O&><`{CmVjFZ8{+?h}~_F=9r*r zJ(6CLx6j35>T&QIbz-N|(4l9I?F0Ar8Z!&v*I`^a$V@gRrMOF%2OSy`)$Z$+O&Hw5 zT~FXtIZSvDUS-8IE>Dd1whI^dF!Se!kt;nsoPL>U483HtFicBF?_(5@n&|UrrjzmwxbK4N%M#$6V!chJ&&1&dA8%HTm-v zYHs}CkxI>7aW6T*)Wla zZ5D8y#n(Fn;65U#vn*&V8Ei8VkG8n!V9>7jl7dna4Epsm!SO;JKvdzVi?s`Cocs_-KXfZRkyH73spu2oy*$SrgiH$uw!{6TF&W7QlsaEK=eyx z{B`?gT!LV8%)j-UbiC9-63eou&d7@OmOsx7ldhX&02`sH;>c&w5bIdi_uRZV{#Eja z76%Wz`0D}cNK?h=ZZLRI0r*_#R{;!o4D8TXpS~nwRoMO#JF7$m2+! 
zX~d!PYu|@Ukfh~9y*tkM)y_j$0@Y9{s9#9-rNGR1aI@{_UkN^3;!oedcR_t1wH}qj zZpsyDBEH7+J^civBWVk9A&2Rmbtk~IK?`q+1(jO&sdgK;UEcV8lB{JJ+}WG2r~5T$ z&^ve?dFXJcfK{BJu+`TJLML4{ zecH`7p>lm?@CIGQp|vXk@*GRA9i#o0?>A#NYXG!JcT8l)b?;?j&h-eIUHQbW1wmK3;g#(s(xJ?@1b$jH`{ z2{<*3rlf`3qC;C%xPu9%_+@@?aP zEYttUglwl(m9_5zB4X zJm{*d++T*bSGB(tm-0TNKxks(o`fE{d8cRJzkiQ_V9;>WsHVylQY7r~S)lpas!$0E z^huq0m7o$fotC!gsy{Yt-?v}f34bxEB^NQ2VaoQ4ik>5+pfFpPayI3La- zhkrmy8YL(%uj3KeY%Ql*4ByVW={jb#6pzB(c?I{CK2`de( zX>-p+gJ%fr+aJu>dQJ1%BNwwP?GkFE+6yWpshlXMaiIC2mn#2w^kQXJipTOtSa?T< z-zNje@ex`AzBtV7IX(ooJ(pZhjH=Oosc=?n?ZO&P3q-4wMDdJor1UyTxYEoRQQfp@ zQ*)rDuvx*gog>qJoU+VEc_7w=!c`JHCcoa<*>B0c=hUO|3$4_aoqSH>@Ya7}neuHp zJ@zYX^Sd9mSOQ=n9Z>P7-X(nwyc)XN%(Vdmrj`$;A>_OesaqQT6(~g~{^zT0F(C zXwqIt9!r|mcxc5mW)vcauTGHWko^Z`6f#1svPmIS7!$(~GkP%>9)aJt-0ow4Xj0b) z;{aiS@?E&7{Fzp?`BH9xqJ^c@*mUb}@H=M+9=X4Nyw@wvc}bBD6gb}%z{uaenXCFG z9ZL%j-4KsVaSX{vHDYYcRNHug$fV{rNwG=3W@6W{=z)RD8s}=gOexMCEFZGxESXpA zAE27?lYti-58p#R8DX+q=iPce=X5iS3&ZkWk8uc(8*x%C%;|q*O6>`%OiciP2ScsG z%ysBo1X$felow!HeiJ@}1!`?Y>j&|_{_Cf7=Q^w#s}Rs}t#hYNYT&g> zFg0Atq6F(60|yTl6jfuS(F}f0CiQC_&05aWr!%ELT6)^8SR1}5h+8<#_|>w%32IVx z;N8)mdMXSW&XR{0+@@YJ{0%A1E@^Tf0saW5P6CBs^C7!DI*0dBZsU1Qh4R7noJb)0Q0VT&I&nW+o6y_P9H z*E=y=1tG&LO_$SeKj0qXmJ`D2P|-TM#bX$}nG^$IAI^GXt{=%Anr=i-=YW=I_V}Nvp*yJ+!(x(VvX1IUt)$|Mj zR+!mH)izs3M<7axFsG~Tf~zwEABfmpZQ9+)AQy`d8wPsSt`Nd2c&FKuz3)H`<4HmJ zF?UId5mKaZIy-eiL zXu5#oA3}Ae6WeO>=fO<#YfU{FgB^(_Hch%THX;0^JcM$NS*pL<{dHx5+S<9B>uXO* zC@qdz3+^><;lhNQuHBTB;%`2^z~Y^0$Xt$gY?weH8V}58hvMZCi%jrSnZ)#!F4a+f zryDhKS8wdvxm5Dem5ZxNrT+ z#B^59lt=ahF=t?|;w9pw>Kst@1RL<|T6Z#U=Fgd{6SP1O=+yUnYHUT7#V@=T? znGX>^EH{c)`47N3&Y|A6N&3Hc=xJQYZ7Ey#e|&l|VnIkKzr7%mQv3lyNfnu|kPXij z$jp?wTkbmi>x*xySLb|u8U4(~;y+GIIe@y|x6N{3iH^Lg)Wc(?eHNyE>s-OsGB=HM zRLPM2Tk=atacFFeO-VWJ1|o%~lD>!sv1LCokqO~p-N1px=v>3SlNlL~MfaK=Mz7MD zvdpz8&QI7V(b_;W+Ziso(f01yc=cvm%*|Xo2@0UI*K73X(Zcx^CE51X0t+kpuL1`A zk1U5ij(amI&!f3>UQ{4}r(B9{69vo@|)*8oC%Gi z{t-P9-*3wGJz^XSW@%2bCTHzv;bMo!*%1z7brgr5iSjt36m-sGy3l7-eCA}8iE1AP zeXFkSAS5f)HC2)Tqrq+F0oy*RdI$zSsoU&HWt$(wLJRss@=K4(m`L`uYZLe}>e5V6 zKbzC3gyr5WaYu)ui;Bu7txkNC%GoK^L6*VX4cQBRPlcQ?>UBMpo2co8DdG5p(Q^9I zt4s3+b_eb=;nJl`*^eKOEY#PLS!m}3AK3{ckq(~1l6NCyM5Km@ZeCSZ9!hHSod6e{SpZr|398Lm*RTucio#_-3%6(r4@gpu#u_{lv6Y& z0#z4hzU7b!xIq4m64;Gz7B5sA-ms3_%Y3q?t}^T^g3TbO`UheNG`OiFLN>NUPLWr^ z0+O^jS&b!|QK#y-+tSZ!QP)LC)%rM3Gm{!Db!Sosztlch0ZF75hz>RORPhg{y2!th z{wLS<7-GgpE2rq5~Poo_m{gi9I*vFkQ%rHiB^)q z1Scnw1tZAN8ND_n^?b{=Z9C(SIm@zzY8?6m|` zhs_SvsyVatQSd_7MH;OY)T>oID4e9~lB7@jBC08}hg0Sj(P{ukX#SfiWeVyNC|_(^lOS(N@O)G;PY#@;yDxy zqoEz36m7Ig7?NOKh$ZNmaJI@LKWvN&!-V!|P~WBI7I?b=0f{ci+}`K^g_j&RD3 zRT)EWw4$@YO*4-Wk#gYTsMvxp7@&y?$ZmJyv7k;J>R9{`HqD1?+7Cpd3#P8ph}(%R zl{wXK`L15xW;LrV08SSAVcb|WXxc_)sniAarKA&9shnmZjX4DvUd!S(xrxFM;Pyy* zOjhOBuyczgFAU&A<-f1BUle?x=bA&l;(^KdJe|sQ0;738PiAoq|&sSv|TFxe7}R%bVj#eA>`@gtQG$o=kX8_97y?GpJa zG14}--Ii6Y8_nSgicb{{X&^HKUsyND-`j=dDGziPcy26shs+KFL5gjgj46p5sEB7u zq#j27)X;$cz+Co*LT}5AuejF{*LLuYCK#^(Jj9Vm?5`BIK)Iuc{IhBZzCUGD$>*^) zHhT622uY*9WqhlbWeef~%-6{WYQ}beD2{w z&gG6%!#3lur;K>B<=n?o)!YeD-?Z=6u1*N*xl;QpraTC09cI2DXWnJ z?a39WuXYLyn{7)?)uEpEqhak!PrKIrMv?*jbxYpwRUkxJvMXRt-M;R)9RfhX2$*mU zSo$ji*~*;l9k?qi#E^-_9W+GW>uC-A;W6lp?nzl{XE_-emp})ta+<|#EXE? 
z8Vrlsh@TpX-}`{n%I;Axb6-F>*|Vsyh1YQeQUuleShk?>_X3xB-l#F?^R`|!`$P-R zukXSJBLf9Hh+{bxFHIHV&FL8c!N1Iwin@=IAjkqdQe=E6xb(m@tyeA8JyvVjms>7a zrcm(UyQ%$(GC_wW3{6_j0h78`GNTYpRCkgv^1h{`f%T{QTUSI^D9soL$pZ5MhxvsQ zO-%*y5Kv>;vrAgIX-9B}%;~fV6DA}H$Gyq+-S3MU45qx+*Ny9IfQ68#Y#9Q!NZ#6l zw1R(3{L?$s9B4hXYX}Sl+-xCUg-svMm#587&9T`Czb_F{P!vAb1j7$U&z?OiosI~b zm=dVL+gNY{7gQMnj=;u*)6n!Au0GWyT*Q8H>`|UYCW)t2kQfiRZZ|?xMgU-y96l@V z_7xEog?jjc#M5Oxf3Y86x@eJ5F)Q9QQgKp`IymUvDDI>{ton&gy@HiJ!v3crV0FCpfnX9k#GAWhy|!Id*`d)J zoiJW=7!-=Y;s)W>1(Z-#@j>X@`6IV&I$o>dpKh31Ss659L=OeGDqfHD4uU~*YP3gR zVgALj>WFC4n9Hi+zWAEW*rJcH$MI*|pisR|aN}q?x=3$GOJ9+ zGLd9O!$1+?*Yl$KgJGjbhPwjT)nHeRaBYHgS%S8H`0zm%9J*~kv+m<85f|f<uK6Hzc35wSOjKYt@3P>L-XiHC z*qc)o$WllFOqt_%xHXwH(^q6&lyDvVe)a*kJR!KMe9zNu&;%%hV^qP)#G^|DLUA~U_J3&j*yrorb#s1EZ*xQKsWG(XfuIqEqKFYMqY{AlX$n$3)+o8dn%>Gx^^GC0!uD~ z!@3cws?5s1Vje!pG}0`8oiZSD5ZeQMQ426>XFly20Evk02~R2C=EJ3nsG-?|P0;MT z5Q{F4|8Ytv+qvEMBU%2Bd<)`M=#Occm=q-2#_mknde+yB-8}+&n?LjO){3?VWr@9u zz{xtt@y5kV;@57%qd4{DD3!fb4wd}@W>0PUCoN<157#g&s56!+;Jr)u#zpJ%#@_B_b1Vfn<1hkzJ z?mE(0UX$S#x$e?3p^gdt1?rR9y*e@U^_y_5^DlcBU9c+2RjzK{)q22Fseib3<)qsw z;S``z<_T=KADx zx^~F7)fU6ruW(L`eQL8%BKIr8(=fhKC_IBONo3Ki-N>^!{Bgq=wP^r4l*rJ49(G*KX+VS%)e`wkm3H^$pkvSA1`wNZqbvMhD+d4A+{gw=^jG z&9gTNAuFQ!1~xS%5uI}fXtlT<03)q+t#8h4QgQ?vrHo0^NBhGTh7o1^xjpVYd)2sy z=t{CNJ}K;-ht)+IOY@|ox%G&hulD?4e*=@b*h8l!Wz;{$f>X8e_G%o%1z{9qoJYu$ROMcDL z)~;9)NbJ&#eYYg1p$0=NGtjAiqy?wf)2$wEnX-iEjlB2p+m(}$7R$DtNge|x1)Yn* znr$qwu0fNrc81o`b_qV$`ibUF$VGSS^on?wNcl!ofKng8cv`02^*uK0U$Hsr!+fy_ zP%55ZzWnZ&j4aUqrR;ebuWMi5>MJ;sF$oN6%W-e%-3wM?{4d=vmqJF3XvVjx-K`5X z@Jrh4m0o(1&4!NRD&+O8MUNkw-JGjG*VAI7X_>BgOXuLHCmrgZ$6OE%z4m1hkOC~F zdiU@6`a9?~TF_0XytbbS7a|^tjT7SBav}K3=WwUfeCqCNz??+;Wvp@(d)d6f(%r~@9`*5J_%^M90 zi!jIq8TAUQpK;lHCU6@=%+p+#?02Yp6@6Ryb=qK|FC44d4_*3DG{*+)r1PYW;#!IZ za{=YrDr8xI?jb`AtFZf0TmGLxnpH2Em2!P}@(3HzriiiuvOtOC9*ORoqI9B^5=aU1 zOCS1_4pEFZInz>sy?cF$ew|_Fr~SJj<4ffud)X!OcG+^O1VIdBL;z^l0_OGJiP^L1 zD&sEb%TULok}#z5@`2*G(RQg$x*}RCdaag;9Me)}4e5_4)KuiLLYAbI`0FCf7V%DT z>HbqQW+TeC3y#nrast8On#Wz}Ya4 z)L(J$L=~{9 zGVq~3^)CrVR}soAd2HJ#G(pIlAeW>!!)qT{S&kGxKd&Klbmu3J8n&KyQAUn2NuIz>yXP3 zFDIdM(gb6v5kMcH0*pzg%QLEKOOOVU34zchK2hPwIFjfbSAR?}-WW8355hVP%QFz> zLNoP15)#r3+atW4rE-?}5VEJbs5_A&$V4HzOY1$p zj+2pVq^3;ts(!VDhQEM5{tp-*}o_48J~Des+B}B)PQD!=N#JU zhN3Y#Y+pDT=-tM{`Y{dkier90^i$ik&u7ODQCCZeMFb5i=ch`!8$Za$Y)1PFq&W#z zvb9S(rJqF!QJ)Iy0JF<*{|^R=c=W%yBCXX~{@EEztd2i@4D4!us8~HV|H~=GA>HnH zf6Oop(u!(loE+rd81r>AnK-N|J^pKzh@DUZhCQHznVhkEcf((Iao=Fg@%QF;NARnN zYVqr}5%{P%TE`+rl8V^1AoiEyE@U$FwDCYee%EhXRE$3b_KOH6ockbvXsGUVC(n;e zY^F3TiMEqrPN5T|QRLjpz>G}fRnQLE4Pte+Aya}g4umGt>l^U19wB*TKa(XrhR|xF zF1~4aa7NWjXrVE~F8rilSil^p#V2*e2a(qh%ZezW|3z?3O0eqF1){)4*K2p{d~%zG zP+Z5FG(r_|%B2XO^k6jdNgFo(21>ERK0Ys~@2;AM!_Rk53@|UCqGTu_+4&6Zg+ys# zHVMEB$Q=2DC#yDWeDd~x`eoO|e!*)FUfsXHE5_VG@{ol8k&Z+H>iI~B_b?#dX~-p0 z3eS6t;TH)ZisX^$6|06G!CYY2>}IQAJGezqRd* z{f{0sv!8r2M%BORMOEK_b9{AA9kD%S8sd9>XXdGc53;9vI*&s%%_vXq&>?0T2VrkL`{ayi(Q55-$Y-%cpsviV_SOSigQ4J;2MiZ#CEL|1{qV zEtwB_2vD_l%PLxW!OJiuD@>%w}3Ky|-Czmua7&mR&G+7NR0-du%QqooO z3U+-liZ6I!9=PlcL)TYwawh7xv+h~s5qF}Cr;+(m;_o<<7o#*Znp0YJd~!C-WpBaf zWUt$fuC7{;*)1Uq0W^=261t99qsgT0-~5RTetpP@U;H@if&UWrPhnl36*25F8wv=O zP4|^H8GW#(^2Wsn+ONn$;%%>I)!*|a`rND3WMX;yOmE`xD*sN+86A+lJ!GTC6vEdz z-E6052@3G!!rE5E$gO~kegUUe%-d}lXf#vYK;t9WEm66Ml6gg(>Q-sjfk0Sf=T zJ(i;C8A@6CxAvUShRmY{I>ZUtcJ%P#Iz0gx(JE~%9p;_p;j(;d&AXLOYx)BgyxQpD z{i8xdJpA3<+`Q&EMXF5Pv;jXFwm6ER=IQ?`&}SX@4*A|Ln;^CTp;~ zn!3CGE*!7|n43QuHsjJ19GoL5$5 z-;?31ENX>vZ=0HRG6$+|ihrAMCpGK>xT zP<)m0PrYvzG4SQ<*G_Y{On-XjA`UBji^9xJ8-=UUU#g6wq@Lahi|^CJX13sz`XnUu 
zW6jMUE8~X`W5_b~8Q%b%!C(*dI^g*F*(O#IiRTQcVs*jcF`L^`w-Mxfj*WbH8U|>N zh3VR4>u95$EOsu1t1ni}TBzTte0CQFi$L!?5w}-_Jc)bs=#l=wfgS4Co8Nw#w8wwh zmzvQn`Lk_9>;CztKR{_qHoVx~32Cs4w4t|>ao?9*Mb4pB)CeV@hfA7Cs_hc|GyA%~ zrvWzjyh|qN?B|ZPjq5d|$b&Xw2YAzXd;1W*)#}qezIt^vA}Q5B#9pHaeqG$(xt;HhLG; zsuyw5g%F`Ft0v9K-D4;`H1b1??<_a>pV@P~+i2{9nqi?eDxkZd^ma4Dg*bY7s4;PlJZP!ZUe z?_>Zm^~o3o|9}7uLT(xsh<6Em@S4X%7T?+O?DA%t zZr!@ItP4JL$O<0lC{ZprDoRCV*b4EfGgS@pRrvHgWJWF%)^QvK@f?CWZE>}(U)rshL4?Fl7*MDXuTO?Pt;IR&Gh3k`_pMR7a z7GE;S+IlZ9*w0t?VJ}#|+=kLV5vn$GZ*WLR-u3G{WfK2E^SqT#J1$%p0h?=&UbxO3 zM1L{)GaW4a0@h6(I<onlaH@Zj{KZ>j!_&V&#?AQ1e|ni>zr^aO3?X!qxtQR(UF z97k7qq$5U*XdIpYILzqd+VYOutMhD4w&W?>PzGiWP?(yO~J8*dkm*O)ZA;@KtXnYL$jWM0APXb9!D* zOSpXnP$ZV^P9){%qdaW+rla>BU01(ArlMZ(62&CS%G%nc-}L? z6TSOxI%fWE?=m(g!h=uXz4P2+SG@cV_Vu~@Hj)Gj_qw$HXx`T~0OCN%)dL3(V8r_- zWL?U;t;shNYs$*YWvLRfG@akLT7qJZC*35>+n(kTCmm}c^i8*@TmDJ^RK-1>$D2Kw zo}NwHx)L!HbEw!EPCx!T=8Iv(fdl?{DE02sXWS>1iXdOySYnvduOGFb1u~e4h||bK z*&6OMkpbFu)0&7xY-D;y#!7Vf-j9ejbC)k~2MZH9d}Yz-qkevV)bQKK{_Q+|<_}_y zg;!Cd7@wNR5Xc`>Kx$i3J!k?(EU##nx9ys{a^*mZ@5cK|??8)zX78A-q1Vc`r#Mn_ z)MZ{BL5m{e*2toi7F4+`T2xA+kX>EWLi?mO9Uk#^{-%_+`L&^8VLJeA=fjr4GHzkO zc#PHbZsnF`z|U=@1lzD-1O!S`#!D-oU!J{iy^vhPJSw}gXWh7Y}BpO!S#n*s8hQ<;~~$OB=ad^MVI`OD|)!ZNl31zkV1pa6FLF&{;Zm zioW>8JcqK~5+2@}p7}_8{0_a$+E=m1Z*wDA{1Zc8RKM2o_Y*=xFQUQD0|&a(WlwSO zJpHa#%^k%<2X2lyI-9ADpE~uS>7$hV|ByJfXLB*x?kM_)UWOB!1U~l&?sB@m-b+|z z1U9mRj?3%=q~oyudD-jNs>6nLCEy4{oV8L}1V@7oX-DonN$bl&Y55^a4iyEfa&F$+ z7TT7Cb!4Yov8$c~Q!1Wz{7(G5wvzJE-IaArRdfG@*Z~!(vTD%ZlBq^5ijV&J=W=UR zn>XE3V=$XRNb- zdABZInserjwx_F3i>L{WSTuQuO_AA@c7flli;c`b+LR<7IH|5OEaLX~e8L2Xo;W_L`4*s#V_%v#aJO)< zVxvj_z|c@1QX(44Ug$ZM+K$P49PuS!*QKHLOi*0~+%ZrLe7*>U-J$wvUu=k;MuX-KT5juK#W*aUtp-D6JX+mhxzi-OV2CdsWCH$tE z`ywW?Wut-KV(e&V2dTJt1wXub`*xQbpB-G@&#%H0q?N(x^01Ymm9smH%s=xK1ZM{o zX^VuEC;x8jLT9`F^XoP|=jPxDs1jN(IPm*(bc3uJBWp-0&OVT$q9Uf_dWecZWtme` zc1pdBHdmD57yi!MZ6f(ymSYf^6*N2Unnjm|s*K`20M_gX`h&%f(#kbncHD0J;GV0( zdV=Q)WsUaQJk0C7PF6BZ@}6dH-U8%$JJsVWBtBz3JcfWk`GH0>PV$;+-M@2RUEMZx zX~r0(%$aFn(Q-_s)8xrph=V-Lxd?MbnxzEw_rVCbyVa=nk6N#b{vHwboJJ74~%<*qDN2yopJ(X6<^m=2FZud+_{ubVQ>YH*Q>k ztB4`eVkMJx^X8ri-z4p++C=ex{~e8;bK98y9b))bi=SN@zSP~lgOXCty3e`Sug?R$ zIeGSME{XXLg(o0=^(y^jhV~&8*046>Pa>>ro=(QF-?Dy zYgJ*78f~HVXzBBS#S%?{s!>+8$d22Z)^Z5x?!~aSNXwJ-Ba;Vg`f>Jbl&U;adfBcl zPv?9JXo2p=`-iFH?V|x5qE2WP-MuT5&rnEBp!U69bY|Hc&Ca;GY%-(QJbgF+5$zQ2 z78S{Z0*0Q$Y`NFy+e*W0O@^o)!KebXJfril;;&@L4fIV$g*>C=EC6q zSv+~`VD9B^35kh)F*oJ8?!a&!Y_C z@Y6tG{}sx+5X@`Nuq|EvIr+?EoAGtMV;Lcpc(*0({VhJxvYZ3dXe!FNN2tDPVa#gF zm1c&Nze$-CY0#AMnI@AH%5k6c;WiX3?LwQaqM`zJ-OA^R&c$y_3KDOhs?sh+)3r_Q zL~yVovYNlB$bY}xbYC5b-0R1)S2e^ZCdR_#c+KnDsR*tD{Cy+u@3aZ-wUzYw>FC)a_nsX){>2NyJoxW31svL#aog{J_GEe;R;~Je zzAnyUdQIT0Sq=_90DIy=zis#~rir$4h#StQ`6#;1b~@{up?x7+qg z?$Bqkrl!7j-G9K6o-fPj3B2nNp(Vw{Y?1!RxIw9($EsCFIVX4c5n{;DveyHUNi$2! 
z-NvdN4C!rV8v)2R9!s?t$3uShAC>z=JPQ(pwWga`shqoeOFs<_d(;~rms{7y`R-3n zGivmrN~Q~zT{OQBP#jFeC@(L!r7pYt?Dp*d1WW5}=bV~qrEnuV+b4X$Obe2wV76Oz z{f4ED3avJ$HJFpA>hMyioTg{k38*$|N=jPJN@eNAIBkAOl^24ajp+~sh&O!X$h$D( zi*YOrjfgN+xRIal4<}jylSJ2d?3nJ|e!=tGqBrB?lW4o@h4-(*G`+W}Lg>Fo!g?(PzV9`7@ZefSXR)F0ps1)i|Bd({v$~#-mp$*)_<0Fqr|Cfi z@BI|rP;dGvflZrcywP3HO==q8j#}VqJxeqP?fC#(&?)aR_D|VlXMEE^$%y&)=1>#+ z2Q6wlW`^$Vwv&fky1G&sKuz;xF>{3k{{rPVFp&ZwdHZTnm$DlTRAQjsvQ+J80Mhl* zD9Lui87PlfV`h?OtYpLk|NQ6s!t2u%@q{qL$f=`WlTcEA3mrGRs0KCVWaCTW{#$N8m_6A0Q}B^R~)YIPNRo-`8c%fT~%g z@ko9q{h=xFcHrT|`=G}r%$%u$L}nZ3K6<6b=zH;%o&t9=gRVe&@ISy9DNd4m?bAX+<8EU8-CA@6s-@G01D+?XzaEyrTe3(4 z7#maFSy)&EMn&zFg*%Mz>^Ob;^qfg~9Nn;QTK)PZIL}KU$3{l_fho4qUKsjO>7*Cd<27B5~LL#<_IZ7n9AFCg{r7R2~} zPM3Dg)3bWSQXn56b{8O4yuEodOSCM00$AOeqWutP?Xr=fp%V3|+z}c%*3-7&oyWVI zf999nQaVihu<3D(y}gY%&dizr1<5aIXu$*qWm`iochfqMVg?~WGfFSK4cC*Jt>=u5 zhcSWK{tyLoH-;AZ_)K*rJK0$N?`>%VYPYQD^!p(Hy={1S^md!pJGE^+71nbT>3&+I zpWufbIdS54!`~YH`pqK>uUZm5>CmtaB_$=Oh_0ZftSc`Xdc5r8JKxBjXa`RILXml8 zrLrg-#8GMC+pd}&d+GM_pWg{?0ikBLzrBGd;wrZ61{gcZN3aemrB7NqAd8EWwj zEBclofN=4R#YvqE6Lv=3nj1!)O)MLGxQPO@sR|&=k-;m_Yi|KmW}d6**p=q7v|-yy zhwbSxbkfOsng?KuM60Uld0%yc2HB=}u~ag0bV$80pi&{@fk#Th@yh#Z{!s}_P1=0W-sfNtgJ@x5#yk&=#1LoR)79gVo@$n z#&w5Ru*s??WH8wQwk2(etm#@+@Z2$1J`K1Ry(7A?kc^e>MZIZm2y;;w)RhxE*|YOWGc{FSjp~o(CQB(r%HOb8A;G%tV{&S0 z!P892t4{6P_kK8gVz*_=xyZ{pbm;IDFF}BA`T&34Fv&59w>c4}?$w|P@sQmK>FI+7 z$UfSz@UCOn=mQ527A#tF;rw}9t{}rayJ(#AcxlTLmaW~?o%1z(_;6bWm);V9*03kV z!qPI9;6Gyhr`{D671T02Z=NasnpapDWH4w@1^S158r5>V*Q;UsAkxg#6j2}SmM>c1 z;xZA~5p}33nu<8|3Q0FLg3OHhjYVADofzImc~~G%-u23sM)%O*H0-3LG!axIE;-p;;l`aifj}4{>o8Wk zbbHmRRX3hIIYg2wMm0siG4c-|w~zfUKZY^&7L5bgpk`r8O4C8a*2Mk%u;BH7_{<@`Y9)r>j- zEW5lu>u9|8_FWt@w*!P^rBOCP{oQbIoE%PN@KTZzmB*Ox`g(fph>zzOUl{-M1`RnO zk+NL@Z7sToQV2qWs_3Agtle`NRc@{8oGv?H>yn#FdH~1X^Arl=8n>4VDoetEW@o#4 zp53#jHSuNN$8zKVDuVM-aAoG+=k)ga9Ud4Kh6iVJDG{J51ezjdIG1`WNL22F$Yh5r$-$>VO z;@>5#E#oM>8N>E>hhZ~PTDsZYTtgv8HbFe}@QizpEopPEXJs964#`eEdM4Xy_NDcW z_La6rdNb7sacXOrR~L``)r^3%L##$|c*>zex}A4-P*%Qqdtj(O{==yH9^ClkTvz&> zOV6d0y>&WY=4UR*Q{-RuRab9nZf?GR*h*D;|5I4Q{T>NevV#>G=n_9cB7}yA3jjHJ z%9LG1Wws1by0(ECE>ONL6Eifj&=svnfOm?EbC6b)_bfn?Ja_S8U*Mh^r6uSAcXn_t7KGy%9LYq42Iftn+W0@=YWbrko~2RY!L<44o@)>jG(J8-!L zz&<|fU)=cHjjP#~f+frls(ZJa*FkC7^n61@EU1-8eqR5+I1rPR^f$Ly_!5+iKJ>QJz7){nm6A;=d(lQ#L>Y-^h5Li%Ms{RBHZMG;1w?Au}? zb=8Wx8W-YTbA~r%-8BW+t?yGU&UsXI$K|unotzMTgo6}$ognG9o!?vZC@i=E%X5DO z&wsn!13&CYeDoUfg{cLhO5?qpNiNJ;*Y&IvGJvU!VP3WR7$~;AC5M8W(7biDVPO5IDf| z7$P#9*Gh9_FQz80Ak>XXPR@I=Xx-Fn&? 
zn-B|R4&JZU8;&pR6daVsde65kI6~pQ;P^{_gMJ`8O;(K{5!9b4cc)^4Sc@LHt}Uoz zC$%pQgVRqW2@@$56A@gnKjimWte9n%0J7u?wY-9ZSG4v#)3mAwvp4UBNSFht0rshm zxY>t?eT9T|{NzR#ca2lm=ID1_JCUb?A5ODe*{h}K+wXyKQ1yk*gO2$%-{9^a$=y}p zEh*k?lbSrs@90rgu_7=3H?nBd=HJSU8)YFTs(aQu=JmF3Z*E7If>$@L=}>!0_UZ>c!>1LKU~9mmp- zSLZ$)PCrkT-@bcye?sF6?u%ys{yuE(k#>_mNy`3DeoLOg5jd_(nu8}hS;rBHu?W#n zxF3=`NB_6Vz4Z#3op(2#5NoJ)JulnJ(bWCSO!r5K9`YZv!eBVqX)~B4!t#7Zi5j1( zA@O~U)Hn&NLonxjdxQ<{7$J1T?8j4Zwt~#@AM?cGir@q@a}~%wD?{(&_iy) z+dF-JKZ{WQjsS+B@Snz7&G)zYj>MV25#Frc&lb0rA3p{x&#X%*$sE|VwaLsQtGey4 zS)pu-&E_TJZ3hlGhuP`mKTJDwrr*&9izW9$kCjkYre$QPtENKMq(03PdVDN#>Y2hS*rXl z2%Fb1Usb~9obs}=J4{KvauX5liP$2Bk2I~b;y4H zGr06Vaskz&yz$iRXik7V6=sJHfZWxOyY=XCm;0)vp^<1YyKC!ar6EU-_nq7;VML zM?}|rhxT*z!3Vo_Rxs$A;k#hi7hVs0G{<7^5_6P3QkmA6pVb7lh_J!nUrv51o;5>3 z#eB}59Xi<1aLk6kPa8}RUJKubNq1Ag>znVtCKv5nuzu)(0c|PRbHD-TEL?a!V&Omk z-P!Eu9N7UcNKHz)Ic2_wgXcb8dF+2l*ItBVC7m_f*$vYPIANYkGg?^;z#A(#^lLjKNTs4^ zv*NFjF3I^CF;<;Tr*-Q&aZ`%c$P^>f*3GutcDc8{v;Ow1z=_nNq+F?#DI2-@dseMp zEmf=Wm*;yNT{eblr*$N=ZQfNn41mu*f0D}?HKPt_DO45xLN``wzO%P}WczjaJYM~U zvHu=#w=E=eEGdWQx$IfKYp?8wR&K$?3K!BX^Oud`2_oX zhdBHG2jAU9C9Cn+X4|W$byQRwk4mNiJw!5*Ff=sO;lZx!65pG(Qs~zD`Xc^t(_7|2 z5-vCVJ_rS9_U_#~`k~1Wb~(y}2%g&i`1?Qkp!NU0J^*D1b{s*#90I^1G_4@Cd_iHZ zsw!a<7-|pt95eKFqAy||<>lFk))5FHwo)kPJ$$$ychpIe$JT)#xT1_)Fn8`av=0lG zEU`q)(OKDa{P@O?%3Es2x*z+n6~O9f||nicr}0C3YGZ z8phM)wD@)&pvdj<8QqF^?}E9HkcoRa9Ccse<>LwIMK~y#Fht)Eq?uX3NUI*w>yeKZ zBd@Z^Fx;M1yY7lSI@=SR=XgMj5Rhbompwv;uOFTsl)i=wOLbugBNNN2j>*!PJ_U(} z*4sN^<^^L0zGDeHMI?#3uKJXPG>$2ebR0vSSwM=A=ksGZ;Xw=Golw$lKYo0`nzd`k z8M7l(UVy?2JYu&2ac0jOc-a~*7)1!k(zTD*SZhc?uV05rv_B2rUkY5YamH*sz4T=M zxiuAvzJF8tyoiWm3f|ZU1%tvB=zMf^+YcR5{ymb9w39NN4!F*RK{4Mwd;Yoe9LzI| zgV8`$*GBa!So?w!_QSLnkd=9V7K+YzX~bE5vui_?NwnOYR_rz|qK;yxDD|AQo~VrC zkv-l46*q^gRHK*666BU6e0}q>y9sUJ%&Mbr-MXbUe0ZdKJol}3m4`!f?8nwUo0+1Y7oXzU`X3!MV3Yi4S?%hwkx zu-+$i`xqG=Mzk_nUX8c?HE=`S+DDd)>bl+`k`=$gP6e(nNs+ki;7Um!G&V$}9OG zypP53n*?jS38vDQs{e_aMjLs=rZk}omMxpa0Qee4!h{B@tE~;(x34W~r$`)2#-Y*a zNB2!Eiru!Yu>k|)53h5wG&MDSku{sZ)DB+x%J>KSf!b`Ccn`9KxYLo5qHw4Md2?99 zPOJJw&0-7wrmmrJ7cUb~u=Hm8!n2S^tx%Z!pt%Ho3Ft|qr?&h567m0+K>lB*hyOpm dxU`{B*PP^)0TW9PDexcjiIa?vkDs&c{{s3qoh1MO literal 0 HcmV?d00001 diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/conv.png.png b/src/dios-egraphs/Diospyros/data-plots/plots/conv.png.png new file mode 100644 index 0000000000000000000000000000000000000000..1e1069990824565d600edfc08075796540ac68f2 GIT binary patch literal 32212 zcmd?RcRbhc-#+}(&=M($BB5-NhU|tAvRX9knPjibmXT3}l2t}gB$Tp=?8s<{M6yCw zR*~Ov>ifOE_xHZ;-~IdjuIv8q?s0t{->Z!G>vf*b^Z7iU$8kJQe+@Oo)hjoyq$q0j zVI_Gjidy7NQ8XRPY4I<+8`{3%elFd6x8 z+i3LRqssY@n-tD3pS--9U%8-2&fWQyGNa9H_KyS3^oq_-)z9l|f3!<=vj+Js_rsq` z<2$TU6eIqa2s3-j;}4I|(k=LFh`#}i68>szs7Q6yI)R}+!#cA^r71B>P!xq zi&ul$rCznPu;Sn3W=F1Q+I?g4^zYJIi zpJ55wxck-Y%yfsDYh zWbdV`n743pmV9J9zHR$<`WQ*aS6OEDqGok$soEKf4h1mp?h`wB@L%gTBC%MV=mE!l(DVaXk_vdED&m>C>kZ9QqsY?6O#^Tj*|4dD~#k z#K4D>x&e`qtb;>CokK%Gu`+Ik^|3PL!MClRU--Q7{rmUW7bVB&AD{C|U%gtS9w+;5 zAk(b&&ZQ$!d-@$?b+$>F)u)~*r{DL>dGOf7}`?ez3y zt94NH_MDUHx|}DUq|W!Ppr0R_cJ`n*QQ`>}O%H6GoZkOk76ekfRcrRe7!m5{V9UC*xek|-l9JkcS0~1P zep|DfhuK@=?a%(E&W;XmL|;imgJy3|#SM;4yvO#8PLFmOTSbH>C-Z&$_>o0cL-JUv zHlxhNpBkGPSjCpSdvv_?`@6@-b*P@ITNQjY?=JlA*D2}`fA2)%0WbL#kySOhtFZp} zQ=^LD%G)9wl>SIZN#EQ1i}#%W=-pLuBS9f(V`Oalj=g*L_MOk8M&5{j(J%93)P8Bh zDJ?B6YTu*Uoiq4n8yg45jj}R%ZEbCq^Q>5PpO_f-6Y07!hBPl-$4%uA9lGx}^@FzN zp8BV$J2^QLwQt^JxzCiJx^SXHJ%x>L?_L&@>afG30o%R)%u3$ay>YeA>P%cW{Y%35k9Kr-FI7=d zd6|=AF!(XU&)@&>p7w`vo+~$O*zmEX<-X&<`aZ=ACd{AQ)hpOTJ!DTOVbGg)GrTMfHm*+b4Umh8;i5WfdZ3v6v@^V_o 
z@69Eb7B6!sZ;CxJ^4a-I=WRXD{i9!B8(`T4jz8*XPB~iDlV{PygVe%IHnb(vB*V6= ze9BX%r>EzRxbl^5)Izh=_=hv9VXRkvl`~+)3Vf*3givr!FSIuDhBO zduU^8yB|r#_Sa7f%DVl7rlzKLp?l8QOr*nEWDEK?cXwYw#y)RlHQJ!a(TRgBS)(+N zH92HI+Ah0|t6#6N+jHu(bFxkrPjpO-fr-gA!~lO>-YE+c^&z(6K=&Vdh3>4YH}3Mr z3a#9*Jy0jhG%5E_=GrN$XCKo1p?&+P?Pqg;){S5H7Jpsw>AB?&!?NYtr%oAmzPe`q z=^4LTtaOU%d|w0iv2@+D{7=uEIfEQP|8w92145xJSu=skpSfxNId^w3o5W>oZv3$n zt>tudWziD$Hbb9w$Jh(~ni{@=J%~U3;sSY0k|~*mPvuVysMim;p8lNcm|^&OB}r+h zValVo$8T~T?D+civPjYFTC4`u?!FuqVfE@wd&+p@+%F?*8lvv)b^Oj0yjkXv{q%S2 zhIvcoHgXWuVk8al#lFU*RVUI;UBd<#kUaH1ZByag65?0&r)zuY}UBaic>9Vu4S!6Em!m?+0&3n4fbl=9eC1pMo-Lr@J z$lYE0RaNPH7&z!po;=xxRMVPmRT{8Cq@$yQ>Ug7zB+8qB%GCFzNbc3ESE62j+?&%* zZ@^)!Y1YlIi;+su&M+LAnyP5YGRtroHT+f|r)S2XVR~E2{OuM}%XgaA7W*-BlOn{# zr$JFqoriUIS-z)>jg7q{=_sIEaPb=6IRS6D*KP8ACI<(q_W}R4+eD4Z>HL%)r9U)p zi2s=9EP|CzXR`2F!^TEOS!}Lz!;3PD=mtJHn`c<;O@r!NY+W?x!*k?r$G56m$r|@B z*G*$}73AfY(5~LNBuL>3V&y*4D01_I%p*uro7g2C_VqU=bq){Tc&Hv1kD4th7c5rO z*2Z==&*?ROi|x0{kYjQiHUs$F-EGMwDY=nbA<#Q3YiERrUNFjUS9;N(19?tE6jk)+ zm!Lu*%O#XGfu*03;%vW`t#BB4f5a>nJK^K&yKHuD&USX%wl+$fp*~KoVz_9YzT*vN z3C&{Kt-sx6u>W`wpToC!`fIK6OfGZ3cGJ?cEk|DehT8eLwXLJ%%IF6@FN2Ry4fxLH z?uwQ5xX!H@yoHZ%`Fg<8kQ754ZEW#U z68OmSh8@M1DDSVY0~S+%=BD(JbCxH((QK&o(+g_` z5fPCX1GBp55@geRc4d#mT1883d`ORD64KVdA&KAVfpSt-QnEijA>oj`@$2jB4NXmX zd>J_2vks$huBGgL_GvOQN&vu8IKDo8fgjRNi_>g5`ao6FXX3}_)7P1W?+X1!>HRiz zWN$$_?{3N%HBz!PV*6PED$!}^(^?LWbULru=?q{4rLOXzZJeC@Sqt?`7SU``i4fBC zdH49FI#o&1xWUOMXBa8+`9L=(eTF|hSD`8!lT?~&H?Cb9pWpssUUuf&y2Glf&8&KP z!J!*>ncqYqsKM(TlKCLgb@+E7+otBT?tE@X!9Z8{VD zK1t z-o8sry^q&(aER>Ku|u*nR?cht(v?haEu4q;bPo;+AvTZtShwfvP?dQ6aD(*%$8KI; zx~dv!C(MBCEyZ7`xAWIs@e6HylraLP`f2>30A(E7@CuZwwUlI>INBSnRP7yTEi~G1 zqoyBr{+)f{^C`1wY}pd;GcRrS%gdu~aia2upz8JqilY>uiicEIDlyKa3l9wqb&qyc zNOnDNj4k`_O*u|Ock5hmA~qoM zay))#`}qxiDbk}Vk=;Ut1^asggMxzUel?tH%}yixZe#OQY!J}H(9Db%AndqL(xgt* z&zze#Z!+oGW4&+&RdVdQ!yLDS&f1IY*>j>gLRgT)bNYVWNONXUL0hf+kNH2phpV67 zxx-@X=on7VE}1wjnYosMrgtBT%K_B7Q!g$A0j%l2Pt`GROk6=KBJ%y#O+^5($;VPc z$3A}k%#O7w!HeK?DaMjt69Wy8jtlxNCkJ76w?*TU&d$ziPJcY$ebl;<(a{E0^}J>B zZk7E=QM6Pb%Z>xQ0Z!^)f4h7bs7gh;>Wf~Li4c2$<5F1RJgG@ zZD(wFc=*`7pWkw6*Rdr4R{K%Uo}9^9L{)DtL^4itnO;v3a1OYQq@ZKQo|l^&FYmv$ zieB)+2c$;F%^#46kim~XvDsZ_$06FYiT3^oqICb3Twus(lESXkz(=)pV4-mI# zPNgsY?7Q^#twwE()CP1S1YMK1$#n1Ih!-!!febVdU343FEeFp1Sm+^@nwEAnrW|KX zVpwR$4n~CVK_p3mlTV_*c9z|c`@^E9rdHi0>G++RwvKAfK23tviq>VkoYEgC~ z$}i29H;;9&sn*BRP95*n$WKcXMwa)(Q*_%rPDtSX(OwYs)nM?&YIezEy&0475<}T2 z!``G5#3yJ>QoLKY>JNQ-ZjKtAX@5Y>$t7d_`0WBM52AUo+c+abyL$#7b z`T1hp6)vQsWtI;#R#ICl)N;Ye+Io1(b7=UYOyqIwUrfb|3!fL=l+L<#ZK<@33{dlR zCjQXD(#Db!nhO^$RQb?Y%ceFaAd<{KKIPoF=e*zKP^(eN<)tiQ#>+jYJC`k?r4{I! z`#oG_{^1Eb8ylOxwKW4-@+*hkVvhsriC+9^E~J~!GBehF@D|UJk**tZ-=lpU=YBc* zEmscX3pp>dZ0S;yUyGJGJ3Gfzd|D*mO7Ek6;oQI)3hQu+f)ou ziCo6Mtsp%X>rT@xC=D!oZS)`1kE1%v0G?k#>;4tR@TEh4XdOdrvyfgP8*08H&T_}# zARV6T{Fg$RmYkU4ybss1KWkk~@9&bKqe4PLsGB_&Yp@i`j-M^!mU$x|mb`z@jAru? 
zL0S%d8f?IauU!l0?q1&@Lceq8&X@yeKy+Vv{N4i?K-Y0Za|ECnuy$w5Y~$~o`)^_# zqM~+NG6QTc5f>LnIIl;;b;P;zUP0wSx7e=FdCs=eBL*N*4)H&EqE{3Q6vX2uxBL9Z zHN{?WA4r>ZEcvEd$z5Fo0|V@}+QbA8{z&0<@vX6o@099jYv04NCTk_FRE`kxm-YCi4?xcUCsZY9 z(zpXK?w<2-JAd%eLU&h_HdM$??5nzM{)>Q^pGrd;OixVN-~M^q*feo6V9)c;(DE6R z;W4x+1_+0}4t;CTZTW5x(Y*)6`t-$%iut)O^Zav-F*0tQ#k=5@18E+9#jwZ)pynTXs-bJV7jr{Z3R=!kHX9zLQVb&NU}r z0#elne^I@`N&z7IWX{UH1!#m<$jQk)vub5FDi7TMXLiQm!UcLSFRyO4S1-mNeDlbz z3PgfgOKp{^7u-pLiu4r74vdEck&-9TOcS|d{AZssh_@9}3BGCnGCF)= zN#kR$b?eqOBx`a9vPjQ+n1h>8_Lr|%QPq4quduA5V)3-KNOj4TWuw47csUwUoXb^)sx_@L#4H-OvPn77 zAz{5zmz%o^fWHxF-q-IQAh6D_r3QZ8JZF8a^X$4;j&;{OociIZ<;JnGQO|2`sk*v4 ze%bzBkH`>?%ZDvmg_EXEDO*Kqu2```8wnWYqba{J7tKt{le|O;8p0rOc+KvR{WEQZ zS{jeegfMSBs(0=+?QQ^ucAVVz88nB#zrW;$#gT{;yCs8^dl7JY8Ty^Z{sZP{ zQu?c7*Y5WEVa7xd$ufF2rO^-9Cfh<^IGw(IBjcY@t-eI`pS+w)x|yAmA+o}<0smX>D2X*=d4<1(`4-FDvg ziHkpz3`%{TK7Cr8?>g@A>Gz8-o(ElAUg|zEuS*5{w#d8j?6pZp-Z!k9HU%K)zpGA- zb*m(~G^0eb)z;RAU2rGe2(X)K)E?m$CmybgMS0#4oDru{-Hs(|(A3-(`7je=+uyit zu)W}PzrNH?(gD4E`O>)bC^lJ3d0-w3v3&XR%I4Y7?9+`0EpXxwJ6%7lunkoi{NVDH zD_;Q%qC~`fe~GTG0P;$SOFW}Lt1j6H!I$k>j4pEsI8HXp0KBJ|@CX^#HInpP{UHrM6bjH9D zO=l`n`~w5!@zWYFZ48orN4!9!9R%&mCcJX>>La7U9Q-Eg{&(f3c3yX*?C*hg=0+ol zpS*J827{rIkuAvV)U$can>TN+c6B>tQWJ3qRnQ^#2+h55)A^n#YnzT@nq!YotX@n@ zM|$m1RFG(7Oa$j^F!W$sG-knZ?{FE>XWMH-;n*Dj_5kv`3~0Cxk9DBCI$ZPqfu#;Z zpVkvv(#2)(x%a95I4zwhgjKrgXyrsNwC#On`GFqI`99!A&@oq0O&_>$Szpw>A&U*v z2gtay$7g?x61B z|FN;NI{-9Cs3a&CP!zayIG=+^q1&{Fj*t}~V;MmOqYvkkTjVXibOwET(28|DTXyYY z!to&)3~%++wo8GIZSUJH&RRQ~?TWzQkY0QDx#oR1Sz+5H04~}GVqfnxt1FA#Y4#TF zZ#R=a*68ch7cWJej9X_%n3$P;kdY)?+;Pq)+hZmnrmWqx=lnh_)Z6$&s}L(4XkZUu zp~DNxSi3=NU@`8YA?<5@Ap+d};OFz(?XI9B1Y0jSI6SlCz1h*Bk62u)=P_~oZqp48 zxy@d)qsu^c_%L!SkZKG1PJ%o<4uAL7IV^eUyLao!HUTcLVPd)p`tlx#x2n1wbSqZu zA9$bE0D%Uulke>Dg3nW5mT|ayXPVRqY$!~6CtTConDyq6y!<`X#J-Oi-0mJ829}lq zpd`KmGv1MLbIOf43eZM0q$qJ~T5sdlhC4fqo|@D!Kf{X>YWG;m+W7eRH7qQCU|d$+ zzki?5_h^c}i5`PRdI?-eb{U0%P1c_YphyS` za01evvn-&|x%2b$9h~?Q!{@Bd&9r?85Xx)6lUa*;p9sn2YgZRH2slFn15R`jV(!y6 zu0Q)i>RwLkz2Fr*`6LXG*ALQ%`$)+$(v};Kd*I7xj*hjpwQU8#_x1aC0hGKDf`KKw zUYD0wgL_Rv4`XOy!4LGK1x7k@kJa6)*wrqy<;vipIP2=_a5}luPCXBma{jS(fJgq@ zk*T#fp6gewS~WU38379O!Kc-#7xmPYpEdC&4bc+1nqMso90i)-5HJfW*x_%t)MZ0@ z>*EFjZ=Jfm0s9<|1xOUEXw~{o%Y*g$+TYI)R1$*5EoSt_W*GrgnhlTv_yO)3W!6wh z9v9m4`Egjok+;Po!OnPSEP&87lvR+o)+jr3{W)Z;>05{HHF+u zD^{&)VikFQfuk|2EiCc<>Oj9rRE#7rg@z_3+t6}0qLa;WpUJG-o6@cNrXfMm1T>6R z_IVv!8=ISWg@C-gyy=;leg|L9F+FES{yC4I4T_PcJhf0%#Xv9epL&)&7W*;V>U8Er z!siEIRa=e6mHIt$5bgZOABBN9R>oN}DfL;M;?(*x`-NUsT3T8dpXLGdj-22r)NJp* z$h*YI2-z(H+kHJDA%QSXk4~fs0D(2J_V&hkMF99*&&tXozYMk4r1s7Z$vDMNlLdHi zaJZU-gM(%0#vuOiKrU0>Q4dCJi8sEByT#t(c~WKGiyROD=m!F)fBm{16B7d&!!JEO zUFgh9(bqFcJt!eelopO8Q``+dznjQAez$MmCIm#G`%EIz*BG>keg}4DkTG98f2U1P zPy1cFb`5D!Da-WDHw07lUTkFu9#h~m$F{R4&FW*rL8JQNRY5hWT%le44g8vIih{XJT^%H7@F7;D7u>6{oL^Su5(aHRUMTiM%PTmpO7g#;=jH0Wbf zw4dr<%1G!tq~<0gAGZ5h%x|A-*OGp7MdjG{er-~hJ7jKdo+ zM>K2CYG#+&Q~rK_jr;-vD^F*iCz=Zqz@5R_o$-U0iR`T@iTAk=cmzGD3?;fD*U{2Q zk*k_1cJW^^+=c$%#c>zy?Y8nsJr%C2Q zGhP2vh7liL`4i!^?9g?lsu2ev>bSg{N<7Y$@%MH=qPA~@O1wf`{Dtw0ovI?t-U9J5 zgO=9T^^0!e*S7O#Jq#S>T7zHBwXSsGpDN3qQd{v?u^rOO@GX44Xb|MjpHIy&y1DsV zUCbt^L$pzHtM+km#j|YzMbF8}iSO9u#MHPBT|U&ZjfvL!3&MU&v>(WiWuoHZzEM%9 zA4t;jOsrv)S5)i*u|;_U8-Dj-6(Wn5nKM1@f-fX!+&}O%JA024TjOG!Q>qwrR*0OJ zX8{L&kNOgP;Q8&GoHUd^y0ULu6V*Q%%5LATHau=hTm@kVM`Xt&0ysbLl1KZGw^5A% zyG>f;zh%;Z&i{f zU+bcgAUYCiGvqU(>P5?XoO1nB8N!`>^dWs}YHCMM&q5It5D<7LeVBp9*T)BXxIS`$ z`P+Lmfb*RL0|DONRE&jKF5PbZ1>eP^HLR@uNUg7GYE~fyQr)w|FiD?(Mbfnx&F5-lEA5iBPSg~pi3RFV#T-oSE(09t$PzAZ;?h`4`7&wX^M 
z?EI{Kw4B%GZ7LD+;AjCm(m$9O@BSO;3RzaIdp>^pw1`5D)BvhI-&!M*?vO149vzh#Ykp zwhQoJJJ4pe6?&YG^!(Y^K$IoIyjT~`(Bl28?Kf;cQw-3cZ)y2>fuTgUM)!8uxjb=0 zqPca=gMuSbg%Kg|7A!6y4Mf$&a-+7AzMn`!Fb2&28b}9H--q{U$al5JLac^jRk}gX z{nE#e^L4|qP@af<@yxto(eJ5YMPx8YvkH_oJwL<7T}u3-&}eP|Z+3t$p9=gv~VX&WG6CgD>|*rjV@`A+rvT4av^4M+PQ#>HEH zc%q%_zz-?H7trl1dPj(;V%qJg+CH2%{|I@gaGPb_{h&i|LDebsWl%IW-g@f!xobcO zWVf`lOjlpNa%G2gyR^yhW-9;U#7YEXDbBy`zj?Vu8_k&zp+{`?v)o43__6x3cVe)w=T~5uvCEgeQSkE=zSF8^4Brv}WD9E2#DQ z*nFbWQN?H>K(8J`i6TtUx0>4wL?(V_(M07-I1eK_O331ZTHGhEfkxeZcC_MRP2Bn# z(Ir8C##Z8;VUKVKUV(N4-wgUh+$s7zr_9`JjJ9FO(!+j!fxST8*ya5IXUE*$@td4E zH89~}N&9!5ktvs`BGa}gXqX$`t-n3iQJD5qUl5rB7N;LHTe)tHJQ+Xa`h<@$axM8Q z$*l`{&rg-^-;&k8Qtv;%oOg+ni>vzn_ctC>Lt+#CaUrb3YWzY%bfm?~w*C^Vcw;lM z2NV_-F3A6nPY9!t(+Bg-visbzOpD>qs}Chez7DUT4|dgjuDp%7YB+%9e&>g=-B|Es zu^#cIXJ3P!6ii)dQ`0)Yq*u^mM&NF7sPB3ap4WLilWw!j%K(Wlfd`NjZMv)1gFN53 z-DOY3Lb(V8cG|EPEDHT*Y2v-0p$PQ>DbCbfHdw6=eAu;s0KvlFKct!;9bd((7QGKG z7#*XOVuytHMKQFR7*$|lw1i>xL6R^;hJ{z8p+-6_HIgr)Pf4&9jWChS+Mb_7)6_#JL`@{(6$qK) z^rOumu$FV{)&c3d5X0+H7i;BveHN(FCV6{@&U=~c^{9;!Eh z9{)Mix&h(fi|PZZGKeVCpmeHTkgiH0hSN~u!@P^AmFu^k3#^o&UA1qqt%C!}CE#s? zj#Xqldv-M`$%Od<*-i#*Cuy+ldusRF*oc1p`jr$e=rsD+p=y^e9#iZjG-51@W;`VX zj;F-D3$dqT_NHBnN`>YRly1{UO#mEulb$*@ohtZ2{`~m^Jzw218nQ%8#s2&Lk~c(r zx3X}Z3a+rHCcdFvuRwEt1z#mh8Q}t8Zpw8UIyY#J#gdnNxlBVHLPh}Ofv^2d4~ZoN z-$cZ3U``gPjX1O+^vvEDGfm%YG*aAxmr&mD&tv}2Q~AF*v+6_@3%f>3D5d6xM z@~_l?G!f*{yO%d2`S*$A@+~?S+pqizYnaN4i{pS`Pft~b3vL+ec@sc33kexq?Ul%u zsJUYdo29*P-MZC2epl>o@<^~i4F)Lj#yi4NwHQF57{H(b2!zmw#IpyJkHxpd6cr_< zeTIf#^AwLUE&wv)T~I%5932mV>D0_HJfs@Eml3UOIhq5au2WSEG^Gq32u=H{w3LQI zUO52NFgntCFeVzZ=mC%s6oqqo1yT~Ify>a5OR=FZtiP0lKx%k=Qe>}$1hH-b1e7A* zhP$@qI<7*Kc=^_?b?48Y*MQAv7OIMm`Fm%>zkp_})0gAbhYuUTGAK=+ad2Rw{ds}b zZJFY1&didY*_X^Tic@)f!yO5G9#YA@=I1uxJosV<$vJ_xxdmJo z*JTh{-0*o)M7by{E9-cB{~&ScpqBxmV0m+o`-Z=W=uuZ!*A)OMo=t@w?7EtDO&$(X)fD8mbQTiY~h+RX{`A2Fkc`Cq$tLf$|4Gl z>r#j%=RZALK^zaLpTzwZCU}yb>gexJ`Jzxm{f)nOjEpe_PN&4Lq!&L76G=_83Nu(Q z`0fC#RtKzg4}s?B?CteMi*^qJ&K)V|?FiL-I~kxbB~35(h96=J^hu~UOGkRXh5@E^ zz-L9ZvyBZQaL%9qbNT_quC*HoR~`VeCS-`o9oSm(3kn#Sn3hm1I}Dc;{`qA| z>IqB_hck-b2rQgTCCAH`FGCm2P(708(60rahwx+Cc}~Ir*eOi*{iK(8cJAFusKBpb zASLFin9?Fj-^3)`@Y3A~G+bTSyS|nz-bITSYv$OQU}5DBj=$ZoaC#C22o}@V-va!5 zb5NrGtL)3Q4m9w7RzJ-$3w~8wOSd_;z4|GG>@-E}z!^pr8ff-Ybqh|p{s2IF=`~+C zHjS4BB)jrr%BuhYnZPtqlglZk%j5@_ilRm!kdj%;HLf@acFgbyc}_OSY7d@VNe0 zGtZZv6KmC$z^<0U-H>~h;hR0b_syGYRPTt=;Ll6P0cN7D0+bCzFS`T7^G~qs1oDDh zB@`nuy+D$w-V0~eC79*NMgS}H#-uOEluZv-)6%hcqc(TEy0)sXAz>L&{h<(3F813- zk#k{Jyi91r;!BIG>eOom@|Ryl8yaW|{JWsNpfsb`0+&GevcAuGFB%ygv`(COKkYa} zKRy5DVPD&Wjbqw63ARme^;qnoijgnC50oHl62Sx}nl%S~>0jU5ezv=#@ZiX%Xld8AKtc-0t%2+nThMU&r1U@C48}o0 zHAZql@wAV|H<%t~Hu`$Jm{h;NMtn{Bk^xD3=7Uev|IwzSLQ-4yOn!dYr!mRM&c2#b z78m>Vm)FS5fdEL7GJ5Vx^sLwCdO`fIuy9^H{>yRFYl!P|U!OT48iRv_l`o~MFC4^7 z3DiFH_gnxj=&!`^?iYXV8Y+!9&0x)wOTQ~2DfzuEMm}!gWx6S5W@bLkc==#CI_V!D zp6Pvw+i9mD!h}S;r!rRV?+Ag!)yPPl;G%Vv2^%apzP`D|YsTd56EVjSwhr}L)90Uz z#s5ix{ukddqP9g|LiKs!aN+7^|80Z(aekFye>?sE!o&jY82s4#(24_umj@rfq9WJ*Aqn;P!DCZf7S3z3AxZKz3hjIUg(F#~#!U(*D7U>E8Pi zHYMVTtm7c)8`@x^C=Cq$}1Q>FZ28lVV`@7+sB@pZ=jubcPDVD;z~g4z6$gNgIC z|E?)i2E@|IKd8w@9bx1N+7>5^6XzTHdx8c`v>|Rlogu9G79Jj2WE{yR@Pe-z8^sroQ_FHNq$J0|25>Yd6C*zFkUY&PM}eRdmfNM+zv0##WmNpc^`$Tjb>x4VN;g+kFI$lkujYh~8^3*Yc1BySS=p4StxF89z+SJg#gTHu%EwYXEwV1bv7Wc@$RkAr$f1 z?McPh1mDpIbnl;>4FD2i&cdny48lci($)9E< zkk9E~k9UHm)|j(gULOBI1IPyXt#A+F zsYO8TZo{rZr=BejU=}`rF(B$5v=GvpH-I>wiY30l5ZTD{G#FWU?Qr8XNO0%iqlrq4 z?&EL7kp!2>FM+J17`4aB=0}?Z7N^G}eqrgttC+pvL-bZixa3C_yE!lU40R!<0tlf3 
zb`u-4FaN+OJf|t2t5^Bv<(z-85k4-Ebtf%kJc7;q^;VRgRe+(w8=A79#XV|XCuJIPD=O-dW5_wPd^%rdxajrGbG@DXj> zqzfVK48*(7uLE_&evkhT4>H-H(Ta%q4lCH*lv0tCnrM|Z^G$JW+?S*$uE>)k4x#P39be0(BJ z18d%W{R_}Hp-lmjBV(-%4dd%1JV|CeSG;J=J^+4OOg*HYdcG0}gN7oc5wW+U6=~dk z14_m{bjCK$&S>(Npgaem)Ea{I^a)fm-X*Pg%a8B3A4Gj%`ko;jJ3SuYtr*ex6eaAfdS~9ppjNahBDGJaT^kZVk-z>CY zidsZ~E0I>wp>d;jAwgb7AtG)R$WyaFo7BsL$x9`Qw=G_-53g`C3_URUJsF%^>{o)q(m7Z?2y-9V>D->VIs!{Bt0!iHg4X9dLp8`WRX5lC*SF8Au#@n_4M=rxxb=w zAYL!{V5|pS@Urq3Un3WT9tc3Rcg+78o_Fw3j}%*q40{OeePCE1c1UUd<+3$oZ0{|2 zX5&&f6fT4~3ny8Vkj|&n5)I5RLhjz%GJ>|?9>zxg8D7#$=J#f}=$0=phC|fBXAUl9z7we|V=$?p>F46%DOc*pbNkViCax-;|6Md+S~UOX zu{1p$ojkk_Z{a!(gwp_m-L;~kBF1$K={N5125%x9C~|ltO6!8^2EpkAU)6eY3}E;f zL8;rggS>OdpGXIqX&gq3lk9Vyh2@;2iRvyga|Jg{xgQBKV^N;76f@>cP~IqSWHLax z6aBQb6n6F|x(}n*WShj6-`Y-rp9uzf=t4x2c?c*wj*^~*2nmn@7s{@Z<@sgS__>&n zoaXRXVilKxbR*_V;OgpeIQsX4mkYWBoK-pfXu`vJg8^@ZcD_BE76X3$cz2C28YWB* zvLSOrs{7#T3FZTI>257aJOfv$X#XOOnvxRE*PT#Z-hACkKGDalaX|`YjO1D$bt8szUy#`wKK3L+E;9iWNE?c&& zyZ+4^;Z;WwG$AR5H0Bg(M$uqC1rf2UA_^M_ZY;crsI!^}@L^&m(o0g!h){x9#XKb^ z6TjBhH@3pU!l3XX4=rGhc8I;aT$cgCjIfb=VK{?GV>*a*7PewImn-rl_ffUtMH=cp z^z7HML_|!1Zd?k9hK53e`ZP0B%#Lhj{5%krj1J%$o z10&-9#) z_lMSY{>KVw+#n4an4~Mt zqk<}dCqOxu2V>d=oBq!Qo}#{MqCTQ)xGzn=Vnqq?UkG$}v=5sWx{?MjG?EQq=Nq8e zc8`KaqM_i!D#vjR47-lkQrfU;pUfjDg7VGw;aFxy6Sabz;ldBrPUQ=3jQ{!i{?{0S zf9}rze}8&n5KbPLxRwz$>)g9XosixCZZx4cHm5-pBc%`>3z^;rn1~0=alwq)-`v7a zHWXID8<2kPL8&DG$n#e}FDyJpP>#xj7oZnObTC8jweO*W2Bc}CwXgXG|7YIw~*8%5(_6PXl zD#*_JD3nw?lLOk^QV8W*(dB=SpkyfDd3%dj-fsviw^Lg75vm zW=SDx1y^Xl+lrntHYzJo!Ahsu+bUG--_leF95`!tZCs821D_swMJMw(^KSj9L{(uO zWH0@Q=HzWyOyl*;-b*N=wGy%nAR^(WC}<_FU&m7Ktuvy729j2>(zKWHRFGZNOM2@jg>T zhJh=M3ksLFXT68U-$i;h-n3cy-Jb!BfdT2JvU?-|FS&5mh zd*d#M>#^?JdexAu!VGZ8nHd-uEIvGm0avet&`E^0pJ~^vT+prmf88R5Ju0- zh1dYh)hwJHQ(@g}a}W2_T=%O)2l*Ovu;3tsZEbD2xW-_C>2FBzMHapek7@Ihv!~NJ zn$NFuG(#CT)%3!l*WhOMZaL%bo?q7~kOq9JjE0-Ib|vfUvHF;2+K!cJV)cULuLaq) z|6@i1$Q{YouBN_th=MGzN(x9=@U^R%xqy*OB4fo&Oo=<)XQxNWNGXtNO|$RrR{75e z*DE^BaR`FUb@)*2#!^+d;zJRyN-%kg)z;UmgDet;GeiTn%WH>mYRE#`{hqW!Ha&97 zAzi_#7RxW9Vs4%+?LMv1>xy9(i_f{w?OYV(Q3vGVP5%a7T3c?N)Uu^81$n`bcWqg)`cPXSru>|IG`w`=!dxQ zmr&Fk{-)Oz6)LDk4r3-u{(Uo~uo=azC)b~wQaF?EqKWcg0urR!9@8CKxQ@i4B{Rv- zuPxv8C@vxrgw3|GWn`y)d@hop%4BW)+*=&isJ&8BH5erc@5T0Sg9%9tD}d2-=O57+ z9jk#R=U7*I_+m0o7Fmtzf33#Rr`+biZFeFgF&9aPtuKaAGy{2*@`gD816ZqJ1gG{v zj{w;wYVc|aCI{KYt=r|X1;lBI3WME>*64{sojgyHyIusSP$SHTy>y=uqdL$75=ytu z9h^ikI2a?m+VWMa4uX6FX|>G#SN}tok&a7ZnCxBO`c0-){>Z<=)NU47Gx4k3oYyAb-Ym$&$VK*ODc$ z7`@;&sl`SoVbsR1r#8~K9tHXo`h8wPi?e=dZ9Uz4^SD~|LY%;$EEq0uJQUDK>A-=@ zb*@t6Z!mf&FARP|J&wRn4$R%8`?sjDTZwm%xOsC6Z1#;g_GT6h@z?5H6+g{?d1-qP z9&8@?+QrU)6lIn3%*WLaCkexhAaynKGPeFQS*6 zHEKf#t66jcb3Ek(i-JQNdz7r^uwwyHQBk0fG|Xlb7cE+ph*PYU=VZkX2UKpBv9Hy( z=eDcg@mQD~PC470=^6NrCndteiYfbFQcA(pAMmVLw+huV23nNG1gLb5%mV z5AB(TlPLziHD$1pb>nbzWm%^?;Sai+6YyAK^zryNsQfyy+!j&tii$NL=S*Lxm`UJ*86y;{gh&dj6NAA~0My>`at|>6 zxe@8_t+q)@p2C1VH<@r7f11JhNnU=+_b@UmCdW^bD<3esT+@gY#2No&BEe*E>c-v7 zWZStrAZ`>yVb;ue@!~c{ua!P!TM0ta5VQKUi}A3lNE3=eB8UxT^&X4}0`#wA#pMp} zqpvp%J1)BnDlFjP!-uf$&3u2H-Y9+Y)lCUzA?+<>>H#eq|JmGU@L;Ove2$pZ4blle zbYwtLUS>c^p5!<(7_u=)0V6MhTq(n>b6A{2G!$d+PU`WOVoFd5kTU{j&Ft46m2;m? z5brkpWnuWq0O;(y&NVCbAKR1UwBuNmpC2qWikMJHglM)fSYdB}bkf_{1O;x5<_EH* z3;UMssk$2b)-yaT0{3$S?ozs58KozRt6y$%9lD+<@flh4^}Bb+L96h<)fBO4=}IM> z;Bb1&odYl_k*SZ-si_SRf+R3ndbii7Pf8JI3dFje>~zOsbk{(`@G7aGyljObN6r4! 
zL4@-A8-Al7G_?dzq~652t44-&pTSw(iu)H}%MgYs=Qi$ek*{*F*>HjB?iv?(LBoh{0Rq~kD>wQ;vK-~H2~xeo}X3k?m7%g_m4U&?{kqU zcw(xs0LE=EZ8I-?mMr)N$51qJp2*8%Qk)Z4poJjtk|cr(?cH$+l9&Y33nT{(Aq=XL7+I@IP$~1Ix(5}Fw@eSt2 z04F5QalM6dgfUXF|Cra%?NZF%kqvrxaIMb%)d*Pj0VPW)1k3thi1NM#BBM zN2M-LvnCtJR0QYXYC5ReBoO>!m>!Ig+Ir^z{qr;q zVWT3i=FmfF0~O6rx%-4{=Kpx0d7`1#u3JY#0lZD)G~f=QKy-1P03!0pmT$rP!#z;l zA%Q-|af~AVS&c`>_#OJ*(tsR3Az8ocSOl(TSxn(ls&FA(ez7gxT*n@1dok(>~tH~%tcZoZcZvJd#5q;(8Y#!8gg?F24P;I;G#CKB-cLe-@hL> zKnTlkt>{s@OOF2Pg^g0%E`wZTV2#^g~JiS9>iGjSf1 z3p3Nj#rq1rDG94!EklED3!dCA?e&KZZ5%N#QDpj!c&WhKu8w%kGSGWgC-Wlvwz=2r z7`cRn1h+<>VP$4yy!7(rUYN0#q1GG#3rsXsX~;ct2f*Tqmy!k%;= z7Vy<-yoHs3)tpShVC~7o096dU5&(g)t-wv(X!Xr1A!{kZbHhmv8(9KY5gaBD4#}6i zk?QXgi<2$clOnc3=kxyII|yhFgQ7n!VBl+N!IgsKx(uuu(IgQz#Bo6u8CRyox80G+ zYJQfM_~J+?%H1YRcoM4@hCCJ{MWEpop%U9CeX1Abw|GKr3_$x8k1tFkmx#seKO!L( z)$Q@&#$EFMW3m#VV$HvO8*d)Z+h~6oF3(-+yS@MsqvukF8f%O(cRMKf1V7our&UIk zp7?Zj0ML32jY<{9u*pyz^hyEr7AnpWQ&sm1Jk&@#mjBH6-7|Kp2fV(rtwfBR|AT~8 z_UY3p!aLbpSESaC$qpz+^?GFXS+W-h24q{O!JknBf7>_oC0dwm&;eDGi929+RMA(U zYpjObt}&c5@4R5Y#jV69C|svdZMVV0XJ~0DNYEH^ILo#g)CCM+3X{A}$}P-t4+aX$ z&s7=t2@Pj?K67p74Uk>f`Rs{RWs>GCr4m&=jWO~d+BxqTcpef;G-o= zmsX+7g-1D9n(PYj!wmul%SuZR_DAnZ&;c-8fONSwotFt3%=x8uS_wqOQReMu_~078 zjgqc}10bK!PzjFmR!l{>&biMgFq+a0z+2T5)3C?&&uk9)Tp;-{$Cw5oB&%vSCYWkSamSsLK%J*EZ_R#r_eJv*#8g3`AA%cVx_LaVJP=nj zz2@ty0-GF}KGg!tl982l3*7g@Em~Xct;V4IshVjvEg;0TuH%+PxMzU0*x0cTI!;<> zcyB{i4)FE$#U-2mq~TB2JcwF2($0SPF-|-#$HaX4aTtt*kfk@Ed6qz*a{ZdqDXaK` zxnpoW*p0cHW@6C#5GqI#GOMArwFp#yb=y`Ib!Ns8Bu&#+wogLmbmX|cyO7NH={gF+A9X}LHSYXz=`0|OxEMDx}*27*3sSP&IRk#(&?Rpn6gd%+$f=SW_i~@0a6G;wo zHSQ0M>^H#zNw|(}B@ME4uU6hm5BC(4Zk=@!h}_oM`8I$; zf1qXGo`v|Rmt5;=Pu~d)n+UrGX)Vb72f308(-DP(bh5%o*9 zobyZWnoD>fbndiVm}}eOJ8H-+eIL>}fF@}v(vFh_!WI+T#q`jNFVw7aH!iQOVTA`v9Z3foBcwiz8PyVR-BqYyC|L*a`#Ss*Z;vm1;ke(%sj$ z92cK;3y2B_DHy#KI(JIW`{H{>DoHSygeBT;xM!APy{{l#I&h+J_2_!<`6urKL1H6A zIP`=#q^p@G8=MZAKYJLaB_ncW0($Xdv2OTIoR?@r=7Y~yP{cMycyLhpFs0eRBt`ll zqOsyCg|wzOH#oDBIej;NV7M1Nbz@^@vg6!aH&NZ8l5_4bR9>cc@y@^0dNoXKK0hkrkjmHE=kIu{(`JObB+Kb!*mafoyOKa!;b=?2Dpj z73LvDva zMaHHMtW8U;HcXzl6(XdQ5ey8-dE&$g4Cp{omqNt>PwVej3BU1HI9x;ahUOxT@pLpBt%O9js88!OG3 zpQmyh1WFGBboHeR@b#X5h)^P9EeP0!F-)b5E*)Xq8`(Yo_vW5UnEe7a5AS~#(_osY zIPtTGC|JY?iCn81C(DjpT0!pXfiH$!pd{I3!lS5=xz!PzFNrN+t^ZNnxyR*vzkmG3 z9JZNhEMt9b-KrHg5sREQI^L2gF}FjBQYaC^oQI0$P~8<$NJ&^o2Z=M@_xtPj=N?ww_viC|zpv|ho!(HnVxZ(~iygXayrzBXN*+M3e>-Bw z8+&Y~!=cIP9j+aAZM)SkA(6F{OUb{3cf=P@D!<#7;xVuO=~N6)qdf;ddFTJh(VIpd z`+W>g`NT#qMyNmz);h!rCOMMQ#ojWx=dq9XY6w&Hj3*vb95v7s95Yt0=dxX|_D1M+ z1$TNi<6$#YFzemcHC^{SbPkVur!$q4M?fIla$`FTi?8ld$Y3jGwPX%GRiRGz?tO5! 
[binary PNG data omitted]

diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/mat-mul.png b/src/dios-egraphs/Diospyros/data-plots/plots/mat-mul.png
new file mode 100644
index 0000000000000000000000000000000000000000..ce71362fbd0b79ddd1424d48763666aebd025ca7
GIT binary patch
literal 37124
[binary PNG data omitted]

diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/q-prod.png b/src/dios-egraphs/Diospyros/data-plots/plots/q-prod.png
new file mode 100644
index 0000000000000000000000000000000000000000..a8ab5c69d5eed46cc8b01c4e060ee0cc326c0487
GIT binary patch
literal 22755
[binary PNG data omitted]
zuMj^qIw~q^4!8{PxOIQ!NY6a)5Ls9T1egKq#pr;aXu~(Hx^}*QzkdJXARzh(Dr>O_ z8r|!pr*?>`Wk2ItAR(ff0f>JAK=XG7;KQHa)88)J{N@qW_f1hE5_WH|obz^VqUE@8 zo9A1aa-9+J^th@Q6n;qP7_b)-&8jPz2M!+8v@f4J=+IQnLk~Kp@X2a2C!@M=i$&?T zx|1FmOKGN}-c6fO;YxJW3~zvqKvSsW0U>RzAKl-SR+BK;6@-uKuOV~?r{Xw8ql^#` z&GU~>Xet)MhR@1co4GkxRlp>UjhTLIAEkm2LP7&D(}zBQjguB0`qA~$O};Bw+jJ@H z0YL!WQb2iD@oZInUEPi#5jPk@zjdx#TI7F&3z=eKF1RBOmHs~)m#K;|Cpbm+r2>9Yu*?p|f$!{~9=jjbgJg3MlGrIh{{%z~h zjaKt=s+J2G`t!ZT%fO>Y#j<;u<<6KAncv``Zc}d-@TrIdKr=hy;2_81$|K6o_cRmE z6bx+Eu#WD?g)i7j#b$QZl=4v{M=DvYe6sW9o>MJBW$;H-vPinH%q(qB$bv-bfythe={zvO0X?|Xsg+0%$RP(OMI{&W<_V$m-kac z*TPuX>rE(Jcj?xj?`Y-it;anQSs>?j?t%q<1~skIn9%fhq8}~mP?}^k<2PwFV`5^& zG9^@XKew@5K8JeM_p2i6VLM$y#791+*k-aTa=L^4K&B9lxq!Ir&DrEBMtvqyboEXV(du8(b|p-x8VM1?hG zY8gj$6>cqa1xf0ls%pyPjiSX0A@#ooJPFjlrj@HUT=P9)h^;NG%0!ku-JPGDbn6*D zT$q|}A^XH#ym**@>GFn#FUH&7ultx#GFetpQL_E=qHi9#UDjp1{`Yfsm%^S^RX0!@ z7`AN$L~tF8AX>~O3JD$S68a~;DcHQS^kY4xux35VSP?8AM81=Y^|Za`jLvUvxcrz zJ~Z^(y^L;#8C{%xhmN@!Xej^A_|j^%g7W{_f1FnSiN;!^SwkxD>Uwh{Di?UiU1ZBID!tbZD_X@t>_jVc{RE$8fdQ9MoUDK3xT{Iu-^{gsbk*Zp<6k93qf+|5GH}a zk(afz$6_fxfeCzdF3%ev9rni2_v5^$Bj@Dn?%mcmqYUC+W+WMaMRnF4_Ow+(nqnvEdi79!Nh zzJK4JD!cdoSCy6b@%=KzrZs{{n-7Pm1-1+xK7}@*gPPiOxp&|wdrGos!T2WCn0kIG zvU9QkdbslA{iT3kXnbW-jv>w6Xn+lQE&-WzxYh85$I;bUn3~=R98Ra@)-;3LW`;E> z-SQ9}ovjq-yTpK5UVy@LAb}l7@p=6h&3IF26_&D`beZ@CZ-EXu)}9d$VFVSJr*75r z208%uF*`{0ZI$r-8c@n%8rxQhCcBlat{h62YvJH9VC-KFj+d8_jZN^HWBhoeKyU7> zrh{SBt<#b|y?bxoC8EUSRjixLK(Z(1P#~;V?ylz^4%{*7KmI*KN0~wZso2A(atJ z=UmB@F^24KEE%t>le5Ldv6okn>Lg-Uc6$0IIDyKmG-aQ@LAq)U8nln&FJY{5`A8=6 zEkJNXLPOgr31OshHWut{i|t#;WXV32mQXA`wvQi&SWFYZYWd2Q z(Lmr$2oPpnO{PtY;nxee>AeGIc5%Kp=fTLvFcX`Crci$S zauz1a&=WH9;}$=dkBF|UoS$>|ZVcwl{tkm4736b3I{l4N+bIzJ2{unkO01|6`i>bB z4ns7B(o8}M$Uro}WdVcNSv}7vXOxzzS(nISPsSKh7p4o#2B2fT-e$znge;e(8DNv$5M16w<~v`y1OI r`h0JtE+PH!(HCTZ|C|1!ZsTR+_bI`99e*nD&lKZnMv3F+Z25lxP|>V& literal 0 HcmV?d00001 diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/speedup.png b/src/dios-egraphs/Diospyros/data-plots/plots/speedup.png new file mode 100644 index 0000000000000000000000000000000000000000..274651c991488003f1b2e86d7f1c50f99ef9a194 GIT binary patch literal 44382 zcmeFZcU06{w=Gy;RtzXAD2gCak|3fe!B8SmvPcrif&@X39Lz+K45Fe0$)ZS*Aecyn zq99R{5|p5TNKW?LoZDaDd&ld3_l@p5-amcD_`aiuqUu+B@3q!kbI!H>R8O5;yL#Jd z3Wc&(UXH3xq0swKD0E*}F2{e_U)$D$e@Qr=IOC{cXYS~1^kr0c=6I9VPTtpy+Fv$!9rNZLvs(_WYtAEol6wTdL!~bx)hmY zO9~~UTAq4T)8*dJc4tkdrq&fB7Dtm-@}4=ejPcnnai*n584q9fQ#0{f=Ki|FByhh- zUW(t#$>#WHbMfVOn~jYtI#la@wr$>!xToyhjI71w3$@g_QPL(wdVhDv8G9faVu{Ol1J&VsvV=HY9yQ1CCYEy zvgPq$yy2~%Bs$@X*Hc5?G?3d-aGy67Mtea$&)7) zA4twsCAM7~Hm7YanV-<(Q+?<&_vfQbYUu4`mAHY&s^RJQ3a3iuMiqPQOTFefg@jIB z{@KhfA|hgJZJqJ9F3H@~G;xr_LsKD~_mSH?m1_Pzn*IJEr%;>%g_dlyB=5fqv`cKQ zgE2bzols#z=?6>iTpeCY+dS0ql7BP*X{G07spo6s(<-(F$Rj5u>#6ntKwvmG6p$&@WrAm%gbXyKr%yCxn<>_w7t3~^wAEq=lHEN!@k=A}#*G_QpB^1A!-L`Z z5|v^^MNP^X^7i34r#Q5RsiP?P7 z#PLhiRcU2IJY|ml%J1~g%*lRUaM@Objn>N`yJ4ba_1eZP+ z7&bOG=D@k!%FoX~J<)2n%x6?z*OMZw|6-SNtoY_NYu1SD+ZVQc?Y2ro@A)KM9%<>b z+$s+Q%&H?dW4-t^Q|{D1G0&`aB#s+n4Mxan_qu#9PS zq@bwF$T<;F(a;kCTabv9dxlH@p3d_g?eVM6(9w{8>d(Z-fnyQ&K+2tb7Q>E1n$*zH z+c?Tqew%m_RpO-8A01n^ih&^6t#m3ED z8;lE?DgKb(>BW!JP#(y#8*dQd`scg++nZ~9yUGH{`f_lbyv-W&SUJ`|Jw09N-a$16 z1_smdfd=(bFHe#^UKAB6#){kYG-m3JBGjte3N9sG{Pu=}Lp@QBk(?Cd8hJeHnKNfr zQY2hQ6PcNrD}Q%akdWdC4d2Vc%q*9wn@gpV^w-thy*rpqhMZ#sWVYV=R1Hzvwu7ry ztqQ?o@s0Pj=4WN-JeQrDpEn!%`g$W16X%*O0(aJI6(X_4W!F(mD|8-e3qhh%`1n97 zNiW|v+p1v?N!c$73l+-zH+QypTtG(9m0!+xYU{#}?IE<%t&_>=59OS*b{xsKmj7lDH;pME;?9gU{&^XE@dPlnKaINl$nXO&Jr z(dWbaDNeq;I6k0-9om9GVUcpn>G=?=9Vuk62SFGivoLjBQc{x2*`IiRW~{$1U0ayM 
zov-go3HOPNdotct@#V&TsVlhwU+=3j%OXU;1xDy-SOkcnYV3w^ZE0pgpy@w z->*Mgp3|~dtl#;nxtW=yp6m>Zj}LfHXE0;wWEv)R9CAEzg?`z{%=iV$tC{gZqjTqc zkWSJY^|DP@QJ3QLmQwVKT^Kp8(k)q@(l}V3x{11U6VEBlNUY1+iHV6+nzVfvHF2)l zJV)A6!flL~x|B;H{0NSCwg=MJ2FCYsGHjGP(b2xyX4P++nzs7+`O(~P9By8_wuG#j zK(=T4d%V&Yh?RYQ1%M)Ii> z3v<)3+1b7Q@Ljnu_eaI7<#V!% zk*Vo*#@({TTRC^vFw1n~s1%sU(d8oUEiD~KX-Mdk!PUt-L6h57zN{7v*jZXjhah$q=nRz9}Hz446ZTyLE zU0w0Ib|~;07*QLGb0xo+)E2w`ag2*Q>?}lGT6FoBdSudE;0zwyu*Bnq`{a}CpPk-+ zZxj?r4AiG42&C)g4(OTC$;UY}zD8h$n_->LcN9xt8E;|7=_p9fs~BG+Y_wYQtr}J* zDMYBBJ^AaIfI(r-zX0kH!ynq`EYbN`pz^yt`F{$PMIexxl8n#b?169t~Ld5#11@(*OZpVW)&*`u&c?EEh8 zg=vel>Suh&r^@f&zYkaZToKH+RZmZkya$QYv!z~wl%98fo4S7e%Cts`6DL8hsHoWR z+ot*HBTi0E-dTK1EEBKl5pq6tKQ=UIk#}l3THVs3vpHEVgo6O80-TD&!BM_)IOV-x zUte<=`FgY^_d-xrow(4aZfwPsD&?Hz~H1}x5>=f!bP9f(r%oEcMd;vYf9xPE=9D_#`&ZojCg`pkVUPR`0d zKR%Ydf3HM~63+6t8Tw~(GRk9mG?liSgEr_e@l%In;<3RdmE5zFL+we3?_nnfbF*OBSc z3EFXe{d=s39ObB?GxyzGTq+S}5#wizUD!$a1tdux*jbmUcc5Wb#r@}U2IhwYpPP$a zFWEadg#Mi$_T~tEHo4n##${EsLlBm<65o~JUUHBQI}efKtbi3KCARD5&x{PcJnI(C z)+oSavR5cvo0cqDLe>aSr3zW3E-?9JLC?gmR)_f+CqDJWO1&ock*`-!NqrL%5(coi z)uq~=VeZ+0`M9cws*+U?@* zPU;gEviQQxpfEv@TZPZ2Y<%9DZ&%%09o6aTfs!JKUrwFzc&x9IZdw(Z+W0Id=l#g~ z2Y_f|*3GJ}Z9u#PO9iq@s80;F^9$ z-}y2c+t>(_im^pdR}PgZqUqG^w(}|i>e)}9R;m_gD7dobWIf8uVB2>RsypP*{;oT!p=pHRa3-T!Wpf9%HU(CBDh z(h>P@=BEMOCn9^RKhc)}+zJ)5tfLk>_H$^ZX{y;+oI7_XZLXIVv58S_yl4k{VaRg{#%QLB@;i60yEoZ(E83~QpvT{wIB z)6QU>=jMI^0g87A&+937Jih$v3xUE4N=kvOGG6-S!ED0hqymdp0X~&|`JzjuMo_r9 zxfRf2nvV6=S`KNNwwLbO_>Np8GBeIRJNCWiK0nfF$k~@}W^Udn(|3&C-mF60^`>W) zgs$B#?i(AccURtEw6XNPThnZaw5Jk**P96^2E)~3I4 z<3`O)NlUa1;fhg0@}wnJZli|UYNC4zFLEAEE6D(0jIlCMRIC+3P*h55-|z%%`76u*8`bFj@cr> zgoK1dEoxX>o*G>n-Nq!SJ9fWxy-+*kJR|^Iq}jf5 zT>OiP^Ple@GzAOs^)w$exwVpw__;)`;7x79$XyMYi_?+dOQ6ABO$Z6GBPBWyi)W(3@w<7_`85u3V(Hm849}d24t316>pY7SWD2a2+UWl0yYlV#O4$}4cQP_^t*YHQ+cT_85%pg1Xt+#( zKH9* z2M!>Y`wHk}Q@gsl!h0_?W=wx=I(X=iE=f+pXN&J3&Xe6mO|W!9+YgXNDplyCG=QwY{)Eo@d*>XT>_!4Oke2S_q0J4UfnP z{8U_A+%xj+t>ta|#z)6juD#d$C`GGZ?UmzGxIgJv@nsKr`#C!6C=KT_^mR7Rz$y-vu6Y7V2CZbb60>=bf)T_E;%~0}Dd% z+LbF;vJtM6Ls?N+f*5g|yM*f)Xvz{c^!R;a>9S>2;HviJegwwk&oTsar(bkgnUqPK zEK%ckE6tQ%jw!{8D}zu_8@F%g2aMKiyma|;B$y~dD4fyJscLy{ zVTSLJwq2SUy8LcOsdp)9HPC&Uc9wXKmj0brDVZ5a*UX&y5PKjBmAwidmuUI|UZC(W3n)sqyeo*KQSa+(aEq%Cu_9~4nLjn!sDU7`c>2IoY&30{rI z3|PABSgVxA&5R7J2RDH2JA!6}bWK(bY2QHhrB2NDRBa+ZyL|a_W$XcAfGY&W5&^wD zrQEan`mD$%R{$^GR93ExjEubKV&sZ5H%my$UjIx$^m_bQ)Ff8=)3*R{J1s1TEq&qS)HogjSkNLR*j21bbtBm%07e|`W z{v4Fk8`#iCXvoNi;!rYx0($>I>TrXq3>c?t>FNARN=oN!ZEpjXm;gsiA&4WK>b5U` zS5d)p{H?E?|2IR1wQG$oTnGRWo#>8v!y|MqE-vTLGo%``Nx3nVl$4;Wqyz>9ee3J3Kb1&a58+hcIaT#jURV0aTXjxNl=ZX(u_cdErEL)a(JE9Tk;p^A0 z6!e=f3JSh`l==I9WH5Qhayjb5+Vkg(jL7bgUiKhplDn;m<1A9%3zBHbjLglKYiMZH zr)#eVkFK7%&*qC5c&vnTudnVE5xEi;wuvy=4YPURkI4RkXbu1nd0Su4)Y#Y$ zp34-A(^97!zosBlnmY3sL`}Bm?1Wg6cA`b|Q=@E$UWHX_*KXF(_-rk(qxUpQE0sF7 z3cYOzmb~&uoOex6aL4vp%0@;;Px=Sao_he*LID)2(IS~u-eE5k+UWTv`kx=8_HqLf zM3O`wEVB?j6n&4%@><#l{0r6XrssdsO{9H>`2e^V<|hqN$GivYwaM#r*7~67S0HTY z^5rphA5G||9*~zb=o1=;D)H;v+grvLF2wf#il&tCpsX;;9~${0!A3M0hhtzxnp;}< zrsd2<=sZ-uRF@zxE$TILJRSMlx7pD{UHBhJIE#;1Km{s!S60?v$Q-^5loCJiLrb1F zH+lSmBk0eNjpXr?CjrNF60vn)wZjCv*X|>-(9AnJun`+X#Kf2mA1<2Q#vCoZlR_BA z(XoCFMn=BV8D}CZ&e4;#^+RzFD)yS!C0`MEsZeO0869|LMn=+A0*#4P3$OUYbj8=V zZ&MU-GY1F2%4d6dUVh84X)qT8od<~W6t92v(&rySEz@qI|6Z4pvm;bIA~{*I@x0h5 zQu{!Lq{NK_S2i#*_Nv?3N^Bqg+44TK6a2GI(dEq1W)OB(^(pK~l#m(HOzrzcxdC|u z1O=To9_D$rj7VROkHHNCn{cqRvpYa*0QDK1gEVf@3#Nq;=kI>mM?S1_UqV;jd%@%0 z)bPX1EG&LOK|#&U&0t>w0(Trfy;ItQmvowpAtEN_as>{(@9OI{6{3U$0rgeeH@o-O zaiwZ!Jpc(20FoYL2d%|aIfQgQc6N3UVi*q}KFlKN@|1ewM3UZvo+zJTXc?f9iM9iJ 
zcoX6)RW~>7p0uZKuClp_2ZQwV(u<%H)j{y#ctzn%`p3n^wSn@U)CI54fL zW#GMus!6&zLdYXh$>XA82YHM)XxF{4Hz~OM>n=cnO#3fD22DQ>yZbyG95h!%@n#UL zjEn>W7?=51At^?*C=qSItew;(ly5TtXHmDY^NftE)~$;qXbh#@*Ed{H@3f9LAz2G9 zS+E>(Oc`xMtn#9yp@4TW99STY_1yz@LL3~xG?ApAMN9*d@l_l5HtEUG%c(W4%B=1@0AsR^akGe$*LjL?4+WzIAdlFXD8Rf9a-W zZDaGf%^bTX>N?uI%g>JjUgQ5L<1(_VK^!ey7=S9l zCW#=zvcaPgWSs~N*WuTf{rIvK-d^@AWr8I_*JxklrXDKR=(c0d3cGSAcMK#+(G>?!)IPN1AR-ZJ?L}sbydKaZ2Ly z&**`nxIhKSv0dTf%7|91YbE&1a;xI(`pRq3m;5HrxEnB&`ByLo0~m z6y7sY#i7CA75+dSv;ALjM$DujVN8!V89EsR>i34Rtss@AF0wXt^K%9B;dT0_8{msu39EUp$V&1X;Q=^_27taI=k^-H$k7Qny-~#WzB|ls9RnRu@9KXpB z0@4py+Kx(nS6)secSrrYF)+Mxi-kOdlr4BWEQl75pTiQs4#6#o2<(I_BIGNWLtnW8 z#aHN5hz5yu4?*UUkCXP~1YPX9fc5Aa8WMuu5Cthi0Xj+#pg4BJvW}CqmEiISM?&OU zyd7cZsZ=7GRK*_1LfoX$Bpmxx2}8~pf{&7WeRY``NKEjw05!!=PQ5-2E+P737woC!12^rCX)UW^I z;!!|R;1Z{S`VgY>qqUA0@9yp<`MECP_Vdww8j%uQuau%24Nw2Dab3Zw(s7*7tp)?t?23cT?iXbN2K;1kj|-* zc3*M;!HiA8GKb1|doeQVJUw?M$GSzGFfAk*fVZ)%jlY2o*56kS9gODeCtE{wEZ`#h zlj9Qc62g3;P=!F#rs0s7A?I^&kS|9tFBX=6);Bmfic~CQ;uBOtgAvUHyCs zHBamj|EF|HG8|yC5L_qNLac{HFMBmp9zn#lCs+8SHJlp60;%U(X>m<&Vgy|m$;r)4 zLvL9I=tWiqdqT2hjEr~5_3PJDCe*S`<;hEbkBw>OZIv&S=mp{+oSsW+(6vF|OFQ7gk|2d(e`Lp8vFqP#UoDD5|R7A=U3xoV4KjosxIZ zb{c?7%^X;f(!PMzaDcUi;L{xNcxc?iozVA{y?JvCxtkkY$8g3;e2FBe0(C{sb`tho z*Yyh><%vs)NPGmEP(z0}r7wQEcV*}>8bzSj}J01;9G3D zr1w4Y(Rj^-MC=DHc7^SF|Ec%>Z?ZH3dT~gF!7(_c3P(T`dYnp}Dl8@yjH)Rr0z!W} z-0%WSbsEr=a2a_haOUZeNCBPDUuF=>IqH&BN|xj3g zd=9E#2-s^6RB~%;G=PuX3J0ws@O8$?#1 z-g3}Z6>!rrtXz2qK#d0dqpY!UB6ILdb2IVoXjY>ZaEObG_uK9x9wy+-1vK5!T)pH= z&@AIT=vN*genoUC4n38OkQZHkh(CDnfVA`t*-#+;lann9TS@DVW^5oBkJYC!hTfF* z;Khd|JrhLq{rh(XnG)hLM7abi{PjhPfF`U6`~~dCZ=!YvzmM5>k~ENjU6BZ28aRB% z)9c9!-5agSrwE-bK7$@233*mM(aZqk6NLxiry7BM-i_!8w2`1_3)UQ~D z?jPVa-VQ$abxC}?;xQ_R^*@Zu;r|u9``_my#`pc-`{pFzH!QMOU{^Gtxkd>K13xv` zlfJ>%LHK?ir3927(S_h#+5n6RB{2kM3>@MnBH0_C2Bvf~+1ast{E zlLdVM07G$B6Lrs`V_NhPcoFDLlolr8v-+Tb=4J-7>=!YG{?IXIFb5Ai(kBWA%_lIGfh*TTlp?0AW)s9>*69@J+& zB=i;fx(GB4$ps&feaz5$NoY~YYFa0Jjy(cQ@D7qycnGX zb;ByKgKT4Ct>v1p$go<$=m|(h&PL*3wX-P#B?51+K>Lp%bX%2`yekyRLWfJ?yF!XP z)Q}CTop>f;7!a&O;$Zm2gAS{~Z7W1)I0u-Zw(;p?ClVC4Q^rdGJqg%E{=mMb{L&a4?+6(z+Luds`ou?PxKdl1( zsz{{eQaeU+@}(^gDN_Um1y3Z@@&?mY!tz5XPxxXVz6~Xt+0-VpsWSjg6pa4 z+qaO^0w40ryxmw8NPPxT-^gFb)Nk|nLvE8#iQT? z?)x0pz3L&zR+u1r9|wUT)c^-knQBZvsy~P|ZWSjluN-I?9vA_$O)AzQ|KCBBR^3>$ zm3XGI&)0Gg>jDgAT*(2lSh&Qk4#mqp!Id)Y4Rc&i1)Fy`iZov}8V?RARH8<2mZD{> z?k9vFW3Stx`)dyF-K&b;LS4`q?+xCx{~_4`YC#-M>gvsVDsiR=cc!yt>sB6u5Afgm zo|-PcB~Vxex&sP*A;Qt3vI6eP}juImW1l~#gEX%dbPK=_oDrPW++==-+j~9 z*W-8D*1A*;9?}8}Ek1Yng?MtzWaUb0hOo|yYWV}T z)WvFir6lmL>cuYhq=SUoe)L~YD!uL;&Q}tGn&`ZQX$7_)C?q(NT5z|fg3bd>GjW|Y zW*G|O4Z#C|p;Z87dRvj4fbhfpImyx*IZk6p$0t{M7uYmN_uGSZ1FRge!mnIO z43|5i0`Y6S&UO9DyP%#2knBw`C#*9OgpUF0OaefKSGlw=`~2dUjXb4CLC+uv4kZli zY)YgGyzgW-g!FB<6&AO{9ow-N%vT{q0jp_JJfX-OTEsqTL&+$ffK=#1e`{9bm)T)C z`M-Xq_zJ{SD5RH4m;nj`(PECklK6w1%erhN+qj;Nizh!*af(?_&$L0pgCf3hU~U-Amo?Xw>LLfbY-w?1kHml4nt#3!*meQXdKY3SPSuyP#Eg< z-|`APN80y|UQt|%k-n(Mqz9l<_kgN_gm(vL&Oa;5?(*g^-@b%br_DHMn+1JhoZ#9b zGLKaZMqofW9%{FNrur{sW{hv~Rhz-tKm|`T!~X%gutPmSp%1l+CH_DdATFi(uFhvz zXXGTlMx}n=HjOCzB$b+q541va?k7S6d)_pO^K=aiiA3gp{_aZ-`=iwy~CM415Ecr9K z9ga}P={yuk+RXgSI2i=00^`#Qa*Zp1-= zuN>?+rbv0DY2v)b!Le@Lx<&5_Vu~B|7BOV&JTqQ)%Lj^gd_GO^EpM;?VuGOG)&8R? 
zLnrLJ_*NnP%5rr^&o2(uQLK!NNs;_QbR?$L%CVu4m^qBArOa6Wi`2m|JvVJv%~n9V zf@S~3Us)t({`a;n|2MOpN;@$GDs5w1-6opT(IRc}ynqv_F7UDkiHQ!M4@Qvt8k2o&I{V82Z*}A zoKPm125Ml#8`LKEL5u3Vg`o(-Kj`c0gGD<@SYKi}gWmGEh_o;NB2U&u5{iX)&|`Gf zIi$2Q@G+oD;&m5bw?b=F^+}pAeE5=K$d!pjH81SD31vC$4ji~5A{M_Zrw!q3FE9!+ z$sS0brz3&CXjt8{&!6`lpOu5y2gaMYJPHT@BYzT(-ner0+gt0SP@DZ?V`Jg_sYcD= z#54t_%Fuuy1rTQ-vF!Q!k}(#m#tc3)T6oCQr+Z`eUx<`+894&gHmTZ*5TCui`XLlA zU|zyK6308In3yJ3z!XrD7Q~LcVtj7oZ_s@r8Uc4HqS&0#)g{(9j6W$rq;}FG0%<&J z82`p2j3LlM0a@h!6o3;=-TCqUVQtu=W0xf1B^)zT9;5$wc6cxSPk565|HlM6{lg&t zJ1+MBlVyp~52Fa_zfdJZq2=Y}1Pmc|CiR*+F++1uLc$M6So4X#4$=Nt_be7LQsoCQ~_Sr!)6JX@Vc=vsjZ1q+Q3K&1!BFKlxAvHps}xpp^^b*M8D-v5I66 zhqM2W*}(FN47|vM@DGF|l~j4OAQ4d0sv$QxyyzjTAu)^~?FQNc4yk);blfZkd_9@q zfP!-WyEyXD=}cjcuED|Bp&9b}0(m6eugB52dy{biQu2H8e+~e|V)?MBDPNi=^s=_O zn9|9!juA><^dRhZI6*&Ew$-o_3{Ow2T}cSiG118(mhc~NgPIy}<@Mjrk)JRJW6n1; zt8_sZeq4ScmJqT|Jyo5{Knw0TU)~CUD^)7zu3fv@6-)jjA1o9xggi{NPYr`S^EMS3 z^3}BM$5WroY|)MzNV6k<)H%AOczFITIecS$Gd(M=5~BQnL>dy*wIm=^G&kcovQb&#XSZN?!G4pdH3()#-PScB(h zhfAx``dyN=h2MkI)auC#l_i^AsRUwv3ROTJZIBr($oQz}-S))Z2DbDL%;CLql8p0d z>5ZV5Oflt8$Q##%IoI3o>a{$b)No3c?ILm~DllL0B^uE+(FJ)9>i^JD-a6ipZ<4`J zhL*{^EC9oU1bah_m62Njh}{ii(znZ`Fq=heVVD;SHxYqU{7OX$9(mHWl1;}(MM6IL zA=d^I1{sMmg<%-==Q*ois*2NQ`Ma;np@$RW4^iace`WPc511K9ds67n&q0Q-$j8Gs z%NirC9!uGe`FkP+11P_*Jsn6^&3Isj`A5CH{pcAoN(=_38l(*71UXL{m%9Fu&B(}D zJSb2tE5Q{wa6a+%g%t(&*N z7cRd!#I~))_{J^39_t`H_Okl%pxd`)K^T8}VsIFPJp6jy12efFstO18JZZ#N{`&13 z9r7aO2)d2Diu%R^Ky;Xl-+|S_ad6sQqTpwm9VDMU-${xnx@;qGw)mi4 z7uO`7rolNBZf97*uVDEsF0>IH%xfN=WmpGy15F%6e$Fd@Z>dC)If{Hhp}@($7G9Tw z7%0ITpG^pR7-2GVT zF{IKpL9*9&pS(?}>|p!Z0|;hhZ5;@QmBpZZI38K0;-Hzq0qbTT92O&Rr4kPDZa->v zz;nOV&~tnDLc>|HyfpZ8^uwl!VQ^HD((0P4da0gHT1~;d%qLHr_=X^+gzD~W&Ao67;+Tb=WM*IA zdFWBAKx9&U&;`D&tJ_HI+acAK%AYbxlLj#!95e z1SILcx}-IrH?CtqodJfr*9e-)4w25yhebpfQBV8Yiw@w_?S)lj>}Sg+l%W)@^es4) z6R)3jnh&)V>=hT6#Tc&|^ef~UEauw&yjNaU(G;)5y4fR`b4-B#{=%_;BZV+<&~o2l z>NZl)9umN}uU~x-Xc%Z+LCG%Je=2TZMjX}iMr`bMkTJ_C2R&!RaGisQRXs2DBTPfu>Mu;~$3RZArbo3Z}Nx#v2Dh=ReJgOB1IeEgWq;DEy(yL$|0OATH_)|Y$0Wl+4{A>}?rLM!%|R@b=~&oq7ti7T18ktI(I zOP>f7dCrP|$+HQ7V8ZLE0@-;9I&xAxbT(=uBWjVwOu`|82E>YC#$(5h(Qn+eiLd~0 z(zA<-iYl5Ud3+#K6y$sWAW=DcE%a zQ&LhU{nt=%4tKy9zHaN*t#}qA7(x~3-C!-jU-+W#=00gsIl&=U1 zIOBvPg$*AJ#Y#x=eGCMybjF0o3U4^EtO~NSgy-hwE`o`p{O#P9@BMc{scgwu`jaDH z#^*68`n9L$I!wi>ky$#=m+!Z#e+9jXLJ8g}bqphfnijJxD=Es{Tq8$S)Y;3_0 zv=Y!*F(0sH#nl=1SWQfCJObJ3vM?t>`3ekj1)Bg*K}d9T%Fj}em%Ms;0`NqI_7|c} zeu!L>a^@Ka0QLd>5)T%W^1!1gfDwu_s5Qsn*Nb>}+P&-Y?1T+$;9F6o%b~`L?l?R> z%PuXw6XCD1U%aO7(!*%`cT|7tS77pg~bnHl3Nnd(}D85!OvGZ^xMh-2*_`{#e4 zM7i?tQeu)zL8VWNj3<*xVDL4(Ug64@WvDFZ5KIqp-|?#c1oAkDFrEV(wTOxOG_5t> zbH7*kB(j~t%h;nIo)(iZkD%4r=?%g>|y5LHV=z%jJ3k{OP<{AH1jo)UO)vmX7SUhvmCI%a9E~ zI2yxhb#3#gZxwP2|Aj4(Uj|lLNOM}Dn3D1!p@T_~SIC(nn4ZE|*b)#} zN6(+{J3JWU@%M=OU&_)ll3MjDoS?SNK%0v_uTkl9^OVW!~Beu|M% zj1h|*x4%;FHxD9771s3(z5I0$>fb@bSA~-p+$zHn_0Bpt>QZgRaT|>oD3%)lyN?HW zrc;gRQ4O1z)B{zq7J?Yb1r$J`o0!3W{pJll*7@T_ zi|%lXc=(xo{QOqI+7y6X&1-R;^0lk$DxN=Gw;K)|)u-oYjW6mBO5b&4NB#p`lvmG8D$bodN5PD&*-NyKNi6(kZ9Pt2qdluY zHLU_sjGMETf=brdIEh_-)7W@=KeZg_45Qz{2yqk4h7JcNSI-P|i=48>-8w1SSvzMY zeg)t_@*QuXkc|CxXy`Vw9z6x;aXqRKK+z*|4bGXTyAECcnUdqhJqfZ+6IX^vJX*uR za2;R=c975^duB}?Yed4$bqGm#`Xj^nw>*fkoxo`-!98z5%>aYnqZ)p;)7lw9_eso ze7p>dnh1tYF(I=Now`@n?R*5!35;gCO%4Si42bW8`H&+o$dc6nv2ih`*kqzS6GU!-0a47gp zR+qnf_v-a)I$%E4jf5OEMr`q7qT1?b&X`b3a^QBHU^FI}?_@^zwd_v)$7hf!@olvl zKTCIDl+`CO@*a3XqRbc6KMsltc_E=?=y|xjO3Gz~f-imj`SX3`o*)cp!JnQcWP_~! 
z2A>FIr9RUegtbC?okD`|cu2tiNpH{`FT%Qd0B4 zEz6g#f0iTo4#Hw`crfC$4&^IQ$zt-jSFG|4FkBV$It`b<{4I4GUkwAS`|z_BxyM`J zx?{pUS0@2yMSvh04q?XpB*4gC3_+Q-U>ajD=3L075xKVJvyZ&3qbURAGNNw-1O6CG ze}g2Bj_57e-|!zWan_c4dy@ledHOHJI~4pi7?4|yyuXrxL9@}jb*s`S+lNlC*|oT2 z=Lp)PPZ*{kz05QnQXV(5{w{D!do|m%GS;vmfi-T(c z`}3x=lMMrz6)+5BN;culklCyLC@0ufaHrkd>gqM9DksslYYzA3%TFgr;5tR{QV+4q zh~x)Wa)BTM9^$ajdOxAQKlGZP0lGMfg*#_$eeavlrhm$KIPU6N4ZAn0pP_s?0skaI zaJ@>}$6jy&-*B-^+cQF`!^X4$!F+mP?8(QFys}y3J#tSHd|LOd?xSHKOoRL;i?}Xoy}vu~C&0=>z|Sd2?cu$oN?b%a9fz#b8}@9y_E1r zYeQA^B5I3@ao5^)wC4#x6jQjeO9XRz;X(!|xb>=;`9^P-BU!A1|8%!cVyxKhol^eL z(0RKbTfQ93I1M7nAQ_EbAFji~NQ4i{9_-ij1ahaQ@}Y}L%b3MIWCZ%G-6p2`Yxi{2 zV1>S+0imL1{>D3b;e|0ih$eKOC4G!^)G{aop{ppx6w^`wSyr#45Gl?6mCc+&>jOIZ6yV z{Z_cSPh!qdv$_<-8hAoufYh{J+%<-_IG}EaIp(PM;%14Le{IU*J3;i z=}FMzP#)%NfL&}m{H@1KO*vr#p$DKR4Lmq3#pmmW$t`%q4Cu;=>_SEshCVfMXPrHJ z_6#&h7^yM#cIXIvQGQ)T#e#HsIXTy%VCAND;8q3}71h=O)}1@o;jA@qF0Go3+w^%2 zvB6953J5UZsN$usA!tSP0>Pia@RR0z4lrU6tZ;8=v^^hc`dQ{30(^aakd{ynA2Qq; z0TqKw@0Njjt3dJycZ!pAy^4$m7B8{&(SZcQ1%44GmzR-8irRT4QIQWvQx@&2=Imkg zN@ClT@0xt{eE(1DIEipN^Yk2@{}w@iAcj3}?QvfYD~z-_2BlW>V5s7OmTBt;X3Le& zdfA-xrnrDwM!CC+5|!47thtNCLYibu5TVK5ikx~NHaG13_bs07;&6wQSqm5sBMXb< zZOi#NIhS^Hbo9ZFV&xKw+>vIhfGc~hVbHfXWsHLA4gbwiy+sxuW z%V$C#y)3l!cgJ||C5(2!DwHmr0pV&2?YA1{2pepAEo$z)Mg^=Z{>+Zsx)vmQ;La@E__P#tUj2jhGuXtiI7Wc~sS{Civj;L`p^0>T zLYhVnK8ISYFxZDHkyJ6i!&QSb44(91AluYH1Al#{-c|@cQxN$&bc8c|D&~+N!N5dk z3*f3*($5*_8s=JUhJLCgm;j63RoR3q>FI;+>wk$}XcQob8oA^aT3aw(iQx$t20D+6 zZqm5+v9r^mFQmeQqK1h!GBRVi>I3HK%?S;iYP-3qaN(YLeKa{p8;F9e(}uXONu=)6$=GJw8$NM7$s4AY~xrZb3^* z8&WE^?_Q3pWv?J(q?pGNcZ=q#3$n&sQFAMP6IcLuMY}!x!&mG|rkBJ1o<=2#@o>Wd zzD1Iz@L)M#=W~?RDa<1+9`QqmwgCl~fv&*q2;tl1y=MA(5Ebcxn0}BHJWYOUN8cHzXiUu zho=lTg1u^KVF5Qye@9Fp9Qh7yfdh8-usYLcReq;TKrVpUGbD?rk{7*S06GYl%w9#$Oi z?TN1DF1SsX935yc>JN}cUjYwcozy*>KMxK*Bs;n>gRp@x98aOu`vheVF_TW4$BoCt zGB-Wy=;TCNGR-JUpl{@{s7KYS3)8ZM2GXiu;Ea@E$|s!Kg$1O9hKBaYtOE>u;XJ$( z1e!leb^63J$9{F(D0Q@fK0V@W`X`)WLKF9%D#zjD^K+$L@u55S%7+g9&WnM4ThKs* z|AL2_yJ6w;?6rB<{hS$g!rV;u|7k)qKhRt#_jQjTQzwzvu z2rlS(m{tK2XDtSiV3XVK9@C<@l+zbRMT*>As|*O4Y1COr1~dT=QGKy4rZH(AYw`%o z3OK74bnOMQ4~g>`k0tU^-k2h!1j&Yd+ahzs;0Kt2bHJ1-hNaTrm!5k6^@58UXi7Er z1NdxJz_Et+Jllen<5<_lQiY|ID4oY?5=%iq1R)mQV%PB4nwhikb3P=TXw4iJ@+wkh zvc#wFMM5Iy;#Zf>p890JP=li%~`SN)E0*BNYHAqiufPS@y^2`i06c>6Q&;Nx4 zz8>2Gs(K5W~z zZ+mY4m0Mtj#K#o{BI2wI?*N~cX08b;gIarARpoCx;d+x<|H5NK@Y95$7sSd(OqFX7 zH{sf$yJfipaR|UU+@bmX)&Kb^idxp#0VX6RCB=Lkwm*GaC3Gs?WlZKVh8~j%^nO z`%U4Er?8kS1ZXlT7LJR z+pp@3PHc~ftZR9{(cDpH`mey>G{i<}(m40YP(N?Ke)q!bMH@G6^!PAKztr~^_bYyF z1eXqg%{-`S&ZNA$QzsQ38Rk5SZK*9%TB`U~k4j+c)`jN+NR8_b*~0FF4)i?S`U#V^L0r0Dy?WKQjG6Gxs4s?sKH7}U32LA) zEOsl?G&~+nG!`ynBW*@D;9h+m)c!P1#uQbaRQ>VUs}*_-xT%K+T^C`R1xC36Xy{*i zdo41%|Iu57JYmcg)oJdDKTrP&QbNy6b9d|By)L2^r#$zca*BpYJfRso9^E(9U+Aj_ z(RM;LH^$4W4}xHCjLLhU;?_PXx4=JpK6lT9AJlfC7ZWtauaDNx|6iu9nWAPhaI7GD z02wv_#pn0C4bZsW+WIgx+d{S81+U6;W!i3K?{A-j!>|O1B!rowUuh?u9$I9j0zg|m z$CfAe_iFN*hb=>{%@TD4-_k_K4lq_+;SP(>j<>x+3|9pC?7V+}yXtB=za{mTXF;(G zen7;zQ_Ld^td*DZP#)hzdAU4h=`ga16!4Ai4)vg>+MsK{Up}eeFFZ{!Rf$?d(Imi5 zREUxT2DpP=Fx@>|nD2e>a|5%FQ)AYX*cTwj36PRVwF*(P>?-a&L2fjyDoW`8u3VY! 
z(QibO?ms$SJ=V`GZ0U$cmz1!xuC1E6yK$qiKn|L&%uGzsUNn-4tGHZo{pN2%+Hez8 z2e9cy=8kcqQrliWi*|Lq(aSM6_Ug5c(!et!t1`4^hk4p}{a<(3@Zmw59y{4aV2&ip zAbmNQY#JyJfCz>oj_^}P0CCsPUTd=SdV1D{x?Bg(1Cb9-2q^W&jk!eJ!-6Ns%j+Rv zyKRdFStd}-CY_#r`j~7p-cvCkm{#ySIMmnX=3M@+J%pEl>4|X0z9;!T!--@0O-Be` z>g3^W=E2lrJ1Uj3o$J>IzZU*aNB96?004q=Mm7`B!HYxa3E2`=o9oPKg5rmAc~NS+ z9CSIMw?!~0D|X>cL}4ouyZJDhA8C4^7~S5W4ZQ#7<}6bNuvUvtLdc6 z5b>&(rltg1)OfvP!meGt_wBo4+qI{K`z{v4vOzR;J<$|2f&t{(rC$;v&jwHaZQ@QY97!NZpX~wYWzji>2>#7Y}foei@Wa96s zs|zhMP=qFO2LFAD%o*GT_bC9r=>7VVe<3exa;HQ>XVL)l9$H+=H`o^vQr6>{F`g+% zWz8>WS(0~u+tgHB+zZCu+QWi2nz{{it0O(1$>2cv{J~)Fe|0EtL;(-^%K1oKgP&p_ zIJs`Zossegm&!2M^X;FV16RmN_wRp@B5Xq9cFjWh_tdbJTsc80Q1JaHlHug-;6hUL z7xmxIhPaI2&c(hTwKjFL6wVL<&-FfgjS)uIeZKpiO|+x815WAyMsh^7=VAyyC*pHK zL^o)BfprNqQphT@UH7I3R2$`XmW+L3JAd9hQF2p#g;7kCa8X@cY#e3IThP8HmSs8u zu|SdDqN%3iU6R(`G^e*h%p|jHml+&egbAHFnkI{C>FI9aq&g!WQ)_Kb{&E_JsJYsW zu2F*#0S2#d1&a9!>UExCQW}a469ypyiP}eKaSsRxaEPs@rxCYGM3Nef>OXT&{g1A# z*Ma;Oh<}^G~e$8xa{A4RW;CM_pV*)v~!&( z>_y6WMww%84nx-%#etBwAsk@~JG7Np@jvKzMUt7sgRcfk<+?^#5XB?OlC+XxuiK zu9+7vCe_B(Sx%J8xBo{962S{e<$c~;9)Vxs#dULqA(hxcLoBO8%<4-96`?gzu`5X$y}G;SWWpC2<0JR+MwnfT|bcp>504swahOwxTK6VnqQ4~AE^nd8ghOWj$+aGG~Ojh}y|!iPn2 zyN}-kY$T#83ObrP1N`I;*oglJ2Gg-OKLAeHX>pe4+s~gxCdcdS&hzKf9yKHs`d(8T zJ$`(5nv}Z`u2Pbc=Yw~(@}-DH`9W{7WP9`ey@$wHpoLU3x0x7J*=cC&n!sF8_z)Wq^1Bh3l%)_xvS7T_-}CpKbRL|N0O9Ml<&z1>_lx z$IPJcM!&_j3eG!sYQsE!MIo(l@wk(dyxoW!{CL+&oGV`m1I?)nO+V5i9lIeU?I#dY zx5zJABOG?DSh0A{`D;I-2bO<2X}{-;ge|o}FAC7RZXG)PORZQvISB^GE zoat3(WKL0B0w9`8;+G^75{Pa=wNZwx_&!A#J2PuDm04A*Qab4$D=eEep0d0uxp>k z+BFHCaRvCeEW~q({Q&}e6S@=t#aQ1P{=R`?aQ-JI@l%~XtrFydpb+#|4(3}<$^I!y zv$*;Xm~4d6E5>H~TsB14K%wg?N&rH|uJ5&j1Zmr2z}PqZa!(rn^V>Q9;((j5kOM>q z%Q=p_T*pl?VA8C(w4a2yPbDSOi_JG^<~*6nw-mS#|79?t*W?r)K>xr8X8im1plv92 z*3y`)E06pF$7Ou>s7g!qsWY{~1(efy*8jfRciJ=~#EY3QibbWT~mqewMd3t)i8S7ilD1UX%a8PZlXB}Ibs`EQfRk3!VUO)sRk7z1-;_+k0 z4q_TDkinS|uWsz0i#CWRv{&&QWepD#`pA=5bBP@nrqS0kE!Zd!r?s8nE~McE#CQuE zjq;f3FE%@ss72(eE7k=oC#8R>Dc(&tMSXOuu#D9Yl%PKNrgBCa9XDC(wCM7EfmR87f{5 zJSNHsv@{49wbXvRgj!gh>0nhXuPY(M+Cta+X!6N|h8HOj+4VDz<|kQ(6x84UG$$wT zK?CZU>ZBc?byjgI!3q2E!HS2~K=^sY#Eiq{B!kUi77#Iwt^Fj* zXPh%)08jbs3Nz;*8!Jb1+#!tD`Q}-zB+RrxXsJG$q2X6Deb%g@Tw4z&E^Q}XAoxU3 z_I@mmFVV1+zgSPM_t*GiIQ&CG!uI?TvE?)J8rjnjxEirM86=5V$S5dsprw72O!AB5emLNkuY5B zc9V69^XtwuN^fZoO&MzH>S>gd0TLcu@qqtdA(pEByK|@UJqh1Axf)KN+*om8`?#EW zm~gUDzVyNE@s+QrHadefzPN1N&n>b(1yJw}1$6hGJr4pHbM9ltqzVo?t7uez*9$yl zTUWFVqC;a@ihww(S2)>?Gnn+*u8ufB7*aFrqrRFGBK+}W^42O|KjjWOWywcTclmiq zd9G!C-y3G+$wNLq%i(hoEkcJe_w!S+F+}nZmC_jv^~v=F00@dTF=yd z?d5Q51t$NGZaZ%NUZ>~I*e#4-lQ9+8tv6=mV3JcGogQIU;-6}Hf#ff2+4J{~vl`cy zaO8~3elYsm6r4tn2A^@*-1kj;w~98jMJBE6=o~0eI?OL(;#ThY)@?2n9;Z(K+v!Np zR?26%seB={c{9XUgJ~XmDk&YFGM2(8~ zv3C≷EhSfqp|nZue>pV*+HiZ(t3R^P<(IFpH`kmm#_j8JV|8p|w`6k`@|8S|w40 zfF9fw_XTw6Eq^ZiwsE2g1~3Of+0Jusgx-=`s4IFYA>0*uT{@Ejz_aScE&2BSj;81) z=}J5anc+^W@a7;`5-zo+O>9rGW5$?ABM1NxQ?RjE$VR@C;e-JQWUjsq}`nmoC$_(CF& zyB@-pvZD9vAhB~4oK)mUF(d*+7X1Exqp4{H6pHAIC=_zgu-en1TFGWt&3$ibUP56i zdUP6PbqEM-Mm%Xe?v#}cWwK!TzQrq93>#&SuEAX>ZQzo~#1v69q^Et=Z7UZt&4QFt z*OQ7{=se?^8z^=TQ@m}u5Grys0HZkSy@$_QHA71;=q^(u5&Z6K*!xFfJ$<0fIqhF9 zEf!4Y)O!vA#w3Y{@m$0J5z~NwuU(|l?y`iq?K^j>lJI-;V?u7Im?V5U604t&H;fOG zX@N(=Y_4b-6m!kOFMv~|SVuAu^`xkI0OEy&)h+Tt{QuGqO5A61aEHaRFZLi})4fr{ zM~<63_eT9QN6kWKJ91w^y4~uDW~trVTPxZxu{{Ju#2fN2JkAJ#{#RSJN=5lI_Kx+Q zz|R4jjnAqPHV0DOE(A;A2fhUHI}luJZd;RIoQ*4R05e*Kj^%6$Dy=36ilq;vb$n7f zQ1R3Hs?tvQPs!r<4I>c=8U3=kZIK}jk2$_@#BslzRCHUr?0k2(2U9$iM3VzZPYd@` zmQ6P`TRGC^k_p-Dp;51q#qvls-fWGp6MXiVW~wCV84hFEgLB?R33d-Pm#T>?%^c|U zXyoyVcfPvVszrcxN)A$=}91!YP~2nSuqP<^f>up#Z-Ef!S*=$Je=1v6o* 
z^95T27iJxpgSlv?8pTEnxl=D^fAgkka!_UQojb$z$e5WO0XUzQ;pj8hBXu&GYmR ze3KSB`?E>N!=dZKn*%ruFvh}M#l}A;^=+`Sipp~T1~t9_0iWo~s{8NUlenmUh_k>2=__nYMZg3a z5)~<+3e4Mi<+g!e+XMUL0{B9C$F-NfdY{qNcWsQwr92`rY{JtxM0=n(&JN500{sph zxXIaywAR;`HguvvI?9!cep&yD7O)%B3wn;GWo(4o-H*j68)yUT%`5y-dk86*=o&Q( zpH8CHzssuMc)hlt?7(1}WBd{yr4u`ww_;#{lfeOTYUxmDpD7MP3qS0lgfiXZnHIhD zY}`Zh)rf1vsI`P3Vwf7ATD;}!WYgK%W%!0M1lI@m>OKHcApgs*Ea_$7bbnBf^FJPp zOPn)@dnFXW6Vn1Y2+_b$FmK`U;1R6S-Uvv3S7gOlAbi9WXnByM2N99c*9DIH1NAzbQ~9vIvzT zWgUb%QZxyPiFUy|E{ydFDnF{%DE)FoeU;9V#uB0FWMh!Y_bE%3G-lXbuQ1Fh&MC_A zVXc(fi9OodorRrrOVX}a<{xZ|rzf7y^Z8CR+OTPp2d|E>O^W_|KKW7n7g*fLUvpe5 zEJGX;G(eigf})^#c09$8Y_A0>jVo43%&0*h2Wa;H!x_URijaBvRQk`KyS6gaNel)! zxyLzkH0)crJ#h^V*9!p#iqh$NlT9ckW)O7p&TYB3q24iVPrh^+Fypu`OMt z#QD*!ptUlL&PJv_OjvhgxR^Sk1NZg3KIwhyv+o%B0}9;a4FaY-p+a3x8^Al7Ouk{g zY1(1uNhMzde=j<68t@afpI)Ou$uvGwdSIQ(yjIJXpfSZ!d7ir}N`CMoGsdSVYG|`u zrY2+@Mqj@8_NwvsO<|%pGa(H zRqy<`Ol_>G`(nHwwQiMJ>%${7+UFr8Y_`R;?WZ#oog`O#f^)#8o$WPVYJqEAjpjhE z4M#if|JF@;z_5|$9q3??p_B&~9RX+a~lYi)u#U#x_tIg;VsvEP?O&&8Z!r6V0o*^Q& zh-&kn2{AB$W85e$Y zdASP&Sjak(v8tc7|Ibjc*1Jgm3O^TW`oD@g*Qa&BRMMMRi`S?QWz>Jfh$%Etr@+;g z1!c4_ZfrpeV)*kf8V_HA=xfuS!mXqyZt2RggM5x3XML?s?STIM$MEmvixoAq z_B1UqX{5&n{N%G4~+02#LKc+QPx8 z8dg9s1;9gA>31oC_KOX6!hD~!wjw?(;W>I>mw2L%!UbbFtwM;^N# zNR5EX*(hsEukF4pR!o#m1LAMw-t49d+FPswmdV~EE0T>^$y%CWA0eiiscSmBrJVbr zB8;iIdI6k#bUuqV@-ziMAf>ZsuFGauH8ke-G)kjI*L$j-m*=*PmJ7euUOqh<46D(# z>fJjzfkRr18#Cqzo^$_WkGsmFHqdZS?h6QzzuOcd>B@w`erdAhVUF*FTxC9%*s}!2 zRzFB%HaZdu>N+Z)K~e_ZyLpn;FgYn3^ZE<#d+Na>5DU!lY_f3)U*<9S@)KvBi;kNx zd)tihW8*HGtxG8>^Z1S?mPTxS>%!txArW2A9y_QW>&>fj>LE!fJ6jbpa^HTksoPmu|urh~`iXY#?rPM=?q7TE6$@vn$2UT&cKf?xuTG zlvS{6V4+`W7A4r~%mdbt_+mF5BjUZF$`Raz6K7>=SO1)2@7M3Iuz@X2QWPjc7H{O3 z^W*}6vEuYo3=ww`%hI>1=2cvIxqF!ypA~9PeopjS@AfWJZ|5{v>D}Uq-eD_)qv_Nc zpOa_q{V@?yK2NAK2tl<+=)*M!T~@mq?E#Y%z?~RfqrWBRS|VtXGS(N*x-}0RWR@CvS-nSmwxc> z9(rH?jlZ*ozm38C(9wNz8MK*rhd zOa?w5>%okLPXiZUq@82@#d%qEM*~4J&tSRh5S@HYL}yMjx{)+(8`#$JWQzqZEPNMz4k_p`7@U|t=K+T~j2?7&%_!IJ2J&t; z`X^PHP!k*19T(Ms5p3ADEh*v5^M9pr)DZ)@wrlS-ZK__Zz?M1kw^5(46rLR2_nI1w zG;*4;>mt&y=(~yS7TkxpJyzD{MSdqoJZ?_h0oP~HM!0JeUp9Wst#Yk9v!+p|bD{4rNBtRRw9OnJd1vJL)nTBrb6v$l zJ-c)UjKqMPTJeg;HYRsE33fki;ihTZ8o#I2i474l;7K zuZvG(t_o<51?jQTs!J5Nt++?b9}R6wL44f!@`*@!MIX%ZJ0q5efB3j5y>0)6|1azO zG-lZd37w)+n{G}|J~sC<*cSP%^S`t*+uIuWB%=k-PU;YX4<4ORr--`xOH^3ukAU*Q zyUiZ3qvR;|`5_vM{xpXYqKuw~OmM{`=WB(rmkIY$ZG zQ8Uc_pHq=A!M6+mXajHx1nB~PcM;)YkLId4R33ZY`fR0Q;@f3AO-ep_pGipy%Iq=k zs%w5_sLKer3)+QA{!4uFR(~|w=6AC5Tf6DH@;(~)d=StS+|6T}GxIl>RW+y>t+V@$ zj-~j-oS>tv*-DU=CeAxz)EqbNX;AJz{i=6E>mEejarJRbEUgEBte>qnz+l_?qRw9l z3?N5wO3}i(g%4{%6Uhn+{
J`Z8y*q*5kaxtx}@AyfC0K6Peg;Dtv)PO}c@p zvO_GpIlA(Q4g>@Y5|gcT*OlB8AkkB{P9Dq<-a9qGQC}ej355egdY$+wO?ur0Z6PZ6 z6D4Pyb|$frh$iN(MY9LYDC}lrlB}1))?5%OCV2#Epx#T1m*K?PQDFc3HmB8#?08;) zYm}KxCm)4g&TbBgVUv@N+A|pW&UoI$rk>t1*9V+-5$xns0k0&O4v_U4TU?eTqvLcr zJe$%Y=-8P-N=8eYPZ!*s0}#4Ych0&?KODBN=vS-Pq)@oK*Lnt71c@u~YmgFQ4xs$g zj@R>F{R@56F(G{fiet{)4H{f|&HPQi;Z!lgorJ}-UECs@%y$uX z6_9Nr{C=sp;lseG1%-ixa)n&@GRNX17PGBK_Vb44#6Z_Lv;_y4_zK5}>+6pnE8hR= z-K*Cf0*#1OV3L9&qMb9T{i;;Lcz>};Syi$hDp^g!iShgfk&3|Fcp>Y!TOrV{>t4Kk zIWMB%Jc&JSNn_B_UJ`%|#(jSL!K#C0U>!MsCVeLN8S}`y5`7Fj4Q-^=$At>!&sJ7= zD@j$)Iy=q{#97P+2njcFfBX(*+TH<(4zyv0Oe}p8;gzB6IAQg#H?%O42yJi^8dUVu z{ZS|h&E5gP=y3Yrzy1NyVnQUGcNRxaSkeH4J^wx|wZ{VJO~!r;xW!^-B=b|#*SGV9 zw(@Vv>+pk=7!HiWJE0!Bwb%SyUd>v9QNW)F@&P5$9NlDiCCQDvLHM}sO<@awFw*S0^tM2mee)l8e z7zzQn9>r3m#s|A$D=Co*Gad@(aUs0qPG~tgI$jODV_4*rG%3+7{K1-6z870M?I#dg z*uTulJ?HCt@cj9aOph#1S5#2AMV%(BRlKZzyu=9S*PMh*90YdA&RrC?eEp{v$-B6@ z@|FEo!s9_EH#lwS^l8Z8plx=BODZ#Tqxv(EjiSI~V|Wg*8?HWz<=gD64&_|FvU-Tm zE7M<23m-nVwai)dp-^YkW(kXig69`PX?cexuX3=nOWf5CrxF_-?STk=srLSVhW%)2 zilq9WI7(RcB5gLiBH$0TNEq@#dJEA8O4WxlbI{>@Lc&DD2>})luaxgOPWiTE#*7&o z8F)1~Z{n&kcW!80T-@Et0>>nMp9p(B^XixC+jj5Xefe|zJ{L~0@H>i2N{W8>&Z40K zN;rkZcE^9m8Bge_35Ajh;yhve`16!n$6IZJFJ_H&*pV8reTiK{-I_D$$t~)|xseYt z5CKK`8X6fLz3_H>sy+KGLB|_^{D^L9+8K=VkIBlF10)fu&}pr9rT2^P#M;R8?!Z^Pr&MBt`3Z9x)Bfm|>IAic`4^wz9U~ zQkA%T{CJuLT0R@n;Jb!8xs@z=Qx5V32A)rR3MW|{Z#5_~N#eilLz*Z%$3^`*ov5s$ z+V}w7yap_TJRElHP+*>^@PNUDfOl~dm4ozV{U8(hVhv%kVnu%`n=0lb76Xe(zS151 ztD~do(|4Lm|)gPIc4fvL^fx)P4;3qMD{Azi5d8$}7WFZOr zx!?wtAiBD`;b4-I26uX@p{!L1ICZL!{g2~SRQw>i7t<~#CixboUM^KX46$SYy)bO< zzBMj()Tx>j5vi0~a&pJKyrL){{fPVQ$1E7U&P59w8Fht$J?$5q`|k(3vkGSn#A;rEufmfA(9!p9Vr=XISY5G}NoH2k;}R@p z=qZ3%!&VK2;bc2AAG z%g?um|8;W80brk^5F7S++KQ`qD9|w6%~Qn@$s&0>Fa@uv&Q zhdf^+3Z_EaBwzqUZA_0`T&Qp~hk44>b6a-ro&hB|*;GEFMxq{`^fpYRc!>6|kOG85 z!1DIOa1M#m*k$JulumtQBJCautw}YhVpG%zh z?6_YHth<2XtRJ25VtxHVSK5$GrUfg0{V<^`aMeHcSO0z65p9Ab2%2E4 zw#~Br*OUDtyLUv7dbDKi-af)D5N}9BLqljZH)<=LUp}PsTfpWK6ZQ92zkfe(a$x0I z=W;FFUSI59dEs+^3=DfJGpl;Yr!ZHoY;#&RU z=g)5=n*IHbIzxzh&X})OWo#N<-u=gqb13ibwGP?;QMI{46GMAqJ&doTGre=Kb?erN zi9D`P_Z~eMr&bg~$5OUCckZlpb8r3i!_>H=H8fURwnOIIrd`IX#SLSL`LV`AXv4p` z#kiBf+O^({1|2=y->xc{9kk;@UHT(N?jjKx9sTan^oqNT0ogNX>mL|sNh$|wE!dO9 zc#t~`fSbo4B?{m}jD3p6zDt)be~CR-rW}p`GLm0K^5Dof$gF%jyx;%*5O$kuT`2$F zeXgywh3P*2OSmFYL~DcWq@CLC>N;Dz7&ytX=Hh41#AxL#6$p<(f7^egAi^UE<3{ z#db8LmBNl56)QaFdytEX{8RRhI*;t*w@nUtd{gI@o4=t(Jp%rh_~rKAc@Y z{HB1-!obLA(eE;eEtVHtpy2knQ%R`7LEr7O!&cA&3hhY;}U6NQf=hqn4ycw97 zL?!E2h9Ka2`o(34(FbJPV<12~ZT`7hS6|=GDd<}3Ba-z7ZfSQ4d7%M-BBjF(qpB9n z@SI;&ew(c4*^Vx0Y1zZfqa|ip7JJ1i89|)m*VBcg*P!Q6DR*W6pevbC@62{^LI9^M zfMO*6Tzj^5#R?BVf#Y`u$=(PIoV3~_CUl>f!@8oN=)~$i5;xlaXduKn1m87&V9$m> zad8t6&~E_1kcd1R%D!YVLFQW$nP{IDwB0)0zA|G{Ky!f;f$vXt>YN_hWBm+(Sftu! zd`rv7IL-Skf^HueT2i>)0&Q&1UcLN2U7U3JerK(b?3-mJL4Lj*MIx0?IIU{_qeoFF zFAE{bbtVUHdFbD#XU}k!U#6s|j}4z|eX*_h(~}%gs^y7%2c20sd! z;fF1a?g*E!9YAuuYinya^R%?boK2kImv{FSO^K~@wwH{oY=4PbhFZwe6DCg{OETH> zqtrSN7G}$@o(DCdNyqr*Z3e!6#_DeIo#F67;H#UO5ZI-gJ$ASe5 z3b=j>BS*SXV$P>P5uc`EyZ+hSb@%LM>C~bb2&3>_ZetT$^|PUUfaf#L4;F|XC@Gdv zyfKHUEph+$?SpV1cXiagDjwi-Gzz!^q;_P0u6&Sjl>;o;~xY1Z;zl;7e{?Zf>qGD!W7TMgQ-sf)c_4`hS$gx@f_sCs+r?^}dn$e}v$u-Xq{;W= z_f$|J3Evv^g%_h#M)vdF!aOcv8$WV$5sdj)zBAmb%schFggOxZNd@4p|{Pm&AlaZrG&!6?6uBvLxTG^HH7cOYhQcm6UP&#ifq+4&swmu@vMJ47l z?%b|+;SlTT2olLVqz-v5V8pKOB{vq+e-!nPzbR zx?zUhXEG8?M(@8pCNeU0m#OiqXCqe7YM+J}dHhb$RR>4jEH58EWZG7Vr%!ciN-kD( z>hofoe$iud0{jF;MUcYDLH~u@(f+!R?9S6YQ1u5i*AjGS=J8hy5q?;Y4KC8cP8l~! 
zPuPVS#!3zwHpzRs@fqgQ86eoXZ$^U6TKYzYPBpfbWfXjw^3J(@cQ>>d8!h;76bO;Fv5pz(qNQ6$!$``(VeoD??*5o~P z2cMPWJCeA8RquiF1J&?zXo<}pGy@hO^6kKX9i!Ny8ujS3Ejd%Tu=O1=r_7>^ z$|5mc$?{XZ{)b^V(N^4HLYgL$8O4Frb5AiIPX2GvBB6_BeC@o_G=nq8jvf129k@<6 z?nlY9XY0x8_a8odW?B}_x50tWoo@1M4o;;gAiFV`V!(O@@P}I!73-9hv;v79;7ZR& zA&fe67^uy@V;+!*_`FHO61Ma_)R73FZvU?HxjnTCtgX26v+RA&`B>u#&x(sj(GHMx z_FAzT#qX8(sfXK+BER*2gP{56Lx-HGbq>XU3EB>CnB8}Pof2-qb{16HW_Fh6Hx#1n znlGWcQyaoV3UwKg^DS+Okk!%2?p|k?@Mo^&r-JZreb?*i9{agvaDf0%)?cFD{}IjtN9bN#gas&58qivNmxb@BmWJekdnY zA9t?})*I{RuUilxDxa*Za7y)EM0Mf!y#LR)(3w8HRIhm;zt4f<-2mm4)ag~>j$#tm z7(a^%6DGXDVE&KY1&t}t8;da96OR6=dxla``^sng>qc#K&{<*+%Pc>4A=c*q!8?03 zFIuAFL8C8k<@{3i%K7t}$~Co%KLUida+r%`^Z9&wx zzHb;gp>!5u@!m*~7}F6;{XqlAz@dFvwUg71fGU=#(O$>UqH@0Rm_$hGBW&o)<1 zk01T`9|{5tF%3Rg&nyc6?@_U$asR~eNh2KE~6RE5u6>JrvyC??IRerZ<5vlC@Sf~Rah>kyB-R;c-jHu+nJKtIR%nSv5Vsz_>-^|q4IpmB>moB-D?C#r8Y-MEy zz&Ws-exUVnkkXIMg})}eJ~(nYz|<-a&B=jR;;-eq-*t3y8XNAB(r?(HD=OU&YNANj z)*kjUN8fQpnYVY3H(^^X36!ENNnDkDP?&1HL^n-ae=KSqKQ7xfbX4JT;x1f}Tyh+A z*O%5hW7EI?^EJdj!8dZ574aYusWqtbOOswO@mhK z2zXG%sO8WIsx@gDQ z*PX09ZI)H0s4_VwJ$1cXmEKP{Xfe7^p=lp*lzd=83Ej`QFgi0MvCpDa2o8as1n5EO zTHYLaDLrR4F^sX9<#*^YR77^wZbtFo>JTX!q$s0F=!&bv2K<0Irh=5CyJgYW`~0en zR#u~!;9_FZEK{}AY|VEebaQui@5I34l;Ct-{qOSXt1+{%-+tixy_Sk&A5<1g)l}Zs{F=uuz6U_ zb^8MzQJt76TAgQlXOUxXRm)F*x=UQFx$`2TtDE}c1g`18;lr0gA%)|vd;n0OCMs~P z@2vKHr{->5Rt99nVk9b}8pI9bRA4h8LJ|1VOclcRgFFK(vT9W+6-GDg6gs#NS6;)TFGQ0x&ZjE)j zJo#PRw=RDszDuY(t-qt#Df9f}J$5pdK7G$V>A1<$v2W&@rQ~U0TXL zG0B2Z6@xU_O&6jv5Cgp1K_`*9rhJrr@vr_;c^j2|^HQ6WOP;SulewIhk+tvdE4n|P zBaw4@8>z8?=_nl1kI;w2QkJru)OfXSDQG%4zDotm6j$lrB=?fc>BS zpX9z@5#0KY><;W$P%r4{tePm>2CrfLo|IZU*9?wZwYOrqIvIzlkm;1RZJwKY!>ArMPFOD)P zd=cZURi8iWGfh3#{Q8~%-6czo9{C6|tBQ1}w*0z=shOE*ny504zKWj`xbN$&ZX366 zU&dAmRlxYT`1k>#Muc||&8jzVGEast!RyEJ~SIcGM zs8I(*Fyi)9ef+qXb}|Avd)&o~F@ynMAf(b4FPXN++Njd)j*jB16j1in0QhZW z@_6^GNt3+J?foK*ie>KQ=FZ(bfUoJao;Xql8!{^J0kw*N5hbM{$4;I-+ehN>JjItU zeQ5)?@iX-zb45l*9%&V`2Gk@!LjW_ceZAdF!VPh_+mRE&!7o24MaIOqANiMA(1vUP zZnq<1uwLROYWwxfmZ4s3m_d{3hQ6GjajzB02|o8PEV5}TJs5J|wHM%Nq#Sm53nR>d zP=vm0XWjEYfW=$wL;0-7wPJqEk+Dp0$28khpyArupkt}~+V`vbyWaKieNg?*4k=##lPBZk!s&`nteG4G%96fkC}RRAwl32)HZ+_E z!pyujUDk}gU=92my74j10sj6+uJju*M-N4^@$%)Gd{%C~;E*`Q^zJM@7AwV%COUzW zyrg(_MPV^(^l7Epx4H4B*V1Un!v-|`;3vtW-KKnxjf*=5nxse9Qu5+40_NX0NUBvz zT9SA31_BM*&GgP`mF#@r$YSMD5t-77_a;r45RUR-Pj$e`%_wj-+Suq&8wju1lk>tP zHQe|&7T0n}Zuj%^kFmsBmzR@GmbUa6J6v4aB?m{I;K0Di79iF8$W%DzVm^g{?|xFC z-OJFxz`@l1`S^&N+bh_OUr~CTw>8RltZyxx;cM-T4rJM=89)xDg=YmXBcUZ!otD}=g+$_ z{R1s#xM>AkD|?RG*bdN5_MHltr6*1&=L2Lvn-palX!WhJFBq*T7N;a(%=o&ybAT3|Q)-3!5m98V7_@LM)V2jD#n(7q9}5Xjkx5%#Oi zmPA1t8yo-7&}Bx^dSRex!-h_>{Ip?|gr@a~&LXT!*QvX7J;9%rdQZfs`@iY`{2mc^ YbL_Kw=QP?R_@B;#rP@jJ)*t+T0AGifWB>pF literal 0 HcmV?d00001 diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/stencil.png b/src/dios-egraphs/Diospyros/data-plots/plots/stencil.png new file mode 100644 index 0000000000000000000000000000000000000000..987f7489e946721928ded9088c568cff577aee22 GIT binary patch literal 41939 zcmc$`2{@K*+b(<|Nkphjp@fLYR3byU8w@2wB@O1Hlp!H=lOai_&}1f+P>Lo*h?KDq zg%HY2=HcH@&$qtsU2A=7ecSr~wQc{iZSVU&Z*kw(b)Lt09Q(2F`*8;D-?w|&Qof}W zMJ?0XLo=W#dS8m7YhAJ!f1+63+=2hu>b%p~+0g!kv#Yt2HMQ5=*}=}<+0NEtjf=I@ zIa~X)^3saZo21s*I6FI>Qrz}>0XOBusqP;F13Pgd+X#}YZJ3%TtLK^w?6g7$to)H9`;+GRerv^HIb8pjsySn zu)eKe)1cuW-XO*`_=C_J=5%}Tf0HbYDNgbOJga=kUn~3lfBkD))ky8D5{4;O<(5o~ z7bhHh?eZZ~lG|v*%3q)EL`iJT<9_S@cSl>{RmBwRU(+5vHKIn34lXxrf1RJ7zi-=FcQd*N>pVR@tDYQFX=rFD zXlUU1`t_@WqvJdLioDA0gZzr8R_TdBUejZ4BfpmY`g||w{GYGyN=n3z3Dvf=2)*?f_q%sb 
z;CgVdxwZ9Gljo<*hdavnM!uvOG9~Lo&2_v=(TzzMI)3N&?ZS8O=(xmmHEe8b+|`1e z^;7o=biVBF>3P@OyjoN{LSwxDyW?ngb;XVGPp7IA7CEJatX)H9#f=Bg_uW;xcx&U4pLnr~UU7-9FVCG+u&r4{E$8IC#K6R6){trLHZvi6 z;?wQOvL4Q$>({Sb++JdTO;F70})p9UZ3^t0sOl zchm7VZWkXN%CIQ8&Ks?K{?x;-udT{Bqhn&)zQ1-=a_U&(J^T02T_s2QuFv-_YbR^B zpA5}gxrF#dJzS2ya%SG@)KQf?(D^YchU+)~&~WERTE=06 z!lI&3#nWF$*(2yzZZOiwIr}}L32(eBa>Id4$M#+6QdDqouzaWEm)O~eH~A8O(<20_;(p6d1*tqq}pHHlf&ra4) z^wJmD=g<1J75Q5Z{VXP{c%mXYzIi;|hq?6q`!&+iY<_-zJ!3o@O|ms!*frw2OGH=- zu%PRj?^9wlD7ZxhA-#9 zhLwJ70@s%DZ;qn{a&FqZc^9_QacZ=O_EF7ajMZyu^kQ`~L$#;A zzJBz!3v$<1ZCtu;-MST0#>*{Bu5ULoGD?`-70h>a+lApZM_$<3AWG+E$6IKBbe`yi zPTe#<6|1-7x3{41rJX^mB>Yo2e~eeE&zlXlzWZ=ziwrhAPU^MWkVV9X152?jA7a(j z{aAS~wYHiC|9!+h^gY*+;rZ!$zqB+7eDAe0O)u!Exyddeqcj6XF)=Z5{e(p~1XRY# z-oz#tq?y^;#@NU-^?!d|oo#E_Q5h?QPt1JwtSH;ImQ!Fup3l76w-;wva1eO-`4crR z(s$RTaqZZ#L*dtvd~eka2Olrt;^OM4h~c`Ye7>+YRnIU&##~6_@AO3LJBHEUUsw<{7cwn@lF3oWF%lY?#Tn`RE6MK5%)3w!G zo#L!N-pciweo>-ax7GgptE>z zkr5r2``z6(y0;aWS4tkF;Y$&H2kRam=31|Jzb;zudwT15i%%(n)BNN~ChUgMlcT&* z(#N#3e;^3roqKA?j`q|f<7|A+@K&3enljEbqg%atHOXK$p8a9J876RKvh7<0x1RfT z1GAa@14xMa*z{;5Cp;!1ys#vgKXUrZz`#8f*Q$6;rsz9&)*X6slt$hWpIErLxsSeeS8S}WeE)vYnQz(ehT2QRXX3MPn5;cmsCVz)@oaSxS-MJU35m6h zCM)DmRA?MMdbh;M(a|wl-AAoFLW054(z2s0oC&$Q5a*Tdea@f3L5Ii3ULS1q7W2$_ za_qGqp1cT|^u&o14@~pbKF6wO4DrN%`|91yJPz9P^M6QM%gxOdnf!Qx9INp4>|%RwU%^sPL;Ue+hN%hLFE5^G z^A~a$?>~kVx(I<%`0`wr0-Nbuujy5vZYyf0?z=|UKetre%l_@$F5;^4*?Ghn`SXhmOgoYd%zstH*wm&h6I69i@=OU?z4g+IGfkw>3knLl z%}y!0kN2k!wTnMTmaay<)xE34akBFMUYFO)d3XXyC<+VH7x}aAesc6>rb{2fqnf_y z#Tm^%U(@L*$%Bsz2HT3Nu^-8CuN~VLOCQbkMReFaex~WqMheEZZQC5?W-dq>XHtl> zVpE@4#x-*{&G6)0XG~>|y}E+PGB3Qs%=hOA4j>M22pz-X0>ms5f$q@Bv#3rDFV66z zIxJ;kVtV5}%ZG<6?rX}?kKb|C$}}Ix$^tdD;i)wXe&(UB4*(E=)Ai5yREltxQG*{G zd%gL-ss~w(g#Gb521iG)ynd|$I1um1d9)?ZE9=B3K83T*JgDrNy1J}DoR?7t6AhBJ zjdRWlO#S`Cw0`}1R-R21WrplViI-1 zQdPqA)pz^42OQtBZ7(C_%&@={J*lz#Q*?RpXiANxd$@PByj{2ZMnGO(UiY6rLC>B& z!?*9kn@UwzR|o!LTtjsr|CVP11_fnhWo4iJZW8k~$KJFoN+$M=`(JI#$$j^fwUD|+ zg@t#}XkXKhtao>JcluSq8$5YS`q&1l0|-G__-9|=M#>83mT%;IPt8v112S5qt*Xb^ z3V!}Ov_|B6DIuRE!zs@&1`Slk1lcoxe}AFZI92yxt*oqU`Dm1k=`|cf1-pj*7M7Ng zJw7u-{?yF_j}A)Io~nO>k8UpsF3(nR?O#Gw*Vcw(StM%#U8N{1d`@h|@2;-YZ7m`5 z-V<$1fk8pF(WbM4q6#=qvWkk4;{z>00UuC*7x~(Kf2G)1`geSs+sn&q5^zZ|WqM*F z;Nr!LC8J3{UWc@p{+(_})e{PyG|4%;5l~tCOS@Pn-bG6-$EmYi>&Kf5oqzw-muLT} z&j`AI|2_e1`kxUqKEsZ6NTJ2=-_xi)_&uxl0US?H*Gn_Av+F6?BhZpj>tm0`P7MwG zcq_Sm`}Ut*UF$#I+Q@~vkd>XS>-6>%)`4^7%4FF-1gV9sZQ9VQN9u<7sIV>eKg4TY z2Yz4`@XO`d3RbINw)?ob10>;pR=x81iB}ytPr*#k6`;+Yu7Dj}IlV3#d zpzw=d#pN-g?qm3RJsPb8c!FNTk6q|~cNXW_lYkWuhvpt2`D?Q-Th@t+Q=Sb}uLC@o zbTHkdJiFt?bLN2&fQaF(OpJ_LIJQZ;F>>V|Yr@=Ka|;M;L<@4u{qJvET|MdP(vUm1 z`{s}&l68ZwUE>5cvc*vdt6Zl#v^xA8uC@}$vY zWBr%M9_(M6ZP&<&H`#@Spe<_3vEM!N>(g3cx4*w5hr53NzIOBG^3I#@aR|)q?L)#u zbxu{J{$NN@>u_!@prc4eQ+)bhT*N!LQZ=^jcT0Z$F4LQqr8m|pEAwF|>bj$mr+k1r z)+@7GpE%)%lO>U(6Cr*P@T%a)kK^s!nN#y2f~wm3Pms)7yQ>qG+(*S<*tgtRtLbNk zqUCVzTx~R<25r*E*qG<|TlcHC6;BHz&${+E(^1y!Em+0yZ8J+L6_;MXjMXSMaYKSf z4j+y;&awz(lT4|L+kRQ$Y&{TIhH3u6&klP-O1*}DGQN5Hwi*x+pwTeq_zSzkD8u5h zwR)Yp`_c@OTl@Ng(SW@}LrAg_nh+q=Kma;V$Aix~MYY%H-QSk^{CRP~MF#zB8~xsv ze4&(-lrrhWjEsy?m7t)YqB~pcmveDl){fjDdO-=*>_%p0=Kh7JDDa8Z`upcA0;*G< z;k~^Fpq;^NGpC%qJcF;^{ZH;#R;8_5ZC790A}?R$OUKYUFp#nO?5EImOaz>Zuv_}r zeYm-nFXzkpPig8b2+Z9$Q$qlik{-T*RaqBCPGq0@vOzz#3bjiZ4_e+QGg;C3A(Vkz z`e*@ONBxeD`WVGd&<37`>t6BP(2KEi_z_YduUx%q zjwX?BV0&TT4}=y6dQ)tl_wc5v)T9%yV>4qPp%uP+_pZSkFFhS8>O8HpJQAzL_6G2V zTjn^aK7g_W!&oOiIQZB&i}n$Jyc(qnFk{F5{mX$tvOFi9kgj&OXCPrvbARf*=jXhd z-&~Nd_ACzKu8$r7K&`5+)ycj?P_F50v--HF!N;#(54MlBmxj=Ze73W(2@u|SO?>>( 
zO7Z<`s!QZN#(Kqpx)W?QW=b}QURx>Yw|E&J!e}wU26Geb?5u*@cyLNChlGU4wS2m> zrTW$R(^i%D1%Cbd1rQhV)ZAeF+0*CGuim-CfAGoCK$MH3FJD%osMU1)pU2Y$EWwA~ z>h{X>ns#}p=YU54X33fJbP3)9HSLb!9_(~D8iE{q9;takl=oE6dNlj*%F37t#Jagw z)1kllrAYnc(7C72o?S)>*tPac;%;`djf+lxzDu{e;w)XFr!R}QuK%ooSVpR7tVT%Vl#3auTgc*Fx$MVl(Bz^CaC zQjxIF+zcP%X>CSEMj$HT#x8zt?x+Xs15w$sRP-iudP02!^KTKG+SdpeivDLQ=_k?~ zBYeX~A~qg57?Xpf8!dlwU)YG((cUDQQ~oA-_q(sJwo4 z|Azf>)X$$k;|$Uqux}#I9*sA9rd7UW>-n2&x&5dBnwM&*Xo9O%FoP zFK(J`nm}sB;R1>0-m~tcyG=KFUWmjQy2buDDo;^Sk!FwPgyWAS&rc_Zbtb2#=rt0& zkEfCJjm@?Ij8k&%UQSXh7Q(V7Nh?<1#`?6r5MX?%jT^J@p>+=ru%K%AM@Ak98|bRH z+k&L(i*sv%vrL{J8R9Ch{0Xzt8v?O?>Ibtdc1B1Y=Jc7L-2yb7+MA{8HfE1heDvs1 zLV}3Y$IrFhug>dGYrC@JKZJ?09LzYL?nuAi zy~giFQVIQj{{s(=m^EhB23+ZH$&a=9fiQblS7)@UgT75-MAY@ygIC5e2)k{ns`_u;&r2P9wIY9Jn5DP3 zca1Qrz^5vifKj*RD+6t%I475J*i$DR)XSb1jb@)|+Duv)l%^`1DAQrhO>6)Dxn*=j z=lAm&10c_aT*r0b1=TO4JP!`u`*h$~;a%z`=mO{71`ZHtG&~(axv23g`}_O(G`YU# z` z>DVI80_9c$B1x|uZLuS0(VgrDdETqBCLl<^1vp;g_ zuA&2s(|H3QxorHqHLQbS{X zM_5-YEl1MQQMhmJWH1#Ign)65mIEUQ;>{;v?enn2A5B|qYeWcZ$v2&WmJ*Lth_C0` z$iI5^V>=c?pmAt9+1nOY8ll_gnl-Mkuistdw=BzJ{A_ShHyQ#Gk7!SZ%Epzp;=4%S zYJTQSI%~_YERqpOvf#v>b8~ZA$A{LX^=eiB>YM9w0G-0K*^UQkx(Y>*s;7e*F?`!7c2AN=)+ALK{z z#v?BX+p2V7SmfcMj2jIN#yD3@U@70>mGsjLM9J2muP0kdPz91m+r&`2+hFVVgsxhY z7Ls{w(E1?P-UtX-LI@IsjNa3A_S*OLOh`Fw{{CI|{BI+LQ*}!~$`UFz3v?z5#MWZd zC8mD}RRMC*cY85#8EyYCrzP*w{D@X`O5=#16pYs0TNdLGlo87C)Ql zHuN(fFz_8no(Lg24bSa!A7%0jzJC|M`q_MWxDp-7Vs&-(-sYEF42xO!L`kM4)wh4X zt;mA9=7)4YHP-itwG8yiSjMXpL+D=}I?Gpr4c*lv>`3Yg4)X`ZVFK6vOP4MU;J$4~`|weEpF1DN7>fT;OSO*dwVzJdA+XD z$~Q0a%Y6ED2YOO-U}S(iQWQ1t&0K$FM@L39Nh|+Af1TTf3lf?Kyta**P7ZhKzjC(i ze4nGY?Bj4&gy%0ny!~++blIot*>2ppu?e^rk>CKWsN%k=%FYnW)2D;5zodFnH@nX% zxR2WOHa=q_^cOJFew3dp{{BU!rA$aAm-F-0ow_~;W8DcNA>1kIMhIx`wV>buSGI!^ zjEp}4&Nu+5+#D1JVXp1YoY7C#UO6-TQHJy%I0P5pU1lmOD4+nYHNliWEPa1R`TPbn z1y`B5#KbH0NHY5T`LoTpYzZ`1m+=MhI#JSYlf$B`Hkw>X*co)HcmIUZScvUM7xe>j zy`Glk3QX*R5o%sjd&Gzwj$ zn2EiaSsW{`91WbRI5-pzw0dc#`FT>Nc}FetKsLw7TKa*7*9PbRzOZlyUQOIEbzda5 z)H?6OhdsP%p1B?6kvix;Hz2J?A^nQ+iP1#7-2!Wpb%H=aM2PF}09P5?yJ?ezmztKo zOzcf}9$&nU=*tQrpP99yI=67Jp9Ul+C(jHPu@5611z!l`A1*8@*#%PbcEV~Qol`tK z3=9ks21xEW$%CT>e2D^R}-Ly7xRI*iBn4HyzQtnb)ZrK& z*1TzxurT5?97i=MFfb5ZrVdbfCk`Xuh)9(DR=|~Tw2^y|e?)=lfecqk0BQp>=~ybU zhR*v1?fISIJv)`=K63>~%nB?d8kfZEK&XSF?ml&~X%H$!O?DgfIibi|*xSoMmiXD) zx&{S(k4r;Jh}X>BPZbqO*sz-PBU_*{>vprrZr;2Kv>2dj zY{@o+Q#eYe78t_!*c)I7D-bGtP3!*t@%Zy~qiy|@dsZKB?Uj;}vVwNT$;%sKbsy(^ zDHWsU<(@FD_SfW^=bSwE%9WytyF*s(-o3l**2X(lpKc#mEg)bG;YsU>%vPK@PU>ez zM?}Z0xw-kz{(fl)I4eM;Kz`lpJwNLj9A5;)V}(^Vcxt63tDq18JiiB&6FM-- z@@?p@LkavT5)uU;fBfwRkfs(VPKcq+HOM)8Y-n_}?!$1}nZDAd%ODKJnofX^kM)^L z^W;XP7y!fF>u}xe)EM5x=Jm-%J^74CdKyhGzH6=RM##n3lU5{&553~HKRrCuXc4k? 
zm!}sqGA2)m+?gEdvP5Vca6*QN@B|9GKJwA<%kbZ;;bU|& ze(#Hm+wkolAgW%IG1P=m)FQ;O9i+o>S7ipP)10U=<*j*N-RJ)A(<0E&tH-;5{Kfaj z6>!Ux(hu?{7XH>z*C>hd02(pO@Cn&rN*U01gt>%O6j=6iK=Ju4Tb^bnlg(SX0ep~PK zY@3HRjbn0^SS!nH{uL&Cp-obCa&7wroN#%Ha{4JlxjKgLkwm&=Z zX%N^!x6IPoI#MD;=T)+HZ1;*!uI7>E%Int(9|Pn}T8TVpW^K)gG)X`LvV1Z2g=58v zMbrXO{Km}@3R%LQP~m7L$Apr@S)_MBRUi!oQb?jj@wHsf$)`^9XbAP6o!Co+acm^1 z^bnK7KZOr2&qS3$)h9gy3Qb{M-3rt#;H?_tUphjU@$kf z)ngTa-E6{95ErF5K}+oI?FlzP1P)}`+iHtBIP`b5t~IM!iiCAJCWe<<19=%0w+K{2 zTq^fGzcmDe@;A`)O7QyufWGDx(Xx~;*lIH*BGeZVDh|(9J_E_48_tAXH&&-L=a{{t zCt|72EvbuHS&rA_K!Svj(np#7u1ga?@Nfmu{E=T%8ZDP|a9A(urxQuvk30c1TGZ6U zPpBNUmwa2!UI&(9pordvq+L{Az7zrL~+D|?<7PLxNIN;Vs*gY z>_ALbCuwo@Hs!G6UyFbiIRs`+nWkV~X$4p%uLEL%4tSKHGPInXkU=&ursU*o0b6`y zdVHYh^JnQ|-Y@*`P zdfk4ctqAF3d_R8t=(#Kx2GpCUemu2dkJZw6heLN=%V23Zmh`7*@ddUX?0NRAiqomMe!@x zkkm_$j^Vhi@I}O{#O!atT|b0}1v-ichN*0n8B#p?HasMOqeMspM?^<~zi5Zp05*e( zo`E4$#q}&O?_!`C#^o)p11K=>ux;c}1}s78tO86__!!F(vYkeYIA|ms<`@+?u`kW& z;R*qI&@GC{o%Cq&2&zMCT+b~)&$evYPB7r;80aBN1y6&VW(N#!`1Wo1dhGyfOVa>1 z(SOy?$aJ-}T?9y#^kHOKrCS<-G@K-9Or7wBDseQ=lNYOU_2NZ3R#w(Ql(lxC zzD=7p6$Emv4?Fr9O#-^y@)jg5d8Mr=5?�OOzOpnt*Y4TYf{weiH_pia{5ZVu(FN^?2$XSQ1HPwa zV?SMzZvBfhN1!qDS!tnMapdb{rk3H z77BQtZ8O83m~trPNM8soZp3ko)jjxHDBtex>nfwlM%7gLTF0txILK0Xw++9>0= z05Db&61u_jJa}bv?`&R%&oF(%9U2g@=TIi~&pU1NnOhCSx}7)?e%ArC?x(}2XbmYp zJn(2Sbb553tq7V1u-xG6YGo9FN48vNTw}`!Obsc+?C1MHG~qP#N1H&3u8_$``8bMX zN`z%V3napDXI&jUymxN_s!SKq-;|3)0eWbZb_pr) zCz{HM!8Qi;FadM3)0O~b<(}v(RTi+Qb$$8~f0)0loUM>h>;+jg1a97bbk1r8q(ayR z3W|!>HC~vRomGFy#-Y`2M7Dp+e}i%hh$_IDl?6Bvr?H6sCSr6?De3|KZ;kAK`PV&n z;tkR=5kM6;kVZy7wI{baYecG5^NtyFAM~2E?t~YZp=^szhAqIs6iF zK2c^k?69{mAxko#6!P%$wv~micbgvD=swy_$HKxwuoH;CT%Y;8r#Db35-*JW!s}}Q z+Ys~7Y9)s?s>csv8$}3!JCJc)6%Hc{v}u9;EbQ$5gk_eJBF#O7>6Q57A{dkCC@8^% z7J=}Mqj9$2<3|?IBEH(m249DuX&~kp(ajg51UdA5QzY^WwI2o(Law5u82*|^Xl=G@ z@I`=%*J9r{S(dO8=Ld9T8ZsPF`bgTvks_)nZPDJ_3hw|gC`!|h6-Kxf`(o9W2?+@S z3Nhd`_6+j@;EP`MqO=}Gg3*FNwnxDWd*L=er#d(FM+OboN{Se=;_&j(vX+aVT9sWw zzpZfkE31o(3q^T&cm$t+5S8=d#bQe8Xzp_0tYVY{;wc3Xvp`+v0ZB-iVe80e64CN; zJI0O4Y&4a_ zl2`r|EHF~`no_vE#r_7oqN&}zbs&-pii;VM1unjPx%KY7dyY7V*O{l6ad8B{bvcEW z+b=OO(O`y715d@l&Fv4EAPl0U`^zKlqsNX_12I&@stX~UtBiM}ibVWmr|>C}ofjv< z4GHXnq{tAU-q8eqS#YU>f&vYyt)dez4^JG_&+=^eIpd+b!AG~5GQ(F-bVix(2TQDb zf7{kF`Y6&PWKp`wFg=ktL8B&ddmgqPV(QXImXtvxw?pw~dV@BQ)= za+n{s6FJcje@`ekZP_SJY{^d009o#TPeGUZ+4}A>pT7@X!Ta|jW%Pdykv09Hmc=8S z28V{W!&#Y$#zn@23CXVB4CRjyT{8JIm%t*V+RWj@2ta;-PKoG%==c(WjvHVkR9-*p zat|h0oJV*<&m>F(Dml(hod@EStOa;uQdLzY#7fM;JkJZcuU{9{)^dS}D_OAQ-QRZM z5|n?*oXS}BSer?35axJf<2=u;fHhd1zj`NL6ZCS^eZd!NJ%seVKfti6+{y~E8HqEjr!R;5Zm9v!Uf;$3@D1LSy{5~W4+9#rlxndUEl;z)CAE6E{_?&h&(sWVu+A>3CfoPq|0D$ z-`}06uQ%jRekR*VvI#0>#0A8P`Ps8KOuZ)=QHkezN5l_4<}rC;7l?GfL7 zT#U>{90wXRG4F8#{bFNCLwhb4{k8mQngk80=ah4KCbX* z6W9S5U}k1UAsvyR!HPh=OO;#hyaV}7$#Y`UuIoG((OedxGb8=e-L2;$rUxL-AcTGb zUw(*^5h7+)0`UKcn`Da4zMG6usE{R%>rZAf-xL^6c8Z zI}!Sul0z$nLr=fP`%M9%Gk_AV;&>4QGpb$6STdmDl*I;)&okz%i!49Trafo zv7O*Ih?8UPU04A?SY*wk(WGBXRcNDLP&Wv|4S&pRaLM0Sz5yt|3LOD5XlHn_5QPn0 zCn7?d^q3m8U7RekCa(&nVX4h{Ch)k{E~}A56wpNTcEen`D^|@b)3#Pj;mo(uXCwT) zytaRnEAw1I&A&&T%-nn7PEl0qQ&^2!zFAxwCbCXZ945yZDYb>0R)`6$Eg$pSR)71 zMoywcB|54kj)@K3y+6R-VhBRxbTh-mz5VR6$oHXTTH@(xErx$Rbb$k&=Q0hdz={nM zpwj|_^0V~abd63#s0+t`BUMP5gcDF+1;6aI9_-NWx`n%Na8WX`;g#bS5ELM8 zyNxA!0nIqDui;4M8_&cS-N|M8KSVeHtp7DW%3#D>(@gy9`SeTyZt?F)mj8THH;dOF z5Am1(!?$i*q)Uq^IaR6G7{7KZ&+Cu>lAh@j)&FX!B+VIdr(pkMHHbEhXB5JcsP?)7 z+l@G31lAxet})uXS6u|XP4Tc45R%V7F>xJO^@xN?MAm|Qfr@};ni!2qK1bn9J!gPl z5Z=3bu~;Ml`b3C!n=OnSUGZ{FY_G6dU# z;{;Jqo`jdDB|XYz+H`r*qD2KYH3pWqVX5Dtsma01%gn_UPWwpPxpVD?4cadS`1o%3 
zZ^Z(4PnXi~54zyyMhFq!sp#ne?Q6eIo9u}EtQA(`Y3e_jw@-V-`qn__U(38C*_Qh1 zF;CuLMzh6PkWN7Fmt@0gOk7eg$Jr?eQ*FO~r8JXf6{)ROaXm+M&ZQ`mb=s<{e0kqD zHu5sDt*(l88SU1G5pDhJ>u_d+^Quwd!z=Zm4-3-l-5zHY)ft-nuF%=9K z+e=TO!QCOTn63aT3sR&MAJo>}9e_h@3DRXXy&u9&ZZ$hyDmiv*!aV(JlAa8c@fczU z7BIv!a(D@j|L@0;7T8hSNVJWc3*j?l zbo|<`%7>^iP*ucG1vs~S^=dShW`tNm?_2^?n7oQ0iLO_*FEtB*){1cA^%Hk3v%U@V z3m4Nx@H^HP0Bphu*W=GInt_MC38jx#i$G;#VF}V*IDRVRaFtz*$a8)|-G%)CnJ@F~ zcV*DMbQEg(CdgpmO=QDs$yc4IiKIW!wX^_I4FC@l868@>a>F9ZZMcIOv9Wx`iYvqe zIYMZWXBJ3U9LfCmB&j1PNsS!XumHKp&ntaVg+KQ$A>kjR-L+IB!JauI2~B;^cKjc} znFmG>tp-AZgFh80L^K+kA!SO}yrg#^4zC<_&k0{NQReV{l9?f~F6Enl^2v7wMv9zWjbvcEqQM#5^PxI(2{1+|3}G7`BMTS*hY%^qmiP@x3`BF?NI5>5h8 zFL-c4BpFB8_B#M6s%OyCyl=>|%z`nGzJ|uVR0?5w z%2saqC<@jI-}$Lb=N<#%GX$wvh)?4BGyqBt9BLwDI?5N%o``{wTtL|@~_{f+Ajy_mTHpsSm)l&(HiH^YA z3N}?5F)3gu;d(N}4uk<~$)V3n`$3{UPEW^U(or@)-fSf@HOc3QPuL{r#4xgfsFl1*=0o*& zRlTHNPLa7M*l<0+C@Z`42E@g!AutQaL;TQ_)o|%0-Q~d`3|FwKH?$Zu=a3S(NNWK_ zhKV$2AQq<%Z0&A(Z?1&xlJBvi%SFPHI`V=Q4&O`Y)WEN8!m%W6J5Z$VkUQKGw|m#N z0RGxGZw3R3Pm;xP%?Ny=Xj}j#!Rxhizf>!dTEVAJ%SdO2K#jZ@3u7OK1>RIF1A3xG z&|!U!<~pn;nH65oqJq|ZfjAuaOCce8Vq)ZMA+sN%rY0xjQOg2TfMnOg2f;e~p%ZjV%? z?tJfAGeB5)0>I$VBZ{{}+mZE|^Mt)t;#oWShGZ3d16>@FBhgdvG=petNLqNdj{LPX z&%HkkO9%m?7w<1`op@$LQB_{gNAa6%%B}@m< zh&eRw7y)pI0piDcngt6u*6oVJSO$2T^rwInu=WSz*H8X?(ea=v?`~xPInKk!cS=_V z(dDjDyL7E$rZj}Y3T5_C!T8i&YtAcwE?Wn8+V0Nq#?dJJMm z!^zNuJFyyqSaA3b@>6+-uy6FfV9L=s-Xa<(p6=?tRlacg79;05PLG|TeBZuRC9WH! z>=RB4MRhdw-1>`lu*H&o8=6{cclQ!d%%lSbVe$_A!G9EJRbR|PUWCusdv5Y%a1SbA z2?!nHoz~S}SbABq^hhmLIDWL+*XREbPTA^TmIElq0cVR=i}wzKOY+3O|DC@tCJrvf zdQUOKfx8_gj*cQjyx3!6JOUWqZF!um>Oe3S1ExN3MaSRp;Jfw>tF4(^x>oC$9K1^b z-A72I5t?u}f=syFNV%4&3SBvnMQ!vL+NyUzVjwDt;TB)Ma%H4e&%!$0_`Jdw<4*Et zf4oU+9s}1w))qMTx`4_mupiKlc8iG-n2pn6NV$7>B!Y%0?*mm8j}$@lNrE`c-qB0=szU7(VDZVn@NDD44-=-Tyw6 zV`Zga249PoCuU^`9#6ceTbHf@avXUY)IhQWutF*>_Rhm-6iARfn|qM8$mBTWiR&8< zJS4>@_O7~;5)a|*2mxSabp?Ocj#`xFu12CSEm^*(fH3M9OWcmKZH|Ehbo_-VP%nM3 z{BRtaU#rgn6^=PKa$<-F4LOkM(4j-k7*56-?p5%C>k?~y6IwWtSTJi&#;BdZkFL^N zc=BTB6siIh0-RwmI6qjWw}U%rpM&A5u%be$_|F~rlWcIdg`gj1pfE5*rZX+^^*kyr zb_tV*(oL>{mMo~PHQZWz*=cy=!0#wk}s4e3$O-+n+x+=jX`d>*^f<1!aj9djU7){ zvsa3e-vduCCijAXo{31_-H^2o#V-WkE%y34_79|TggV=TbHaA}$tfx&86u*lsi_dG zg?zPe2Z8>K?2sdEFlc85Hua{argdx9kQmVt^xiBdcPTVf|C=Ad;`d6SDVo8T57O-? 
zB){@(D8H-Ky;s9>r6p|$`f!`uAO)Lmc{W0ME!?gW{T>80N31fYQo@Mu9>svLJvi)S zf^xZ_;FG&6o6-G%0g)V;hmDEM{}Y}FA7G9dyJ$5pWpF}ahxINZKAl&P1_pH&zJV+7 zY_7#~qPrJA0V5bkeARotpLzr0k?IXN0uJ*M206+8OgfoqxUZa z!!7IFT_rN0f@7axi+4~$g-}k#4+u{w2-1s@pZu?tia_#Q|F3n9|37~_zYvDF{cyUH z@ogj*IAATV-D=9V<${ZX&|-OX02@)D?I_ucGw_k|xvqdeU7310nFE zalr0e5tl?x{B7)Z&A-58Ha0fnSI!d1B!Jovt)c>6WnM47H@@&-z0n#b{wW zz#k~Mi*kF>B*1py8x?iv>BrfbkxDY)PP!Bh*@efl!mSK>I|0~t;J83|0^x^r?L;zR|NY5P zb;%-irw?`}$pz4>nMXn9Ow*HojnRT^c6i^btDF0_4 z?tK+jm3TC5WFdiAF?C*S)pZP|myFGV-6k$$kTtP)370AUHWNY#;-Cm%m;eP&OZZ6J6rhjZbrpAdN;zN_s>cjPVv*v-vQ1z{E| zMu!)6*9XXsOby^%_!GAV5?%mi*?_74F(ULRukuY>E+7z$f=?c+Tmhs2xcd?V$DV20qHTeoiIVrcW)*P&WH^|fTqXJkW`v&9)m;PpEOUrhvu93k(a*ab|KGN<9Gj06f)NyD>|H-lW zTV{=M6j$;IWMBokdAjVv_wV0f{79I=r7NS4o?$^a@XL(wMUqj-_C;)enFWxV%cQqp zoSL8rA~ZlBQkzzS?Gtl7^%MW}>Aeim+BkEN9M%JuEP}VQ;?5R!^jY2CUvHt}&>v^M zc;N@ifFeT~(F(SlgkJ|-CB=?1J8=RE{xe+EGBKKRs%S1-6F!KcQbSZpu z>T(ONo*w1P6>NhWuVE~Ov?%@kPjqW*Yc($~Ue?;#37CX4wJ`ho3p2Qo=|rcM(DVgd zyJm3K|H>7L0vnw#Je#`nP56pW+#_?E*M0>B?`v%L^W#ggQTwICHzFE53FcXg(9Nbb zHOehf2KDs95+28ekOY?gr?rf0`SP0`GidkA)NUYpQ}*4vZR3t4Vi;=j+PdXtBePoY zWFFgym%y)S<%;q1(F1>yT=QUP0#=+2in648Pz>f;ag477^#)n$pL?JVh>7W#*w}~@ z;|F?#!sg~}&)P)_7bx-n_^fQ%*nl-)ULgzUeHq5eu7v>^Lpi$%H$Dn(9IU74%j=Pm zU${T9Q{*_x$<;*PA``|}KWjiVGpW@G`=35^qv@%!nY1C1n?{=;OTz}}2$sd#1Tgl7 zkUHDRlU*WFjxH`=qlKs&f~vQBXGq=Yx#~d~mLb6sLKdeVexNJp3qTmGhOD`mf^N!y zo&ektMG>qAj9G$Cj@(;g_br>55Wv7H@*}ojFGATYkZ{Q)7vws~i&tRABbHPo7K44G zgqj^4oojk$f^ibN=3EX`kBNwpUFM;yB^@oW(zqqWNwD_f80 zsxi-RKh&~Sg$l+V1shW}C(pokLDW8U2XZRfVcDuJh}4L+zS*IhyD;ReiESm!Aj~i+ zQ@5jfL@{9j7h!i(jtl~Uuoo3KA_89Iu%D?+{nW=$)$eJS{|%O5(LZ3QDGGOXpi%h= zaE#jap~Gk%ng|$miUK#nO_mi#>_Nyoy8HeLovGyJvl7L`PUeHX<@W@c`B0BRxOgsYxf z>!4mO#*UQtx|zhvz{dxV@ea^Bq|TwvkQ>b?-xlxbjp4PnK57(NNCu+v0+rEIFV22n z3J!(r7WkW(yKKaB1nENJ;4;*-Me}3$RDl`yMR97C^%*kSr5^#cx8ygSD zyZ|X7p`UOCKHcz1wc)lRrr;*5zd_O-j-I--@NM2R#x&Y38B=K(@|-Z#p<9Vdi`FBv zaw18KRzi(Q1`#NCVc05I=Jx2=#up?)_mZ6n%$)TOGN!^x{5nA`$esQw#mvTb3%5S- zZgon61NzA4`>LSupugS3DgOwsVa&k1X!CybDgm;&K6#U49+Y4ftqlkUPSh zKH|?H%h%v;lFm6H?}_c?UJ$^GPdB$M9MqPaIvm&7B~1f4SJq!#4t@YP_FMx|OAJ3K zT9-32WXM%Dd0rkc)>>kxDPIsaMVQFOy+MVLU8C7SQL16_ff;(Y6 zA=gTQsj>udPZ4W(0C~j=HX7}UGVHyqhksxPp zr~!+@%*<@A#&I~?CX@sVfNE$4uwoCvXUVtOFo5nem002|(h#v7bf(&5ci8Jj-2whzuK3#1<03i08#qNMLc zeKWQmMZXm6<$}|LTFkb33HT5c?&QNS?6Ve5{VAM!^2OKt#`z-;N3OSid?i2E6xR#Q z)NlmO7u(p{N}w&~;Lv(H@Bx)23Cttej-Wk1pH(|c#NwZt(w*t>2*A;u50|I2vvWcx zXk$ridKm7$5;KXF?wEGFhgUx^C&bGuffo#i%q6xpV~?l5D@L4om7O2p**APl=JxUN z34Xf>`V1HTymPEH_8od?7`#GAyg+55iS*QOeV&)RkT=Qm=Y`aljZJ&C3Kk69o#IZ$ z!NHUsaYce#&VBXD0x?GU!nj8SHJk`d{4I`TgsD670huErBs|`#5RQEf&wRmSAJ)&$ z|2dp&K<-K*o*vjVs}gpykRZTs&{5d3R${{e2MDWCf|M)lSA@expq4@cjHEkKdku$U zuIV(nL!h@omN-VqDC-0LMAC4<3QSlIV&5F@2rPMuD|nW$veH1Ek}Jc=6&ocbB~;wZ zor6FFFu0S+gWC$W39>iS0|S|f7KGX-QOOt}RM${n&kYeklc-rhx5SZ6-9%l=_?)s( z(f&aXRMUX##*tHc<$Dk;5vZAaF?1{mek~FnfgtVVFmTJL7vLh2VX7Slb1m@^-Kj3d z1rqKqi@bj_($*snN1+@gISIk)gnW27=j=ULH$I}@6p@oN0C1BA$wP)Ftz!VTwBb>* z0!Hoe-gW;)7t7M6Vd(n~{kIWFNP*Wdv!2a0RPAlUHGj!-Xt7clI*c(f$!OA zo~SaUMxpK73#D2VT6rX>$8tPO2bS9)-`fj$F1X}JI`j%1paT(@SdWeTD`;vO7!QPx zN33nl-Baj~S43)yqMPLNU#0BYFST-m(M>ooK48cy8No>=>A+Ekf=wjPhZcIxUKLKC zph@7+4M3MD?dvcTrJsT(!W8t26;ijbFj+8+IR?V=rj7jl9ttYCObYt0a=4zz5ced` z-h13yg!X+snZFI4(DXz?<;If$-*9q-;(hgzzmnj0M?i7fHoZtEgQ1|PZ+E!oZ#FHg zug0|~untOqu|AaY)@%A8UNhqJU!WK=!YHvn&X)|L!qq2%Xb6Y@J4H+Se2RVgzn^Z@ z^g~gYFuNhLx_h{Ny9{1f283^I1DvfxJMd{%=)$xUO|mSm!o_P0pQ zE8c(mW&#@!ZfwH-%AKkc@A>j5Y82Au-v54&Gy2WxChkoRwq1rey$>l)oP}H3$UAhP zDMt^V2Oy|}y^A!}gO`m%MddXsf6VNW4_okowz%zptli{Js4V9lot-0Ks66E?{R30f 
[... base85-encoded PNG image data omitted ...]
diff --git a/src/dios-egraphs/Diospyros/data-plots/table/table.txt b/src/dios-egraphs/Diospyros/data-plots/table/table.txt
new file mode 100644
index 00000000..c714ea8f
--- /dev/null
+++ b/src/dios-egraphs/Diospyros/data-plots/table/table.txt
@@ -0,0 +1,52 @@
+\begin{tabular}{l|rrr}
+ & Baseline avg (s) & SLP avg (s) & Diospyros avg (s)\\
+\hline
+3 qr decomp & 0.0006218 & 0.000671 & 0.0006932 \\
+4 qr decomp & 0.0010648 & 0.0011612 & 0.001141 \\
+5 qr decomp & 0.0015842 & 0.0018499 & 0.0018311 \\
+6 qr decomp & 0.002206 & 0.002588 & 0.0026048 \\
+\hline
+2×2 mat mul & 1.5e-06 & 1.7e-06 & 2.3e-06 \\
+3×3 mat mul & 4.6e-06 & 7.3e-06 & 6.8e-06 \\
+4×4 mat mul & 1.11e-05 & 1.25e-05 & 7.8e-06 \\
+5×5 mat mul & 2.19e-05 & 1.94e-05 & 2.46e-05 \\
+6×6 mat mul & 3.85e-05 & 3.99e-05 & 3.73e-05 \\
+7×7 mat mul & 6.43e-05 & 2.83e-05 & 6.72e-05 \\
+8×8 mat mul & 9.44e-05 & 3.21e-05 & 3.71e-05 \\
+9×9 mat mul & 0.0001264 & 8.96e-05 & 0.0001287 \\
+10×10 mat mul & 0.0001747 & 0.0001252 & 0.0001381 \\
+11×11 mat mul & 0.0002383 & 0.0001733 & 0.0002341 \\
+12×12 mat mul & 0.0003032 & 0.0001928 & 0.0001626 \\
+15×15 mat mul & 0.0006224 & 0.0003461 & 0.0005812 \\
+16×16 mat mul & 0.0007734 & 0.0003652 & 0.000358 \\
+\hline
+4×4 and 2×2 stencil & 3e-06 & 3.9e-06 & 3.3e-06 \\
+5×5 and 2×2 stencil & 6.4e-06 & 7.9e-06 & 7.5e-06 \\
+6×6 and 2×2 stencil & 1.11e-05 & 1.43e-05 & 1.29e-05 \\
+8×8 and 2×2 stencil & 2.57e-05 & 3.1e-05 & 2.27e-05 \\
+12×12 and 2×2 stencil & 6.95e-05 & 8.49e-05 & 6.08e-05 \\
+16×16 and 2×2 stencil & 0.0001733 & 0.0001655 & 0.0001193 \\
+4×4 and 3×3 stencil & 6.5e-06 & 8e-06 & 7.7e-06 \\
+5×5 and 3×3 stencil & 1.65e-05 & 1.74e-05 & 1.66e-05 \\
+6×6 and 3×3 stencil & 2.63e-05 & 3.13e-05 & 2.51e-05 \\
+8×8 and 3×3 stencil & 5.78e-05 & 6.87e-05 & 4.98e-05 \\
+12×12 and 3×3 stencil & 0.0001685 & 0.0001908 & 0.0001569 \\
+16×16 and 3×3 stencil & 0.0003276 & 0.0003711 & 0.0003072 \\
+\hline
+3×3 and 2×2 conv & 9.9e-06 & 1.11e-05 & 9e-06 \\
+3×3 and 3×3 conv & 3.4e-05 & 2.43e-05 & 1.74e-05 \\
+3×3 and 4×4 conv & 6.5e-05 & 4.28e-05 & 2.78e-05 \\
+4×4 and 2×2 conv & 2.57e-05 & 1.94e-05 & 1.39e-05 \\
+4×4 and 4×4 conv & 0.0002652 & 7.74e-05 & 5.05e-05 \\
+5×5 and 2×2 conv & 3.58e-05 & 2.95e-05 & 2.12e-05 \\
+5×5 and 3×3 conv & 8.85e-05 & 6.66e-05 & 4.32e-05 \\
+5×5 and 4×4 conv & 0.0003726 & 0.000123 & 7.05e-05 \\
+6×6 and 2×2 conv & 5.09e-05 & 4.22e-05 & 2.77e-05 \\
+6×6 and 3×3 conv & 0.0002546 & 9.73e-05 & 6.03e-05 \\
+6×6 and 4×4 conv & 0.0005261 & 0.0001795 & 0.0001105 \\
+8×8 and 2×2 conv & 0.0001554 & 7.54e-05 & 4.64e-05 \\
+8×8 and 3×3 conv & 0.0004185 & 0.0001692 & 9.25e-05 \\
+8×8 and 4×4 conv & 0.0009788 & 0.0003259 & 0.0001787 \\
+\hline
+qprod & 5.4e-06 & 6.2e-06 & 5.8e-06 \\
+\end{tabular}
\ No newline at end of file
diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/gen-latex-table.py b/src/dios-egraphs/Diospyros/data-plots/utils/gen-latex-table.py
new file mode 100644
index 00000000..34aa58cf
--- /dev/null
+++ b/src/dios-egraphs/Diospyros/data-plots/utils/gen-latex-table.py
@@ -0,0 +1,51 @@
+import sys
+import csv
+from collections import OrderedDict
+
+TOTAL = 10**7
+
+csv_file_path = sys.argv[1]
+out_path = sys.argv[2]
+
+headers = []
+group_data = OrderedDict()
+
+with open(csv_file_path) as csvfile:
+    csvreader = csv.reader(csvfile)
+
+    for i, row in enumerate(csvreader):
+        assert len(row) == 5
+        if i == 0:
+            for header in row[2:]:
+                headers.append(header + " avg (s)")
+            continue
+
+        group, bench_name, base_time, slp_time, diospyros_time = row
+        if group not in group_data:
+            group_data[group] = OrderedDict()
+        group_data[group][str(bench_name)] = [float(base_time) / TOTAL,
+                                              float(slp_time) / TOTAL, float(diospyros_time) / TOTAL]
+
+
+# https://tex.stackexchange.com/questions/631583/how-would-one-print-table-outputs-in-python-into-latex
+
+textabular = f"l|{'r'*len(headers)}"
+texheader = " & " + " & ".join(headers) + "\\\\"
+texdata = ""
+for group in group_data:
+    texdata += "\\hline\n"
+    for label in group_data[group]:
+        cleaned_label = label
+        cleaned_label = cleaned_label.replace(r'-by-', '×')
+        cleaned_label = cleaned_label.replace('-', ' ')
+        texdata += f"{cleaned_label} & {' & '.join(map(str,group_data[group][label]))} \\\\\n"
+
+total_data = "\\begin{tabular}{"+textabular+"}" + "\n" + \
+    texheader + "\n" + texdata + "\\end{tabular}"
+print("\\begin{tabular}{"+textabular+"}")
+print(texheader)
+print(texdata, end="")
+print("\\end{tabular}")
+
+with open(out_path, "w") as fp:
+    fp.write(total_data)
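As a worked illustration of the row transformation in gen-latex-table.py (a minimal sketch, not taken from the repository: the raw values 944, 321 and 371 are hypothetical, while the column order, the TOTAL = 10**7 divisor, and the label cleaning are the ones the script uses), one merged-CSV row turns into one LaTeX row like this:

# Minimal sketch of the per-row step in gen-latex-table.py above.
# The raw counter values here are hypothetical; the shape of the row is the
# Group, Benchmark, Baseline, SLP, Diospyros layout that merge-all-data.py
# (below) writes.
TOTAL = 10 ** 7  # gen-latex-table.py divides raw timings by this to get "avg (s)"

row = ["mat-mul", "8-by-8-mat-mul", "944", "321", "371"]

group, bench_name, base_time, slp_time, diospyros_time = row
times = [float(base_time) / TOTAL,
         float(slp_time) / TOTAL, float(diospyros_time) / TOTAL]

cleaned_label = bench_name.replace('-by-', '×').replace('-', ' ')
print(f"{cleaned_label} & {' & '.join(map(str, times))} \\\\")
# prints: 8×8 mat mul & 9.44e-05 & 3.21e-05 & 3.71e-05 \\
# which matches the "8×8 mat mul" row in table.txt above.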
diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/merge-all-data.py b/src/dios-egraphs/Diospyros/data-plots/utils/merge-all-data.py
new file mode 100644
index 00000000..ddbab7bd
--- /dev/null
+++ b/src/dios-egraphs/Diospyros/data-plots/utils/merge-all-data.py
@@ -0,0 +1,35 @@
+import subprocess
+from collections import OrderedDict
+import csv
+import glob
+import sys
+
+OPTIONS = ["baseline", "slp", "opt"]
+
+
+def main():
+    bench_directory = sys.argv[1]
+    csv_path = sys.argv[2]
+
+    print(f"{bench_directory} is the target benchmark directory to merge. Will ignore 'all-data.csv'.")
+    matching_bench_files = glob.glob(f"{bench_directory}/*.csv")
+
+    with open(csv_path, "w+") as csv_wfile:
+        csvwriter = csv.writer(csv_wfile)
+        csvwriter.writerow(
+            ["Group", "Benchmark", "Baseline", "SLP", "Diospyros"])
+        for bench_name in matching_bench_files:
+            stripped_bench_name = bench_name[bench_name.rindex(
+                "/") + 1:bench_name.rindex(".csv")]
+            if stripped_bench_name != "all-data":
+                print(f"Handling {stripped_bench_name}.")
+                with open(bench_name, "r+") as csv_rfile:
+                    csvreader = csv.reader(csv_rfile)
+
+                    for i, row in enumerate(csvreader):
+                        if i == 0:
+                            continue
+                        csvwriter.writerow([stripped_bench_name] + row)
+
+
+main()
diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/order.py b/src/dios-egraphs/Diospyros/data-plots/utils/order.py
new file mode 100644
index 00000000..f7826040
--- /dev/null
+++ b/src/dios-egraphs/Diospyros/data-plots/utils/order.py
@@ -0,0 +1,35 @@
+import subprocess
+from collections import OrderedDict
+import csv
+import glob
+import sys
+
+
+def main():
+    # read an existing results CSV
+    # keep the header row in place
+    # sort the remaining rows by the first
+    # column (the benchmark name)
+    # write the sorted rows back to the same CSV
+    csv_path = sys.argv[1]
+
+    data = []
+    header = None
+    with open(csv_path, "r") as csvfile:
+        csvreader = csv.reader(csvfile)
+        # read first row
+        for i, row in enumerate(csvreader):
+            if i == 0:
+                header = row
+                continue
+            data.append(row)
+    assert header != None
+    data = sorted(data, key=lambda tup: tup[0])
+    with open(csv_path, "w+") as csvfile:
+        csvwriter = csv.writer(csvfile)
+        csvwriter.writerow(header)
+        for row in data:
+            csvwriter.writerow(row)
+
+
+main()
diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/plot-base-slp-dios.py b/src/dios-egraphs/Diospyros/data-plots/utils/plot-base-slp-dios.py
new file mode 100644
index 00000000..d9a72bb5
--- /dev/null
+++ b/src/dios-egraphs/Diospyros/data-plots/utils/plot-base-slp-dios.py
@@ -0,0 +1,69 @@
+import matplotlib.pyplot as plt
+from matplotlib import colors as mcolors
+import numpy as np
+import csv
+import sys
+
+
+def plot():
+    csv_file_path = sys.argv[1]
+    out_name = sys.argv[2]
+
+    names = []
+    baseline = []
+    slp = []
+    diospyros = []
+
+    with open(csv_file_path) as csvfile:
+        csvreader = csv.reader(csvfile)
+
+        for i, row in enumerate(csvreader):
+            assert len(row) == 4
+            if i == 0:
+                continue
+
+            name = row[0]
+            name = name.replace(r'-by-', '×')
+            name = name.replace('-', ' ')
+            names.append(name)
+            baseline.append(1.0)
+            slp.append(float(row[1]) / float(row[2]))
+            diospyros.append(float(row[1]) / float(row[3]))
+
+    # data to plot
+    n_groups = len(names)
+
+    # create plot
+    fig, ax = plt.subplots()
+    index = np.arange(n_groups)
+    bar_width = 0.25
+    opacity = 0.8
+
+    rects1 = plt.bar(index, baseline, bar_width,
+                     alpha=opacity,
+                     color='xkcd:baby blue',
+                     label='Baseline')
+
+    rects2 = plt.bar(index + bar_width, slp, bar_width,
+                     alpha=opacity,
+                     color='xkcd:deep sky blue',
+                     label='SLP')
+
+    rects3 = plt.bar(index + 2 * bar_width, diospyros, bar_width,
+                     alpha=opacity,
+                     color='xkcd:vibrant blue',
+                     label='Diospyros')
+
+    plt.xlabel('Benchmark')
+    plt.ylabel('Speedup')
+    plt.title('Speedup from Baseline for SLP and Diospyros Vectorization')
+    plt.xticks(index + 1.1 * bar_width, names)
+    plt.xticks(rotation=30, ha='right')
+    plt.legend()
+
+    plt.tight_layout()
+
+    plt.savefig(f"{out_name}.png")
+
+
+plot()
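The speedup bars that plot-base-slp-dios.py draws are simple ratios: the baseline bar is fixed at 1.0, and each vectorized bar is the baseline time divided by that configuration's time. A minimal sketch of that arithmetic (an illustration only, reusing the 8×8 mat mul averages from table.txt above rather than reading a per-group CSV):

# Sketch of the speedup computation used in plot-base-slp-dios.py,
# applied to the 8×8 mat mul averages from table.txt above (seconds per run).
base, slp, dios = 9.44e-05, 3.21e-05, 3.71e-05

baseline_bar = 1.0       # the baseline bar is always drawn at 1.0
slp_bar = base / slp     # ~2.94x speedup over the baseline
dios_bar = base / dios   # ~2.54x speedup over the baseline

print(f"SLP: {slp_bar:.2f}x, Diospyros: {dios_bar:.2f}x")
# prints: SLP: 2.94x, Diospyros: 2.54x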
diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/run-all-benchmarks.sh b/src/dios-egraphs/Diospyros/data-plots/utils/run-all-benchmarks.sh
new file mode 100644
index 00000000..082792ea
--- /dev/null
+++ b/src/dios-egraphs/Diospyros/data-plots/utils/run-all-benchmarks.sh
@@ -0,0 +1,5 @@
+benchtypes=( conv qr-decomp mat-mul q-prod stencil )
+for name in "${benchtypes[@]}"
+do
+    python3 run-benchmarks.py ../../benchmarks/$name/ ../data/$name-data.csv
+done
\ No newline at end of file
diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/run-benchmarks.py b/src/dios-egraphs/Diospyros/data-plots/utils/run-benchmarks.py
new file mode 100644
index 00000000..7c6ed1e8
--- /dev/null
+++ b/src/dios-egraphs/Diospyros/data-plots/utils/run-benchmarks.py
@@ -0,0 +1,54 @@
+import subprocess
+from collections import OrderedDict
+import csv
+import glob
+import sys
+
+OPTIONS = ["baseline", "slp", "opt"]
+
+
+def main():
+    # iterate over benchmark folder files, that end with .c
+    # for each of the files
+    # run each of {baseline, slp, opt}
+    # gather data and write them into dictionary
+    # write into a CSV with file name as the first item
+    bench_directory = sys.argv[1]
+    csv_path = sys.argv[2]
+
+    print(f"{bench_directory} is the target benchmark directory.")
+    matching_bench_files = glob.glob(f"{bench_directory}*.c")
+    data_dict = OrderedDict()
+
+    for bench_name in matching_bench_files:
+        stripped_bench_name = bench_name.replace('..', '')[1:]
+        print(f"{stripped_bench_name} is being run.")
+        results = []
+
+        for option in OPTIONS:
+            option_name = "run-" + option
+            subprocess.run(
+                ["make", "-C", "../..", f"{option_name}", f"test=./{stripped_bench_name}"])
+            with open("../../data.txt", "r") as fp:
+                file_contents = fp.read()
+                data = float(file_contents.strip())
+                results.append(data)
+            print("Deleting data.txt.")
+            subprocess.run(["rm", "../../data.txt"])
+
+        print(results)
+        further_stripped_name = stripped_bench_name[stripped_bench_name.rindex(
+            "/") + 1:stripped_bench_name.rindex(".c")]
+        data_dict[further_stripped_name] = results
+
+    print(f"Writing to {csv_path}.")
+    with open(csv_path, "w+") as csvfile:
+        csvwriter = csv.writer(csvfile)
+        # write first row
+        csvwriter.writerow(["Benchmark", "Baseline", "SLP", "Diospyros"])
+        for bench_name, bench_results in data_dict.items():
+            csvwriter.writerow([bench_name] + bench_results)
+    print("Finished data collection.")
+
+
+main()
diff --git a/src/dios-egraphs/Diospyros/plot-utilities/plot.py b/src/dios-egraphs/Diospyros/plot-utilities/plot.py
deleted file mode 100644
index 5392f961..00000000
--- a/src/dios-egraphs/Diospyros/plot-utilities/plot.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import matplotlib as plt
-import sys
-
-
-def plot():
-    csv_file = sys.argv[1]
diff --git a/src/dios-egraphs/Diospyros/stencil-2d-results.txt b/src/dios-egraphs/Diospyros/stencil-2d-results.txt
deleted file mode 100644
index 2e53f347..00000000
--- a/src/dios-egraphs/Diospyros/stencil-2d-results.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-stencil2d opt: 288115 milliseconds elapsed over 1000000000 iterations total
-stencil2d baseline: 355675 milliseconds elapsed over 1000000000 iterations total
-
-conv2d opt: 43573 milliseconds elapsed over 1000000000 iterations total
-conv2d baseline: 66823 milliseconds elapsed over 1000000000 iterations total
-
-qprod opt: 5684 milliseconds elapsed over 1000000000 iterations total
-qprod baselibe: 5974 milliseconds elapsed over 1000000000 iterations total
From 11c580045b0cb2bf97bb0304f7ee6fd54e0715ad Mon Sep 17 00:00:00 2001
From: Jonathan Tran
Date: Mon, 24 Jul 2023 03:18:12 -0400
Subject: [PATCH 130/143] add all plots

---
 .../Diospyros/data-plots/plots/conv.png       | Bin 0 -> 31923 bytes
 .../Diospyros/data-plots/plots/conv.png.png   | Bin 32212 -> 0 bytes
 .../Diospyros/data-plots/plots/qr-decomp.png  | Bin 0 -> 27590 bytes
 ...-latex-table.py => gen-latex-table-all.py} |  0
 .../{plot-base-slp-dios.py => plot-all.py}    | 18 ++++++++++++------
 .../utils/{order.py => sort-csv-rows.py}      |  0
 6 files changed, 12 insertions(+), 6 deletions(-)
 create mode 100644 src/dios-egraphs/Diospyros/data-plots/plots/conv.png
 delete mode 100644 src/dios-egraphs/Diospyros/data-plots/plots/conv.png.png
 create mode 100644 src/dios-egraphs/Diospyros/data-plots/plots/qr-decomp.png
 rename src/dios-egraphs/Diospyros/data-plots/utils/{gen-latex-table.py => gen-latex-table-all.py} (100%)
 rename src/dios-egraphs/Diospyros/data-plots/utils/{plot-base-slp-dios.py => plot-all.py} (82%)
 rename src/dios-egraphs/Diospyros/data-plots/utils/{order.py => sort-csv-rows.py} (100%)

diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/conv.png b/src/dios-egraphs/Diospyros/data-plots/plots/conv.png
new file mode 100644
index 0000000000000000000000000000000000000000..d5fe2a3c8a58b3c9a1e4ffd9b814ce62c1ecbbca
GIT binary patch
literal 31923
[... base85-encoded PNG data omitted ...]
zCQWsPpK*Wg&9tSfzdxD94(GKh>ul9KL`amA18O9|&+_HV%TTsNEIXv6TqfyC|IF|6 zX!csXjE?Qr=N}UolvY7+vtdte}9#Nra_)AUeAsqn>* z!Xf;}mjYfqe<0^&HQ2zT`B0Pp?L>2l%Vhzbe7Y<1GrXSk8YDA^7HfA_BjZQe|hT{x2deerS(B<$vY4GH;g5NGIn9exOy;=Qh7{^}ZgEGj<=s?quL+jn315Q4?7 z3Ea4EtDvA@MIp7=BT~vqgWciBm!s9;LNU3Po%-w_GS0~IYo^@78PPKRg#+as6%~aI z_4ZaWFo=zahQBX0I?Q&w(n2(rA7_bZ&i#U5$eIosa$dEUI~lbV{U5+kE$>S$$^ z*;^fa$kg$?#gxhH`8L$3XOca6Sck~SNYT9Z#^JUCp{C}OSw;%{YOx+tPUG_ISWyYY z$Zh1W#8)kO7n%zM4-LJzxys1Ua5I&FHx{+&RcgsJye@y}P%>MA`|n>G`4?LXYE_Hp zcAQFAa&#<2k*rab{k`?emoFNb2G2G7ABEn!wFzKEvK%>W8?Z~G_V-qcn(AuhXc;%W zYoT>KbqiQ^M>zmI-_4b4@8ICmWuLnIl!^dDdV<$i}xk&ZMo+0Vy@UR+WmmByxiQtEa9k0)rHnQc9k1M zjBaRYYb$GNhIr2v2wGTJ^x;5C_9R`tpOcewWXvgq6X01RMQwAcc1}{Q)wljp#FV^z zFQ5v`jvYISNcwl|sR~+m{P^)A`=KyzZ=mE;)GM5O4S^jq{r;h$p~q&;uy)7Zym^Bd zz^Z=lkj4t!1`OseZ}^pVbY9oY%@)1S0okp{0=|JSFY?rA^FDohxQ7k2S zlNi~ADP#bC203jMbyTgIvw(mf+uGvsfs&;jd4Ec+dlaA8&~xlvUF`7f1@cj*!?)7} zQp*1R$*dT}`B*Ia=FOYqe_p*(A|(umO+iJ44hMsds^NULai2qi^YmJZAah`Cv>e*W z0eZT+C=t~xUTfc9KxUK-8v5|qk5@IC?^K~lh0XWQ(x4pEx)+_DT!DdsHKQxnZjV(C z-d5xAD?ielAFvij+mBP)d6R$u9SWEN9!o9L;K1DYYa#a9hkbp0OH55o9UUDF-o+jm z{rU4HzS_{-{O*@sNI5l5rT{#SNUlhcYKezKH5qx`f7TxYfK=mbMm8+F(w5**GBNT= zr^UphIY!#JCZzi3g(J1Ct?Q`{=iNi3c|#r@ za7rF95?EW(;?|t)_m&(}G^hK4i94Oyep`tCD2kbrIwdA1rit45DdxMj&k_T{6M5%P zr0Ene03Vf)^;C%%zF3S8c~MuVI-}OOT|4`n@8FTp?KgpDZg_cZk&@!T5w5C}{X>Tx zi7J(G`$bKEPnbk`?#4PE712-CNVKagcKWeRMMXtpnBZOK@6Ge+&B5B7oSc$DmK{%5 zY%(^%L$CpJN_GOLf{ml60jJlys5>pdg3(O>1;r4NS*_*4dLh_~*k9C-I&5vzcf z$7VUtuG#OU*wswY4xB>2$!~3>6Gfm~IXIk@iOB8Oc0z#s9H`fN>L@baU&jdGF*jPy zDA{j_u9lwr+8v%FNoM^x55~V}mx+suM^&}b$`^BH8x@)7ZKgs5_0|V*$sV&_{`Kv* zE{nr>=d!Z>B>V|@@Mjmh#jDCm$FW!VeZLb<3@bS~^S@`UfB&-AEwnn&zfAQsDyAnQ zdlPzGK$2@YIb!*jKHdA=eyr=0#REOd)RN|(W1LGpQd0OR$0768;+7pMIp1>Y7L_3z z8dzA6AbfDH`VO0<9UlXiOypOoPS>O0Mm(_n$2J3h2(>@4U$u;pbMfdCK1Tp%=gHx_ zM(%^Z+uwaT_UtV%{we@%k4KMo0NHt>>LnFhz0>b~AYC_x+;?u}z?1e{niqef^G4UILR!a(JOop<;aec#`?+48wkfCkTU6SXh|Q8vXr8QGJoEDFJg}i>k1HV{gSf4Zgg&t5tC6)4n>cM<6wxmXsh1 zT`~Cl(x-;=b?>P^+jw{$Agu!XU3~cC-Mm%ZC3bDo4SOz@y}2tk))#&zX`o=D>-lv8 z_P#bf^18WdU7Xs3gRkz33xx@2$Jx@Tozy6I;om7h1t^7XKS4c}S$w-4#Z$xIW#a1z z&hKSa0V!KW{oNJI*jxzJ5JIe4Wk#RpW(4bsblGp+`BiS@X5;k zzGqIeJ4yeFEqU?-5hUrg=kw>!tJkl;mnwAGO<75a8tv2%K?~P|1{E*25l@!vAO0aU zzSYRMAwhYsP4Dva&FKMLSLT?`xJ|4*9KuJ#bk)o2P`>+ta}_qh1>N^inEdBvehifq z$K-Z+X%C|7I5_sXBJ0v8$*3t~Mfc^~k{3>&hE6zpXheiJU>-X6@yDl5uiJ4_;BEeq zXl5!cEp4>^5Lb76kjz2!V1UUU;JVJfxUoV~j~V&ns3~J3LhW`)NP?}%Et1adhdcq# zjj$yj5hqU=bxiE;@6b$H?McUejbtl8nH0^`HA$*5(dny}6L5_e?7?$t71{8R42M&? 
zy)K+jomO34eejnqkmfSLuwNfaJW}1h?Ay1myrDt#ZSw^qqdTb^7nKdwO`w*<<542^ zS{wQH;}rV=tQex^Na`)$shz!5ujP3Bm%88npEq~uq6czIoqzH!T4(%A`Vc8QKQ^N^ z4G7pE9%ueO=`fcPs`yL9W9XT_Y986NRs>sniHmrXZKVD)*Hn(yckYe=f~uyGjh9MJ zf35u1oD~z|#@pmeIAp=Fxcpx;zNqaZl_5`T{D}?$iUf zPt~@S04AWBB;S`fjveIZ%mXdK$jr=~xFH73Tv=ly%lo9GN3x=PygR-N#)htay%TFl z_+#VOVS{t83az?nfj?92GNag$`s7_*U46UFOBi!B&VRcy{841_k|oCH-K(2*#^ubB zk`G(^Dl77A+eVIiq21sb^tm)TI&HhEEb8Ny1airy|BwVnIrgJYw*1p4JqfO{w-$sO z1|+!ZonRKvYhzPy4$;y|77&cRP#P-l;GBnjZ$uUsb> z8F%DyuG!S28xHhdm&uF1&;CeW9`VO=4}(Ih$VI6VR$){vc0AGxJ6A(&Gw2Q zd!(6N7OgBB#4j~U_|K1a%YmX?WB;|$>B5F&9B-0yh{BA1inF7m!`siV{PvbZna&gE!T#^)|7MOr zXL$DPndGM=>YY;#h{5KM1(sY7Ph|uGH^!>m6(y*N&;(!rdJyhlTKML{n6XJY3Zp`J zAwITwxxcoI}alLYe?S~m(DoE6&V#pFow$gJ@iPia`*RGSRq3S=oYRp6^4N{&m4a?u99_J0& zeqtrs25>%_?u)66RpS}YeywEQrm`RU1U_mR_*XPW1SO%LHwbH2=6Mu0i{u)MMmq)d z34M36UA3X9>6HGIKr|TnV^NYf5vqL2N|7>d>w!J?actapTz8a(lQR&Xy}iO!N=mAu z_J!atnOzv%Pso!T5)MGzYP+2MQRL2A6qHa^mrdVHDN>|_PRkkDt74g2c6 zy_Xxe1^D==k1Pg&y@pPhP_eED3le~ADJLGWqJ}P~`19jM&Q==Ww3;E!ivXL`NY<=c&^Vt8cBl$pfG+5cIKr&&3qG-N*4) zprbc*x=n4w5u#&cWYo;i-$+tBit0^C`>zJ*7heVQGP?g70ea!~@V-rId44jE#4r1<+g=udV-ALyY)ykL}_Tc9^KuC#nTzb(&oWJn=j&r+qu z1pR13f#sR8$w}4=7cPj1i7|r(VV7|&lyaR`Gu55TXbVQwKT>^%r=q=GS7y3vQ44Ys zaDCy}A*FNY5^AR*6@{(eecl^=9D#O&oXb73-7mElZbs~dqI4^P{C-(quZ~UOE57{w zSk@eZ>M(epmo+tqQ4C+Uwds)eK9#PkURkXqF5KPSE!mUU^i9q=?EbynCs&!6T1s3B z8q@W3keFV6_;B)c!KIyGY#Z5mA4i%?xK3xHq0~gd=LI#tuynSz%n&A{Mug?(hXlFv zO0kZ{=Vz*UczCJ-uM;2%*0>mfxaDs>RD9I!$6Im!FSlD~ZO~h3fbta)7Y_$F{|a@t zVK{&AsO2*gY$U&Sb^;*397@F9ue&TA7H#3iA>xLJWMpY6G&9~83{YJSU~X4AGngEc z05XMa2Vq1|)Y!#L{A>GfJN(vv>ZlGG715^1!NGA31$`VqPxHr{d%KTNukdslQd9Z% z{Np1Qg6RD#{n@vG%r`oFb_@3Pb5G9}0B;q0ck`KUFRsNNq`PYB-?C+kG0?;3!9jl1 z$U8V0sV5$V5luo;&x>d4)>q=4u3M0`jZ93ofsJl>>SQZn|20Ik7Gao($2>fBsJiVd z;Oyt2p&dAwM&{=HVA(^q910vb=hOx&FJ>92uVX)?gI$YC=YKsK*X#QBZRe3N0p*F} zEsuxI-55&h`rVy7w`d>T5_e6EsoDV95?ksE-iJ-nSHCyESx&x1uC2%tjr9|u?7NT* zWM+DqZ`{0@SZH%k>6;@1WNY=MKFi&IPn>GZI1^LB)u$x2?3TATUlfXHmihZ_psg66 zKYz|&E4!tJe?itxJnrbY7kjkS#l=M>axarbYp#gh);N`J36HI+<^iAJ(BhB5T#tsaDadDdZMx zeS%|^G!j76^~!D>11o#`qt^S%e;U!GaJQB}Lyssx`FMK|M2Wi8unEy_M%fY-UHDht z{&cJM6ZL*eIXaeDwDT{ksy0Y29lUz?u6B}OCnzgtE(o8inZQOe5)xkS&}M1SazLQV zz$wvfyom=|zN(BvUT1DCpBe?#d00Tw>%GIA8))3ZqM|HmA8)?4OzO>MK!?!Jb=B3? 
z@2r(nQ3kC+MS+vrgqGMdP!2wUYrI^2t6Od7AvMuQ-uS8PG9V5Cy-eBL`&9%!z|b*5 z__CqP8m8NuGpVhAze6=2YJCqn<5Qd$cCEkoG4r{B6V`~%XWi9aE1e9be8G&wQ`}LZ3!DOBumGTo<^HFM3zF>{hk{eo9#A_R=d{sK5uJ zrJUY9Jh9_?AJQMugrA40!p8tXm|urzPf#gH9Re(<4B`qxqeEx{O88RyUQ&yRat~!; zn`(5LZ9y>Dod<^Ho|#!$SBX#HRJwmKuj=+XE=jxPpqY@Zmgwr~i5R_HPFT&myfd4- zU=o1cflw5Gi%fukpxP3|jw*&jmn=Fp@yz=B+;K5hkC1P1adD)Fa&&gy?zX=5<3|HD zV1^PPUk8<(*$!8Mtv3RcJrNiWIU4sh!%7f)hHn2+#m(f&cp5hn+4# zx+l5-k(q$+6L<8O#<+PRgV9p326zGid?rdOc)>PNY;>cC3%;HdT|8U`sgPF0)%21^|<+ z{k2i$C`f8?@;-K^D(*Y>SS)*sr*YO>3M0+>(bdB2Hx%Qq!ux@4#Rn-d7S&^V?nhl{ zf1bg$feLjaIoUL(Plo;H#~UIz(&yVRU(R`;`p5r`WB5j3 z3nGDL+7F!s99s=ZeROQ>`MNW%R{_}3sBMf_40w^E9+K z&M4y;6MhgGDVHQKU3A3OU*512?G36-1=b0y@gis{TR;6E@D}+l2(1jUouI|7`q5vP zBe)AoC0Hr#5t0}!dnFL5-Uw%d97BpkRtk{4=a(Lstk$ht2ZE-n&9YPhxWpO_D9)OR z!bNWGINbmjYU142#?*IayCkomvU>&^TOlW8f>g$?uHLZWQMl7-EiH=r{5gASR2<)U z={c=h5b=(nsorq~`nIUu*6XhUeQ^kVfo`97ba3JnR6>ljh9ax-_B*T|(;&qSEi9tC zOT3YOey(4&U%DokH^?TU0eKid7q{Dbf;c#E{=AOf-}3-^{KwKOvfEEQyaB!<$%T{H z!SeEU1L#;mVj}c45W$-#@9=3wZ1^}(KN3{BZCBX&hRuArafRuY&606=uj%qR%V&ibE5-` z1B1O_(7Na0K_N2vuK;7WM10JI|Lgj|z<^!Q_pe`l5zl2E9Wp})iJ@+V{*p9a?AfXL zGY<|fAs-15s+_c@P`?r--$FbUhN|8-h9|YrW15DBZ1=Ts1t6~iYMsH^v)6H^j6S4k zLxOoM+JUaudU9Ah%G_`5jPDJ14?GJOkFD*Giz=bNq1mgK zINO6uTef-gX78XNFHo&Z>DWAcn0dGNxDmI>WSgaZ5;14!$ z-MScU(VK?8b6;vXTB_UIIcR8Tpg}9yM#5B4P%9_@Gtw8Qr*Mp8zx2#-PTeRnCecHn z3LluA{APImyf1j?^^)s}myltErZE`lIu^t`$qj%L>dD785_K40(lbyIo0)JO>kc)y z<{cA`rFX+kFOUE0^Y||?@$Wy@l#8!7DEFlI3`C&~8Oq`+<4AYzEWD`Kk<}^cJ|uyy zC#Q@e)c9UW$v7a20odZu8yXso|GNAN`LVmJE4KNmyu8E-XPHj1A9qz2lLQzHJ-Hj~ z`SMk(%0a}w1A*qxA+hH1>AZsn57JU)h-5(YrN{2&1Cedz;bE6VE~|GrOQY2C0xJi! z!Kab1oP;#i-UzY@v5b()wW)7&S67#T>vXplR@DO*oTUsL-bm(sGkBQz(|PAf&4NdX zP>T1(Em2hAc!16Eb8?LPHS8y_7a?A;d=$mNvDX7_>}Qk(NeQT&H-T$uDe^S{x6t)g z5ZVZC9R%`CRNn{4HV~aW;DdpwCJ;Nr|AUlZG)DBmS5P`1047YM^yL`k!60h9a@Bs( z2z0Eg(M*8()bs5JtnMBfB5F`1e48i?&vA-HP)n%&SQjKiF9apw5SOo9NxU!*>diu& zbW7bg5|hCm%MO1OmL3Eso9MaaR4lp(g1o?xgpT#}^kf-Ttb}Ln6gw+3^K~RnBXFeC zumGsTV>UfI+XIFAW3D;#wQJWlaYL0^g2sl1g6OYs@uC1(3al%-U1p>-?Q8~wisdpZt7Z>tC{GMj8qra zf;=pA5`TOu4Mj|PutEU<(-89lYWBOVvxf+oPpLh1w027uMJAd;hCipq9hcL5YzV~~ zMMm9MmLllr&Ye4ihC&b%jS`%h8GK#@H6k+)y(xeD_9*{66^p2?`B^sPcRc+}X&?X; zt)ttk0w7w+*So~TQfbBAZ`>_A!- zE!qnN6sMG<2Y83iXmE)isWsoik9e$zS*{YT-afS7Uf{cXV0a*$C4pqXg~YD}L|TDF zPaq}^un{Uwp~JU(wN4(bgLP5+yh!Z^=|Y?;fV?5rkl_t4T<{|#pxdvXPW?lCpvuYD zfdcJ3o*_1>E8xnZTxZ z2t86JtcGvyi!*}Ay^)$~?mM<~=T10Z7^K{Ov4G9I2|pM(m>Vdse02~7*?~IbL4^ui zb?ql=J3ybtm+rwqA9NbAfXZgD3P23=K->FOpz7tz#q;y?N{~_WRv&$f08j&SmV3Tw zC9DBAyt{DB6ahzRo{k7v0S@x%J=sP27sO4XmLMv`W^;LYx{cTLi7N}5H6j8;xK30* zlEO_elj!-0PPz1Oge{!pYl%pN=Al#KQmOmz)gU&39E{yE#d{|m`l9F>1HQ#yy2xcO zW;gqk07HjGa*8_uBoT8EGZGK@$^s|K!QQU;2urxveQqyt#lbtpCdq&rn2(vO@y3TF&orR5vw%~ z$%Tc@UZNU9E4_fCAB}R9alC+b0Pm>d&jBWn=HFe$KUs|v1WC)n$moR}Jl@-_1G*ra zvDDsX^|y@zxeA;5ToLkCwzg+Q(7(3$E-_$;t2a}Fzl0pKv9U3<;m!soCQF%_qkT~E zTKw=EAmD3kEAe=I?+b^n{{5K$bl?A8F#jt*9zC`R)umeV`-Z6x{XxG?!j39?XEOj! 
z6NK~scw`_#;o1DJ3uuthJl4EZ#{+oVfh(YeL%R&#pBvDl*(6uuqt#amP!j^cyg)rw zLZx%GC^B+YgbF!)6*z$&@kk88sHF%398geV4}KiNlZG@o3K>Wv$*v_z0h;E73v)nh zAnSXX9DsI52H2@*(s`mpR82jGBry=rX!;edw5MI ziaW<%D+)a4etaW@RQn{*d@#T|IUM4_lWhUNqe@N?N>7v_j_=ii0&%oc;c?SjNN2*r z>fsC)?mCjzAYBd$dvNe^-)=m!ye9QkBMuVg3hWW46zL&wV_*a>F>$(WWxem{7Xb_rc12@It_h09j&KX+~`i~~L- z-=Wl!9*4MgiTQ?``^CF4@{2!xT_~EL-#fWXYa^e7dQi(TIZEwUQ(H}T7`>`t)58-> zkKtv`?QNQUHXE05w)C#C+?s}UtR56RXxE5PgpIBdCKe2|D`p?k?$QJ=#0nQ9%_3rs zh>Ftl?FKUpz$CoTJ;tS#)2=RqLzf_UDaXD=kaLAx8HHZVP7s?(oHy{4LpTus`;E`iEH?msBDo|Mk4R8HI*^W4 z5%@ftjaY3!?M@d?vLtZ|eyhyGLn zrv5Wme(#0|y@KgEan8cf6e(f56fLJ)Ov=Jj8Rsq`4rc(&P2ALI$HoJ|1E>4PP!dV| zhCU-?VW=t9x1QAlG(ydvI>PaQBRgB^e*nTB4Fxb0A?>mUm?^Z|3F~Tve@mdpm+*Ta zI6$>;4}x?2cDFGTQ)1G5`(C+2o4D7CwbEu1A>hwG`>!{E8Xur6)?^}yl{Gc3MIb0b z<89bohkVeOrp*kvNjh0rNrM185+vN=dj7r(l_lX|(eFbnvotSD&~X z3evt~MKkeYDs|$pkoI$s%E`T(1S&Etd_YS9WUpfX7f~|52DT7c3lhWNJ>ndH2Z;cL z@@5d@aFD%3`vj~JnC5>B-IeAg*((3WfF)gj&}I-v@ZIQn7!U@U9<3wvF5Co~DQYzP z9ESJ9sQ5GD4&rCvcNR4l1CJ9qHsqTD{MDc;k^UB~{vjx4of5y{rMO4Gki9#&glL}+ zwG{?%O0Orp3EKRSz?h939HSUy*szBKt9Cm&i&`vvW!{w>dP2wD=B>N3PvMbf>NV`x zqHQE;H^{PjIJ%v>#V9kqsRFgw7DVp`G9SmJ{=tyOyLqEb45FrD6rGG|3KvNYagiSD zu2O8$&bx?u0fcx!-U=iqao`aTH)sm_jX{4OFj``jh9D&j<`q7sSPYWz=@tqTBoASg zO_Gy5r9OFaLQeu6d;9nt(9~qXz=gp8$RiIkpaL4KZYEu;9QqV*$;$3(H zp%I$JR`4k+gsEV53hyIT%~1p z8HHD?B55JQpZo?%%4KLVtucg#xurom@WoXq()1yq1HAbzpkF2hX0k3tG^@(~z`?=@uxvQM zhJ1SSYh8et-D2(&1~k*M&+o!Sr9oiweEE-oAPvXd2eALCw4p&?c%vgxz> z+Q>gWJzWO98L3(k8eJCLj&fBCOS=P0E9@|=r4k`b^JnVHpJXvt&_Tq^juyr&_8fVV zUsci`6d~~dWfR#t;3I=h(Ya9W0i;&#r{H%d3woSR)^1?+*AI^`c#uDn*Y@)Apdo`K zz&Pcw$EjhQhaw{YStf7S6F&mH2o3DvWYKv^+Jy{F5iD*5_e!eFvZv4Fl+G9UlgF%4 z)}lU7ln^@QRpv+4vy&oUZK>x2&9X_dlGL=)iEiVj zf@ugr!gd2{kGf#_j(C3F&kjdIO2F&?*LHCsME(b9H!#-c z72nfG@rDkfD)N;#{*f^LU6=T;MbPl_dwd(e*15UxzQCGGtoV2M{l6IY{#17BeAdtc z7xG+sAu;_A^WhNQ)fvidw=@NOlarUA`7eHf{t^{=3C@hCJlr*GWbiyBQ7e`?m64H# z8C&{4Z7dy`mK$qLJo_&5VD_V>Ny{zLh>KqTj}?-ZwCbY3UM!efwBeKSpMfp0g9e)&mJ-bThjymjZK)IJkcsvkEJZD zS9`!NV>~2UpntnSrnBKlye~(KkG4i~n85jJ8sQ_S8}BIWl%PzM15VpeUu%r`q?Wz& z*rpOe(nsHShzXv7cu;ofQVigZkSa03wH&>} zUx4HnBVI%j>JafKNudwgMGHs zKaC^MUcnAeyqkaH)X=JTG&X+tZeoEXvr_E(D4@E}TqT;!K{`M)ek2v>?M+cnVK8`Y zQz>7q6Qk<5_D`+T*2)qW2_gN6l2d{05q)|RKSLF`hPFy;f0ViV`c7Nxp`+S`kIxCN zYU8!l{@EmRH}UaW`HtVGHR5l-Ii@U0peB0rE?`Eu5tBq+VI?JX#|r<;@}6*YaNxbJ zjUY$Rul3shO&=1mr?FWQibHDbQ|Ky{pGz^Jc-$ z4-j)U>_w!#B?Sju(@QeQpv#7qZ^4f;0si06oOAqbDvFs1IW&(mW6`;0Vkj)}=cc}Y%+__Ek^ftIUF zHl=>ew>@7S7sf&5Bq;2}>Mc@JWereVgnKz|+*mAk$dEPE6ibk2T+JXrt)PVA7oLM& zNa=UJ2-I#2{(JEoLyUo**(7YX!pBDU?9@%|#whGbV;Ik)ENYjix!Y!9D}XXYLq$ZK zt222(9_zWl*E)Rh`I_-BhGO6TaLKy+qvza=k<^xa(>wosLWP`^)i5!Eb+vEs?iTV}%nwkzPSYETm zxf}8)JBb+-*u2nXDqXa88JDZaqGb$a5Qq4(Q+WPf@f3bPhwt%|oy5BjVz>f_E#yUG zWhKNsCJ@_ivN_(w&ExqWzc5z|5_>q9mn&f0(pPJV4??fBF0<9LvpXu;ll^j)Na*YFktO@MHc3D-1dV-$Bc~P zYI{Hz)xcS>`O(S88lHOa{UOmLLLs)n7y9`7!?E=66~qTyr3nJJmM#gZAfO_LwF{_2+HPfK-?D9=*n^Oy?CoJ*?6ot#UJH#A} zC?pu~es+ynfVNBIby@UBJWy}9a={74_JtSV; zFOK`(UbF>fkUO9;YtRPwVVXS*{%a)=bv&@S%F91I`FQ)$d%JGKb+r9GJgZ9R6^t>i zC?T_uP8U+?z3it>*IX`MOoP)x85L9j-iU1Uvg~`UVnJtA1Mw@!_r6z^4Mj&aBp?RX z%Xw;KCw#uIHUC*qov=uMEX?SaP^DI(AjlBjxRj>7&|1#v63jfrU`vL4nQr5T3vbL+ z!*ih0?;6`%TxDzYpHHcin3qGwD^$UXjt>3F;g)!*b!*r1AQOf{TEuiVI&ESA7R5wI zl+msn-Zx@?PyVU~y-SCepYD@i40%XcMC3VW4&qV+W5~2{eFF=>#$w>v6O|e2@k?>( z&s8`%Ho@bF0wj+I9K(G`T{w|7j;yS#5_|sh+H9>OOd}nq6dRUp5ry-=1$Iv~%8Mu* zO>+AQCKJkF$Rjc?0wNY|+s9{a`$#ms5qEMhsowZk8nFRf-kw}F;4RRe~Kr1xnYL{ z{~26-lz`6EW@g+NTfOMLSo8vSc z9%}~!V%!f1PTZQPp`E(;VSwY68 z>J{f>5K+nGO~htE98oMYM>tQ(@15-LC6#4Vcb`)h$~$K`J8)Ph?1V`d_==*rO76}C<9>F>$028j+xN#@< 
zpOUMvWpFQ;_}zXEs6WZsdL;+SGMV5vhRMRF_tou-b#ZT*k@cmWf8K(V#Yd@?%U?TG z?&!DpiivTqJd2!G70-(?;%z2ypk><)YDnP94xOq$fG!oSt*1dd+C?+2SaBPg4_}a& z#!ucl^3BJSRBprR@Cs=u@|$*U?x_Ix@oHWTxI`sD59cG9Zpd;QIEv1YO!cmVpN1^k z$JW-v;HN_Zb&t=kodk7$80m>j`)hjQ>6A#`043v^5#R<%1Q>h^bf9nqna`s~kH#PI z7w71fIG+IWBlQ(H`u&?*O%jcbGTgdCmKyK(?1&4T)Xp@xO1=d}I1#QjV$1JiJ=N;6 zg!gjyizWQ0(j#py-+uX0-f3<+i*EfcB^WDzOg&+K0UHpRaex-N6%`-jYs`1|SlshY zc&48%>gW9mTzG=x*k`E8@D(w7cp9UcMh&qj*OI3Stja*Q&X5BhoQlC(x zm2eNzasQ0pO+SWP#O{im^@N}L2tqLt7es)j<3{p2afu;D9WY1~#1nZym@1CR>tDGI zEnOMDn8?T4xp4SYU|<`fyFhewbZTI5D0%H_>yoEQl@=#oW_}Bfh~UGy2?y!EpM(oH zH-?WxVB%B8m|2J9H)Q{ciV6j=mI)C4aaqKH)RT|zHCSIFlq8u2J%^@?d)!&|4Vlz{ zIjALCf!swwo@vGM<*NZDC2*yIPTjKimsxk`F|~Q!IZG1}fKG%D=G+kAKXub-7&ytN zr_Qfbxde)mV5lST#~K55e2+54vXZh9Wr#p(ynFeyrM#O-{rp$#XrA4-))S=O0$h%N zy4+b}d)n=bn$E0MNkT%H#YuVTZ$H2})_lx5Yd^_}HeVm8^hd2w1-wG!DiKm&0S$r& z>00gGaf6C>!KJ9$Xb6qls{j1%{KMaq&?NJ#W;_YSMd_}nF;E<4uwj*1=LBk6C%OJI zt6TwINTZxqcmtW+0Dll#)hgaiRsyJ@C7O3eITwge6lI#+1cK9C4JWbv$l+TXp_it% zPmt*xG8}|vG!*5~^y^%jzlP4NnRUtBA&a&&@9wsKOy1Z0$ve{lM+=f!-7ZE2Q!ivG zc3}e#T!o^9C+UOD^%lVEvmdYZIw2Bm1}MWQ05FrEV*G6!gx8?9dx2bA?i>gpk>{{D zszQD-QaWZPr2q@NdwLd8K*ra6s)w3N;Zh;3H5GEqAV>5FTqx3wKl=WcSLk+-5O5L^mCNQ^t2Z-2o|NE9%4=OcONInfI+` z8hrQQI(SiqYuB$Qq7^kVHHC1w&1Sq;IV!E?wnDL#TtSexb;i29i#g`TOx5#~MYjEc zAaYg76rxnJG{>1@2MZjCcSAOvVFZV(jcs5A97e9k(EA6D3C8XAOVld$V1IFj>G4MEY;43<*=% zea?}3_c|lyGI=L^+#LpVls(l8)J!f#7=cdKci&SyWd?PnkgFhEof&WKX;c*DIj&bA*vevHLIUU*?H*+ASgSOz;D zaifvB4xpT*9aU{?ac+hB*?X4Vnz5L)&~9pwfe8qTAO;J3#*Is0M@M`=M}hAjz54+G zj||-rEC~1~53lO2kdU7pXNK9|-g#PBc;%1GGu)G=nWn`QfH{sS(f8+Lex9pm5P}1f zjKu)Ko`upvd00N1S&9LoLfighzfSF6-`Jpdk*g+ixV0%_1DviU6F2lMxX!>_7zXD; zHO_S+c0U`|P2Rc$0yFt z`&jOK85+&es$+;ADrv7P8|n^n(9z2bdAUJ*>SoVXTMQ{2ZA&BnxBk%V(+g1~_Ay?Q z0I(~HJ14Pd{sbfy4aoOSY_90dNJN8+BKNYPLF4>~v{YNm?%_9UfU(K^8M&DTJD-5I z&jwY^&ba=IQ;b0CBu4z--3^~yot7^)^0QY>4%@BN2`j|$ad8aYcw-(_p}$6u={8Js zuW)J;;V;p^NNVcoyobmaccq+ce1C+>>&ILt)(4!JFr+-w?XTfIJO|W6&}Njlx?&E9 zxrM$NBis*Qlic}9JC|HJ1nbYKbk?S4K4SN_6W%;P*+5nN@kO$w$Tpe$rKuw( zHZjeHiIHAr1i@{LkN6<3!o^Yz*RMuyqBrC~xtACk9}JO<*RG9`6+(Rp0dX+MvY7oX z#s!a%#xoaJ4fPAsvyZ`^cNjG@^qY#yT1Um4bCXgkc_d3^)2=F`bDWEbDH}IedP{=mA2HZkpvrbE5OxbX5M?tyiMsZ{dyu@1$-w|BJ9n{UZy z9NbVxqmqnsK<;$`<+gBZDx3ynzyn>7n&5nKdo$eLBDeyNdZNcgZ?tY zlqhUAl0CTMqZ;;*ir23(>8Ty}BT`1P1q5!f>!06Kq>c_(wBQymQj%s)+|$zriP(!3 zXn>_1xcwGPLAt<<0!T`$Z~vK0I%m(Y5E8HNH|8!x1jvi6aGlz~0$}_RTU7z)Gcs_T zSo$7S$m3)(mV3NYLh~27AouRR%jw4swE`pD0R=WVY<$$6^Ylq$rWwmRO$=k71CAAi z55wBQ;h6P++CKf0#tX2YZ-*6Q*S=SO(rwPe@Ja@rFgL(oSK>Oe27Yk$bhgF@{ZKaa zx$q)2??^!3!z? 
z%hm%rV&s|)+5}Yp#c-&n`G<#U|CQ(0cs`PD;t>=)hVJlDfwa3`2 z(GMlt#(cn_r#T1$;PsJ`oCNWzXKu4EJ3G(F%M;QQ`@{^#>R?MwvS^?~QA^gb#s-+uHC6;{b&~+>njZ$&&5%4D9f^q2+ z4Y)CNPc$y51iEUK?>qj8#9yX;r4)v4@kLeDA*YV|dUAQjNd{GOFVk&DoNYPax(8q3 z2^%Z1?UKm{V#&e%L{{j;r)qEC7HD;P^uW#Eyd=f={#&K6v`aDQIb$(nfT*m59g${J z)3K|;`yf|>OrZh6LuO-fVG2wip|wk6OD84WK5&)x^AYTk+VK6v1)6NT@$0eA7!Xd*o8B zZ`ya@PrHv6Mh^~zh(YWxRMgENf41Ksg6x8EGPG+A97WC0h{WVK^iN+$#RxT08Twn)AMopT?M;v1csPOk)&{ zP$=9~_HvUNqX^l>DZA+?ghs|zlBG_^Sc*x>5>d92EJdrBvJ8c!BS%zF(dHH?v$uC?ur0 z7KclG3h&tu^Ge!fzhoqmjP_z%Kv~Dvyn55}j%OLktn(uuki&Ub!X}N?Z^}!Ln;Xy> zij9ZNMSzWpk%X{u{FnpUUd7$?VRt6~&Jj+Bx|v4W;#{HW>%_Z|kD4+{IfRCg4#%1u zH1CRZame2X`N!XVmT>k-S9|^2OxM4Siza@~Zc2c?G|MtWIN)8igN#1{hWT*9;fMTv!Q<=5*w&d5a7Xlp}_h-f=p~xVik;^fcvu9 zyYYfyPN*8d>LDwysC(It3X8mh92ywx5mZ#`|%#?1(X zH*bMpM$^;R1;Yw8`F6l0)X1?EX*JA*i!BKTdJ}ZbT@E3t6cRnjTZHk_`>>rFqK z6J8&I;TkYMDQCo`74M>i6+Y|MtQpQ(QCD^cu;@fRS%94mb4*jqki>>jwdk!?L00{% z{w8KlTE?_X-luT6XE+GII%{8ARfEp`N+1-7BH5^@btehiLkGg;98 zDr4BG#Lox#CaFn6{@`#Pi{D)-tH{nDljn6Mf(?l%eV2%ecrHS z=eVB4tC1Je(<2VWV$36eu-oAYy(0&Npz;;$%Wi!L^uH+q$+dF+FQcJB{>9nm?hHsE zsX0r5;Q!{ufuPh%#gaEKbYkKnc!$jtdLWbGk>Hj7KEWERxH{5em zxKcWBp>METmNFsMu$Gkr(i{f!xk}E zhZ%vHMGp!5{Hk!|$qA7)Y->I&P%i>SoZOKstb7(w<8n zP#UfBj6ccrIZSg_PdT-4hGti|p0!`A`pms;pf0(2bWj!pP)bPk90?vF+qZA0p~_|f zhdN54DyJ7u*cQn&fKd(LQb<;XLjjd) z7;(e9uVGS*)!E%X9v|caYgNtN+ekpu-6=Wo5Z%WZ>_?Qq-o(>xRE=tOgcH@IUw-4S z`Yk;QY~ji$P(LJGWS-H?&GWc7Z}0K^fw`6QFOA|Mv{y_}ZmpQR31*k`zm4)~!?tbP zdd<2v2%WUmH+H^TU|YL9T=@IZ!NMd%XXtV+Sn96UoO}*KK0&qLsY?Bf+6qs5z&WZ1EFN2<+zWciZB8hyPfKv6qH)`;~D6nO}S8@3z4G* z3Pp}#?Ax0rm$#U|uHkfihdxJ53wv&nUBCyX`!^iz;^eIu_3H`&weXuhRNWY0nmMd% z;+jAB9k($!LKCntpeI=dyH3}8IzLM6*j+^p-IXT^#}MsY(tX>%e&MghPJRvd1ZBbc z$MWsOMF`ld1%?2+n}>@g#@Q~d-h;$Qh*W+dAsFw%FQ`Q}`ciWXd%UR(55R8J=txH2 z^Qt43$k~+QF`!z)r2rRe*kxrKL~$Zc*!=4-+VmU(yb>1F z4I$U5k>N#2@clnLefCU=?%T{vpf@1}CCD^u*Uy49YZg}1*!}(oURM4lwX+&M;^$48 zsQDyLAcI0Pk1#AS>wMp#zV2-T)^%gVBn<3&neDvGmIvk*#02NJy9eG7l8{hk-~468 zhtKrkjKBf;vEaxMz*N5|98HI(TX_5B4$b5+%*1qv+X+nVSNSsyv zN&-q~Xecepc;WrAf({pblDMUJo|1an1W&*#{)E5B+RR2tF(cnL5Su!&;l6!*b~xfR z2WbWnPfMQB{eJA>#OBMNbogtnrW~|T6sFizQ(|klS|tm9#GJ+DN!p8pv-asi``-q} z$sxI(2nd=Dk`n{+oO2ZMS`eX?KgwxFc1whJ?>x_ej06P{{s|3Fu)K?G;+AWD!H8ZXfz~ zXaMLNo@}u&`#GG8OL=)N(8#fpZ66v>)Uaqd!ODZ6R4eKIZ020I z(Dtb1m?){sIOGS5tsLK`%j{R@1*wKD(GNOxBwy_;YtxGQ=f1uL7|}mRj-;S&Ftokt(3Z)v59G1CXJ1Wamau zyErqA;MV`f&mK0rwO9PTd*zD#996w2wy>puAg}ZNkWo!Xgx+g(zR$V5daCi{rO%^P zM}lwi0VFSoK_9x42=a#$sZ5KC0~J7|XOvVhhy>H=;OK$EA@c%QJN)2#KRx-GqQ~Z30}KHB?>0Nhtz42!!!ysc!yWCZM{UdXgI~ ztJk;hc;Q|snGvt_`@Ct>2rQ?@G=5JTKLV=&3EiowsTmh842Jv^I)B_B@u^ogxAtV`%Qv2nB`iHIjB74NumnKUUcKmbj$bqG)WPHU{IZe~g|~OP$u?rE zVj6NY?949nd#sr_L2r10UT1qkYMp6ggXfN3W^(Ds)P1+s>#I=L9WUISaF!T{@kcfokEKn-dya77eXwxmP<`jDp)2hSD4^O}h&(|=9W}STcV{!V>9+A4 zDk@2=n3zxU{&Q#B_#Pp3iSO#(R=II&L}bx>KTutu`)yDD^w2i~nza^I%ZeIsTRVsAL5x(ugWeE|z1zUWAY}5LX37AgGOu`!UmHS6fi2^=$Lr%ODWP9J%y{ zEc%HeFVem(+b|uLq?*3JL@#6DFj21Z($5U9K@ssPGOUR{NG^Yqdo9EpTKGDbsD8%h zE7G1ieu^Seuo#>7Z%vxH&SeR%b!(vD+^-@|UBpyKc0Ls!7D0-E0|!Pv-)Qd4la9hU z3D?W%q+wRcaAZ`!U@miON|h~+?FNHvdquiSwP)plgC-vb-ggZ|zy%ySR=#^~D1!)e z+tIXyId6v*ExAh$5%U?vn`d8ydlcMcxEe!3Pcpd$zI6D3Z-OwpAUCzuzwbjw5+*8N zPC`(D+WkzM%_g?dg}pOT{C5+27gp3Fos#3lvc9g*Jeov0EdQ@NNPU3bwA3`>4a#>Z z#rTs^u2D8$j&5VLl^Pmuq0xmro?H6kO(#Jgwl|FMzOg5%o%$k`kq}oJ?DAcQ)6^R$7l%Ub&+yAbijJkB;;hJC}%HIUssYox}ZtBWNeswhX|CrsKTts%$M)vokLyNhSoHn>sW*-v}CU@t=}f_!B_3esHe zcrY;8@Vfq3CtG&q23WR*nB+55k~qi6wi}i7 zs5^97K~y(ir-ptw7EG!uXX~#{b$k-8ZM#a`8@8tWZzhBP!!Td#+hEunVbb-2#}BQ@1k80!IrXa;lrhgNPHHkA9UK&fWZPJn5_(O+=ER|z-dp*2 z{AUp5L16w4neC%3&oho{VN8(>b0{`u6xA8fy*>b+>i0|r 
z$seQ86JHD&$z^{tWs$*7mhzsyYZ~plUF|zcWVt;9!V_&vYw!>z;MFzns0KMMn}ya! z8p$9}Ce5W=yi}X#xkHd@U`2x8_m1n*q^y-+q0f|GO7GnrDcL%boF^Ts zT`e~27l=Q{m|c)2hB@EB{95tIBCGU(5fkn)V4Ss)@rD*gjPH|YC!%<}^uZGQ(?FJI zxK!(Ag2qYF3BH!o3q(MwWy}>uJracwf|pU3b=MDsuizFUAWMA34BC_CM)XeS3w#=g z`}suUlZcB9+CxpF+6N^Hij9p0ei&k$dFz(os^-?#uEvk}3kU&br`=;fS|sr`ONj|w zr4`kGr{~$@^NYrkVmqOVOk@}IDtB_Ub!fMG>~2TLE`=EC0kL7&N_#`O-9ii-fRKDD zi*3a&j`(X=C_zZ1*$=5{sr<(HPr?a_UZH`4KOow1JfG_i+sgGUJ^L#}#|M|EP#{f- zGy!kJ@v1XoG+(_m*FTjjsL+Z^=B5yBNF1+b<^{OmJcBz5OqKQ}%{vUZI-QC_uFYFI zlmXulhXn{h40|^NFgC%cLcAlm_jU;2B#w0(S{2&b+jp(MiV1rDBGOj!FB}-N$+lrv z8AnwF<~KAxVD)f*Zc`!;y+Jh$?+kXinB`MUT;q~CP@v8Y_uwp?m|7;6hk%R;uEE|> zZcrMz=3f-2pnF61{&t>xE^qT+EKM}LEg8l(06pk&anj#;Wn&4*g{b~28e4JaW_f=jDQ3?vJ1MmiuG;E2hU zv|AA(v!HD_J&vS_KdSD_^$8_J3f+!#q#&fvp==&c0Nk8@b%=y1?)N zm3%2#Bkx*2xjdAIDP<|djgh5Dw6HQ#_SQeey!&b2Ku8W@LSPhns=sUo2U7)6BL>R| z7d07hu6Q{7#@CZg!f>E&%4G~@xJ+?9SDT1wEtS+DUtixgXifOE_xHZ;-~IdjuIv8q?s0t{->Z!G>vf*b^Z7iU$8kJQe+@Oo)hjoyq$q0j zVI_Gjidy7NQ8XRPY4I<+8`{3%elFd6x8 z+i3LRqssY@n-tD3pS--9U%8-2&fWQyGNa9H_KyS3^oq_-)z9l|f3!<=vj+Js_rsq` z<2$TU6eIqa2s3-j;}4I|(k=LFh`#}i68>szs7Q6yI)R}+!#cA^r71B>P!xq zi&ul$rCznPu;Sn3W=F1Q+I?g4^zYJIi zpJ55wxck-Y%yfsDYh zWbdV`n743pmV9J9zHR$<`WQ*aS6OEDqGok$soEKf4h1mp?h`wB@L%gTBC%MV=mE!l(DVaXk_vdED&m>C>kZ9QqsY?6O#^Tj*|4dD~#k z#K4D>x&e`qtb;>CokK%Gu`+Ik^|3PL!MClRU--Q7{rmUW7bVB&AD{C|U%gtS9w+;5 zAk(b&&ZQ$!d-@$?b+$>F)u)~*r{DL>dGOf7}`?ez3y zt94NH_MDUHx|}DUq|W!Ppr0R_cJ`n*QQ`>}O%H6GoZkOk76ekfRcrRe7!m5{V9UC*xek|-l9JkcS0~1P zep|DfhuK@=?a%(E&W;XmL|;imgJy3|#SM;4yvO#8PLFmOTSbH>C-Z&$_>o0cL-JUv zHlxhNpBkGPSjCpSdvv_?`@6@-b*P@ITNQjY?=JlA*D2}`fA2)%0WbL#kySOhtFZp} zQ=^LD%G)9wl>SIZN#EQ1i}#%W=-pLuBS9f(V`Oalj=g*L_MOk8M&5{j(J%93)P8Bh zDJ?B6YTu*Uoiq4n8yg45jj}R%ZEbCq^Q>5PpO_f-6Y07!hBPl-$4%uA9lGx}^@FzN zp8BV$J2^QLwQt^JxzCiJx^SXHJ%x>L?_L&@>afG30o%R)%u3$ay>YeA>P%cW{Y%35k9Kr-FI7=d zd6|=AF!(XU&)@&>p7w`vo+~$O*zmEX<-X&<`aZ=ACd{AQ)hpOTJ!DTOVbGg)GrTMfHm*+b4Umh8;i5WfdZ3v6v@^V_o z@69Eb7B6!sZ;CxJ^4a-I=WRXD{i9!B8(`T4jz8*XPB~iDlV{PygVe%IHnb(vB*V6= ze9BX%r>EzRxbl^5)Izh=_=hv9VXRkvl`~+)3Vf*3givr!FSIuDhBO zduU^8yB|r#_Sa7f%DVl7rlzKLp?l8QOr*nEWDEK?cXwYw#y)RlHQJ!a(TRgBS)(+N zH92HI+Ah0|t6#6N+jHu(bFxkrPjpO-fr-gA!~lO>-YE+c^&z(6K=&Vdh3>4YH}3Mr z3a#9*Jy0jhG%5E_=GrN$XCKo1p?&+P?Pqg;){S5H7Jpsw>AB?&!?NYtr%oAmzPe`q z=^4LTtaOU%d|w0iv2@+D{7=uEIfEQP|8w92145xJSu=skpSfxNId^w3o5W>oZv3$n zt>tudWziD$Hbb9w$Jh(~ni{@=J%~U3;sSY0k|~*mPvuVysMim;p8lNcm|^&OB}r+h zValVo$8T~T?D+civPjYFTC4`u?!FuqVfE@wd&+p@+%F?*8lvv)b^Oj0yjkXv{q%S2 zhIvcoHgXWuVk8al#lFU*RVUI;UBd<#kUaH1ZByag65?0&r)zuY}UBaic>9Vu4S!6Em!m?+0&3n4fbl=9eC1pMo-Lr@J z$lYE0RaNPH7&z!po;=xxRMVPmRT{8Cq@$yQ>Ug7zB+8qB%GCFzNbc3ESE62j+?&%* zZ@^)!Y1YlIi;+su&M+LAnyP5YGRtroHT+f|r)S2XVR~E2{OuM}%XgaA7W*-BlOn{# zr$JFqoriUIS-z)>jg7q{=_sIEaPb=6IRS6D*KP8ACI<(q_W}R4+eD4Z>HL%)r9U)p zi2s=9EP|CzXR`2F!^TEOS!}Lz!;3PD=mtJHn`c<;O@r!NY+W?x!*k?r$G56m$r|@B z*G*$}73AfY(5~LNBuL>3V&y*4D01_I%p*uro7g2C_VqU=bq){Tc&Hv1kD4th7c5rO z*2Z==&*?ROi|x0{kYjQiHUs$F-EGMwDY=nbA<#Q3YiERrUNFjUS9;N(19?tE6jk)+ zm!Lu*%O#XGfu*03;%vW`t#BB4f5a>nJK^K&yKHuD&USX%wl+$fp*~KoVz_9YzT*vN z3C&{Kt-sx6u>W`wpToC!`fIK6OfGZ3cGJ?cEk|DehT8eLwXLJ%%IF6@FN2Ry4fxLH z?uwQ5xX!H@yoHZ%`Fg<8kQ754ZEW#U z68OmSh8@M1DDSVY0~S+%=BD(JbCxH((QK&o(+g_` z5fPCX1GBp55@geRc4d#mT1883d`ORD64KVdA&KAVfpSt-QnEijA>oj`@$2jB4NXmX zd>J_2vks$huBGgL_GvOQN&vu8IKDo8fgjRNi_>g5`ao6FXX3}_)7P1W?+X1!>HRiz zWN$$_?{3N%HBz!PV*6PED$!}^(^?LWbULru=?q{4rLOXzZJeC@Sqt?`7SU``i4fBC zdH49FI#o&1xWUOMXBa8+`9L=(eTF|hSD`8!lT?~&H?Cb9pWpssUUuf&y2Glf&8&KP z!J!*>ncqYqsKM(TlKCLgb@+E7+otBT?tE@X!9Z8{VD zK1t z-o8sry^q&(aER>Ku|u*nR?cht(v?haEu4q;bPo;+AvTZtShwfvP?dQ6aD(*%$8KI; 
zx~dv!C(MBCEyZ7`xAWIs@e6HylraLP`f2>30A(E7@CuZwwUlI>INBSnRP7yTEi~G1 zqoyBr{+)f{^C`1wY}pd;GcRrS%gdu~aia2upz8JqilY>uiicEIDlyKa3l9wqb&qyc zNOnDNj4k`_O*u|Ock5hmA~qoM zay))#`}qxiDbk}Vk=;Ut1^asggMxzUel?tH%}yixZe#OQY!J}H(9Db%AndqL(xgt* z&zze#Z!+oGW4&+&RdVdQ!yLDS&f1IY*>j>gLRgT)bNYVWNONXUL0hf+kNH2phpV67 zxx-@X=on7VE}1wjnYosMrgtBT%K_B7Q!g$A0j%l2Pt`GROk6=KBJ%y#O+^5($;VPc z$3A}k%#O7w!HeK?DaMjt69Wy8jtlxNCkJ76w?*TU&d$ziPJcY$ebl;<(a{E0^}J>B zZk7E=QM6Pb%Z>xQ0Z!^)f4h7bs7gh;>Wf~Li4c2$<5F1RJgG@ zZD(wFc=*`7pWkw6*Rdr4R{K%Uo}9^9L{)DtL^4itnO;v3a1OYQq@ZKQo|l^&FYmv$ zieB)+2c$;F%^#46kim~XvDsZ_$06FYiT3^oqICb3Twus(lESXkz(=)pV4-mI# zPNgsY?7Q^#twwE()CP1S1YMK1$#n1Ih!-!!febVdU343FEeFp1Sm+^@nwEAnrW|KX zVpwR$4n~CVK_p3mlTV_*c9z|c`@^E9rdHi0>G++RwvKAfK23tviq>VkoYEgC~ z$}i29H;;9&sn*BRP95*n$WKcXMwa)(Q*_%rPDtSX(OwYs)nM?&YIezEy&0475<}T2 z!``G5#3yJ>QoLKY>JNQ-ZjKtAX@5Y>$t7d_`0WBM52AUo+c+abyL$#7b z`T1hp6)vQsWtI;#R#ICl)N;Ye+Io1(b7=UYOyqIwUrfb|3!fL=l+L<#ZK<@33{dlR zCjQXD(#Db!nhO^$RQb?Y%ceFaAd<{KKIPoF=e*zKP^(eN<)tiQ#>+jYJC`k?r4{I! z`#oG_{^1Eb8ylOxwKW4-@+*hkVvhsriC+9^E~J~!GBehF@D|UJk**tZ-=lpU=YBc* zEmscX3pp>dZ0S;yUyGJGJ3Gfzd|D*mO7Ek6;oQI)3hQu+f)ou ziCo6Mtsp%X>rT@xC=D!oZS)`1kE1%v0G?k#>;4tR@TEh4XdOdrvyfgP8*08H&T_}# zARV6T{Fg$RmYkU4ybss1KWkk~@9&bKqe4PLsGB_&Yp@i`j-M^!mU$x|mb`z@jAru? zL0S%d8f?IauU!l0?q1&@Lceq8&X@yeKy+Vv{N4i?K-Y0Za|ECnuy$w5Y~$~o`)^_# zqM~+NG6QTc5f>LnIIl;;b;P;zUP0wSx7e=FdCs=eBL*N*4)H&EqE{3Q6vX2uxBL9Z zHN{?WA4r>ZEcvEd$z5Fo0|V@}+QbA8{z&0<@vX6o@099jYv04NCTk_FRE`kxm-YCi4?xcUCsZY9 z(zpXK?w<2-JAd%eLU&h_HdM$??5nzM{)>Q^pGrd;OixVN-~M^q*feo6V9)c;(DE6R z;W4x+1_+0}4t;CTZTW5x(Y*)6`t-$%iut)O^Zav-F*0tQ#k=5@18E+9#jwZ)pynTXs-bJV7jr{Z3R=!kHX9zLQVb&NU}r z0#elne^I@`N&z7IWX{UH1!#m<$jQk)vub5FDi7TMXLiQm!UcLSFRyO4S1-mNeDlbz z3PgfgOKp{^7u-pLiu4r74vdEck&-9TOcS|d{AZssh_@9}3BGCnGCF)= zN#kR$b?eqOBx`a9vPjQ+n1h>8_Lr|%QPq4quduA5V)3-KNOj4TWuw47csUwUoXb^)sx_@L#4H-OvPn77 zAz{5zmz%o^fWHxF-q-IQAh6D_r3QZ8JZF8a^X$4;j&;{OociIZ<;JnGQO|2`sk*v4 ze%bzBkH`>?%ZDvmg_EXEDO*Kqu2```8wnWYqba{J7tKt{le|O;8p0rOc+KvR{WEQZ zS{jeegfMSBs(0=+?QQ^ucAVVz88nB#zrW;$#gT{;yCs8^dl7JY8Ty^Z{sZP{ zQu?c7*Y5WEVa7xd$ufF2rO^-9Cfh<^IGw(IBjcY@t-eI`pS+w)x|yAmA+o}<0smX>D2X*=d4<1(`4-FDvg ziHkpz3`%{TK7Cr8?>g@A>Gz8-o(ElAUg|zEuS*5{w#d8j?6pZp-Z!k9HU%K)zpGA- zb*m(~G^0eb)z;RAU2rGe2(X)K)E?m$CmybgMS0#4oDru{-Hs(|(A3-(`7je=+uyit zu)W}PzrNH?(gD4E`O>)bC^lJ3d0-w3v3&XR%I4Y7?9+`0EpXxwJ6%7lunkoi{NVDH zD_;Q%qC~`fe~GTG0P;$SOFW}Lt1j6H!I$k>j4pEsI8HXp0KBJ|@CX^#HInpP{UHrM6bjH9D zO=l`n`~w5!@zWYFZ48orN4!9!9R%&mCcJX>>La7U9Q-Eg{&(f3c3yX*?C*hg=0+ol zpS*J827{rIkuAvV)U$can>TN+c6B>tQWJ3qRnQ^#2+h55)A^n#YnzT@nq!YotX@n@ zM|$m1RFG(7Oa$j^F!W$sG-knZ?{FE>XWMH-;n*Dj_5kv`3~0Cxk9DBCI$ZPqfu#;Z zpVkvv(#2)(x%a95I4zwhgjKrgXyrsNwC#On`GFqI`99!A&@oq0O&_>$Szpw>A&U*v z2gtay$7g?x61B z|FN;NI{-9Cs3a&CP!zayIG=+^q1&{Fj*t}~V;MmOqYvkkTjVXibOwET(28|DTXyYY z!to&)3~%++wo8GIZSUJH&RRQ~?TWzQkY0QDx#oR1Sz+5H04~}GVqfnxt1FA#Y4#TF zZ#R=a*68ch7cWJej9X_%n3$P;kdY)?+;Pq)+hZmnrmWqx=lnh_)Z6$&s}L(4XkZUu zp~DNxSi3=NU@`8YA?<5@Ap+d};OFz(?XI9B1Y0jSI6SlCz1h*Bk62u)=P_~oZqp48 zxy@d)qsu^c_%L!SkZKG1PJ%o<4uAL7IV^eUyLao!HUTcLVPd)p`tlx#x2n1wbSqZu zA9$bE0D%Uulke>Dg3nW5mT|ayXPVRqY$!~6CtTConDyq6y!<`X#J-Oi-0mJ829}lq zpd`KmGv1MLbIOf43eZM0q$qJ~T5sdlhC4fqo|@D!Kf{X>YWG;m+W7eRH7qQCU|d$+ zzki?5_h^c}i5`PRdI?-eb{U0%P1c_YphyS` za01evvn-&|x%2b$9h~?Q!{@Bd&9r?85Xx)6lUa*;p9sn2YgZRH2slFn15R`jV(!y6 zu0Q)i>RwLkz2Fr*`6LXG*ALQ%`$)+$(v};Kd*I7xj*hjpwQU8#_x1aC0hGKDf`KKw zUYD0wgL_Rv4`XOy!4LGK1x7k@kJa6)*wrqy<;vipIP2=_a5}luPCXBma{jS(fJgq@ zk*T#fp6gewS~WU38379O!Kc-#7xmPYpEdC&4bc+1nqMso90i)-5HJfW*x_%t)MZ0@ z>*EFjZ=Jfm0s9<|1xOUEXw~{o%Y*g$+TYI)R1$*5EoSt_W*GrgnhlTv_yO)3W!6wh 
z9v9m4`Egjok+;Po!OnPSEP&87lvR+o)+jr3{W)Z;>05{HHF+u zD^{&)VikFQfuk|2EiCc<>Oj9rRE#7rg@z_3+t6}0qLa;WpUJG-o6@cNrXfMm1T>6R z_IVv!8=ISWg@C-gyy=;leg|L9F+FES{yC4I4T_PcJhf0%#Xv9epL&)&7W*;V>U8Er z!siEIRa=e6mHIt$5bgZOABBN9R>oN}DfL;M;?(*x`-NUsT3T8dpXLGdj-22r)NJp* z$h*YI2-z(H+kHJDA%QSXk4~fs0D(2J_V&hkMF99*&&tXozYMk4r1s7Z$vDMNlLdHi zaJZU-gM(%0#vuOiKrU0>Q4dCJi8sEByT#t(c~WKGiyROD=m!F)fBm{16B7d&!!JEO zUFgh9(bqFcJt!eelopO8Q``+dznjQAez$MmCIm#G`%EIz*BG>keg}4DkTG98f2U1P zPy1cFb`5D!Da-WDHw07lUTkFu9#h~m$F{R4&FW*rL8JQNRY5hWT%le44g8vIih{XJT^%H7@F7;D7u>6{oL^Su5(aHRUMTiM%PTmpO7g#;=jH0Wbf zw4dr<%1G!tq~<0gAGZ5h%x|A-*OGp7MdjG{er-~hJ7jKdo+ zM>K2CYG#+&Q~rK_jr;-vD^F*iCz=Zqz@5R_o$-U0iR`T@iTAk=cmzGD3?;fD*U{2Q zk*k_1cJW^^+=c$%#c>zy?Y8nsJr%C2Q zGhP2vh7liL`4i!^?9g?lsu2ev>bSg{N<7Y$@%MH=qPA~@O1wf`{Dtw0ovI?t-U9J5 zgO=9T^^0!e*S7O#Jq#S>T7zHBwXSsGpDN3qQd{v?u^rOO@GX44Xb|MjpHIy&y1DsV zUCbt^L$pzHtM+km#j|YzMbF8}iSO9u#MHPBT|U&ZjfvL!3&MU&v>(WiWuoHZzEM%9 zA4t;jOsrv)S5)i*u|;_U8-Dj-6(Wn5nKM1@f-fX!+&}O%JA024TjOG!Q>qwrR*0OJ zX8{L&kNOgP;Q8&GoHUd^y0ULu6V*Q%%5LATHau=hTm@kVM`Xt&0ysbLl1KZGw^5A% zyG>f;zh%;Z&i{f zU+bcgAUYCiGvqU(>P5?XoO1nB8N!`>^dWs}YHCMM&q5It5D<7LeVBp9*T)BXxIS`$ z`P+Lmfb*RL0|DONRE&jKF5PbZ1>eP^HLR@uNUg7GYE~fyQr)w|FiD?(Mbfnx&F5-lEA5iBPSg~pi3RFV#T-oSE(09t$PzAZ;?h`4`7&wX^M z?EI{Kw4B%GZ7LD+;AjCm(m$9O@BSO;3RzaIdp>^pw1`5D)BvhI-&!M*?vO149vzh#Ykp zwhQoJJJ4pe6?&YG^!(Y^K$IoIyjT~`(Bl28?Kf;cQw-3cZ)y2>fuTgUM)!8uxjb=0 zqPca=gMuSbg%Kg|7A!6y4Mf$&a-+7AzMn`!Fb2&28b}9H--q{U$al5JLac^jRk}gX z{nE#e^L4|qP@af<@yxto(eJ5YMPx8YvkH_oJwL<7T}u3-&}eP|Z+3t$p9=gv~VX&WG6CgD>|*rjV@`A+rvT4av^4M+PQ#>HEH zc%q%_zz-?H7trl1dPj(;V%qJg+CH2%{|I@gaGPb_{h&i|LDebsWl%IW-g@f!xobcO zWVf`lOjlpNa%G2gyR^yhW-9;U#7YEXDbBy`zj?Vu8_k&zp+{`?v)o43__6x3cVe)w=T~5uvCEgeQSkE=zSF8^4Brv}WD9E2#DQ z*nFbWQN?H>K(8J`i6TtUx0>4wL?(V_(M07-I1eK_O331ZTHGhEfkxeZcC_MRP2Bn# z(Ir8C##Z8;VUKVKUV(N4-wgUh+$s7zr_9`JjJ9FO(!+j!fxST8*ya5IXUE*$@td4E zH89~}N&9!5ktvs`BGa}gXqX$`t-n3iQJD5qUl5rB7N;LHTe)tHJQ+Xa`h<@$axM8Q z$*l`{&rg-^-;&k8Qtv;%oOg+ni>vzn_ctC>Lt+#CaUrb3YWzY%bfm?~w*C^Vcw;lM z2NV_-F3A6nPY9!t(+Bg-visbzOpD>qs}Chez7DUT4|dgjuDp%7YB+%9e&>g=-B|Es zu^#cIXJ3P!6ii)dQ`0)Yq*u^mM&NF7sPB3ap4WLilWw!j%K(Wlfd`NjZMv)1gFN53 z-DOY3Lb(V8cG|EPEDHT*Y2v-0p$PQ>DbCbfHdw6=eAu;s0KvlFKct!;9bd((7QGKG z7#*XOVuytHMKQFR7*$|lw1i>xL6R^;hJ{z8p+-6_HIgr)Pf4&9jWChS+Mb_7)6_#JL`@{(6$qK) z^rOumu$FV{)&c3d5X0+H7i;BveHN(FCV6{@&U=~c^{9;!Eh z9{)Mix&h(fi|PZZGKeVCpmeHTkgiH0hSN~u!@P^AmFu^k3#^o&UA1qqt%C!}CE#s? 
zj#Xqldv-M`$%Od<*-i#*Cuy+ldusRF*oc1p`jr$e=rsD+p=y^e9#iZjG-51@W;`VX zj;F-D3$dqT_NHBnN`>YRly1{UO#mEulb$*@ohtZ2{`~m^Jzw218nQ%8#s2&Lk~c(r zx3X}Z3a+rHCcdFvuRwEt1z#mh8Q}t8Zpw8UIyY#J#gdnNxlBVHLPh}Ofv^2d4~ZoN z-$cZ3U``gPjX1O+^vvEDGfm%YG*aAxmr&mD&tv}2Q~AF*v+6_@3%f>3D5d6xM z@~_l?G!f*{yO%d2`S*$A@+~?S+pqizYnaN4i{pS`Pft~b3vL+ec@sc33kexq?Ul%u zsJUYdo29*P-MZC2epl>o@<^~i4F)Lj#yi4NwHQF57{H(b2!zmw#IpyJkHxpd6cr_< zeTIf#^AwLUE&wv)T~I%5932mV>D0_HJfs@Eml3UOIhq5au2WSEG^Gq32u=H{w3LQI zUO52NFgntCFeVzZ=mC%s6oqqo1yT~Ify>a5OR=FZtiP0lKx%k=Qe>}$1hH-b1e7A* zhP$@qI<7*Kc=^_?b?48Y*MQAv7OIMm`Fm%>zkp_})0gAbhYuUTGAK=+ad2Rw{ds}b zZJFY1&didY*_X^Tic@)f!yO5G9#YA@=I1uxJosV<$vJ_xxdmJo z*JTh{-0*o)M7by{E9-cB{~&ScpqBxmV0m+o`-Z=W=uuZ!*A)OMo=t@w?7EtDO&$(X)fD8mbQTiY~h+RX{`A2Fkc`Cq$tLf$|4Gl z>r#j%=RZALK^zaLpTzwZCU}yb>gexJ`Jzxm{f)nOjEpe_PN&4Lq!&L76G=_83Nu(Q z`0fC#RtKzg4}s?B?CteMi*^qJ&K)V|?FiL-I~kxbB~35(h96=J^hu~UOGkRXh5@E^ zz-L9ZvyBZQaL%9qbNT_quC*HoR~`VeCS-`o9oSm(3kn#Sn3hm1I}Dc;{`qA| z>IqB_hck-b2rQgTCCAH`FGCm2P(708(60rahwx+Cc}~Ir*eOi*{iK(8cJAFusKBpb zASLFin9?Fj-^3)`@Y3A~G+bTSyS|nz-bITSYv$OQU}5DBj=$ZoaC#C22o}@V-va!5 zb5NrGtL)3Q4m9w7RzJ-$3w~8wOSd_;z4|GG>@-E}z!^pr8ff-Ybqh|p{s2IF=`~+C zHjS4BB)jrr%BuhYnZPtqlglZk%j5@_ilRm!kdj%;HLf@acFgbyc}_OSY7d@VNe0 zGtZZv6KmC$z^<0U-H>~h;hR0b_syGYRPTt=;Ll6P0cN7D0+bCzFS`T7^G~qs1oDDh zB@`nuy+D$w-V0~eC79*NMgS}H#-uOEluZv-)6%hcqc(TEy0)sXAz>L&{h<(3F813- zk#k{Jyi91r;!BIG>eOom@|Ryl8yaW|{JWsNpfsb`0+&GevcAuGFB%ygv`(COKkYa} zKRy5DVPD&Wjbqw63ARme^;qnoijgnC50oHl62Sx}nl%S~>0jU5ezv=#@ZiX%Xld8AKtc-0t%2+nThMU&r1U@C48}o0 zHAZql@wAV|H<%t~Hu`$Jm{h;NMtn{Bk^xD3=7Uev|IwzSLQ-4yOn!dYr!mRM&c2#b z78m>Vm)FS5fdEL7GJ5Vx^sLwCdO`fIuy9^H{>yRFYl!P|U!OT48iRv_l`o~MFC4^7 z3DiFH_gnxj=&!`^?iYXV8Y+!9&0x)wOTQ~2DfzuEMm}!gWx6S5W@bLkc==#CI_V!D zp6Pvw+i9mD!h}S;r!rRV?+Ag!)yPPl;G%Vv2^%apzP`D|YsTd56EVjSwhr}L)90Uz z#s5ix{ukddqP9g|LiKs!aN+7^|80Z(aekFye>?sE!o&jY82s4#(24_umj@rfq9WJ*Aqn;P!DCZf7S3z3AxZKz3hjIUg(F#~#!U(*D7U>E8Pi zHYMVTtm7c)8`@x^C=Cq$}1Q>FZ28lVV`@7+sB@pZ=jubcPDVD;z~g4z6$gNgIC z|E?)i2E@|IKd8w@9bx1N+7>5^6XzTHdx8c`v>|Rlogu9G79Jj2WE{yR@Pe-z8^sroQ_FHNq$J0|25>Yd6C*zFkUY&PM}eRdmfNM+zv0##WmNpc^`$Tjb>x4VN;g+kFI$lkujYh~8^3*Yc1BySS=p4StxF89z+SJg#gTHu%EwYXEwV1bv7Wc@$RkAr$f1 z?McPh1mDpIbnl;>4FD2i&cdny48lci($)9E< zkk9E~k9UHm)|j(gULOBI1IPyXt#A+F zsYO8TZo{rZr=BejU=}`rF(B$5v=GvpH-I>wiY30l5ZTD{G#FWU?Qr8XNO0%iqlrq4 z?&EL7kp!2>FM+J17`4aB=0}?Z7N^G}eqrgttC+pvL-bZixa3C_yE!lU40R!<0tlf3 zb`u-4FaN+OJf|t2t5^Bv<(z-85k4-Ebtf%kJc7;q^;VRgRe+(w8=A79#XV|XCuJIPD=O-dW5_wPd^%rdxajrGbG@DXj> zqzfVK48*(7uLE_&evkhT4>H-H(Ta%q4lCH*lv0tCnrM|Z^G$JW+?S*$uE>)k4x#P39be0(BJ z18d%W{R_}Hp-lmjBV(-%4dd%1JV|CeSG;J=J^+4OOg*HYdcG0}gN7oc5wW+U6=~dk z14_m{bjCK$&S>(Npgaem)Ea{I^a)fm-X*Pg%a8B3A4Gj%`ko;jJ3SuYtr*ex6eaAfdS~9ppjNahBDGJaT^kZVk-z>CY zidsZ~E0I>wp>d;jAwgb7AtG)R$WyaFo7BsL$x9`Qw=G_-53g`C3_URUJsF%^>{o)q(m7Z?2y-9V>D->VIs!{Bt0!iHg4X9dLp8`WRX5lC*SF8Au#@n_4M=rxxb=w zAYL!{V5|pS@Urq3Un3WT9tc3Rcg+78o_Fw3j}%*q40{OeePCE1c1UUd<+3$oZ0{|2 zX5&&f6fT4~3ny8Vkj|&n5)I5RLhjz%GJ>|?9>zxg8D7#$=J#f}=$0=phC|fBXAUl9z7we|V=$?p>F46%DOc*pbNkViCax-;|6Md+S~UOX zu{1p$ojkk_Z{a!(gwp_m-L;~kBF1$K={N5125%x9C~|ltO6!8^2EpkAU)6eY3}E;f zL8;rggS>OdpGXIqX&gq3lk9Vyh2@;2iRvyga|Jg{xgQBKV^N;76f@>cP~IqSWHLax z6aBQb6n6F|x(}n*WShj6-`Y-rp9uzf=t4x2c?c*wj*^~*2nmn@7s{@Z<@sgS__>&n zoaXRXVilKxbR*_V;OgpeIQsX4mkYWBoK-pfXu`vJg8^@ZcD_BE76X3$cz2C28YWB* zvLSOrs{7#T3FZTI>257aJOfv$X#XOOnvxRE*PT#Z-hACkKGDalaX|`YjO1D$bt8szUy#`wKK3L+E;9iWNE?c&& zyZ+4^;Z;WwG$AR5H0Bg(M$uqC1rf2UA_^M_ZY;crsI!^}@L^&m(o0g!h){x9#XKb^ z6TjBhH@3pU!l3XX4=rGhc8I;aT$cgCjIfb=VK{?GV>*a*7PewImn-rl_ffUtMH=cp z^z7HML_|!1Zd?k9hK53e`ZP0B%#Lhj{5%krj1J%$o z10&-9#) z_lMSY{>KVw+#n4an4~Mt 
zqk<}dCqOxu2V>d=oBq!Qo}#{MqCTQ)xGzn=Vnqq?UkG$}v=5sWx{?MjG?EQq=Nq8e zc8`KaqM_i!D#vjR47-lkQrfU;pUfjDg7VGw;aFxy6Sabz;ldBrPUQ=3jQ{!i{?{0S zf9}rze}8&n5KbPLxRwz$>)g9XosixCZZx4cHm5-pBc%`>3z^;rn1~0=alwq)-`v7a zHWXID8<2kPL8&DG$n#e}FDyJpP>#xj7oZnObTC8jweO*W2Bc}CwXgXG|7YIw~*8%5(_6PXl zD#*_JD3nw?lLOk^QV8W*(dB=SpkyfDd3%dj-fsviw^Lg75vm zW=SDx1y^Xl+lrntHYzJo!Ahsu+bUG--_leF95`!tZCs821D_swMJMw(^KSj9L{(uO zWH0@Q=HzWyOyl*;-b*N=wGy%nAR^(WC}<_FU&m7Ktuvy729j2>(zKWHRFGZNOM2@jg>T zhJh=M3ksLFXT68U-$i;h-n3cy-Jb!BfdT2JvU?-|FS&5mh zd*d#M>#^?JdexAu!VGZ8nHd-uEIvGm0avet&`E^0pJ~^vT+prmf88R5Ju0- zh1dYh)hwJHQ(@g}a}W2_T=%O)2l*Ovu;3tsZEbD2xW-_C>2FBzMHapek7@Ihv!~NJ zn$NFuG(#CT)%3!l*WhOMZaL%bo?q7~kOq9JjE0-Ib|vfUvHF;2+K!cJV)cULuLaq) z|6@i1$Q{YouBN_th=MGzN(x9=@U^R%xqy*OB4fo&Oo=<)XQxNWNGXtNO|$RrR{75e z*DE^BaR`FUb@)*2#!^+d;zJRyN-%kg)z;UmgDet;GeiTn%WH>mYRE#`{hqW!Ha&97 zAzi_#7RxW9Vs4%+?LMv1>xy9(i_f{w?OYV(Q3vGVP5%a7T3c?N)Uu^81$n`bcWqg)`cPXSru>|IG`w`=!dxQ zmr&Fk{-)Oz6)LDk4r3-u{(Uo~uo=azC)b~wQaF?EqKWcg0urR!9@8CKxQ@i4B{Rv- zuPxv8C@vxrgw3|GWn`y)d@hop%4BW)+*=&isJ&8BH5erc@5T0Sg9%9tD}d2-=O57+ z9jk#R=U7*I_+m0o7Fmtzf33#Rr`+biZFeFgF&9aPtuKaAGy{2*@`gD816ZqJ1gG{v zj{w;wYVc|aCI{KYt=r|X1;lBI3WME>*64{sojgyHyIusSP$SHTy>y=uqdL$75=ytu z9h^ikI2a?m+VWMa4uX6FX|>G#SN}tok&a7ZnCxBO`c0-){>Z<=)NU47Gx4k3oYyAb-Ym$&$VK*ODc$ z7`@;&sl`SoVbsR1r#8~K9tHXo`h8wPi?e=dZ9Uz4^SD~|LY%;$EEq0uJQUDK>A-=@ zb*@t6Z!mf&FARP|J&wRn4$R%8`?sjDTZwm%xOsC6Z1#;g_GT6h@z?5H6+g{?d1-qP z9&8@?+QrU)6lIn3%*WLaCkexhAaynKGPeFQS*6 zHEKf#t66jcb3Ek(i-JQNdz7r^uwwyHQBk0fG|Xlb7cE+ph*PYU=VZkX2UKpBv9Hy( z=eDcg@mQD~PC470=^6NrCndteiYfbFQcA(pAMmVLw+huV23nNG1gLb5%mV z5AB(TlPLziHD$1pb>nbzWm%^?;Sai+6YyAK^zryNsQfyy+!j&tii$NL=S*Lxm`UJ*86y;{gh&dj6NAA~0My>`at|>6 zxe@8_t+q)@p2C1VH<@r7f11JhNnU=+_b@UmCdW^bD<3esT+@gY#2No&BEe*E>c-v7 zWZStrAZ`>yVb;ue@!~c{ua!P!TM0ta5VQKUi}A3lNE3=eB8UxT^&X4}0`#wA#pMp} zqpvp%J1)BnDlFjP!-uf$&3u2H-Y9+Y)lCUzA?+<>>H#eq|JmGU@L;Ove2$pZ4blle zbYwtLUS>c^p5!<(7_u=)0V6MhTq(n>b6A{2G!$d+PU`WOVoFd5kTU{j&Ft46m2;m? z5brkpWnuWq0O;(y&NVCbAKR1UwBuNmpC2qWikMJHglM)fSYdB}bkf_{1O;x5<_EH* z3;UMssk$2b)-yaT0{3$S?ozs58KozRt6y$%9lD+<@flh4^}Bb+L96h<)fBO4=}IM> z;Bb1&odYl_k*SZ-si_SRf+R3ndbii7Pf8JI3dFje>~zOsbk{(`@G7aGyljObN6r4! 
zL4@-A8-Al7G_?dzq~652t44-&pTSw(iu)H}%MgYs=Qi$ek*{*F*>HjB?iv?(LBoh{0Rq~kD>wQ;vK-~H2~xeo}X3k?m7%g_m4U&?{kqU zcw(xs0LE=EZ8I-?mMr)N$51qJp2*8%Qk)Z4poJjtk|cr(?cH$+l9&Y33nT{(Aq=XL7+I@IP$~1Ix(5}Fw@eSt2 z04F5QalM6dgfUXF|Cra%?NZF%kqvrxaIMb%)d*Pj0VPW)1k3thi1NM#BBM zN2M-LvnCtJR0QYXYC5ReBoO>!m>!Ig+Ir^z{qr;q zVWT3i=FmfF0~O6rx%-4{=Kpx0d7`1#u3JY#0lZD)G~f=QKy-1P03!0pmT$rP!#z;l zA%Q-|af~AVS&c`>_#OJ*(tsR3Az8ocSOl(TSxn(ls&FA(ez7gxT*n@1dok(>~tH~%tcZoZcZvJd#5q;(8Y#!8gg?F24P;I;G#CKB-cLe-@hL> zKnTlkt>{s@OOF2Pg^g0%E`wZTV2#^g~JiS9>iGjSf1 z3p3Nj#rq1rDG94!EklED3!dCA?e&KZZ5%N#QDpj!c&WhKu8w%kGSGWgC-Wlvwz=2r z7`cRn1h+<>VP$4yy!7(rUYN0#q1GG#3rsXsX~;ct2f*Tqmy!k%;= z7Vy<-yoHs3)tpShVC~7o096dU5&(g)t-wv(X!Xr1A!{kZbHhmv8(9KY5gaBD4#}6i zk?QXgi<2$clOnc3=kxyII|yhFgQ7n!VBl+N!IgsKx(uuu(IgQz#Bo6u8CRyox80G+ zYJQfM_~J+?%H1YRcoM4@hCCJ{MWEpop%U9CeX1Abw|GKr3_$x8k1tFkmx#seKO!L( z)$Q@&#$EFMW3m#VV$HvO8*d)Z+h~6oF3(-+yS@MsqvukF8f%O(cRMKf1V7our&UIk zp7?Zj0ML32jY<{9u*pyz^hyEr7AnpWQ&sm1Jk&@#mjBH6-7|Kp2fV(rtwfBR|AT~8 z_UY3p!aLbpSESaC$qpz+^?GFXS+W-h24q{O!JknBf7>_oC0dwm&;eDGi929+RMA(U zYpjObt}&c5@4R5Y#jV69C|svdZMVV0XJ~0DNYEH^ILo#g)CCM+3X{A}$}P-t4+aX$ z&s7=t2@Pj?K67p74Uk>f`Rs{RWs>GCr4m&=jWO~d+BxqTcpef;G-o= zmsX+7g-1D9n(PYj!wmul%SuZR_DAnZ&;c-8fONSwotFt3%=x8uS_wqOQReMu_~078 zjgqc}10bK!PzjFmR!l{>&biMgFq+a0z+2T5)3C?&&uk9)Tp;-{$Cw5oB&%vSCYWkSamSsLK%J*EZ_R#r_eJv*#8g3`AA%cVx_LaVJP=nj zz2@ty0-GF}KGg!tl982l3*7g@Em~Xct;V4IshVjvEg;0TuH%+PxMzU0*x0cTI!;<> zcyB{i4)FE$#U-2mq~TB2JcwF2($0SPF-|-#$HaX4aTtt*kfk@Ed6qz*a{ZdqDXaK` zxnpoW*p0cHW@6C#5GqI#GOMArwFp#yb=y`Ib!Ns8Bu&#+wogLmbmX|cyO7NH={gF+A9X}LHSYXz=`0|OxEMDx}*27*3sSP&IRk#(&?Rpn6gd%+$f=SW_i~@0a6G;wo zHSQ0M>^H#zNw|(}B@ME4uU6hm5BC(4Zk=@!h}_oM`8I$; zf1qXGo`v|Rmt5;=Pu~d)n+UrGX)Vb72f308(-DP(bh5%o*9 zobyZWnoD>fbndiVm}}eOJ8H-+eIL>}fF@}v(vFh_!WI+T#q`jNFVw7aH!iQOVTA`v9Z3foBcwiz8PyVR-BqYyC|L*a`#Ss*Z;vm1;ke(%sj$ z92cK;3y2B_DHy#KI(JIW`{H{>DoHSygeBT;xM!APy{{l#I&h+J_2_!<`6urKL1H6A zIP`=#q^p@G8=MZAKYJLaB_ncW0($Xdv2OTIoR?@r=7Y~yP{cMycyLhpFs0eRBt`ll zqOsyCg|wzOH#oDBIej;NV7M1Nbz@^@vg6!aH&NZ8l5_4bR9>cc@y@^0dNoXKK0hkrkjmHE=kIu{(`JObB+Kb!*mafoyOKa!;b=?2Dpj z73LvDva zMaHHMtW8U;HcXzl6(XdQ5ey8-dE&$g4Cp{omqNt>PwVej3BU1HI9x;ahUOxT@pLpBt%O9js88!OG3 zpQmyh1WFGBboHeR@b#X5h)^P9EeP0!F-)b5E*)Xq8`(Yo_vW5UnEe7a5AS~#(_osY zIPtTGC|JY?iCn81C(DjpT0!pXfiH$!pd{I3!lS5=xz!PzFNrN+t^ZNnxyR*vzkmG3 z9JZNhEMt9b-KrHg5sREQI^L2gF}FjBQYaC^oQI0$P~8<$NJ&^o2Z=M@_xtPj=N?ww_viC|zpv|ho!(HnVxZ(~iygXayrzBXN*+M3e>-Bw z8+&Y~!=cIP9j+aAZM)SkA(6F{OUb{3cf=P@D!<#7;xVuO=~N6)qdf;ddFTJh(VIpd z`+W>g`NT#qMyNmz);h!rCOMMQ#ojWx=dq9XY6w&Hj3*vb95v7s95Yt0=dxX|_D1M+ z1$TNi<6$#YFzemcHC^{SbPkVur!$q4M?fIla$`FTi?8ld$Y3jGwPX%GRiRGz?tO5! 
z2`XY7eQ@~7w-W{4X521XxcQ>Hdhk?MjyuI)8Y3{5llTY=Xw#1kwRd_w@w@H$)AJT- zQ|4^u5RzyF)x;y;*LaS#kE#*m=;_lwY#!$hPf!v~M1Q!;yLgPv{#ldNzELNUw$8cx z;DHf&j9alhn@mnSB+`bWm|;;>n$Ss%=K_Iy`cCn`^nAErLjPT>1>KBX6&p`3odH}r z3L&il;eH`3qyoZP_YI8$u2`=jy>5B~eg@%i`MQG4FKb4Q;>XmqOSuehi>*Pfk61lg(kV@uOQ zE6uOWd_(Cm1c)kuaKWGuhMv-6sqU|Ei}^$+%ifcfEgpn^7G^S5UD(>(jg-FD&*|T2 zna$E4Aw}-l?Geix?gTbK*6ARWr3?ESh`c=aT^cj@UCAxrO^b^Y9-&{MpI7Z+!m0O< zHf_3YUHjm{A1?fI!$imseY$Hh=tNgXg7Tac)oYjUx?Y<8a8YN*Q;2wRa0CShTS{!S z@2%mKs}``Ig9}?#>ixTL*LSDuCPs~07VaZ6N4{5-*Qd78kIq?rEwtXL~lvwmgG*iFhxxIuBf`)c15}xjFb8CfYN^EwJpPsJ!Ei9IqVCQ%$%2aY~XxE z%)E^~3sU#(@$*V_o1E_*xe$0w{vLXU{>ReV?6z;kFNlX9=@3euvF2|0GG*mg(DeSxp;bw)p9k{P13p z!d5k&{fIOa0X%Cq-ox+}3VbhIl34QVDCcSk6p>sMcFTip&4invwLHjpfRmDAoLnxP+^t*RO%^UUx+>T-YpY@V7QZJFuyE~|a(`=@ypcW*?FQN`du zf7ic~?#P~(R>jm+utWx%bYEApYcFqq&s15*I`EESdDupzAKrh8!3)gFq${N}o^zjY zm+{eAAG88bXRQqmoF%;`D|*~hi=8_EARTuO$D~ zSO1ILj$2utr5J5zw^b%kc$VB-w_HhBmS^XLW}&9?RA$i08jBU|P=>371tVXx0`7ys zEkAtyhs55^L?Go}9P1`7CyygDFGQ+dm06n=CK=BIZ!Pbk^nI$lTV4H)q~!X&j}EdU z$G`no!>5edIp}%iKl#8B<$(u2&VG5Ldc2k7(FMe6g96J7%FHzm z#`k>jQ=qrL4|HALGqBSEV_YpWfYrAV3~re%h;~E#E}ZF9CXs;HmF_5 z9X;=`CWEP!($Fwb(w$c<9+Ob3Qr8=^IY$#VVL;;*4$k^b;jBD8U(>>EKFYKH>J2+8 z*7+G-{d~33H>vOAlK5aKQF}J1P#U4wCpmu9i+1qmH?qIIZya~Ds$9N@tl_b8co1x2 z_o}4f@m5O*P0DFIq5LT(W{?b$=FHe>aH+=cr-(Q?=lJs72F`t3G#?&Y1%v1Z0i>go zv8+BxC=p@M>B{n+C6^l|W|IEAoGd9xtpe({zZUbkrF+KW+L!^pl~ZKI#}*^GU64$L zb$ZH2vpVUdq3J6#(!SWAteA3i`g%FH%jZzf^|=S8ASOslz5seIhjVa@03F{@_QHjB zW714s(IM3XgX0Er(Z)%FVtcahXi|#_XvH0SOF( zJ)Fp@c>dx=7>$jZ{x#0^^h!V4)6t&Oq6`)j%g3x!g2|CIGRG4MrHdoz{(?9$0P8I2 ztb5~9s3Yrh6brrzbW{^zX*KL`FJ!##YFfII#`rq&d1sZxF zL22N@vIt8b4Oy~r(!%#kcaHr4JE0ca4Q!HRVd0-Jh7?Sekae4O$ zdWr212FQ^BfGNL|=%Z2e7*z;$Ys1(2gx+9V7f)QsJQ*udYrd>D1Y%nhA_JD;QzD?cG?z*Aj z>{*=(GMpV1=g*yccw^YtLVLb?WXOpc$+=$0`jCS;nV?z|r??;?$f~za7*n|C4;3>@ z2o@qWng;iVXLknQ`w1+A2Uap=xiT0QeNsHR6-4Fb-9X3{PCfAZ}k_k)8oD$>ldhOxfPgo2yY| z!w{hk7I7nb_HK&r=@?ol=*es(W0F_i|M26ODj$wKBA`E`n^kd`31fa<|!{C%6(Q-B8(gB2$=dqUJ(F3`Ng)*?;u{f?)fcPj@FMszsl|-!303^Slt>8TBL5^Mo#UvlT{+Lfh z87inr)dct*!B^I6S-TVs#X}#NpqBVXmyA2FshvXl)jhxPRl>5R-eKDKwDf0FH6?-r zotpam?Qjb*`Xr*@-cjwf&+3U!U6#ggcIFp*9fCqjmbKXC)TL9!!0B-PIL@6(gf(TK zg+UH!lHE7pXRq0r6Zfws+8gOF896`0xs#6!Ze!+!t2-?(X@nvy`d7Y}ZpgsExP$0` zpqfJY=$_YACMz^vFRX2B3PkxNgez50T5xy3b0;EXqMxTWrdp42^8G4H{mFF$5A}8H zH>;kUER8a(kxX_i%;yAQET~X|6}PWmGI^#S&IubS2n3y6&8P#O|E1i>*K=u9Vp=eT z(b%xf^;XAWlQozOL`qb>=?Jjb_P%aE3;vGeD3^kQaZiZZI=T$^7i^&rj`}>>&hR0f z-7j#6nUU6v8p$KsHLB|++T)ht(kMDRJEs*V&$~0S5z171p+e~euvP+BD$$6%9D9C3 z5_zeU&j&^Os01#^j1*fgrB~3_AjKJTeD`3FeS7BTWO@#c6LE%R1zBEcTUiK z(mkQxVvVbV#;Wqh?-TZ3v7`DLeEs$JR-sKoi$=1}?70L>Zn+TXLjXHcLh~rfm2(=E z-eD2#~mrFkTe<(a;}u2%eP$P0tI6Pbos9<3m)rF=$)qa zgZz~Q(L|S1C(OJdG(^@~j!SZ5=@-gE*|(ft$>jX%H2N0L=0sdHH^cMc-@7@hSyz7V zp3S%{4;yy7g+eZq7~=mpn;G>PK>S_K5yfg4T!n;{U>y0lZQHidjb8OG?)7@J3zSXQ z7u-jR&d1%${vNdQ?VuYsZyx2g3{k~lv>ceBQfP-AWn3g!-~v3*w3#ylXnG`O25aQi zu9$|$Np71|&TFki_qM$#o9+P{fxIR_YC2Lqj;OOt|NRhANd7y!{~TXW@%CEp&HBl# z;&%L(4ZUiMljX-=%g3gzkQ*{+{L$UCQ|v1>QbRL6=AbGw*uQ`QS~PtT`Tw zh`?PiZgNi~RN8scpCxTsF11i-ElUFmz@l0Z=LOENm)5U=T=Q zJ6etXAxhyLsokIG5abkJJ9Q&7oF9;t0I6~fM2^e_a+0uVdgTCWhQFlu)^43R;5R0*ix@Um~EQ-d(_YDvHQr15VZdI83aa~!n5Y;a(cij zD%==)C8O zrpOy&G0VkJSSJRs6x}>M1BKz|m-6{rKf6`ghsU_q!!0WA5G)dwr>@OzVZMbzA%SmJ zOG%Gc<+oH|h(~C;*Kbhel`A@&!h;LHO*L;J6S8Gg$M|z!r~eGwVrggBjzA#^#{+v8 zJv(6edJbQG-NZ-i4_MMIg21yta_A^@&|*-D@6wBk`~X3hgSkEY`aGa4!S6W$bNDxQ zi`=?tQayX7b%(V4ObM@JvseSfam(4<6g`kE31XX%jix;pMoQG&B;C}M53~OqHruVf zGq?4Rr+!)_DaHg(JJHBU6|J#V^-474_z|0i6V8F=k%UWp=*?t8GKv;(XO!*X3QH^c 
zKAE*t#k^W>rPzkPC6DDUFoFz7r*0;x_F8aY=@YjdD1pNAUil$SjAfMdCqA*47<^+a zd;5&kYHiN-T^y3N^o0?6%6Wv-!z^;IO>xKId~?=bYXxgx2f&!dEyed%jK(>DxrolS z@3d?M=cL>hCTT{$gknhVNJ6@V@b%*cFuJb_V++Nr|9)*_leM3(#&{~Axk!*`ift}Q zY8&I)m4|=-KkfdEL<-r^FVFgG@u0-H{NHU`cGkohwqwP?R(&yK<5{lifoc7g;D{2G z^Hx?(_o}PgZnD|?)?&yIxjGb&cC0v^#PhC4L7fOfL2GwH#hF+%H4GiEti@8VkbJ65* z65W3Kv;%fwS!tXDghS}Ea3v=T-fF66jA*o|5^dpG`NeK1F>vQQ$GBpcp?j0KM7o}C zSp2w)#Xs4cKCv@*6LG)B#roNElxE%KStar#=lCUexCs=F?_6%~9?lw}PiX|^GvA|| zjpZ34RU5w#K2L#GV?GFgSeW7bbC`6s)iAQ_C1HTm$C9nG@_nNxKlVuV22fOT&!jUE zH$3I`>eAudUMh8tLQNmOsyVHhfl68jA?>%3-zI?sR2t(_fP4u6X#fp9y1da3UX&!t z%~!X}orMw~fGp@9Pr(|4#G)7`#_vG;*3sI{K<`(3{hXD%3iAgMEy(n_@2v|D3P?al zAs2OVHerw_k8C7+)R4@8?MBF!Ig;F|83jX8H}&LbDi|aZ8S4K;E2|{ry$Z1tn9Ljm zv8mWf>Fb$8&}SdTeW*+E3m0^TD~;fw>3NT8X+?)8b7I#APoDH9eIYTc65L_~f!=jX zz!h75-+T|lR188r$jIx+n?_*aaNvbO>|^~%$Adp8mEp6<3kKf$K(XZgTMMv?Akj}C ze>pzyPD_TH7Fdd+XGvHAc7>|>a`Ecixk*F&FgMcjwNP**2hy1V5l7D4Od0=u^rtcs z24rRqQDh_9`No!7$&WN&+vCQ+Oz2*FGqVey^*Gm*fEA1DN`ga3ejxj0OhpNfH zgSZoY{^k4Ezni+~jMlai3+AY^WD2pe3-0k&NWp9&xfVntNI)_@VbAh~qE>4HQu_79 zPMf`sefD8~9`)nqxEJuNirouF*o-Q)o0CB>zoh92B`xeMt#>XRk?1TT<9oK=jol{s zWSoTUv0PP}W+U=ef##-q(-(| z2PTUKXoU7Vr(EN{1Qdcnl)JrP9LAbtgQvGn?|NDz)_aaSTI(-imyZ=$_qNi$YyO&K z6ttn7i=U1Fhjna|9%g2r30*_22{CmYPD9fMOwoo4C9$jAhH#t1y=KR0I;)@RpKgL$ zkOb9&p^|L4HmeT^4cpF*k9nM<8UlTdXaX%?A%}_tL~)Na z#^m-52NX=J%bVWS%LWuN2Y|pC=#88gH4SA%rE6;Zj>#&t3 k@GtN1|6?%J#imw41urujhYpD0hINJ7dW==l$Z0?S4<*AKnE(I) diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/qr-decomp.png b/src/dios-egraphs/Diospyros/data-plots/plots/qr-decomp.png new file mode 100644 index 0000000000000000000000000000000000000000..d04c4d9b56c0d925e6ae00b81ccec041a1d6621b GIT binary patch literal 27590 zcmeFZc{G=8`#yRjLnxFnQ#463lr$PLl!^>d6d_5$&eu$RVWcMM}rVD zM@X5;kZB*6dfxY4dw+l5wg33;z1Lpr_pJ4-=Sd&;=f3ajy3X@BkK;JcOWON)FJfNC zOi|RLJ?g5u6h-SpQ8aB#4EV{WPfg$OpRG=622Ka;PB^)kIa*MfW={6jc23q-$Cf)= zIG(Yxv)w4UNm5#3`AH`y`!n05q-_560ZBVYODRPUJ#k!Qp}o4{8H!>vBmdH*DBiQ8 zsM8zwsP5ErjqYo9G2lGZ$S~;M^O~uOljbQk#xQn0&a|}U?lC*{n({v@$k-sq~OOgMoy-#(*pG6O_DR!Mr3Vc?RS~p!4HlKpEcQ*0 zHQM<#XNcPL)<50i^qqaR<}Kf|KWpDKHfE<~hdl}mWD?_K4p&^dbZN~)!=;zG*I&?0 zmFSh^UYnYl`n~oM|Iu8JOD5hkTt90cUA%ZvCHL`0QBhht23DWv&o_>aj+&aAUrg4# zeXJqfO1$sZX1go*^&fP-eArWc??Fx-0|SHonKOmo-rnHg;fajBeN76VkhHn%@6Vv2 zp`o4SXffK?$g}&3z}nt^t|NlmoP5ror)lr#cy3dDPfPgw_wQ|Z6Q_*F9~wSSJJY>C zL-KhXXLl~o>PH3rRO&YOF^R+JC(k=}mYbDb6-j&g^i4-;s6&6#RtbYNDq(l1B%Z>s z^opPg{vB$an|+a%ef{)nw_nP)C>n6dESb2yXY^X!$`YUOVO7y5U1#o>_3Pvs0A1 zhDK3yw#!q`Dd&eKc|!Q#(%-*+#y@(rK7);oEx?~)eCxMFtyqQn=ay{Fz4hn4W`0|^ zDXd+)Fx$EB!iyK1B_t$jUiBM3KM{Yr?(yKzkj_s>nmD`G-0xqWuW>qi_H(ZF@~PbJ zaktCc8S+KDN`<|BQ&ZPU9?fCexN&26_uYg9mU{<^Rh^uilItYaUAlBhT2}Vm+~Q8< zzJ`pU&xcI*gkn(^;!doaUOrW4P?DE_t!!wwtFOCj=KZ7SA8mCe-fGIqw93lL8~Uu? 
zRdlD}F4EG|7f`Ii3IfKjoZgJ}HIkK(Jz3fIJn{&yhNhY$ z7va4nbq7r;7FbzXscUHkT)DD8_fdjMz+!1Rxss9){^zVdcwFUU$3%aR_S~@Z)V>$| z@n&-JYS~SjSUhI@GFsX%D3%7@bRX}J-y1G&IICi5VUaX!5U-v8@gp0*oV8MCd3gN2 zdjZ?d{%FG`QdQR_$wdGq@j%78Da73><4`t}`F{OV^W8Bo zb{{yfl!Zsq_x5dm?)8TgbzKba(7DdddOH2AU6iyp`~srMkGdzJo>OBMhx_gFX8gtV z@8{>|Q(awMDMwyN%GoxF$;S@O`uq9C<9?-OWX}8f(dW+oKAqz^b?{Ujp1&qVXGKLW z4msb^qem${lZwfa?tM=>A02r?eS6%6P$hHX9q*QH+mx}Lu;?ehJX`hHDC>IP&wEdv zgyNPvzJE$EoUN>X>5wq$$wECb$t$@jV{U3^$7XD7te2uq^W5^|E;qMVc+Mj-R+*PdyA&L2cEd}H)WlyPZL3;h{L_$45+te^Ev)-ckZUV%ExE% z2$gr$Sa^AP<20f++}vonKXbsup-|pOE%@R^x(gRCI&_qTgvK0Nki#N;d$XN?UY;TX z%hTg;m$P&_{D$b=uo6EQknv%$eFuSYJf009Py-d1Iquj^F*xP-R+SSRL zPi>pGk?gSkNanlY$dR&ZD|B8tS!vwa#)WW2wRkRke^Z7oCMJeg=GcOQ!osIcJ-S9Y zt{j#hqII@BnO|{kdgCkSz95{4 z!utA&gvQ%pVZPlTW9g{&_ICTWd>VlruIx8fYwB9@@bWUPSg|6QfAiv9yLN4H?&VG1 ze<$eW%Pm$~W_$NC=j7yEjgAifl(4(7q=aE^c1mijzxi_A6O+Q48qWOe*Y6G_YwR5F zZzd02R8-XVqSZ?UkLZBg{Mb-Y``X=Q%hs(T%a>Cmo0NyIGeaDE#`yhA-FEAG>9Dm2 z7@nVae*p`C)4+K8^l3^-TbmtQAY{0+!W^N|b);*N*HqtT*;6%4xWe1fj)WsGY*P%L zik^6Xi(W)Ev$3PC%_lByxxjX34Q8v!uQ>M9)sC+@cz}mSSxb-;3$BW&unP-^>OVO6 zrlJC8%1G(`txft3c1+cy=Q*$;4_ImQYN`Y*twMt4oQ9g z`EvzQFJ4Gt-rVeEvqJxPqb$cQz#jxmA=IWCE!cdra`^%VhP!}PygOW_tE#H(f7Glc zcZ{DDzdHL<8f*VDJKG$mf}$)fEvfwXw>N)E+_UuN&6{nv?Q+j&X39lKnOy05e|w*z zS8L>FoaSvV{lUy!;wc*09+MloKE^&7raS%S0zElcSoVuyVVu?t=?j}*oh`(QCcP?8 z9Wb1%8Su2TvvcZi;v=A<_R$d;vw{VB4-PKEf&btvSnMAs+Bx{OQ2D@t11C?N3SwBa zEYL1@a+kn1Cw0XB^OuR%1j(fmWWb;*H-MlpZ>Es`vtWeIB~V+ z&(^#=dwct`nsKY@DzWxj8wdSAP1$@#t{I zlD$c~w{lN@5~Wdkc=Yw*TlnBc8YO@eom<@8+^Z?3MT-^@ykYelOSXV|ihVCI^}Fp2 zot3-dG{4L-U%-n)Rq zWrMt|tlmj0t5l7TKVDA_&b)UOW?U?gQuX-2z5T%L?aym(hXZU{X4z~yUCW8U-wvQ! zg^2TbA@1Z8!o0>zFRp#%=Ue-Zl11@Po=8!F*p@+0@@5bBHf0@Hxq9`9Q;m2E9pov2 z$jGO&gM;=QOP8(?+Tpq!+dvYr)-NODY(&NTs}T{a0Y&taaZL9Dxje|}=i6jqW+nn; zzE-m}<8&moe607&3e5+G&y&+KGPv;V5y^rI9tKp|@W}7f#vJp~#WYF)#Y(rj+S-=? z9v_gB7ut9tn2H*@K(n}YB|c3)xPZQzkb#! 
zYiVg|+?3v#bSEz_4*-C3;lhQ=fpf1TRDr{b)T&i#dXEn4dkhuxh*v-VITi48CVF57U|i)^M0B(SP?9P(__L-f?fwBi zmv2U_KO(wi3oojV{EX9e(v}|tT+f}0M^3CqTuvTav`kj%&bG6Or2vzpl5qH5DIylW zmq{cI=_=i>RY;j>hxe?}?~&nQWozq)PU^=hBG%hu^)6q&yf-^oemP%2^w>Zv8^GQ8 zp`LvuF-18O^doiR`kg%XC3CGPSq{xC+vX&8MPTcN6}zvPHfGxJZaT$DLI*N-YI=2DUF58yf`ZV; zSjF8)URROR($b`LQS5`cNc2cCw6ED}HX@McfiG22DL3Pzm{ z_$3m}dzkB{4F-Tu*q1D!H8C*>6W8B+>*#B@v+34fbnjP3|Bku?IECtTfr;lYE>Tg@ zG}AY<6B85p4GqiI9eg?G?S+jCK9m)gkF5^QV6zJHtvZ|>;o8vD{Xrhu12l?|P$ z#a|RQHyd|q=UltS;lEfg2*qrV{R2SLm>uV?cbfIGg)G~&x6`3S?F?UrDS{ogSv%lI z*ci1ge6Mv&4xfU8Lez(aeZbDR&GquLVYg5p6$0Z8AvuJOO?l!jNl67bq`%nM7Rj)- zwsvB;LW(A5D%Z9uapAl7@AVZ90ETFbhgYR;#9~}jjxTfiwaq8=sB5C0xWmMt8Qw{p z$K-I86V3#I9rv_iKR9ts1+-=eC+LZPLV_tOEd{g|6d!mq)#{GSiH#(2OV;XohuLVf z$A|IRsqU;F-@oTKH_O+x4yXyznR4hoIVx}6nk$G81B&rTTUD*ZMG_Ezsv?j12XU=C zaVt+ZX|MX;y~-LItjCTW8~pw2G{Mzk#;?-LTYYHgsOf>kvF$GXZAds#4akvul9~$x z*nrZmS#5U5W*^Dv6K;(8*!k^S;_$@R!T@T+xZ&>IjGYyckue{&Gp)a9m_#HeC#Su5 z@lke>>$pWYKi9g0zW45}*gDeH_SigPAx(4y(d5sqcc#51zbjjRR=U4X=clo#jJ zQ&q)tpyb@a{rmUxZnhOT_}FOSzJ2>}_RNObi^aRmnzPO@Dl_RlG?1yD@|YUy`85y6eE z@%u~nIorm}MJQ|ETw1!ue!Snf>7|28C!>}#fg%7_Dq31>$R&6`vM637dz$vQbquJL z{EYZ)sVNw1Myac-Q$7M(7hSQUnFGcpBa}1}D-igb=4L@Wc~Oc^f;|9|MC9aGxvkr` ztKgqi>WG|dp+fQnNRCtv3q26?ca%)YotYAdro*@~czOfNoSrD|x$Fufj zFUwYR>?mEM71M@73H$FJ=VPGxi{K$n{N8cS&5d@Vb^I5?qN%0jy(I=`)~#O6fL#2= z*3Qk%E%}VAV&_H%3Q(xzsd;G{f`Z0ea&L1s5AG!(CPoOQvLAQ~^PZ|C!&*<(c^XX70YXeO2~#`dR2C&dXEe?&uxomXKdS1x!jy zf_tV0@*+!;vK>2_S8Lpib1WE_%9|#!rne!31*{B;jyxv?J&-2XVqzBiDW6A?as118 zu>9uOALdU~ydI4o@Z2v57<2b<#wlcgWy@qw`k)dtMMY7im>xTyE%fuVGEfCi{Qg}~ zTFQv59S|PQg+vb?X89eJ2{Z5K*CTID>ut7eqI@*E&v`D!mhueKJ)w6$%gc`wNFgg`1x;3w0_L-uyYz{5hT?t0*yWP z$@KKd$D8u*a+!9mv!6yvaS1ve9-isR?$`;ia!K+L112-KV`8o#1+@bLzIEi|Q*2^s zxR-MR2b0hv$eHgnIx=Inon=PwRw88)3b6Wy2IV71_)ph9)XAd66uJC9>UtfcBv${Z zeZbo2A3kvULv1ZL2~#LAIRphm8Zu6IptgATnSO!i z@6k6XzU)zr0BLgIjH?2IC3fGrbLZOCt5+Wk`~p?=74dLuKmEHO+w&07q@|_HoZUCc z$|{^Lr3U-mUwhBZf}MMVg70?Eq)lVyPDB~NAzeP@lPD*$ez+eD8XosDYZGY`?V1?s zFr0l=w&lgQLgTtOZ{CP^tL*Ta<|P!^gM*I}Wv;$*>bZbx?8z%{9iKJ(+~1tt(ds?- zepuk%X1f;k0J#mhIoy&)DOIZ<9C?BWv_ejf8#QQTGOA#SMv*lNneso{dwLcM3JOXX zz0~gfbu{A?*D-VR&yH#%-3=fbDfL4AHKS;@UK z-5)AzYKnkYHhhj^SMGD+DMKXA$jDH`<2-flI~*>|hm(|#e33+xsaT4*cNIC*%FQ{p za60Vsl=tlPxdn`j-nGLm-pwtFVb0&mu4&x6>47C)=uV&&o%;dGqERkI5yd@OPsA2l`*w)*RUm zZr%@Xy92xDWK9Z3*xpqrPzFKJeyxmGAuQLa+J~0_+sr_^f)Z3CWXtGxou=3Bo2$nN zDYA66h7!4&5grP7umB6**4Gz;*R*`c4uO3U5<77e z9B0^+j~Zt=(j%fX4)*&_f^D4IWPCzH*@8nthET?#GNHp^dmp(y^;?}A z0-I&JZSyif^ZdA-7ZXLcf?pnS`q=*Exn&19k?&1e&z*vb!q*)lG(ntRs&15|YtW@j zZ`c&4uUc&d;5a9g{=zuhxjxr(J0OWKVthNQ2@9SA;YDXMHjm60TiYJyy~9y{6T zx)+Wx(MooOEPDfh{KBraX=*pCQ~X0ywY+LvU~~JE{k;qwSF21s zqKf)@dUmB2Ip~OaFVU?=lc%I|Ab1*ciCTZP?9zNTTX0 z?+J6fbLY;Ro9%>pdwbiyeOmxYV0y5SZHwbKre&LLgHhZo@7u>x5h=CX(Q()^^wMVY z64s_yXIX$gA~K~eLL(w68BckvCFdNuKC&tavnZDnl>L~+w{trFmN6eyKoHW}DC>?Q^lqt^7E#XOKYADOPZ_~S@3D~~I7@{_GgHTGdBe9i z=REleDX@+*73c86RvH-@`DSKjre$VwuhiC@}kjbj34W!GuSxtgGWejH((`q7v)6!Lc1{%Rdj6-;l5{I476z9f5Ii zaT-5cv87HznIUz!zrTM$aj{x3DjlGfeKrafyg*jG;m%(cO%^*TGgDu7Q86)f z(1YrrvRFVIf6DcAclcSmK3wk0uZ`OYT?1)i3CKS#AbMbnBVp?f?FVAI&l(_7H0OW0 z?8g8g!}P;nc~bK7#_w)z+P7uP7WLRmI07f!yf>Ws0r~7hdhkPqNF4vAj~_nU1JuSI)t08&SAedBm`T1Fp$AC;00Y_bzh>X00jmSb3pt7py?&#~WnlUToAlfXIht|3eY0zqE}^K}_pf1eE^t|~!5 zUqf5GpMd`t1tIKv?bi6v1kAlxjcyypuvGL=(sh`Ffr@}|=&LhPd zJU^isG3R`|ZoD@)V0id-Y~x3DhdB1TzR~Z+R3H%(*$BacIeXnHovO@$K;~T~ZMiZs zGFiE0CqM`CLZZTf-e9~{b0y;r#;bvW$JLn5V=?2Q53+i%Jo4vfA~G@@*)9VmP+AZL zZuLxW6y0h@trQhC47_w{CtkVPi4*=1Nw^73&@0KowJe5CT~$?e<;MBn4^%(FOI<=( zKJMR5wKvOi#VU9*o11qSwd(p4^*oJ91Ozf0v?;5$QLQ%=QyVnkSWiV2+}hoO;fW#h 
zu=PjAZgiVd3lbX_%6m;uJnuhEuCI3$Y&#T@*q(^MtYV5uJ-|~_Lu3BCQtBs~4%E4F zF#T70{GfmmuDnE4US4qR+O>Yoi*)I1Y-~^rbTtn1kvrY%^9Qt2AQ}9cxpbGJwy&?R z|5m|U%<6+LE{Q&aJ6zU(AFJ2}Zx`L(qN`+k~|7bM;VegZto z0z4e_5>96q^+lt^MkNX(T=C3rK1~p9gCY( zE!Mxr;seQpgnWRV=m%qLLOYaj%sxTAfzoXH?xqZ3oC$*YD8CA?r2PX(QxLV0Lfy{Y z2OJK4lccq3ezggL!JVIDAg3NpNsOxb^)lDK*;k&4fj6hh2T*e(ck!+%abVjf^qNwJtJdbb@c&@ zTal4G$YLz1yK(nnM_O81T*rF(V&vWZ0S`#FfX2KoeHJUBbm-8F%32fcAWsU(%oixK zTo&?1QE~BCaN~v_Za`CkL=motEhO%WHv^6TJm@NdXi&trV<%(l(oo$6t|467c(*7n z5a8gTL5+!!m7sQ+8)sJu^wEaC;;;^0`Cu|pq zJ2@3)!52Zy+V3X7!^7zAKIPH321m>y(`Dc&7!4X%7m*ZL5g=l%Ykq%x_}g3EBS?SK z<9Tz%;P4ZviSM6QAf^gQN=i~bAe;8~(b3XYLO~%srkm~80zaZ`M@Sko!#H5{YbbAS zMv4g66vQ`%`+BJtaDya}{=GrDla!6O!NCFL|g3b_>J z`s)W*goNP*gk7RPLPW?1clFf%t8!De3mf=o0LhCuDYA1f9Xg2I^6>=OpqM%X6~zH!v&YE$%}gvTg(#Q^>^1{d_50+BE&F=`ax+LTZanB0UKPu~^$$@4JUyO+S zhb?-IK;BTL2r$;=T|j|{?QML)2ClLA0zC^jJVVHT*H!XigyJHJX4$f3L`(b-BhSLf z$mlWN>{2ztCamyIRtqYHcB<~GV8QL&0R9D#GAdIKtRVr5DF4Wq!;kXlpI({P{zhv$ z()C_Mb>Hdu z8VZG;>rgvAtQi4-li(-rA~;v63nS*0qSOu{iv)h>JUi`&42}+P6j8s!|v!c~jAo0J^zU#xC%eb?z;E@N1hs#54Bbokwc|lnj6Xf`Muw4+y zK0nhyx}uc!?hQH|aY)yJZ8-k~N|( zVByVSn;ZP~>;30xLIOZZM~W~#aU$Z0=lZpOzC4H%Wk1xyu2-*-t`NrfW)*10i8E0r zo@`YV&i@#W7`)uWu4LQGJatzh)U0>(pXYv9+T}&@hr?> zu{J*87Nixk8@3l38XDxqvP@0vwJR0u${xdRm%9I0Cl~+Mn|M%^U0hG?tFuYYXCC-1 z7tKG;fA720xt9C~DHZhZx>WGMhrqiOsepUj^{>)p6+HFmK^jB8A8VdIe3%%W?jL$` z1*FFtlvK->4U8QC1;&bqLVj_%<%nLvbLMpLaD|!o!i;D|*%Yt|BSb;7IPzcdbi1@n+edaOgNOS-gln5&Y z4TvJPwaVJsUB{0n?o})e=DSE#F$!ClV0C<`@csKmxJ%d3?;LnqKez{{SnR#vI?`GvdH=lY@mssPx8_%KME1Y%%LYoi@~PS zP{?j-lf#|F$&>1N>Xf*gb-fSv>fY+>*RM;vk69D%37AV?@bNIUIsl%L69(p!U72J? zbxltfF)!3@>agb_{1fg1iL9`!Ow8nHVd&Sb1jOro15?n3fE|@gyehCj77Ke%(_jIp zd_u6G&?RB3p`n40U*J;PdV5(Z=(GBUyGo`l%EM?V;*dSrn7N){Vk|3hF`*(`R1zx8 zMiKvIORnc-JmS6zbxqA;V(3HpNaf$z?ve(21^NvGs>B5pUjz?@H zq69_MzaJiN|7;(wb!uO^uTyp_qFQ~~#ohZh5YBVcshX<=!W1#^pjcsIV#QglE#1{ZjjHOtW>(yid_nF=jM)c8^GTL;586_8yn-0;k_$Y7UL8= zu98~6e!Z)!t9mtJ3$fgi@1qFY$FX8p8!*h^=qS;{^&)hxx-#8FybFp3qhI*;ZO`*A zW4s4od16K;aw!pn;5BoU&%=^6ys!&}`vK6B89QHl>of}mm!Tm;O*E^}4&L#B*4?eS zo@CANI@&>iPbFHUu>9R~#FYyx0FwFjo~@t?jIx~<0l8JB6RWI>3LU7sh-7F*EQEW+ zniJZG6+zeE1u40&#K`Fe~49-(sW0N zEGR7e+SRpM^ZAPxrFdTAxt;D-oO=Tt<~Vsjg@=u1DTmHu5~>&H>At;&qNA)#9TLB) zn%Y;O2Lv}I?3+qr>gbz-?FJTvc<}l+VQm5!mv$RHNqlM$hM|wpc+JhY5ThuF94uog zc8Oj@5=@z(bIfrd2}A%r_{wEqF+V?lY6xm2KypBPMWThxeRn1>UU0i}5&T$bhzBSR z%-f4Di5*C0h24^=tvS31^xZDNu|Zhe1Qfk?R8L-__nXH$!@Zi~?A*Vs%)b`rauDeX z+a*3JiGJyUGkTA81#g`A5TF%0`*OsHrmcBHY|j3Vi8Qo?okX|tp7SE*jO*s3ZNM8Sfr;@czp;@YDd^{#*C&3E z?4+uvENGl;Z2B`Om@9UNR(`=ICWDt<34OM^#r1^_IX?2wx7lf!nUR@U0)&VEYP#MFR=?jb=S1!rn zcD;T5O!mY}gXyJ0K{&z3hJx-08=p96BzSquf_t7IP#qZJZ&(;#CmFXp*6d_`qJ^2u z8k701fNRzhAyEu@GS!aO&z-BE(*Dr=y0&3z4YE?8df%Tk$vo7fws^OuT<9|qm z1+v#9ebUow3i_E-b#!%?KyE2ZS^AOY55vH9*!4kW>4R<==qneL6WYcH67O+`ybe{K z9Fn8|+SmBeo9$oZ#JhLz;$Y7&iFsW(|I(V)at79A`Z+r;Xi@O;)yh_Rmz!b@jE=$fHo4*s9s!dLVMlWb()xI`8w)>oQ_gc>NLgsA7f$2^A)@Y5O z1n;hh$zz>=2dn&)>Gt{^jeof9x8|&0xYp3vS+nsrLHXA?^!d{zmzD6ufya}}81k`8 z3IQIWKo1TM=7Ujov{v+<^#Etehp+&bKq)Zy5a2%TkM+p8Izu`u>4^70#U)NZ1Xq00*BOy<;);SA_iDoV{51KL1Q`UqqJaTS^&yKKLP{Tn|1( z*D6`~wp`uZ-~`%Iac-HMbs!=3&JW<&k}d(jv>O&vs20FYq)y}ZIo_&W%VwT{Qv}8j z5WWYaY;9~p($XG2=|ig69HZz>e9$UP=b@Ft6ThQTB_B8msM^0_YClO^0-Ra$KCvA| zmzLtVaFeoj-J!>(usPJfuoFJ6OFFV#K`+7AXhTCg z`=jO!;%Sl??p4&mFzwE%_TnNJWS1+*I^d0R7G`Boxy*Ol$$@fif^OG$RHBWKkgn5A?u1 z&#_%hHqoVoOo|t>8g#5<*f==L6s4)D37M7-NzXqu)#`ENCcBm$tb7|5%x`q^CD_S@ z;51|$+KV*$0CqznrA*pk)&NjvpzuIlo(bfb?OmQBNDGEZ8bBlzA_s9%y3YLGM6BUJ zh9s3{9o-N2xTHxgtCrTsxwE<|#L&*Pc<~3v7+|=BofjF1wGYi71pg4#h54@V-u?S` 
z85u>F3*-K;8Ku~;wZYXuOo_6#O$N8R5I$j4KwjNmS8;B3dMg-d{JwrNG@TUyfEDNZ zTk}Fm9|vhJLS*Z2d3_mAFA6&^UO+{S0))?vC*GBv1^0$$Sd!64lb)7VHZ1)F%m#{x z&l$HQfM*KJ%bD}$X1BxAa#2X$tu1>Xw-DOoGiycSL@O!T>m4er_@QnVH_5@Z>J*C) zu_J+&W9Q>rt9dy%xCqRS3?hb+RB2V!63{MzK|#7vLR?(*#7oot6ovv|93;85nr8s# zjvrr%rQDEF_xZCQaN<7?-^V@(veW?{59n2k0gUOW9d4sc*g4>Z-#`gY$|Qbx=dIhe zZ6gBc6(M7#yXWFE9jNCA^1M6+HqB7gX4!ptr|M(PTih>^8+Wj-v$ZsZ|IJKRTodU}i0O(POb~8}7pjx>d zP`&7OpCWf4#)id5u@?kaU_2TVK&gkz4n7BYMY;-D@Gm36k>_FImQVtp z_&H-4t|wZ)BI62OJwykhn*g-Hpy?AH9=?r7i@c3fakUOqbyXFyNZ>(tf{CFBhX*+( z<;tIxzE4FUs!=Db+-T(XEG8qSPrgz~r459L} z8W5(Oe$*_)8P>1+^J+tD@DUtl(iZ{~rC##(ZQI&WnEvPAOG7<>HPbi+e=;phdH?am zY0zuM$EWqhIPo0$05FxPX8!&60Y2VfCt!|!1C$GrLk)T@i7lY0iKAKXU-!UG;jS;E zYBNJPf9W-|4aAc)Bo(}*-JN&Y$%_*=Iwi<-7*GkmZM?2M^RFwpmf{r>x`Hx&5lCqU z>gB6f1)zPP@cc0Pv#jSEBx%27+VI~$_$UbA7^3ttLL5Wxz5u9a21WAU8(@s`f+_+h z-jdw8S@-ZB0?IbGdR&w=ewA`spL}81{M+ySvJucxs(O_8O0l#D{^L1-_OGDb!s3?K z?@=oVR;vGs9E~Dv2#lr$z6)QXF9sccY}6K)egRmLFYs+VVTy(!aT|j1Q|uAB;gerN zrNK2RVfbuiMK`KLrnf&L9LPQBN#itBS66?|5S!mnyU!^wN~0A3zIgqpF<6K4-@aXl z<_^N&fnXkaHNWq4Z|q>qN3AXd>MBeqz~BEE2qTavMTo#pL4hF+d^zwB3n`F@aTaiNi;L1mc#82g6ls%Wr{D8c4$|PhH(@W3Y5|F<@q1M+XXy- ze&*<(xP9t+x0xv_jZ37Ihf@G7b^%fWaoot~x&1noe(JOMllfpRy6Np1Dm^{Dh>&ow zP{7Fnnf?BbJkW!X3p@XL+Ee^Y4QK(W=t1h%trI5)M}5(-$6~^_a&Srgsi`;2m#lCV zsL4^1M{{`Lw1Dm7BHoOo(aWn3CJ}oV_$ku}3&&3Pkzdr20nqu-G&7f4%$U{It@^C`Gc%n)lK=jJ2&Kfkj(Rqrma4^bi^7Mu9|ftE*uj40xI zgq!ngPtRJ->dHzVSlPjSJ-;&mW`=l%(F3K&-aH@6GHc`($Gw8_Twrg~5->mF)sG%M z;^5|v=-dH!e+f!32#41noL#wQ4FZWFky*#T%*=yW+n^?)d@U}3PS_QXq3sPmlXstY zpgKfLRroBN2qRgMWOu6VzSc;G9;Af;q@bcwH9~`eE`)?=CXZo*BjGFn0Vk!oPfLrD z!ly)F%L4-@KB93;b|GAAH-t)yi)9v|61Zl~nMI`}BwU2ktqqW*N?Y=8y@x)x4H+&D z4m(etOes&t;;z6(LXa)nP~Y?82P{0BO@5PLs#n96MKMvhstUMcofo#oFVV<0J3UF% zOwvj~LEt9y21vR?^D61SvG9W1f+~}}Lv}&XNvannUBZ@~{h_D2Yu8;QZZK4FPG)70 zRO8}xdbLdEUo5BE227cVjWvk~@SthKiQ4*spO-gfmY{WxaQ3E8~;yR>3{zb zZ{=SH>;J2_@n0Tjn?{r)ainQ{^X35H3^a$Fu_l{pr6rcy7Njz^6)D*5JIlF1-_k*S z2)uIT!?PKp38FU!9H#&z0T4*LVWAvE;N`aeNDg{Xt|%YX{r+|Ux~5F?eQ3y}36vn{ zF7km~Yx%_sPFLQojw|67SqM>|hz)=C{YjefX#9T4_jQgm(yLa{BMSIKKbIx+<=^`2 z^9D#1FfIqdc4sl^;pn&l9woh5vX{Tz1#yHXVU<+90t9P|R|v?WD^{Gz2wu2!b^eDB z`&X&vN}-5?k*+HC^=r?VDI$#N8Q~D#Jft`?x)cG<7YwhGmX=h79D+0H8XFx)Jw)5T zip0CGpTv6?dzAfO%OZhpu*s5Sh5|I1c%+6enboL=;Eg-{dms6ZCc+N=DRr)t(+lwN zIL(_WCP~|F)J98EUceA+yVEJ0JidXrqdP@oiZ+<7lnU_I8EGtW?%)Cf0CMJHJChfl|XfF7d%sC%L`sakl z;d?{IgK|g6{r<3s@f@$4U(Fn>CJhCaJ8U#h!83*DWN5!1BR=98SUB2KAC%^N!|VS;d*0=5<38XUCAhVznKdSb-F`E zEnbeQEe!}#_Dqx0dn%{*HVK!y%pN;F%acVJu;a){!-k?X0e{$y3-?1OCR{N-!)w?>O_Dl%_%Qmpm!i8&ygK3TUHHszbncLolZ&nKf|VY; zMfB(v*d)=??u*`gPG&Ws7fN1#T$iC__(4%oVp_I}8q;4ZTbi80ex~#Ig8DCCJZq=% z1?WDEgDsqp#2y|V=wv&Iv+hUI^aw%>g+CMnP$gQyg9FlP(KG8O4<4{UB@eecapJ^} z@3{}7Zr&WcxYzKQjrM*6;V{A4T^|%IKbrW9^8DYAKyWE;0!iX;;x$PRH#Kph5l0_s zYV^zEo8KF{|7%`(lxXC#va>EUXOI_27}JQ^!H05ZJ(^zw5m<@I+b?6`{B;iEIuu-X zk$obq&>)WALQ%aLLq*odx~my}`uw>V`D%mU!8QMWoY@0`A76VBjy^CbNpv&2EOrk^ zrxdY%ii$?f+iXRz^Zo0Oi2^VcP_f=mc8X_VJ(t!pS_EY1$VyYX5gQb+thXp1R2Q5l zTvC|K=3m*U3||IjD6P>n=aq0}^$FX9nITSE-@qHh!GJjnP3yESMr1nj&B%hfig@0b#^Qedm&0%FlZd6JavsYAoD(jDnlJQN-W5iHYcIAqqt zy0b1L%dsmsb__@DV1P;HyBiFg_7KphkaLHXDeOt0KNZkb`CA)%!&-RSwk1Zb5NNy4i@|3+iKdavd~cq7rE-&McssEKDvul@nX@kga_G zE*pT_5|(xA){za0u{}G1GiWHNW*Bx;2=RCbsGiu7{3^{Sk5OSV(0?!p3t@oBw4f;% zAp5gmP6O%Kg@*&W{=M=Y*mr)=)xjek-f}0I_RU{+7hi@$*E>GK6nW3hY(|@f89F^k zS%nEW_32hieddf=tKdXHC%1^0Sc-JEoDIAPz3l^#K*l6o!p0XS!J7 ztpAj>j|X}eA*zW(2HiS@T8=}_f+krf5xa${{C_vv@tP8xATmD$L4&k;k|GF8;tRco z&V8)+JlaknpdPKMYZ&b(=D6(*iCs82K$@$JD0O48vDuN> zxi)NrDm5iy)q%F5eP3@aT;FtLvk3?Y81U0Ss(h@N>@o7Sa~$yME_Pi(eZ2uO_%ey8 
zqEd&Z7+Z-!O9;=`8ggBSl!;Fl!Tl1{NfBRm+$9%jy8yR^Sqxx0SWfO!Z~l8uy9*mI zDh~iUUR#V5dx=&gG9gzVgFmARtbpuLwU**S1bg)LtoLO<2BC$NcU6^24?`}L%KNNR zFPY&|%$YulmR0B;m>YJ%i8d2XJQB*Vpt$dxW6)mB>a(g0U#kj3q%WFUpj*En ztweAsa^shIl$nS_3eNDgWNy{_8@FzWuihVxnB|8uT+c8@H#KZ=r~S9t_K=L7Tg?Cz zFr7*bM};)jpbkoe|CIwvcB8**2zZ8!nghxx0zsZM33^-<*O@>Uk6uJ%OpN4b1};g4 z{A1z|*ea3w)bMM?5TyLrl_Bjg!)gcpSCaI(V7bB6-bF}g$APT!08zaJk05#T%7I+- zKe_(S!&C5qs)L5PMBb^0FYcBTHXlwzGcNdmmSQr(TFgwh1Niq3aN3cmg~XmTW8I81b*1{l7>XtjzYo%Pk2 z;ReA@tbUP5#y>a0c6tTekgwJsDdB46Gau!{BD@Fx3_F_g36Y>-B9mBg%L#BE8xx4HjV+#6UJ1CfG=F0p{b=6*>Ep0kqNqD^66EWAGIXMctluk#K^YPDnxKImO-J=81S0gKhcN(P0x!=2bP6%>PXHe4 zAdV83B#I$;(jpaHN$SHHgGRm>MoBX2LB*eSCrCUIUmxAnZvpANe^6)G9avsGrutqO zWIM0-nQMN#V*UC>q+b{@9xwY6G65Of#S1YI{ZOPKiHz;SD(Y>mnIkR8P-oed^U z_8VnrcLw~cc7E-3T`AIX$F&bJ1SCe1qi_)xJ_rrNdphhEil~ATQAN8^5g3Oe1Z*`3 z4MmtAD1vE-MF_mbU@`nKi4-qCxO|&ymGe;1-&9J14|>qgLRV+mLtAj2H9f-l7&}Lx8en`CK9{}Bv zlZy~~8IO4hF#D*RErJUSs*u4%NIXX#SH(*rPn;wNq`eeIFoX8~NOYk#^@ucAplr%ZkRZphT3ffb<4ZSNwisB-Y zMMPxt!Zy7f|D>*|fmYmCF}NKP=9An);IcJZ& zO=NGM+dY#=8xBuDijSR4=g9z7q`;ffX0({&;e#jl6bOQGsSP1y;-mNMMXYE%vQSikaDIc>C+4$r2^!70o=2e5?cH3(tsDr$H|b`$ z1e432YHWs8XaZCP@FLg%sp!zw<0`BE30OPM4XXkK2LiCFSHmSDfgY-LIj>(QSz!(e zCt~_iWFsDuM}gkjftvk3S%@DOV3dyimfkpxokX1QXk1OQ!WD|po?wriiUw}luZGOYmq=Nl%tKLsVmZEYDLaMU76(jTGH=?Lsm%QV*KzL=i z@zhI^`wS&x*K}~3{F5qX>4mUctK_3wpM+qoNF&k^=C(elA)Z3gmz#3vNepyr{fG)n zn=;rZ^_~ZtXkl0EMxmJot9f`2G#TQ$-*B9X$XigDci$Sv+ac4aP>tI|xR8kaI3g1y z^-oBCdASrec{03|=$}5PY*2y|RgZx957Ap!ymv>+sd;pS2TTtWGjqA)}aNDqMbi>N#?cI?Tc zV5!02+s-2_ITQcX$H5Ktm~!DY^XrfVghn#70z|%%vGFIguEBh4iY_HeVfLt{Z(eYN z=|lqCQ&ELNRM412W=!T7101TsrVFPrY3re*5D*z4eUed1X#AySU;LC`vxDQs#6S2F zHb!FL2Y(G@rG$P5wDpM_50Y;~dyydO;5T?slu=h>VrD;eK9dsz3lM7ttrF7-OOce) zptzEjKgFE>5mAZg%`IWzgYh+Sm}Y^tQqoLs zYGbnqU&>V#((Ivq`A$S*m_y8h~S&Az)^xuY;IU{P!f_U-UznD8DaI^ABTVp zFhzDRghgc}dnzpSpHT23gN8!mu@4TQQz%NgBjgRi*n1O_gpZ3m4RCG+dfrB0{6oCFNY}KkYHm{1nr_cX3`^KxQBJJ11fQWm#T-FmJctAPK>u7s&#|+bsxB^=N=PxucGe`FQb-H> z=vgozBOeY-1%Il-gNl(+kcj;HF&|&fbp>*)7=F>A7l+z-9)Cb0K`-vv2pa?b;o(`A zQn1*nICs7nDh@QE7tv=ZJnRJ5G64^8-4&tRQWZY`WH~;UNvv8lwDu(!Mys7QvKwvm zzPQ{;Ai?v$ZhzZZ>rQbo)1#&!EgCqLWI@QF6NGO}c}vuAr`xzc`PcAp0Wi_|IW#g< zmHogOz4-L03<1}0O_1>kRLczWyZ4t~={c=hwdyNuG#H;yLOK}HXG^Eu%u?2Hl?T2f zz&rBhk*Se;cEe?dR)U-&af zlN*kr0(>!X9AK3=Vf?2V>@Pox=wfKLrm_HY@QNXc6HHz%HICmwq?Lrgw&DGap$bK` zCVNbFMix|6>DD|kF+o8qgLY<8(*TFBK`a%4g@xJrh?qAxcrvX6qfb|mL#>0c2{JHe z_DqhCFL)~V4T@Y4M&|6n6`n$2uyQjnxao>Th!{yz_+|k-br*2FswD@>GY}5QOOP7& zWLzC;4VD8j&WWSRgISh$F?I#>djc^bC;MD%pBSYAvix^|Z0R1Ui7#_~jB9 zyS;E(?8R6K#j25k0Hs=g4@?u$A>&8L3=~M8Mr3FQrnhQ>B_n0ZWiV?eZUy`j5muZY zyoiA++@u5~6aj$(1QGz4a)D;yQf3NQdCfN$i;0)8S%|@QB0{El%ICrO_VwGhl}JSg z;u?3Z;u%;l9D}Qp;b`!xKp>PPOdkwU1oWgme?IkHYznj~GIj|Hcw(#rR=h{KYfW}S zv?MdD2`a+tAd?74zZ217GT&?5!`=-&4IJvSAxs;vXl@w+=L z;l7ApYIuE^9y9l8(vW}EAp}8!YG9fmv9&5x6+_6#Q~jaz_FSnj^$gPh?waVi7yEzI zcJ47%*I^j{2Hv1DNKX)99B~%IV8L2)K;QreI9VzPtkD30jfA!3h15hyP(er(6|wS? zfy^|N*uX>)M2<+&7^Frt<8lNQ^U6ho=B3Yz{?Xd{qd)r7P0r5me1G5f^1RRUyh6mI zh&C~^6>|Rh^CnJ2DwQx+IhWU=yJ=iZu}ct!l_?g`&-Scc@%r*UBb4Z? 
zUWX$mw1@rx*zfbkk}gML5jA_^ftu=SoRSmTnkLs%+xmN`x~rz=dI^~c@$qW*>|=qy z+iJ$6RL25*vYdu4^-2#nPe$htv9lMqIyDmPwwOAzsC=-cV|VJEo&ez1x7Yu zA3Fe3K<>h+77+^=+K(=)*8#6HJ&*uZ34AWsz6}%Aj|mvRW{sm(3D6YEonyd zsX3n@5r`Yd%Tv(4iSJT_Z#I#`W16;Z$w9@w!}GyLGavh zKy^gJYptvrLAcCArzNb~oO|$l22v%@n;0<=Jxny(OK}8)L{!Y1p4~~lj&#LvF>dUG zK9vu8=CR^cM6pQzgy}W_;)9jBQY77cdI%GOR%l;=AbLE+^p<_gs_~#cPEVc9$dOB# zjl}rSJdTwPQNgQ)DO-YZ03~+OTz{IIe4$k%eWh46U^zJa8;nJX@c2DDa52|o@fyNT z3T`1UsC*HKc_U!vQ9E_y05Q1c9Tu4pl`i&tT3;tT+qqsMDW#9bE2@#e>{%NEJYgUz zfb-p$Q`^o&nzb+Cc{^xkX$!@ait(3JZvZheE#TbE04HQ@*}FkrE*;(C0$G20n0B%pZY8!BgUPg+fe``jS&9aheTS*g&F8A z@R!82IwLRdpw|>^0Q3OV+2DOoOrtC^IU5b!7Z3cglO5W3XPY|L$T9%s~?2blF*FJg>e-3xR3g37|q zziPY9;V4cnWVP|?WZGS8spJVB%Ptf zTVZIv`y-gG1J<7dFio$fwnoL)rN7AZcI5JBi^5KQk`eh$q!mM#mb>>!XR_@;R3cpR zvX~2jhQTvv4rhjNlc7C{azPq7F*YN6zA}nk7*rV}H;;8oi6Pc4yv1mS(e9XOKq(7?;Dw+<-?^Z6 zX33>Y2}}7f{Tn!AX|Tx%L~c>6OY%bkFKw(0-W$GZcGv^|Mn;y%jv)P#ze+?k~)U6&U~k literal 0 HcmV?d00001 diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/gen-latex-table.py b/src/dios-egraphs/Diospyros/data-plots/utils/gen-latex-table-all.py similarity index 100% rename from src/dios-egraphs/Diospyros/data-plots/utils/gen-latex-table.py rename to src/dios-egraphs/Diospyros/data-plots/utils/gen-latex-table-all.py diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/plot-base-slp-dios.py b/src/dios-egraphs/Diospyros/data-plots/utils/plot-all.py similarity index 82% rename from src/dios-egraphs/Diospyros/data-plots/utils/plot-base-slp-dios.py rename to src/dios-egraphs/Diospyros/data-plots/utils/plot-all.py index d9a72bb5..e29be2f3 100644 --- a/src/dios-egraphs/Diospyros/data-plots/utils/plot-base-slp-dios.py +++ b/src/dios-egraphs/Diospyros/data-plots/utils/plot-all.py @@ -1,13 +1,11 @@ import matplotlib.pyplot as plt -from matplotlib import colors as mcolors import numpy as np import csv import sys +import glob -def plot(): - csv_file_path = sys.argv[1] - out_name = sys.argv[2] +def plot(csv_file_path, out_path): names = [] baseline = [] @@ -63,7 +61,15 @@ def plot(): plt.tight_layout() - plt.savefig(f"{out_name}.png") + plt.savefig(out_path) -plot() +def main(): + csv_file_dir = sys.argv[1] + csv_files = glob.glob(f"{csv_file_dir}/*.csv") + for csv in csv_files: + short_file_name = csv[csv.rindex("/") + 1: csv.rindex("-data.csv")] + plot(csv, f"../plots/{short_file_name}.png") + + +main() diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/order.py b/src/dios-egraphs/Diospyros/data-plots/utils/sort-csv-rows.py similarity index 100% rename from src/dios-egraphs/Diospyros/data-plots/utils/order.py rename to src/dios-egraphs/Diospyros/data-plots/utils/sort-csv-rows.py From d4c7b785bbb64531710a55e7a95bb4d0259b4995 Mon Sep 17 00:00:00 2001 From: Jonathan Tran Date: Mon, 24 Jul 2023 03:26:21 -0400 Subject: [PATCH 131/143] add all data plot all data plot is unreadably small --- .../Diospyros/data-plots/plots/all.png | Bin 0 -> 69775 bytes .../Diospyros/data-plots/utils/plot-all.py | 3 ++- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 src/dios-egraphs/Diospyros/data-plots/plots/all.png diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/all.png b/src/dios-egraphs/Diospyros/data-plots/plots/all.png new file mode 100644 index 0000000000000000000000000000000000000000..3665e004ae985b6ec6cd80e1a1e67b30ba9fc1fd GIT binary patch literal 69775 zcmdSBc{J8-`!0S{8fZYKloZK4mIiZWmSjkZQYd7~6f!p7rpgpbWTuo1nJQFBREAWh zQif27$aEX_aq0WM``K&nwb#4%UcWzn&w8HK6Zi1>e6I64&*MCfFHl97{ z;%e*UxK%<{LP~6-jhovUS2;;Zhkw67!pX&2QrX8y1Rt{ejOJli3T5>P@*iD>a=I;r 
zjzZB=-EHIzs1}mVecHZXRzG!pu}p}I&1&39{Nsh%ctz`>H|;I1L%VvlDFIxTA<|Vb zFAj+GFOjOkUvjd8nN-OCNHe@-*tqb^3*YE(NaIKQo}ZzsCcjBLUPQ6N+qev+%qTMC z#ll}(|a(Gs^!>(;F+b`Y4FiI?}dHZwb`rK1zPnRPjSld0OOf3s&Ij*}BF`O%nK%lZ1P zb|Jf{@eO<|ZQ_?`#q7;fW|W&Qhq8~Q8b3G^_V}@E=KcGIQ@?vMk3BoL&H7zjxZ{z` z?N-&ytxr58)l^mM?;X4^v~62{%59^&_<#sizA#$DhYzVD2k+Il7rP3Jir!DTZF0}b z%Id(+j}Q9a-rljz_4__tH~V2zQ~w6qQ~!A-_rcbvfq?omZ0c zP~ItB7kBqrn}@jJYQ;~tZr$o07}#f76_Qv{Q6V`cRZ6`jYZvq?DJ4asJ*X z`Lijrw25f9(bB`Zx3>%E>gsB1Yl}#w`O?Ny($dtk&C4V{(&A-KGCH*6rkc3=iFdfP zm2Hm-cch3mzm5>h#624&?qT@Rk|!7&8{3eqalLP>|NS<*k7C?X$5pg;a-DIbu5f;VLz}_ND=P!3LYp?7KRei}wrI(UyGvJZsK@(sirl@Pc~4w3GAfXD#&=uTazHi?OFK_RND_54d|NIzgZ*Sk%l4tGq?FBygyP8>w$mT(v0uJ?nceOfU(z)mc>_<+om#1l7InHtXWGifX8nST zC}Nd4fiqyA(o0d;k74ooI=ke%l$zUoWv9 z`o%6iRBDD^f>KlNNuAbw+ckHTd~eOn%qYn_Y1O}fAK^X7%A$KmE^5iD^_ru-b%F?O zJU`ZX+DGL}Teof%mY2WX)YMek?7PjOHPX$^O-N2ou7myz!Z9*De9vH8;k<=|(Ei+s znA}_$#H;@wnq($T{OzQuiSXHpp`$L&&aWpfzN@HE9dZg(-GlH-$NJy6cySSSMnrTp zCy(N}ZFLR_3SN6Ym$>sA8XAUcr*?OC?y0>gUb`mx=1ujkSJwr-y}j{ctCJ_y_|-%1 z=I7@tIPbl(Vw0t%Wn)7_Vw&#lM!agYThwM>*g#}+uTp%wRhI&#{;Ah!MP_FEJnd(8 zS%ycffpLycM?%C--$&-_w#WT5vK8GHfC4T*Sys@ciGrqL- z(TPPYEVF+;nRoyEsl(09O}5F>RqH2hyp`4Y!fs?{+QuI%cwky+kgOhB{o=)i6a(Rw zmX?MTt%%=oBBidqs~+38gkxKh-AO`CK_QNnm9@LmIQy9Hv17+l5)*e{xpIYV!-oCW zHzZx)!}UX^hr=rQSnFXW6{*KwC}~Uz3ooX5=Vu0?u?;Qj}$uU zvIbr{{V_{;&cCmI6XS{%o5jV&@t*WkZ*MgqLVH}QTfMrfR-XR!kY_Cy*MU%OS;nVz zPdt8wr>34PO^;Rf;hp1rZ1-skUs7{(Grc9E(YC$Fxx1%F?cBL@2tKv*=g+%M4xcFA z+4AVZ+O4PRoyum_&z?QIIZFF=Tifk-cjTi5f87)}t^V*Ks_Dar*Z5<*)0de9-s^~k zxABU%vm8x&SV#-q-^F^@RVw4ZoXk8BCux7ntO+`HykZGlUP2Rjz=zj zB>(QkrK`Ww)cED(j;wbIq;__7*$#hwP9n@8Gjd>XP*_AHC^s*!$1(cO9o@WBZ~1oY z*kN$!kRU>iaplTX1kf7&cQiQxfy>4Z&4jNd6ervd=9DzJxMZc~p+nL6j~{35za!^1 zJ!V@>n{3V*f0^USyDG4HHiG77{U&Dj+}s?}g(?ZCo*sc~Ph@!Qymwntf?e9noV%8M zxOXsko2+c6&(v>hx_tVCGAFtT%1w)pA3wVNXc8_#o)mn=&eFrii(Qy}e$mo!Bazgp z(OyFv8=*Uj-p1{7mBE}w&(68)A78s}-FsxtOz(-I$e0*S-1L^DxQnXq({%eMN4lEs zACh!&aY;hLYkU*CwLigsE^M78^7iT@&(6srG)VpHuTR|Dl%bzIKQqZR>Z!!W#-@7T z_`yPAdv@;6@7eF|g-+%&+qb7VcU9?YYcsO+tiQBnf1D< z=f>_vOPV*v?Q$iBRN~mv$-~d=u?Cw=O-+MBLQx)sT zkJl{}#g*%K-gx@#+3TjJNGf&J`kg8(xwqX7Vi#?^w0uq9=O-Sh|LYl-ExU`)=$`uB zfIGiul*y_aD|4B`HtCu5;6Z=+`9*Hn%K9n3%kjAaeA9jXPw`Dha;=s>c4)hKZhSxs z|Kz*%8!#?ko|tD{XAmWFu)8xkIT_U=@Z7`@$q2}X>LkFi}pPRZ`6|$B;+6l;LY;M*Yg_ct*)j($3HnB0ghp1Kg zCtfc3(NU(nOJ4r{&rjJTR^3Lw>)t~=oTp_2j*ttR9<5W(^q-%zpxxf#Wb^CGv!((E z)5iMxyNDCURjbkj_2LiUr$GrIlm)(WCh@8TL>>t6Wz<(d2glbj3O+iXj2#RBGS!pPy`8T$;d2+bk-U85}&Asus+l z|Mr$_WpqdTZ~2dY8Th-ElaplOAi_S)+<*4q5Yxx3!+9CD@6(z}eZ95y^wRO946tc! zUtQzrbpMl6(3_s|YPP=q!0Co029oNw+c&H5Yh2rmY;(H)?(Xvw`uEO99YNi{h?E)` z74@yR_bLm&`f=Qsrk-Bi8XeV*d~wSD$Lzg_ztSNGH$3%n&3S;Om)f!86}EM^BkzU{ zt58IM-{>j80}KKJ0_)|^?vrr*_Tn<$yVIGTa;CpgfSF}_d{C70)5D|k%q#>z7Pt?! zuSLf3YE|-^@${KA5F6+cUdEDWUgqc0c2m-P72h5dlpC?J!UeA?D|hSauKe@!lZLVJ z&1NN!Av0zc)P*et3`>_@q%!mHh!>!^GpR1dm&Ymj9_|fiNlZ%O zD^x7&9eUY&Xk6j!&yTti54}deOUcMgXVKo)eS3Lz)YtytJ)`r1fyM91cki89;Av8OLk&rJP3Nm23fQNXH8yL4+77hzlO(a~Xg{rWX! 
z?sGKalo}iy5iCAnw_i_BvTh_p3YU7q(Q%csvU2fYZrAXaXWn#_h%LGoZrl{VX3L9@thpqMEQ!ezmadn`khjBzCB9sY;0{`D=N#JIC0{Dq2Wtpv&w>Wy@Wj) z8cTQY-fdyJ`&DcEd~j&!+X?A1pD9*$5us+LO|(`nk( z3EHk#vmO5PN9}cVbS}omu02vx>b|9SO2%WXx^vRn`aSZg(AKRHNQO6m^;i5VrCnRGVnsy<;GpW*FFS*R z4mpn@v140RcfHzRZQWbvXJ3rxvxddjZTruMN1s%`dZjUB^c01dq|c#_GRgggKYspv zPawKt=xn0j?DXqd@!Q)3y~X?b`}F~?`mQU_F)NlIK72Ue@v|Hw6I1QhE%~TgR`&K{ za}qUn`_0W0-o@?Oi^?!-^~BZC*f?!?_>7$UkI44S+P*#;8Cls>{Eb=v-H4o&)D2v0 z_ZLHgKX6CVZl|y$=<$EQIVPB-w_gDPo`emkZ(_oAFv~>W9;r4RvDttcIQqt7e%OR2 zj0+*P1Sud1IJ#DGY4LS$?Eh2*6kzsDpY7Q7skkn`A?JsuPoM7Y>N4!>^0uvDI4;~f zIEcpCA*s-*qY=%hFdyHA@bK^qG;p1Fg$y$dQWXlb&5C7!<{Odx`g>}l*?4*N%4Wyc z0jSl^x}lY)&ha$)^}gdN@tZvtUsMw%T3e~;IUEof8Ts2SPk!glnci1(nXg8Rv4TBw ze#rVPJq~%dZgG=)nK7?wY?P^cZlI@kJu55gw>^K&E)#b3RPCq?$Im53g^ou_op}q~ zj%2S>Ha9Jb&^=gjw9M}gsqO^z-@mU@RaMnlzImPXNjp0objzs%IyVoXizG<<``g=s zJ68P!@?RVPjv}(xs}$MRTJKE8fy4{#9l7+X1-zvdvm9gVSy^`N+SPyx^gHgt^sxJ| zbd>0!G&~3Lqdwf#HuT8``ud63?!MzY>j2yvv3vTf!}!_Otl5h)6hV7ocmr<=fAU10 zw2DZn1kB>AM?(fTPmj57YIB-;_9>`Kai3~rT--j@v4#`Ot2ZbJ4HrkuCQxmjMvZ7;k>m8cx4u{rlfZN;%h%mqb-59CF`pXyX!>`)Mbs?Pl@nZCD5<&N|iR$AO zy?;MV`0??9j_Uk|Y*c6+4h{|%2D=l_e5S$;3=Nrlo}xGOsv73#QB+smcIpimVMEk3 zGIzGS*Cm38`SRsU zHzJKZQtY=zAd}JAEr*y}j(-JBZ~zc%G}PI{gEPm`(Q&L+vaEaE#EBe3E(YJ1Gp#?{ zd*-%G?P8o(H?2SGW98G5j!Jm-`@1_j8rL@nU@rt+zn)GSV6adLe$!f6Iv!wDxOcgv zvp+P@ReKK~Ibv}3>~^iqtgX+yJxU#-|FpAxNFRS0UDA;> z9rx)xkV|oKaoFwK>!zlr^dB5~Y=dIfd9gs~!i5VJVqag7UcP@!Z0zCEx?cZXj`_e?^othRj(&fe@6@4mOUYMGO--!< zRYqx--<#Un;PpH_{m4dl^^?^DH`KQ0`RC>3Es%E>6;vPB>e3>A2elS!0EEPh453@w zYz^YwP0>B5W80_Ry}R+2vcFR7?b{Le?um$-mzp`!*GC(1f6=K7Wh!?%@c8rxs!%VR zs-wqH`|CBdI_3ErxXzvKiOI=(5n%~4<>)w*kr)i`$hj+>J6+z|dJ8}4N2}w~k%B(z zj^M}g@{M%^B78NYy7z^yr|+yj?ImzE!|JwRp zd%)S@=C(MHo$<}ynb8ZOZX{Pc6PLb${VqLzx%CH zcKi8JTm)oDj^u=4J#AQrFd;}WVc^9!Pn3$)a6WIhqKTJH9Py*&X5{APf)=`ko>f`y zeQRrLNqMh?hkPIvL`~n5U!}>Uf}pBq0w9k8v>U3bUTSYt#Zw6Tc|IsGFmYhOzWB8Y z7QG4Z^GJ>b16dOQNJih*+>Gp9uCA^>+6pEA47NpsB+CFpKt6@@H2rP>W7~n|95?hK zLD#OOX-A2K6&4mwmfg9-1I~~0M^UNAuVd%1*c#q+qw?RuZAGM|ojQO0kd{_|p4<|u z%e9~2AkrQ^lH!&!Py^EBN$)L!F+j(vZKn4`UIVywBvB(hdnL zOC6QDtW3d0H=^~dEd1T>l3?ucK5eC<*YC|8();m=Ytpw{ zEzL$*sIBbw=$Azn($>_xD|+Z5)dAo`^vvr(J;nG=17v9=i)8Y;!C$F5(FW*VbKcm@ znSpo|1wMWEL)Yf8}U z4|*BEhPQ9uX5#UXzNuJpE%IwJYG5Ekf$}T#e?t8H{B`3OzFajo@T;4T-D*wXaYB^j zgkP<%h{OEctk;zowu8pT9Hhl{{PI+5(b83$j~@L@Z@F%}y(XHfDDU;?K*!pg%ECfJ zL%FU`ezV)Fs~ZabXSqszuMM!Ui+}CI_^}huKjc_ykhbv8(C6FX5fP(V)+RSzlzvA9 z{fPXHY&;=w%&3Isf4?NR4>yKfuH4eZMU=pt|3(njqU)X>l%eOrW}USo;-PxQCy zA#D-P?(V5KZZH?0G^A<{y+t4x$@xr5O-)Z5U?E?(w8Y|u_i{>@-NkdN2bjXHzdH%? 
z*#K~^Z}{6wHfClOYi#bqXv75Qq`O8F0e-Q8N`%s)k=83G{(QmZ>U5&)bsK+|kH^Hl{;Cm$P4 z;c^p5l?-&*gsHg)>~iwdsYU?9ifBqw+y$TC-LJvG?4J15k&aD7$k^s)lh&tRTLJxk z2AgU1=zRVKG9YvJ#UF7qLp}XsTH32HCe;@7Z&~a}YT;Nc4RBGwNf7im_?nI2q)WU;w-DHbED>oPassea7Hs&Qy?gtyQ7kg= zt^PDOIyScX(4j-pBZHQ;xjq?E3nXN2etrrOg05a&3WWR~3x^KRb*yZDHv7PYCGZu9 zz#2!K1_h!I_P}<0pv4U^ zkt{XRC*dzZ8|sM3c7eVHLw(oWZ|bs`m{|44k2igMeQmK&`Tp~M_w>fYBh;H|NrpH>?i8y`4$@G?5VA5EDTJN#(65)z7vJo@_jXPlkSqtiZr zp6=<>r&5UQ&6_t8=`(I_mr$bj>FSQmPIPo(pSVafhP&qi>Np#94BmS zs92zH=qMQkZ@?z&>OZ}*^BHNLtYT_yVHJ$ibp@7=oxz)g?j$XB?2$?z{1 zmZ@0;VsGA%av_3YU#tuR$b?Qsr1onHb=L=EWqqf|6n6dgHpeQn?BBothIZ9H!zhT3M`i6${SVsi~1u6fz8J|&hD}>obzRAhSZrVJUd{tG| z@-JUDU{!?7Oc({4NED3>wIh;{9oC!MWp}T$bin?VUTo3~`(be+#zH zruxPvH%}8H_l=-06#Vh%0 zL!-M~`g8>m$(y5>1fU0nIJHOY<}o%iieJV1bUkHS>y-uR+`JVkZFOT~ZgpW7{V=~( zG{?EIz6R@~T?;DwfR5(AeVd^|D@5-W6O*7Q?>XCwBp3ZxZjX{nkXH>AJinmj|F2Z} zSClyov^hBx1^C>Y&d z`eRnsMW|J{7VJzty4=bR6c*B|p?2|mcuaIU9!pff{h-ilcb5_qgIa}R03$2_k!WSq zfgX|}(S!#DG0I}6R$tk^U8iRI~kTN z35SXfL_$b0VF7{E^6_pZ2A}ykUjhSc-)}*|=de9}IvjNjt%>%~cC}}#ao-`c@NY0g zqx(j7-vYf;?g@+>dnB_^qJg9?Mu zePeNCV^(sF@8T78z*sij-M#1>p=Vei4_`yGRsH5o2$GQPn;7Y_;ct7LX#iFR5Vr_5 zvkefk#Czi5WCxbXwlatv)P=Yok*%#Pjkh6kywH}63iAc7R~7y(j* zf*>jjn4LhO-bfcfQML14pb$%Zrer=ByIzI(!T4FdMfhZrKd1G(ckkc;Iga~gV^5$z z1y)T{ODhmWvIfMK&31Nn+=}NQ-DX)!6(af#fmdvi4hffbq||4Ztkajcd&Up60qi$= zd($L8e#v$w&(P#>71;z(=;D?A_tzyTrvo!L2GA`M+O;bd<$VuSuB5B0c^g|F+mU9N zNJ|Y*G*riiKFbr8h-a6}r40(6DuVmth{_li5wV7)=gmeXbY6dcbtGUfl3p_r>%j4I zap*t4(*`@v!tBnqYpZt2$q^nU69KELx(yB;^1)~<0D?ko&|+>o@nSJD2AD@MvBdk) zM>N)lX1fTZM(`8WrD}*61;|q-T0F(d+FGN-cj9Qy<9WOGrP%7mH?=4}pLGuT>}6fE zMhg@joB$Au4-NT4)lLU)&VBs2=WzPTWy;xjC{`|#6yH;^N0t|C&c6OZ=<;*Lg@iBq zp>ma|pTd%VnWR>e`eJ;cs5dlf@etkri&1J=E}&A((|9`lVo-zm#h}ttO(;Ud5RmWM zyZ_usCj&eLH;@2;j~BujQs0u*8}?z-ymM_M=A{P@9!$1@yaH4cf_ko?s5oD{_XKXG z`H;aWZ;If)7!}q))uC4n4LK>d4~mcm4g#wt8sHS72m<=5!x}=EZce#Z1S+Ei&PV@F zJT1`v5BT`ZMB8m6H7!Utc8Jc+**OvY6}9Z3QO(#(@kls02nr^_aN*)bG&EpOe0+R@ zB45EffP_Kz-zjMCbaZrX-`}i-f;z?e_7n*}X_5ErVaU12s8McPwb_A%h?*>JT5teu zjFw2$TyGqtdR5iP_&Iky|EW8&t$_SppEvveUUU_ zI|8$@n@MXyj2Qs|6m$@5N5Q7%{f7Mf9zGFGQ`0BaT#F446o3n)+*Qei1upte8( zvaMUE1Ku6F3{twkbULw~2dydn7Y#I6TfS=DGbaNA{O$;9wCt`^y^wXv9Zwg5jf z0bvC`N3#Bba(()9i7c^t;exd^G_Jl&P}W0TO~#fDEJFUN2ZZZ(v~zHnjWVspC*?>c zzTpVK^&~?Ux&beqqVIGhE?SQeO?YJR;suhbVQy}IYnSVm&u9=)zp86$f{4(Leb4H= z_0*fY;LEB%pG9+Jgdb&pwjyLSRyQp2I zTQZHbp}K?0Jbe5O1R~{Ii&UfwgBAdbVXQa>G>mDZE=i|cqET$8SeKFD=Bj=Ef8d3T zk$JLW0q2$KmVR!m{AGPJYUq!9*}?Wj|J^+~MXEUDlyJu-zi z|7uKSdSF%z+<;)M#`eY2&4e$2w4eqCHB_w2$i2ldNCwFd4DD&REMmTnQT6cL4A1BF~FNwG|OG}k-qlBfx9&bdO z*N=*cK1vX`yAkLK3OpT3phmYR>bR_J<0Zg0O}IhL%*<@CbmkU?;*_#(*_7joJu#v4(~zU6ok_W zzEq|=%Kq^rouW+fQJNl|*jrOwy^${o6s7)=BiwM@MWW%*Mt=>MF{?JdiR`)uU!?=~ zp)XEBKJISW0P2iXCGazM(7#ftHt3q0Qnc2<8^{MwBw;Q=$wZ*-Wg$w>>z*5*OW{{S z?S#vPa50by=J~;({D9d4L~7iX)x5Be5c4F+B14E|s@Fn7RuI12%hQvYr2#ywOc<3U>4*k}TlC4#Rz-I%f*(nBzy_Lk^m*g}#f`T2g2j;<*btci_7 zQQnJW3V)PgDG_3y=z+#?@$iVYi$-k#xfkRBM+`9l?r{2o2*QFkqLUQHS0Sk+IGras zN0?iJ5_<+#71Z+CtTrUZID*6o%>!i0PyImBsD!bV1@WhBud(^OBO2E9X(7G@+xi`-6*=s~hwl>)TCSWMExF z0rl|Uz9u`Ig(r-dVqCw6G>;?kmiW@P10)7JU~>_B7O|1e4!>NT4|-?M5Iac=_MP+x z=zy0Ru+RGOeZ+@LOi{$_OT1oa8*0|TNb3y~T{=X(_n?#r6}s(sIR%;k{vYgg{)|&%8#jz~z;RRWVB6&vQ%O|*xaw%0&y)+H`^b(; zNZ_Tri`G93{1i)17H2bPAL7x2NeJQN5>GpcC}0a+Owlf3Nj+#`!PgHBea{~H{vSVT zW{E$B+#X2Nl!p(G?Gc~ZzAr}Ve1Uz-9(V?(tq4y?ROfu;J;{zoKEtCmT(njf^WFC{ z4taC*-3CCk zOhl6rGFZ6dD(pw&G-o=fWbmBVqsvg@c7+a`-N=D=g^X-wCVxm@Ug)r0ZT`_=D@WKD zM~;n+>8a*AX@S~i2{Tc@LaJi0=Zde^(20pV^xV{M0c3JKx=)QAG8DqlJ3Bk05G4FM zNAQ!$;pYrrtx@Z&932U%fZA{o7bUu7dV1RBkRU;#HGC43`9;{*#CeynM1tSw;KAbF 
z>uBTle5D^&hJw;5s!r&Pf!i+@eIgh8pA8udixFxe#8S8A0maiD8wNlA;0ad$?ps|pB!hC zBJ0IfYaTmD_ofb?pB&W##2Vx8F#F(JFK z9sr}UnN{ojd|ge=bkKvp+s|A3C|!})=&9l|FolFnAuZjloz51^A1}c9tN|f+hU2g@ z{kFHm^ha?~Q9XpH4A{d2>P7zEUDxfki%cLN$N__D?I08mvdn}oTL4NDQG3al(7*O* z$5R6j#%8$199fr9&?fOA{lO;TzPx~h5~?k}?JFGHJ&Y=_RKk2snwV0Kg$-aLO)drL z{$JsL@<^r-1-2uDJ`+%odn$=;$RTE;0vsmD$A>Wp)aUgO!oIrRTId;!PelgNKY~~c zdS@AhML$Puv4gFx7Jw@B8xa=DkzyACQr1cH4mzx|(Q)Aho%Azf7}lfspvEK<6%Zhy zrM0yYG1HF+M!+mVdt^KUb&lyy9s~}8O-U*~_|PnNVIT%=H`rs5KI~H3T3VZwmCNY68XFtICh5-HiHR}k zrdi;bl?pu1Yt$)Op0ybFCRGT!GW@{Tk^me_p=0b8aOlV+#x6^NPOU9WBwj~uv}n%3@Wx^n9Hu6J4yf9hea8Kq+|%# zuY5#R{lBNY25aS23(u+XUeg~6)Xp0O0%97)qUskoPvHN0pw-|=9q8tmKR^!!Hwu7~ z27(HD3rEj7Ni@&kA_|w5FFc!?s1o}6SFf%DJ!;r}%t~A^6*hQu_-cUagh@c`il-Sb z^n67^FN&TtrZk1pZRUf3!cKHnXp01;!P*^IM3^->4Wu6TA5DQsJQ-y6VW@o|gQhvdd7vm}r+;Q&ewPQezXT&~gm=TI@xY83OaMoS zw8&+KuQ27?jC58Kg3UmGYGQ>u{4{-lTmMK6>;b6z-TnQs)=w}J0n&ukhGk*Xw;23j zRDp+o!A~#Vn_xbmkgOP=!-I65_sMBSSFoE1U5qb*qiy2PJx)+;Q+#1S9d`h z_;I)!v@;I=eSrpE@aEk+j%fedL1anj1%~wEWD9YLB6-u90LB*E&1r9DCA1t~UuTbIDD&4tG$z9yM=XTI=PA_xom@zW?Y9w-+;kfyQC3yz zg#vJD*QO`>b{FxZ3k=4Pmd_OfHPf z?0~WGo!<7qyeyu`4d{p?g8wd^njtn1v{_b`2BcKt>zJCmF4W7oY3iuVHS%R*JC0r3 zl8wE6%G&xL#ck`7VQ?BmA}Mwt1`FimaJb=#Hy3$QJH!27>n=?3pY9ig@3dr+hHcdZ zGv7VaLW$YQuIu@}w6jQiV$V4is&EwSC6uSoJ3eF@YRo)4J9y+bSW6J%@3FB-nkFp9 z&Erf9QCxGyz>tFLz6rIxvcrG&4<~Ak%FG~CF*Ld&foThi6eWz0gld!M-f4w5+5!$MHz!I zB*1g{u;FROa?}}pG+|_Hj_5BCB~sGHf8Dlk$vugtY?jrY@E=tsEeG&gm!6si%s&ZZ z8WpeALSY%2#{i0;gTo-}X`a8Y(?89qvQ|;?nNQ(5Zf?Jxn+x)1ly2%nid9yAjjwQG z*^&{ijaZP5hXz}jV*bV;&wnz!oENax(wxWq;CbmPZH|e5O*<1>-&eNylL6qiu}jg? z|DygQTj=3{2uCkU$k2WJ_6doJg}@|uwDIlV?dlAUcZN(pVq}9%qP3R!E1$$9F?s@` z@ul3xC5Gh83wavLtrneHT{FN{4z0C%_8F$BWTd4N5iGZ1(fpeknm@hVI&^q9f%<=W zH8QLtwFR0Df~cYcqgLimk1j-#xoNB-L!2obp({5i#1P@@!-vP^MX>0=0=PyQt$=w> z)ZbTL-NUxfq1b(T*noyanVS>a4p6kS7i(aidHL_F^kS76+Oeg{%ogFUAR#Jqi!KyH zztq25JSqfyEd5=&MN8_D@Q8~TZbne6co4?g!`;i$`f%Z-lO`@f($PclQS_gSqf*gy zYVO^ee9820bh(hHfC`01h5kd<;fObH-&WQE$!V9^E?kVjy*J|&dZGR;e-~Z*WWnF% z2YY_|We=*AOpG-Dc zgMXX@uwhYk^NtYCS5R@rcl~=~{Cy_IH6w418MpsEJisLJ%%&q}hlLlqbR0IYprt9T z(^It=XsaM>yYD8m_1JWx5|qQS;od$q<`*XyMYBsv^r2)jEO;_%7E9Ex6@4nBxV*cs zA(>F2xsM)g#7Hx|z{34o{wA>^na_)2!2P1SG(9lg0c}uCYsb|bnNV+YlM`$`?k}>1 zt4iZ!j#9S6cLv7%0BjKdqbm zBr}F3AJ9_r2Yz*4>uo=|)#Jpmi>dmt@eF20a12J?xUta>18+vyFO1cNrX`c4nwLUO ze4{23h1pW^{!@>#`&^fP9nCZv7Z(a$vyaO%b#JT$B_bk1c*_>`86Kd1G#q>wb-8yS zanJICtV!qHqv_+LllJ^2_}1efnM2Eg>So z%_YhcG`-<_(zP7P>#!s6$!C<7&HUOf5fYQsCCP{k*J(QY;swLT+CxK}+c-l8Op}Bd zjyfyvA4;{{m$R@*6AnaLTxH@1;0l1V#U%FbvdhtY3hGtOpPnD0Uw}4-QX~vTqe`uu zI44rDI6VJO&;Rg;kem4DVH6Q6CV>W#YTnrTIgOW?RE)EV?vgB^t&E#E^h)Op$sz*5 zul|+k|2H2+f)q5>f3(m4fA=xMqYPg$t52p!$pFNsPth<@{7v74`a)Eg6*}@@oN7V7 z19!V|_3G7R1Qx@2;f~zo$(RHRzi0R=SvIo=H7o^F5wM1C6-So^Oks;kFYtNWLb+p2 zHfUTqX@FAHPo^}{`sC=M;U`lYVBKmawxcZq%-%A$UsDs32{|gG^<$G2wPS0CHMJwJ zv48%T`GfkL%Yl;GV!hwn<8HgUI=*{bJa0NAx!d?QWm46O?tnTx--}f*0ae#H@GAN2 zgbEEq9cEZ}tIoVs{`bEf6?O1lDc#>MlBXDuz>is>E30@!=HRhNgTinFEQ!|U@a)Db zm;eS(oCGc|?BPSloT`N{+_i&z`x7fyPEHE?994{(B)HR11#gNTx(2&cKhT&~8Q2qeljZ^EcCF$+RufRNdy*ci=~R@m3;H;4aBAD(Je{Fk)<{wc}qrVn6*b z83F*#Z-k+-4+OXo<}6$DtXG5dzv=d#5$6fm&PAahaQ`RkW$tP&2VZ$&I^qg5Xi&iL8Y`k_Vm=3O3kv>oPn)~StcG~~Ch6b^YKG4xuvbER1{h>hm?k+RL$3<$8-+D+F=i0< zVkL6E&ow$6Mli#o;EW+di%`H21=yP4RXXJ(Qy??~$f>ER82}jol09+X=vAesr6r-e zhbANsFs#(V@kMZoX7pvAwXS&dy12cMzC~GOJHq2~YN`PIkH7X5cA3I1VjecIZZYGo zkrJU1bG|~cQxFi{W~bd-i(Pv)${5%M=5)T_{A09ot;{93BZ&6KI~X+`UL4G=r%DE$ z;W`+5xmuZ7|9PE0tp9`^m>%ojOXvttfPUMY^j-X47hr=hvyg*s$V@&I6d_sJKgzG- zce!4nQVAvn=$@My)jNkTK0hd=J%hIrPZN%GASa8Qs*Be}&nqtF0g@42e;5&bxw#wR 
za<_$8#85S=G*y&zEGSQCBu;+b1l1NEjY@8s_;)fBNk-$rMUhF5dpsQ>tYS*+RaZM? zz~Tpqmu3_zW412ANus5Y3^r0!KuZyUXae}6!b zKwaHCQwr%xNLcs;-jZ~u7+rc$I|$4Aj~}bx=e=?3)^viae`djEK|ykQ3r713tCe97 zf(XKcA=H_HW~fv9Zf{@t@Zm$q#fvBiH%@qGxwhMz#J@6bngk`nFC?Tnb^&e?X2b?Q zB&tnM2KJ?Z>}<&br;b$=&)I2NNLhhsD!-uveX6IFqsJ%%->cZs61IRD+P$5fyF88| zMm6x5z*CI8j?Yc1@DnYDA-+<20DX4Aty@PD&$X2a!0~yaxWfwTdl5l}LAdMC!oDFr zP`Z^51hdYYyLz8H_oA+DwermHB9KYZPBheYItpmXi?HQlQ2!zf*X)y?c&&gd8YXU) z{kyS~Ew?#+Sr03hP*}TK=+Xd?5jeYm4zeR}sE(#4h4LnD*BX%ZJHZKQbOndgbn<&Z zj=A{byCsJ=zCce~`vvmjx4u3q+=Ab2HS{}q70;0jN zdHDu^mX}Kzy8XJ*C!Qk~7@1(lwyIrIM^p7nU^9v#-;a@4uL4fwwcgPAVWW(JiT?NL zFW4b#P!429lFV1**rr=@?)N;+u$C&gOF_(8Lw#LE ztOQgl;kPjJOirFCexFUzgiSXtE{=%QWLN$Sn$Y^2$4pR;3 z-)o{J^>Fr-*OpSYg;j~Z!-!L*_>%lP+AtEo2@Zjg84lTT(|6*d=vBjWa-@kl6V^Bz ztSweFF?Z{xl*K{sWCmY4cH?M-jXQU8q1>=9x`$AA28Dx?wMj%od+020@TU(SDq&Zx zDA4PP$IX}xVS~J^tz{Y?ABTOD*}(^cM_?hOp*a`7-+@C(0-im4)-J3Ao;2U?(<+RO zydG_43UX>I5GPZ08O@#5zVM*}%5%aP&)kMa@Cw%MD-N5PzE{i?lzsHc_G{oUIB$oA z(V<{sTK>Skq!_qSKpu?dFQSkw4D-=f5H^$ml##uBVaLIjc^$jT(DSjO;W>Ed-`x@q z3+&sAr0Xu-04#n1r@QjpWO6eK`W5ol##EgI9@<#Hp{JME^SU}cdty)p$NC6kA_~v= zc!N70ow#Ubf9lj05#uZ>#-bHaa45qV#oE1h?;=!%x2~;Q<>l950^mHJm}fka8WT)p zA+YgHXerS)dq}>8KUV5=lTh*dm*~XRK-6`2GOm&I4vC|6c((l1JIjklrGmlD3K}P6 ze}d9VQ`0sD#b><1FjVDJX&+tu`%v&7<>yzx^f`j_JmM6*SfPPmg;!(f%d^{pbEX)M zmWG47#^dR39NPuKVTHT9dz_*-2c;q=PuMW6yuN-oVg$(@1dh6uRf?I9zN@nL_zFC; zt0g6JXNTICZ|u$Ad}C{x#=(QKdMxk3nr*XfRDBcB`pkO=j%R~|Ca ztTl@=jYL)*(~DQw4Z)*6&suN&E*I5@Ci3vT;lU51fCRt)fJ8I==T9XNT*c0+7u`z& zU~S>FGX?RfY6|okjJ(5-RD)aM!bwD=+#`}G6~uHRI`VZiGl+ks@KrOd*|SQJ%P)bxJ(i+#!O-qymFh^k^T)Ks+88!3iuEz|e3=9y9A}2O0bdVr~UjA^6|t*dF-7 z(&1lBbNIL$nwv9B3m=-TZUO$Vb}p9RgW=PeJgS{3y5)! zN*&Hqf#LVW*+4z65ZVL@I`H0-JFn0tSVWaLIUs zh|$x~(3S87eC_VOgvlA|{0F2n-CNt91BTFbj}3#$2l!k zT;$a7@G7kD4Q%y*?VKbeVBO&d7;I6V9A?u{Rb33+iTUxx@XLqr$a7F`#Kcgs`urO= zsyyV(!kjb$>QP=^?V1ia5nzwsTY)#J7#glY>O#cNSq%Y@?dA(BudHOi`XlX$QZ~xU zvID4G!BfWO-qKkYw`!0f)(F{@`$7HSaOhf_>P@roCnY&({8tJaYIiKYK@DEx(wWd zLPU*!1ptkkTm)~k4g4eIqz)IH%(EItym3h#58#R;CwmEritaTxSFV!^SR=S!g)i(Y z#@68ixl#=m^GPml?wU%(FV0tvZDwJ|*94aZj_Yv&*2M!&21@{TW+7fB z){Sh$we!qI&hrFETlmM3t10ay*F zRFl)wFDfd!BSun%3*n&0@D5D2Cr+OZ0utXOEW8|I-4&dOh`mQo>CB(fQ=VOnE5zJF zx4c=u=?m0O&)?n4fr-crAO=}Eq1(Q|I;?=-b0pLkUeq%l9va9@Ij28}Kqg%RS(|J^ zn0pM5`yg0yaGn-CUmSSJMYI^|*EbwJK7`2V0yxcp{tmDH2u?-A;XB;q(37vlXTH&+ zDJ4aE@zSN1pfNF%Ba4fM$Vrar+_ZhWu|g@zr;3^y1JI%dym@G6cOxOJKtHDjWwsuO zvv|)&5fMhjI~+?84iy994Am;bywOIIefIby6S!g7+_UVs&h~ZXHef_wp#Bm9l30)bM9P8pBB=`4uZUpbQSz&xB%Od z3J)6_)!bZ*+_UP|c84JJ@m*+LU_nvAC@ltZu0Z1Tu>@}%2)}6(iU9#9XumcFF&&nN zonQoQ!}I25E?8bwVTJ?dAZ)|abAPn+58jmbn8E%f5O(LzoyVIWo$#Fcohnpf^-=)J z={1zkE*L;50l-w)tj8ftx>(+~GG~q!JA47Zv;-9LoSBq{uJNt2^8)NWLQP6xgPbi& zIk-G1Lk&0*&yuDyLdIvijGoMa{` zbryaR%%onyxGzYKJ`MpD4T391utZ z4!YFcl4U}pDYb3eB^*bV^Zg4(`tdTH1rUOnJPVQy*&WpGg1 z>vy+L7qC?@@Xt$BW3UXn0ZmmK7`yK@y8{n2x3(sc`C|+&k?t6S-1;-~FJF0Ks1fHb ztV9c0iAzKh7TKR5`a%g^^qZ~y0@BhfI5cM?A5fx3K3%m}=+ecDzv8=H3P=9@Vb3-_ zT#Dp?1C3W;BJf5qA$O_EL_|cm$oV)l4SiSml-Hx%Jz!w)0;YkAQC0vKvhBdVx#S>P z5bZWNJGn0t{iz7%@_Q`K$1N8Gx~NC%3U2HM24R1=)LP(^sjy_P%*@OrARX-_nZrg+ z-GFu=M&3gLQRe*T$3b`>IDqBOW0V1QB6_Shkl*+I6Z6K-bdzi0hzNw8|1J($GQqjY zq`ttv)RcADP{I)zo2(^T%uD1Y$UocbaI~2eW`2-sR4{lB z$jIrHmz%3PgtLK`lTwW4qP@NS>#tv{B_$>C-8H^>%I-hdz`9F=8y*{M;{>~;j>?8ta2C_~7(1dK!)&w`z>q~T36o@k1iE4{(z(PLCJ7uNu$xpstJkc6 zWOtG3i{Q4-={f7{{2UHt^iWrU@4tbe(cHJM0)~D}i3=|&#S|hhXaiCbkTUuAx0xd; z!j>JiZmbkm3T76#CIbKfUO>~vh*BkbOXjs}mtz5(xjPZ1l4NWEKI5~mrBEOvCCxWr zKda#(KEjC;KJ&8^xJH&PS !0-w(@@`Ypb;w0X`&V{8`vRXovKO@-8KN1^0XX8w z!42>dS;9dvIav7LSUMB9ob#{!w;*IoC3`f4Qe-!_qA-MLQA(+VAxkv~Nm8_tEoCHR 
zNXil;HJX$*EwYTIl+a=sNiise=Y9O1dHw&-YkoA{_x-&;pL4Ero$FlZ>7CPadbMiP zrjkE$QR=q$L(OSIm4t9;Xvuqw-$)uI{=3BPa91p)zP)khP8Yrm@%Oq5wgjq7cmAAD zk?%ptX=Fpydy9);MjkS|ZO##4^z}qs+%6JF{!xROvu622G|R7M)~{jm9*S0~M8t%H zt~RDHeK_?c_%av0a&_)USk%7gL2?;9Hm!vQ0?^HoOTDJjmQLlfSFZ*?j43K8xWQlL zG;bxl&zv(y^FkmJysXmF)cZAua!5TIcpDO0vtilPdi4tBPmY6bZoVZT$gL!N^-ngi zyVQ#Jt3K_*()afNwSd;14f07bpNcpTp#s5n2TY&R%}r{2d+LBI&$xEL%H2p6$S%KR z-;Od;uBfOG27qK#z08$c1wiBHoQtX3%iG%WTbK3|?1a1g;5PFNi*4Xs&m}0egGm^*|Y62%Xszb)lGxPO-`O-YDuueDRKvO2fwWPP|nRh#||D0fITw?6A&^j z=Fc|)CT4qWgz6CLoKuiT9V2n>OnkhFom~&YBG6mwLMUL(88=iwZXQ3i@8H2!yy2wp zG-Yxci#vN=-}GU+kMYa_UHN~|_9yu9A62!&aK@^sh=mA4XS{;ZkO5tTy9Z51K?q0i zjH)3In#zcu8s%Qn1yb6jL!x`~uXQDSz^X_mUlXZS00z(xH}dn_@g3w6qt^j+8DEt9 zr{JmEw*{70f)_&NBt*gE#*aV2A=y+(zzEI@*UqB!Lop9cVF*mRi~L$41LO^Tw4Jl5 z1CSywXK+-Xk=_^(r4Ve*m8pOK{svX0Buu_r6+Ch>00TKy|9$#2o#SS-;!eN7ib33? zX+QyS355{=78&`t5p{UBYEH%sKnbXq%-5k~4>DBL`WELFKAKw3p6#)OJ#*%a44Smg zg*>I??>}HbD+cSRW2SRO8;B$!k3>pn1T*+YTwE0Zu5G(^YI$RN^*W;Dx;^^#U|x^k zVa!(qP4P|F)9J6@+7UA)tw4iiH(UTgpRv1q+F^dj>i-f8bZrW%>pp&as5+&Gp@M3C zQfMhNqkV{g*IV9)mK@r<*AMoR2V0oacfy7a&>cG8cufZCI?`{ugDzlV1HZ7fT5#u& z^y*GRfKw?jK!(^}?Kp9wKXl=@ z#;VTQd3mlFmkc*D5*HBZ4GF&h3nQfp%G+Ih?Nhm2sKr?a6x*zG}?ciZHsF zE|AkrxfnfPypvi5{X)S548Z&ZKXM=YJWz2Vr((At9+6Ak1JVsA|(%1*I4%-10~1(<}B>@;}e{t>fsO#1=q zg%O7z%^z!nEe!Zm04j$(=qT?Iay4hjG03+*@Cs)bj2<;=)bT(5Xoa5-MGsrT568|}LQj== zM{jcbxW(tm)Vf~>-}^4X72T^78XB zXp<(Q96s(I;V;{**|+hz%KSeIU0hvUETFqc@aohy;`zl4<3Jb2#Di&&sXYbFbm%dn z74JpQI+f$U8#tL-KH}2nWp%TARp60K$1I5T3x8ik1}7yv&Wxd*w|blHTZ=W!v5Rhn z-8yt1ZmhBP(xB_q;cYv0I`rl*I%+xqF16mhhgM>l5srG`X3hoMBqfNTOhGhc2FjL@ za-zEqLC!F9*9xwIak^HBFxnuG+lhk$b?qQe*q4QA&%lRaFqMio z$B({)C=_t6tiZAN<6yXwGAQJ4sefm9A=UdZERKJWFsJz?(0ZCfKJ&W9roiF-_Qx%g zZzk4iblv-5*?JF;CYMxYWn~jn0P(S~r&rf%194Aa_H)(nG4D9%SC7Ng#vFjQrU7)B zLf;ZecfqhRX*n#JMzMrgVOx`vKMnF4G+;nRUY-gC2_J}D{h zn5#WWUX^0TcG*j!d=$gtpdoP+0<0?Be4V=O!;9TPksbGntu`ew(03IH9)8K06lBA5d?45 zv7hi>+PRP)h#7*3OI1Ag5?%>9MvR{o#^=qUubJqO+7*#NAhHqp>h^3S#O%Cw?V4@l z7n+j7PVMbK&2;Equgwngy)Y5(Ms|gq9|75wGkO(OSWeMbpp^1WyLTh$I>2}N%Z;8h zZ{7?~&k+pwYzHPo%_S|CD!%SXFZj%Escw^2oieyO^b^mqzP?_#*zB;6OUbo$6iMYZM(4v3N^8di1D?saUwglwWEcC}h}^jZcI7DyyidKpOOi zAbJZK>r@Xvw#wm{o4K2thuMD8`8@Pb zF49!Jh=Oy^asLaVU6aW$P~CqYp)tAU9f)#9Mg8dNeMei7p1Q61?O57_Jfy$xte%fQ z(}cJCqN9gCe$QA`7+XICxNvaqB2Jgc#m}{-6VGnPE zXVP)Llgd%WoK7M90QiLl{ofPJuxM3+9e(xb!qY)jlpN0Rpd5?f1nY@gzi99`E*&S? 
z?OTu7Lrl`1KGhaY0kAoM5yb(zy0H|Z1hwBA{rhi8XNp4l{baV#yi_WZz{i6iyfjj8 zADy7Wuf`svtF++Q1ya6OvETAGTM1L-mL#+pVcJvr(zF(6P6_d>uFmk27kXm|QmLYr zFJCUHRIm}nj##fz^rpTvz3VvvYzBm&Az=X9qzne?-lIpb>ycWk8nCpnDIyqyhvyfE zQgNN2Hzn(FCgh@~cjb3g>P_Y`PM1>(P0gs!jgpcSs~3lI=%2J8krxL$&<45uE|t^# z+D)4`$1>s~>|Dubigu}?;O7JBl%6Dbxunds%WK{5{gfxy;ASbDLk1cN!Am~E_$;V( zKPK?DGj0{!@Ed)XahyiJGTlo0)j64t-SAlV+}BJTh$7a{@X{W)PJs%u1g@N)+KB>- zY4e_tV=iVN7(Iqdw5^MM%bRF?9d}G9y~$g@1g#ZpBXEXld(c(mWt3ZZY%4R|-@-7C zOEaC#;lJJ+z!S>X9LUAi`>l=}ulm|xBMs8eq9m^_GyY8}WJk3tTke%F3=96_?@XD8mllA1u z;_@+cPG=t)YGQ2cXM3Q_&88RlYpxTDZhe>S(J@Nfwr%5hEI8i>UO{p&#N)utwtmqb z5vg8uBQEvG5YImP*n9Wy*Qs<OZqP~$SpAw_|v5vFodOfX|YZKFQ&N?uqGWrPw9J~u1zJr(;v&Jp)E zq>(;Ga3fIg1gtg~$DiCq??J75_aC&5!bNxis+GH|4(;0~_basFI5ZBQSAhztRlnu= zn$&k(!v4sSCe8&qby0((FKn6ot84(6oQhXe$soA$SB%cHl&K4Dd^$H^Yy=?K+S&%y z1TQ^3?^q|iS5~ZAbrOOCZCWq3HpHy;A({(BYplcowm)eS{=uT8T+}|-uV1$fUqeRs zX3GjBN*FOGtoQf@8uR&Eb$yFbYs$MLa56=!1!;3|P`8^+mBh%7@m3)OFIVEdyb4di z=B#IUccZEQbyFJew7D=sUxh{@!+OdTW@bXK0`C|QSMala^c&U6rrgh5*}^`}lgD&O zOtg5VRG(FN^-0<$%QUf=*hNApI9fRgpE8KFU5ra9-L$iMM5;t4;TqFO#Fa)VIVTVJ z??Ev+5%ncb4Q<=DZCqN1eo=^8e)cC{qNMGjMIn8UkkUy|8I*LC-Y!il>>;2#VzVX$ zviIlo@87nt0s~gz{7+19ISKp3mW2IfgW!Zz)J#~h0ClgH3e^`q2$|VG|NMg?Xa^nw zC_(;~TJ7cm`wa@VXb*$hqSfh|Dx4NqIM^Y-HTUexh@O#?V;OJ3hQrSmOIatZW? ztiUocEddP?j|1RKLbl)3PA1kE_R|rxAO>ZRe(#{FI*s&PZFph>3?Qv26TJu}1^F)k z>=-R*QpI^r8e}{-Del6B&QQH-PpPTrclY&P{51B@8d5c#kAuC+qN1b2NXuu=o()M( zTzsvY>443u;MZ)o4|MBv&v%uE4y?fTpzL@(yIMem5#eCIx%6bP{q-}|F=4S8V!L`u zE-r=)AbNkQ?IClVY;b7DL(ZdBFDxY|U0T%yAkPMUn?yf)Y}ngvr=XPv6lJ|u)pLTM z1*)s0R{4;O&)>ZZr!?(9VnkOmI8LRlgJwp*Ak6jSym{dEOrj~(`>06E4lUod zQ8x2EaWm>f+yt^6Xzs1(M@-NeH_q1Q7e~lD7-~Yl!Pw`9*KHL<)bYoH`0`Utvn=cA z*MjOzvuR{? zg2X!=DH>ZD!Z-%fbK_i#d1%lvT3+-M1X)l14emxFY@tEeVcOE*!;sSR6mL=3bYOh+ zUPJ3r`6athiQrB9{3}(D*xPtxdaFf+4pww|*pT;l0WvY8Ue*r}l|Q{VdC=9N6?gB- z!Vn3D4cadb(Khsty8EWGQfKos&%DCNEzl1}%C{dpu+DZ&S!Xg-|3LE?)D%i;ChQGi zMq(U_8rjOSFuJC=Pq%JughPU3Z0syioOW}-)vIH>89KB@kF#uBI{5;6`UdE>0Z$T- zx4&Y%g*S!N#sCs+NLLm9#6xX^T_RsHWD6Lo6J4|RTu4Et78nktT)XDV3@NhAX{_p6 zvSf)L4GXa1bKsFgJ%VhG=E9|_M++asCxg_)T)*6p(MBjiEI#rQp0 z<>d|?u1Er9Hu0kG*UgFcSLY^XrsDHrYj4lr_U7GoZ3eHT15`{2p|tG#eIBRjb_}eR zzu06D$&;eCvT$YHqnO%JkI@9dy@v}@w!Nr87~a7lirU?j)QFdFYmtB4xs$FTy2CGUV8WzmBw=>wi-iO7xj=kH3Q zPLk&(%@=T_&_m}}e6?Le;^z?mVUfzeIO7Fhn(8(!8oId)3xs$UM9x^jEnEL7kMr3c zo#4Rk#n6bs=;TyMRp3&6^AiiZ1P)WcHXvAJOx#`mRqc_|!L0|1x4s^sK;5oZ^%jir z)7;<}T2t6oNZBO;St#b#Xp#RE<65AW`fABX+nB7n5_;SIz>ThWv&LSA^+AJ-naVaE<63b738Kq!MqB0V)u?vWS9b#aLSdd)XjvaH5 zaP2`-64D5+env3o6}!0327Y)v%TQ!roQ4qW5SJDxp(q}n36HsGQBPd1+mh#!(oa)@ zLeJHswv0v1a>nuRpAl-y@<^OXJMf7hy(>ND@>Kvl`PibKdW3Dy9oS7_Q}|Ru0=~F?Y*>Ypc5&jt9NK6|1d_&eRUTIK5)PO{Y}=SeTjQ~bi&xZUOlpb zMWU))5U+v8DTBm*7vIbK$id{OB?<81pK)uH#~)bH{>!BcGc`^t6kWJC&)nS9!oniP zGUAfSH|o`$v`B^c16;Fkt1mo_%jtdQ6n26SE00atwg(g{RRfURF{WJ7ei8sX#rNya zh?2U%&1mTtP6EbCZ*$xCW^JBqAesFH2eh9~ZN=aA*@f-Z`^7cee@by%_!Im;aDHQq zWrtK(%LNOvpV^n5Dxci#dwe;UFyc)Fr%#vPiwV;d`Mk9UI(sc&woC^+C$K1q{zYLY ze~}C=adQiV0{Mv_s#E^yb4dgTs}emb%v@~uBazL=2Q)7^_~a37)aP||o#0IUZb~DlejE#0vGK4n9CJ+4VTXgeg2HNEu>y@(6EjLhc z{K6JabQWx9e=UYIrSD=icg(1&r(yHX3R09EA=$NA6QkII6VPROychp{LfhtHdu? 
zR7Ea+-}sGYYX|JL9G9Dua{`W8SXRo&>{azyjpX@qjC86UnL5B9YH0dz+s>8qPN5jo z|GpwUKDbH>{UH@6wUcYO|1K-0gOPY|gZ3C`Cs$>ab1!euMgLHJ=$6Co(9o=dLo8{Y zu$r!I>gRt^Y}V)o6O>%2PK6l#{_QIrQiJndJ~}75Pr+*J9HICec@UPVUixX}3&MV|+>teKKMG z1h9lnZ5mq1D&GuRmNt3i6^2}fcTSqxyestxg`Vds04f(DF?v|D-=SNH=g>@pq{Igv z8=v;#1%gC-w!?wp91ob0Y1|wO*l;9|)@xc!d0^F*-1SATN^a-Bt*TN+b||DY%uVI3 ziEN0V-{MT*Z~dZpYi3jB&7%{(i8_0Q53MtbyPdKu4|ZP&c!WgK1*S_Cr0hy;kv1{E zjWq}DaVA#_d@$vE%TIHM<-b?Dd7zi^_S7k1kp{0DfE%~h%&ybuAQ=4gyDT4m>9Ut$k27%jEK-585IHcpZW_-w ziDNMF@3~+$jnD&yo8L%0gsK#DZkSHbUAWpSSFWVI_{c9!-(1IQg~bfEQheZ0|FaHoFS}wv{cUt46=YTrUXe=NI9V!?*0Y>>iT~l=u?=CoaY0# zM4QqADtOe&`)z5{qUabPkxp9AIZ7E|%$z@BC610C--DMbxN0CT0|YC#o-4x1heZc- z=qt8mrojYO0->fewq+(HcFC+D%*77ZQUq=HR zkwuV)M(UPBlm$EZHeK;flZe8}6VXFZWBHr5eWv*KhIP({zE0Zmc@2bQ2afGXVKhBo z8cv0A*iF7X!6TAhZ^QNPUpouQvTK*h&JI0vJaPJRMl&YN8Fa9yi5PY12-6YUC55vH zMUk}Pu1;#BQ2V?K3BjU#n&LZDYs%V+iNdQj@#(UQVP0nnHw06^Ee9f>h(+86Bekh1negcam+JNn95+M@8Zi9mk`B z6G!qGI#IBDApaYHfBf|6He@Vm@WEohIftZtanvh9sZN$HF!9A4WZy?8WM z{a(A@ZI)=|kzAfju^!=F>o5qH~9Ui+p1y)-_TBam}T*1KQy+9XR3^y`{tX z#6Fm51aPA{bG}Aw9J}{7s0nsQIHu#96}axDL6QU{J(=~0xsZ(Y_#bjL>KXk zOpb`su6uW5*nGgG6azEn1X0ktz^ zR))Vv(kow^O9u*=o%aC4A@>7DS(Wy!gSCnGXx`&Ub_Iv~~rdp6+Lbb`@y&`HM zvd^sELzy`pj$F#o)4&L70A_+$|Gj7YL zL|c_P%lI0g&8H#c5g~_27eH89Fg-%FP>9Y~_=9OyOxq}B%qXuZ({o&|T`H$BRQd6* zRgVn0VXo8=jdj=FEm^fHl-!Xo3iC^?6Wvy~Yu~=>s$bXFmZ*UvBj4;6=`t!ZHEr#1 z>Zu#IZgrHo1|HExO7<4wU!}&pG{|F!8!Bk(!Pr-C4Oe67JP zKA)d#^sKrv#eEk8_i(>$xR+qEd;_av+IDqFC}aVvXub0+N}BzWha`DE(VhoTuf}ZEr`)Y*&!SlHU359Hnb-Y z5<4VTJ6&06&8)&tLx*;Nh%|#q2UzWfLg&gUa`IK zShCMP`F=L{RBDZYojY%omiD-N_io9RRazpfqCe z*)224 zBW1sFnuhus1OyexJY1TUu%=`0ajNa_&s~!MlPLD~G;ZjtrM2t7|9(rdsOPH`O{cWA z!C0tK+9F@B=N8TTF?|7rCMDX3qqXnRZUB&E!GlgD|CFl)*51EbyF=o#DB#F~N$#a1 zel+xE{PIq{bXDW3cL(@>Wg`@7om?Wsj~F|g5#}#9=hZdDksMy()+^Ep4UGWliRn95 zVAbFEIc>x5ejDqvs?lQ-c^TCFNp9|!LxWxetHXx@O<+ZP30RjP8kILnin_JAw}B$ zp6ZhjEgFVwv1TLYcOY~ zbdOsAQqSMK2^GrAV;E$VE%H2SEg3lEGcz&OuoQF;jc82U<;NDKav2NfjWRpXi6#vT zEqZXr?>u@mgAU)T6F5NJ;7+e&;!qTFGCl%()Vog~mQwHF7P#`=$mq$^r30{mVluux z?FLa^!(st5l^xT4hIATC06rKx1+JIoD8qrfSX0su7>%|WNMEQ<8iFX4HK8PfAN#oY zE_JTf_B|0VPl&wq`Fmk^G1v~&+M(AT3tM>f{_VO1zS}I zV~b9AZ(!v$26o|7qbG0e$(PNbWT2H^4mmwL>Kxy!4x=}OUf*gWouoU; zqjzBi6^hUt(cEQTr2O(sbz2ztnzhRC(Losc2@eqFF~r=S$78d^o)IO<_YM8EH8oq) z%_@qS)C=rCRUlMd-5x23Uu_`dN_J*Aj6ih~Z0VT<*S5*UZRADX%^m?6pfS3W$kvh? 
z4+-Fw$sw3zZmYNtAZ>l0UAWnLX#)ru!JAYX!aPeq{D=H`gK_cxLF91wIu$jk?U7d9 zpk@?w?vbC(AyB+&_c({YAb14Qd6kg@BoSEi`uyk7?a}0Y22;O{{v5n}_nerml@!Xn zr+#0*KW_fFesJ--=4)Lyj;*YlJGGTgm#lWJ{9CoFtNT-9NKT)}E{e0z^Z(P&;E+yE z{}p@pwhF3^d8*xXtGP6PO}}^_{i01VlSdu7469UJT)ye)mO2B2njBNQ1`H+`dw3X8 zjpj{PI>df^%CuSfdO0~c(u>F5?IPQUY;@V0o|RI5on*YMG6f8u!86g>*OpV9MaVvA zbu6S^+n1`JGyWJC{PK44;-jvKgi7S~rF+cR`}~5r0Lb+0f1=owUXou(!_o4O3zI5* zs9!jOQaQ_XuH0Sp&n2uctn%_@80zi3B40cl@ygPd zwwLvm{}%oF>k!_dh}nmwH!5*j%#hm&DE#g9z1EzrEK1pnY*7iyaj7Vot?1M8M%_5) zTmlF{ba<@LalYcpY4NAjK=Dkn+D^p37}p{qd*GPqtXb_v%Ab}tk72P+-@&u86yeEawyBBu zTf!uB@?3^2nPE$H*h2wGDh<05C=y>_W>8xq^-khaf~uZ@K@O2dt#98w07)5d;bOVD zR+G$nL|(q-F~#j#Vq(~G{!a%!ihE0+g?|(~3kbM+b@}L3QjMNIt;@kf@cKUPaXL~s zrZR;vhft>e7?JK^xUh2h(xp@hM@zMhTw1OA$BTgunktu-JkJmR{m52aW(#dPQ+9Wz zM55yvF=@2dWe)A`{#}D*kiFYegv41a@+kdQ|6Cb$tcfXQ<@1Jws@Y7Rf2bk7fJn?B zO%D5=261-@Hl@;axj@rd{io?+epF`|0)^6-|4{^Y?!0`p7ORBUL9mOfh5QKPIFRUg z)ejoD#@V?)1J-QZ9tz;=i_-_PJlV%ffx4ZeMXruHdQ2D^vK)NU1o!TfFMoT{EAIT5Lfazb_897LHo40fR}(v=@(hJ@7S@y-;VVKXX5Z{kaLfA6n@C3EKK(ZYeyiFZlr4oEVFo?DwcW>-P_;4g|T z?;PtF(u(XZIwpF>C@??RoI3kHuz51`nTvkK6!#!yjsWe6P%LE99QfRWK7_L?_nM)V zn0-B+tqlbrPaL&k2AXRquzotp1`a(UzA9M!%{=9FIBoMxqa5yP?m#jC$V0o97o*4y-_!~f#N(L5oM5d5LwZ!h?#r5)K zz?r(2TvTMjRA>v{$Sz32N=z*T*q2pe=`r6opMV%gKJ9IjgHIcVC`TBUGx#)L*Ji-_-zc!rXujlH?co40TOh^x4 zfK2u=pmjE^ox;PqpoD46y!aD-n=5SEES!?t^fP1BQAds>l&@^nn12sS5MF-MNmUuF z6Jj0}6aoPW0Oy^q@1Y|^=emEte#d8(i9i$DNSat2m8@I0cCY^R`-5ct9kcg~g9e|T z7WoHv{V%Z;2BKhxn?l&^&lj?G=Ox*?;tcak_IrNysqc>eiNbP^Jn2~fMz6+mJcED= zG%FF$k1XrGoEh@OQ)&y30O8st*@jX6B>Y!}O620DOD>(GKvfGKZ?^u=Ea}R94ho#F z?T&^5Uxu(RH&RSeZNqBjvFf91pVB$-0FZnqW7HAF=vF9~W!-M@HaH0Bu7F%q@POh38DY9bz1N>@EgP#0@asx= zS6)<3#ybI5DhZzFWZ&^zNLixY$cIkm4dXPeqZDKYz^2Ulu>Ilhs8fWRrhU1~RA>Ir zx%6{5$Q`lzO4;(+kV-o@WvcC39Uh?iGx7l2%%d5`nY!npx=zO2s zT5Eik@!p2{qK6-BXqd})GM|k!?AgA&%8c3c1%3MFsp$1RO|#DKkzc{FTO>`XfMc}1 zUKDM&9hwU^v3^W~U*-#65fgt;zs%;p9Ulh5JfMS1+^V{ZUX&WRw6aN2CY>i{PG5f_ zs+E_0vwz9SFO81|`AG|eaQEhxij_|(nq9kdofS~zWKds#V!DLppb1}6Hr=}6R#A~{ zvlNU!jUD^>Wb)URmVVtrndiOX?v%1lk*dxV0KzwVH`Qyow6ow&7)%g7?VGv_LFno- zDdPXXpk*sc7~pmZ%@Il^nX?O@!j!noEA#xLaOQ&E4y5mwQBW`|*j~NNGqpE72x|7e z#QWiN;HlkIrEf*U2|DQJkx4~c`=Tn4YFTg&`&~6T3g5GmF{@q}_51HP7%rd`&8Bb^ z)r9cTG7T`xDMgd-uep+mJQ*1h2vA#Fq+dSSv$`#%Den4{q{AceLpCPUtrO(&U<2St z2qJ)@NUu=ngi7BQoft!5daG61w2|#jQLIdszk;)hWj?_AeZreUAQxk_iXey$u76?C zfLFjiK*+W3Ha3$T8NR|EDJcLF>@&*m-?Y$r2!9Mn*iRO2(8`z)?=Ht>>fFE`Y zRjKoO2m3*Zzc(^+7xsgKnsoNfbSv9QofQlAkalPKw=b(?x>i_$yLX?DzEN}h%wv*> zw08(VLN|cuM@-T4O08nhCxAkI_WRT$QL*p$RneYi0zLe}e?xRT^#0Dx87ixI(lnRW zJrxv7@pbd|?L62|+r>%bBn0u^4a#~pQ4HMfAdp>(f(y=KdE=vlwnqJ%K9#MV&uGm6 zU0qN^@Rcs}tv2aZTqFTqqK^{pJiqS?e6*_d=Q*ViZBHSWx^2Q-ao*mVn#N6#*^!>8x0}QY5LI zVXI0d(W9WYQU!U7?j9^36dt}jR^4n0jndo}>8^>J6yT_$eO&BY3P}P@)TA>!NS!fb z4lTF8yY~DeM^3WsuxVzR<6h3F$sbz&@t3HAhR@HO_9>k0c{j_MYYHD#WuOghc9J_n zE#y2;6a8v7WwA6(5yONwkZ=1(7Fxg88fvQmrKqd-yb+1^PR4`xzolKv0A%p!&ozg# zmls84#yr~{76w!h#!Xs~)SfwPo=|~Vm2t?lpAWz&Cq+!GTMMskH2LcK_EsJCytFCi zGku!BX3lCa`zH?e-8vx=6?d=MgHuX6WA1t`=s|XJtj=ek0*~L~(j*2myX;#KdpzV= zWTa?_yQp3GdV?YgJD{9pgOxSM<}+cJ?4LJqzhp%smG`-Y3QN#&FhnfmIhF>v$il@=IuuWU6~O zl&Bt8$;mLX3fauiqy;%sa15K>KJ{$^*gGpTbNi%;Ln?{jiCT|2<3AdqJQ4~#_ajbf zA%Wl6@@E1;#u>FfJ4S!re0*|T%U)?)diyrkD*`ep+>Ti}dwz@tu3cC##Lm|CW~*5Q zErcaf2vV^U@`?f`g^aa0Z&t)hP_eh+G?IM#<5$3t*ZjvDuG77py+>@Gn04AWGXP-L zABhXh>%?aGw*c&A5DjqoLwK@de)r%^q`mmWTv{gjuy&k2@RB4ySQQwA9C|3;bE-7A zGEHbgb7=CfTfqD@b|9VNli$+pZ*ad#lR~9)-|9)-hde$s>r@e8?aAH0)~`w~%li8z zA#vv1xjX6j4$~!EW^vgzy|gqjQDn1-Kh*sVW<%h<`su)+INu0pm$un-PvOT%@KE5) z0aMv(_HN?KS}}l=S{pipCREg0;pM0$((InH4{@AN<8TU$R 
[base85-encoded binary patch data omitted]

literal 0
HcmV?d00001

diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/plot-all.py b/src/dios-egraphs/Diospyros/data-plots/utils/plot-all.py
index e29be2f3..7ab6af18 100644
--- a/src/dios-egraphs/Diospyros/data-plots/utils/plot-all.py
+++ b/src/dios-egraphs/Diospyros/data-plots/utils/plot-all.py
@@ -16,7 +16,8 @@ def plot(csv_file_path, out_path):
         csvreader = csv.reader(csvfile)
 
         for i, row in enumerate(csvreader):
-            assert len(row) == 4
+            if len(row) >= 4:
+                row = row[1:]
 
             if i == 0:
                 continue
From 87c8c3a81e3211c27fb7c5dadd92763a9cc5402f Mon Sep 17 00:00:00 2001
From: Jonathan Tran
Date: Mon, 24 Jul 2023 03:32:36 -0400
Subject: [PATCH 132/143] add diagrams

---
 .../Diospyros/diagrams/AddExample.png          | Bin 0 -> 721724 bytes
 .../Diospyros/diagrams/CompilationPipeline.png | Bin 0 -> 200148 bytes
 .../diagrams/IncorrectVectorization.png        | Bin 0 -> 310237 bytes
 .../Diospyros/diagrams/LoadStoreMovement.png   | Bin 0 -> 641840 bytes
 .../diagrams/VectorExtractionExample.png       | Bin 0 -> 283542 bytes
 .../diagrams/VectorRewriteExample.png          | Bin 0 -> 435263 bytes
 6 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/dios-egraphs/Diospyros/diagrams/AddExample.png
 create mode 100644 src/dios-egraphs/Diospyros/diagrams/CompilationPipeline.png
 create mode 100644 src/dios-egraphs/Diospyros/diagrams/IncorrectVectorization.png
 create mode 100644 src/dios-egraphs/Diospyros/diagrams/LoadStoreMovement.png
 create mode 100644 src/dios-egraphs/Diospyros/diagrams/VectorExtractionExample.png
 create mode 100644 src/dios-egraphs/Diospyros/diagrams/VectorRewriteExample.png

diff --git a/src/dios-egraphs/Diospyros/diagrams/AddExample.png b/src/dios-egraphs/Diospyros/diagrams/AddExample.png
new file mode 100644
index 0000000000000000000000000000000000000000..b6ed1db6a688eeac037566edf7c39a6d14bb74dc
GIT binary patch
literal 721724
[base85-encoded PNG data omitted]
zIw-%L*qkax3rC}e%HNIO9Aaun`_5H@vJ6feE&8R`Z ztVhYV-jLJINg3e`Bc*_vb*qo_d5>Mes#;A(q_E$i@^o^n)r?hC`We9Es`=+fwmU>{R)m%j}~SwA6_aR+AVSTn4W&S@d#P+sPn0z@*+L`v7xb=(-d_b z1r+}ZxSr=iyt=cQ;W@M78(PSqI>g;;hP~?#i%c9%Y%%TxZ^Ja8z3_>0Fy`INs90mu zTFt)OQfenCV$t!oZ>@YYqdo)c=yVC%%5iOa{`OXGvb!o%6Ck#i4og07fQ7Xu=9g;# zkDwcuPz5h;6GQQcrQq!B@xCkRbaXY>h zYklOuFpq9JL_F;;>CZO(y=w$j{uH&a>2_H?Otf!7rBgP@4`)RZxMVn8^;PG=v4uVm z2yz21B@S@vO^Z+_7J94aJ$XCy?i2{v(a1h%7^q)RIlK@s&7 zMkXCBT;EcAI@mk92z!dX{6is(Oy6g7yrlj^;%X=MQcp#lS{meRLCw$3$w^}pDWzr1gVFx)|ac~I<32|_8b8vIBAt~5gyc}KM zda^mX(Ee4)f7Bym;bP`&^WN14?g1$nte|Iq)JlD}L02dCcOoLoFY zod0C~N6~-J`o_h=SsLVkY|~Zz@A3K<^FIs!#VE>g|LXq`#b0v%lZzCzIF2aCe-4^B z&M2US3^|U^ZDiE6kx%3-yZ@l(A^$M_^+cvIslCL_cSTW9Bv2G&B(*(J_x-S7l(Hvq zhL4Mq-bXE$3<_~CWwj-q$UM>bsE$q%NGA#Y_Fy!(2#oqQpZD2AG3=H^^#|DW*jXQ+ zNMY0GE`TkqGG}I5nm{oesDb;p3XU1cUhZ3tiwh06i#|uK#?0Og!6Xz-dLm@Z+&{ZY z8_{L!^D(|9#(yt-Hmi_GJ&p3JjO(}hY9h?+dHjj)Ca?%lo09Scd)uN2vH{Z?gb0GB z$zn~Ma>?_f4SLke0KHJGE{z5_(-hUlO}mbk78hy@>wP-M2)x|<)+*e@kQYUn#_emC z5jbuSKRn0z?xeF7z8y$WS2Ong1(|`^%fgeKuqVBb&EHFX(x$BYtqP*|f1a%1C|k6_ zvO?V!O^*;qlgBbA1IG~yJsa=RN(NiNS2)IUYd>pJaA0E*;&KaSNhMs%g}yqAl9O`~ zjla>_RVxByy`PkIXr&_}()+#q5l4TQgFb&8Ncp1XM0*!h1j&O3|I#O^xsFj+H>C+8 zWHv8?#3gBhy7$CuhsD?2()l;9 z7blm0*diD;m0Ykzc6>(_r& zS~NxFw_59UL$CFjX*A%yjom`BhEa9R&}Gorjg8b|)&$$*7ko)Sv{thnLV-SW+m50$ zm6xLhylRCLZ0y${LXUuq8o9ctMA;I|mFX2*BnL_rIM&p( z+H6?X%EdhmSbLHb52Lz)O;UNUy5qd~)G(&T_Y%Cq>2h1WtKg$^mFLv4SBI@(-R!_6_e)!S zG~=0n{8bE=2sBm(Lm-scg-#iR{y+&%Z)gGgq(Ph04vGyRz&?8L-_UF9fiouPGrX*n7luyD@oUQs*&JCx-w}4%@EqBXv zt4n#yS0~ZFWoEJ&!h!T&#frwAY+(ga(e_AS@6e3Ai~LyhEmw>M#=NF>s@|3P|Gc5h zu@e$-8mpH_JC~#>7jTug)D71lHhh79VS%6RxoVFh%(DH$68cyz~uama*_UO zHFAjjHzy^tZQ1X*V!08$us_>?_b2O`xSO!EH$sTGGd0+tmcy@(P)(QkGY@wjecm7p};Otv#hdtaZy% zqt%dc0frxIYHt6ATqEx*ZmqEbEna}--_Ym$X({doIpb`NG3>j14dCbBIw(D zU6%&7F^$`ca=w#n|JEQgynGz_8gzXd!ffwi`CH?Da|*)ceH?SYz=7nX8?f63F~qJW zwtq&*b-2(FH(w|A(GYpj7%w5#e6*rG?rW|m^W#Lkq%@K`uO8Ra@OY?4Oi8@g(i%4X zh_0U}xE$O+FUej{M{4?f8JzVS$`PCYm2Vkd0YDfN)P3O!FPmp@dB&%o&ZDN_fr7{Xg@81K6_{0r&Wi}nn ztHL7B?a%Q2Zi27P!#k~a`n?~*jj(2oyDrem>IW3z77`UD7<7`xNM8Zz)-AT`~WCxkR3P%{X&w{hM;EVp*vMc$U$} zl)Wm=`@@J=P4O;bEHGat(H3W+{$NyFLYa+by8pv7Zo78lv+SF3VSUo1ZBMG)wats# zI!nxw=vs_UrV}N2-_^e~>794cLo#sLf-qSV4riI7@(zAA1>Cra#SEgthmU4m-?W*0;pH*?RL$0Vm+6ey17E^v zOP%^aw1~9=N#4uBuzjVYaXRSIg-DgshPCS#N4CzXtM)pw~&G1ud)R#gUrrxXWz!OMU}yk z6&ZI15<(=LaVWO){^kUI-a*!LuOI>ruLYW%D6q(OT}d5=_xsWxXGE)1uEno$qTm<8cvHtnHqr0#H)oxtfSHIn z*UvMO?_TSESUpH0G~BQ+7vQaB2s2ZYK>kK!k7TJ7`%+9$+VhT{=?SR&IekbG#SEqF zo6t$~3Id|fb!L$7qSFHa{S(`wX^n0lJ|S*2rXZ0wn8u zCHR@WSId7xbed_^w};nLg=~-^pks~;Yd!`cYG?wI&*0O4&Vx<27%8-z0Na{5^_D&@ z=~+!upgwW$N3B~23?)oa#8{~icPl>#tuuREPml1bZC;EPwW}1gB#>`@;qsI1dZ=mx z8oaI_+sPIC=Vcsw!2g4~GE*AV4)CP2 z9?X4spPFuSEU7rAO-Lb1FV0Z{;?7kMP$6C67a3a)ipwuz;P+I7-*4PJJTh4iMBYBE zn@Pr`%H>tzt7y~uYeD z?2qIAnh2J`OxmW5xc_YC&z$cE@JJIe{~!$hXTL1q1HNZbj`ONnm;EL6-;TaLxMgP$ zv{=RjeWO#={Pl~{vsuJF?#WvHC@OKXkfLG26?dB=+z&IRO`{7r>9p6jk^Z?;XpEjx zY^dgLdA_uy@|$2yq^Be93?I1sNtatt{?!x!xya7cgb6iLzARsb!7bd2S}{W$!EgZR*-x+^v-A`aLPP>p7wKTQx=BYogZmC0|SX zt{90bzE*BH6PY;GU!G6oN%%3gn8b*pkezPJe2>Iv5b2m_0aR)ZLT>sK8oHiX*LLZa z0qz<8c;Sby>4c_^r=$S>@1BQqRc!@$kZxstS$ z;MIy?&W;j=)YO#%e~guvL))|LXnbGz;;#I^%*9#XD8g%CHvU9NHms1QZ*B|8=65XA*n& zY9qr4zDUx_zQ=Fui0uA^5|H5o--Icu&8|}Aepm0{y$5#ZJaeR?MCo;~FH;0xvCv;B zH$97jQhDofT<(tz4L1^L6>9CqBc2BIVX{0A0hF52+I5_AbZlXiIuTf9Zei{4EhXzE zi^z&!I2nhwQxUmL8Dq!&QJ6l>6IHt=7h zm~I?po?Zg4ZxaVaU725j5lZ^YRkjTXjY9;CV+kR=wePZ!Vpo<$lI%?XibVa(vyYEh zd?a`Mq?Xwr@9%6mj7BhWIS=?R0xo*rpopiJv-r~_1xoz%AeDcU_`vl6yMm0l*5H|N 
zxMY6zWd5TC`Gdt4kxSwxX_7@-e%GL9QFFI1M*Oxg&rg9fJ&sG2Va3%2r-P4kG?{io;HFA25wUa29 zC8v=#6yor78GZghw7zwGVuy+E)PKSbO(Y0N zdmkx}9CoCS9xD%i>U$izY)ezVQw;bc8T>?T_F__{!WwhtdwH`>QE>~nMn>O|nSgPh z3l2lYvdZ@snxY`gzZ^$Qdh8l^0emYvk-AK1>Ukhi4cgy*c~T!yv)*8xtP$1)=e9lk zrX|0J7TNlFIp{niWsa15^(DznhQ4hgJTf)uKZJ*9GAZSIGxD!P@$rJK#EBX z9l3N7@?iCq+*OOB6!Y~W<@^Tdc|Vwmx|Uf^a(i(gt#*1b(5Kb_UsFzXrp4wCCW}UB zB2>hyo^Ffr)p5na5^oR~MF=O`xDx>F5I7H>azccaEVroTLLprQ0oLEJOojRFcvs-7 zydZy+nR7#d44?!!cDX!V3@w8dHU%6!eP z#5-xZSY(&@fd0WtmA(_Zph?xe$VOS8&~F_a*O|fB1!(zh>Ju;A!Y&Aq+N}jF{zPEn zEK@aUQsIrqINPieI&!DEz9bujP*y8TP1HD}-QtIvjb{5k*=lswlAB^THU>31^--OB zTxDBs+}Q0-Ev2;c4%l9hoX&jmVwtE;ncM?AjLzCDXw+GAJbhBsjM%u#oL$NkHZ{O{ z^H*jZGPby6mayz1wT)3m}ePF(_x`u+;h~g;*Rh!in>;5 zm#FFu_Uglc8Kr=(pLB&Lr@<<&jkxahEz)VO5?ubejcAJnN7!+CO~fgcW{6gxV5(*;D_ZMStN{tYxXrpufM1v*>6%hQ|yMn_{T8X3MuK6-{ zv76-#G7iy1DBh(`LI4GI@VGq=BBQeE^T1tt(0wTIPhhX`$*`Yz92Sr?SJ1a)53*AV zF>V}%zFerD09lVb_DkCv+KE6oMFJh?C%u2i+CIdz{U!2fm?U>6MCC*(4K%;X;`Wr> zdy9HiD#b)Ib_AFAd{FWn!^T@dUNrZP_@Aq`FYct@upGUxX!$)xUwbkby`bm-l zZMh9T>=4$?4nKKo$LW<=M^PY!;cZOGcKt~xPy$DbaBu1xHL%{}Q9@#dt+!6$x3QBw zCxS30dVkLpdPs%IX%uRhvc&1nihA*4@@71SrNOxft*i$T&)Dc&2V{{3TR`wWZ&L47 z4(?Sm(x}%~-=xY59eT_~`|4tiX-3cTJ#SKT_g^d&e^_E}lE%m9Cx71O1scGwyVw9T zoYA0p{?>PfZURZoLvx)w$KhVYwIzmO54m^pnUMu8u15i=IFdeBv*9?b^lgtuHw=OW>3C-fDJ^=mdoA8Q{)1cVJVOibUvuTF7li?JIZLWC!{7%#Os|^@% z5W%x{7bPg&2OUC;Lh%=MzvjXHb)dRsB)9d8Ew_VlmjEu+_mjcqOVl~V zzq6TpoW>2$2d{o2=8v#|_h4k@b`A0jXeJ|1r(x1>eU>i>aDIiSh-Z~z$6a4`rh&zF zLJcX^?yzS09NT{%zcd9@m!#uOCPs=iJULnxcxrFAn;#D0*t|NctLVE0b^-9v0mQy! z6>eRVq;^)>(Cgr_nvqF#&K zHA(E3r?g-Q5*t>d^{*VdcQWHW3=c)>(j+uw+^H6OR@~nb(=!pj`EFRxAmIRH(S0No zDp!^&4uaOn4IQ3$NRS}b%$?DpY^pT7H0|TN;6!%gjqaW+skBF*j&jq$!ajEq2760E zHoqYm>As8i?t~mJ2}p-l7jO#>XFD6xrJRYo;h8uC7D3*2A)~4d@Z{h<$R^0zreVZK zY>_*5f~v*IP^4uR60UNGUUIVXQ=^MzYg>VVDD|yrT>9cG|EV%#W2#2l#qOo2Q48{Y z;hFU#9QRn6G)3PQp7?V%MF%k9;|CH6SASqKJ%Bt}RsNb2UP7ds0v@ZhWgnb)6N?Zc zApD5^m`qfirVqw}Jf;8up;mb)MDCIjUM*lMfglrxuYD5q@7H-0K}1~`od@q9hAG8( zg`rDh;%m!q@BtBt_cp`RZtX9&P;Mu)50VKdTRb0JC6K@yn^$1)$#mgB!RaL zU$CAY?^60D84!T0S{R+_bQg-$WyC$!AdfhlC~On+P0kdF4NwxhIKEKyrfEK#=3h)A zETuaaK*n|?fsCUT^JI9 zPy|4PLFFdv^YREVahI`9ZAs!MdBwT`kBkd0vg|s=@c@=~x9eV=9QTNwvGWc!cz;zo zbwU)m0KIq%R6j#dWK#PScyB`8&cN;twpOjKF2SL|G9-3~1B!?(M$ChNcU*>L{HHgf zM&ND2I;gRwlJmn{#>D(0L*Zfm(U{tcTZiRp{}i(|4PI4N_KsLT>=$|G9fGv01z6Kh zO4>g1(~CzD_x|ekM0{29GNXuDFqKOEORc=0Z6Sj*Gd8K>c+ESUVEy#7TB#48IJ*;t1AXn)~ z$15m+P0XYZNAXm^>M#crkAbvPcJ42@5o5X?GHyS#8G>k4)W!uI06$>yv1*#0FjG%c=6AWU>qON?TvT>rJ2+Ht4g zQPfcv?V(!IyF2sMZt4^HnS|GbnOl>Ko)x{3YKW8D$<|u}NHa1ZamKo=n+%Q#(R(%; z{&~vYZ`GP79XfTn9MmCiuapz2maWf946$%w+=bNQMOj=s{?=Q)?f+O6G0@QYYvg$X z5_Qroh7P1y1lY&`acSSYF+RYIcFi5Ij9sb<(j;b4!4XEv3eB zx5d-%X$U%eE*6{D9d@*d z5{E5SHl!g9)#RRC4#)Y6!-Z5;yo=|**4#ET5T_lbnIP^Rq7*+X!J5R(RGHI|R3>Nw zUmYf%1?LmbqK{V{4lPu63OC9`eFR1>Rc$b*Wg4v$6UC~>bqblGnjjsQ#6RZdHFkkk zK!Hj!7`i(0PQh03Unrewcmiz7Wy(^)IfV&I%;(VqE77s zLEuSj{)AZ@FpbU3T;nmGpwpq28*w}A1kdTod3ygMFp@>L3!;rQzNKVYq2>4bs5Q55 zBN*H}YQQ+`Tyk!@y~TeTLmA)QMgNO|4cXz9`@kRF&x3M(aS?_sgHzARK zgkis5f(1PB8n_8bwn;L3fw+6KyL3K5WfVL!y5@9mXWjeJ@hv1w^58d+sY27i-+we` zm?Riv@TG8=_~O08>jBf9hJBDW~qWcGN#%Ofz8nyFeu)Iej25t|7jzN zn4*}iPPlir3nOCoI)`Si?DUx1REfI=bjM!2D1gXD=#Ni}Rx^xv( zuJ&aKP)c-rtaxo5+uDme+$%i<6oh)GODSZ|aVOnqawZY&1^oa(ITPNC2_&%5^C92k zZ<=5O9%%4Qa}dC__g8Q~zcDk7_AA6&)U~chzlUH)0AuF1 zBgevmw^J0#70WkkHp@zePCxOA?iet0NPo|lov)d<*!ayQO-rEQYl>h#K_Z%t^YsP( zjaK@fg6LW~97#`5+*VYZA0@BzCX7fIs<6aCKc;SAYdP<7pt=brHvaYtZ%3h7;oqRL z|L^c}Y*S&3!ys$uAxjxKSCe+fCtA<*gN{bV zIvL}qD0H9VJA>4|QPla;Py(g6I<`*>4|~`&bkR|gp*C-_stNZp<_X+nQKI=xgFlU` 
zmiRa;jXy(`u;eOb{u{hBirN3;Z=BO_*8DgBAf5d{Myx1O@gKEzRu}(GDo?@L{trl0 zRg%uLze#P-EcO2Gs5$Xi@jpl%e&Rm*8)()4&fv}8q`7!WZ~xJAedX`}z+_dU^05Bh zbMCPJJ1=Q)9%^W?rX%*36f_xEyBt6_6`|)sp=lKNviX1C!XGTz|9)Z&+lGhEiMxU; zI2M5D82?q+VtLuRwsJ~I*91wNty+)j^ae1vpGVu250CE!*uGCSLAn>%^_T+)m2RK9 zV@Qro4pXZnViqG&QRJliYg1MpFr>+fbkAGB{dACC(tq72uzEd}S$@91Qv3?3=lq28 zwh2NmKG{|7K;SWZvfhD#t$7m{Qb*2NWhq(h{C6jAW#f^r+@ltie0#C)Bo@Mb5lV3B zQsy-d)q32W&u0>R?ST>W5XHO95$KXGEC%VLR=mT9c9Nw0cmHMyKwnVtuG;4H)-Pwnoy(Vsdi^1CB2k&A7 ze!hHaFxzq@(Aoocjph`0^qtf1drCl$TjJ?DThf8>Jc2O9Nc2F$c~fJP;uH3L`>(Cc zYGkMt$q*-gKgek9dJx^k2>w*0hx&TyeJ}@f%S8|D<61yb=V~nY9%Z2(O4#J(u66|5 zT9OeAD0_B|)N12z49g(mgG*FnB5+m+(m{lLp?lXW)%EQUo=dR3m_w`7hi9X;O!w~C z$UQY(I3C-L*hg_#heP|Fh)9aTYd!0!)jMij{${1PYHx!5#jq>_LD$kz-4a zij>++(HSwY7C>MfGF4v(BtP5)S_;+8C;HU8L7E_WQ$K1MIcd)TE!~Jsm!YPtyD~Sz zu9l;wHlFKcnuI8 zZ`R%Bi_)2D>tBQLysP)w$!MCd*EMll*i303XgN-t&)zFNBGxx!J22lr8)~2tfOV_* z9*KpjWIXdeckQ#U!OOZS#c{BfF!as)@GY2#&hZUV?m8x3KkOJtdWS8DWC_(ynmJ9UaC){{$`R(@DReOyk*zK@7Q19_JQ_L zZN4}Eqlv2hPivXnHeZ^K!U4mA`S?5g!}NX$LY6Kp7qDVO^HR6UNz2{}taMrRwJl8E z8BbF~0$ki=u@0Pi7`4E|YYr71QY) z+0K3FnF{lLI%t=W2MsZp^^SA4=HoL?dI@V+YiQWYoGPu|xH)fn ztPQ+t&7SfW)hq7Nn*U~a$E;PD-V%frn_$7bAkbWD$g|G?h2C6pa)7 zz)bPNZ&~fN)HND+>f@b%V1+CEuS}7cIOqQY9{gxZ3ZLdV%wx=SSzGj-8xUHWcWmt> zc4q-K*BY9haM*AMFJ2~5NF8;?g~r}Zud~;}57`}8@`%SAQHRA=rG8~H=#}COFH&f0 zd~4=Mxd$Xtp>sE%-H=|6e+Ze+Z-Snig$s2##B~%K^PzGI9nO_QR!ls%Z3KoMoQ;Yu zekdcc;c|IW*`-zJzi?Hcl%48zzRET4aG}4DGUvFPVB$j0<^DtVQqWP};lbY%mzPsc z3{mL=+Z3620(bkV{wk~ZUZy6pxv_qFg^7lPH?g-@H>6w{U4ADuYO zN6!-d6KVO9ef_vQq`m-4Tw(~r-HFA%8gt{_TJYq|v4oYAHg9dkYhFns`Hg?BAmxrv z+RP7a#TYU!t0)Wqevo#*RoXV z(Ih*i7j&~+g$n&;Rcqtlv&hqubG5BHT@g_$O>;mEhg>2eeO78yN8*ggvJ zejc86e^7W?nAyE|W zw<@j=eW|s!FXI~P*%~rlyn^C7q^u7B#OB4ILrM$JC{5 zYAKknfAIuqk_+lHbdx_1Ee%>j9;eGdY)AO+T)zN3qke~v$5Hf~<-`jY?8ap8=Nn$T z#_8v?h2nk)$5$O(wXBlW%6pdfSWQ*j_qWGA@o`J{V2;(v0lF|jCB{DKEYD)Wd#aZ z<6lMq?>J$17Eyz>z0NO;(nG0y;wMXdWK#2CL75#mT^;-p-BIxOG9`&u=dD9B*Puod z<;A_^tGgk``CiA{yElc@d}k7LBohWNqVp#|N(al|eyCx`i@LqAQ_rXN)ubGhT)C%4tp07I!EcG7_Fs37Rg#0t2bFcs_m_R>z zF)`u6i27UAZnZQ=XdiGa!mzd88URnicX7pe)_=T4<-AaHxC1sm42D7mdLrhUR{d@D zFvgxEpo~}zO0BOUX)2)ML>E3*Re?O!8ELMq|jVG$fZA;61Sl!aDyBOj@-EP@Z zbOR;1|K!@y3M6gsP;h0cOsQ^WK4uTJyb1Id>OEn)T>M^MKeL%Sbl~RzxLNmipPlO! z1D{1WcBG>j$%0A6{3hbhv8F>DkywnT@vaMGfc$EVEYp_f^lHe!!bV^x=yD-yM{Bo% zSbrRy`*uBwZjde#ybyI!P%vNaF-JMq?J&Cc8Hv7Tv!Pn7xL=>BWhe_cCK}EUoxkybH}1VwOYL1m0#@HXuIp888r4bx+Tera5)UgM zEDhRv!7d2Q4BeiHhXHPEGEx!Q4c8eyJ7pqwb=0+@n}T&1Ln_{GM2M&4sqL8ao0h3o z;+If}iTr+YIs7_Eo}WPt2Sd~sK64nkbTNtR;QXGWZr}uRCo_bI;a+$ODbU$tLiD`d zF)5gAwDi_3{MJ!`J8kdEj}Y6Yp!dZ9d_#QLXX9*P(dL|hc_e?z__iIpRKHNUpNQPAsLza+H5Z}%AT2Ax~M z?xFOE=dfmV_|0V9ryrO ztPHmgTxu3ne0qq7j-;dFP4q%(HeP zWf2VLT(N1a@LkS~bfGx>2+E(u9(&t{>4QaJQvt87NWA}&V?;iS@ij9a}9icjohEO%+l;u*?QIrs4dUcx`XU};kQ*Z=pHKNkJF`kyk=!22jwF(*P?tky)K^& z4X6)wh5JT`Yp;0MlGKz^L9q945C^uG{pt7(Gg}#bFI?IE+zi{9lWbuZPi(089lzkI zWd-hh0p9}4ou}aN^6}s(ALJ#W1+76lo1e-&6-Zpa(f0xSBSe23g13U-u84607s-KW zfZg7P^+2T-z`gyniOxaw} zJUI{>y$o@J4>o z6vfgMPch+T`YRVgpQ#h^F0rfn6XZP%uc#jaeQ5`7Tnu<6*jIN`a=KQfjp607R;A5x zR3J1}!NEE-#+_WrrzYk;L~k74st~hC1|Y9sFnOiya#T5hiMumCD8fX6zm;RFXOJjT zpBlD1wU`&=GiioKM`IUECg7Iof@^Osu;L{gnL+WSNUt8Zr(PBZ=U)27(m{3v^ zE64@IVB@>hE#P8r^Y2C3Am=xbxsB-JsV{SH088&LZKv0UURz-?MIAwdQINMlTI`S) zH9||BYo}D!9z_Zk?aE%)pX~Qle|7rK7TDaqZ6hT#w}*#X((~?IK}|t7q_o}-3I^VO z72tFFS)=Ls^*}>?C-`Mc=A3SU|K4wNMVZq5PM$vbdH?Ho5sK9 z&47^A{v8jP^%B0vV8Q)+#$?JRdqaXyhQD45Mj~bu3QV4I2yy_<0;$3j#BMu_CU$(N z7JeoO#sX5dFY~4p=XB4{f2=6*p0gRWs;q4-8V${ufF7Ev5O3YlXgNFw4sG8x?CdM~ zX|2fC%4tj-{(bQVTzqzvK4SJgXKH2uQYTjmbpvdbiSKY!^EiQ!S9A^w|6W0A&?~d? 
z&!;U=@Ag5%*v<;ZKeuK`lB4XSvWsQxD)q2&kaun5dU!10JG7r>?__;MiMwf0Z|tLh zne{41EVrqBZ)t&mt|^jcjUF6(w0~SOsl>!UOv?aoINLdin4>Wp9jbUx#pP|{Y-hi2 z^^ttPzxd*ved^%B?#E{ymg6sp0BT{6SFz1UTBVr@%TRnBGP>iKI{GYa zByf}Ku!LAtRP!%$tj7hIz`X#Alw9GBD{8IvQ4E5D^g&2q3+Ls7R5j(rf6w_a3E%9(pJVy_bYgLP&jl zf91Wk-rqO(uB;`2{Ep0;jn4yEm$kOg zp)PG&j=$#DtlBrgC;w7i8f_b4?x)fQpx=9uerOrq5nVJz>WmH#rwnV;lro~dep zX|{=;j@YV1i-0!Pmsg@irHlEoGxT?RRuJO36SgvdGVafd7A|;o&`QM|eIt{K=_sjK zV$00Cc9s|*>8Y9-o}D;;HBd$S4A8B$_vTLHq!(3j53tc*Vqu>bzv-|oKozACh)?du zgan4Z9<~QGLt11uV&-oL{!2lsalZZ1#z>^-KBC7p36!!nrwkuWPZNnw)WGtd#XM*S0rOQe61!c3Y3t` zt&#uT{>kr#Qd3M&#||%~tI3r|%q6vdR~0Q29k2=mjf{XV)g&KNgmi@sco#oh7Rv!V3mDH1_7FuG5F`I(SFy143>AgQL2n5`4-QSoqgN2u^Uyw`RI^EtmLOat*n;vH>A4rn_O2^ax)-N&z4`UDz)hms=sQYdZhYxnV)f zODy4t2q2y8VR1*Et4}ul@S7zD?k~nTQ;yBZqc8V}qS3HL7;Q~(wHxpIn^$qmUH?fL z!>Qx`p`AGh==fTln8l$&mab$!BvC~*pxLUHwCUEy6+wsQ({wytP7X#w` z_nLm!g4o9uf3Y{&;e00~+j?}5;z`S{UVzoqiP`bJ9!|hVwswb!hLHH-2ZJnFjKV_R zn$o&QL|LUo*}xb+;l-)q4TPBGuWj?pNCllfHKNcDO5rf#!$JSjM=BGB~7HlKYf&d(ud%i z9knmJ@@m_?4&T({i+=moxJpEC&-fc73AyI=04hB?Rn2p?OQ0#EOzU`eqdnPaB>zjS zsJ@#{`S6P#PyB$ftDZl-%p0AUkiYX0lXygg05&@S28XJl=RPl=&z8xZHj1L%g`B7; z5QjrCm558@Mtd%e&jC^1ED}|P7A54n;I_T`^~vnb_=a#9DM}v>BaCuWAqx)ay}d6K zb;nuf%FX2r|NXwor{NFe7a&Rsa47C=1%TegF52u*_YLBsb=6=oqc)h10jV{4`ji=F z8l^ps_Q&AnzU-81JLbDAS_wQHL?!%O5hb|~DB1Kh+ZE2YDy>T*Uc7QT-4i%-Xgm0! zPzy)>F%e&x8bb#Fm1b1bHj1?D;imH*lHLpP4L?M|><5U$v#_)3>aj5nF_4@4K#GAw z-nqiV7{sbxmvzP5Ag&%Pf^FJNK zSg{L?y!Jn1<0>ne1YKg%MY%JR-Y=J{P`ONfqr4_t4KR#RNxs1QCsBov1ZbAJs<)WNM zv)?E4>^b}YdN@W|aP3vcz9s#xlia$y%B{nXA<^I(KsK+T6GA@V5uc8Ap^k)_=7P!$ zaj_FqkD%XWrmwSjyi}>rAQ>fKT`P}_;L5DY;+n3oV{QT^pXZX-EsVc03s+X zBYGnZov)caR!rG-)vqnw7t8s=Lzo_m5>R}^&&b>_?Cw+d0qP!g07tB$##oc2Q^6mk z6&Ba8LInKXhm+G2^Si~5Y+j*8u3ZWoGOZiHyBydI&oHk)=EvAKO=BoEFk(a&h1fD` z_J`1VpxCggo%+nc+FL5O!ZjK~ySOKC3tM*hLUjMyW&5NUcq4hDnDY?6MFjF5hA6qT zO$Iz`kq-EK4bFnu*j{IYdNDvuU@l$<;!#Ki@B%<39E_g74o0Ui7p0qGN8!|N)=hT_ zIlG)zUe8odd2=HIeUx?5a#`qgq^J}*wB>lCRe}j2Y&}nZ`zf_^&-d69{F7CF>XfdE zQC5X&(lRgMn*^*)Pg~3)x9TuBeb1dS_M)ZZsY)Kqcx~XeEWK^2?Zv?F-jXj7l!N@uH)s%gA@pE@)G5xwPcADzEHpfitqmWh+Nf-k5h+gzXENH z%!an^GLx)_+v#n6l2t+wwZE9ye}X`1ge&zQ(A9-?rAb@=kcJ#{_9Cr(`W}N!1po;W zWvP&C|35u9;LSsFJdof+&8@iE4-}V|3MLIyxks{?u+NbcF%#`f~4BNewJPMpJHGi9SRHHBwYxv=@O+uxLSUVK3)j#l^+;hho42> z2|wLNw^YJ4rJ-GU*S}&x(HXQ9>|CM?nlS7Yon&pLhavO_ zHBgIm`Q$QRnZSlkwfndir2_tVj0!H$r06mD)6|_3>MiJ8khVS&etjVr_~a+p<4Ufa z&>7gQ16HQA!~N9^RGJS1U6my(_SJhb*%|Hu78KV*HQ0u9+KAO;Of=(YwarKSMjCah z3YZPJF{g6r)=fAM5B2DzHgJD@RB$BiDCY&BT_t+HhHJA&QZha>atlbK&n<<5Ck}-R z8L`3MNf)h6J5OOKrZv6?deTiZdQX&6P>q;Wzh9mfDhm9-6&^s~Si1jv7ln+3t$rl* zj41X#8L|ny-csOoR9o7z+c^Fal#R#%*C5xUTaP|AbX;=0b4uUyVr+gTJ+`3yn41&k zqIh_W3uytra4Zo_O{0It)zlmP+EX_+(srkvGWJGej^+9iyJFadIb)+<(Ca&L8^;+ zep9%k7xrKY+`Qj0EA#?>gRVL!sisf{6C_spT0!Q2qjFMzD$`Dd zf+bI8|LqYDgV3c%IrYvJ%7Jsl6Gpl8V>-%WPZMy^`j%x#2DJcw;XUH(5uL^Iq?L!i zvj@H(c6K@ghq-ee&bHGjOKZp}@+C?ChZ%lr8E-w@fNi8JehYgG7nheMAx52+uEd|% zsHKXDd3t%R{$U(*Y!fFPm&;i0T7dJc<&*v1IDy+e%KA&(&j}%_<;|61?41O6rWUNl znX{jp+#-ZH_y1Y}=%We?_D(sAW#g4rUErPuY(8J7THA+@Dd!~PU3b}I@vw`76SXHH z`*kY~rv~X0D35l!>bG^w8Gmu(*+P~F1uA$T{X!)7MO6<~Kc+nJ=#=Ty<9{+-=Y=Sz z&|QvNehYAj)EIb9X{FGTDuAC~`e`x1$}3=3j=qA?4w01Clf)OEi3$*iouq~B5Y}BB ziZ?YCJRR+K#BtK`)naRXi=p9R8M$%CmmYHyfke`E;g3(Ty@?d*diYD=%O2q89_i?2 zLIZrKp4?efF21n4KjjpQtyZ+AEk=!EQ>cV5HW_@&&128MONp-D&5l8e3ViB!91iCG z0?LVao z5%FXPeb|WF{PgcC$;68$ryF?tNZe!PazXF&6=3f1*ON@Xo1B;q@$zNvH@>#?xV0B2 z=q2qdjp>5zd+UBGvT6D>_rBo~=|rpQIRf~|CZ@UEeQ;@NZleYKjmyl-CPZ~3qkZ|B z`7^pOa)o$yaT-&LCx?&!tp(5{8s=9+-qAq{+kFW1w25bDU~MgbiTvg_t0^Il85{AiX$c! 
z??YDXX&@E7f39JKQ_7{e8-*E>R5OrUiAqbK`ekW{qoSAjz+FOr%v2>Z5#FlN^y#Y- zRb4sJOz+M(h3jREDhJpWH2LTU;lmDC&ESI6hWF6W!w3t%!?HnEtw^Qb{&WMVN2>yf zmE=|(=&{1>h|b63=;IF9j0M;;p@POWcy+o3c))o}Z0H2D|E*!%XpVi<=s`_mx;_w9 z)=titmQ+YNcsY>M_~agvrL4Bv!nHeov#-}1ZAik`R>f^}Z>^SpNmQd)I_OVV;o|FSzt;Gg7BLflIlw z(;5=Ly&PK@UO4rVDrpUk)<+)xW`5i5SS=pU3KeYwp(A+I`Vd3ItUkzF2}W1g%gvtk zBwdaeH+oKaG5ETw*(1Gs$MR?0>bga9TLn#8h<2|tS{{45?w0DD3}zUF<|4=FW zV}e~=2_w4WXoCD*yli_t6Hw-VI8%TzGJB`XlWc^>CR(l~EF`$x>l4@J`cbgdu*`1K zakF7YiX0MD(foazzE_?N65Q~Ne`4T2?%#gB?W7vyrnD8rj_yACcN3^JbnH!~vS+)f zNMNm+H}p>=%HTsdU<|xlK9ldD1&f9Ctn;;br;W_H6J5wsHN)Fe^Ns+&Vt=la`;g2| zo3d=+J+{U@DQIW9x9)9?9^%%MixN=Yi4YG(>vzrW1|Xp6c2f{}y*sgG?p{m~O-cdW zkawu%lbZKxI$uRdYPA-TD06+`=N%?--^^zJW_q9HTpmIMZS%>_vJ#d_p(6K zjpp8d{LKdC0!Fq?mu7|+FJ(SI4Y~6AA#LA=v#)}IlT$@lRpGlNA=&&)k>s{Q=KobB z+1U)~397AM682ynkI(40=*3IVAI4g77J;1_g)y6oJG?OQ!AVsNd;?azoa&2Qp#MmH zS!epkuu0NuVSeuzsxuO5Q*(E`I)~Uie%%($OPZSg2KUO@WV9u&b8fSvINP}oldpVt zIoj77WA)w>Y++);FMb8b@XNarw?=g+oDBz>7BV{=_}EyeYGX|5{y!aB&(ewZ8a^=8L z_rUyb0b6rOhT&@6{#Mh6W-E#?#)nsR^R2=hzxx=c!(5YA0;q@HI}9&~G~kkmskteF~kDtB6tkyGFj+Hx`RodmjaN%eli^ua*$9{WsephMtr z{sZjo<|jv$@F#JqZV^7MFEqvMHmHT{meSGQ5#x*ue1Xp5P-o>xaOIWkH2m?hd|ZtS zY$O7!i9Zbiv#C(OlM&MT`31#1XSsJ{uEmdx|ERyF&;n0^`6t^W*5Ke{lq;}pJ91U+ zRHrYaaSMEMy^O8|-kk$Cd78R|%uT0SHsDI@`Kj8eqkaYArAR_kk00FM^$>obAxY4 zefO^?NukS>2x$$4zILvRDUl0pKFWq!XQ7$wuCSXWsuDY+!Ztm$=ccT(x(xD~$VIrU zFn%%ObXoC}Puji#oE$c|5tHdhxIS4Uc9aviv)34esCjMAumjBnO2G0InHmG2 z9Tm%B4){v7opZSkfZx{lpDWRmz+LU@aQwb)O2gqO=dPy1yTjO%u&UkWlIAEctiC}L z7Apup6@*XizA@KK7bl&MkXV$bt`m-Onhp(H{rQ3T0fg5WHPI-=mgatGm#7m^FDlK? z5b04vSalJ^YAef4lwoXSy?g28GO``629xfw;-z#J7%)Uc-yexgp@G^O{cd(lo=SH{ zH@HD0a8NC!PFjO@dNa!fM7i6su}j?Q(t|b}B*5C_N)#tDc_~xG#`rBY#W6{;vBO1Y ze9_g>o7W*4JY&BX?n3@tQ|kRtDJ?5kSk)wi`d>c|_&Rb<<(sLBZ@0*#9C^<&y&auB z=`bMm+OiHQILVo4^=>rwJL~U~?a26qQvIwtNne3cql9;v&F@q7ilMvH=~LnFGD*T^ zT)roLqJiM(g>@21lP%ZAJhK^hrIh(CFu?ETbVZ^MqyjK%4v#JLeYrcbQ*nquLL&XP zpMQRb1gf~1<7Yc;GH$$UQ}}G`Id#95O?Yj$4k5_;FW)%Ea^K86)lCSly^%TO zEqVZbg)ECDelm0V_|Hwj7QX!7f!o$|E$FVoks?!z0HNS*=BcNuU2XK;3OC0ONx~IU z%W0BThMt{EX*EBe2Mi~J#N&I$b_`TF0vuinhE}lpmEI&Tvv5i7ES?rrH0FxVP=sk* zEx27&5M=ux;(|_dRNL76(tm@h5irQ_nUD|en&s*;MCW~~BN7vk;ag@#f9O16SYyIf z;WpswcdGt&MLq^dE329WI1KJn{3$qVm0H(Zv7!b=BmlG&gge}oGcDZwUVc-2^w{%!NQUBsKA6R^6&+yL&hRB+}DP5Ke#J9seke2q^f5OHF zr?(#f1Cg0tfaody_Upr0J2MfEGyY)Co;YX9N^qE})1iFFV>G9MQc!e_MeKV)J|311VK!5nHfBa{( zV}%GFa5~^?P8TGTgC{h;A0!+icfP>tW~(Q|S{VO`?_OmtDr&tehctW0z#~>xF^&Gk ztonRIvK(G8>VHD0JmZTkqECw|fES>LJ0k!D)80#Wx9SSHQpNvc{4#u>%q7ha8`wU> z8izL*McQ+-bMP@wNgj8ftno9RJy4w~a4`Mrh(3U}{=ct0zap~_f_ex=gP;FVS-P3{ zP4<4EiYWrWz(>~iT!>d7AB%3GCQCZasOFx-=MbhR@7!#(6kd6zdhISD#0%yJMu3b3$sKGsZz23^6GXC;{MWVs% z=~a02x)$*C0_HI5P^xv}pN#@$F9znWp&%F~t#%F2cA|cKAS-QJaYewYjY>*wW?QL5 zgob3~8+*T_z_uUs?~OX*Or!k3Pb+JZUbiU|yDy&t-3o%*48HJtP3_h^KC?pbCvCLr zl*o}4*3fc4Ha2^*meaoepXiSL>tWiR9s4O8y(0bDEyN>JuR5=U7zJBeH+CJAyK6vXlfyn{>m{Bdx)Mtl0gKN{q7)02h>*`A6GB~}4MhbL(gH1eYuc8lcoJ8Qqeuan!w za$@t1&1mb&fRhcrfQWLQ*q)35JfiKCZO!+YoZs|-DzeRVx?C}3T@jueJaes!0<~eF zCa5X@hm7I7ty{A_1*DHx$dO{@yozZ4+rfcQwj4K^Bbs`??=3`TyLeSfS=J0?{55-f zMmjlYr9{|tu1N9w&J67x5}dsfaV&uanOH)WVI{IBR9nwEP(`^F1ni^r0%lC5dzokf z&DT#75x;+;lc4VZ)jM`fJe)xd3QQ!aYO=YG{rEXq$fIK_47+LGrYW~FNVSi8Ukq=c z9fR!kWX{h#f-i<~iXoI0G*ukg_R8#O5m6`mxPeL?Mp3dPM!)E6G7X$UA+KpKl+XLA zw9y3r?tC04u8_Pp@=&RelH!PHT3Z9%iyTy3Rnc1RuK}nu4y~xpfzgld$mjL*tFeBN zE^Na4itztcNnF@m;cp^I)e;T{GJLu#kO#*bH{m^HA^QJC(^q;7P9ii%qVJMZewgHS zB^n2e9>kn=7@p0<3tp)9L18kFEhp_HcBCK)swl~=gp2W@qgHyk8~Q0R`5a%`e{lJmT8 zhjmxG6xvI6S!Ls})}i7oMUH-w;mULJHyP^T4GZ{_8k|wMtp)pv)ZL-~`1q$35i=73 
zi{H19_Z?$^8>^X&7C2BiBq0<`@EvIQCy={D<=0eqb&{W)l^ah`_sx7kx;E`I9=^3T zN^zvOr-o1E3+zGb+UIs_S~OwVE1jIxcWHSoU>!$o)#dspXDL?0}=vj3y6!I(X8kOz@QXmRPB8SLI7Lon()$sc z!ULz_Pwt`0$G25PG;D4|_%OCD0o`r7J5FA}vwxM8F#jG1G@%f+cp4dzDxDBDgOqg~ zu!6;ZA`CT@arH_I5vg*v(YWE+Fcl^a` zw)cD>675|uM_ZLeHQkW3b8;V&zQ)};X3V_Zz_p04|8 zp>>>HM(7OR2_nw{`};K0AK-Z?-OxJYjZFsC>#a%6*eacna$=e*#yC%R_PjG&wl+KM zPk#)cLl|izp7cFHb!}?B;7v=#3TId=@wWFI9-iyM=HWTuO%Jz7NI3D?xo935HTB-o zr*0C{b98-ww^sj)7jGc&nA4BdsEID<@zo!jzhhsvib0n{o4)$Y{2e+`o9nP<6+puC zi7rYEN^G8LnRx(b6^aIrm(>xur#Efef`f`Q(CQUcSf$ zrEjR>{AHD7m1>muypi+s{0P|kgS)74I^zdBtGy=_IDQR$lnHa8oU&fZEtNnyf>z5Bbax&7tXHGd$>G}XTrXaKI}q7ithd}g zeiv;)zUuV?Us5)o#qtBNFimYTI;|kFuoihCxc5I^*W-J9O^FsF6loAZvfp~6`+fh> zS?{}G=J>1D^+?J0_7CU3b+@li)%0-f=N1{g3V-C&HVceuKJcqZN_{mNnfaP~_1#U< zr4Q)a#^*yphBDv_PxbkgzoWWf*-w4asVi{pEz81l4^?*BLNH-FMfwGHTT2^!NuYr_JqZV^b482|;YC2Eka#q^Yg+;klTO(R#3KPn(Mo(62T-4dA1F35SyHz&8o(B#Oz@B@)m?rgGQ?{@d{!sB5^AQ7nr{VipnyY&MMARX=_cb zMuv!Gfo%$LMtcsO*(MWbw5bO@Hl}gtY;t0MX>B#{^7T6(IMLP9cXB$!3pO+*ryCqZ zXCJ#fQJGxvypJl(@8h^ud}Y#L0F4p9>816A&(t;CMUgZ4OC+jGx=d4FB)TXo->vWpYOujsHM<+$WjLge^1;ggrk@H~Abc{% zySobrYtc>59Tbbp=CK>xuv^D$M&=%EeN-DVx{f^y52S>4}# ze_j_RCd$pQXdhGfA()G5~G!~vBe3wJm?>D-U7cR|hn9Ihq zk3v6?xk-d2#xPJ{)WuieD~~`-X&u4LnHq`MH}ApD2Ylj}UTw#>R%QE`e?ftKIU=ki-YoQq=+#r7`~7%&prH(SZ6l8>Y&Ve>X1PA3o4|^Ce z^1x1i5o(m}k3K(_+yMG3-huy?3tJueA;Vhfs^rb4fa4L1Gs&K zyOK{&t&m~T;BlfI)GEf?frGn=1&B#2hD=zzkysUEi|WY6gNbWdG4ETpWQkcjBS@rN z&Jer$8w=NMw0t;;RSWvipKkgae{uh7RA(>#`R=aqx-i z_xOSX_gmKAR-KYheZ4lQrw8p02*DSSI6im#p6+jp&W4!^-NhZ%mx-;Y1aBp8)zoF; zb{miqv2(Zn{JVY6xZre9ClkUZd|*UHY+mM`eH#Lbi3+W6{K9PW7jiKP@$QCW#s?Mi z67Brf1Bo)N#}VNIPmWlm_nS@QekH8R0zN7m?u|A6>~}Z}MHOm!|d5~EUGywm+dh&)pkQK-_TFzg?4vDqw26CU)M~z?Hndl4>8yrZ0 zOo<*hDBB14wh`XO{FY-fI;~{7Jn5i%$Wc{6eNTR_eAn=f^5&@_#qJ~P?XfdBDh}DH z-hn-*0Bi#2CIduc zw_F55abR`@Cf=i&e&V;EAC6F}>)L62v)giU1-By_dw7(v& zsl2VtWODn|nG6GBe#Y~wG*J8u#|-Q(w#z4toMlbb3M!-pMZ zB}*;!fYEGb8RH$#fXCRn0!jK+&wB(0ZZO`u2}F7o?SJ4euLtJ){V?@UGD(_OE~*`R zC@Yfbe;e5POUwB9hV6Y5zQcnJ7t(HowpdBtjCh2e@ePUZ8lEZ00JIbNMW9XhEpvFB zN#1n>mi=V*K9_-?m;Wc?{-0gD_x~XB7lnhI0b}4Sr2IJU2EPFrFFbYiPaQgoH*x-M z)&F}hz{e0_-slv}>Lu&@Y%`jJg}0}}uD-KpC{N%P;vs#reY*3NeXzvk&$xK+f(x+x z7(^Xs_nA~|3yNVX(h%$}X=AiJ+4$byvMR#GUl6*fwu16RMgBQ>E~8>|Nw#-# z&%m#Wt3PA-?cWOS!>ohn>u)wFeVqEZv>WLL?)lzqF7qGz8aLy`*;K=Gw&70G@8i=! 
zhjws6m-sFx$Z2brXp_)cp(-p@>HZm~YWG*gNA5u|7 zT|lVrep>0kRTKfST;!+a_fSJENjQ@hj&0U#&4z>+GWXbZjB0Vhj{ASlKe?^Sc|jr^ zRjI9NrFjaO8Tl7p<5vB{;2G|HLzvL|nOZtnZ zNLW&d|4!sqI9;#l&v2gm#&uNE#V_k4>KhR)IKzk60&P}upFcaC)@1}jH~HT*X~aK2 z^<|?(?sM}iY5~gSSqfX3lD{~SO~qIbTFz(k4&KU0>Nvk8G_K*11$d+V4pR=vKsF+m zE|}|vXtKJhpdx9b;y((xb=Ca*$O?ly}djP0Kc?@ba@_ut7n8%{q?(Lu zyzG-S7?8izN~c?=|4Ix;fE@Z?jO)Ij*HxCOfm^Tp6C4fGI1Qlzk%%Rf!%RI1;ds_$ zyhBLIw=Vc|oF2T=dhk&U9(pF#gU)|Z5rfKRUt{G^605UBxoUYEw9KV>L#_;kRL)Gc+RSlk(G>uT*zaL_Nu_n+y2eQeT zIUt`Hl>O}tGDcN99Vujm9kWC^GnPX~N*iCjNd+hZY23@)fDt_wT;F}y#yoU&=RL>3 z+ZaP;iXy?$d-5nT)~KAS8r8&l_5M-;i25*GI{3`hcm?jR$Zfl<@^VaST=`9Fea<-P zb(WG(y+%3E_0CSgH&p+H;=k{yjc+GuZ6BY4O)M07ke2&f>s@`Fh%|!M@ie-i!^I~{ z#XQIPZXiLD()u>|RpXt&kT%rF!rFsLH;zD&3_APXbct5v*Hqz;=3!DtL5t1ZqjZ*@ zmu4$Ri(Z!Ua=(=b{Q%mK94q7Uq;KRaLsIqIOFTCWX3s8%KPsyyf;lv%?GPUrga6dsiPBeB z&In~T$XsTqlGI=#r23Nouf2=RH<;ig0DwMzY*)-S==J z%iAe0e>h-=MEm+WAxt;!ZSokvet*CJxHwQiM7Gh1+3qC!Vu&}P*P5}#pbk{|-Xmbc z%LGc`LI|t{_ZX2+Zkz_~Og%QT^6&e%v)&uqGxKP&O(uvzS_-crhj4gzu=Zcwt&ST8 zyR%yog3~+^4#{!pF8zrf!!V}k&O3tF>?6&?_8ZgjSweQ7Pz}aU5DJc^)NdrfLX)Os zsrk&;o(~*bNeTb5c~$Qe^mnK#T?SN3OQ(&ha`NSHpz1`qKTX^D-_aI-Fc&Xo^>md4wb(mX_ogI_xKu6qDdf<`(rPu>r7|;&eMZc5(cudB?3tFi{+_D< ziki$Lg%^5c_bE}o7?S5SZ~u*Zn)l!@WY{c^MG4s8lufP>-At)sQW&92r7Kkg$Yy)V znb1yQCljjZ_KR*W+5)XnYj-v&w}Qz_U;U5I(@D%Wzg<#0-E6CDCc_Rt>}{M@i(_*5 zQ11G>Gqybdgb@wP(Ulpp(8x+$G`|PT!9mQ*t_AX2O?cql+fqJPMF5P6HkSU!xLAdf zu_oF^)qQE$V>TJD;$okk5}l|=M`~T;%hp1+->12k?$Z#VvuWqFWMEBu0RPj=+#-ic zPy+c~GKb{ZqKux_?TPbVXvRIiL6EZe?o;Z+6s!gJZ3A*Y%v}9wykaC1-GsbSWUL*< z`unQAt$ZQk@PYLg{!IgSTSMBy{s2nXg<$RuI#&YO0doL?3-gTj%7Ci=U7 zXe46{8TZ-PMA_<&B@J^GY1Eic`Kk4GxRVMa7dt~=#`-PhxpVh-0#HeR@C?qyJ(_hu zdrD6F+)p6{2O{BFDd&P-Nv-n&XJZ{<@Au-&$n@l{0pEi$-

f>Rda=%pSnK zBuo~PRhdqL%5Q(;rMpJ)?Sy>s0DoBW1d-aM?=R3|!7Liq_5O`hHnMa+9=Cvd%wW zggaN+^7=H~h7m9aoxbjZ^c$?$Q~j4+Ze{#er{V9opw%OScRV9N@5?8)8yoyHCAbJS z+-%I_w48KQm3?0X2L|A@m8Rro-ZCs1D)W9!@LE5eLjF3SNWz@^{QL8WyJfoOo81pk z{_iRo?w&=;Lv~eRUG3J%-@-)tZEqa56K)`s+7?yqxd&kG@63*VmdB?)vEa5@8ddS( z_?d)KJCmZKS-7NS}|Iy-#l#295L7829()#d%S- z(%bWaFy$|UZ{?lKW$!n`WG~6+wRH~kEj0Szmh95K?%hu@(|Uc1qL- z#rzP19haUo7tcNFdS$8_0)ISJrFfo1cRS)qum*W|yjgwxm+5CFrR$Mk&X;$8KRBoQ zZ>ybj>ciL5A#P7koTk?Uj5loS9&=k3C4Y3gYh9JF2K1l~Qe!;JZGbj1>2qHJ#%Ufl zotLIcS(g_MG-m$%l*x~|H4$F(ANx0-tYM2glXp#MY9e<O)gh$7;Mk{b*TtfK*uku}{NNk{y53o{WR%t8mUvk&%^Ls? zzBm;dm>>W8G$`&314?xP=7u&S09A>$yMjR1iDcr|g96BJ1Xqkr{Q=|wAX^J*=Z~&B zt^dO}VB=nZ)!`Z5)VpO>^+90EF0>x!rWohb;mbvJ+KPMfDH2rgv>!mY-`{Q9nR;=% zmnN5z$w!Ct6=e0)ZcP`E+iGB$$>^S^2`AG1dBh_g7};%uwrQ{$TMV4B{5g z9v;d-N& zJTJtE8`W0Hz)s?w<(-{Y>uV%1d$$zu_U1QU4H=?mkc6Q|YHK!;sasuV~v}AZv zar%BYu7Fu7*WMbIaWv7jrMRm8m72sK^XUI-0aW==A>=gP>*4n}mbI6c)f1)DjJC3o zwZ;|wAzoY-hvWxI%hFZX$)+gi)}lIb_Ec$G9FhMt()AlU$VOg8)rkR*nwz0DT?n;* z87sOu(kxeev+v#lfO>9`n7q~Ghem%^i#Df+owo<7 z@cZ{)OA_Ad1)7A|OOuvkJWnS}-{}6j!))b-)Do@7Wez0bbxqZ%FI4E4RYtf#;U~pf zP>9^uPoDm%3z7Lh!bZ~`>G>&EIWj&vjCfFWUmjmlQvaHA$H!JM^rLNyFrcEG$shoD za}8>U*Sc3ubvdEBbES+jQn`q>^_b>M;JR=Leu!c%WIlSue0b4Uj$zc@+M~Zt#bB*Vv3^m|QPo ze{9LSi^af872RYue7(+`c?^-XOG+*^ipb+%dpX~+5i7DePfs|}qX@^pbE-FDCCjlY_abL)EAGxc)p%G#_yLRvJX+IjQM#7+2Gb3y)|I;0@)w z)vBHJ<4+#)nW#7J;R>wgAkBD!HU2@qo64+=9d{v?s5u>L>LVhrRlj?r8O7+Z&~IC+ zDv9;x$d2{GxBqJL0@41 zLLFcT#G}(2^j{wF>x~c6Qo+S<(9+gY-HvqrKcO432YUO>gcgd**Ao77grGYYb}!DJ ztX{Yn6u$j)mbbb)K#5K0N_$VqsdCohqV?c5uNw}!ymGwC1K%$1c>ds&VL6gb4O6v$ zxydK)q2_1ZKM$V>V47f+kADyweim)nr56%~*WPlZ1Kksv%^bcxE^7=~sq1~oRO5QE z_)0Of^4d2Ply=P@fd>@^fK++(NU_Kp1|{Wgg8fH3V2BT3=C~x%Bg%pKmYOc=Me2qqOq0F!KjV9M5KI?M3iD z|IQ~QvTJyeOQt&@ihQ)i#H;tuyZ!crGTb z=VgB^dVRZR=eZ&y;LC|VOqj(pnIt08L9LxMiS)$A3g{|7PrPciyu@?UFg>~~ZlKl5 z_9?YjZX1~C%VVzO{qw$P14~2S^R8*OUHbFacV1arjD74^s|kq`m55xX0jI=^Z;a0d z1fZq+1A1rxN=HlWd7kH3_-yjN;%mCNhXXa5TrPX^NAzm@hhU}B%MKH_T?L=t?-jkf z#mAa~`BBE3Qgm0RikH1Ur{(#T%fkN;Yi}9WX7hcGwzN3K3oR5c?ohlXI4$lD!QI_c z+@W~Uphb(j7MJ2soZ{{d2}w?#=l7TQz0S9Dov+C~_w3ng?a5?j?LE6RX((}eFSWs$ z(yud=1uTJzc`lUz&$StU|2(V&ia)`|VMz(<=JfHVlFI89sRz7R+dId zC2DU{4VBI4y;$Ygw+Kx`EWtI_yTUPPD1tg=*I1aJFo0C-_;^4b| zDZ|`1Lb(s<4C!_hrj~;nA)fp3l&W`+Z=+c<%bEARnCMNT<|%JpS+L`RTZd2|>}B>n z8yxm<`kO>>Sq6zYIx?_H^U?nXrn{~JmE_ytx*`zSe$+thm~R)HILT@aPcVvv^^$PEZ$@i;4A z{`&hj_9SaKC?tX=a&3PeOfLJFnQ9zs)!O`q?+tGvVbN#DBak~x7;<{5)No!saP-3I zgc$zII37Bx-k9L0el@1M)F9llcPx{j(KOLJIRlTP0snN@T_$+}kmRSKR7i1}&5-N5 zAdLC!c{|xUj98t-zDkuQ*hqh@;9b(bzVw`d-<^u~-e0#DNZ!_ym+w+wApEIl#kVKM53 z)5#aLBSmdKQ-c_&l5PNhJiH%DS(lSkydn`{P&1LH?QQdiXKTFdd9l{MLkl*i2dcy| zc(|?$$QuLMeiLPKMluBCVY?9CyX=lS{(7iu$quLRd6+ zkG}IJgvp)Xu2k!_mXp2Xbs68^G{HSkcdNs130eOIBVFh8l#JDPqvP0B|EAn53$rl# zw&l|L1MeZR23r|+vH!jKDY42{Cl*5!bh4@zV^(3wK?}Dg^%G?YB-ZqWsQPwDvii5k z1Ee|)SHQ=URPJPd0=`1NzghtAum6IAZ*)#X!F!}2L4kdg$;<<9+DUH(q>c2BB@rpkGXU~ z=sYE2vA}8XyIFO1pr;k8H~NS~_e9c}6uL_;9!;##sEFiu^~q3&Xm^`Z8q>@x(+EfT zcIh>Hw$fq$E66&%|8RqHGH!$0qp#-qFUYm5_;osmLNzWzYG84v1EiQKUAV~$|C*>~ zH+Bf6>H=;IRFMEXq`fwvZ)V^fDlqkht_Bi3oXK~95P5>iOBj=B&BEye+b*m6_dl#Y zRS57li1CED$Zbf;^y8M%>uADl-&DL^U0$d%eL687jxF+ToboL?Uq`JJOw``Yt6@jr z6bjos4ROd(*T+~Q2E`&0v&R?KKG7zZxSk4^`NneK%@4)r_bGVXx$EChhsfU{^7p#e zj+>?ybRs|1`GIXft%&_L2BoA(7J&+R58;LnxBFrHfWYrXT`y>EwNrP9u8bQtC6&Bp zj_LfEySXxs>zrmi$wKgO9Sys1?=y9n&=LW|snrSu{Y?&vHau3(MzTPprY3f)elIZF zOU3`d00cX&YYSr#qSKe%+wj^wJH=94)>EsdfuS*7!X_o};B^*g21rExZwyVAveF61$Gm@h!@Gv2lg3(3X?`W@)|7{gp|C zz$QD2I>?bIcSv4T@2#7zyj+M7^=d5vcL(%eREhLAGL8d^W(GbZ+Zd=tzG 
zT#b8#b5f85>HGf;ySi9>8GvKzG)d0=w)+d`!x1OnMq+xQ^K)Ob9ma>6JfjyIp1Id+ zQkjcx7$k@Tk;d5Vz}2im7A^G95eEbLcKEUO$&Z4Qg#&nv9EQylbwj zqV0ZjyWLly5b1eL?%Efp~-2WN@dqG zsIFRxy5`(5EMU4Mg;~;ni{a4<0_yK_ZE_r2nL|8rrBhwW%6h4f4{YCd&mg!K%ygKC zj_Q6dDc7lyu++JP7s%%oM}GMmCx{Sqk8s1#WnGEOM}C&N^3MadQ4vy)*e9|APDSpE zsiX}Dy+TsyV&A;)na>5~y|kZqI_H1TMXdO}?rox;S-CuNC^Y(FP*LlKb)R zdbf1>{bPUPi>5?mJ*U)aQqm3r?@VR9KtCLwS9E`?>@vGMnLtt6>Z0D;Op>2KWebpk zrn6t4j^ZQ{)1k0G2aL$+meNxSXt-lGG%OaergKbn70WY=I_B09vlNgZ?>nFxjd6;Y zuljKvN>$$S0=!y}R)=-Z~WRA_5nsT3x&WEYCgy9u0fdgJ_jjlcMc$T#sozX*Fk zp6%7geV@FF$e3Jx!a&a-TzoVes?GbJF|*V50z1`Tox(YifRxsfg`ILLoy9#6UBq!k zr%&N!;=06(218yS`(v%KA@Z8*kIcotlhf7qhCS9`i1RM^1v*KBoy|1YP{5q8rwV2LUXrsrhvnmHam%;G_QhOOA8${o>8CEfq; z^VkX7F&pSjCu;3iYG#Q;o!eoK1UH5DDPC%{Mb9k8)8*OOcuw2!R9UA+4;Xcsu^aoh z!=pKZ(nyfbZ)HMQFSQ4XT+(8LA{9ya*bW%Q`i)^`CcFWACD-AdkS9!I#>{kDKk~ z@)$WCO#PzU(pz)7yefB3KGfjh>m@$6tBJiAD|e+~m$8c$nFQFQ590fi^q$ZCL?6jc zb-IFO=Bn9nnWyQe)HeUqNdVLN*;;GxIWJ)a>R%?d-iVKRF+*o( zjrxZ-m)kmveG<=^7#?@XZ>to~^#LWZd7R_bMsD~Tq)=Z_(ky)mxV&`0Ni(*Bfvw18x z=h<@dVX1)>gAWL;EsSSq9!M^IH2uuSawJcqajoC5%Dz)U`sH7fNjqJ4d8%HwANIrh zQZD09E_Ahy9=+{u^U7_iaovGy5o``7DoB6`um2a;|DizwIZNA1J-w&*tQPrc-m5A| zKm50|{};&rjiQGyTBAtD3Fd426|OrC%IR8X$1Z0eXOEbDr1MDsU^i=_#pwn2VvDP> zNjHB-1+A&Xh9RMA9lXh!ony6}KxHV-Xo zF{1DxZTvct%IlN;bNw0m2x{i|0)2%Lvi`+eCIV;^C)-ejmuCx3v?F&A?L7CZUdMP5 z%5o)-2sr*Fpo8oX0q3^5eKQN|@@pZ?=__!mcm!>-hkT8|&*yPg`q!ReP#xlO0!C$f zm~~F#1}}2=xjs}f@7Se96yD;MS1x1+;z=FhY3ojKmx?>K!zZzuJL*kHI9J0zcGQu3 zVkB)bw^^1$rB8A<7S>OeWa_YX{Q0YH9+YTWXg;&V{Fkj)|Jv$bb)S9281XyrKVy5K z%I(NG2YC13a_1iAmfz1}13e{eI-7WNs9%1$B>vYaGFh^iHa*|5MjCnblk{2#K9q{n z-fdQQ7a{_5p!A_XM8wxj!;NK|+l6{t)~FGjPu*s_3`e@8G_Du!dk&%6BS`}7#~#%J zew3Hip4xZL`ZLAh|LwvZL>OWmR*qEi(ca3+K8g$?#_s@5X4VZWBdFdEjKs5%;TyDa zb4dcF;tE;`uGiam*v;?wNmquJ>ILn0Gq+(a=pAot@cFN-KJ;SSl|1Qla_r(?B8`qUJ%00T8Z-IeAR1zj1LH5MK8SM zQFRXL!s!ZSH`%7WIfo(yO_eywKYml9+7XOab@!cgTy8$$L}?RPyhq zC8rTH{d|k#fJsg|0_uh#y2n8s&cE5;5b?_Q=xc)3LjrA#X4Svr+knpX*l?q75c+=* z{e^+n_d-ZhA`$7zS;~_7-O4!(uZk)cU0A$gawFjS_uXoauTJ@!T@s@kKn8>BZ$r}$ zG_JnbetmHewuoV2jaZo#p67gDmHzi|w1 z+@AL+M`;Ri5U!~6DS$xoM_z?`8G4i2ti%s$;$Y!Y;tcs(#)TgX*doQ7VpSGED>t{r zw?EbN%d4>)|45wxJ7IKiE4-(B$a<@p?8PWT>w1&L&sYl5y|TXHU^_~Zg z_`SU@{%&KyU3r%WalQ=R&R0J9q%aV_J9_30X1i48vw_b#|N zpJ{Z(D5ut%<8Ca?;+yuMNZqgO`Z-3u_!`D|0v&xvmdAw=_q`2qzMl8;$+@mtrJ;f|kv*)kMF?340`VD9t=7nsJ1#{4%4S;) zI&-XQl-m=A0EP^-sTjA(UAnxP=v}%INVitbx8fqQNYn!p9so0lDfA0`{;F;S?KUS5 zJQkhyI-wOIL^X=X#u{=_{x9)wI4A`uF<`-=z*l>7|{ zb(c#hTd{#nv#p{T8l9#xtNY?%Cf)0_rTxvRAG%azQXoMwd!FvKm=IB|Dq|-7QSWD@ zpRN1EGaoAb8!Y#XBx3JZP)m#D5`D(t(c@%-7btXhsp$O_D0J`${hL!86;Ru&L0}OA z8YuiS_G0*n=s|G8?Hi7B5Ql6M3f(q<2TpelCVPhf?;%)HD@uPGDU6NjFC#k1FRQAI zc&C}DvJMBoy+YCY&&Wy?*4Hm-+FHi!sriD4fhWuX{lpsU*A}bH?A3~3bPI^#Tg3V!Y_gA=2mUXvwA;$# zsDd`i&ZD5k1^WR^>X+|RG43lI_sp7dMfLAIb3kM9`iA!H!wR*t{AXG`FTY$_lT6&f zO$D;Bh6!Ey)~i*7v>f{bEFP^+CD?-$7I~jY?NNCKXWA?>Zb|QEBB{JXaF=)bF5z@}rE4jBBKt&j2zrA7zodE#>T#15jCLBmqz`s` zSxfg1r=!LIiI4kKnIKlP%`_)p%Ym_(C)ZXssFw!9nQvIBQI-D2ETOTAj4<6wThZr|UJp%0IiKByY&LBE6IC$W5pHR_a*I38Q9keQu zp$mQQilA|;h<%q~=CNFi#>1XJ{A8E#!DS9{emR5(X{RN&6IdTqxqRhCIr!KD8>I4F z9V~F4PsUB*zNo1n9B$8T;OyST&e3-Jv&8th2r2mgn=_}icg#&JGgy*FMC(km4lobXNjl7nhjJyH-d86yuJ?k_$rVBq_5jk0L?KDCBK>~xj6e{k*$m$~wMmcFl8vs3_`<7C7^#n)!_bFD2*S=!Y}_vp&sPVr40aS z>Jd(P!h>-1G38fehZ|(yIRT6hVu*TyaF(~=Jd$m<{~gz=IpSVaX>Yd5Bdgwj42>t> z6h_vHA6ddbchr^IL-aukxpu9LyzXT`pEAaSC*LWGJLbVl?oNx5+GMq)IU8o}$7teI z>Yx>w&0`RJQ58}T0t(mIq8fe-NSfJY6_d{5FR?EkJ)m9v8lY}TdbvvWw~O%hf@uMsNiO(L7?s%yQ}sK>E$?k%#{G z_b540ge%kQ1w+KIF-A}O5{m1+sl!evllV-3$_F!DA`uRSO6sG`#D~+q>&-Z8>8j@c{GG78%4!yE`Uw?;v}EQZ&>hJu}H_ 
zL}E#PU75MwlGoO2B1chVZ<2n0hQ6>{XErT`MVlk13ll11g3*su@jW1J;IIadL3b97 z>gN!hq&&jaO4#i8Nl#8au`e?}3JrF-!1O7PY{SItl#A)eQ}Bv?YyMct1!2l>IuC$FQ~4TfjGB>g)O_Ts){UxVnnU#-UU zOIw8jH|P_}kyGIHL$SmUM+?}(gpownwAREfYYLBFc+K_V^nvx^nZFkQFDpkHWJMUiLVHr+ zc5@WM>CaPpz>@HCtu0ZGsDF0S4<#iVo|2Ld`t#xsy1d`poD6ER`8U>E-BAOGjQXGY z%-9Sk#!S}CdGv=*_YleG@l!|N0cAI4n$tsse)P9kN7FCcgYNbp?XQ6q%tA;t_>k)5 zb~_xa3-Ji6P<7ut@folpB6G>NYu9PKO6fED=AX<~-%hG<_9`aS z$g_7b?z#1Z{7c}v7gT8J1U(bE{(0Yqa>~~2xV9L$q zmbf)%eB&*b!jtOV7iI**tI}6ABbzUDlg0BR<7sj=JFXY8&CTCjUR$;{Y@}{>-RMbk?Bp)ytk*u@02v}k`Jia) z>vO8r=bHLr6@I($s!)Ha=c7=it&ebnXt5pro3KNsdZV^;0ERk<;ME^^uX>0&p-e0Oo4*fFefKdAF&~w*ISjRS z$JMCuJTiEOwT?C`jpC0R5G(10)-zV?WH8^_Z>dt#*FeugTD(+EGW1Uisq6K`M(+E}&P|U3xEH!y^_L+I+N7v6m;7Ka$hn+uo){pL8ld^qz z@wkJp1XP2iFb_L@*40PdZ6ck!7rS%}g%cvC)Sjo=@RkdZc%MJF^!?TwqJ;R0V7#YL z8oHxd^qRH< z_!sT-f|Fams@_;0qPbs{@+H(J_o)3k1u27Oa?d~onVbTQ4dK*v3OudpTqeL+0wn{QbK<87ikT;4zeZ=q~p3-sxA_WB+7BolSTRD%o?Y9yuMR z--CU)KdU00j)mr5;|m4J)KX{C*a!9AYL9N|l@3F`ILbw$(D*+-`H*RJQ11p@nE2KV z4Y}K|c#_RNV6x1VVrur|82}Nf{@>q4Xmx_)9ap8w+;)EidyuO{vt`(%V@)^Z*Y9E8b&k6Z|P1+5{<~i_M8gDk?^Fb*D+`EFK8Ac8* ziNjFBPJ2Tt(q}J@@V1tZReqqLG6&@Q9rQ+RyJLwsae_tU69R=|d0x-ALpyYZm{VMWPsSVP5_fJE>Z#tFQdUMo zVoAiHR2kiFu}r+o?MFgcYEaZ@CNsMmv;U{p;clj*V$H$5n_KfGV8m(tK^)62FD0{Lv2QG*+7IK)zfX!jgvL9p%WRV^6!yktsug?v{liheqx)yWl|IrdW8bexm{O}) z>#WB}|CY|JZ*qICuQC(XRTP{6urWCEV8B2-Fv|%{$|5WfBe|jYhaEEgvNePxqjeGU zl|_7J+~CW_?3mhCGxEK9uLDt^QbKB{o5DGyM)F(J;FHW3HDqy0Ajq!|PHGrP^beh+ z7sbZ$n|?iO%4%pq9>YoKeJrIK4?L$h$%!o4YfR!a-MphfZn=!53xg->B2<_X6}%LA3UHmKgeZH5D3F(ODuVEz+_2>lk>%H*TFN!XK|< zurD4UvYct1iQzpy-Z@-1W{wk9Te<$p(^7h~wj4(U=Hu+vgI z!z8>Ws0)OcqXiDEIQ6?nrhSK~C$*+ZiHVlM8iGNvE zzMqvDWRiJ;!f;O)Wq}`#6?Tso@`kv;kKs{S)%v@82i<;U7a_YF2=j8Lj&I0X2vKe| z4}O?<&xF+vg*88@%F%hEwi)1A|1&RkB^cBH$08(Os%kPP1EW-(va7)D!MLZ(@(GO& z;&|Kna;1=Cd%ck{r$NH=1LY)I=1(GDtAYFi)byUHSS$Y*zyA|D1pHI^Z-EKLE>r(ZLWsbyY+-5EagxC2RV(OttkHg%y;8w`hbJ;cqeO*(;vFx!CgxPJXK$Z zF+W73PUDzh6IYc^+r-%A(;g$Kj`rljkby zz4sl|ZVa8Pqlc-Ve%5bX8#TSwPETJj4#vQ#J0fWK?A>FmL_Hwylr60L2+~QtTK+n8 zg@a~{Y__pgjaJ^4P^vyY{+8*u_!XMeBU{A-+pp5pIUfb3n8N9Q8blhMYf)V4zi7TgW zmq4GcR=3@&*9jOWNY(x5I(YB%dG=q>Gh)u_o|gm%vm46Ri5J2D0|$&D=L=_}T3~L% zqe4t5!Ob6~l$WEv{~)MR7ycFL9%C-XZ(GH$7$U`fpCL5@*Gp}fui*?9m743nMEi0k`yiAL4a zS9RnjXz`r$e?Mp}zB(bi*5*-fNjgIE4>Cv_M#xZX|MLd9%6h+3lKs>UJfxn0GyaV0geN%A-xwrpi0n7X$^Db`u+YTn0lJYlASKcNm$&}46e&0+Yu1Km03h0|R(X~G2EfnN zpwfRMWjciuoS3IC!V`M{_~Oh^(h+< zqDw0wSsKcOnfjGc1rZ!zSbAK6cqAGs1#PyM8f4vRG$}H+xt@LWoJ}i!cb{5#6adMB zd^uh8fogLUQ`w42VJ0&clBQRcNHMF4Dapq=h3CX~D;E+@c1dZrf$;Ys*Ge1Pi|mXZ zo1}NOi5Zeg1vYCl)Hxp%h2?UE>T9ci_q0}3!_^#v)Qtku<`v&q8L?X``nQl6N|j7d z0OZ>D#s?5o?zq)@`%L=c1L#WW!1H&iy;@N4YG<T$$TmL9jA^ zL8Y|NJZbt8NU*lHfpT}Leg}E0m>c;Pc(14f?~~L-s!ry`DsM{uGtbp>6-+gWaDM|3 z49{s%Ok{8D7w+HnT0c*I3pRI7&oa@o@~8rLuf39vKjrHF{h#LJ@%8o0dHkCKdvzX8 z975BPC2>4LnJahOa*>%7qEm zLB6$NxQ{&l<^0-IEiGm0%@u(GBInw2HfZ;^^<(7u!A?9yO0|sQXz;?rV|&IWQNOJH z0hCEZf;v=K=U993i!YhW4w>Tj0@F>%?$e#J7(%I^c@I9qZSIy}0 z%95)JT(7Z~=w?=R$;388NCueWMa|nb+Q%a7&_v(;X4&bP=#XPb;(oC=DCuYV$ z4;XiMDM>0MqMq}1!T+FPuv;sSzJn~IevkbFg+e1v! z@!2V&z(74@xeQwY=kk+QCg3euO37%SsW3)(-8m^CW;I+L2#T7Iheyrqj8s%vRmcV? 
z7lT3c8;XP?;D?#&40T$Ig>_DP!rXw=V}=!=edJwafVWf?V(T61fFuZvpsI}4cIWmP z*dRNL6JeHGJv~MWMDl*=>uusr!=0KDOWP%7ZW)YaGRMfef)dD-dTKT_wXT_awb@(> zl_z6_o5G4RrCpqz7a_!YGoi&v!PfGp_++PKg%$iuY8F67xr>NFtGhxoPH7io`_(o_ z;OrK)XBXte4G<;G@#%H>+6=Q=NkK}+%v=F>0Iu0Zsb7+2wT4#$G)dLuEaw@;pFn;y5FUkyL))3ExM` z9@bgpLvy|HH<_`RSpR<6--k+bOwJb{A6&$Ll7r`#7hleQW($Cgwx__VKF+<(Crshg zv_#61X06S_vCY@_PMH?e^pJAbWiGFb=w019OX<01+8#_3DU9f^eOlfP#3Yb&RKQJ< z*SR+lTWGYW03BcQ&WIV6_20eim;Q?Za`=VN4P6@6#u8=a6eVH~ZwnRCa{R?{_PRe` zNQpv7=F8>E`|BJbg8fF4;*lhA3oZ|F+w(z(21<{NPyQ$rOCDdN0cyFf)S}hzIto*d zlgNBD4>c_HLq220(NH<&NX!A>H)a>Tv}FMci~(hk?=E@-C=GB72WWJVouyuq0VYBI zoVg|DG;KuOj<+cijw_=2~WIY-zvJIQ3<=0``Q?phIp>3JO(cDo-~XErXA z`7natnOv(GAeT1(>Ls3KRw3k8(a&%c z;P!%wr}v)*J^sn4;dG|$7b!@8giqe2C8W|EB_4g&M5+p{W`Tiz2DRyUJe<~ z%hn(`4Ld$x%PReFIGBA`FW5)1bO86on@;SEIADT!5pNZ(2Q@~vkCK*jl5=tFa}>*y zaul#fPet{~TCY_OxL-}VjAnD9r`oB6)b0GLMxi(ha6b}u zDYg+b3LpX%-ODbAZf*J(n9=TbO@Cz%mVNu{^7=~t_iRLKH$$;

Jv}9`O>BNHwIEh{+D~UeeqC+0c}n1^%QfHdj;Sr2U9UK@F2( zge@K!R+6Exvcw55zyNw&nSbq<`4QU+kE$q0`W!7rp6FOY001R(n8+tGT5FLUXnn

#u)G#rcKMmg|UgRI9F`9OrtH@}pQt^y?4X%G_epCn#3GemEir|72G!ZL{3oW3EmxRkfX1}LIYi^Tlr?5P)`u-D0_X`? zy!fn`7wRqE3xEsdQZ$?8RCnJa3*XBWGi>IMqTNzQ?|*)zq4nf5_k*2J#MLmCXu`~& z0Jp!ZQ;N;s&IWoSk3o#Wq&G?2-H-VfgD3^1mjQvj{gFQ2;!9T8oSU-!$lvg(a)Q3Z z=z`FZqz&{LA9LrSpcRCC=RgrHykO_%M=I%wFGP}!?&inDABTHUEXjEN|ru0%k8qf zp;G<8YWX9J&pohqoU0*Jg>T1+)_q9PTl-$-$(466IIwQ--~|sOk)u>j41D3{g!0%+ym9rGcZP{_2a9L z0vir2kk6<&jIoHNjQqXz7y4ea5sg3%ZkA68^-i5Ba-~FD0!;yEwVFbKg$*G?7AG=K zfScQdS{1Tw4hg+WJ;PMf=k1g>#uZ*0FZ`b|Z9ovzMzE@tM_qx4CXn+A<&i2(w{2{} zRP!}oPdPk-BeI5gQvHvXM`8rDRy5x9`OXTm@>tq{o!G?!KsPhR{qBL;pNoyNlh5MW zgTP+|a?z{-%Dvkbl|JiEWiOtXUGja1cojxSiyBfHK2yCx0%1b z1vRmnK7s%2N_VT&8k+3&Xen>Jw!C}0GB?x#{xLn=>`O+%RO3o;N)#G zgz@tmaOQ0QHuj*efgJRoGHNo^O*W->%HIWPo>qwh_xub`50lXo)JQ|=rqG*Ze!)09 zCc7LT9PQYi=J~hJFYeYpIGs2(*l*bjch~_^4(}!ncQ{y73u#Ht%T;;$x4M^U_-=nA zC!sQ!%oK6I%|!Uar0hMFny0WLz0IJ~f7b#yP%f*r?)057{b;`VSeaHn_Gh^pDmghr z(=X%jaAv1bi@Do9AcYAXNKuji^68}OXcpy6O8 zBU)}l#hGL}!;?W-M@ngQ*TDi3aLHQL{AFl~ls^JXI& z^Sru@phmb=ma8l4^L-ENCZLOro}2N7kTUgsT#e>4qS2iJdD6qSBC(wvR;~@806`?j`6=J#3?r=$qz06=B`o=~%5F?mw3&bb7wHa7T-W7qr_LDWc zz_-vXv!yi3M*C-kXmhaLrr@chc?I$<$cX@-IBro_n>8fZv} z1SIq;+gvnBadu4ovJ>#aM=my*jV{{Tl|GGYXlSy~yEUpaLDhXWwbJ%^cGk_Wq33TN&v4Ze(@itxSpAlibnRI8X93gA>lJ26%a zK`^bpIn5M_Xri$+>Il6&_|qs-!%CmQ!h=|Rt6C!5bpa=J6i%M%ROTyg@HsCk%;KLZ zrvm*-p1>~uVGlfQNgeIahi8>27b+%$29=Ip+ddc2ynCmE&yL!??TOV*HnTe-Tddz5 zgi312e_8fj1G_mARWIL-v`v|P6-{LPPM0A?2=r4pg<9Hmx%;JP)<;a1YL)39KMywX zK7lE8T9bY>2B0eM&8lbmKWe}@a|QE7j|N2S>P@*X%YZ))w_3tB4FjH@nfQ&J>*R?j zWJHscd}wvcQc4!oWMNrtVYXm4n(>w!%n>UC{{toMt+qxoS0wOFSvUTQ)+zO%<2G+Z ztGzU{LzM{E`3AHYv!I5x289G8%kL3I<5lnY#Zm}*+2*8r?e?80=+aM!KdO=;MT zi#!v7h+rD}loC7W! zQm5bIds2e#r(O_;87gs_kUG!AzAeT=gJcf9$1&aMrJ-f^L?!dV7(18=y6e?mMfmLd z)!iH(;zQuJW6x_B4R3cMYu$%lXu2b``ji@M*{@WjPg`DjxBSEOwZ+5>Wt{L$i{(%} z=;hPQ8<)}gR2GY9y5Zt_tIe!vcl#lJJ_L#1kJrUt6THU3lE;j$ps8(uUG0`T>E5oI zH&dTWzg722s;4oj;QEm){b#BB54KW==N&Of`HfUo;6MkalH147t(LW;Zzx9^Q%ghq zm^WfirLg>YNq2ma-s_J#amXafyL*8ZBIH7-KYnF&I?^RZvMzERl}wV>7$dMB*&^g2 zOtMHy?F8#X!TcTe$~SSQ;ZvtXI(0nTF_txZYEL)FJH| zsSlh}<5ASt#@Wlc<}-c-1l+qtY*aRgvdUo~X|5_ZDPe&6#~O=9E|)(wdO*Lx(_%Id z*;TeQg?1U>I%8a9uEjB-Nj!E%+sB1Pa5fzE_rdS|f(ZL%O4|KYfoh~b8S)m;INhD(@YbB z?%LL~l{=s2!}LiH$=La8+jOpT5(OMEGM_0v4JJ(L^R97tp$1w}6QdMTcV^U{5$<&o z#jhlpalMx}w)p6WCc>iq#1Fo!TK_@fuo8D@7TxzpKP(}M9W0C|irmgi$bN4`JoZ&^ z%>M%Bl%z3Ix|;llKwpbfiY46ds`a0^hzakPRy$%>2{l7YC_JsG33=lXcy-|7|A7_Y z`NC3joDi>_=)6DvG`b^iID+XK|7eS2wTS}u)^oLOL4)0)?uq|GHgn4D4GXz^Sj}O+ zajU8O^Y(>>iNSL~hvM5X=!{iOa@U-hrlL}O6eM8CsD)EmQFp`YugN$bs#fo$AHRJV z7(4OQYC_QLLT5Q@Z3;wF#fRrYc^#$pUTYhC4w^L|7sG4vM`MIp(=YVQrwMW?^r4?a znV?BO@$24?a6l=S?gRYp5m`d_*&_GQuKTTGHN<(E-`?0*h7hmS8a_!(v^}4&de(J^Ky_`7_EGGrW^Z z70354Ec#g53e?{E?p@zoZ-{_5YI?af4!;D7?{KclVASoRp7P*EZp2UMDV-uxp5U+? 
z$ih!U`Sk7VtZX5-nHh`UOrHYn9+sQEs4V*AiJL}@RJ_uZ^{vyc$CJx?X_T2_RkYuN^9%-`NAvPq3n3KYR`L(`@m{M}F@bl*2J_#I zsdeO%%Gj@(mlLSYrYt1bLSL|lI!Br4l{$|p{NluSe9qAJH9ted;F!;8dUgL^9u^02 zICamtz)Y2vEspRyy&!pYuy0a32Yo15HG;-)T!epg2RGM%fag>3r-Qh}3xrKHaI=VB zpZ93yEB^cPPrsUc>S;h-0-cI>(=O$ZLY5*qUM5%C8`(6>!>cG71D-oO-FHMY{Vmc@ z@7ZM!hYG?LMSe`H{~g&L3j2Me$L;ySIua4n0@f$tL>X6U_$kRou5GkHDNu@2 zC|;~MDGsF&ye*XC?pE9#0)ZBa7AtPSrMNqxxCbfjPzV&aph*sWpXWW_xvujUd_QLI znarAXt$WR$nYGr9cFjp?avU;u9;hu`4X!X~SK%E31TMdIj?vm1yW14_|8IFb_a z!X%-I&h@RhVW!TmD4#^Z8lFpf{-r%VfkkoNAno+-2c7giCJd1>lN7TT_}81UX>?c+ zzSWZqe~5wemxJ>TJ{WMDHT$)s1%0P>3>B-N*zDPCA3lM)tp#3Kqw|$8Aa(yxziv^M z{qf{n?t5d1NO@E|mYvoQFWn1IlOFfT!{f&ayYTWFmi39a+)yzY(L(S9)=b|jHuBbn zgv8~g(#z#mVYMAdr9T!3f!-G%Hq<}DPh(}fq;TD@)0HvSPp=Mm<5Cf0Hyc*r|=hHsBxdxZy;X6 zSut7Jl^{TB=T-ioM<-{4sC7psBJ$sLWR>ao8c_0_5IQYv68PnO`+W?r%#!=~J_V5K znhy#1XpR~`Ag0fdR2nVVPwIOqo)LD!Ekn7`OhuP1Li0vUl21n6=S~Doe-dOd4qqui zpWcg9!6Rs`^D&&+XodyJDNvI(jV>0#!_-FmiXTYE$?(zqf@4; z7AadNmKtVzH!~;7@%})Lm7PwLd6S=Ix*@eoDyJciU@r?*<}$|jvaKKw+~OTbj;5Px zj`GW8%#R*oviR;HE@%woHpyD!7ltk2DJ({V0CU`wXr#||@{6X2!hZnlZl10XF)?c! zvlwDtD&*u8W(47v*Iaav8-F0-?fCxfwZSTWU)koGkum(%*+<|>^d;hWdB4{dnUP$H zpiXHv*u)W*iB^j7}wY771g|oVF(O)dyV|PxibKzTo-3+?K?mhlm4g~dM?zyrsG81J_E&bjWB5~Pv7-{{++J;cqU~U zCDn}mLc{HV4Jo%n321O@v`hQ81}MCbrE5~>#GJ7>TXiwcp0u1FB>*`hF7U$Txn#1-Ksv{BUc!a&UR=u#s!e^ z(7Jb5TL%=(&p_3rnWwN?l3rO5zD znDBgoo|&dQ`mF(HW{l0W5@@AnwJgTv=dDpNF|iF9Zqp+&^Tg;}Y})yke;oDBX%;_+ zHc`4Ou>Fo27zv^OnupI7odzSWYjX#OJuhv>o}&8@^tq5BPFP=H4tU(pMuU=b-n9N| zc`wA}2@Rtg%kBQ*Pvm}X(oFPf7&PP=c!gkVH==2JgT#5=2mxrzAJiZR|4wzwHd`(- zAr|08Ao%`=C87JBq- z4|>?Rcjqy0No<}*nFRl`*3k66fX8sI(*voRWx~$!LSJTHRS*sEtXQm^Dq)Rc2O>m0mO8=ixrD#_*JHJ3|w9F|P`~=&hA9q|IXt zKpX^(f4s}^si56TmSt@xAGES+COos=oJ)?Q>nEVBLU`wL?mt36?}Rr~;~%KCz9@=- zq=)$)ifIo_#4-d(EhY0@db62^;fa5h4Q4DAKQlOR5rCj#{cid6`TUxD>3HWZgyM6Z zH4KE=xN`=N1j#rzU`w#mw_=sn5&rgSGvg3xy1B)avZQxi7BrGEM#DYIhNewQV(d|} zUv@RF4`>f+eF@n}ip2n5ouO-)s*_cVj`g0ahS%jS=TyhH8Eb89$mIrRI)(6{K?HmP z6Hu{6U0Gb^Wm;iZPg3JrR(~kRy8jzr&y5oaOMNjW`eT(AM*YISWSR zk&>YB|!Y zpksEgKUb|Sf`6fOr(O(sZY2!pkdMyi7(6tRkC6v>Kd)Ec4J|UT(i%V-d7!27UiV4#A=$o+Bc%aW43ce$Hh+M@299N+q3k^08to7Trl03a&dbM#f~3 z>{?SDja$fBc1^qcXk)r1aS`^Vi0FmvSJGC~KxJ80r_PwjH6I>HqSc@m&Ip}Isn1h`nBeN4D;=IKC`%< z6Q^&pbPGH5s89?K!i<~C$E-%vso+eOVisEvZxWb%Nr;_jD3UOj8N2f=*{CEQ<*c!feZXS$Nhn?7Ql@4LgM!EOFs-r8Ubv{GJv?#j78+hQ7CMcTZa-BWxLlSCTo7Q z?5IH|*;6ULRd=Vx`@FMA?32b_pe&gw9HX~s8!ms-_NyrgSasA4`Z&4E4uwhIW~g4RAmDW1&aW|gdK^@Vo( zp1NO2lddI0(0>*(OD7;Zv!DMUk;;E?M2N&0P2}FL zY-_fK;^j|NJyb)p-Sy!_yLh$&8T;M8)B=+OlN}GOXQ@ogB<*u|>fGk08a<(t^l7lC zq+xtw05X{MkP*F5)x4Bf;%wkCc0@6)uETJP9b?%gWbYDZ^+vqb5zy`sVE_NfYc^ z#CZ?->LSiRmHe9y3UJo^vz;mp#yHli0d)`eoj<=b6#WbbmRIRiR(=S(Vkr2$=8=oZ z8Wr?sZo1Snh()V8e7Q(eY@r?OL!VOfX3b1?1MxW1B)t;~e%OIj^boQwys!6K*EM%rs z*>?1XTFfn^G`#Bi#ZENy_1|mnaw-BE;V-RThvvH_3qipq(_e{xD@dN5?Z&^OF9h5L zeE3c+`3~Z7L_4$&Px2)62s+s{R6JCX5u5mjPM7}6kz4a!U$s5A9IWwXlns#Gh|ojo zJ-D~qanN(|`{Wytovy<(VFs&fOsC*4^?KGjx2ndxil=7sUg39)Lr`GT}O z)2ci~UbB;*xLGJTP4FOn1%-iHoD-eOG^M638Lrinn!_PIb@cQictqKsmENW-1<8jn zUfKl%2_%FstwE8-^EV)=@8Y&w2YQ-kl8-^hzj^eL9M@o2tI%&sJ|~s+ABJ@vEU~8Q z376+}TxdS0S0$PrBQ$uLPw@+W32qjhPR}xMFQ=!*^{ecRch7E`CTw`xt9gN1mmf7Q zajn0+CE*jcX6pZ3{U}le+ux&*V{GjUL4WdZT%KTy52rW79|YoMq_UhJPGug%o@$iG z->p0{Q9ZT0lKNir>0-Una#xCa?pn`VG;OxZ%mDgQ=(q0MN&F@+L+oWtY(0{ZsL*!N z-ds}AcDcuCv%joqUopLl&(map6x8se|l)M z(a$Ve_yJ zm-^+qDD9Cj_j}{Zll)!(h>J0@*r}hcy1SP+s`}Syvi>f~0}Koyr&G6%Oe%KfKbd`v zo45NxpdG_etWZq);5=t>VkKYrnUx0~A=Kig-KO1Po*1toj&aE4ad7v0r1nF!vhI|) zHg{onlcvmdy!-Bm`0#Y+8XqJ{=v;3H6T4)!nd7Xq(z){ahPtFj#ab47Y+poiO?B4> 
zUb#E!JrKXf(-l)TDEAf2;;D-djuU`T`k_>Ok7R-3kL31z+#lUZ1JDd&o%Eg`Honv` zgh?j@w`1Lhgm|(EI2#WMINMWS?X_h~q8RqP+R0lm0rJ;VZ=e_O%3(gKv6~L@sxU%9_<^SBdN9<0OC*k0HSSW$KoWF&>j}KAEJgB|vIQ@wq^7~wNN0}o ze;NTP>&fCCn`&Y|#eP~_nW$hp&nMcY`UCDd+G&FI4;2=_*$Kp-;JmOqdmAHG$xvC1 zgvhd1uN9qi^IUOw+JfzLYp!#0?dW}t|KObN;He!vpk^*qR$ckk6TPADi6!s(y51l0 zgtN7I-n?n8zxO27-tTFHZs1c0_ z$&UHy+aF;5K);+-OlEdp2rBI-L@f)k20SCve)Bt;)7D zwS&$$)Hel_%30UKtMR+msW0tzQ0(+!H6 zaQsAW>M5#HlO^kS#oK9O*9L#D76)9~x1`Je{{oJO{&oV7Q?vQC<6?cp6M`4-uYfPwO)ZZ)?mm`0=5>)>s1-ba zH%i0@$Zt`BbZSCT`BQjGNu0`cV8U5-j~c6y6paWaKw@#U^+;9CF8PVj2+afsr?eiTL+57{8D?1N7Z+uJZ? zY5HS9KFdxJR>)|5wU^G*DqO5_XB)QZl7r=e(e+^?ZEVWHF7liElYtrg)wjO@xuOpr zz8YOCKe`3J7B2iIe+tihxgjFhw;_eIi@9nJGG4BfA|wjK#mfe!IAs9BuJ+e4m|(Bj zHU&_y?YddijFWdm?U$W^Nm(0{6(g}e1jBso%w1kLf$`Ekt=mf+6eBs7WQ`-6FG_G} zUGxy-tM(hv?zlEPty}8gnS2GAFSP?2RkK<{Xu>!ykJ6S_)jf0 z#5tRS$y_rTf32^(|5eDc+|4KixrpdK@0f(WJ*t2RQ!!G+Y@X-;SVgUYuUZqincj$u z5lRLRR0_|htz^vpmCzBvN^MnfLKP+6bQ2}53zYu3xI>S|0?D&1vi%i@YH}{qlnJuk zACA{BJ%^5vziTS26sUKBwax%ZJ=k``NXqmQHbl=v|MOPe&&@jD07=S=r-YD4cP-y* z@aLCmZ8{TsJ|M0`z>&N5Ca=8{(G{Z@S1Zp6le!-3c;e`K0hOOt;fMKli}CPQubBnx zNMEgCnOI&?T%7la%JDb1KOyaKnsX@$aaiq#Sc`p`g(B`wH&e1jlFwooB>!a4xX>;) zj_reN&Ejj^vAdnuNr6huScr2RT&-Z03Dpqbm`E<^cnXSM6c9JCee=VY9?&*du~FWd z&jJTd-q?RxT6 zC{e0xsOTH{IEU9D|9_1jsK)KOW_BZwN9f}f6sIkf>PmA%-OU1JHE18cB6%N)a=fk6OIKE&bQxdg?wcSF!=lT< zfAr}o%pv)+hK69KfT$AR|4!?{;xqnPJOTXKxcW0?SJ4TK)<^3FC!@)`TIGYeq{Ht0 zMjui{FHfDbDGj7PPfcO!r?O?b&yebUEBXf-qxJer7%l&*f8{RS(fO3)wO|v8q(!!n zgZX-XU82EU^Y*6PuWpv@c5r16Ym1xoC;&j-#wC*sk)O&P=Q^>qIv)5XtSKWU5ZBOCWahvkayRe7 z8Hn@dWuC6XTMCszCjk~snFoDOZ<0#TaGG5@geb0$@X?r_{(%%S2vz;=68E}J{Y;Dq z5cl#|uIg0r2{dW>*au4PHZ^Qm+f&Nmpi zTEnPa^0<#{@#3E|wAnCBUHh-8_y~l7xVAi&-e`yr)=lENhiAdeZ7o0a7zFT2-u~J6 zthkzHkFH*IS@9%vwJfC91tE5#(>Cem-5<%5u`bJ|46G)eqKzo&-CGlX@}G3DMB~A% zrAtD9Q)}hFvGTkRmsu5cP0>N#k`JYve0v6=Ruay0MUJacxN2Pic6MNaT-`%w?Y78wN-KIqS4!Dzw9%dSOfnwP*Kz1q4WW*9 z1>f91{WzgX|BtKZAPZL2GwA;hmVeLvA1ECAcHCX;xHEQ)rV03l5iz)KJ4AHgCIwMq zw10*t@fc`ZQl)QCY?6_ncA8UIm9#g2@J#pitpOFACv1@5sX`@uldXP$!YTV+F2944 z+bwe{(%}IcYa#zU+-22t?2Hqo{yt=ap-~+!NV^oc%2Cv!DU8XmlVF(I!+Am_D`$EC z7A+cDg&Q(*)}&u!o{U{cSC%v0j*`-FVCO&rJ?)MI{IpJAqFA5k%#B3(*0p*(3w^(M zyDxLYoI$Cn6OHWe`Gj*yHsEj=U@vyl3b&Cj}z zRxfwVtb3wMYrAGTO?jQs$z!uD&f=SoV*@s*xK%CGDDkPe2EKbC&dhzqcdxJW-mU^9 zBrg$8f;HX)y!UNIl_B7m9oH>ajK|F$OLt=l9`N`k?+Cjei z$v3OK*Iy3T@Y@sdD9;Fw%^(%=;2!!wso@%!S5+7+2O^!kR3Sn^s;YDB=>f=|lMYYKN^d2mOA*513bn5TJHb+vP;{LSa)Jb+F_M+YWK z^ev|wDTzi5^6iOndr(&%c_tTX2m{a5k)!k@q$9#d_4?W^*~v^Jslet6J?A$mL#mZD|Hw-6Yz0Ix~`} zA=6$119Alv>D)}cplR<)zj#jZd?_#{YsJ_YVR@AtOTV|n5ECJR>`68*-o!58@y$*W zUHW8`w@cLj`87mud2(W2@b-$?sd8ej^7M^~SIs=s`~o)(jWDI}DIOeu`9P^T<_j9L zS$ZmXKZ%1hM2-A>jt#5F4)AN?Uw%^8=1D<)1wJa<;lB<0UBOzPM%N7hJ8WfY-wD*+ zhE)Se!wh!5iWt{0$=-gAe1rRFgzsj0s!F~$C?H;v)$8s~f9n>;gw`ohRpVLyW-y5$ zn=|EGT*>QZJv?UuEI)+r#-E;BTcGbM_vcpk=YT;sGr!MS0wEbmmD}Hsg(v1W*TlT+ zW$#HwqwLQ6mCviiRoF!Z2kd>}8ZtBpX`&t^pqc^4m{9@#Dq?!bHSmTAm3!=&a*rU$ zXwMHjSpNKRGgPDznA@GdYS&BC@XC#L7C;R)1!>ejrKl5D@&`b~Oen~VrDz8vGRBU1 zo6tp8_8px7Fng3zx%;ncs?IP8=h=zl!qF(!pMF352+oOxWo*PnoTQ_NeyyJCe%uOO zr)ZMgaYcUJzkoB)zF@_7e)Yu8s%q)Sv5zQ^@xfXR39M-{Yeu{|Ea0Raa; zL9_Cz@V&U-pvkk7tRBg&c_`lYMx7Y6bq%;xr@iTJ9NmN;l)rREa;Lv?&Yf)$6Xko~ z-h2_ZK=0)l73<44-^cAq_!{9IaNx)RT(9_41pL|w9^j4L+S}Cda~`~dw2(e|{k)IY z=D1@x*57$W7q*Ym#C+fA$vm$mJQ`n*j^T81>b(X?fpJ7=f|*$EayhBYIfo4 zGXSYiOMG3+p4-gAxZQ`yx8U$d&189RgnRkO-#e?{mXJ(#*YF!wV!&^tpvM=^CxxBH zWx$uWkE2}laC8+lsdsSi0q1&YI7WBp=S+7lpeyNdl$7qv?ZXTP3ZQaqO*Q=q4spuC z-PyZU1Cg2Y)r$L6Z>iV;DemUD(}%KfKp^D9vd%U5D3u|zHekuiH-GUGk#49bdn+>F 
zkd!(Z!>o4UYy#E0B_Q-A?__+DA?_0+i7sbJu@=;*x($|#S|ApW(m8OXV9yGxYlNN2}#li%BE!#{-fGgx4(Covn>9m z@BW;EI8&I#yMD$h_RvPq$KoOYFoS!8egJ08x$X`m_}D{uvnMp@;kLCLEAK*IE|qx~ ztPsDwN&evb85*G5?%4IIxT37L0O-A9xqac;-`T#kUboP~gOxg!*$y9p2EFF*sy}NR zEz|g|3O>#{EKIN=QRWfaIA;tEw#=;pR>KDm#qXw5$Y00Mh>ZK6e;4TxSp!XH(0gh} zs?%yEke~_C`v_}>^Ztp6?d6RiVm<3_T5uJzw3+Ea)k5m`_`=8~l}RdR;Wm7YYYN^? zn|PZHke$-j|$kc?v<#Jcv>7~GQy^oH*i8%$za&_$}ts%w(K>a5D|&ZjVG#xosD@3s@CN(9DuhOr7W{`}Kw&ev&U+uMW1@{2%LZ$K% z5~`9zyo!Oj^UGTIb%M*2q8({~2*0{gX4&5kXi@j`QQLZ28XtC&1%A@oP@NK{ zu}`CR<9+bqwEwRd#4~s3xRb!B)Wy$HK`|D#QiIDAstZnkwzt>PXV1zE>N@QXOnuE{9~{-I!nR>OJBWk;DHRE>-(25N zTt5h7BZqub@7L|k#I=Rp!U$)fTdmPhJmp&aBC;IaVp=ez%gewBCFlCwqDcfzib|RYdLUCnT{N8umktFZ4^2A0LHrwwbDG{1qloL; z;N#S&hR25x64zdu97t9MPg_0u%DFT0dU{z$B0Bd1mn~jH=HLZY(cE&RDxMB-DkyX^ zu2skuNu=>O`7{is%h1Dd@erC=h{9E>!QS8~4~%ggsd1_Re*AgN)DP9;{n5bRY~zo$PYg(?U!}Il9$@tz-o@N(wt%hw^(fyQXIVW z1>W}~k?_=+;pSSX^=}MKD0(nh_QI*~Nxe6mUZ%urSg-j}Hl_DeE;0-R(O;3nu3}fH zig{gwu6B8k4~5`Sm7XrNzFQz}G`~_HbbTCXi>pSIR=S7L`=h} zY?LEu$1)MNZQdh(Z?0{V8j~_~HU5 zevfRA;?{Ay?T%!xpdmL0W@#`NQL2EjoARN!7|j=E!_C5Gjv;&<>H1ps>}C}DW7OgB zzK`qtS!4BDA*7aMu~|7qbE{#m>ED%}Orzx{ZkKj zeNopUlL#pwb9D6YM3O)s>rKi;66AM44D+aK{EcE*-6#H?K~6@F=8#j0p-c~@6}#SZ zlk=yk7Q}(UcM`EsOxCArb!scGQ;rncF9vpZfAuhN3B4K|ko3hr@A{rVFl2kJ}fqY8Ma3w zBqLwb6G@&t+FT}#u4bur7+Y-6yx5EiMX6;pUMNndB)yMK7Y+zrY!KjUU23#f0*IL1 zrq!AFOCC5@D!d@GA$X~!hxIK=)a~CQPrZEOThoG{WZF`0vwsQw1UrpH1;N z;EKf8GQXZ-aEW~NlofzE3JCFH1nPy@@0L)!R|MOY*Sfaz*SVD0or>%=U23N*U%kOb z*1c(9KFDJ_eq5yV_T$RMFpXxBnuUF>5}67u8;D0t#s9SiO^G_(pc|MTalSP)}~Y!LP+b zqO-97%f9?Jmm;?|XfY;q(nGV{PU|Ov!#r0S{o&fjD~0gFy%-Uq(h2gFgMWS?@Th8z zoK=BDq9Bn|@KI@6mD(2oKTZZ3)s^SUFXG{$6Vl;&Exx}DHH5nZjcs^cdcJGgcfHp0 z_`H8ZT>imX=m%D`F7(DWw5m5CRWxWM69@z$u($g#bU&KT3=nsLHffY?Rtk=5s+$ci zTxq8s)FI#0zVhX)$9e}-Fgp~J$y zp^6gQIC0jmoP^(Yserbg(}w@XAqs0}Cla(Q8gt>L_~G1xVFSyTsPVCWMa+RkaZ3^a zeq~M$hXxa=QumdxR2B3?uQ3WgYK;c$S4G+W-ikHd2*_f5t@YkekGsf>@n&%VlAB=y zUP_cRDmLBlm0rvC_+oKIRsQ@CuAxSM@fF8qLZdZ0?eRdZ7h}vph)BjJakbbVI&n-* z&Nn!LTtTwAoK3dx(%ejJ3T|$1RHYB3zCo@YIp2ccJQS`Ec(Fk#$IGCq+xS7?x+QnA zhg3D&L?Jrx$TcE^E2#%7=i&o*B4vdiQIh>Vzt!m0fFOB5qbPr8;1B(`#UIb_eki=_ zD&9mdq{50GqQcO@nQw%4>gHZUBnHm)9FRCoPPr&i_Tz*$JJryg<99aCtSe9iz)yw0 zL|u;FK8Azu2UN>^zC%TqzM7AGzQ9A>T{<3sAh)DDhmVL99-$|1_$vC{c5w6@*F3gQES>&)Et#BOjq1feSRp3k&?) 
zfZ#wc^1u0T;LcFM_DtSZ<}<2$PCvn`sO(5v!YxyQ1KeT*gMjGJ zrFXv|k)8hA^7?OlUhfEASZVHn+C`heUATG<^wTFF^!avZx(O+1L(<}hb0J1oEd2s< zue|lpVf1R{KUCv>rHLjwj9VM+u~W=g-CMtntqeG@6&MepRMe!(Ckhw%jUGp=0iBkb zkv5mACQ&P>hse3Ap&x8Yew@f-%|!@&WL-K_eqa`$ICJ=M4=r#AL``Fe4i+|X54%PW zv{3^Un}7xec6eOt+U^4})YSK%tkWgzNV6$#uAaew_YSUoteQ|GfnhCCNaHh0!S^M~ zb#lK-^@*RTk3DO^jh(?_tzAcRaK|8@D4=6cU;px)DPm<9 z7CWce67ko2^Sai@xEEjUz^e!kLMWH16lg4Xj`r2#*9hzlHjGHkdW$~KV_|sgae1hF zAUbv$tFjqEbhi2Rmru<*AiAbRvs7K9X1}UHA;4hdT)^LYe3!w77~~a4`8St=y~ae{ z1buVkK5Y>WGuZvRDM+nV)N`S_ zT`l&Z2R@qI(P5O>xyvi8%9GdlJai8>rf~S#CH*dmzRLFR#y&hi3XAN`bS&1uaNU;+ z;Del)u0sZj8xzfUr~H^WfYPWXu)_er_JGag34V-2#+oxh$x-C1VOChyD|U-s|N7H8 zq2KwF{`KxXpGgL|ZxvHehMefZ4&4j;b%XTC>WqY|ANxNq1{I*RQVtSc*nLt-VOxuB^Ql51iYyd+6WJx7CcTOqY<_*D>L-7T{^qvce@ z`Q45t06lSyuH4rNG0OF!_;v5`iTWpkbf?bxI<9i}VR~zb6;Y`Q5+i>&kLLMUv8&fVM+{>>g~i$NXQZpJfyTc znA=kyF!qWHd~@>UK)#)QBb7Sc%=n6@bsUMG_ZyWVUZI5OAs zX*`5&seB{dd7M3WZepBheBVl!6-7xug*Kh4@66xL@RC5v>omsiZtg2iq3HZ=?!K+@ zv2_IX3>_u3RLCCc>^8BM{W%sD!>t2x_!SMbdHAeh{8cUH*x1KP1J{|PM$y}%Bcp+( zw3hlN)c@9xL?y|$sD99LSm%IJrbXCm!q=$jtdtl|N&T-!f!ll}RnDJL)*n8j#K>;H zgEYCZ579b%;G6A~(ttZeA|JDH=Ha|F+>H6T$s!7Qy(DSSSpJ18>_w9R@s_slYF8(k zh*nez(0Pq{*==SGBB0|>v|AlG59!D|gk!;W#BgwulE4Cd6^k!2h3UBZ*Tl6EdijG$1;w(6v`k0+)^JI9mGcX6c)d;7p3 zvU%?cV`(@ghe8_fXj!j~o+Nb`&7s_*z5F}G6t-@VI1UaCQV%KpgChX*Cn#jeNH|iS z8TAzHKQG@JLU_Cw5!y91v?K1f#%>zmC?As@gSfZu7Uw4zUYejg=q*`kXG z5*c;qhkb4FHn9rKjTa5gCc>fJsBnE7&kuV8v3HHnm3G6g%gI6gQPMSsz;8`eODWvl zyyJB9S>Cw^o1ga4E^;=)3%JM!>vF&3q;2#J9|!0s){?V_tXUl3hNmZfM^)9>G*JPG zJrx4p`C91|Wydjx4E&TLk5ahrK*5dbGT_KBS@nAyM8+ir-AlGzp1#sRMiIJ}yMM!)a?D=_lqwhv?A`4%zkeD{cJOw<3CTT= z0r^w-6I65&MH^4UN(=r&Uu?vsGa1a2v2A74_$9Pt&8TToyB)W+&O)cPoB};*L6^XP z_hZ`GLG!AF(@xG1tiA;-y_svsYF4{bzcKu(#!Y>Ci~tuZKDR{G<2Nlj;Mlukmy*;R z!f{S1e2hWjxh+&2mEl+kS>Q~kCz>Q*=fdZ2c_9Ax-tx3@<$<6^Wdj|wLQ52Rfy2B! 
z{97BYHovM^yGkJs*W8wA8dh)EaDh8?J8-2rnrk!2@-pN0&ReV3RB*GZ2JLaNHHp*q ze$Fgx%UBQh!+2oeLNiI5hqv>pV%mxNNw+z6bw0rUy^-dVvsjBO=1qd>%C#o-z0Uda7u@Z@OeXUxzObIWh#Xj3S7E4TaVi7S({;;@wh3S|IrETMPeN6^9P^hu# zW!P64I)7Ipqn|}A$mEM0s3E%$;T|mRGZz9}6o2YzI0d)Ke|#C`{nn}1Nh2^gp2wSa z*#86LHC2=cMwAK9cianSBWS$OfUo}Qft#!Ju%EOS`DF^wkOYTi4;^=ysV4tSnHdu~ zTb|Rdf@MzRMdRFey@7yzlVs@v2qgV)Er68p$=WXojO-7%iH=dL@SnPd;Y>;>)0nEw zxBuCUgmBz7RV;%wnJk;jr0wk1^pf9fBls-$^&b{LcbctoElEZrIGPfoD=NC(P z+ZU=!I(y&zoi!XwKx{d~VVRZu4`$$McgS$TsHH8Hq{As|`p@XaFHPJJg#^K?pjJeB zneF(pFQdC-|3UNExAJ$PHMYmwDH+!+7C`6=@o4BT5oBkY0wK+({Xhei`)%b|dWl`N zh2?k<8eIO%;&Wvzk#)zM0#aRvC}Ez1Aac9wfrNxo@CKVsMr+);)|ZQk zt{YqrO~j3H=1HG2<_w)@0SqK3sHJ`un0Z}Eed9Uyp)*Z;E+h2=E^g8Y$`K0E{Lpi7 z6#UF!_4G{_KUYZ5E0}ku!;O^vqtb+L3oBSujqO{WK~~zh7|PiXlAHz4K2m2TsB6RF zckmt9e>74~jG6v+?WT4cs{*4yk@~YYk>&hZ%p69t9^+#{-_CQZ5gdK2_ep}INA!c$ z+jLb}+;uXzHfJFCKCYJ$*w4z9({1Tc6B$+Y*ji~`GcMQQ7Ka7JH@1717Eg zbUXTP*%(7SwrNsh-ZA3|5qV_=t?7Q*f5yE9)T1dtdJ{wdpt{9?6q4cd&4`E)STy$N!o#g+SymcEg$#TGv^WZwwl9T(Y>xlvQE5;;7ve7@e z1Bq3-2qSm!@FXp|tQj-(KSAO$|{N$1Bk!WuS}!a zM8S=((6eZL9e-`Fv{xVdWypGB@oEav6P3cSeO^Frjld8wKAONQo{m% z2mQm-9}nVY`=&#T`NWFq%GnldY~m1e*%$J-9Do`*c!;Peuha5(!D^ORezpk_497~9 zHj2Ma^^LlicF9Y8(c&k+ocxS4&qHAM1qd|SE`Mt)8o`M5tR34hs)-3C#2e8&FltZ$ z`SznZaG!o(vn@>W2%Y2q?so9zsnlzW{#~VyO`$z>Fmo){&I$5c@Jut`#hAwBUOe7( zb=d-l!qU<}8C(%q+M%_bK{|)NY5*(VgUQX_wi;@OIN7hnRGw|Mpr)Zfa7rMpzJj~| z`i~C>c5uFS^p*1bh6Ikqd_H~^m$D}b&5EZr&V0;a@&C$BYBByu_Wla6oK=C(yv!h| z>UL;`A4ca~Gs!P?Lv#x@?|bS(O5wz&oQw)AI)JMviPa|`EJR{#*f8QKwK~i)?f5 z238p2pjfA>j5Sx$N#X5W(63|3%AoQ8(b5b4E*FY%AgzGkFl7(Rj)=j4c*fkA0k2jE zE?-E7`JA5v_+I3I2${7n;_TqhLsFWGf-RuW%NIP51XcwyFp0UiH{M5^i-WU~`-6P; zj?o5McfiLZYwE!dx`mu=Pn&p^nRg;3{1JI<**-G35HDn6wl5;B%d?Uxu!S6j1Qh05 zV%&*T03Xt^^h_K!!W7qK)I6VTa?_%h(O>DBe#ZB@pg%j-losiU=3vJaE@-l=S%!w^ znjGh-W@;BGVd*~-%Ok>NjVveTyKq+bUOGOhMugd$M3;_Xe7PyYU*$x{79nD9dDc8|!4>Zt7Bk^s2PQ%jL zL;eK)lm$V#3jqwCAf}s;f8D_o=sIG5f4}`JnXOoR?MF57Wml-k@sUjVtHk%yr zCo)~3VgI|yr#hPHz)a)%(d`St&AFn1jj!=@;0k0zfHZ*crLy%kq!E0P%ID!N)Na}} z)9Pt|5a;;Wo>lbjYAx_#x%)!b1af`5NHU)`GL6Y(o4$vEL=ktx4*J*T@hQ0rk{Rj? 
zJojhuz_~J%liVS1njeK3yfdCf;gPXT95$`-{_$ROly-)WwJEzINBW_q^({CR^it{L z)eUQsPW+#opCOwT9>Vx*SrL)-Kz`?&--)(rA8PfVeERjr?0FA1)2tw5oX{&q{>1d~ zBuGNvK=9SN*96DkxqDaSIL*^=gqNT~jlt_9w8mfbtP$V0G?mDQNBtl0f|}7=C}8N# zoM$SFxRxLz^;W@SsdjYeEd#?u^WQeUdDk`q$!^97(Zs{NvuzF~)R%ti4{wChiNE75 zALTToWl`RN4*}I=%;(8~{@zTMGA^w32XlSSrO84R#-*GlDDTf2#GLga%{$W!bHEV% zzzh6U6)%SG**)YAjqX7A9ReGm(B&LFB7v~v2<}ayd=@>e(*F@E35qsK`B1ANPnO5` zN%2w8-dZ{O1ogGvrTZi8?zgX9H9u2G@B{js+o}0{+|MT>U&IG55ZUR3 z!P2S6CN|g&Q3&q@g@3~8R}(=n?t1W>m{bYwS+qBdv^04LN{B}->k`hstL;`wt|o+< zhTBX?f$csj;*B{zdphv^kzx?W^X?Qj3)u;fxCHV;MR(Lo=20!-N{A836?C7t72JEr zuuRGTE>g<8y{CkLC#=m6WFlbDPMqH!_EMqYFd+E$hFKDYNv?tH5|`+F3WrYnq9l#b0}!-B*tviGSpS{L{S5$hPXhsOL4m~$ zsB@EB`dy*7p^ifFHM3$Nu=>3~BCR->Vo0RLqi_$g-5c=kW{hM{^d`@kpl0 zj%w*tVMy@SuG3QI%Ikq^=6X}%P+KbHWcLuMZfB=xRu#A zv4rY1HxSSYn#mQbKYOZqz=2i4ox#Zw{gl4fSG)=B#q^|#)5|X>$x1`(O|-fgHYXx6 zSUXucDe<-9HhIas#uI%#VJ-D;_9TrQa}_EZs|C#uc=?*2f+HoY{&iwy{lB~ukS9)- z^<;XP1=#P{+fy|7v`kG5TY*yuaQi?s!6Iw9I$T?t7pBHpEY+9IKk@J*fmES16Z5(M zuoTxwk)q;@Lbv~;RkOrl4%M3CXZ5uXKMO+sZ|H;yTMysp*;`G&3WnuBL{lXEohf6( zZ#a8dV0qL-sQ>T2L=QYr5h$hJuFVlfFq9=Oa@h5S=ZE+4s`5guRox%eR?8t$PQBWK zl7Rh+roEM`Rf4~{qCIy1e-j}O9txOb(9U;z`Y~SJXz-t2L)AFpUpMHQDs#;1n_sXq z;u(`XWJk{(VtyR$G#6qN${eo&DOU5AixT|L3mw5O7M}6&+04$`7woyX|DFXk2Z~0b zRq@}FehVNflXJh_{ogl_ibcA5FYQg(2iOh|3ZDMAHF{&l2Tc^)^8ZlxmQihVUl?x% zElx|JIJ7{4Qc5WQ+`RvF?d$z`KW1ju znw@j@oHKLI^Za(6*JpUo2kU7g^|#DlzG94}+eq$p^fr_vfRT#KbdkZ7V=7x*7)SnX zu+C>(;ogI0DvT#7hfc$Li+|Trh=K=4zARYavybs-e*^J6MFX3d<>WD2-G8A@JY_tp z>YvY}_a$%zjUKbsm>`({-S?K8x~pSj)kb0*ZmZ~Xi-)_{qPxNCWT#fFlp)klS-Spr z7e-6~NN#b|ByKPnS@?MRjMw-9)I2Yg*;~dv?`ih>qbv9k=w?P`kf7c_;NfEA+6+28 z&4!il#6QdhSrBLoGq7;qp#VS=7y!|2ZA<&V6JIA|@0lL2H%7Q9bG}a{e_=^BQ=(ja zi8nA@RAbaJ-3Srq6x}i)w8Vrg`@fLeDsa>Dluf(&ON9aq31>lb3=V`!bgV~wA2<{vClkU}KEGRpUKyFlV~M?yoq6XlQ};4whL*ZT6t6lbG!UwbrK;@B_9u!v z^)u&EQKs}jDyJ>-vD`F?c>SB@@2<$>$0tlg2HZnPkS3Z$^u-xaRP12g+%!wyM5gq= zt9k!xiL>JeB!RDu|HH@#8fcB;jY#tO(lG>LUVu>uFMbq~CU)}n4D%BWzCt)7n!lm}fx!7%6P2)Ml)q&C zp>YCbmSd?sH2tu%H87~bQZm-SaHq~iIwLhI!nDD>-qR>7!TDblF8aP)R|oq7mJb+H@?S3&ZRyMM zmsfJIk()-`Z4|n6*rW?RgfQ`c7}^8fK7;^hZb-Zy;2RI0iZ&;lszT{nk0D<*FvViU zCbAp*sxPWkX9O7xcO^wLc*%M(cZ|d=h_rzI6cePYz1#$=-_`YJfP z{UMhr>WGEWR@BWdlif$y3v=p!_v=D0O%;x0(6ID*yS>F-3@%OSn0|x;4R8EOic03x zQN}lS(V46G?sh*lX(#Q^4C@jhZhJ_tFVidLxa5K}I+IIehJUydpL za!6`j56lwxMh9fb=+x-T$i%$3@Ij^~4mOk9P(!LRcPYi<22*g(RaybH+$}a!v;?jA zK}PD5I%9aEG!?elfbKEOS?gmRg4V5Pg4i$V^1px#th}}G3FM;%p6z@0Vf}f+6+%Iu z^HI)*lb3XP5-bfYn{Y+Cq6?)L#mUZq=V51ryqV)hvJKcQN_V%dJuM%hP{2Cc0Th`4 zzP*is0Cw9CHu%xnRho~jwMv-9v)`4Ep+W_PNr9ixrSd7uw93OI+oEcbKX1=e;#Jr#_1f_ME|69^l}~~HZwCF&SXu&o;H_!j)lztruFb!vz2x|64n{E;&N5canEh8gLo9VP4dN%3f-o$Jj=FR zr9>enPk&u1n-`B&70LEHx{Z4vDmsuBPq6AqR)-YX+_U}q>k&*v1GyJql|}HK*Yfq1 zqA<4sF^4xaE?M|QsO8Hhaq0ZyCO;(gz2m+6z2mTE$7x1*@?NyP>cW1IV;Bo-ZV?^$ zeZkF4=~VauU@`z1p-ZmLxlkhh$I9{Ww(WB$09^vx`0(@w#2=@eD;h4JiJM)?;3%PY zNufB8D!b=6LUSN6Ti~TE(zw&_yUNXw*rTfwlsY}UMN1Y{A&!TQi&aWY1Ma_j(OG*a zDT9|2g&`{~8aP-~ke_TMJDdSeEmM;i9z8~pN3vXtu_jNqfaF{QN z=e7Z|K`|>sy{TMH7*~*Hffbj0mUu{6?ByP45*%<~_E$;at=|-L^4{T5_ScD3D%h2O zi{l0J{fQh<9P#~}p#|N7eP#3Kr(~1QI_;=&5fw#8F-9@u%w@|qQ=84Kft%G_UOBmO zgqGmyitlN5-)FpTv@97bEGeJP1Mv+6WtUE(t-Ve@M|hnTWDhA!S~k;P2RITn{nrA4 ztSe{;482nrJ{4~{wlj263>TAFA}~3oA$_BEL-gfGl@b3<6u-U32prN3J}5<-+;eJ| zy|q%w6~!kq&vh+W=Ec5JIcJQofBoG^I*a&QBfl8IG7yrQ({!IUlq_z~+R?Pimn=+# z+TZ4Ug%IE}P`DaVDfMtmo1BuCW5=Kv%A}AOy8b=`8g;b|hp*vI)ep#h>z32X^^2U& zPSW0-Jk$GlU+nvqY&%VUCCj5uDWGe7`Nc<+BI5b(gb5p3(O~{Tc7&KNKX$2M^1rP{ z76HSrdshE8nk9N>d#xNR_DiSHsfsyDmDXtaC^lvF62qna`H|bY4Ba#}7jj5Uqnu(% 
zbtN!q-TT5LZEB~LX~Oi`jlEJ!&(SVd@lA~ObWo1p+kn*AWIiuq4)uyRmSaf-$Fft= z8COwg7J8Ad^tq;Q6ZcDez0~3#D1YqzjRWTe&P!z7zsPE~-Nch1Lmm*q!RFo*1{Y9& z;l%VWY02%>koi|Rz&6kNYlh`U5bHIS6wS-(yw|wI>baQ8iNhUp``h;1F}rZ3Pf>i6 zCE`<7j?dWHF8~a(O=j~?mcfpc6g|5iN7WcrzH~am8HtWe1iW4$1^;0-dcV`s@lZy@VVoBiKO%( zl7u8Q5P}Lu37j82)D^f?UeyC=VonBBJePWr8ok$qAMC?djdL_sH>6wI;mJpS9)j-{bTZc;|;w zI%Wwt)1M>mnV%aBZ@nfZTaqv)43^D_N7@g^l}teZak!mO?=QkZGBK6q^Sxy5@1Mxp zXSBpec%J5gmRs}Zo)1re9{_9*8L3c{P@shoW1}>tTDuCpK%>4Eu|yZl_KD zrM8)7jc?g=FFH?(lzBKx)Jo042P{3xTv%n1RUcTknJN^b`rVIv1KlM9(*_Oc#RRtv zMosR0x9&J^6mrZo6$_hCM7Iv6=TE&bOl;2+sEXQ^NcUSquS zWWjr!LC<6peaXe7m+5Qae(oG_NL1Pm!w15Lf9+;;412=yqb4ijiUCzKe&hL zDG4IDc&ytE?n%5}eM5{kn(Ft>wBa7F@?{vA8{#cjp3Fe5AJU$2BQmQtth||b(UBzu zkTD|#zlGYu0x#`COmUHvkIHRk%JKm4A3)N{83!qo05gpyPjH>z3H6~-JuM6UNQ~$# zc$m^eEpb%qNnj}>;pfOGyPt1`qY2*QNEV_FVStqfh%vJAM1aSMjN`G06Ql@f`@!VR zrXCN|uxZOO>xvr|Wk{YH&=LV*0EFE4Tr=ee##7cU%4lp}7F_#-Ne;UHzFPYkI}6_I zd^KC@YA+mkth)~fcq}(*w(i8ZOINSN`7Gnjl{AK{6me&@kkMP&f=MBzuV%77qKqix zW@}55-;Ci&51pu_pzibO{_EjICvDB90M#~(QIhVk>k0is{1*6Gf@Ue=Id%BhU|G5> zizSfR_#NE0hhZF|7Wl ze+A5d4k&|2Ko&8aEOq4^w)jTPmFYi0g=_%WN!4Usg36l$*3M=R5*KVBX9gGQ0(_Q( z@vY(A`sDDWqGNQx0rfqWCCprMRUGa5&cYbpQ)>IP;vqKRtDRlm?9k`WQ;s?2q@OsZ zQAxV~cTIg+O%J8m$*f#wRSGg5>%hdCkv5X%xR+)gOmz2w0+|m(F6D)FxBhVO)w>j6 zlk5-5{lB*$85vEs$>*3(N`yr?Tpv+s^t>crV1NVeB`h`)-;+}6ByBL$_*&*K0D?2e$-kgVpzA=_7%!q`_IC&PTM)KL$2``a-wlSwM#Ul%r9 zq!)YeIt38}$?O)O?h^B)Z$b|5X0J?UN|cI?t1^!kux><>VCWfC!YnsI>*)uBmSw|b z59>;#(}<&c%B$xeAK&54@GarFSQ6rUAp^2@`V#6zJ(jx^2A|;Et_`uKEc`O4seb#biuyfbsT0(A5k@3=hK(c+OlB%(iNpwy$MK5u>^sEa? zee@@<*K8A$GtGJLtALzW$f`*nrUIlKzjvf|0VF4)HPc5SCI3HnL%{h}(1w6|{D#YUf>vJqcmux_JVOw&WE#V@fBo2-M71ue%yJnwaC6)B#J=W#OFN^thP zlHcGD%(V-=9wP|uwc=bTH+}TaHIBy23-xj-#iZp=C64Vov=?R}WAa??@SDIbR{C&@ zpSv^YvHOMNs#g>DgA|WyD&(h20u@?a{RUv7PX46%2@zYLQz$g|`oUb6@rWT4J%NM7 zs9Jv9mAeQoS(Gcg#9Y@;*e`OE19-JdMX&6W&b>BcG)`KM9jt9$7r-&s$Tv0txjYbBp{ zxs@-azfbOT2}*}Ch*6&uv>MY@x2fID%!00iI6#%*@EcF^ki^jR#?6I*Ej*R(dbV`@ zAlu!xob6?o%JvYv^7VPJfcxE(#4g}SQj_J4@3B?)&(Ybol!kw!%B!eKE3W=+Zqz#U zbmIGt2JeOwAO{c~Iva6p8O=Z0`@Dmwpq>t)KCsT9+V^Y7W>5|^qar;#<2Q4!Rzgcl zGjrbF2|#&2HtRLW*&lgqZpl;f`A%JPyX5HYN?5ziS}=v?K|OUcQD^f2j?B@Xe!D`_ z>z|4DbQwXslV}-ZLttBbvc@*bdZCv9ICYlrz4=GqxQ}=V3g_lX<4iojqV;53CMLv; z2m6q?#uThesHqMGDTqFtzn%qeq`B{C=}bM`T?H@+N4+S!qK1tPO0bk}*a|BQ1HA(h z?I~@4PT1eRt7CXBocpx?WZsCdvg-#cgXBf~i`w^l|L(S>gg^1vBB^|huR?cg#DT4W z<;CgN4vNa?786RplS`|smUr1_v53=R+4UcpbFm`kiwW3F&Q_)T>$TE2zPPOp7aeIG z{q_W}XaXPl{(N{(bbY4V-jApc;B>wjJnO1fco{W;f$buctZsw*#oth<1mmPO79#=O z_v-aP=rM++tnR4c#q&DvVF#(AIW!4i-}0E{?G(5;u{!Q9qh=3%T)dsh{kIP_63lFo zd!}SR_vG^sCr065q=t3sdBdWf76(ATkkk?`%6ngKW5_Wp@Ve;>j|PA6_QKF)?B}Pg z<7q~ROw!b}=0(S)2I#*^1XKu)ISjtn1|uS=7ZeG+JDF0A}#qeU_XY#(iL zbiHyk@Ui~@x}Y7sGFkCo>fOFlF#FbX1zuVcPvk$=>xqrX$xvQxO2rR8^U`67crNjb z`ju($pW;EZ;q;f;?|fCCc!cp0GVv-a?RF`P8>oV8NmDZ0f6?hbz7OBfOp)oMWRaC@ z@d76xK<7ffjdAaupZzc}#tsQwM5`dsXVWJ2QrNXpVbQX5txh%$MP2AEy6A}7#%x2} z$S*Syw91dI<-sSV@=cG;_cz)s;^2l`E8Rrk8h02Fqjx=nE-R_Z&S^;`G2HiA8xO=d z!-ePlwEj^$6hy4$t*;0Gw)r~R8sJFlkiyv*!CPJZutnT&1eD9i|EGYF5wb)o)m9t?Y`mtT1Q2Cy@^R=7 z4Qy9yzL`dG+4Fy)xuRNXa*vydej45t8hsPS-w^=PnCFiDv&c{muqdW-sM-}V*7aV? 
z^&Od|fn(TD-`;JUa)H`==QJYk!-Jo0+Rl zrBMsi+xJTYQ}nKZt(fJ+8Z?(d&tHWkVLt808a}+h2Q^W5VJKjEWs2&^GcDK>a=K=G ze|>OgZtQ%%`R%<$E?649B#81ZP6bh-<6x!R;k*RnRx*00{!mUkfkerXj)>^ck5j8^ z7P7Z~Ep)%%`lme^$yC`W7z+Ksl##C(sq|6JqEvHzD!t;vp6w!#s`IkLSS@z6`}$dm z;{!*IpntmWhIagA8oQj9usgmMGdmKL((?;u8nF`+BCGlsL-WGDhdb%YfzrB_7ma_> z*`e5AYR{w&i`%7lWfps#Gp#(~o*i(^2Sy#Nw6^C^nJpNfg?GKDK5y}wm8-Yo62=5X2i zN8o!TzZ;IDNkcmE>0iM$W;I(V07$n4u<+ zmq#=P&wG6iqr-3sWVj_o*Y1;fRMu5f7XRJMl#l}N#ZI2Co1RX2K*S>!1D4`6dVdNz zi*sn-ILXJTh~0i4KWYV-(^8p{!YrH(<7;(cR}*O|=5k|fJ2L>gC`we>iUveI{Mag@ zPv~GqV{0^p-7lY+=jP0?G5UVhU#z&l)Ha{4RknDwInYTiUix80MWbFpSd60W;ANj>)pv@Ogw`8$0;OYu~bc-0?99mpUWjo+l9{P zCeW3{^qj`f4d|5>4S_O<yfB1b@SE5%~J2TU_Z~x;8g32%DPPPi=a`19-#k+rv06BawOMM#b#WW`&Z;H{1 z*I*>rG0$W1Yn$bk;b0zuj%HyF-t$n`9i<_r;+0 z$J;uqtmlx5i;d}tl(9F)`E~ium!}81$c>lK+oPECl#!NV@HYG)?>fo;5b$+OmROon zkSXA%;IdVHLLqwQzFfoRsNk-lvvzT9&UWqdF*Rbxnl{$u*Za2Pj~(|H)|Lw$DRd3& z$@R37)G|2R|Na!tmi8+(5#*E0_v?tlkEI9?{Sf{qXKh5IBf(N1r88_!-+p{;_7~lU zMZxu&{Q0j}qo2RCuA!_8&@2n67FM-6MAk{|kEb6O2{c$$c(jD2@7GLQV2W6-;myKd z(6$IYbbdGPe%v%%BV~`FM1*Fp*jz3_b&NwrgG}_P_~q1CNdwu936D8_0OH_L#g*~v z(Hj7)=ILa#nRUdvNbqz@&0Co|4Vk*U0_I2xRfn>-_o`}-IXPrrs5;QO; zynLtaak-Ve(PYNe{e3Q=VfuU--NY>T@S(e;(CI2dE!E-n*PhBX8nZq(!&*hv!f_Ww zwy-epa1xWE4B@f4dNB%eL5XOi_b|0|x&~Y3j%+_Cs4%ioaAIevPIR^ZL#46!X;*0O zJrS<9P%DpV0j_i?HQ7864ssaDiqv0zGz!0Nkh&?LIns!7L+%ZIL5K|o<=n05>IVt- z8)(dH1QnFd2zgYR=gJq8(0kK!Iw`!?>=%(uXiDAR>;cr>yS}N;0B6Ha7r%y$z9=QA z1siIw#v}G=Do!HgZ+VfjCKP%BhocthGDUw4 zqAAMVY}fiz@ObK;LIRiS?>j>WXT}Z6^0*34Ou|uJ&lW6hj~FgV2s<85v(#AAR?!(k zCW%s-JEIEhyl{v)Er&*j|6A$n^PGklY#+j_AKL89+EFnYy8kq&W5%GF3~X`KiB z{RQ98Gpboq8E;DuIAfc~6%I*mz)dy4Dn24si%V73md}JjwrKDiYTT(k3F8CZM%|YpB?$%9P|&dhy?!e=`CmiL4w;5H#Pt z(vdF3yJU$l$*ik?^tCfZr?u*R&#a*{_M*T0FZ@^Fh3pHqr8Ac%fdwH3tuyUjO+Q3U zsMK4LXIobrnDI}2rf(n)s(nj-w6_?3W@1PbGU?^+A~ryoL=e*fZAQZ=#`d9O_ES^R zxQcp0A}0~%Z^dYFSD%Q#_qlx4ahvG*;|V=e8`}ql1XPa}=VT98*7*`TVHiWT$7;|! 
z!#OSyyq>U%yy%PwP@WE)GeHJLl)>D%+m;VngFurBD*fg8Vr!r`OOna-Tez9bpJWoR z>~Ae~rG@oHe4ZdDVUL%)ovQy8a~Kje0sKxy2jlN6s!5@+A)P+QQ5$mgiB~@{F(y=J zm8M>>NZVf${*&kJAOiM+^`(_74L7a`c5y0RX?zvzm6UHojkR{@r zxY&08bM!xoEPS)aK`_9;UNP!`d$lwSTh(t+hZlP zmID7!imyX?T}XQ{WH+_OQ^}O-RK};?4ognA>dS-@FMd`MM?bp&9;I*hj?e%xb(ofn zO;g9@bOGOC--zcRSubI=UOd}c^b{R_$aeI?$@RyajZAaeOm#KnN*Y?3VXh{KDEi|a zH{SNqMr@iY$XH`z{}BNM{pzD-zMPIza>cVgu^{WII&Rx*jf9a{pp?zacT46>W$_W> z8sSbTf2!`7+X!C^S;0hUKUlN_Jz5mK*E?2IA`LF?vd*XYoFaStUqZ zb)k*$*XwFikZGmjzsTAPUbuNuJxz*{jmW%nKI+7HNY>(>y>bdN_dAtaKx+9o96CSP z#@Bpf^GZA)ed`+fWu;Ef;_J-LXAI=vE8@ILUrvhUO)grAg}fCAf#qD!hP4(g1+LrF ztgP17A8798DUp^VL3FL1tqyGXDU4YTLkvJDj=z$$$#IN9un7a;D@f@1+S-+E#p=0w zqVpuZ(!JRmBHw$W6^~Vn8SZL(AjG(^kyKuJ<@W^CT8%OQHH}Dce|aMK%?Z{&pn1iA z%t=-g&Mh~EA1S^tjo(=Ab!7dPgg;*?K^0$wRJp(sLaf=mW!XfSHC~;5M>E&ii4Xhr zJCxjN0S24P{2gw==L}A*@jur#95DO@FO8Z-M=h|^<+`1oUeRWcrrpiS8vuCAkxD84 zfuzdrsFh|9Hji%1kOj5|PzYD1hkhwv+4dDoWEiwEuRLh2*C_iQqHxfpjv6C)OFM_) zrJlcf&7s+aCnkQar2m0928&dqO>p~@PFcWX%fIPUtuNU01;T`e<`gGChbT*6j5I&| zE0)jfKYow1nSEcr(JebK`0ULIy7mwlLuOSkPw_Az`MVH1=B36#FKxnas}MnEX=lD+{m&Hk4H@a&;Q2JO83 zQK3Pb#oO#hbRPsKM|l8K6|Uyz+JKNM*)>6X94-a~FF&Wq=bf7Ncocp7lj6CeV2lt)V^}O=)Ga(g@c!;`R!)@knN9edp~3}Wn1_EG z&ck_XZZ%^a`^4Vy-Id08Jhyt-(JJo7kDAGgLm;K{?*Ie-0sJWMYSga9y8$jp7I{FN zRzO!l`?s)Yvl&I9@6*E>Gq!)cUWaDsOw9v}D`(TjoU;LGvSKh&Z9K}n2kVr4(OZ$w zxSy$}yzN{ul&Vu#WX~OMzxY^LbRGxI1W0s`ZT?%7X^(zc7_Iww*wF~!_>pU9R7tF& z*^X%B#6D9gz2hcr_(Q2P`qkM@zLJ}jIrdJi?p#OM;#LYJv52^L#DSSEh2QCk7klUC zn#P7*-K*@9annZT_;JJB7mLR<8&;($fUMCx2CIpT5dOOH9)|ZP;w1c?PwF%7ET1xD z?LrC@6HJF3f`XIbRnv6_wu=sFO~9k=u}k%7RP>7|F& zHO;BftHzzYX9E2B>S=TJ3qus`MDZ)A=86HK@WsN)za+eAHRrR&&Xb`!KFvI>bd^4J zPab|VAZ&*gr_AhgpVtIJ9rrty4)s2GGb#Z~1+*hl3!dz!(XON)wcG5I>QMwR8Alu5 z|7)B#?6UJ67U{MbsLIeK5_{|ckuN?nVRyfa3p>)O+g@@v&62Yu-tH(vF6Ijs{qbn> zJ$ylEIcyfv?RQ$(0rBDy9eta;nfzbwk=7m}Pa1=ouq&H*O)WiI{O@eH>Ol)<%tnN<(#(BVSR8LX zKDoB{)q}}xw!9`Wr}I+K7^k~et&#N3T_DUDEl533riv6Ah?qfjXYo%(@Yzq2Ipk?{ zLJ2hKxPOUPxbe`oD(r_R0Ob7t{T}9&?RNzw6oWWli!u#M_&;&)bs>DKnak@hc!EZk zfSuNSZ5sAQ+W6)Z-F~YMEBa&jEL8q)6QTV7UGt!D3{g&&fm;7rp!IpSTliuy%;d*h z{xM!P&WLJEt1xEN_cvB{UPHa%H(pl6uAja>I9rd`%NKOMw(vb$pGd!&nF5k#@iJ`Z ziwBr$@JPe&Of+8poiQu?nIHPD6w4<6WT_)$-Rb6?qkfMpfB&;%yV1ss8x-iNGK29`S8Z~I z!-{{~Ef3&c{34Je+i)E_Cl>7M_{MjIT0_*yUw;yuIMSGS@Y#pkUfI%X{-k1N*)u^A^RSJ?# z+n5RcZl2P@m&P6-Wv$(Adccv%<3PCVikFqNGZgb{d^>UnWk6VO#_+IjIT}Z|phZ?| ze3qbf{D6o^_^bC30hj6@JtEjg!A*W%ycsml_HU$qUy6LZJ$MR0oNqtcA|8AUqJUf& z?zIDy^Wwm@`J$zic`wCs3mPbMtnF{UB2FX6Mdh{AoJ|_ZD*NDhvE^T)Mt90QK=I^9 zFo-x>H;imkvlYOMN?$4pT&b3ZLZ~Kmcw zl2378odVmC=q98dxbuUiLw4^Z{^UrVuLN<*9w7s88s)M&3PCpc;WnNV5k^%pf2>G2 zqSX}6_1YjK`=I}Z2Ik{Exa5jZ*>9x~nt(NGSl*Q}Y_3vERsvtK0lZ@ts=n^jIw@LG ze=4fF#acGz=3K>{M8V(xfO@V8cifGc-Zphu^(IPyt5gSSwjDayZfCXy`=2)Ewjmv{ z$@g2YKmB+C@`^WzN@qB7u#hKD^) zC3x$p1q@ERt{M4%ZLP(vh3HhCF16*}x&Z_G>b{*bW-cQ6IEeNHL~5JR2RA0Xw#-C5 zcl!7M*R$)pnle(_Ph*9)F1M(Ab?d7238}q}1=(!iyVj*m)5~B91n|ov>*;-<+IkK< zrzOfD{GySJ`;B*YimeNUl!c)*%hCMEE~8$7j3PY+U%tr)kSVRWsenQ3+=eu5>%snM z)C^SE2}9-aH!^{?d8{xXJeSJld-oXw&aswj!$?ln6guaNHifNw<#!)uYj^M5UC`nk z$Q2M7&VXQPijMmJMh|)X%GmoncU`_YV$RN!#PGm%(X+x1$^q9Hiq=a0MdoKNjXgck zmkzglzoL!wlQIeK$(0Qnsbg|pY^-lY-|E*#r-GF!A{tWzC_&SwD{YefS%z~6v3C>W z%nMd`2^&*&ooAmnIHhcF#8RK9^IhDcw@e+{q9`fu#!`zrWmSIG5@M#1#4gb;+egk0 z3ydXbd%A=a`h>tlvJb=2iUX+nzov?e{H_Qz_+TL?PC6?IJNy~VPqz_Uw0Hc9#4#9x zLbPByN=?INA^|*IU}F8XbIe3NjKf4PiNnOTUJ#9yk%z3@4}tFje*y|}kF z7{zA!@Z32V_E8&pS+N?k8QRsYAR_qSIXy%9>3RI=6*vgWpCzKYwvD5l2T0|4d8M<2 z-fGbK)7>(&ADBNEHbvcbPjU?S@qvJ5=pvmC^bf2GP6-vTL3?mx)EMxb3kH(XJ_tM! 
z0tMY3atRkI;;*3RxeO)OGR5VO3E!)u-qCPu4MZ3hECCer%u?iM3ZHiNCuXl{n{QJB zrmg7~O*7=1Nrwx{g`Kz}NU$j4)T;t>6>m`J`4g3>tM)sEgz4h(+GwulujjAP))qt6 z_$j2z*?$+n49eYX+-_7N$5G&==X3Kt+<~^p-<7;fP@dC<6l>1G~rrcyi%Z3E7cE zC#+x7f03t*jz;Sl^NlH6?a8umEDS8I-h|7MW*(9r;y?w#z}moD{*K+L-@LxFc!A<= zRi##_K6Pq+G3)xKu$*QC9p}7AO@rWX4M%iQV0W%-EFEr2 z6L^_(6`f#4Hg1__B&$)XTzoK7GIh77qcg0_iHf>tbwu!Ugvh+nyV$lx^F347M#Zo*(8;GGnqieCHFy49ehN2u1j4`N9vxdF4%`+ z+lyp1 z2}`TeO3U4g(l}D8=e+VUQ*@v3WBv@~iM~lNoeigofL;eeI;SKQYTF z((;|C_4mpw5(JrRrO-QtD%1(*X4}qB+Es|Qsl(S9^A(8W6lw(XapkhwzzdtwLeai# zN1bKs&~sz8sPe|sy}xL9if}fu?f+3XmO+NKd>n7ZWjb!Yf`4jeyF~yJjpZGofSNTj* zn9U?O#I^VNBv2H(k?7D@gu;@J;c_QLEbOS_@2{GV#ERA2rOC)9c*9yrwyB}11VeP< zfwphZvp1mgC3vJ#R9cztU7+XqVS~K;RRv#@{Qy6Qdd-Wu&OO%A%$Tz|R~m^cS(X0i z-i3Wq@sM|_5AMD;5|mYhH_!!%!2?!eQ)!^zWXy1Dn=tJ(r~5k2a=<lM9a?6UfuQ8Rqv{IM%u(EPJquVtI|y8 z9Iw*0+PbmfLh#6@4VHSSCuM{@)dSA5zp^5FA*JMaHo)OSME18B>=Bd^Vm~8>w1f~c zd$P6oK5CN2pH?#PfoA?r1&|I2og} z8lu&1K-%&KVB|eOkqowB%DlgUG(A(+K#^as!$GC;9%PyC*0=T((jj|R;m^;|q7%ka zizM;whNKg_6)wWoGPLVNx+fDo%hU@-$`tk)d-4Wcn*d-9r4sFRwfoqmu2{fE1oxM} z+Z4PnRpC}AMbVD;U@3}=?@RQhzfq+P$QGwZ{sbLmk5Dnk3~#?2sw@M9d4;>7f`|>@ zchgyX*2GdQ$tBSg17P!i#vB^8*|b|YqQ;0?190l@-wa$jycqSVRj9)mW*$7e+q2jO zEZ~i-pqh3W0*#Nc&}M>~!L>Uk;^-w+eDyWKdA!1hY-Tw|d6(g8fy#-Q!5KLsE9d6jnkT-OA5dQhI&^c| zQsqr|-T^tXO1<$rkP)uBC*gC-%0dEH^inLx>))hU_M=xwD_>qGc51hSUOErxPD(ma zHC2@?vr?E}5>sr(CsXL=>nYG1Peb!#p4wvJMkq>5V)<-ZItW)F71^MSMCp%SWpLVWTD+p*5#h z;(-E1(*o6J%*HIkH#-q9MZHF-(mlglk^>v_d&1+kutKaQ>D%ku>Srur@ccW4w-{gf z#G*j+tEE%Z%mk&eisn!#tXN0S@!tpkUE0!2`Jd{nuj5xR@A@mvZ2X4X41~LQ_204E z`J+E`VC-dWL_$shI93L6b#&6_cb<>^Th_jdIt)jo!g-T6_T`g0y5%Wqz)>|mhycAm zAG$$?Mt9D0^RxYDw5!+H68)fzGuh*L#F9f9b z|8@|)73smLi#4P~tG;4A`Y%w(7)`^)c%2|W>q?J5c7VT@(P&Ux``u_t=^Pjp3^{i$ z#93R9-Nz3-_h-#FVN?M#;+0hm{_6?VaGtA$?uQ4uOz^u0HkpZn^*SO4_j<0Z1wF{* z!>kAEV&C@X3p3h7U&mg$MbFOGP5*^&43FA{Huri!QJ`EMN(QlDBFFf9w=cpS*e1_i zY+f>uP51JAN8II;yAL_)%2c5o@z_YAZ8!GgtD$wz#3q8JaXlt4_W2Jdd&Fid4Zqq- zD)_gIR~p}VlfFAn9`d=g zR|16wld*Fd-i({xgCoW*4#5u{4$QU$c0NkYkxE38e==o%2hRUL4-`k^|Hp$I?M`(5 zW^p9Mg2UC_C5o{hhx7b#c4m^tw(H_ol<9=5hh!F+eQ7J6MoVP(zXggnghr2tbxb$%b8sev3->4u* z!^zU`0e6f3+<{FK=Fz_U*p<;9;f{h@bFDw(qQIK}OCxQx(Ze$59QT2!M5?!6mYJ-&-bq6WWF$G7m)Ga%`97JCT zH{*5hn^a~vSaI=htU2f5&7NR|a|Lw1n)H*XCbhCRZO+KIA;+&9uAJ$ECus!Kzo3*q z2T5=JT_EC_S63Dl-nM*8RA)(Soik1O0FAKBxeq99Chd3HRKS^tXKC-9QZ8gY1N6mX zu;~u=moV-((&&C#`tGS~e0_1KKt;UZG*?Hn*L^*N`D-4i7O2Q4jmHN4!NC=KX|sE7 zLTb|fLfb#Bzr+hLQ;I&2@A6FtCRIyY~uW_{qQrD?uu+TChgvg#hdu>T-sz5 zD9dDURwE!5d;U&==lxj;=c&g?ae2@v-VWv|*Pj1hg1xo?9I4W&`V9QPv+n;t2d}zr zMPZYNi%UPvs0!U$A$k6p`hCx+f`9R8Rpg{&Z~dQtZ3N$0nKj}`CWpAPoh~`=uzEo{ zqcaosr!eEy>n`OSP(pc-sVmiM+WO?TtiKJuJXU)BajnYq>tDRm+=XuM$Ic@aJL(RI zOH1R{g1c^PY4NKeEK4q|%{A(jE+^}MSDTMq=Z1Pq6e}Vizq(JYF-4zW{)tc>(kAcu z9!;;N__OsV$?;m<)PQw`aVztk*x;}0j+b1F>i$l%-wl9y>DL{#0vF|75*oBW?J+sD%DT+}=NB(H!=JB(&$clgviJxE?)N zQ|pb{FbS)U_mv~2zt)apIPwTX`X;{}%+DPUx;_~#p^wy}Eett%xJdDk+1=mEjV*p! 
zSn$N`#q)m@)T{q|XL&#Z@TA9*^;kdI%_m$u75t2{$`#*F@E@raz=FVM@dxZ zQ|JG+_!GjFAjANS!pcEE`bnnn{g$fqtHlB5MpJy=8Y(e)Z}G0CjtDJPWm_B#u|X%8 zuhv@@QQbK9>nTVJ1~*x%{>oVy9k^c8Tv_aj3qQVK^#G0^5$A+BxAQQMHG%f?0pj{DpA847dYuLkE-&QsHy2I&TUe?v?UVqz{X%)>-$rqvP4 zQ-e5182GS1A2x;5hR~a#d?O7`Z-h#lW*yg>%1J4cn@N8-`hz48Gt>r#lp4d?tz3G{ z(2KZVVUB#&iVRn3IRrBB&suCS2^JDqxeZI zFMXX;xas@thsm}IF*ys^M)_3*oveCeVv9q0JPv1JhqL9c$+=`6E7AQQ z?7d}F+uipsSfGU#iWDv0Lh%B{odBh!I23nxcTb8_v=rCi?(V@UP~6>$JHa8zq`&9+ z-?{6~tb1q8S~G8E-sVWQoqf*toW1wQPkFqmmvhr;wueL;;Wh`lNTO{ImEpVOEi8*0 zZMEtnJI$^o^PcLs7nr`JO_s%Q-`f}QR7a~E(bDidtpkn<)d9RKU)&%H*=_0D?DAeF zahdTPYnBFw@;8fTYu1Jwbdt`&TY2qo^er7@rol^{AUl56EyRlo|8=l1!rWnK99$M8 zmKZ3g0K-iX7n0#SYbXmEp$QG|G}r%%+d!l~=^eo^+UvJbqYN)h8y$_GveeEE(*6@g5luUE;$EE5@4d#}@AWMwEWAE61EkV$gabo{f95 zJit!isEJ(cexsf*YTLY)KzT3%3>aPyJ_}8!CFmPec}Oc9_mS7UWV+sEC6P)(qW9No zyIbguX99VbOK{`H4Po}!y;DH^h7IDF9IY*W4Jbt|eT73U_=h#~gRCbtlSe)6uysPm zJHK-|-QgJpfNN8378B*KFh!5*5Rb!D?5|^kAAy84H-Q%Hty2$|gD2PbF2TS%&*VAu zt9^GB9gVQgEwQchx#I#UW6SWca;<;$$NV1Y#=f@iTCZ&QO;(g{?;++(^$Qn0zRR=X zKiRaDZCwJd^*)%4i5jBPwV!%_wp??+0eL)qry==V9riXa?mN+|zNcPTOj*UW?3iSU zy^;UB>B#(FH62L~@l^>-%Y(g$fDk$kllOGtFD{6`icJ-LW%y^o|LbDiEjxegy}zy@vke$$@CCzECO+ zGYg~;q8 zRrac&ZKmf?Ctvt6P%5|=u(~GNSjJa>lct`7a^_eIg77!FmuYXbqcKn*-stnYR`L7Y z*e~vt1{{gCHG&>^9T;v^=Dh-`XE)#J+_g4RHlek8SYK@xFn_)p;AFxm#5ww={aKm5 zQb0a|C0)DZN%3lC6uIrgf`8xffW_`4(4KZ_v>hIp#+`T#^mO8Jh}v3Tf!Iad=%vkm z&CGBKL$HK1xF4hHik@zv*Po9Af7R{ny>i%2o><-G)1|AUqEm#_Y$dQX~^Lk&5*Hk^YJ?17xwf%Ep9enG(lKkJrzE5d*C$iDp6Z>&ZPcA6mlj~dh zK8&}oN&x*g3_!zA3<)SCPPexEe0WB$3a$@OBBv}dra8koy+*7`bm>1mTFtxWxB2cx zUsv3lmCYyHQBrXg_%h!=7s5~Ja9*}_Vl=9+Xf(<1GC~v55t{ur^J_KDEL;X6zHEw{ z# z4ES_o?gB--aL+%krZN$<}wpMHmrx|9$xe&f8~`(j*-YKg-{L2<#bWlywIQeqrI zw}un=wBRm)n@3Zs1)tmP3yx(m>bx!mP5~2TF&cAij-0+rDzgTEy()_BsL@x7zw2SB zj4^I5yMfvA4bUibrxZK)B>GR2jB5PgA=H?GZb06S z6gkg^+W&E$IP$wu3o4=Mujlv)Zd4-9(>^uc1P-bOeoWE3y=b9rnImAlP<)~H53A7h z##cD2tfjLNJvPvi`qj%Ma;TIDvpMp`hIawOV2_0mqJN7cO5*Y2%Xq8*vNG8d9v?l8abvee8*T2Z&yG{q2O0f&?chH}o@OK-op>%>rb}E% zxVhntgoGoal+%Nl2H!Zf&TUNdL?1N+JWfmdrRd$7k< zywlj4xv-wg;9(uF2TAa*-@U3we6vKl`BZ4kH4@!7i4#=Jcec+bNhQQk{Z-@0fn{@g zM!j>hsbG;fPt?;A@zx3;M6}?4YjP^y02AV~a~XPy-+X+Oz6-f_%mf6%xm zF$mJ!>G9luoEMmr_VHf3if{B=p{FIdpJUb9Z(l5ms)N*Sy?JcdeX;@CUc@+tJt<9X z58f*95lO8_221nI*V-lV_Bg~?O`9ZC5M`3?K^DhHSmr$AAdeF!7JuR^KQjq_VP$wC z;EVQ*EZ$JXwv3*4=@*WLk>b@VXx*z)aTq5t^o3?K^Rbc82z*0Bm#d8e1t4MP$!_2U zu0tGZtVUR39wKhf=QapCR(v+XTigAFnhjDvHmgE5162k1-3&1Z^&=0Zn03(@YC!;u z(|P!sXH4b_Bi{DM2anS$DnMoR{4G|SFn^T?HBe*YJ`#K)qI)f4HCA@&R{f*&7jh3E z`XBcUr-M$MqTl~IqK)aV&#DAMrJd3RtNw2q^S@is|4Y-N>_xzDDSj6(Be%o5zuViC zYIApSKBl1yPTb~Sftl{O%Ecvv;RBMEa?7-BrW#s=rNx`_n0GZ4q|tVdTuv21sP6g- z!{FFtv!3%|XrWq$kO>o)@pAvJQdz<0%1lHVFrmi^6+5dJ__EfhpPOv@R;J~)`r6eV zDr5=oL@buqA$3It+vl?8 zp26)H^lu`PC#T6m9KO6dUw6{52yrY-H3culJ89P6QXcpzk4R}c#XWD zT~Q|}P*Fcp7)xAjw4!ylhmf|;vP-fh)PU7Bjag50GiboK7Ttxx8aF`jknYNtMTpUk z>-}a(>6x3bDAwD=e)K zEUQ0r_;EojytKnUq?&XGeJCs=!G-qsz-T<7%=YWD>-ExDnVD0cFYW<+S08$fSnc!I zKyYQz>|w22S7cgl`@Acb!H=ykQhjVK@u@g{Zo6gNzw7J|bVMw>582=1I4D?)coCbi ze5*o-XCKeCNfVV?1s#k5ro6AR&MP62Riq&?l~@M8?eP6`Y2Egus_gj;R>{s-G>qak zv)w5wU-}bVZX1Lc+M?5W-)-b0GIEU4h=&f_W`cr4@6{G|tJ*HJHpR7woX;!xJEEZr zLZ;7r*!P&^Vnzg9+`#)Khe30rWkk&m>ldwyKe0BbkaS8!89-$71>Zr$p$ic&<~i;v z_ZAF``K>vZahHVpD!v5`8`NAdV7}6~$~P0l=Jm)ly0L~VPD%3%Kv(sl$d|~Z*s*O2 z8Tg-j3jBtB$~-wwA~bj5Q@Lt05kb)baTS*p!=2&g>8edBQx8Eq(E&&NdSwc z*q@>!Pb*uj6xnJj%Ej6w(K4+-xL6TM^j2DUY ziO1kwSYQX+#;pY`QjmeaeMs8Hb*aZOZ(m5SIP{}y57lFz8W~A zbTW`#$j`X47)^<2(jA?G-KPFNq8>;^{P7Nd5k-0aQ-K#{+5ZQrH)0E0g-xm`2zp{m zSFgN4kA38DK$d;K1@MO5)mS08l9aZChFhC&DZ*!_Sy3pZVUviQ>q?zMwj*$Ke-vfU 
zJKdJsYnO6T@W*QUZKoUIOd_Gpm>2}{LuGoPW}G}#Vd>JU0tuhpmpIXfSl}LvN#uMx zEo|7zc&&>`YE0j`)C8YOge*wx_(wD{xR}`b(i3X~nf$X=L%87hqSUnyF3fWQoh10^ zqcjn2MzK(xW&K=*5NW91;SUJKQ(pzzmYbwQ>wcdmm~W-!Y%{iAKe_cr=Yd2(Aki3= z&@Z%d$716;A1Kbf$&LOJFar^;Q6hE>@sb^C_i?aD1$c8BLL+}vetEko-(cE-k0ZLF zC4{D2OF|X<1G7>W_T+fJ^DWb@7YFS& z2|9?m?J>uV-VXD-KWV3|{yrzVOdf_wD3XD*C16e>VSmpC z962rbyr@>;3WR*H(`xlUt9e@hOImZ`S~;5*8F$d`#P&IbM{}QA;B0Lh#kD^+HG0mA zIfVW)*ImFG%ay_d3)q5w^f5<9IY2BCyYuy9Gi`B0v;0Y99!*KJtP*l}QZ3{4F>BIS zr*{3Q+uyvZvKLkGYl6%yBz^bZ&pX;vZ@z|hIi)Tcb5+u8%hHgPQb$nieYfTt?DxPv zaR~YFwERu*gFi^DV+kIYfmqj+qF(yk2w7DqK*d2*#v@MH`DKUr)$v+L?*YvL zNNYY47k7QS%sK~|7qFNFb({o_@{G6P+5VCIRq?bY~Bsbt}TAKvbKJK z+H{RzsQ}f$tUcu8u02?P9D!cDImbmH9+LdbGODR13P&l88s;Y`Ys_?Ujy+Oa=spM{ zy6-R`P|Y?KyR>StCSE7!p}~N8g;Z^Iv#PKUcr;5d$ulS$&-oZ!=qz8&looF*qenbP zds{Q2`Q|qTLt%PJl8QUAd{ErX)@{f>*QA-EdlnXzH@B+yR=U;&t>>Kf(S-c z(y@4vQ2u^4uWsQm4lIX&{&Q}`Hcg6EoH)S{0|p_Eht)vb=Oxz%2U6MFmjuo=vf z=r3e6OOelLmaDb4U6DFke- z%FuJcVDI#={m~(m?$QrjroX-fGpl85kGbU-ljt+^PNPPI_==kddSnF(ojo}vPvx6s za_;J@bN;PZ#r+++`_o-<@~)gZzN-o}Oiy+rA*FpukD}1#bj~3 zWVWfFKP&#tGdKAuMifOVrXQQDVf)_KNre`fEQOyQr8wCaLkrY2B%)>LTBvlEcwoQv z#f$PeZyvw0@Y9Wsv2`>xn8+umn481Im(8?voI13mq|nr?FkPa4^88iUNY-da_6r{0 z+{Rz8yzrXr_KokGdq7pDN-s_RkMNjvixIWw|ApV`>LM^iZQg!uH$e2=!vJ#9gi7> zuzY9+m#1-lXY2Od?I^g?fE6qSR~6X57#(Uhy0*PPuYQ=oNYsM0O=u3Lx>+4he|Ic7 zB?-0CYE!ko91Xqo_YMl-Zl{W#EWSkvHfZ-1OGcvJyO!mwjU}gtC)aC=$0V^#y#w+g z-Z<`pI^f6@rF?ouNU zHJ>{3IqgPIu+gI1X{AujmDg!0Iwr6jFum`Px9RQBV}}}uw^g-x?R^*uNs>Id0%YD9 z1a0MAo1lDju#=!*A!S(=mu9F=^0H0bs7_fQzW`|ihBb^(eu&w8o28M40oQ`XCtKji z0_h2FdCiYBIdT@+(_a{&nGE>H$AL-i-lm%*=H~;LC591NFwo9@jQ0cAeZ}s;>$TSq z(y1w)FLo7Odzc3%1~`M3zJ99V%if1W!e!yvLQNp#6hK*@|Slj(e6o%WODI zOZBa-^Q9OhDS$t7_BFAR+Y6QLN3P@DhF{s;n~{JZt*lCYFA^Mvw!EHKcA(M6yP|~% zyCRvM)CL$6G;xw4#v?b193#j~&0cj_3`?A;IJ zjJ9ID2gmGtAM}5aiFhh}t1Z1)pN8Ny#|b;H2+yYovGeY8-{ zfmOcYT^jmHD^#vydood&p%zIM)Q+{-RJ`+(O7UTiO

^t&Ap&Ux%Gn_<4mHNpy0%F4Zbz$2)A{Im0`nIrPV@|>=W`|+WL2vZk> zj`v#5=D9T_>t2ufQjF|Ci@mNotNEtGVG<|1GvU$C<}fX1(_Btu$~A?M-ih*pVQA^% z>gTHy`xDzoq30H@6dhH$zTi&}g}CGqA{b{mXuX%OllsP(WLdq1r3(ef zQ$Jm?rwAu`udHU)Y>!Hh#gL0F#D;f8g(TM|dR$?YhZ{A99q(Z{tWCX2i%aS7^G^Bd ziU=Kx$knTaus|7l*)p$3EEm6#@O#5pW=q)S;xdlePS5w|!YyGj190tr`Fh{4fcJ(` zMksTtPDa~uNF+S2WIXimbBHanF&+V~A`Y}S${Z1MG$k5SumA2j+WJ+zCjK8>($k$w z^yOJgIW;9;KaWLj#zs5*0^6!cnA88-vLc^#|3>a(#oj#iWkZIq(ARwoEN)5eJ^{u; zoi+8b80m~k(n~Z{bi_#C)4UA&^>@}sW&AX{vj?swy} z`Lg?+*X8jfrEbt9R>l3ZbtGUSC(*S(75d3>QN6Ig{BOSLqCx{=|_Gv+U_b3#f)Xq7QON1PemhsuWS z1YhqaH^JC*iy;KpHNblh&n}sZAnlcT*Kp|2q(vS*|EB?js4ZKl;z1wNK&h)Divbm{ zd1KoWDugojE+zw{L&DyBuQU)zc4KYEaI08H=+?+q-9h z6x%6T^oKgeUQ>6;QU`tL!b-r8KYXc=LMVc&^^$V6J=7cQLu@vZK~macp+eu*=@j#W z4UE_Q>9W;~bce+4QNnzzr9Qr1424;#fx>KffW;d@31vrO=e*YvlQL=y*k)D=DO)J= z==-|)+fKxfYP-9{(Wf#&yU-2rE9q{AVHl`54L!icCVWSQ}i$a>T?4%Q&q9(dc5&?)|JB&>kc8(Czx^6Rx`FBZE; zKkN+sme&*}^Y14Ca#)YNR!f1mM=0Av zX~?&0R&jx9YtGe;1Ay<@hmANtExWW+QB2GoMt0m=tkKh8qQ)G_omXS|781t#>wVWGzxYEVB8C0 zEGmg|Z^+;#_0$`pRSPShhvJ#g)S-#-BfXk6KJp5E&^4g&T!l`PF2!l^aQugao3Xcg zW#lkB5sD0qvt=bzg(bOLR958&wehn#(V#8R2fZ6`qUT*Vi&sn~Ojoo7fKIZHkg9G; z;-fPmq8nwRWq;cw=lJ@2I?3}w%CrVNi87@Pu84mAUPV<<7Q-vTQUs(Jq0@S&v7>qk zF@128?7dDgMVKr-HKAWX(6|_z!Xu8wmE{a&{T(OINlv{c95Kw0l8)iG&%o3fM>T%M zSIA44E*rMUHu=ir7xBh(lwY_n=v5_lJt^FAV5OI759KNh)7>MmnPbI+-Qg2$;dI$W zYLln9wg@aF6nYmG@a9;x6*_D_Q`>$-{6nB~gsu{S7`;l0IE;4l$!2wwsX#bi);gSY z#2pADXfi;!>T{tuQc2H{EtjFweWnz8#C4qpIiSyLt2GBgQrX?E2wNP03D6WhX|r;M zn%6&fIDq(8b`d;%s@+*#j~q92WYEZ_zS0v2aR05TAz=8Q$>d=BoaN$=5G-Q`=KTEaO98}Ru0fie^`CRj*YYROo{v?>2}bQJrqp&# zkTaXTr$iimY0P?-7nUZQ2T*U&VNN77q!7i@ep5_M>{uRK2Xclhjw0a6ju@XmWZx>K zN`@|N)Mh;k8XJKvoHM}t#-Ta1D*?p(+xHnE4_SXnf4+~DH7x;wCJgLUE%?av{yi^F zS`4QzN~4(gEU~L{OT0i^fP@xy;c#WVr&YYpg%f5gBT00ERgo3+X@1V~&lnYCf}`Ub z>9-%{)1=0HYIZ73?NtBfX#X_S$8?mV%l{+qM|QzTZrk9sR5{%p zf4_?|8-?cAe@4dGA*w0C1#6&NoN6zNHwCZN>({^NTo=z&^0>LRR80nQ7lTi6td+^KeyV-_<(WEg+trvxdO8o|NohXIIIr$xQ6vO}@UkB`Ak0$JP`@aiTP=GMUPdX7c zF_Ng4i(4aU4r-oLAXCJ`0Evg<@%j(jWdOkLW%^8NEms3EDmF=p4h``>i3-q?`p#bfE9W4%VQk|LCKOND3`rWKqAVh9aJy(wexGh$7Q@Jc~ z%5|H64jF^KLrYW(h7!hdp=ZIyIKJ4gy@WBz2)T0k^Y{(_+cecQ*gSTbS*rhK4mj_a zOp&r>ba*@jqv+r4k!V*4lOTV*+`WR%g-&20b+1`(ZB5zuJ%zifvd2gqt4J6iA7oK8 z$<6brTOg1tT4&WZ$7L^jApH3gmCnp9X_Xvg)2*X|mjG;cv65$tKjh@2!T6D6y+RP* zF-JSyZ-P=5-)Igek>bOm zihu)tgA8~fvB zly(x+s6Pt0vAL3ObS{3C$7V`*nKvr@&(wcTK^8%pG|6I z)cNZ00XX%sFZLWwcJE@RQ|xVjmR`}XYF-AcB;l+3mDy)k*jahNISJ{9G!)>$J;Y`u zYwnp+4WgM?<{Dy(*ur@Hb4!2~M2}(Wh*)X2$7V%X#oMa(^DVaJ%P4=BwfQ>Rb~j_4 z{ZJ{?f*SG!v-n$^DMI!dvPB9yYU;E1G^J?V&<|4h;~|9gl#&fKSm4qtma=jFHP%C>0yzI;DBQAJSiPC&UJ>61R@xA4TMsELLDfuf(num z0S~x}oT$fh zDW^Z5<Znd)*WBT zM(6N2e7&)ac%sA>_!r53nANcZEG8E$z^otXRv&9EH)vYUy0NP6-Pz4X5~F4- z6tYpDDRQa23?UjWQ_e*nNk$nsZr+_-_JaN#TkFt^_JB|9_C-?)-2v9p9ndMY&|ODy zES5Sdl>AsTXBLQ@J_m{lr zpXQlCF~Fte*MaY@5R`KOE07S`lcv9)0Y725;NT{`Zg(PxgX8KXtD^+e!TC`ux22zu z|8Fb10Q>`?1Yp{$jlu_BtWg;)3@F)3lsC&T!5%678#i#K#pWY*- z+e;WNV*K)y)>uG-Z;2} z21LZG^Z&)49rJzaBHdZ|qab8Tggjh!_ch^|hj*r+yX5a0F-A-QT_h%{mT@Q98B<lnB-f9!@=++WZpM9IkWiPd;oES7&59p&S)LOX_YQ{t0RS$vFJwu zGt(FBzI>f|)S=@yNc>q*XzUT9A7_-PZf>$Vo%7Dh@MArAyZ#%o(_h$I@ZI=@G|WGP zaB^C6=&vOHlC}0PSymvEP*Vif{dbAflHh=8CEb=_)~w}!c^!mX{7*R2M+gPU4_@q! zNIhI1=W5qmPaxsTjFXQAeq?Wh$Y5OUPY@dZsCWDDhtooq$`+X>jLTCbG7++@l;3>J zti!xmFlNT*n}!aOR&+cb+xZ{XGiCd-Jei+XvEXuJ1BB>^y>YfLyoyTMj`j<*E|>%CUS z9T{T1EAaBp0z5dh=@r)cFyJ4knt$P?+N{mf0-If8)Hqp=_ItFJ!7nwiaiFGp-2-q0tIfK#3+->R=Ntb>+^OvzyD<}uRD0D>tWm7fMuP4Q z`)_4*y`JO5sEemH%VY!0an9QFiapYDq;s?tbSC5*8l9YZ2^s}e#(n+2;Gs(nCj5^d zuN4oZ#?}?H;;wN+LMY!GC$1Cr4UDi%BFFu{RlEkgW?Vqz#aNBK5HudY>V7G@O6&*? 
zN5bR&JOWu$@z`C!lPEx-u1A$dd&oM+Oq0Oy}yhT{tnnw z?WKM)2uJ!!kT{}|l63*0uz%|TL2gpQuuniov}`rluT8;qfIX3FeNfFSEJo)s^`E zRwxYV*sc|?A;}y0w35gR(>JKvPd*_5Vn4d$A&<|mTNRyl-$R2~9twsdDy#^H#>L$EDG`;8Y&aT}>=P$9sI1UmKS zF*K9dgbE`Txzmh+kzDR#oVF|8ta?;NegO_Hmuab3&m2&dXhAuLehNUHmLLX@7|Vs4 zeC{m7lf^10G$P^hH!ChTB0ml%)HD-xO4aQqs@;#ZOOP@=UTl{w=)X^zLtZu%DO#ih z1xG??-N&>1>&zmYdRN^K%P;pk{o%GoKaqL$Q&)2a>?)VRB)XC+h4R^#p8CUq2|YsY zwj>q~F?`;?xDGta-q_Dqp->8YhD{cZi8Zb)rn5C)e*L~H&*G1Rkt`NW|E zo?6jotDw^5olK&wK0D zS5Gam?j*{LHeSu``V7;Gl{IL}H3bn1QNbEaC92;aTr-{1aY)20Za9oQO9a!??1B3?xGSJXEGaQMWzMx%n>#iS3n z{qLsr)F!1%n$OTgKDjS34%dRleRg=vU7R8Qf36Txl>o$~tSEvW42rpW7{ggS`_Q;I zj%z@j_G5-A;RvaG{vE(V$y>2$S-endQK(&SGl;n?=*e56 zosVhuEBS45HNKoRkF{zcsiv~d-SQAOhS7^#M=kQs*!!s>a8)-S+SSQa+6m+oNJ}8M zjNq$S`@*;CN-Lkj`RRb`Vjy-pOB{{G^!Dr(GD$gbrfIFh##f|$2Fl8;pTp1lDNQ;U zizEl1PDbfqLN!RQNDd3ABevkOlSSQupc=Kro-b|LZrY++?T_} z2BcCuv205>rzgMR1f+1YFo9nOAKZJ=VRhjdjf(-;S5Bt2Oq4V$o;W^zJuZHG(YwK7 zDVmWCm(=%sWg4_FR03}g6R2^&Cmc#;piXGYqt72RO^i+Z%rFaKF(b-<6wFjtBvYI@ zwfR`6QGsdxCF+KlktYF2Swz_MU$p=b8#>ze$Bv8j^ud$J6s%`MNrg}yfY`I4#KxJ2 z+H1wUHRcadiDeCIWWDtCyzYDpPTvmcb0(nO=`_2drX?3CSrfMf(=lHi$dw z7THmkhS-K1+;iz$HZ!d~QBomgjr#Q3qPJU&M3clFH9pyj1u$vk*X zX3m!k_CD&`1=^%HwdNH8pYI67R4o$Q<+d8aCi5o=}k_LA9g3RF?yM3b>LTO4b-Yc_*=K0F#BM|v;aQMS*fI-1T0g=t#Ivg#YR*{Wa z^U;;r$H{EyC*=(f;+{Q&J2X))MZWkVa&QYksptN{6A#hcP+3I4N5+uVDD1Mgxj=Ci zLzpBQ`In?qrCz4Gw}G_WaAFu|2C;myJc|W;-urpYLdkq*RfrqGP$pjzQ|^{*KN0TL zldiFg+%Ws zF%#H^fp+`tE`{#3qL-VC9{beB$JW0Ij$ZqCqCeiDU6MwciB@=53FMEZORqOE)U$`7 zyW;7JAwin=E9(&j+oe_~F$8;^9e_ytBH`BSJJstPW@m`3EFztnUTJ8VE}p}OxMe4H zDZc`7JVsq=&R!6l48G&IE`zds?bDudlKwenK<#(?s^0JN)ncnd9+lHtLeY_?N{z@YR3SDI%#}K=PNm%WDjm8LTTPw zBM5uTSq(ZUc;$s8%!cB#gCj_kKl6Lvz4ljnF*g`uNtIp~Y_$;(98Z1je%v+F2So<* zK>{6L;$q&fbof=5+=}*WPL1&QM%)|pE9(KTKW4M(>1Q*5y=4w8lAX{IU@y4K5<4gD zJ?U-(_-1@F=`o`!rJMp(3oOr6j)uP2$l+zIoS2a10xYm{WPsq(V3T8(f+yvBeK+!vicwb zWq0{crk>T^FKVls5T5?MEN)89$ndUx^2Z|Vc&g?4h>+FUASB$R)t#DWNMy?1f#|7n z^JmRfpl~QM;*E*mvw#4G>%1c7BiI>-jx;_F+BKakW!c-#T8zy8SG^leR2Vb~ z+!6$QtX&0#R-C{@)tWX694(*`LS+6<&N#0Xrz-XIv#i>EHvL6%AC*K;XP5TFD;)Yw zh^o^_MhlBYe3XWIoqAo;sE%jt1IN^(HW{CJKKBE89txf34C}J6_ACdVT{rJ1(K5P> zriUw}muSjUj(!*#6v*lUI=?ZsO9h0AwqIO~KMpkbaGTXm(NW~%{E`7Q<&mAsLTWU4 zat=jrZ+j22Yq;HH{uNXlnp$GPj~bJ0Ofsb#d_m{iA9aWj6!3)BRW+tq`Nmmh`q#qv zp-FC>!7K{&zgorlYe+~~@4trhFzEl+)MQ?7|DhN+0=)eTh^mD6VUQUNKVv~(#F_f%+!MHiW zh!VJ^vlaCAb;K2{C9^FI=6A!9qOJOmO~j$SW#PGE^t)XjR?Sx6DOqTXO(6J7&)(;AicA{*ZM`^>$6W6@z{;+FDZ zlK~}sdR)I^_9)n%jr9VQNf4sE(liH_wVaW&Lh8Y2P!c*mcApET^e14v+Ea#r+hw#| zvcY{qs)=B0u#x&_CvYsu2Yb6ZIbkn`a22EixBtr%oHm?%fey%FXII+H1(4NQQFSjw z4oL`BeZ)-__IzhH78j=v=9#wRm`~t%sV%^ORfIG-fq!@tK{rJV3~Q`NrD>EUEY3j+ zU+@51+>#Z|tPYg^nf`yU52GJF=)UKDhWFR44pzVSs_SgV6kJkhf7-X~(|fm`E~yq7 zzWwNQIah?dYkKLvoGFF8fyYj;U20%$zfxyk7uJ5UYg_!2?xXAAZ4L5_iQ;_&>&O9- z5;Iq$B}vn!E4`3+un)fnzB&kJt^&gDk=VkPLy0^r-JC}Saw+%_TLI|ruXWaHm^1z8 zc3b^0XPI5<^0D&*E5C`;WYW#_bTq2U%pl*tX3BvyQ_L@6_&?VvbDpsp zrcoM76ewXUSo4<~c_kS(R`pn&;Y>R!>A{ej&y;m>)Vlh*y zc%{ED)2d#c#h%8OEz`LCxn8itlG^YR{=07n_c0l0jVLOou4LO%`^<-X7ZPspby$O9 z7V?}$6y*5_sHLV}Z)PKyYmL2F3*=#i**~>Q79qfi(4F>P)GfOm2@Lq0rmm-wxYyl* z&hY(XRNzn40FN-@x^FW?hNv(|%wy(PAStq-Tc{PvqWfhhiuLu#jiZGApJ|LNo%#Id zt8cZ(Q5!73NxvTra~JS46FbM86M(H1J_FP8{{9)PT0i5bdispt-tC1z?P3mbRne2J zs=G~#4g_!1;p2PJ*k3_v;c83{-@n#ju+ZLgqNI|tIHZBDL}1iDrznn}$}e$o--La0 zmGnLhc=IWBGB37=J29#&%L8t}g|85pmZ-8HHu9dfcs&|FWv zJT%|?U1@W!tRz9u_VMYwkcD@PH=LsVe3xG4$ALeh3CME{<0TD)evcv>Z%CQyE7%Vw zjC{HkuZEwXqmL}0V3+M z^#UC2Zm6M$c`PUM6rNw`$G8d7c8ve4vN`@w6x{U?%mq=-1(VIi}K0?(*d-sW1c 
z9AQ>w-42AGC+*%`Q%NOJV5MLx3Es^l<-s1yhV`4t%p@X^Oh;l>J#;nzBns5=*=D;DVzAhznXj6BhFKaz}yM4#$7@Y#8Vg z7IL1V=jgfgSj|DAhN~v*lKbFpV&H+q|{2Zs~ zSuARo24xAiK8oLJaQ;msSwy=KK-|O%E1?AHO|h}`(r~{i!+{`IP3e7daw`;hzbItC z@PZpzu3I_IhG_)v869C4OHLcR@zqmmr5){gRUUyk7o%8$Cdt1<6a6=$5vlNIw1LT6 zc8qZo>=KoB1&1wlr+3IG5Dz_2Ua_y9dJ3-QuZ^0fZ|`*+1$a1v?kl7*?tQK~TX3oO zYKTW!6@C$5#DsIPD5m6GK3Y^o07J@B+Jku6bfaeCG_1CRnq8lr9#m*U^=FO?RIFXaZz zU2SiG2ha-))yD;9)k3>pu{mlZhz%05xn_X-MPGq#K2>%=g6vc*<%8wS^x9spf6t}i z?B!+G<>^_)!3q5|WX4jk5_qD!VJ7JIboBWbDbEvtPOEYXfvtoOmFt~-<4DR zUEkK#Afu9T{dc)niuO|@vOcId3-GW7S>GvPmzj4+8oVd}bW?bNn5`Z1?UZT`fu&vH(&&7u;Nd3=epv!H*!v+b_BnZmqT zwdLgTsp-RTbEH}cB@1^Bmz2Uc#o0b(zQZPLwH-`)E7*?yO~NhNOH=2HgYM7qU@4@o zUqFycVyvaS&UsU~tVu*lhZrF)bxo0Xu;}Dd3m+rrj-F+`%{b{9y6%UGhFH23?y0W? z03jkT?}%QSUc6DW4rXI3r2of6l>=QcqPrZXZ&Lem8T9@7N6|hD@&qw>ewT;}_$m0W zZ`?M4Yu#6FdVHs>f>xnftu8m$a9$lGlup{8n1}}{+}f@B%0$MP+kcJL36fzv!;_$U z%>U97m%yTymr`p!jAD#|OZjtwPz8a3OBMOLIqK8eG?Plvfr{E&7UG51jf{TNCm)eD zsTK9E)C~&cNG)$N+clfhBJOWO(O!cZ&wV=x7zt7VWyiV1Q3O zgJL|ZGyCUYv#DX6O(J8;@J4qCk2!2Fd9K)`u&5>ST^zYpZ1dk=xy)grp#jT_=)89u zZXlnPNqT72*D+Nn(C)n9#wDYq;Yde5n=MmtpY?cVMu*S6F7$YK)#DS@MN5I~i4jYi z%0Ojb3tPJI2J3BGTq7bzt=r_%K+tdTFpjUZ&gfxTOTn+Fi;;x)y+G;Ml|JpYF-b0pO_Rc69DnG&xR8WV@TA7CDI&6828 z9k}zNh3+R$hqfJ!di-v#$OoA{+>ofSg9*|{Z6OTO%m+gS2YIPX_^ z`xmc}-$LCX=R5cEtZ^wxlSHp8Nio<6iBMS{K*A|(O*#}{VPgYuBySP zx?jtGrLcdv79#LwD@NeVt)WieBl7%O3$n=DT#s$*Ov}pYA)q_&cz;hykn!=ng&Qqx z`AfuaJsc9=bi~bN606QEi%vr%Z|BM(VAZ!B`O-($qkSj3iJ-m)-(P*Gw3?M!#XWWV zE@%LQbN;zr2*#dUsfEP8{;ik6X4`Zkhw#CjyIu{zUO)N}VEjujW3C;Ss+{4f0oI`o zT}uJd&ivtE#8^ZUsb&JL0kjurF?&%bS-%PUf@2Wymj#nahzCr4|Ma9@$5sbuyE1a9 z1<9|#UqG<&to{_7^1;Qe+wYuR(XTy92yTE@%A=$b(=XhzqZL8UK6`bud1Vwg8HsgQe_v zM!VKuEzgqWb)}@ABbV3e)skdIszu4p+3ZS&*Us!$S2r~n;+adx!_?#3Os*1#B-2sJ z17|u?SVixfBj={7jTWv4a%p*k6AXhEgpp6LUmG>6bBIZ%9@Dri*g#SwT6(P>jgN8k3U&9PFDdbp z+Y1x}RB$N)Eb(TRT$2bai_N-&bSK};}8bkvM*@qUnQA@NmVch}tRtmP44mEA; z?oGM7m9eJI1_rO4k;ZklqJ_vHYFx$@@jW5&-Kv~5<>BRc2!H!0WRR$*wl9gW*uYrH z@JcfDnv$|!y&Lx_Lc&Y0&B$j7~TxwHrg3+X9H`VXmuDFT`5@0wG0| zCb5l6zpvB?%(eN2!$rjkqkLxgm^aAu))>63&=L#Oul79|H!1;4=sA_zG!acyMKHve zB&^UZ)_T6x<9CmrOk$KLl@<9s=<+9WF+Gs~lhatuCs$9Hc8lZX^OZD44mvc4x4j|q z$%5DiP2PmQq#TyB(PRpt2spW0v~tan0Hec-g(YkcY;~0dS3N`4E^Jy^ziae3cj&d# zfllGhU$kMdJO}4soCilmjh)9%(L~F)t{|4DWCcpO}Jj4+M7C(O&HoHFw?3_Qsdq3ikX0KWuD=!j=y1RM*1zQp< zUBkBPEz_pLzB_2)SD)c$&_g`vrW8MUa)e_c#QI7=nJZMJWLdIlGuTmrAdil3-$p}9 z7ty1pAW-m`Qe-E(hz%24`o%2=Wd<{3g=nA!li|5ye@yBGXMdw*Xp zv6C-h)n$SQQeh4hrUDIK!WO>xs)nw3_0mP!HhrqZ!v1T-j~J80!Bw+}!<#B^Udmdb zl5Nz<-8*Q=W5uz0Y%3L!2hgG{VC5TN+pHNhpLl@c8rm|GvZ{qTgR}t818wh&K!;v&6tI`F=a2 zTJq!Dw$hNt`5w4V%{J_$dlLu0P&1Pe!$4R+&soJ_x%?UyWdRl!kxuW~Lp$fsp@V<@ zL1zygfIZ_-5^Ph-`*eN8EiZUO{l~Xqo;}9%%w2J-^;xw5@c*`1)9L7@O@NtzNDEl0 zU$iN&GK)F!%JRRk%j4LFb#&$Yc}kHt9~Ex&Ds7zj3$`|1#az3OGCy7(@p5y~F3d>| z;{XphLhsZfI8bRnRt0$1uOOtzLsaP1Ca`bPn3csq33LPI=IO`j+LcQPkx-gW;9kr7 zOg%b?IN!&&Zl>Bl{Diq&MUM+}S_xdZ?PO~M_;-x|%^LJS{Y2%l;+BVT?)-Un{O!1k zfvlBaP7Pa>1tLtXc4*z;Oaxz=i2Wyw|M5KsXvwG}ptkk+l$sSBa`N|cc^QX?y z!<9-phI&<_St(m=u{+POu;P9h#d9qQVCsjx-w#q%&5t6fHA#7vE?^q}u-E~mv zY(l+w*twGnaTjxxEJ#UGCDWX~owu)EOWW~Va1FZw*iglvu8c!R64AnuQ)%1pf6F%c z;!jtkq-pf_O)cRXRtc^6We)B7YZD#ckCmC(Gk`Y-6@0QZ#pP48MVh|nq3(9=&#REu z%v^}odW)<#=-~EU7{9aN zOGcRrDW%5fS!0AD8rs~z58DivF$y+-+IWp z-SJnAwT6h$9TQ^H(mxPCbPMb}4XYpzP@#$?DMS98qCYNvYqJ!>`x54Y>1X);hgXuG zIfOYg8Y^y#aT0 z_`Q+n=I?d3$gXp0NGYDC;r`h{_*pw zZPB`zV``r9YT(bRzAmwYU+mfnGE}mX0hsaU?ew3?oVi9dT4By~<9M90sQX%kN|1is zT)&ZjvpkyqK+A-|qYJ0cVa4_D65sjw-oyBN{xT&=`v6uH7nT+1xIu5IYRJ1^V1@d8 zs@|d@Rdzxpg!S+FYcp+Gz6PP8a9)YF^P_d2z}TnK@^uZT;#I6}UOZ|l?Od~2?!}au 
z(o*p{m9TRB5FOit)s+~=O!;$RwR|Dkzi9_;#GK(3(b{|oy0_w@9cGn5Q5Y}pVu>;F8wAC)g(UK*bQzLulMj{&wlAQLL#VT4Nf zGZZQzVZwxT{=#`V@ND+%+13EIwxfT=^5yhsakay_bEnSqar=+Jj}^C*C*g3ino@V% zm~nEDnX8`(dv@=kOqnuDtM0q+K1zgLkR}`vPiNcQ>nMbtKg(CHpe&g)gNc)FnPj$= zA-u!K;2k6@836kiJ9X+v?b~;NBu$Y_!y@0sGs2|_!);w`Z2&hLeb^zmfiHmoCv> zIM&X?A@}$2!L%1QARn~!8oy|A{3=m%d-gxReJc&C@HpZWU9|dH41ZI(j)^WGO#pis zj99o4OSNf7kG{h%fTBz_*MO4Wx_;|~#J2ymMT=SeB9wLVy^q4a70YPci}mTs+0%xE zxdJ_4DDp-#D)Uii#i;fF_0w>g_5Qo6ZF~iY0N)m0=IQ0s@bU|+PY&L2HO-#(lhRhZFDY)!5O zyTc*#_NHCZPSbzpH1nTrb{3A?=i%&5TSSMKh@yW#Y&QO-dUP2-9B%*FQ0!nW^D%7Z zBz6kUYOQR>;@tk{)Y)=Ra~1CYrb**z>YI20LV@o;PJA`<-yj+-ST2Wy8T+3^?9g?iMo9RAQ*h*( z`JejXLX@j|EeYwu&kmFP7HW*vZ$>Rz|M3|V;bESZk144O6{N%1zLV;qhwve023>|j zdh4&#DRKG?l(}p<2sM(7Mv_>q9J7^aE%zQp{~VrM(En@8ckrQQ1SQFsnOH=~ zs+jqI@XysWylTb3_{YWEe0PTq<6#;vx$6I>3FBy1+m;dcf9!kW!J&WIHX{W-PCYYf zB7KW3p+-N)3%-I76z=n%f79xXpMsAaSpxH41VzzrWGO|4!K$SdeS6Wu&pY9RS|WM{ zTU393wz|Zjc0(nY{&THp^UuyHBi`x%&Gh>%QIHW~@j(1fzqRncWdCyI%gXbI4=0N$ z-HH9#*@YFi_|V@$HB^NB=k9-N9n<)m=%0(N+Y`PVBp*$T9n?IHg-=f&aePGkM?YkZ zZ2t`{9*G$EfMa_8bBxX}mX8Ybf8GxhY5XVsQHL9Zq~Cw|!*HWs?cj(grSQ03y?mKk z<*!3m@R85lGgaPto>q*XO~(!%0gjcc_mU5%eTzT(GjNf1{;`gJe7^@B-mzEc;vIso zh;^mNn3i6G^Cnx7hqNB9VWewRD2w0#V)cxLH27_7*906>|5-qsGgMN2JwOC4V^&a5f9YZjDKqR`F9T-eT}|u2d8=|JbzWAxvf9G_a&{EFwaN8 zd4fSy{m+n=TL03U5++Nm=;*-eDZkNRICsLy9e;*7e8Pk5Sm8bcp;e+?fOUcO%eWY- zLP#C9|Fmy@qG?|bqqMoQAk+cI5HE9We(8%X_Q`R0&`LUc;uMvB?x{dij0lSaV%fww zG#pMDZcFEdBPe-Vb}VW&jD-GvjXs3T-zgK`OPhS#5!>hMffCD!SL)YbD5exm|DE*z z3=ZyWTdcmZj8doPKkMMcAs@P}`2`2Hs*ekdbM^C_Idk{~H6Pm(;WgAdZZ!MP$9*=A z?=$}0cgCMXY`qe=&y;ljVDnSEo~+I$HKeg9&b|0OAMq8O%IqjcLY0$nx_yMO7YxTV z|CyD;r#$uo-ZMpwhmS)0Y1!DBGUxbXgt`COdHu*!k5h@dm7TfC5_DAssw?Zf$^4s+ zd4}Bplr|^k84knad#3foZ!phN5Fd2s&ogP5l`eAzVFEHeoQvN0t`oH@{+vAj{1~^Y zK{wOS^G}iyFZrE4|356*08Y`PwvFcg=lkqoe6VQTuLH54o0C#!lq$rK7I557g5&nR zml5OkdDAlCw>2cdKut_#ZO=P(Hzzn1NO2#c*!rAVRZV8Cix39*nLhm-wQYhF_IUw~9zB}s)vG6z{{6qDCQaUuDP&{Zf--*WII8t@ z9XpTB__H(mJ}kuIGbw7)5AYeaCf7t$G`SjCAx6l9em-5h13cpLA|8{Tda6IZCdSbJfxP0D9k;yVw)kVX z56xpTvH`6B*XD20x-mZoh;>Jfr%aER0nXkzKHcawx!(}mpYul!i-Ygjrj1I17GtCt zVXQx4ij?#?oTRyG)-hmRFDRird0iITxk>yBE!;U$Cn03ce;i zkS8A;Vbv*a!bEgn)k@ktZ8B_&A66XJ^u*`<@~-6bY4tM^YxBNjDJUr(WXUpTr%R_1 zPZBXPkKzWK0>6gi4?*FO*-x51wb}e2&=kZ{E5C_^N&74`Betbw#`Vyt6zzYqdXi$HSe5W0^ z!NbXnUX|}&^cz+}K99Jw7qH4MFXem^Hp36dfmJJDGyX%+qXMv5#m~E0oAcpZvs%7Q z@iE59vSyQim$0gQ3s!p`-MF686fQ!~PnsEM*YDr;mA6ail&?VXFgBdnvWXr{ zONE;@qeQ75REQ|^Pnh{~7oR^DPoALP`}LL;%^RmqkUyxhRCoey!^aJ@>>jMR&0Xhd z%KTV4tKQYmj@zM+vqSA3#2!5?Jkjp|<=^zAMPGixjYr=rVFh?;%+m|_XfsFVXRO$9 zs7mjCl&97+w*Tu!kEE5~enW{fW)eqhl8hPY)b3rhVazDHfG@1Y-f2Ol+k9->b>YM@ z`u*EJw14&Qv~SsBap-5QSjE!$o=KS}4;6o_CH^8|tMfmduqtR&y&Cw~v()N;(}Fi-1rn9_uP&-Gim?I<+7r)WSb9h-@IYRbHkM`i6F44#bXg550wtFmbW6RSio}zc_w*p24Bgf}OBYo`2jEUOBz2 zcXx4YUc_MlJLk{EiQVz2`Obq;JIeR#h$`jj8h(TL{?GR`PHTn(V&jCdh>H#fHxk=3 zN0n-nuikT%y;4ljrB7%Lf#$D|NrS%)>IQby8X-Xha24E^XAP3vwA+=4g32q;_bEE==$}v1CLL? 
z8~VBDDjB&Zk3^q=6Q&)UFg(q2JX!!w#JpIQmz=P*U1Et|L7dPsFV#d`(zlFI{L#Lh z7XCDaw&I}D16y}VjLyt(cCZ#sao`|&s9Z4|I#dQFwyQ@oza51YY(u1!_wk~Xvs6Kx z+J1@FFIXw7$zsQeLoarHpNiBh2Oc#Ri+-L;TbKTYSeT25WtR`MU`yyEwb z0iEcf^2HHX_Aq@}<1IRO;2hVGeH*vYIymvTue{>$;~4|-4u%yu)+`nP z>gK;c=TDxYSp!DXvAqYyM!c|t`*d$2JWrA0VRuT=n&q+5tRPCN|7Ly(1Jt!E3CAr~ zPrip3w`_AmclFQgKN2fNVKd0jgV?aSia#uIao555#C?yA*sm|Z5nZfi1rf`aW#eYj z>;YpaWyUm=8Zls#r%Qu&?w1wFXOEwv%5T<(6Q_}F*7=jCY1Y8ev=iq5Y+3#nejDRa zfeQM=J3CgeLMdJDtn>ux#fHN;oZR+?X}*Dd`>vnA0?xBl6f536lo-zaU4L#sOwE;Y zkW`^6k5R+kAK391d7{LtN4*|>ShpLF`fNy?Zv2UY{7 zkvSJT_C1unSbi$?eAQro9R1?#$13fXIMB`<1{l^q^V^^0KI3Na{LhV@LzgaI5GOXz znK+m;FMfsltX6s3yglo-5W)$-X_|x{$eNB)XUTw-+Gl9ZtR=J`_j*=1iCd2SI?%H@ z-;b5P{62L)oOQ9V0u<*oB#du)l48Yvg~oL#iMv`CR6f+W*Yoew)>Uiu2L*gcZdjA* ze%w^*hWm4;?||jd<4lj(3kMRH!oM8CG$-_y6CRS3GBUUbGt5 zn^L4tO$cWqfB*DYtKuTt&N)CtdmWmuz@J}2Hbs;B4UxHEo`1~yGd_~-T(yo;;lotw z%;_mniX^gnb?KO2#WCM>=x3Dou_6Jje!-)m?{=Zzr_4o++eU$pZ{hjRD^uBC*RbR1 z?^(-mKVG5t#(hm$ctKC-{qORHi}ZQbH)Itxe*`M?Vs%RVKnhvmy?W{bt3FeK98~zJ z@>KrS+IWU9!NSXk2%}UtGkw8}b@4~8G{@C}~vXSC%u|k~rZ*dL~ z&HntsqHFE9Y4_UA_#G-vZ+zd0Mz!xl3r9>5$89Z+sU3JM+kuF%|DDc1)kGuL_cFkyl?A192TNHuHKK%;C!V(fpl>eY$k&T=||1$s%7nw9$=JzA_7 zty~%Ha59hT)Tv9q{4$>Iz4u-^kKJ|bOS9^%S_O3ZvJ18SpsnbbFmVFatWguZ9!;J+ znQGKxn8YDlr;hLEamPO|{-)TlVFP8!l7+T!-wruOfyqU56tquwzU~R zZ)}6Q9#$`U=Z+q+O*24PKgYP`*pqPL85YFf@LS_UiVO#tH>gw*#NSLVF&{ZLB*(3k z6k-k)eU>N$7j+~3N>NmGeh?DPL@(p7`3!B4=3TGTy~g zr!0wOV5_mx0b9@gxJ`g&3k(>A8W}1IR&bgy<{6GVnhbF|hX$&K`>(~>OxX$2+2Q_Gh~yG=XJt=>{pdCUeKaSX=D#q;tu7&`N zRqvjW{pYa30sT15*qej6$UL|E=uLksk=hWUkn47D;lY4f1OS*q4I>$D6#_Dmq zgis7#ci5Sq8Jt{NGz8PJs!@$ymIu9sh^5SNVmUrC%W{(|>vrvbV#juGMZaF&@TwJH zkL2Drxifpy-g(p8JY%YN<>EzGO@#eV?%W|Z;&z$9LpRe8N3l2a{dev2@3&7nC>?Gm zfC&|aM{rRHF6w6bm9j|rmrkGYj%--#;lq%3_TV9}7vk>n_~QUcQzfGR9OIUG0lOmon&5*OsTD00h~^Zd!e4#XVlZxysMWlSw4I%_a?ono^^cxAKS7yP_=*I zyk6HdNxjK<9>RfZZMb#%6wx`Ls2^Y1QK#gG5TT;42LyhWv;M)47sK=EvgjFH=24md zTW3zU?TseG5F=ZA_1Q&(iqW)d>-@AUh#HoEd`;*?PDf7Lv$NcvUj-}b9W;1W| z@-pL!S0Ua&IGo#gm*Dt+JVQ|U{G$6xet(!D}zBO+vi_fhE= zg+B7bUZ#rV!r~fRQ(aU{_UBlu%@B7KD|~%fu5?!j@TTa;xLv1khM?=h`Ru)=!2+9J+lBY}U-!EXv&E-}>YG4|%)(+Uyqm@LWv?${jD{1d-n?{;jOE9r zUa(}hNip@`GHbxCBPa)P_V{UU$(UcgevLo$-pN(fdnadAuW!Q-5%YAqhb_}#QPx&W zVExNR|LVP+vkJzrviJ3Kt-LE2{NE5$`SPX9UdOU8OJ9b!MVv(FKXdGal)3*Gbi7i- zFS6~$#rk6<NhpuMM@O`pi zxW3P{T%HeOI)5wk|NTO>ao_n0UHyFj@A`W)=JOtJ!=lyF&K3_<56nNmrYCfs|3`MJ z7`HCX@0{Pzncs~mW$dUSQWveXn&*xH*E9-XV-?`(byrq~c{a*b@=GeQK zYkeEG^A`U+!#jN}a-Q*==}<<^nGtPNY)!0&7h7?h+lz?Ldpr6A{ct-0LZ~RLf{Q|M zQCHB$!`t_GyVq^;P8>Y$^?tF9-2eE1bR%`%;I}`Q=hKWnKl!=>HFTRCW&FJ5G1|6z zz2$WKKXbqs88_xTaWLplm^);=Jg3=i{M>RoJhVj@vFRt38snLC$jUx_{G|7G_9`Ot zs1qyMY0_=ke=(jXtpD9SPhlSK6FonF+#9&c-Z+VIYgG#%dR%3i?4sQMyZE4_Kz9`> z#2hOA$MyH=+n3#S@~=`Q%x^?T~TN1D#K4y1O|IYE}@#xs0qi&&}{ipPc z9#@%ef{Ti&|8~W#^5(Mr`ICrAZrO!3Jqk+AO*LcW>aK6LVo@>lHA#_z0$q=|k!wUq z9%R=fg|ydyL4QzZ6lD(TuVF(8jy#;i8Vg&2pP&qnm7$Z{w$RxF2W15uZ{g=t&{exp zF-rgOhcH&$?x%V>RKl(5=apW|di9`{eZQt_*RN8Q9(}NKssa1GAp+y+4k|CoBU zh*!o|{HmA4HdR8E6(eD-66F4mgk#*QI_7%rAZ&)XTygN_t?A#}DQ*I+dI*jG@aj*% zW~=G(b{!BW^IZ|ntMmRGIuKi!dn0c05mg7qHsjA<)LU@afbiTx|GTG4Ay#;8$trAH z&l1TK(yx1i%Sxr7ESu z%HN~wH(-1IDau&7Eak^4Sash)R+Q%-#~}W`WMQ!Yhe}X$%S^CH{k-~f@|&*^?{Z*+ zmuTLaO15r;#@+t?r+O6D;g(%EdjA=_b;cB0{M8q9WYY%K3XDH?aLat)x$ykN=~%6f zuq;vfe;mIT9HTc5?kx_L5HxUf{W#oB!ZXA{$b;s)c*U*avsdWsUoa=0XGf|2cpV4Z zx&Jt(>>$i-Zcd~5%P4vFoZJmb-(f`>uZFERZZcvPmjq6Y1rCv;A1jrS)%0&~-mSS! 
zW;NUX>uQDpO$3kb|30k5#=&_~d;%-}GnXk3_EVy;7U*ORRQ;C>!~cZF;XCX=?8usaUNi z=%wzhgC_KQPOGQRqakm9i4}zpQF9I-V3NN7IR5E}*am(DtGwRFR{l%{_}*9dIJ34b z@(i{&pQdWq?pvkla{(IDwJQ1u+lo&fJc`wF4`LVA{g4oR{+>C0QexhwM96@S^M(ZO zf01J^ejgReExRz$+c_#@6(LcFxr4EKGM$xR8l|{#4UIqd7l-Fi%Q1*o8WAeNqg4oD zF!bFoCFW_1QP_cG?V%)IKrGL8#p}Ulm+5`1D$P_dm*_r&6`3>oj-(}{r(u_Zi6yV} z^YcGf=|`y9&@XhifY;BztypdQHC93C_kRE^#PNX{-cA+{=U8cZ&_u=EzJ>yjB-0mJ{gOu0BS2G_~iCG`;T#fS#;weyv}7 ziQOs{9QI%Pjif2vf1o(=;!yLSu!y9fbe0ZTD z?|dQgT$_*X3VFf#e*s~JIJWZDOINA&_+FHe?=y@)udZD9<0P8--Du)_B>Xp49E@47 zO(uL-ux)VKO5Zxp8%ilNx#vwNK6};u?^@>_#QEJ!^*cAC($7Cd!`t+bRpPR$zk^zJ z3xU#h-ym+;g*AtRTJ+OY8q={K<{4J0Vyo;82^yTlPFlUlRDl-jq^c$4vgCgPTjKAf%h55(3 z8zy!gL>mycN&XJzI`U-W=c6aj#L90z&!|vRd>jU4^^|#taf=nV&G^~rq!{$KEsE#u z@zc}_p%OAiu(EUK0-BIICGBLOzuk=@@9i9w?nQ17uG5r)P>V$sdI(Kn8zn1 zCVJ&&93H}>DLsbanL3Xid$9&R+d=OharE293*ZQhD2Hiah=MXmMZ)zC$6vL}Rp7#k zdOP|rVD)|GOj+p2;X_ibNYNrRcFb7Bhs~sgapvpTgR?STeDMYP?Y9LoB%glr34Qn> z$DZ|5*)nD6H=IS0Idf*(v15lOC>aOv-kv*GPC9-13}wU4zt!-GB*u9tk{tUF`0hJu z^2QsYqk6S!G-(ocE$cLC5>^J+tR=K@W5;3@@6(2)a^2jOD_3ImFLp>H6)IeamMz8Z zO6(dm^UqAFu3fuPo7V43FI%)|K|Ol(P;%_m@4rI1Wf#^JqM(co6$#fHjDOI-$TGtM z-Lebc9sOF0lZAD-Wf#VfXeY0{NGl1hM zk=TeT2{S;v{^5?J`xKv3Vcoh0Ry=aOL;bd{-g18<{k+Zf_@;IE!xP)5 zFlmQZE{lW4_ELtDkBdVtafS@^!i@QFI^OFvOo9Bv!2OrQ$@cF-sDz1>p~T~wi0l^h zGxO{=EeI#8<1m>TRNdXedK>*expNy0tMoVyueqQmJ>TRU;M9u^`zs5`Pzfp)mHnVS z717SV$ceAIpsJyQ9b=6*WZi!`oD8v2>%+N`E)cJk^&)-_wgmH5 zOPt~EUsCRxPt#L<2gA_o-uVCAW{D9%5Iu^D|wcLGJT66i#Y5FEl zCehav2O8X$JVk{4XSRA9jjo%v9{{@8U=o0!t()vd~TT2cstMp}0%KyU4a;V-< zH7XNUsmiuDJOvQ8;a)g!aqqDf{ODxD2OvIdkjrk%_TZ4YQP0+(Bx%ypYb&w+O%ijM z)@c-HT0d4)($UQuC|5Q77$fnP{*Awkq0HqfQo>XZ2u?Et)^bFok}G0-O2qxQXYp^S z7F(QrC-6KR{$x2hz%T_40px@09;)AfDs}6v_4t{2{_&x1-{SxnRxO8>$JC7)a82ZmdCY`_ma6`(K)S(g%0K>}t#Gxy`&Vp^B z*oxN}aetHI<~55L&h z>KC$-h*pY$v{)C`D9&!6U&uGP|8Mp8_fGo%e^EalPR7qV6)FK%yG!=xt-~ES#x33% z1L+?+{_HrNGiWTW`E@az9Q*8HV0;Vn;WV%G{!q3{hv(n)-ot2GuMt$VP6c`yVGjIz zJ~;m;cO6W>_8(29>sO`vpW-7n>=RDCUuca(wf>|ApEswH;L|sY$ZT7r9-V_j>Amqi zDRaTRQSU!KT=U&L)e)zc53N=DaodU*mEp?o5Io}8uKD$*DcClC56XVJlktB$M-}-1 z-)Z(BN{Ow^Dh2y*H87gnhW@h?j^mZeK~oYcft#n^znNtJ56Yk@rCYtbi`_0dQEE%DrXn73LJdYdC_pyPHqU-<0M^D{gQ3~r74P}ufiLE6^0KBJAea<@;v?s zoND(_mzr%%taWm;=`rDPP2gmJ6%o!CbA>98-U5C)j{-fwIID^?K9Fd^EcZ2t7Go19# zef}<$s#g_%mk@pbW+z%RXNmlc!q2X_IEd>q7U8I_BhD?}v3qUAHt8sT|24~o4;N@i z=ea{9hP2Yj9W||US^@MygjU(VWfwhCRsCIg?aDP;iI~Ot$`ysfG^zZ~`Z5^0J^LTR z0b>U@^E@je@5x=NW1j7I=2`j&sP&}26c>wbOcF=q;iIscY`)^pcfn@6e}(cijEyuKGrZli>Z$I z?eqE{CqW4AJ-pRbdj;H+MFG?FIYSS*li&PDSj}y zAMBhR7?#KB_QoG8t?8!^_~W2@{z-RHGWS12g{zlCO!W^~cp!!CuZXv&U)Aw#5mdk> z&~9=3Ro%OEy;qepl8{0oH1Lx_tci4()Q`zK}1nwhI2;czlIackAA zAUP3dyNKt|GnS}JyM7U4j4#ZvmB^mALzU|a8yGQ}Y$ zxwKirFks6w_lIHr$6K_!q1-orb}wq5A{0n3?InUNeZB7Swtw#b-sQ`pw*S)UQ(ljZ zsk|O&&#|prtvm);1aJMt>Md;3PwxFOG?gbm>P z^9Z)Lvx>PNwUK$$H(z$|&tZeSQwWj3c;05eqahL5s{hg-Xz>)=RXbux%1c&{JCQ)gcruB2E1it@y?zmfb4V%ditX_#{ zMQ-VTuN)b??XzbFWkz(+)Kqx{S?g!21qBG!yHSZl;dIksI3_n~?Cn}GAKQ?y9SWfw za8RY!H(xdxPpmKvw$)L}_b2ziM+VHZThxDa(^h}rG? 
zA{TB@KY}d0s)7Yo@g`b*GPBz%fr8ItASNH)wa=S|ZS)t;oR^B15sGB*hV95B|Dj66j-V%RAZ^w>ko~ZWq2MiR0zA!o6ie|0BB&2una^pb#k` zf-G&7i)wZRL!^u_#;vk+%uLLy%HE+}DvZe?gn!`j^6@;+lxdh>LQrr&(AT&0T;XsW zQU=bQGg9#e7gxV@f)O#K059_hud2Y;&#E+$Hgu`+mbYr^Z{E36Dl80~s$SnlA7b1p zdRSc_kRFhU0*6ZA`ENocxXb}wCNKgQ2~ETEY}I;=W4xabcUR`nptm~vNDUoBNSb$Y zKPB`9!>0u54{iCS;5lpyCs%)~hcPcThya ze^9Azb_B!C^#`g13Xydq$c#{fpso=Nkut)VK$Sot$T@cGSTA{s6!NYD2P}K@1Qrji zTNaqS!i5Wad-ti|xS%A@>)VgRFYqyQS_bOCfocc%GQy5EY0`v!#7YO~2@@vtGG)#z zWureu@)X`I{0+s)ngYPYi7HeA*B?J_d{}*;eC+sf5Ak@l9@U;ii4%Eg(xi#dzhvoB zZEY>WDh=unD7Z->VDdmAvZ5h#|2uZ<qbmD&CmU&+$ozmMtlN>@NMftJdSI`%m^is9bd+CjF-m9I*ZG z`%rdqAY$9O;7-CWHjb?}!>DI#!BLUS`@X~@-d^jhyzZSpcGR2N^fl=}JLj-^PnyAw zM|J}IQ0Z~+nvU;y{n`~cW#@RKYCkE?Hg?EyeU+|zGg>x-oTT2s5(T{tzl?z+?X(O7 z$Htw3c$%!AoeI<5c>}Q@`46Jt7->O%269aO#}{_#>xk-=)Y$dzs_cQjpFPMgWkK0o zV!X^5@brPxkJ~CX+p*na=NCJR+=MOlURO9~25FlanHP>7@us|`ZI&UWcT#M2U7WQn z6DzL~6H~<{y^cA&W8NHZWSys^|6(&Zt5oZPZRGg3Y@ZDi$Ejw3d~C;7Z{4U763=qo z7!~&^Y!#lv1ByN54Jex5TmDTC)MdFr*}7!Ar^-PdOEdt;=T96Ho2icvNxhv5=fPIn zyyLLh#ZzwlEiDlr0Z!tcJOLhd;PV{n4jUZ@j_2j+{r(?}+(y11du2-}{T};V9q%;a z)tYo<{d#X$HMrunB@V%6KWDGXCJM=-M$6grZ~Hkj0; zp%rs@<67xI4psAI>Q0I|kN(=}4=2DqOm9ML| zV~+5BaYpW25r8%8f2zv*?$0EaZ%yan-LO)bv(V-w$Z+<%#8xKDYWsr!4I9os-N zOdZU_?Q>^CZc@ZkPYs*lcf^^Sss$eI=P%{{?+trxnZ_~rRI7I_n2V2ZIiy{DU#J+j zigxMD8T=mQ6xmx4pV-yE|MwMAr&soL-W7SKYOCnX=(RcUuKlergS~F*gX{WLvB&qg zPq<&3rc6*`42u=&qGKQ$-ka$+e3ENf77ogGsm!9vK_02KK|6N`!QK49FP_{1F z8mb)Ru|xxa{7=$f7Ear;<;qIm!u_%Rm9{fA*tamhDyyq?+?sxy9OQpe|G71zI`$C< zK>yb}ASUWz(^ABdJ?qUMIst!ZS42G0C*kPSzva5)oXtjjMg9Z%^QYeME+JMP%e!>` zf`=899!_=l2EX&U$MGgFgVrKn4T;J4O~bZMe8cPBzRf$lF4f;u4ktL({DgNYI4X|7 zvCJ_!xgD#gEs$86?07rt#J1%++@7huRLotD*Nek@JRHVl#bFD$w{7{~-uxdYdN?3U z<8^03qoRn0#wCVzS!b-D<1w-$h;6U|PA+aMw+)AvDG{OTsRKvFQO-Qd8C?+$nfZ{j z&pV75q3ldH=`!N5v(32QeO_jp@jEyY^zYlW!}|ix9FAqmP9=@@de(0#Z5iF6kBtd= z86R)iLCAa@6Svzl%{`nRFLi96Aauu?O;Y!5IO$Ft;JB6eO~9CQywiC@#(4vqcJg+D zCu9aK$Do5YvP1)b%=3R{zmXn0uFc;i{GQkv@iMVm)No3S)sIRwkp8m`cx>5j97~n+ zy_tKrNt^HOPsbUed3eyikwuU@(=4u2ja?*HlIC#(+BYSk)*zJ|J=RD9L@=&=Ug zcep21V(G!Y>5`@!YLLegHE`wpB^m#&Pb+8ijJ_khBfIy@_@6j#HLv!m zIfFiMEb7C%56Jv8bB4KbXq1{W;OX=72hM$_=FFGXnu-&zqrK0rdsFc7g&lyOS8s}U zY<2Dg%Uu843a>{!o=+7$tSXl{!P3A`7hB}3F7sw3K86@89NxayW9R(P7GHYH#?O*! 
zrY?`ap9~ztD!QR@iD6w1W{TBPgG7Wt5T`891y0wmT=QBNt?SLQ^UUyHlQ@?$XTZnt zWOuAyC;j67W7|E~W)a~`;c)$-GA={nx zuf{XNC_Z)QxHq}`P_N~~wdMXFkC?4|4=73M%KGJA!F}>=dy1d)S8-kmOX7LTG10~0 z4H|zuGKuHbz$Tq!{;4+evrg$pI6SXK{+eR%ZL3WDX45v7z_2a{GbTz>Xnl3=e>@1h zlZTFZSa~e>|IZ!2F;$RjVaN^S6I~3$x*W_byF$l6G+ZDDHEI43RAn6n{6r-}-$oSI!*Pp^7D+3f4~6M;gCCo$HS_AUkCKupD3Z%_|;dv-Me=iR^+-bfF@3yVC9Y*tNxbv zsj7-9Yb>&1!-jd8wPVPqaDg1u zr1;r!iyc~qOXpgalo;0KU}ozmbPPnp1#&d~VXR`;vZTOpP`07QVgfIV<`I%%MQ_4M zXkp}Slmqp-z{%=SRJjHiu^k&rnLBjqq?R(PT6c5}h~)yOThOl=Z}MJxCv9Hq&PT9_rkGbbm`PdVpn%!oJwDB%w*!w3?>D{a)Hyo zvHwbee~n51q}N_1tfoR!qRl_m|M*x|-)P|qN`RcxxkHB}Tml>-Qcm!|19We~`{~r~ zorq<74X5+oOI7;}q#RY%R*?h0ub`i5V+-K*>!8J_BpLCnd7$&(=@0pimb`$=tVhepiU=O@iXJkNX@6Pd-- z|Gi5W(-?#?xQ=*f!V_Rr{m)&yF8VV7EmVnZ2Iq^fM6n57+JccTZKi7T=TPR>poCJJ-`V4d$ z+t{yTYkj!?MVi27?LYC`1873S=V{xl8G=jnKx)KYzK+!?oi zbp_%yFZ$|Jii<5HuOSxMeJN53kK)kmcaXt3M~l0BiowQMLl|%BRKHC6FaeZ=_V>4_TnC$Qgu^d(ABy|MmS>|4H!CNz4U;0Ts&p@}nO zBt&GVQ`m0K!<0B(da65O93{)1L-gPfFIw(dp`?OY}WrA#yBc=1rC{v&_#c7tR|NdH$4W*+$JRgJolK;s!}#M6~-) zlaVP7zkDmu^4^G{hG*VWm{%#Vb!S26_Ox%sGJ2g)&`yxR>R;UL3-EPQHV!D_2Eyj| z+=K8T2rYq_-JE8%ZcbReE3jM?o3fU!&-ZJy4LcCe8G4WcVo_Mu8}s3-Pbh98%rk`Z zxGzO2#S>tZs*&}5bO5??<_vvbs))pc<}Ja=vSyQK6^C9iqsbL`{-t{O5qfd@Y{b07 zJS$Zg^Xxzr{ib6$T^#Xc>+5;O@yrLI4nFEB{e}%CbN@3TL&?Vwqg&l$S8?BttX7Gz zQdacen>0DXF~lJp)Cj#ix7GN+GJiQG&yj;T?(EOC@XX{^>G@x{(W_Jr;V8Dwm`anI zG{Uo%!#`k4FrFigP%qJgZk#z_#z>6uQiA8FZqlP`(@?GdR z7x3=CHxtLKE4Gie)a2IryJ4Esv#y>(Qa zO%pGe1PBr|c(5b{cXuWP4J5d0fM7uf_rU_eli=>|?hHP-yF0-dT!xvQeDB?Rzi-c7 zIcsOnp8Ic0^{=|Bo~OI3tD4DUar!*U_Z+|8CC8ULh|iM$(n%gC1(^JuEH9@0)w|lD z0YbHBqJ+^Wy*yMP-73QiSajf8bzj?(ioblGb08+vfkWj1V)&MFRjR5pje9H584K!o zhw4#2#4s!xdOV`^yqOoUsSe-FC z5tStuGYUXpdwKB?O%4*{D_X!adJRLV?~?hbt-)L{WE$6S3BVY(r_xh(nRI6@2= zV(mHFj|ot@&v?E6I8ES0FTqVL@cXm|Ug45?@#AXXMkmY}@9lCAaZt1%^H#?4a8l{F zQ5yuxW7(Q&muqkD8l5*D&63ZPjgslLfMsn{#ChbQY8sv)&SlEp-kMW<022w_$2k4% zRPivurp0pa(+eIywGLnX_1rsb4fkgeVUZO`wF|}%BF>rKS|CxsS|Kgk=83}$S)Ygy zr{x@Lm+6la4HL=D=oWM?x#I64bZ{q#+dNN|NJGZ@-;cOB8*^dSd2U;E$-XR}-qtCe zXs(jH>U{qYdG^zHV#-iimrPChviBEl95`tAs+(Z(@>-4&7F*kQD z82-T&@Hpzh&-7T@w~D~Xy4`h4V-y&(#edGbF9#Yie1?m7OxXi3^`QQESn{3yBA>|n z3xjf{ii7RF{#Qkt45=SA+j#uzzFrzVm%JVDFiBfJ;(JM4o{Z6O$n&41xdHa&wg5=5;;!D@PWSMwjU-u!tarpCYe&fsP(+(f((a z_uOb2w|%3bek)<)z%;<^gtE-2CF*)>8JV$nQsqRni75*fxjhD0V{b=QTZ1f9BAw{G zWqJ)!QPe^o$ayUCZFFi>v!xQd5w9OgZlr@uj>lD!d%-wvC5ZCx&rD8L5gUy&(b;~^ zT5Mm;CzObyN=)~>f3WFJkXpl~BKmrT&p6&C;w%;Agh@cx#_jby(H|`!Xd-m;Tz$~@ z3Jq|-Fn()-KW}Lq`0gTQoXt6CQ1c(6m**3y@T60Cb6z?;NrBs^a{H6`tYStDt}I#( zIs;D?{80R;V*&@>v6tAj5)A1dC^;{cyk6&d4%KfCgj2j}pO<^Q5b=?+4#vs1c8HZe z1b}(-JPMzfz!igIq*pF&C|rYw?Uuq^M^o4oxo11k?{nFe{iqR@G2(Kvb$(V9eNovS z$AHWig0JQ66xTN;44X~pw!hG(y8?j@`U9G_g=f~E^EN#wU-5x2AhkdkSuKw0B}^$* zh=;N_6o2m*6(n8IGM!pH`1jXA)~l`rimME~yN%nH0=Lt!MUZPUiQRILe8c1XLB`x2 z?fnb>mavf4R~{~Ad*Sab_fYO$bWHF_Q)bE_=qT~|4g)o7txqVc$Qxoz)AL3VoN;%B zgn3>>A$V07X|vJDd_XR@KiShnjUQ<69u*%fF&76yB>a=o_~|ytSYgf8Nk!q6Pl9H~ z9R1>7UCmoAx(G(7bqeXHg$dXb@BfjIl$|M)@k(IN+Dtsnb01CFo?lz02zIVXeE{1{ zD_v6TCX2xB+JczWHag1fVz-J80?Z4h6D_PK4FxMrHlj`4V3BY^;A!4+T}D2$*h6FX zA7q>z?_ti({`x6+D;mug^UC2?)hcY3ig>yhf|6^;W%;#+JCc+ z0+}Bex;GZ8A{IHFBIPj4z$uNTvf)|vc@b59Tv*J4l7S6?c^u9byuHQmq;Z|DlvxF_ zcGL5yuy#E~e(5OzZt^i&mO~Y4Spa@q?oS^yz>n$G>t{>?k5^mDx2Xs;c;A3 zQ@hE>Th0rqg!I_5mnM@>xoSQ;-Fx+LD#PH#`66b1k)-e_vOb+Ac`1afSfoe24Q8C> zgv^PDRrDBRZ^sVF!@q*5+dR~S>s@MmMjiCGhoci9t%gCQH7{N^L;;vJ9$bG40zA@> zA%cz(a#M5-2;f#<%>0x_`w7bdSbZ$Gc+L*^&kTDn(k7|ySBFMTPXivh8%r*!@@Lc_ar73_=!RuDzZPZ*Z!I5EQ& zSr%(dm_CCJ`GEX2yXs9B6E$1(1CX~?pZ=I!@|=!izd`>ZQS+qt)<0J=du}*KJl0$&t=D=oY|cfejx= 
z4Bf}ERRT!hvW>f4b@#VvNZUR(WoW7&77V_UaO7J&bQ6oz@te~}J~%${Wf^_K#Ab?CkGSa$ z?Nkg$<^XYG`zSb)N)nkE=3YaJUqTH2J$(8uorfIX?P}%MN)LT-�{Hms6N2N@__$ z7A?t2c=Y8$IBDjd_zeHl@KNUDPPjiWA_UVtYBQ-cC7$&&vpg2$HcG?6a_f-+b3J%v z>4)UXpABzFv5VmQLJDKz!YLZ>^`YPC^3%T0&}U{;&;zoL&0#C|0s&fWc$GplIOEP;O9w=G8ro)k47noOZ2 zO--gq0&>*JVK+1l=hNxk>r&Kj>bY=b6{5KjY{&E+=~qkZKORvPCmvNG0P0a9#v4br z;?)CUvFIPuNN<>8kLBYYn)LpKIm91iV5XQz|JwQaNsTyCU=;@stkfqpsnOyQ?gJx? z1A#?7>A^HvD3;3>k7nXdq)CiUrP{u_0-*1eE9iUcG4=wP=M}SYT-?_HpY3-zw(zDN zUCj?Oxmzgh)V|fcVxiq7^I2rnpxv-!?>@P6s`(5&>Mf5xF8CdM52cQI(}N@izU76d zvL=(rvvj=(T)$hhIVc}4<3uaKp(ojvVgLDt?xR%NE?y4m$Com-E2pz% z0K{0=giz^@)~tv1DuU5Uv~j( zh=K9wzP7~;@-#imE%AN4f@|J8z0n9T&zC=rtb@18xPt58YYD< zybFis16*^TcQok>3>YPD`#L>5AWE+0yKYQK?@EhnNh?cVz?L9QYM(uJ&jY_Wr-RM0 z4~3hYQ_Pq0)@7dag z-{&#trOV38f`)zYQ<#5C0xI+mkY5mYG_)!EuNH4!86yIGXRZpL*&#Q1sYh-$OEQ$V zzge~mo9p0^LYjJ^b|-Xb=w++$Pb#9&DrQyc@;*U?A6$VjJNeY;2!I-nI=|81cjdfs z%cV15qd6~4fnc#1o7}jv-hZp8rIbjIlNZ#zL0!t*HUTqzp#Ag=nx#mZ6{w}0-ya%> zd5f}+_b2t|~^ZP~<(YA8VPiV|fnmDNQd<$Qvey#FZ==0P?+;xYT@m*e(By3cU$T2w; zk+$efjG-Gr#X3S<=sE@`wuPL+a^{&IIIh~u#R}SbuJnLx;gDRT7qZuD`v}Bk z4H{=XLi!um2vg5$9@s(J5Hp3I(E|>-};1I7NW~S#BR%ov#b2n(v+&!+RO=kV>V58OmQh2 z16^V>MOgDmzAi63jtS##W%3ycrQ)Nfzxx=I`dj?=iRN<%)C`fZXEYBNI6%w{X<3ol zkl^87Iy)>^*2Ki5ODJYwplhf3(SB|54*Hxf&X+gxi`)o;!rF{4gqYt5SH_-CbCSXh z-<)Z^>GY@R{v$PG&sXIhlkFU(rB@Tk>N0ytFe;^XtOzkWSd#+RQQr>3wurZpaXnIH zeP1-_H4*TFt0iBGb=*CGmPG;E7`Y)q(i@}t@fI4V+bs}5$otc!N(0aw-hKBMN+1=C4Ujd||7qN<@2+@rDNXZuEfz!TAB=g#gA?4`S7h2pq6 zXugP%TfN^;?IaMn?17fEeiQ0B8nnfi^fXWMrwk4d8);rUTax3rp5oZ!=P5k5 za_qKo`db>(-~30JYW(cEO7qvB^a8{DTBwrz9zROM|6gocSlEi@waLPvnxvSaO3-$Y zL>}Psmn}OqiuZrn3Vua&@P9u>9-WbzghHD=rJ}PE`)6Co0e)a*JO3Rv(Y72T*ybEx zhm@)q<<;tI`2_6A?cjyourK}ec)#*)AKZ}pmNXIN4?(;h-3tdc?t%Zn{on5AGLa>m zxq8M+TD?~7qOBq@4G3a_J2zx>+|SAY(uU)ZfUe6ZJx&Ye?T?$ zJ7}0WOIqfg{V#a`JH-CiSo?pfDQ(a5y!zhsypTcS0(ul-UH<3)2$;HXV~+c4j9&ed zE+)bM^K>^NWFsoB3J8v1y^>Ba~hv;r_aSDV|8r+zPphR)k65z9`VJAhL&LfrZD zGxgiU?Ef~Cl*29X54Wj{_6l{^HDHKrEB5s&a6WeaFSPaJ0|8k1GuqMZc!s|gX6YWt zXuvHOLt&o667VKPRBBc_H*2G2F`{5mzu^CA5)3EQUCA| z6-Lx~JNEbp)ml!LNm;(;eyPmL#UrEIMFzHcgOW)3%|k){Au4UXafcKLU?K^bV8A5$ zh-nI|{*L|S{O6~JLTwZCh}>$!VF4BaG^QKK(>Jxlcl@FdXqK zma~sTum1_X8*%WL-j_V4@8}|+LnPdO1Cqik*(RY$T)$uT-I~21@7ONy!Y}=InjUfm zdg}ibQlQ?2a8#nd7>Q`Q->4x^Jp;(bOYj*)(|wxi{Kj4tX`BA5_J8IeTBXG!=?4;UHm6rS zYo<$%>_ps!#T&8sQfzVmd2Pzmrg^=Q{VLbnw8&D*iI@MuEN$&Q#(xy}xDfpO53~fj zK0*Ic(B6z8SJg92_7T(MC=8jkyOIbcQS?Jev@+|jh(6#FH49_F;k&NZf2O~J{h8{& zv-Y>)En>a}_kCIDHsBKJ7GYjt$NC|sLgX>uVu2?W&%u;7^Ys(5W8Nfa0L z>@7w>CS#j?41L-ufF~s>?elpjN_wVj_SZ#wH&=#~^XsKglLIBa|tGl3Gyx8=Ni0$fJLQ^9|Ok6Ht03t3o$`bS1$ePR)o=*C6!jtWtwC7wRWlisYNE8-LR(>;+$s>Gn ziNJXW&74JK=(wd*{`j`QX%_1JAVy%g&xFN98tBAh!pG)=Ct<;43o^3ygiAn5&or4y zIJ;JQ@9QqmmdqK`fiX6(V-AhsVr9CB4)>EhJY`lCatFnh%i0B4{nZi zb5MZnKJ`?91Z}xoJG30$r|hzS>TC>MQ+xZPWWYfDmN&s^WeI_?bsEF4=7)Fcr2_EW z4VJ^bMkaMc-0hDl<4%EgmTDoOb$AQPpXk`blFLt)o~YBwS2XsvuW$%iCkXP6K{s$& zCXN-D3B#PmDuV0Y2%GS5%gBCGdkRjL5`|wjUWq?~IaSJ^X3{@!*JqKcAF^`A-_{Dk zKg~_pNkk{g@!zr5>W{yl3;X%eKFjj^;!ADw7Z3KG1=E5 zh~>CG*&xBtu<=Ab5rnVS-kK^b;5c^-(940j6{gq9p;e}ZcR=YWxVv5!fF5;1d)FW; zOZMXH4GoAo2s(sx_m6?lY#la9AVvsZLEsPyi;weyWj?LcA)^mr){FA`JmQ90GpJHD zptqHCY%S##D12-)BNBWB8yW?(-94-sDs9W#yG)FrXn2$&a>g=P;@LyEGJG)=cG)`G zNdvJom^;NosDo&L7eek#iVJrMX`qF>J-B%1uXRLMXgY`y0olL)9_f{x>$9hlvhA>e>nI5%3E`SL9ftdM;M$o4(ni5I)sc$h=VOSc4 zPt*yx{dLgHJ~)KElS2~8`X59JpS+cpka!9>x$afRtVKgkXsG-SLqP7Hv?oSEs#&Yy z`!J06+hjPy%#4JR4AK(v;P1?;|41Z&`;DOV%iLSyV&$iWZ@)gP!5R!&C;IqvQ>*X; zT4vZ!%)fmC)ZR&i8D(J+`!Ro@=Pb$mgrWEgmC5%zv-HCg(x3H46-%qonM_En&7(KTZ6Ec?>9sBK2a|dSoB`05$a8V 
[base85-encoded binary patch payload omitted — not human-readable]
zI=I{5&fqr8T)utwJ!jwh=l-~V`{`$Ob@y9Ut5&^L)ow@jyx+I}l5ywO8#%bO$(&-R zaNVFkjGo>=aVNd@YN@gC%BR#L$oeq9&cd6e`!DkJvXF5q& z4s&JNmUiw@;h!w!I6?jBsy|JZ7BzoWxB7`?m z0PNkptVxwtpHf*URtwWKTZCbB1?88C8}J^KHsR-jcqbBFtAO1#)5*J^|9Ne~wYz0( z|EqXBXqyvu@=A-dY5N1+zq^5{IRdUQh-ey@SUJlYbRm-q91>^;de~Gco!&dK@P9fh zm@SRpcQbtTAgCTi`(2Gek@BaU*Mb)7)@}f0(6jc-5l34`0k$)lKEhm0a**hd1S+r| zsdX@-WiW|mVjHhc*uB+KNCg3!IW{&A8iE6WxnC zpyIanv_7c!+Gw4OIj}Uxbu|>fB5+#Z`_45~PEu3G1+w2!|Ka(X>qKnRa!vFgl~l~4 z?eTh?nE8h*Ty@cjg#z;f+sy%T43E`oQL0rn~&zqmJhx^AveTRTrybs4|7=*aHi zPYHL_g)QjvqA50!J7&qCQkvC%8iG0NAaP-Ha$|5C4k^n0z`G3we^vPrvou_9lD4U% zU%QJ`vCv7cqq3{MyPDnwCm&hnA$DlgZgyEe^;NFW&7GfsBi(=9+=$Hak&vbgPIy!I zj+67(4ODHl#ZEFIfV#}Ot3|ugO~`&ZIQ#Y+s;Vo@I_{J!A$g@kkvsTkuTrXH#Uz8MqER*q=KG=_Sx`Gn{)a=ukH zFihWlt~@wCSR3Ply>g+>YcCN~etb+IO^0SlJdn=Fs4lUhWWID+)nBvHnYDZ|V{H95 zdO8xppJC{9u6jJHWO>KbW3LBq<_9m;cYhz%r90OLPovpU%Fb*bj!ivm*z^h3@8jd# z51+`RG{gqR9XAksox@x|V_kqUUb%OL1)#Gyk>In1hC^f-1A>^)4_gedR8)a#wH~sVIgus;jBI_Bv}CQzNhoK z7GpdQcF`D9&7K z_AT5%0ei%Tsv8}*-4&NZc8X`J;Ham~l{zuNEhY{;!fPP%_VOiU{2P2I+ZB~K9k~5( z-<6ekLqyeYz=z6{lk*3ZIb(~U@i!rRXjfD5LZ03?xRrMsas43Jmo>Dk&6j%12Ju;h z+C~e0>&5ey{7d4jymsxlnSGdq9k?BbzZ5lsvOPoaJ*6xyH0)>4Mm*lrPgq_;dp6=w z6yBmRhs?c`>L7qhe%S8JvdtY%{+#1S|7D`2o&DSA#HP$PI{IW+4r;eGk=-s z&cO4|*iBi*g8Rd5uLL}ba<(-8rD2HfA7yaHDJ{*u*FJYS$!{jcZZg5|!+ z4<|psNUP(tzr`eg@pddv5Xcr&+aB^AR#88=C&|Qv$cWeqywKFZH zFoxgb#CuQYBu4(i{TnUfx*>c){|g4wYSrZv*jcPBm>1q4ekSj8IL|FNw4>chl7z3@ zcN=1X!bM0N)KymiKf>k?WUnJ1ji6t@V_ajMPJQ#-eQBcEEib1jR=**yY`mjn((j`tyqcDuv~3(`>h z=L@4S``#!ee?l~5?UDr2^( z9}_HX+7s~j(|xFRKla^o9A%T63PV>EIs3Euzw|FW07?W0(D`Tdg+=7udG2rj7+cty z9ya*VuE{HaSHeR}kE-JKM}h@-UJ55OlkwvYM?M;d#4l4ZxM6{TGO&MfoI7~YmD&OJ z9%$$Mu<&|U2L|twC`Y-CQ9j73k_y)_x}CWHYWF?xNtpwOWxA)D)W^v^_GVys!}pg+ zV%GD4d3XldmnOKQYW%O;)5mS{XOKct(f5}Orh4|Zs@cR{&qMSOr_?Wfm*vw+zB|fc zy12oUrP`U75!(qg#+AXeGTlS=e;XF685XMV4i?Nu(bS>}Yjk?>3*T;U2-^aC!*wSD zapeCDnFD6!N#z?U0t<`W;bSev8!8mlgjYiwDh_*rW!Q}LvbS91uxzao$!7X6^)po; z=`Q>EKTXi+Xq~Qxj^DfstlCQQRihZ`ZiI3fBhcZi1u%rbkAhL;Le^7kVA{C#K=d%I-JKBSLfZ~yP@0Fwga41sNdMxy_S9JPidRZ;)DjqVocyca=@-4 zTTb4-+#c}z8bOb0aL&rCmwPDROzJlg z1E}mH8_JvkdF(sR0b~PMxIT`Cvc&6;IFj+)p`~{cvz=FN*tJ;K=hxsG`8+U|Hbx;P zz9^iR>MoV;x3Btg(}kIUV0qJyL;8RHkq?A;hwfj3$sTz=lj^hHJ_=RwmMR#*i4%_J zNb*5sXt&eW+%jUFQNP?UyMm%La06`%y5Pi?C;VhhWa!m?mgeHEsbc`rPNT!SEb_u3 zlMXV{lN&KQZaV!#MyWQq-?NtljD8l|T$|bL^NAOZd}Vm!rN;t0T78~@Z{JMO%$HeY zZnFExP-Yu_qs;+4?dfL5jUf;R&avZg`o62Gt@>X~slKSqSEW7GxFG+0EA#w^=hsOy zQeD``uD|$7=&$#OY~NpxZ_VaF+WzHv%XYu@+&6U%NWEI@2He=PuND;Ewsj#M!sg4? 
zG>(E+naX5KKlcP;tp=ZE8=AE}=ZNCQoVrz()vI;O!cJ3v(U$Nw9{VgG3%>{F`dAMR zr=|VzV|c!t<2;h1H_bhLsMTbN_1@~>TmUj;1*)h72g_IUTYn#*gYrCHQe@}XIeWb6 zG}l=qDZxHaj~NrDgw#qMX4%I8PgFf@r!n>|pR=Z=S;gfjIfAwCsssxlx(6ep&8W#1 z1iKy_r9pmSTeBjkOb-Sse?$QJ?@ha{yHH^oJ-JVTaZ2o~ zAr7kkkILOb?&aY8F~1*XF)E3!cy)s0$DR zJI60>loGy93Zjs@DCG#)02ShZUT~(GrvZAB6i~yBuU@eyj!-n5$iMYQ+a!(Q4VXrZ z_s8=JqhS{Al-UTZfTb3IOL1HXm3Zt!z*y1e0WafkH@)No?)t&Nnr0&OUZK3|Pl%*H z5OPx4Z0d%j@f-SG!wDBW^|3ww1iC+XJkx8}JDNEYyrL*zKwy~;zBr$lUzFkAupL#x zwvbdi>J^2c*}|vX)y~(aZUN?oYd?{Xxp4+JWw{C}Rw_53_i3rGZI7r?%0!1WpsDI| zo{KCeZaB%)0Z{Q$ac8gU$7ak8m^;cEVi~)bqa9JlmAYB@`0pk^U0Ubf15O`7-nPAD zq@_tB z9B_K?5(T-vq8IFQU5Tq1R4MU2{Pky#sK-du%9g3)AYw11QAa-sMRdWt>(7rq6Plji zQe3WaI}Q>D96*ofEB4QO2VgTFde>!z$A!2TGGuGW^^r!;gv`Z)9!-;a9nL>HFq+z= zloWT;K;Jhe`2^TnHGq~q6k{tPiAZ;gmGL}@YTv~$ei1FGrM~KztXkbRMY*15xni^L z>0|fdKj(xVLf~<-4T#qExSylkISr+Z%0g$)@$0AD&8dGtoL2G8&qv;i)lp7odw-!# zk0}ic<3Cr&cJHg`T=F1OY-~jJV$=-^`a{1H;907G$jk;GI$e|Sc?_-Rz2}Qq7m0qW-h)G@FJ!d0)f~-^v^V-J+8u@~}M2}5lQBpv*6uMtD zQ_YEd;M<_Lou5566AgezpzuoT@T8`otJfp?5}|Rty0^mO*CzlmbVkr3GRzJSEOlkd zyY1z>TB9ZZ&XblkgZ|gnDj&{mo74P*`XkAm@D(_Ud%o+kO^4T+6N2;=9%onEzj+bC z*Dr{Qc!W0Wrk%C+zWLE*Cul>RSclAI7*6jnM{py9HC=z<0nxvr~LJT_mG@Hz=<&htE zfhW2x5g2?vG`QVDnbCV)uPOe(U;yONE*3Q# zN3b9G84du`*!0GbVjG ze#(i>!5k<2zDyqU4lzE4g24HzGzjcE3F7}_l_nJrKf9V*)y8Bvcnr~z700G zHA#5zn!Kk8A;+!7X&><$cLe1ug*c#cr$xmWI}U|fB|p!1feY;g+;X@d-z}KJE$xpK z$YY2d{vy|xb!X1_a?H6x(L=V8~)1Bw%znguRC_dm2F;N9+%x5DgnpDE@D$D;91x1&vE)$A&Zy2Kq3Y#uo9{opMj2v? zB+A^LlJu8leQ%u5-+{Ogke)umL-Txv`!+B}^VoXsU_06$B>NpGZ7jI*b2sBn$r+c(u!aMDxc{us%w-j_`_pprI4EmAWp};G}T|B+N&r8Vu67WU{ zZQxL(Q?Y|{H}$s7TKD5$|D5=4)u??ZYc(SLrb?yi^l09qH)IYyHFhJ5++#s4{%`TM z=vqa!!vt6T(-iLIV;t8eWxK*J zX+2l?<1_TUsC}1Oq=79?8|8yN-@IPkoMkJYuMS}hKE#58>k4{gPRvX!PNwp(Ywi37 z?X+vzjRR%)?PI)%U7nv-EoAwsSSl|MX0Qy)Fbht-AVH&aO@Wi9ZxbMaic2`HUcVpT zv&t*?1m;}M20IChI|p>UoIOAMBhQ+8GIh~>evxE~1(Q}obL5!iJN$e#lgGN{Wvol< zZhNBP*9o|X@4!3E6;pn=p=-!isT89&ayNe4S+19)Kj^-UIjD`rdu`M9<3y zi%xr1;qMwv)7vQ1^k_#`U+FoJ&3qt{7U}wZmTGW8W?y6V z*99pi!QD0gGTv)&)Px&1p4^ot&F8-+zNJt0ubd$S$&(dJ=l`JxeYl=>BMi+gntOKR zE(RTB-LsiR!rS{WMjnd^Gad}476LjHMO=>YW)DS;;8gv5sB_Ty0>kQh)F z!+F)s$A2o5q%%_-GiVljaX}%|Pj3-dlWX3n8?Z zF$7hO=rOg1oN|)2bL00P8?jt2a=tpue|D38drKd%n#sV`kmU9fI(M59EdRalB8tdi z`y_H@s%wC&?h&CJLex!#JJs0JMI2zu*B@e}?nEHEu}_{NWiwXYL^pVogiMWJm9H}RacYXd$L9|O1k+V@;`Cw2vjpE7WqKQ@vv0QM5QsL?|! zj#!MbWrXWq<>9qf6Hb?m77F=mqWlmFj(jFPR=<;Y8@@ITV5X;vDeqp(MQkV|*tL^?21&)9l`&q$_ecrYk9opQ9cuKq& zKW+CpSJbjLT0<6z8;N8HJ7ZDG4Ft+-mg13-$EB#Uq(MMKZ1v!Q*wajT@Y;HY=KJ%{ zb}f}p$*p0y?I>y1>8J42C&n1>b0iFTU zY!)!O%}DCWdWmyCKM~10wW7Q8bGcLVlZ*kkSa64B(k0``BzvS}&?l+=dUBos6d$KF zWfwDidtDpZ1~mlTpYLaZ&h0sW8(-+`e27)>Wlhg+H3CO|T+1ByER6T0i3zsv!O~3g zoU4HqC`^ezg*~!??y?omofCHRMV^VvLBWP@PCF`W|MM>l6GJ7Jp!Nd$65T?jTKrxd z3@xXS_Y_x44(-;;e4U0kt_SPlH+<%UhA0wLXK$$dx6i$)ELItV5+~6(sm-6=u~z{C z(})}4drayrWhFvT!GRRhdlc}Lk5;3tp$V^-i(Y&Zl}1()&1}fBLG*(XzhSVbxbuT6 zzerw!oy9PM5W*29MNn(WMBh(bH^no*K)8{){r706S)OdRN1@I%5s_J%IcWo(%mNL! 
zCD#uEHkZ=ULrn)aYQExC6oko=y16oPr*RN6-7WJ&Tl1UNRkrux9d_qO8yVxpirrlZ zKc@&ymNNw5xgtUXs|haEjhMY+bc}Z{Lv?9B**^wHC~K_U)Ue*l_&xEeagRx5h1*IOI;rGLf%B82$iTfkBdGGFlQkN4b^N7rSUSxB zOhh1n)oD~eBn#yGCrr$p4>zd!;)fg6R&q<^f{~^`Ap5|u{ZM4_i0hMhr^-u}QEOGc zSdo=fU-fp56mZ0`v|zk46Ra>6dq0BcTeLU!ZTkf!l;2 z7gbV?95T4GFFWC%a~@JrW5dyko=t z^|*<-zxQqU&IV(gw$pn5*T|;CGf@KTT_ri{3Dz=_8@C9$i@6E91z~HcAde(V9|f2; zttFLT;r3Vl)tr+rbwRDT?Nb2^ckc&%Ny>YfTVZ2Stf<3J@Bsvic*tBNkv)g%pv{ap z+>_#cWxt9BC(qjQ<6J^S{!Ub{v_2XRi2?a;MyKP50?%qqZWuY?D^Rr%p&ds%^c%sX z3|!nSk>V5^UQLcY6{&bdgle5i1*3H1DL~^k|mP zQc5+)Ybi*_{qSH4wNNhUX6;EJ*Yk==+$5F~)=QLnPQB=bB$KNoX}LmqJJ|6e)!bF> zpO3K1_wp?9xeM+|I%gqxqkz!MfwEkFvlE@$0+)q-3mfq<4d1arfd~TD;}1|O`%zx! zQydOrQKeR@cycQi-!FSFTSd|2!}pzUR@y#Co7!%iYlf5SZuIz*Y+X3(Mj6tw%L*CA zJ4|=F(NvUh*Vu7mw~yEayP@EhQDewnCyxB2Z_MR;H`mXuZ>9wCemtxuE<-wQLlgJc zK}=p0rvYVGuei(SUfF2q!Tkr?>y-39FqyNq&yL)CEmbssEcE(fqLh+~Q$Oq#sjWg+ zb{qG9)X6ADwdO_3EI&uFg}k<*I1Bq~gK+BTHf0s-+`t>pd)yyhK~&?7IkKzD&$y`C4%xXd~_Y z3A516$4-PX`r7j}U!cF5=fNwljI0A}-)iSo15}p@lHGhgemS&*`j+e=tR;PlK*V{y z+!4tWnee9iG{qQq*d2`crFx$9v++} z_Oum!raZcLA5kg*wA*eV7WEBE$|{8+xNfwRjxEqnSybs0cT9}yc{ zAM06(h<%200>_EATWb;Ly)k}o60%oM{0}_Gz(0gz%Kt-Pi%)?mY#7LL2RCGjHp?fh z0Q$cmRS5^aq6PmeXmHlh82L9L*2NK12Y^V)*P#)(4{+;d`?iHUY`rOwxxx8J!RG-cs~`N3FyJ8Ow& zRT2%v+2dQ6D$CwQRJ23Zh3$4TMl_86 zKfx%jaY|Zk@W0FvDqA~Yu92Q%^G>LJt?it_&&iH=wakjck&8PWG8HTY+5vKv^T?6& zFG!G9<{EgrK&r{<_j8%UpqcDl6W0g_mZm#{VEcbO63U#()c-%dxyTFC!1@i9LqNZG zuzxx*J)%K=(GniAkN*)*)1d~4ZT{Z^MvUFHkzx-QTO}B!4Y1-apz;PUeMDBoZ>QP5 zRr6I(&?MLjR`?Jrcc91i&WL#6-U|JE-jp5;!Z+|0!Ag(YPQOz)!~ zL!WiR7wlaEb&W8h8J0#v+uwueK_^}-Rn)kxptBZd`xb4@Xp`z}ID2zQse=!NqXp6B z{&}G7*W^N^Q(HVO}L$fwnc0;3ibAfz5sFlFGyNMsdcJn1EgX66QA`8ueCqz zD^%K?gU}?*((Q4*PZCXb=4WG-fL8ev&q37+8;^!H=%XJOKJ06}+p#_<7IUQ+LMrS= zEtq{PaUr6E4-Z5;SAsWO)LofHguAU2xTqeF>`Ud7XraT{RjKZG!-4LNnULN;q3m22 zMLuw4u@;w*1!GgMURrvM5703mNUR;;w)jWQ<1VK`1h{H+GiVHBoz}CT7}KvW`t4VD ziH8?^jB63L)yVAvOUN8>)zs9f`Yq)8)5JzPch%B-1S0)Fg8urWd!-lQEHfr468C&{ zc599PF(+s83J(rBKOKbb-nHSC@5A@E(P>y%mw%)`r9 zV6;FTEh5+RBL!)Zrf;gVKFpNVo`I6va5gU|s#`qqauoe_@90&7?p9u>U~6C&Pie;q3#yn3oAS+5M<3HO1*3q6wg|HsPM5#aJ6CRROf~1&2kW= z5x4Qtey}Tn+=?dX-3X9Gz(3_Q4Tr0(Qv&Jt`$79Mj*R&724YaHR6K1J}G z#qKo?-OK~Uk=HCrm|%MyUrb6(Aohzkv~LKb{zqa+&YMAQYDJ`BSI)`WdXT4*C$TKH~Twu#u^4+F4Q`nf9zgAs8hC) zuujI_J3bU}h*kI&N9iIBqwt;ndOOwQAV@NeB{crB-DFNFvj2wMwmHs6?ddH32hTXr zf`9Wv=$?gQL^}0f)zuOVac4i6C7pskb3{yC8U%}(e&QC%lZAYtVps<{-J$JA(^%Kg z@X7yfozC;kkf~hO{hQJU4p*~zy!^xL@Z%LnQWzVqz7Qy7j&ks33feY4%3a*Oud3-q zY;ve@egr-X^(vTwdzM+2l0eYLyCzLDSXB+rfH#kCk z*8$*S7EJN5&}D<_>pFzt^fK!xUE{TOYooCx8%_p2pw?M)9VdRlpPF%ocXG zdfV!?_zyQtgthTZ4qK%f(U^kcP{{+Chmb3Q+zg@ zl<^;Yd8=9_Aan=XXrE=#O{|+DAi6YP9N9Xu^rV_J{66>$g#yku7EnYd1H{ScCZbJ5 zX$PP4GwTrxr-(H__@x*5Nu{Y^3R3D*qHN0lM5j3OoPzEDgl~`=gW~^m1$3!LO#ct~ zcDE50DQyu)IfPOCzY%)zVhBF|FDPGNfdr%z{_By^MUR-OuYo)-{^Ot_*n}ohxE1uQ z;+`@~?(pWnum6`{Y5z`!PJ-3BFWj2iMDoGZ?A4W_JwZBkrN+^qX6mYiU1*8!s}hoM zWPm1!=ih~sNC>n$rBfhyfS zZ|)A!+;kZx1K9eR-+`RSDV&o#aWa0{zP;XcF}WeWU_EnO6u+(_Fp~gZcyY)@S0n()tx9;?8ASEiO>*H(0HWU? zvg^=P)HLoRkQCT1ft9V3T=@8*_@Xqp=av#s$71uaymV6=tOZRxqefNE)6qNcUnDic zyZ0$th3$+3)H~$eoVzG_MQsAn^XKg3qVsMTSb8`A(a0fALSZw8tPBM%T(p-9h|S*G zmZr_8Xdat$_GIZ>HRs7&LS-eVC}BG>+qr~RyTe(VGU?G!&&gP@=wnuQQs_R?s4z*? 
ziWw#{|5opPyNW0l+J4?vGJsS3{XW2ld?ygMc8TmL`sksM8@7!B?k#j| zuzp={eR8b+#hgslcbhqo?}tkC%RH`TYeCMZq~M231WE4gGxzVo)2OYwIyD}AvP$8m z8MANJpwsU>tL&%m&O{v(C>rl-+>7j$HYt`Q?2leQ@m5evwU`HBYxh zN^Q*c2>?*M^n6?Y-oIb0t-y*)u4=Ics@Q`xcp3^NtEm?Mw;$-iY9XsW$w0W6Hb2GG zI?wdunbcyHD-`05B<}IVpRZnyttQ54+U**1_sdao>+dBbJyDDS1k!z~s9#m88)#okjMmDyaKHgp!Nd!aq8dO9_`D?MJDZUOxGC}M}M z2})iwrmoz1%GR*Bo{E#@pcf|H4t0SoFr8cK(sw5j9{}%`~M_ zh6P$^ed5N^)wJKkv6nguTV1WdqK0(wRyqf`vKS#2Ub1uMt{>VF6-l0a^P6gc5T98A z{*2Zq2_BhaW{I6VE#zX;P>R+VhG!~#ed^xmG(UD?<&q9M-rqxSc7}^t1%CHz??&V^ zrw>}OHj>+OtQiTkh$!N`0L5-28IcKK7(DeIXp_K;GL+1FKX~tljCh#0_xHOtpcp>h z!U=Kic)GW8IqAz%lZ}ac?T7ihgwD} zJUiDl`5IgyuxvKxtK|9}-YgUR=O!zJT!rZ()il45xOr@u|LD~vONh}K=t78RzL{_JGW=fg_e2-lMkc9E#G5G`hrWA^i?7cZJc~3* z>WVI*(N4tTO&NXL&KOR;SkSBBKnj;PX@@Z!tQ@L~+olgV7PRnd=1y|43iaBh_(X!5 z;l9}QuflDP3b)EfVQIT*n|(vT4R_jr{(3zpFW>HqUDu~fs}C4Jm3`+Q$?BVilV*YL z+-+Ytg^kP_{!r^Rf z=fmJ~xCb?yM5;UoPBJ-qX!085v%rIW>4jJLLn$?`PFQ)RO# zc&${bp$1!gftDUt+H|SNa#4j}Y-wP?p6J$#0%N}9N|GO4fQUT?4|=9DZ)WC#!U%v` ztP_^be(;CdoR=DH2(&ufZ89&(tx7Kb(}Y{fYuuFfTymo0+%))!@qFTB1^W%?!W`g# zMM6=4{jS>FMs%}n=$RDVJ>Yq$JM^KGqJhN~W{khUi-93;wQbgwgo20ynvn|&DP>%N--!yw2@qulC71os;lwvP*fL;5=cfb;S;6SP!)+|8g5Y=UM z3~Fp2nuHvgi4<@PB27G43wyQ@uFsCwSxaJ&0aMibN3-m>HL1i?Fg7)~cBinCpQ@VE z?EcP@vk`MUzvfkt<$BbuKHj|V3A!cp3Nh#Q-4#0Z{oq6Y6fE#kmH&Cbta`(qFrf&b z>x)omVr(}BO`e@dRWB}{Mp8$Q>qlmHACd>1o>ppF6*+lqF1=K!$-T!y%-P?Xt)O}8n z9&djHHxV~=xh2|8Lloz#khlZpCmuqCGfyT`PkkZYl3rm%sg~&~TMjHkr29>q(b2Bi zkmN-}n{C2+{!pb}NRn4q_)g%1oW6}%JkH#GaPYY!dtdWiuOs@h1PiM9G{zEVS zZJQ1^nJ1^oiE%N0#g6RkGUGL<_Qjxua?m=clJ8o{oH4O~i=a)FT^NBK>%t7b9;;vU ztK(wP%`ZsJD97y}I`+AznD*U1Bv2J(2sh8)EOaAE2|^)c3*i^rgZ`?Ptv2nc7Fw9P z`+;x!{}F*M-biW+JavsB;(sqpQkyzzmQFOLQJp9qrQPBryU1td%^D>Z6AmViy8fF_ z_gi|$I+-GQbC7vu2wDGI6GS0pQU(5N1Bvc+&DP~tYW~_TNe`R_D+Ep;*OWMz)k)Ar zna$&DaKyWA&Nj@69h6CXNk~?F9iyLWGGx;qMrn521OA~te>``GsxUHOpo3^ALXtxJ z5__-%b|>g&JL`k%4O+eV<}@)f4!voVLx#YRcillX+yXTyqD6lzy(dS3)umv-Snn0q z)J&^qN#S#o&uOWYbBSSr5w`k1D#fP3)KrYn7)DYxX#b2x22$^%yS0?$dasN3xCuYg z<`7u&ooyLly;}e=UAR)rYH3w$UOgE3BXV&c!9LU(aCMs1n6s$RI(Ga|l?V3^DRsCJ zGi{VXdCkJnHDZ+l_yMvTeLQHupF&l~06UiBmoYdp%b~)xur{APsXa#Rk38noXWvanm*jSjN8wj3mq zVO`HYRUZBT)E3tWzdH?o3Sd`vhBvzRb1z-PhK_9q!)Sy*GfyUiK}?h$BzcJNiKicz z7v|GU3qBt(7OMM4<0<1?7zCa0AHPJa39ml!eZJ8Q{-&eI<(ofx|MiV2;C}b-*H&;b znEq7t)ZntP4>kOXk^Mba;l9i9*#~PENWzK5=Ey;w*gku{RNxDh5UF4ZRt`Y%O8N1R z^Y=o}V#s|@crki)UX3u2(b{v7 z9Xn(n!RZw2=TLmDq&eSi#zHWQe!EeS03?d~ztm8>we9(|90(9Y{R5H~ee%oxzrhg*jFo z$V+d9@&E&EyDPbKPFg%6Yl!SR4L&Auf~`>H{V07*+#DV~c^vvE=Sc#4)6 z?!Y&ju~Ihryh^%H#II8~iXR-Q%V`jLwi0TTWjoZa-Nefa-{h zYivx=Gbch5*G0AT=X)9*iWKT`v?iU@u~fMMttd^s`_kf=9fzbs!NXs#kr?FB7{QbN zpFff{kn;Y+X?xz37%d(5Fcpz!qo*2po!S>pP`HJXxJWU4lX_3=E=VEd2&{S9x(Rx; zS@^Q2T%BcK&cn!fMmnTTnt%1m>p7v+=%d6K5G&&iT1=ZpLE}rj(=Y9IieZwZUadZ0 z1i%GdUdeC^;4GN&4B9)fw~shd_E#_+i!U~my%Owdg8 zP>56jz3@I(g)gp z08OJ8i=^I#9SKlxCg0^BMWW8QnjWLi<8HIWmN-6N(eWw$H|BUks8*QAtfE8pG(85y zc7;br%@XMT6+Ef>59T3gF&lr*}Vx}F=U%+%4O9rV5pOf|)BMH8fLnkGSI zGlet+QS?VVU|DI~tn*7E#P(d@FTZ&$NZ4=sQVSfYs(C#2N02jw6~;aww%=wL2%Hsj7&(V@ij03I`74^QLW`rRblZ47j`1jAAUYv0Q@)udS~GIuT26tfUY|!OHv{@UeM& z7!c7E>FjS&-wq$~cxS5TX}Fe_wMpxPDaoRzlzuo7R}+85q55r2$@yPo?;E4uazQi{ ziY;xoNu8JmVE4TCb&&- zw;%W^H-;NSQ}+|n+TpSq%|}?-3n*p$7CXxB^7h#1gnT*J=W{BqljH$B?$}AdbFm@3 z8g)cxOWIe&0^EZr2uEF`QQnv6XR!o)nddsWZ~xiLOk>;5Cj2mi;C)E+j+z&}LnsPp zF~HlBY%ttkaJkDfK*%1$#ZZmkj8KC7*oSo;UppG)_|j^}j`P~;kqsTAVBcVw(2Z@ZbjcfVz-nfQi8GQ#^_92Pb;KfJw-yl0ub}kM0GILA95q_IvI%F z(s@mS{WNj&cR@&FR~yB~c>eom1(V5wDGn2lSu0t+L4mz!Y399L#VU}~ac;bCFhQsH z*(;SbugVfGu13FG*5uZ7PJJz)YzA9ZN!QFg4 
z^bV`|CBv429V*=xqUD5*?iy>E9-+CT?5y~|G1E4{t1QFj&LUJKV@Aleshu14%4my% zrI4gOm0FX52b0{jLbVTx%Pk>QBhF|Tubhw~0!Mi6#b{*-%PYN;tJ#Ut{@=Djh2}SN z`hQ##J1TnND{p)A(n-jD1(4f`ozehBQFYbmKlhh}{+}flIoLqe#=nQx#68fvS#JsDaH4G*~y7ft&XK2k0I6eapE8p!`hiE8` z6(t9?P^Nm&58bgqJifRo^58`|@Ec&)^!+A3U79b3J?FsV98LS%cK`|T@8pbo6~4_E zTG3nJ^pHy3TJ^OkcsNxv^_8fs(XToL11+rZPu;iXZOk`?}MK<)YAP+jNR9FODGi+cPiupqqVM7Iqg@Q+mp` zrvtqJ*iLQszAN?84Kv4o9OXfPaN^f_EaKAQ9=jOWICR@k&b_J87H(D){8a~a_i0v= zM5Q}%bX-41zd?v${m7sIx!S}z2^kEq(8O+*+Dx$F7pbJCuT-`;!P`fZ3)YCzH~yo<%AB_w+pQt$&M_*eF|coW_eos2!WZxa~Av4 z@>egdc_{|anr_<+r0>5!B+q@)dU7gSi?idNYES~Rhx45S1TR;P-~ZzPE{v4|!lwXe z6}}~eCMXUi;0t~J7)T{Awo`D!W~eAyy=`UxXfPDbRFkhJH!n{TZNA5jhJ2HdPXwCU zoiA9Mb1()l0M!WRxnh{qRBKj?G|6qGne=*<3idMwh95tmu;+!H0NUZ@&(W`nHXWi- zl7clTYk%}H2I|zKFSy-e?n_omy92tW8oozb-GZ897b4ZHjy{S>3Oljl&bvQbCI=^h z@_L*EOib3~tNjZq?#Eo`+z-Xi_%hb?me>gKFd~n#e zpx3)i<{Y@{r)D&awTmw7cr>B1Jo5#bn(@KLj_Y~bRK;#d)X4A$u9WWvzO0vVj-*ty zcdte!s>>u3#%!sA%R!NC98vt$?iBUGL-PC=hE)f@fA7oiJ01SClowizHgrDP4S(25 z^{#N*Yq9V@XwDd*`=eaDiuzH7h9=8EB9^RXM5}Y9pW~(9L5C*CfJ|bBHocd+YV#&p z&8qZ5Z&@c7&mNg3E9qv<)G}@v)&GmVuZ)W8*}e=;Ah>jpKn&O5 zPVnIF?(PACHV%OR5hMgcfWXCjW7-VNRlc3Jt{mQu&)blWxr?3^A z`Gvpp^MEA4x&u+uS1%E9`vuJM{4@34^W8^pyqk|(mt(r^`jy}6v7#SnKnBvhly2IO zKiiC7WUU+qAHOO57&ynd4tMA2HZuvu*ms)ykn0YT`08X`q4-^R$#}=!{n$HZqR5}E zF%N(1Qok$yv(%sI1G(qt5ub^|tclh}o zwXcc_t0o4nqWJ6<#D!-JoKvpFiIMsG_VpO0_!fk>s^=ql` z0H9X|#p?A??XJZkH;N2tYYq`2_7%g}#S-V_0Ty7RNrj$uDPe@}>u!l&8lXXVpP-4O zCA$a)_4}!%g1Eu`$)`&+`0np!J#G^3CAxl!PaCNjOTg*w;~g*V|?N#R1UD%BNJbilK6B44t*QnP&1y+&t%L2`rSmXwb758f7ja7UrDa zp!Kc{h^08+>!{+OB7eRcGaPFLLKqI(_nF0npXq=q#Pz(BI zX8B$_C(MOnFYZFfHsSo}ppKSkW4FC4?rrH%Is5k*%ua1O>5rgv`%RBp+he?9(`)h| zuY`#QHs}L@+6~t-VLTLDW7fB%cHn&U2Z>E6-UwG;nlrw|&FYh?!GMcMmxb39>jfbn zXw3;y*+Zi)ewwtn;Z#I0aRi3@x}+%0S+N-4n58URS3!ri-B9D>X(M+= zq{k-w#m|Xxnlvm*ErIY1xa}&}g~#$50%@63M*)|=PMP8b@U3JPHv1{{4y%e}5vRHS zRO0?MUZnTcoSetrx|*#{BGV|&s)C%b zpmH(}c`l2@&)+Zf=UNguN&GCa%;@^K38L5M2%jyb&8tbMwR|)Gq_r`KTBcop;9U7O z$HHA#SbiU6>M;{@x?y()_w?Kvt{SxeEJ!acs<{iamGt{){7)zS=l8(&%j-cK32%Y1 zJdmWAg6SI^P=w6Z1?hw;@Z@r()nkC8zySvP$apMQkJ6HGQckhvYt$5=-79k*DY`UX znx$n88`)X%p!w8U4_&IBP<(0XJX5?cC$O4G%_7T4q*YeCwwPD1Dd6&)>OE@Y*0F#5 zuSzKX0tj+YUdvcKF*TM*c$N59hT_gmGax=kjK;R7gAwarK3AA51piON|L68RXbkJ! 
zK2#El^gv({lG0vRMG%)QdOy<$zT2OS%7yyb&(5h;lh3BI4lIhH<~IV?Eh?g7Y0bw4 z$_t0(zG^_Zg8M8R<^8+fDa1dv;9R`66Ga5de?SV?=i|jy=>{4a#wL}d3p;w=i=bQg zXHA!)Lm<$#v0Z$3C^qf^D_z!5zS_&g15zeP4=W=i*9q{h+U#<~ zIF`Nxi{{C@txc(km^07Pi09q#tp>U^WchIv8bS8QU1V<$Zxl)*!nlP2G=uDZsB5rF zGU3x+S^(w9#B>`I(lzb$(|D%j0O9RIq;yR^U4nOOZ-1e*lm7CIT=G$_0oelK&Z-Iz zP5PN-1m1>EE(Xkj!nj=THoJ%TNH5JNWN8fq;gWpNOyPK=gJw~`bK~q` zz@9mQ_H)k=vDxu`A@I2yiTG;BApaZg&>I%hyMI zeU|jBa@XeHjyI6=4_w-wp)`$jCj={|YwOZpJZanafpFazKD7scn*&0^Qr|c+A&8hz zQW+(T)q&9&_Zr&fYv`Mn>5a{>Pm76GSM*%-hTMCeckM78_b7dbN*xVjFB9YC%a_9Y znLVVb)%jzO@X~MitDp3S-;b9`qSjg&*ys1m`W+Ph$N|HwpMcA(c3_V4Kba0L2J|Uld86mky0wW=A zm@rS9uD)G*5b#r4A2JI1c#a%?u%>??+uzLBe2g3K(1pOcI*`5h^p5~*i-+F(4G4l} z1K0zW?AJwSbBlqv`}wXJbaLTKNqyP4KPPhbJce0)xQ5w5G>X@@SQ~{FU(t{KZBsYK zdS6ZHtr{sG#O?Ofj=srPCeE}k6_W*;Y&vd9OF#1UGVoL;H4OcmZ2sHK3P-Xwb%x#= zt8_6?8Bxn3I!@W*wnt;3>WPFa#gjPOtDs7WzgGCP*4} z=kTbG;~=zmUhX}c(L>8NV4T=LlHqLZ>_YkSZ^On|q5qc|)@Be4q<1yur%d}+pe%nm zd!kSWi-`DbJ#aX-?BAQ7 z^V0k7=M%dB& zw)Fn9AYI-9Hh`<*uSgTmB}*k@kLkirMD4q3h5KTIAP0q56DU|zMb@@#{Vm-nFO z=RjD;VBCsSGhnN#5b4eCCPU*?FJ}P^6Qgj)Of+QoeXQA~yr4|`L0HqEnEm!`ZCRw$ z>($LtSTJPeOet?wU;sMuyzR9o_L)Nh&;+4Z{KUlapFttECi?)khcb?-6M7=# z3D1N|dinI8N#5BOU)42JYPc}Ty#KMsgi}Gke?Fr4Dj^|%wTQ)Y1f^Bi=f{J|=NRG@ zJF+qON&*S<&;8oZ`btuO!L0=)nz#5Lo0m}gzr-LQLQM06b;?)z4AQ36?*DEz1_I;t ziseT>#PxBoxxF!E5c+<*akZ-Q>eL3;VH);Y0+(w`#!}8QJgMkwKV3Lk`oHh~_-~(L zAP^eiUTUDZMBG3PmkfuXaO0p!~7OWtX9ac0)DZ)1gRM<^!&j*Tj8->27&&~{+(SCV-bP?ZdrdG)RMs$n6^EEAN z#RsYWUo=vWL}%dRAxS$LpbT^XDs$u_lSG^b{38BZ0{I2=1oIM;l)oY!b$_N}Azvs8 zlCPhoWP*vssT^%cbbMoP0blF#O}@1YJ@4DC+P;*1zJYx874q<7$)YG4>*M!NoZBE-jq>RKR80ar?Fn);P zoBF)-^(Uz2%&)_~u0hLOl)GtQTp0#GzFjWLCK+-4r=xv5(Cd+nre+@jmZ~LEM{_>k z5%Rl{pbsN_so!I*nBL$47s9){biNCq8V4x@LX&05MX59)-nxC0LMjV=mo;x{@;cmwh_nsq^P3sFOc`t#`NlDB&V*|}d znIPCJNBdS9s1TKc6arCYYw%$9xrcKc#IT9`mJ_+*ioCS9H$fp4=Q%$cTViq?5a_>CfHVtN45GcNQ1vKVmoEi)t zq*d<2*@h)Kz$!Ye@c3(A@(gHQhBc#tCK(PNQ2Y7w2kU>@Ah7reFa#!gat=1-eh1LC zk+MlF*v7cLy^pB`OZ<7%kg`Hcpch@ICj;OSSqZs%Mqqt@`uXMwyIZjSQ_6R^y#q+1 z7eEBt)rWQDX*O2OlAXe%3_oM3Hru!)ClX~Cwz1(m)N7*i4eOh5Y8=rM7EWX{jHn1% zL&mt5AIBa$TB8&6QUbYsOauf{C^O4P#5@zIG=MLWK#fSewd7DkZ;lj)Fpq+>xAkyE zM5dn`IjoQurC@*Ld9L__^g5z0sA$=U9jlO-Y#6oKaB)X?PGOpBQ1T05K0C1abkmzH zQw!A;LY!%8me7U>ouC?@+ZFPDA4?KK$|L}#LL^+x1;5>$8!7t>zlR$c4ZbTRcS&W7)7(6ZMX#I&HU5omv-yc&7k})FrrabxiZB$U z7W|#oEwM^yOg1Y0WeOA(RL#3l6|=vx@bAq~LEBRSvKkf^E}(J%zdn~Pes9{Cv9<}3C(iq<*Y)jnf2E*0`|*361yFuY`L zv}rzepUyNzy&GVU8D~i`yqZWQP@YM#U&!|=&`6J%Zn8jA&1}Q*@tuL$2A0_19nnUJ z*yj@V_v;8>fsr9)VDKK-iUR(^Nx=&$VVz2wccn(J(na_<)aO_!n;s`-qXHd79=`pU zIM-g)98LdGrd?4AOWciOhBryZe#zY>rJ)AiJgl^`#W0Sp%GuwQhJ&jh?$}J(pU)rw zIfUb(@Vc0Cw-{TDKQ>{^p=2rphiM(CkbO~A`{ljmJ-=XrNccUFQGB~hwVwW%e)w7I z7EI(5(Om#TP?*PagdTe5V*{M)1fnhjEXvvN_g@sB4A}!tvHY}O%phb!ZkfB}u047k zE{ZT5j_#CMus8IWCGkO-&VZi~!Gg9p70VtPB9m4245j)r$hM8dALz@H`2+_x+=+|~ zdpP#+%SR$4;-Y(IPq9a3C*&RqNJXBDBat5v{R601q?n(tIzwT)5ANhmB)rUGCA640 z^<}APphFIXQ>=F#5{4&i80a{!FF2J|k7W>GQ7URA@x6HFVOF6yAnymd58m%dH^&oV zTsvVFfb7OT+yg*967+ZegxF*>$DqdM$fxgK!$n_q<=XMY`42PG;}fN4*!SRT|DU}4 zHS4s$Owwfj<#-p2LcJjB)7&Uh(@vhc;bQL#CDoDIFj|YtVQ-Pc+Nxj4ElZMCLVpou zmmVr*BF!iBov4S3&Y!kShXvtd?fI%*%|J{MkJc!QM{C7r)wRUJiW@KSt{|8 z?H|Sbo~QJfG)O_6T<}AH(Bp}Cfv^m|{bS=CauD%PGeqv|W=|}kC`v)UKhualXCDp{Xo)S$=jdX{T zum8*j&d14kY*vnh7pz_EOuM0k0aNb_3Tk8+e(vV~C(L}l#hCxL8il^3i+&Ea7N>8@ z8PjvX`Ue=#c=_ocUi7&>f0e0cL@m1ezgT%vsXUU86H`%`@vx9{r#d3kfcKQuFyZ&n z|8k|6$MLvJ>rt0nz0(iq8&KJ>D8aVft@4z;dLxMwEO7=}>sXJp$b!+4Y++&lJQtsh zTjmctsV5y!^ALKy@DHra_=xMTvKv}Y4|iwtOogdEw0xAA2K!N6auUzD?H<%xkHI4> zG`@uX{{FMMmQ(@W9{8-L=)<$jaQk#|__~NyA2Ci%V(%k87+sImG&5R!GV>QdGsWhx 
zPnO*8g+C&0mVLE-ju7}*iDPjhg>$mRzvjDDED=+F;jj@!=>GHYuy92Q|An%eVs;Ie zU$)Y2+)nTSh*w4b5joydQ7mKaK2H0zr)SFm@t*xJEbwrLpx#zNKs-n$ugCLgvgE^_ zQNZKM*v(Bv!r={FN?xL*{$yeLFbZNAk>4OsuTMZf?S)&ge%fQLi#e+mCeB}rtpzYS`YyT)axVj zf50Gzk&+6iTKTSMzy3~PkI4Sd!@1pTn+(?bxaXMh{j((nHvEZNHnpgQ=YIoXa_AMP zkL(EiKS;{Ymd)SqGyTMYQjdfCc?TxOiDJ)deRUaPJ zhN;}!^-uxDF=$Ef!~gdcE;H~<3%~jCkc?F175kjq9O&152VmAzCCMB|XX*U#hM=J`mhSCIZVUF`LTZZ`KG?(69qLve6- z`$I0h+V;>-L!K^wd>`yc-qd(V30ZrT+ykz$NbWzm?Komm;xs%gzM7aA%LOT9%BCFM zXWI>I$l&<8=PsF>Dez~2T*+A)S17Z^S5l|zlMso3=^z71fw1s%KL4j1ttSe@q2U}P zO5=0@pe)14LS4ik|0TbAi&=PewMuSl1y}ynoRPKIboN_YZcgC2cQ5$+SBralGagoBOr9h;{t}sVOB}|MATwjw}65!f4D_{nQ%C}wZ8Bcphb=kp5UEZV%r$qvUA z%T121T{h>5BDABJ%wG`IF1RXx&hK^m^pS#m8!}&%I(6W9_;J7B_TVU_0o?>1Ie+0D z)T%}o5pER9uls($T=YjkW5xT3O2=4#-_Xh+ho`?6npok^7Qv3cl~cCbvdpD6RF*m4 zep-)x_4Eo5JWJrm;}5r_qg@?lRFLS;3>cwR+V~M=6(M=-%IpS>h}4!UbE1vNRN$8{ zF<$E8bIFv(H|lKcowzg>^zBzmw+{-~WXrN|7qp~nbdLjaZ- zV`Lt8{CA3dqNlMYv|9#~=)MQA%*7tZ{?5J|c?PBVsuZyMbCE^sTupL2&DszS!`Y-veWBZJ?Mp@O<;Hrn*o)6*zrG`lV#))Xg&||7n>AvU1$U4L8>){>DL) z(D<$HILN>~XaKnCEN)5Ck|dG|$J4Ex(WPOkX*9AP*ZH)1LB{g9sdnVG?!mlu)3;Dj z`O~e!!oM6^WS}Hvo%TJW2qM{GvFr)!bl*G*OWHmrO^7xXf}XF`WHN$*uY6uy(4~}3 ztNg8YIMHDCSkT<}R&XH8Yz|FT4M?3Ir&6Ri{+O#~54Ib@%kIsD+@*U&e3c(dZ2NuG zK<}{o>qm-|v;7~%IqKiI$|HMseep>N>Yt9?lYh6wh};E!W0fkY{8JZ+5xQaz(97?> zHTQp4(6X`4SxHOn(;k<2>RU(ptdCP2BWWOWNTeD=Z@K)$oy;4lDUEV{T;IUDFX4;n zGk~7T404LAk?fZ^*P}f7q|tEA{I(GNuxVo?Cw8Ah1NI-skzvhkfqYJhhy2va#3XvM z?9eKi3bmlgH#rTaN@m|=r*^@BYZtsd!zQ$I?L$^n7ckx$y%7I>_TeFsCnJZX+x>-f z>~0y|+XB>b1ujT%>GNt^p>FRUn?tgIsq|+># zsOi1m=}*8Jp85gYlmWZSz6Bb;?VQ@7&o3*UVH1lnaS z=BNcAvTc7Rckc0k9xxjwU7WbvH$Mh<1HGXvp%ELv>GlVn41_qmq9q0 z-ZYUS0uozSD(7w(?FEBi?jc@J%ccFM8un)kSGexTO%J4+kq;7ir($Qns5A)oCPj(5 zx996T^jnn{WyE$W7?w||o~;tZY#G0@W9eDuI=$#xlu;y(*UuS^&o&Rr zLyBzVHC;5NA(3*3y<5Ea#`#{GvOH7BmX$Kqlliji>BFmc z)sSq9{Vw8i(?1d#Lj82UmXHiqw>F^xtqW%CJd8krslq;Q73!e0A(SxuUh4J0ggAa&uT@`;%kc+Wwx0bYTj$Wx zFA4S5Ju-FEmXm6OD&397{p@0hllgHXRxzA>!1 z{oS>XXL!pNk*e;mi972a$U+gbp_fI~m;JUFePsIk?vdP$c}oo`=i~cU;E5@IZZFRL zd%KH-HX0sfmB`T}-{|knWQj9%qwT4Rkz=$95i?uLe)p*;&5-IuFG4#FuaaRMLZwj= zlrV2^{wyxxAx)uXekV#?6*-Np zlbp+{`vQ5>$_VsBBS`63q}EALsYcnKymEYjg#Gy=6qWOJ$9=4BMLU^43?Tig0Sm9B z6;<3__~#7o3y=?S;$+!PQzYft7B7-{%L+5`eaa)MGJKqH!J>q0*;X5U{Bjy^F$AT0 z(JNBCTuqd?xsya(o`K@mX0|rgjQVw&*BA*!j>Z{7dQg<(1dyZ1GR&uynXYV7K#aXE zm1V3fvAuRWwyp(duVkNon>&k&lU2UocV`A@RHwZLY0*Sm*gtvUECnAae%m%8+qebV zJ5p6c=o;SHXF?}vupXl-<_|ENLzrJ(A3T{K<~q-ekQDH^`H{iWFk0=%TA(9$6x_$W zS$F$^doc@5C>ge-+uv*%T&%h8TtZ^f{CD4GI^XZHXXB&sXkARW>>u(8I14@E1!aTc z?EUi73-`UMZ?R=|_ghV4()9F<1|mcqFlWv2+7jMI73iI6 zm_!gT4134_ULVt@Akqbc3xXE6LXl_T8Hh2z#{cbEd#1QCU?^+!5hsaWziRG8MTtdM zmz&-82`duaV5-U-@|1lCpgrGlYLM3;+cHLZCI%+klm!7XSm#c|i3l$o+nvNDylgGp zX0x?+X(kI9%r|#NxW9^iz3&(VI!BY?#n=0AY@eD5vxb28t4PgW+%=&rFs=axh}Q*Ub%5dlTFUnkL(+BJ0NKWe%KPb?e(W7Kn9&*y&Q50+VkE zE-=?c-0rU6=8FmK^}3W~_m$^SuJ~-d8H-4PGXpd)0*D{iV^r-ta+MXXR$#SF;AqH4 zpwwN0&s!|(_1eeLFtN3sRY7+ca8Dd8T*oyle4#lkVQIL&e^VmAtM%g`xZZpIyZCIb zl%q(SIz=gAUlI)@_C}w~cQQPMs-9|~nP#37ZB^@|SCdlmcbkxSF;1Q@Q7J%y18=0D z+gR?tq=|aaEaVD2myc)r<8MdmYI^=V_Q9|x~XwW&hYM=jo>X2^PrEltMl&-TkL_An>Ai_$j@`k6g>OcaQl(g zDIp_?U`^~`9!7$JA+p2LI;TKf7~PNes+(6^5a06T`3;6N^qv?ZDhGx8GP}uyed(M4 zDOUr>J#9%^g|o$ST5)vS7f9Lk&4y=F#v5+A>B@K0GfjepHkSY=h|KEj{R36C))-2^ z)=4Q?^m6DW*?Ktt@hAh;&5f%nGgTabKrHHEo%=`Q*`|nUvHN~pG}9HnD55cU#%b5P z_$R!O)1(F@fnZT3pJ&P=;GmX1{9@;YT<(U4X(p$KV6!j2vl6#d{FNv{6K|}i^Z>`d zUJGpzcHQEHt*}Yhv|0W3=@Th?26#p*#)97o15_+030G$rT&5Eim{N^yMK@?4N*m1R zmknNzmTk#tZ~FO79l>u#1CMRS%{}E+A-)3FpbXtSFV^3Y!Fial1uqLM_+isn*CQe6 zCrQFx8BrZeN%Yywyo}N9c&bPh;DgVtQ 
z4I&0VR->|{E@yCQ;GZDOW}n5Q?Io?yVDG%PO8hSblX_%f$H;&c6y}nOh1WB`jcP(vUE9OACv#*_e?n=$*QIDNgZd6q zJzG7x=CLoF|DH3uMG#a!;}h-ukjQWGX4}H!H#MHps3hgd@gzaq<{p-1KkiSSH$A5Z z=UtMMdv7+ulpTy|$SYcy)HX*VkOW#G7s9TJOu8*^(6NdifX4{1!^p*V*MD_pi2CuF z=SKtZAqOYkx9Z`!aC5=)V52ME#0|!aaaSj%+G*BzR4DPpJST$3VY307>+3p)o3eo+ z5>KiAJk9+w$LsIz9yfvnES9*o%AQ9dQwq%=OFObpyRr082d~etZbTF##CG%4PwHxI zmX@nC17lg|d6c-^#P?~8V#$+*?jS`Kw5rz~;FYpa+*hkeJpL6a7SY#}!2X#_`#x7j z@}{+iq-?J_@%R;H*n*6mGLzU4e-~zLI5$;7(sUhg(n+JZ)sikKuyE^VJ$4h4VR*K% z641^Di0~Ai848oOUG-?>mL=}~wlg7jf|sXOc8ylEYK_B4()m6*5m|az)ur^P%e+b> zHC0D(LR#_|`b~pTFXx4Znl1gJ?I+xCi&p@Km&qMa|E1ZZ{Ft0}WQOb45zrd%$+8gs zz^A)OdFDqy*IyWtMY}n)Fty}k?1hq??A{uvv7P1<2Fv)bUGRx8o{y`?OcpR`@)PTl z=B^kLVgvucZ36In;Us|hX5>BS@{L;d30!nb=~ZeOrp(N5-ZKajBV~~ulVl>%^WaF3 zqcfVT$qg0<^<%aGb2VaV->t~oyy$AqI4`l}TLTa$fxgST|0d#d)V}0Ce@|w~N-g*W z5(C0q?|G@8+_xIT)YmS%_4>s+ z1=h^==X&Kx*(2XA(JN1f$K@d&N*|-F^V`Bu!p)w+-loT}AoLf&x%*_J+FvQ)hjtHjRxa-R< zz=hYZ#~!f7JFpqv+9g-Ccsks`dlR~F{Oeu4(~+`41Zc!$KYrpWTVtQ@3UUmq+HiZh z`h1n{0KCClxOcs;Q=k33{2=S@a(C1Gz3W6km~ZF)c{G$>`%fcAiKF_(d+uhV9y`<- zR(GfMm2*G17@l-}|2A|>-013jrm3l1vLlmS&%^GAlrTEtDdbq2lW7OwN$2rO>juun zZJ!tlwk!?ePaFfKn5s)}bs}E!KhN5OpEt%IP2cWIrw^bpM{F(Jv9BcCY*JNmSHu9q=ErLpN;)vfz|x*G54`MlIo9G&7=Rm*cqv2x}( zfv~j4vRO3wrzFn@AtB3Ztvl%jF(;DOpKvIhQOu1M%f2T=(YnUYmv5+|8f^Z+*HS}A zCkgzKs%qg#eF%;%`1=_Gc@9X8pEhFk-jRVLAyf0Y7Q8UL-mA;R{nU*7>yHDVE%z;n zoL8&NgFoNM6j0vYD=6!JA_Yt;)4RWIoOUy5dTa8+X5?qkl>v?BqH1WtO68xHk6Vh6 z3g7VW)rNx*t8KzB@0Oo^I$?W}^VX&>m+H*m9BHn$DDMYu)-nc5)e zjdrfu0OxLmEh<06HQC)fS+a;p1#0fio4}Srxlsy}$n}M>@HWbu3^vi72_P^tG6@R_ zXHu&idZM5KxZ|0)&^-Jd^U0m{MP?lx(6_=1&7`dPQF5XR>-?HU*WFL!3KfX$2Apbl z&Il{MnU6E+$nu+AA~$Nr?WUb`BKAo7+X(3ja*4OqKo6Z7Ad)F!;|J0~g~^Yf-tC#_ zHMt0$QqU(TQH1$P(S|9GpJ3lX46?C^Et)--LPO1{0-d#4ut;Fu`Rz4FA41S5e!G)f`HowR=o8M2FjZUWkw&+xJ*Po5}u_-f?qW0^$>#%yX6d>6NI9NSE#Ucg&zyC66}si7?9M?Q5!ec(vO>^ z1z}^+k;{@u?N-*rWGixw53Q;?Z*JUM^fNA_Gmw`>vGsHdJRRsXJRBSFDiaicL78Zp z>%o29M;oY4WU5AK)5~N0G`&WMmH$90#mP5X`Yl;-I%YZtae71Ks8ihNGqw%`2XUBA zVQLvhS!1hCu8XSU{$9bx_Q5@;)8r+d)9NXGm~V|JdSkP)@g+vU_t=h)F;-;3Qk$U9 zvs!b-W-+LWU0I5ALA@2KU_wY0jm++q%X_7574Z z+Pg~VwJ|T=0`S{)W0Zj)?>oTOJ|Vy?rB}#v6FMGd32SDz z7}4zza|J#qU6QRr!<#bgwh98qMS1KS2QwA3SqV^;s9i!9h-)Hcyz=@bJi!9;y)R@} zmY$;cI|E7|U!(=Naz@YHfI|zs<`&C}6%#njT%gy~isph;f$t{j;(prW%VGOhx7+FC z!g2<&VSTUTl$%u?R&ObtCJ&X!89HCzC>!zF#qzN|oxcgLLO}H=kQbkp1aPxo%>Hy3 zf+#rN?+NSFS_QM}R-!#(9@81H19u_3E$BF^p}`drLz347#o% z*CVJT0FIZ(+Ob@i*5J02hscTpXb)o*@|TcQ7yYWQd%82%(Xr>{z+xRuj&A$)r&qlj zAf|Wy^w-wI1Dj9dFIaB_wH|pxTa9J+w5sAwfe;q^Uvf1e$#7o5Q}9;EGjMWWm4q}2W`_I z)X1J1pA7jXtI%{diG3l<7s8#Bn1 zViP+ieARI0JL}6=>~)1bN}kWi3zo`t?EGKpaDp&FDRU0%SHabgHPx#3mfdLsE=%9! 
z_SgB7d}YuKs3_b2STHO%yEz&)y3wottTRqbuj|`tnrO_cEq}?H3%lRMzO^)W%U4~g zJrt#_L2d6sH;SA~(vJq0zIGF=7b8w;F2Y~(;iFCaJ{qgt0avCbe73L@`El?<$Kv~m z4vh47@H>hU=lI$S&tD2;dk;K;fA{2PtBQo>x_Tw)!3oj87OH`9?P94hol@NfO!g(h zI37QHc|y~e9cX#%eYBig1U+I-{uhz*a+Adnzy zn*ZHnqbaU(kD~`kwY588L$P`EQJk&-G9mr_?_X(o_-dupCrLf#$#ArL>eZf?$OVZF z=HoBW`(D`jd@Xu;hPN30=$~Xv3A^80;^WTiR?T_M05>+)WEQnsZu_GizY8;i<)rkQ zh*X)S`AS8@fOE<9jfg_)oBo9i3;xgI-5E&n72Rwt4~p!tijP$X`Vpmi7G>Tk7%xYQ z!Xf}uDh4wdM69xDS!Tb_{&b&kZ6PeIyAArNHwF=Gz&p)hyg94SeZ^i;R*P(RGT3b% zK$$SX32M=?tCc1?=Uwu~W;p|xrynz(^|k%Ryr4t}rWok~2J?)wo?^~UWwbO+SF@5u z1xwRf(S)slV;x6m?&c@T0%VZo-;)BWS_I8JyWH*wW!~;F(X_p_r3r>!&zZB@+Ddo$ zY;8m6#ko(yet*qM(}rNNwD zXJ0smTA7;er6?~!AYoexw^w+>>AQl1n={>mI9})Nx1EFm+Z8^G_FJOQt^UHdPj$3*&hW0givK4ZXAy=o7i;_{8GWK9Y%d%#NRWH3}Z_PEmT<1ki#7+4RE z7QW~caa(*U-6nS)__7Wk#wcw!!YHPCya$dkwjI#~IbX!QB?r10LkGR@LWB}xY6VI# z5g)=xo#yKGcc%;Q?4_wru$sn`F7$`F6b8%@`~}@2=@G_fy6^lo@m9+n;1hD7#!4#n z6+Jj6!<0@t`KGnh?{SYh&s+l{#=PH_(VVIh-^CxxG78j7s;6zS?1sFuY}Jb^R03)C zy1fh@P6x1bUGRIYwQ&Hcmj$}iCJ)~M!?+B=$;?=8di4mliF9)l1=P87VN-MdhvydQ(Y(gzcYno_{)UXf!TB2If=Z~5ijvGfKp`|9xr6LC(fac=#_ zfbnGzV_aaP3n;9Sk)fcI^i!#8a!$-!NK!zKID|r@D$IOn0Nquq98r~i%P6ra#-(>x zK55MFEhItlR%5{1j4X{Mh8pM^I%0k;lp54~l4^Zi>M8U~l9E%taCnN{m!mr*2~|u? z5bupFP&zLEEl%o2pM|1{cv|_vpr#ok?PEINMTE~bfp%{~_ZCFiIMD;Ci!f1;W|sK@ zg$hF)x++O|Y9#e@qnbmLQnFP8R-<>2X>vNc&{G$sly<)W~iEXu#uY`u5j|ru`G*69V)T< z1U`_G&Se68zv{KKL#Y>7-E6CzbXZ75FMfSpnw)h$j!tE7xs<{(be|(y z?ddC#CLQE)>2*kceR2`?^?>qbq0Ki)T04$4w)re1wLy{yKO4;@0y-UY;raM9XqAqL zzDZDqDtfcgke3HVZPLV6xUn_cS3CPc*>kxIU$1H5U0?*wGc(Fk4>_ ziG`~f6xD{`%$}tjb{!{$eV=XDz9NCu7N`We$M7%QYfqO5kTaj))QcEf1rR9pXs*Ve zn`16KP3L=}_gkiNH%XBY6dS!#8cwc^;Na}st?z^&TJCah;$D&sPv%!eUY9S|cRG`? z#k6e=3WoSPTr?0IN;eBnf`JAxa7c!8tgT+O?OkD2vjz{le#cu^Ch}m4oD0aa)I_e3 z`fmZ-gXhjd569S*ycl>_Bhj3sz&upxRKzqXgc*i7UC&UE98)Qp(w_0e3~0Kkpjnm3 z#Efqt6VXfd#j`#q*+XKLS1j?6_b<+3DV;2wxnj%JA)BJ=$%y~0u+E3Bta&2?;vHcylf*c65E_-YGzM`)uYi1Fq(OL@lY9Hb8{EnfVdc!b&WFc;9S7W^9 zDhcPLLW-l9cU8E#%lW%apAnE#qQ}IZNMf~G%ZHp(GUH1UrV7YZE_1bk=k`!1j5URW zu6XYbvjZ7%`zme= zXpg{Gn~k+eo8T@2Y&N%jDbM57LkJPli7W`M3gE{Bl3eQd23rO^P=Z*Wql&e7F|Y5W z&+o%BKHdZMCl`J+j>tv!XhPO?U|No8;tQ-vI@kW30oF$6h#gD<8k$ zIK4JkoW*MSh89AsD*9RfW3Gu^SsrO-E0_U^5|5nUp66SkpxSSC1l);GF(@DZd@W}B z=v(!xUHHH_cTbV_EpzxB;PSn32t8sM(8ICV=;3YXDRSDQ>_`;kex)|J$igt9d3ZIj zNpwsoWEAj}T~YiP*CpVp%o%q_=n9GAZ`beJ{*hGv*tKi;-Z(WWe0M=yr^C+pSfX>S z!VoIrCX}4)vg_0J!2r^LI|!sr))dxwFL2})0ja!!GTT7lu>O+j)-_f`5!1*4^Sf~;V=E!3L*6u|r6*n8`sxV~>sG$FV{a7hOU0fM{J1Ofy| zf&`~=5AIIm5Zom|fDkOWHw0)TxJw|oTX2`Q>EFF`@B2>G)YN-#rs_$}pKG6e&RJ{i zb!6}TS)V1Vym&cXV_L2!Dm(Imuxq`hEB2jTD1_6v$&V4V7V)w6G73wJL^y zOfeGR3qO38qf>urHgR(5{mABBcF=DB)Nk(wmj-nD6erAscQxvqbV1|SG0E@CG=MSW z#4$@FouPzgcyxCPL>DAl@mX6=`9$rm8>KF(=j^tdu_9cU3#Y8G^|3{0{IHgoB9~uk z%JnqvEZz_p_9fr$Swi4m_a@{`wgIH_h?0FjzUkF1=N~;$qernbTn0}LcZAi+cP%c1cd(sP!V&_XM6wMBZQs%jcD}jZ8Q(j&urdA!>@j|F zIKPW7^}^76wbH(A&pyjb8qrq%*(MFCv^61Q^vt1n;xdl1#zA~MDDd{wfEU28qU#VH zd&CnKAyPRc@17HQ%0ma^1BnNa?E?+t>rrbdcet~c_0B90fL%bgmz9}H9@DviSiQ8; z3>}zX@nH>Lu1?-tijT69-&|+^+Q-IY+XdYmdIw zr~M-nh}??04QcoKI8U(F(>c9pUIyRu1q{-4$FwA~hXn@3Nj-y+|C~wUk5t8XL?Z<_ z=Z@ayjr*^Z64>6fv~)Y)P0Z3zM+w_{e?G`vV(m{5KoOGV15p|87#2tX4_+4Ww?&FB zB;D`@&weUMQnGJkBHd$GJTfS%8cq~=>Wc{{Dp~tl*5eh+9VP~TM!^}uT#Z6MeI5R{ z{j0$Q%7qZ3$g|)!^z-&9qMp2NiujoTZp;!B_%HJEA&i(%$(VYORehSiatdD^`&pPp ze_l>z>Bx8)F#%ii_nE#!?;FYDCG zVa3Le2q+r@sb(xUpI#n_au&a;k- zA;t5eFGPoOOZZp$V3fXw1UzInGIa*r+}uH^6jrh2R_|KV=zYikqi|CjKRqIbC}m6w z2}oE}B!Si$R6b3#{}|HNOXvH$KP>4rF{5D%CWUg-c65FD$;DPK#sjYmdHV%NT7a#f zp#$nRe~ewYFDKQ^gUS4FJE49p+&^>=yg@SWDeofGzh<#Rl4yA*??0{(oQF7#DU9v2 
zIVWY`<`o-4MVnRldG;QBby4dIdcD?1=5;&Gu7EnpWYn`6>2p3CjPi+#XxM??n({D4 zd1mAh0gDowdy1Xlt7)h~N!m9xZeY>XE zVN21nA!5kkgMgl`-kYyT(*RuVo(y-?RFdD~Wp@2Qb#l`C{0@`S$|&IjMS|F3)i9)_ z9EgrGe7<(6FkU(w?}lwIL0l2!wHVb3#lHpqWTO_HZrB1Z-d95Ub}U`egV_hWEN&KU za`szq=ezU0F*>9n#Q`3FA`-c&djcPgI8M_umYHW;lF$=*^qcr1bS|A~W%{MBR}C}_ z9r}+b|2d2kqQ#A-4{6I zfzYG?@q-@p3_tNeX>%)>22tu4`SB!aV6bV3mL*c!g)aQw1Tjf|XD#z~Vr79wX89gT zODqFUP54^u(@Y87y36s>ANPVk#tdUYsXtEPsYm1VZaygt#oVF>cL1}z_3wHyJZE^| z10yCBE!8>;LSN~3-#@pju@eG{@6TSihlyj4_^K#K$qnQ8MfaHmWOVz5n)GKMo{pI; zvgM@>nBDL7$i*khG^8&gSL_QLyRqcBxVf}sFqUo_*$QMWcg*=JvmWrPLQ-lKX9Uqp zbhDgSmVeJoMDC&L_#KZ`=P%beuY;VwWH)E4UsXH!P_fsnTu6;6#u?^D-9`v+ zI6)|V=#{+VLsQ5=_te{a)BCy-s?VI;0xXN4lSw@iWFr=cOTki&L&Ik9zL{vQG}D0o zr7|%;(!$t0b}66^A{?_ch$NuhJx$)d!esv;=nCp2;Bp$7x7|vkd!aK|TVF?PIn{oO zFdj1%LNwT~tC+u}c)``+uAn$0^9&c7n&W`FcPj%ir0;dDlxdZ6Cg?&p$5dJr@GR3@ z)R&3F(P-z5gaTCi=Pc5{tZW?GYOChG)HpEABk`!p2Xv9~ieaov-=Z zLhZX(sZ0n%%P~HI&E?r0;1isn*F^l?0B9{khV{wZl=`Shf~v0`B4Cw`(YInUm3 zM19VP5BZBOn}Z?IF;nu8t_wgswM)NwtYH!4+HWX({LWp0XQ9Pin2S?alL$mHD1NWk zVmH#6&T`zhK`lv|h5fO`olBi=3e01*HV2#5{YW|LjwCQ2_NShCthaQD75#Jal)Ttp zQ+Kq%y#B`X&G$MvDaMR-N!L~Yr}Dz@q3SU~JK>w^zwv4%BBizTBtQx;Bv$LaRB@sg z&%>3sbGDkG%uhPUZh%r*?qD;mt*A-kfSruupYM_&Ne|12N?d}v0fj;8$$$X9kGgn@ zLqC?yFKsH0x!7fX=Aud#oaHa_kq6q1_9G5Ql3{%F0T#mv^*17SpR2=eXy$G>VLMKc)CdOCIAAlF6rUdnEr#E_ z2?X&9RFhBE%OR2bganB|!$^ZzLvIYY@<2XisdE=|3~#wPD$F6zw*Rv!Z8`=t>wO_; zo24O0t<{h>u>GrXDCJ&uEhKPr*COd@8t#^h&&`VG)gpC{nXmz6Mm14U0&2%00%F>t zSAEWEV6E+XjLGg~GwpPT8;F|9nBDzTUqAg04@hMFIAEi>(}!-yjATdjO^}NFq@`2( z_v>2{z{oCm@_l7oMDQ*9@LfY6w+wWuY;S4fk3U{l=PY+bX^g$?Kg|4vQ zex#x~8TIz=K8Uy*NifiW zkD;8Xuk)`9n>nf7BUkM2s28)pQcnt=R#WdBK7tt11aQ|3s6X1=k*D7D@xvzS%WYi` z|73n1beO8TLB+%fzn2tPHZOj$smVL~iOdCKd-tA)Xbi|>9cwtJ9PeA0cb6o>@IF>< zxmdNrr7;MLBJgM?L;eHLlxS-6<|daOC{wvdEll!9+(vQ4_rei1VxE$5;8z>Y`O~NK ztVao3?O?O7-4QXPf_gb~KfcK2OHE-@K?P_DnNEie6BBpxF##>hdamgOAUQY;M54gL zwYU?ANu>P7NZ(?b2c8%2J9@(UrF$%zXxc@}0pR%g(aA%i2yz1utFA+hg&l!(l@77} z^!r^iL^{==M_Y~yxitR)odUJSETj9=K zy)o9vC#bXn^ocp09B<_i`S9_3b;7Dp==-~QN4K{2=PzP5=NfYlv#Dn>K$9Nz?;d3H zG4M(sKcmrzi__qhca6wIZJD|ns&LCXK4G!59rw|bsrPhXJJ(&DyBackE&$u5I^SA+ zl3ByL32FOC=-b6Mvt)xNHO0P$+zGzIvhOM00+e1~x_ZzC@g*L(v1muGO`%`c>ps_y zHAZVff9osUc^~5fYv@2<0s=m>&R)9dk!<<6ia{Wn{j>^M;GB#g)?rF7-fnzzoo0l< z1;Zy#W_0#kTOJe(tM%;tJq?fWgVrDI>NNrx7r;|MkHQAj>YzmkeDy?~p_}=7nJfeI ziGU>;BLxL4nw`fSwSedvClV}U<#Mw3i>YDR01{I#)~Wriu2kP5;b1~a&SiP()XN!7 zi(Al>JjX%0vL#4my48>6av0S;F7D3jLVw@hpq`pa`&L27*$tcUZbBH?_jV^oCu*X0 z-lwoT6=~a~V&oAdr$BoCi457#-pj@L zv7R@N1Jb^TT}Xi@cVQ94=pV@m)X{rJ(k$cms!Jg0 z`NIwZc{b5O-f5nr5q6Uq+Z1q~R-YEzk+gSCd{i-T6MVWz4Dle`?nR($Urw751f&Bj zc(VwbS-Pgt|1}|qn@1EaXZE1gI*9gi@!8Lx&zx|62up-4esz)wW=FkaA3|HL8l?24 z`^OjF(wSPjDVs)8oiTJ0elTawJgcGIIO4_&qJFQ>!&b3|)T|`rBKS=&dYGjIzVa}&{%A~=nOyRlp56!x`U4gt zf?_GA8O0kEO2XZA{UJ)xB9+;nNI#+6;7hr2JE0|_W<3Ok55c~zel}%;#L_Nuw!V!t zekL<`&;Qq5LkzQx;cYlSk!0sFVhL}_Zk^WQGd3%Lr_tyqhboc=d2=UeHT zM5e&a&ZOu>OR-{;3iiW9HM!S@?Ga95BRZkSA@A>P#Tw(~_Di=L?z#;X4qpJg zSmW8u$DiGaoU4b9hP3PTatY{3T}juc*5DwYXQd14HCnLk8{pzyom+Tf2b6#{_nDH{ zJxa=7)UugI=;O}5QJ!?bl&5BQm$}MOtS@T%g8QnJ{Frw~ zg%iJsU+#R=v-y#GMN`{%e{i-n1o+e$lD`+(q&m58C%KPK85>#qAS#>fZ9b2WMi|?H zi1lO}HhLdaAS&x4PpH@Bl^Q22Q54AwiM$>KZD$n>`)<$4^d-OD@gWkwlV)#C!vmJR zMjI%v7~{zPYO5AELH1){6HZ1C*P(^cx-03Z$YXRt)6rYuuFV^H2H2il?E0mkMKX~T zd$8~LdPn$_51Ig+8Xmc_CTB@rCA0Nr3uV7&0=!E$>O?+ja?txy9Vr=p;y4^bK(`7+ z=WN5WtO4_tkoZ0*TV;JqL*H@Os#NSW*a;e`?ZW2L@954!dudq>`mp`Ne+ojzg&W@(1*^LgH&*m77)UQh6d!BUL_>oTafXPJt6RhE%x<5_)TVOc7QrWhyhnh{-9ut$`Vy4= z#-Ypo_Fnlr);o!xXO4QRC9=vsq)M@Sy?$OIT)_FyHDl&|9}n2{#;7a-qZ 
zg$=Z0SMNk$wkN(nzyok>v*7YGZf&Ii?!NA>!$lh+Oqsb?3yTt|g>w;bY9hPU`V`C^ zs>VWiBj|7a0K-F)k2rsys9z9;YX3kZOMrn`WAwsVH}#_hT-NqVyOM~83Nvjng3k9i zrd+3T#^3IKE!V>(4X{*4E2Zhn7$|}mT?;jsc~UN9L|wGWdb4!fLY!YVg}XJrHw*~TB-|XKo+mrHY?_`1z3?w+$|qbsv^XXmKYVC>=b zLVQv5GmD#@c9+?}M(Qo_yLtL@vr>EJ2i`dTjHheT05>soPL61CvyIdfsjFU!%^{bm zheSMi9DW?C0>+;%OePkMMSfc|-{adAHB}DI>3ZXrzWS1yc)8z2B(4|H9YihrF{n90b&r8o)KP1YS`}X zPT*LW9VbUlW56CF=63m+TTNFDpeQ+Gy~PNnD_VJ-Hml4e;pJ9fgvM=Y%$LBerKs;3?J7OUy@O?w;?7ZYK=fKXEg8c z@5qndXW6V9wi!Hq6?vV>?Ef_LWtUu##QU({Vx@GRt{oZpZ?{&j=G_3J&OyZqfRC9(Zt2Ne}BdyMQgwL65;_d`0H*FD}rp7 zbm41UU>(!cJ90L<>9}?c6uVcwX1=uf%RmTb6Ik%+zi;q}qY$d^(O6e|iINHQE1IfB z^KwzWzX4OebMNSU8)`S{R|D;OQx%|>;Q%eBhD5!Dh^)`ooV}H$4n2l4sfX<0%#8yfJ zKzbxUT4YnPw`FPrz-`(A;8Ky zy@jjgy!Y`~^pS;JJP;0E9tMh>eJj034FKIvuKm|~6gLrTS{=n-DXA;E?P*O)bXX~= zGqp*P0wuOeW&>c+#b)Z5-H%S$+~89ZZ~+z<@j@_D$g8eB9@tl+C`ZTv;(6t3-x8u= ztE1|Ug{x7<_XU<5Saf)2Gg%n4MeexSU57z^goA>KevKR?s@sZ5*}f?Kx{XIGM16aj zdS$;OL!+(wxA`)!1q-*Uz>j5aN%O{!xKABAR%kw z*OaE>XTbrhs8(+0-kP0>8t-3{Bwj@Ayf+YQDkHr7Qr&z<8L9?>TQ>;BTUF805j|IT z>zW_KxL_rUNxH#}Up$nqzV!>fbM}mSdT#XZ8H)-m%B?Ms=iKL&QYvp4RSB!+&!fh$ zD{=NFAG0Tandg1ayZU>!cT47uk;HrId&ja1ldi&g-P@%+ENC95tNxsE+x0gIj|W%n zZA4~Zr%qplaS-2|KY5BT7kkXQq#?M}!}B&Lw(rIgIl|V?F(tqz$#iGp>dlS+2=Om@Pc6_X~)XDK3STcdq5U-I~r>4PmV?FKOe_w2LC8*XX zBe+cghOjANz-I$@mK`&WHk{XZr+6P?N=}~q$JL8z?KaTk+z`8)byf@+e;zBw)CnAYxU__N^>G5qL&)|okqSOFc{^He{ z1KDS>{UmGD10dFFN8yXs;1^i#<7ctZtMz(^Y!<2xv%$F^ri ze8CZK|IGN^kl5J+Q9z(OD)oG-z>gIyqZX#NH?k2EF@VZIsx$N&VfE zdlbjc1mFRuo6iIH!!@isM>VCJ4+re@vlwJPZR`X|A`^R#vYi0#JKeF(z#o}8s?tYf zUq&Y%n=;T5&hBC2=vNVDY5HQ5*1eXm4T~ZcE*Xt8_(l4B1dmv$Ywh_b0%1yNW4^z} zi*Z2P_g#Q}mw0tT9c3J|&xedzq1S)yW*@#QQS|k4|BFH-dO}5}=WF!VX{FCLfk(mX zo?r`y+9FSxTThTfq{T40oZ+}=%f@B_VX_*2LO=C7t_xMbwA;L9;h@!8Me(aS>W6^j z(Qo+xdA3o&hEzYi{ueNb`N^B3wd6#+!@Gw1u*CBdyqh-V(zA)3WnfH+^;tWJ09=$4$HkR8`eA51@)YuP9U>czG}WZ^QJl$Q%df3 zLe%;lJyHsJ1WhILg%8LrWVCg1AP&>iDN%*v!0ttoH36%&X3=>$1s$XJUUwRhc zK1n-fa0iXDzUvsH+JSgw-p|{Ms;BK~P937SMza|!*aqQh{ka>!A3o#z4>ei@bLo4; zg4+1^Lef{SC|@Ig{`uX-_N1^3`gnM&!L&8rSLDg%Xv6aSZtV_i!ntRfK2U{6Lm6&) zIl7w{6}asscBoKk*KXye9^!-8v#v{AcdY@CM4Y}3@PVYz^#(qUl7+FBx+G?+TbI=2 zaHXr5GhAM{)g?M!zKky%pV$=Rn7T}v*iNbSgfKYVe=oH<-9=<}h+M2g4xuz4lSb(+ z@pIv^UwgGpoFFSm$j>3f{K*Atbij`SA3S%81O#qPJ}UW&ACJpH=rzh9q-Q6G9KfaoH^zHSQ4Z3-QPrX2f zjTDIm2wjt{8<=ff(SG$Jws^JQG21Qfi|cr>+h<-Lf>|Y`96Pc8!oV#)8>MQR#bCg! z81T9V^MWJ!h=)! 
zuZqQK@}X-j!A_xT!)Y#}yi$N5Oa8@FwKt{fq%X-(Pw8@OBTCJO0$^xi6F88JVI~sck`G~ zt3|N&A1n*huBqTn-0ixH-U_noH0|yuQOwH49cgVJjISs*o%i=o+!M1Ko82j8(XotF zD$U14p9oF<1ejak@cfWE2Dn?A>Oza{}d6O82&Fu1!D8p2Zr~`hCJl1n@U=$?9j>z*}~(Jk69S7@zEdC zC^4|_DP3sn&`}=lL~W1HHeBB=+H;$%8NoLZGj*7E-cIvQB$E& zf#qkzcNgKsF0^?Rwwbu@yyT0CP&4HJlJPDeH-;^(IMmd z(v1vz{CUJOK|r|w5Jbt;3?iTQ2)?Ywe>`ITWwt|D5!L5zX;vtT0ZTS^CnhrF< z=*jf??dVRHW?2f^W;V69*;U=Xa}EXcS*3n02I-yGNAJWB*qI0fcs6(On`y-PT_$pO zityawI&KC@4vs3U4wIJ=8cNFzX%o)X4ub()Z>ZIL;%*$eM^e)Dl_53z(e!^#i?%hB zvOhSmuR)9grYMqj`r={v20tXYc7KHRgATM`Z7=`*jv7q1viy^Tr9Xk%FxDd;Kf4p$ z2w=B*rRm8K>oj}*rHe*c!Lh7(^AQ?MU138Q)wDMrJ1-$8VnmL6vG(SegBgMD z4X5jychCQYV3M*oT$jA8L30jP%wrXu79Jm#V)jEFcC7y>5f6UGw$>;auG{nrzWv2L z0{*2ZgS{iZ))O!1j5@z>5P-Jb9dhplzR;IA=U{IFhA=--A=BFEO0MHrzm5ug%El-$ zvTBJ8wumFT1csj;P^+H?R}CQB8rFYGKAl5ULUu;Fvi|hAl92y=Wk$`AtMMb?3Huca zhx77P7Vy?|CE>Ndd!G@>M1x;#jEDfr^DuE<5av7)fh9e|nW2RNLp@@jM}=q|nLbSH zKoh{<;TD%3s+;g@o8t)<^xXPkkQ56I{J0IH!{cYyCclt!K7 z2y+JAVLP6D3xs3-068$muV(>*Dx* z@Q%Gmj8DJw*DiW{W*VfYyR)7MCN5T?DEi>i$A_t;QBl`dH~_wy^`OH*!~HbYyX;q1 zP{ZFkyp0U<$HV0Mi5x!PFWS)C-s$FW5Dc>+@~q0)HYi?KD6$@;2!5JgYt!|rxUaog zn>c5xLFLDFkR)t?tr4g*h^>FR}0V8J&btZwX^ivVqnr#^i*vo;Et3a`I&n1_@$EXee>zb4us+0d{DMD6VEmOS6X93LGYJvkab`HET=ci-)@yDp6N?`DeV zr2SfDLZ9#|$hSa1x)yqjh8&tHihn-Pp5g_qtgXmuB)vY{uvv#7n2a|_;;r{g9&P~n zG@(*;K}1PgHzr^{fN)vzBsdYficg>)EST$(wkCXfHx)Rt!i@&kFD-kzzm$GE{xD(B zl6bS2!gxY}5}e-yjAx@V-hOmjk@RonHI|ko+<|}5kB>HV#Wm;Kz0GG(=_TcW9p)su zSBZx%+Ghw0x_5i6*i;z`?)B}I z$`&9{5x2LV^UxM&fuM&1TIsWbC>4Pq$^L4_oL1B?33`8(fnP{-`G@t5YnR~}dhDPy z;e?yfDig+C>~;Ci9+m?r3$a+|?p}Pqvh>OZ_QC6vkBoDGomQmxQI3uEW)K-;+TkeQ zo9oj>?J$z2Wah*?dLNoMOv8N-H+GWv_A&%-7q4Derz&T60iAsi!^$<>43UrM z^~I`f4o^mKA>ihfXdTWFatL9bTQ*%T_4eVZ0<0X!rY&ciAgSupDQrK}8>{Y@ZYVvA zodjSe)5S$xEq-ccXo-@80{l)MeOj@R6s=i*yAAHp^mGJpnhj=_s_P~CKA zs|xV#O_LL>gmsdCQZhU&=tRxwy6taG_i{}zdf!Psm?eLXUtw|D>{_tVBFGnDCm&sa*i_j>Bj)g&Itm+XKM+aoIF$15bPMMdoDwyR!Ry+t#CmZ!((b zkVO@f80eVuwj9X9Dii&0LhO@xHaw*m9@NA-d6nZp`R4=J7sr76u7uzCFo;F zB%9L71xNox4uQ28!(skMR#P|5m)b&QK^iD(FD1NA<@PUlLmsA4gFhH4XB&-EjdfUVNXfI4;0Gfq>7|bm=4l1ej?tq zxm#uO8s18`z4GfWs$X%3SHc#++h=)Et=~x`JVP$ynG2R&RbnnK^Cv=H{>^0&m%DN; zRp8`YF?|E}8nFK@&TRW6o{uGylZBLyO1UwU4&{Db({USCa5p3 zLVnWUeBAdV=5P{2YD^^GdP>Xx!Dw0uH^s+h)o)Y&!Z$@Hf*YMH>!?RMLE0?4QR2-Q zX|JCAUb*nan-J#8Nx^V#zFuO5Ug5IAlJDx|-(<;j_Gx>lvdp5sZP001e$-^g(Ll?Aib3`H&9_90d34r?2PgTGm4~DCePqTVneE z^jQ9GEL_Zt=d zDL3K^LZ~@-^@fKu&Gq?TV_JJGe*33v5J9^fn?}vGW64ohne`k)P{MHvErS|rF#x&` z)ZYC#C*F>dB(Hy0I&g8mjFzq73-eFD2rbU#V$Gm@`o>hTl-utga)EYMcgI`bZp{xF z%m9ZTOu8MtH%%gCLHz0t9Sp20>}X5MzdZR*&HkmS zQcY6(KQvcQ+mZdJBS)t=U;I<1uo9pAr{mnO@Q40Ge)lI;P^1m~#&{vB(?3+QjrUvH z%oyh<%7?`|A}h@A|2bA7pN9XNvAnX<_~$J~UkO|NQxJizfF+zWv+faSkA)>4AH_70VLCiLp||6{1H9bXirnTncd%5&^;P~A8eL`8 zn3w;Q$*Ypr|2?QFOm?G}KA-WfVxqo9jpyQ^^>B&T#MNm#0 zcq$g^a#@NHi<{fSC|UyzlLTI{0=MBieWxbpFy742mIuUy@793Eu&OS6)EZ>AHCMC3tT+b(FoL*av_T9zaXEwQzqL7Pm%h^{k;-zoj zB122C^p1e5sW?-}_Oa_LbbZjG=%v02ueMY%N*r!(+6v-l(UZu|YE+v09qe{f-sNt9 z!|e>Fx&`#~!SNo+jCAF(a*lvboR1KW?8AlZmH!0GvAk7sMcn&$Al32dPfc?7OTSDw z$KN}L*J$*}zseIvOcdOc%TZ?OFz566HrZ{4oKu!^yaTGo7 ztF1FXeC1Z~(8ToIN>LB(NRuE>hV=*DIg>`&Iutye#~_@qE~bmHMGuT~1+%Eq@9!ZM zKU>QW)LvTtf~CfkDa7nF1)8DC1I-5&3Fix4Z<+oDYJyQD>4@0#y_)JUE0n9n2Ba@= zV;f)0N#C<~ztov|InG}p22xxEK}puaPQ!#zLVKRADBde2g6&h%Lj1UkaLPld9t!8; zw#DeZl@84tVmPuCU7?}HNq!N0Sp7-V?g3i~&{PD^V@y(l#XXg1lNqc4{XwlSL1mCN zk+~>2a5<`uKTw`vO+M=_SxW$81C`o2oRV29BwGUK{M30HQk?HSWAMML4nX?Vho~G!)(ALCEOJc?|B#8OmQ}`>v zaO8Z`t~gw0n$u8{B+-#SGF8TLf~=+Ok8A99_tSOCdA0Wpa)@c^rBcnB#<%HVog%KZ zz2((gnN$RbD^sRFby;AP#~Z4FoF3B_a|9)f#6H-#n!Ot6Aw2|4v3Dd@w(Ywvg&0DSVjCu?Yy($LyMp 
zyQ`}T5`3eEz6KP6q@D*a@OTEh^72Q1bVYDf1N#u($T{U8`_Q*?oIzl^qg%GqApW{* zG6z)L)|__^tfGvWgO*iao+~*ff{CCkokMPx8A)PDS2poFd6!F&9LETH8uL5rs&-P% z{SDcikbQafXfC+{=gc6TxB{h_xS_Y-C|y8?nhx&3dbPOmq67osupG&DNu6Ep-bPzA`*q4f0)>O z&;_AlpwPdlTWT0}Doo+hu&9h#Y4%jD-!-R|Uw|6##MABKiAq#D?u&>@p-lHzhAH6! zrDPP-4$TUMd%ftUs(SUY6fWb%AA55=PwlOWTXR1yq;NemBp-FR&m_~&o@XD^}VIA310(h`HOE86v&YhZB?ZN4gqMxR#H6A_dpSCkRbu;LbHqfC{ULGf=VqY<4} zRed`QTly(|MXXrp(gId*nrp#x2B2~NudMn~2AX^mRYr=I1TdTbGu=MIKK{V`A)3w5 za-nNW+H|Zh)%+rB&w#emON1EE#3~rE zXIQ#Ys{caUrLkd-=%(mgI)0>SjSEkZ^tRU6mMN7#u2Y9DZ3H=|OQ2X-C&t`2h zhTLs}ZeBgcSD7>aVn$Qv%ZPY7+9P)MStCiCB3wsYIl~ub{4;UiVk}=}6v1w6@vO#D zbbBS84lJIEvc~dwiPU8LPClTbE>4i;P0euV=oJk*$X4TW^eZc4Ztm?GnNoh|gAw?< zaoetkpS}|bhi2v0f{Y?pFj$Imds}wor)``P&i_9>}~p)aAWp4E{=4ab9sN` z_a525)xfz|YX*fT4lI*~+}sMPR5$3KgL9Z697%;P)`nkR#(Wcs*UpglE1Ydo_r2=Z zm$~LBO4#YCD@yp&?5~yZC^!YpgN`A}Wm%_aV)v!4QbF`eAKq7$K_W#`Vqtn?uzBVmvp#&poLmF~K?3Tn0^1uId0=zQce(&%512!zWN}U)*&}tUpuELpL81f07vS_gxQwWMU4T&tYMCA*h%&K-Ozs z@~egv5c@lR`H6ch71kKoDFOlOEme8@v9AB%ZB>p1l6`no-X0te5$1e36aOyi2zNM+ zTQ#FT>IT%wKfRbGz4=t`S5-O@L@3wHYDlyBO9WL#Nt)Q-XoUqipy6}j>@RHp*1J+W zI6AFVp+ad?MewVRhF1oc70I!F#UnBfwO9id>tb593U0qBYcNa4&pGB}ps=7{pC;XCQ-$&7yA=1Q_^fnmsj6>BT>8V$ z78`UPGi!dOYEt}vwVHEc!>+2L3#pth?eZcgc>y_D!MOOa77ZOB0*j;ZPe>ZM5@Q`Z zeeM5W?f?JCF#UJc+{!+ocAidLh?y;%aX!_!^@blo(o3Iz{yZwSsoqOevr#9ErBrR zWI%pM{{+SAFoH&IIzR)RrLERu1{a&m+Bq4(Q;~N5-rrihZ5Qf^OO+cc$~xUJ0jZLETl2+h??o zSNh%)(|JnEXf6;MormoamdG4>s>cSWIz}<06%U>NUR?g0!+OaQ}1`9DcoAkBVFKCGA!( zm&L>q&$;&ae2XPk+tp^LYQw9ZV<*G~t<&0x;}L1;HF#>z7*o_it2(uOaO<*EbXV%g z8YuJn7ImMWyZJ@Po~Wd1=Iln)=&Ve8fI^kM2ryrw3=uglw10XnrX_wXP}#6*md8KV zOeqWu-21s)UBwAXEE;!b>%5fA?yx#cn*u!O=U-F%lkrvXG}23Ii6Vr3-U?i=a$bFr zIs0zEGAW3{8%q;1{w9-s;`cYvktQ(ii(9$18V7p3+Tx*;Bp-L-$anyds>JSlf}!(& zVehSj;%eS-!Gxe8Sa63S1Pc&C@W2Eo5G1%uaCespZUKS^clY2r5Q4i78f0*H8)oLt z_uYGUf4j9+yZ>z6-MaUmsX9HU<-FZ}PWSUX@7rgii|H9UDL))RZA4&7f(LN_n$$<; zs(iWJG#8mFG6ktOj7H!ZYA^Lk_9&8vMUs9xo3j_HfUh{A@Ovsp#rz!(l)K0UOzQ~r zW;pW@bPmgDcXr3({pXIp>zWose4Nxxs(~)+lcuK`4V`{K@*_e(|kZFwOFLFb({No`hcylLGpdE+!^@7Zop@!d9l?t5l{v0fP z=(=NcarScl$r@5NlxPKApDl$80}QUYz4y5U91m{wjih4mMQ&xPYII4?kC#0<+3-Ju z9r>CV;aZJRG#}u+^oSt6`YmfgYr@6w!OGX1OKk?rknX2#m|t{DdU53GY0j~Pl31UU zFgBmWKJBsv?0k5CqawT;xaf{7qC z^zhl#b4ST5XaiPt;LNY|to6<~Zx)5BY@}AR`!S7_4Zo|UzCYM@)j9X1vZ!X|&=B;n zT#bWpKM`5F7JTq)b{%=w>~m7^GgTvEns#fvZKv4^E52)kJf!WDtR`^E3>z&nh3>z$ znD2wh{%HGZW(hdj3i`TQGL9%B`xwgZS?StN#Jof@)nmH;(|)!PLrb^b>(_GamCn!H z=ce81WlK<{b~JoGkDJ5@$C+|f{djd?>d#$_z0b1#{m8AVlJ@d)d4Zb(To%^EiiYHhPP|hrLw(qU{6!hH);ZRjkb8u&m2d2Ns$r?#FYr zYHpgxfy`^^JsjPZpc+10rN0`k$W0-q{Ig#2B(8@Qg}pk@9R=9R)RuZL6offG(kUX~ zl>21J@GjE2+IdE^d|ZpC>g~aZTX6wnzw&V@nRJAFIO9JFhV9sgc8l)??QdJ>oFueN zvZjRX;;_(Xxz!S>-pR&f!2d`Yf(?AKw=i>V&RO+ThX^UI&4p*21qU3jx&8N*1S}73 z@2&Z>QyEl|x|La=cW@?ap-qZHWHwqCpzKZRPx8Z;iU-S4(;*+N*?upC&1&gLOD+A^ zLKv@c;ZsUML^LRLVygv@U}eF^P+}{hum$kON=SSVcQQuMpZV@}Cs7k zO$z{EkNy4OUDiJ9&f0R%XzqAXT_!dLg+1$ynvQf~)V~i^EO|iwP>_XAyvJ!i@n1Xl z;=c{Y--y5XsO{sS)Z4(XdDx-@7o=KJg?NEqjEZ?_`yMVmSm%xOS%Pp#aR1M>&F3`t z#&;q7VwcYpL!83DT44IMu)U|(aO0zV7!2`Dn4!6<(^tKjHtuwJ@dfDg@ijJ{lT5cE-MKhlb@ z5$RdLx9SKA!%2$A{E5apl$R#FrxiN=9y<=PXO`XWDt1+CJC_$nZqkKa6cwsV4s+BK zm{4p#W%qo5ojM=4<3XE@{4ZWNyIwvQc=(lL>uIOwTH-gs>Amj&XoIWog%!dyFj&S6vo>`Y4- zIuwo%aARzK^XG{g!|FTJZyz}ik=wFegR#GQN&;qSMR#16ZZF{H&smV0A1c#gw0R?@ zPyD~W*Vv2m$j!b)T0FStT0F+>r{D9=e>PeRrGdkKNNj}yLH&G^v$nV5rmF`Sajkd} zPRGDaxEC!dGHU%Do8UUEooIJ8)0BWR_se%tz-$DAkB8{|qNrBe4A6t4!NwH^Fei1(VEUrXNSZWUMOhXJg`JMi473;#wjT=D4ArA)seF1KZr58_*vR?4NyG#?JY}h zacLJHZqDz+o{`l@Yv4}T7j;dwrY}s8LTd#GX_M_US@q&_e&yI@`32_}00*{kfI$9A zgbG|Yg$YJP@edr=|n`asa?Ge|vEGX{47apqE 
z7f+q584r+>uqh9D_ux4#*6XwQ?Xg9XXV-wxf2Hh{^@x}Llj7LGm)RChCl5#c`=Dj~ zf!z~>!Pe-(wWchr)~*2qd`pYl(W}|o8TZ2_2%$6bSvII0QMOHH@7tEG%}e4g&#M4K zq_wY&l;N_5B^xe9a1Z3N5a!! zSTmX*oYe?&m@~)WWyg$yAqO*GHZg9E0_RZEbvipVo}6cdB~ysmPST5dKTgPjZFBxK zFAT*4Vu|oJr*TYGav3P>aQr#r z{(HRx2KvPF`FZ!UXAw4hEm5#kOFVE0>*b(5;D`6Qp+2n-7lq{f~ys z9u+KCdut+?hS>J5_QTe7ndF_N3!Gy~fi+CD{gRUO#LF z;U+91i^S6wLdBUj{P0+|h`g)~sDm3kwmo0z&?tQf--a>2o}59xU2(U^VPd?ILe*3X z{5K}f#XY#dI%>JyHu2E&aa*?&T^u-wNY>N*y$=(sjPH!NLU704S?HWW8NOxwnh<~B zBNM!3id%nX+}RS9`MvaqyvZce0%pXKE_2&Wse68T*$sex1&lqgP%Y-rd1WFa*_=FN zcH5dT%q}K2j%xHzmB2?zg^*mnOrfBzqWW5k?Edr~pN30i0!|sjE6&Qay3*467%YB* z*2P2wlj`WBfWs2a)Oa%!U$0yf%OZ$dtXRgH`%XoN!2(X~C^%hmMeQA$+M5Q@)9ON4 z-uAD6H=!mR?#SlTfXu^yD~xJ-8q^`gA8pNk2rq#)zAx8}I|M?#v2PZrD$`^fMw7uk z!iUdg*WU{k3$!2x1_I*A(%4Dq>rx@^ zqt`^5jUGGbX9yyjwzyb$3P2<2;MQHbyIgvDjsB0;aKp1SO7*jFdlHgi<`0)Oaz=qz z@{895rlAA{;)MLv(Qcf)0k>ZQy2X7mnyg|7a*t4d7un0_ct{}!?QlU*D`4Sg=^9_V+8 zf?)ok1Zx&R*C+`<>kmtZ&(lUB>beT3UF;pax6`@~;essv9Y^dDWo|y=!NeR?q@ghj zkR3$WPM3c2ljN8In`z6g->D67QqPW33K8yc&%1MbT9sXE~(#r#bdGi3}iJ5NrcpMMBX9^U2m~ z2oJf-+J3`csL0BrCFsvPgNK|*x#cOZDldVq712I>9Z}qPO~n;wx#mgOPmszp?i{#C z^L`9D;VSGE;C)4I&Ubxidh>ozW^!uV*qx;BOceP>#9j8G@SQ$F?q%nWexog~SsUZQ z^=C-a2%!|w_%3%yVnNb{Q?~bYFqODIJ?wO`GZf6AOuwar0w{jA_qzvj`QT;777r>f zT`nkieg8a|otu=!6v03pvp1x)7@@bu){H$>e3=7dh(IJpKn#OZ&oRc#)QaI(0SMa=%C8M^832;X?Kx)uBT@LqEC@V?N7 z&4pC$YO5S0!UH{T`CC1qbEK8{i7cM15rH5I^i0UlKdeRA>VZ%vhMDYA3T0=QX8j}8 z7Sb=Noe9hIWa*}?qJFc-ayf)3kej1KMG0Syd5=>ig5N!`b`oAtUlg08n6-#JJflL4 zf)N}+fZN|gFeQP_pqfJqbJFr3wvck<$A0kEwvp?f;|KEzrUZW`!)^K#UH7Bt*Bc?f zA5OnvgAoxFcViiq(nWA_+B1fajha} ztBU0CK90BZMYNS%uelS>zW{xYZej&EEMH4Jk0$t~mQ6Z@6FBx+eRAu4ViX7#2`CRz#m}NpV|i>dUou?QJOFJB8$mH zLgAXXm)9LpcIVRvS$(aqwmhl78K}uS$}z^&`9(Z_x1@vG+5j_ie6AkK0uW`$EuEjr zOy~tN3?7tVL#4SN(ICh^2ghyxW(Ofv$@QLYjGQnie?78PaX3qn?ag4V6MiY`%tAPh;w+V z@u=w@nt012ID7Usnq(@zV2$^;&;+8C_qdtfZF#;U_Hb?SWXlsd*@-E_1-gEEGQwkt zaQ%hQAU_P2Crr|$hu85_NfyjfND33_-rm1i5*O!r3+? zzQF*kAI?I1jA)`@HJb?DzlrdyH4u!Zc?XZvF5+1$vJm_mj;FQoMb%8MBn$?>j@A1@ zc;$`Jge%jXE7K8B+Q2;7AW`VWB=)rEY*#xUH#vVq%J@rm@CnV)j%dl|gj z?GF2+zC2pTD|d$+vliu;*X%_0Etb0$epZzQzEDSy3$De=L0eCc2TG1NTn@b`0AA1o zgki#oHSd*Gld#{vV;f5PHZ82(ZuX9bPkB?rw*xH(_l?k*FP*zs37!=`mYV$lnIMG` zV#YEaV1%EF!ltn70j64U}3~pvL(1J3hY;%h_k9v^!!cC zjE2X{oR%P-1KJ~&BY)RR8;;s!OFu-l(KT4yFmV6!EX|VZP23(+JmRyp5S&)H<0GyH z{<1La5$g>YTDF|UmKHtz9e$nO;|w-kX1|@L7aQ8gXSJ-L%O^jZ%E|l#7i6iVH1XC^ zd%pMegZfW_V=Nj@Awo8x!FySycC|H6>S~^n@+e&*Yt3Au*DsH0UCOHs??RSa?1V={ z+{41@M8h%(N>H274)c5W9W~|-YSD6MatkI0X@Vqmty5>U)2@}{l_r{+2cv^npd+CA z3w0gp)euS}y8_s)Qc_VeDr$v?O{aXE`NpgPISMRrBNb89YdDIEi||jLiK7W7_>Cug zFIlV^H7raWYOBUV?ac8t|5f)fHF;~9oUa7Y>Ta(ywI)rW^kgfFGl=v?cA>!kL=k8? 
ztQNGtbQEXKd+J;Rl>E{iF}>qXU65x9ydjmcLSjtKJX_pY`DLK&;Gw*2;qeyoerb2S zmEW*tM<`YI$rF0!Pg3IQ`7$P-(A2$Rm;(JmlGvo_1ZfM^VDP5DAmm{94F6yTGkFjzRommIgz4duD5e+>N4FYR|2xoGVCU zawlEnM!&Y1u$M>tO_i0uD9N>YJE>4^g3@9&{ffVG>s41Ls-j}s&bU<68_InX{)eWx zx9I6EW#>g3X}rvp7LL#xR*4Gx-GZ~Hg=#AdQe$5Xb?!}9_+@K4xzo>irA{NE^DYGl zjZ1n-rD>37R=k)o2~D{s(0V2Mu_Td5QEvRlOijl~<0%gy>Pg1crLz;O{SJW=9-Vve zO`ibC1Zt_Aw_NW}3y|{d5uaS-ZBdwoQF-_r2k}N-kY~pLZ}>u@UNs&@v9hUt|LM3Y zih!!=^JNz9b~`VZn*vq(RK>eB@n506|0~))&zKPJvZBk9S@ZE;&r!Or>PhjmjY;I=mjADuj&k?z?Fr*=;BEXhV6Lk#d7PBsghM6+OPw z8V7#6+4yO7*Dzqf8WY;n`#V@|&}#^cOz&(V9F&RVdfe(L~UlH&f* ze3C!$VIqcgOkkfbxIV{f_qX$M0?>+ugAdz`eX=nO;2xe69Ztl=?OG$|Oe_LHX|0|U zuy%#W|5JULbTk$o<}8E6h4`)TybhDpTUZ2i<_Etyr6Jr1x@3}t_pSFAn{>acWO3s+ zO$VJ=nT{vl@tC3B`CL7~iDAk5U`D(j&uhl#2oaoM1CND;IyoI)8M&W55b->hxC zd$7lt;`kwK?#z4k&4MQveHq3GBT6eZ+KwX5cwL~CPA-M$S1{S zRH+ml9pJK}6bwzt-{Zb(Fk<rezuDA<`0 zBI1A~*!K~Od|b%+vGWtTH8Ji1yRq{$y$TMm6D7-4!;kd-eEWGA=kdi(W@e9nyrbt? zXZSDhVr2NPe+G^V0RF!_O%8{3>br>ag61Z^>Okjbf(C zO+QnOwDqDShlz7Mdks)N34C71nLzx*5Ng<7i>4IIjEM6e?v4DbW;Pq?Rn=muEH%$8 zi)RQ$rs+L)c&^aJMJykMM8&o`0p56@r)GlPC;$fa0;aP^mjwQqKwX*mTCs_;+2Xy~ zI;U55&LsxUPP*@dgm-^udaPj)!aKs4S_L-U_cf1w z+GjSt<#i+@A~?CS+y(M~A<6qIb_k}k9#80P%4@Dyc803tS7?dK4?{c*@=9H5BYp4aR z(ZW}7hi@b3kx-`_HJBv`o8HYPhaPTs?HHWsI1q+WjRn{(bAR};&OS=)j~##Ub)a|t zSBu=%G6-va5x=s&8a~l|c&WJRdJCqnS*qy|4pO^+n0|S9^rEs}M%(fo%QT?Jz-dLy z72^B0*k-HcdY@*qIwnDxG46P;s?&r7%E5Wr-?mmU7?y8^Ylh0Gq zv0K+fa>&l2cm=jw{gd?WznSce5pp#+Vn|q=vrX2H^50m9Hg@!uKrZ*wj6Z+bza7&|2;ZhvPUTAjzKQhKw1Ouf#X4N zg*xj0A`wI})r_KX0a&{vp~$^lW-N1~fWMqVa?%=?S{>P>(PBfptI!i?r4 zk8}g2$n|pvQzZVGdT-{9{t8e+>{tLD+)B&f9N3fM00Sm^1 z1F6`gZN8uzvqxMaXN8)zTd^Ah2lNYz*F=@?oCa(3gy%X#;_g(&`*>>to+u?=X}G%> zE8SoF-fKMY*4&Hc`(tIqtCfd14<5HtehG3~0Z?gOSq=zYXOFDY=!dvH+3nW3RjQpb zJ?+F$!Z%1dr@zQD z(e%UhYq>=qg#>#CEpB^nr+-6ad<((Pg;YycQ0F2)D-l_)oH4$Am|1fhF>Ixu>Ze`( zF(VdA1Ga3}wD)mNV&8sMbbs*9_=TYm!dfPSTkL_!W%_*(@yW#GvOs)YZ!Hw!K8D5~ z+V)f{0U+l8l!n>T2!;+)8bWDfEeL!gZM|cr`tg)Zz7! 
zG<}7z&DHgt*PNgrb@(r6+{I+q*<{K3Ss0XzK-lZrqQAx#ljumyKD6?DfxF7e!XEf} zB0rChPTAlX>@uFSY8$t9G~R*lOFY#rLgX#7eRAs(Yt%k&XEf}e`KAK7fA*AnMXW%$ zf0SXJwJC@ghN9G^-2`vqDNYE#gV-+y9oAa98&3n?Of&lZahEppok=+snj5W%1I4(~ z5>Q_12vEc>yMza1zGzJn3DX~5JRf;8)7{K8aXi}|&z@Hn)6(eSyf7ek{4uUz!jq)~ z3VJ!0)U`vYKT;kK_7yA%jVya<=zUc22y-I=ID|87rX0Pw1mX}i``+61KFjenfK~Sz zTOKnnDPP+PO&#H0@juzF#{-fDbQQ9(7LX9al{@}6Xd}~~rZ4=);6k?c431iGC~BBnWWI0xeY9mviTTvBS67(m4R>P)n5bqu z1G|Yo-Mf9sh^wIQu2=avH&grg5mmk0%Z3ltEk-O=c!jkf6A%nGF~5$HU-7D1fSK)= z(V*IxMfwL>Ex1mX;S#sn74PT!B<4guO8!zE5u1Slo+|O{D>u7$42kK zEBNZD&Z^(u;qO(B%PY=pf@3kOH(SEB@Y_wk1zM*A2Lmu+d|l@`)peQ)zfPK?%w#UO z>3&VGH5n#Vv!EKj`01G?i{WPvPF;^~TjX(rVjc2?_Ru&-wAK`_%P@dbq(E8Xy=%L} zz&U*o3El!W{bF8hJTdgsR!dcwD4YEC-8u0clZhGfI6^xUF1UEaCm3POE2z8w^Bi_) za!bDYE&kNAcN{LDLg*fCwVZs3+#~}#VaaZKeB8P!RRH~_SGwp;78gNE~joodra;?SneA{a& zdh71BEWdxJ|702cIaC&Ccn7s4D%xcdZSS!bxyt1J-MLp_@yTi=9YUxhgn5|zjAOac zyEP*=VtUmHeToNtJp1o}{!zJeUIF>vA+lxuHy)zl3SLyGt)Mb;Oo${i1c#N6P%-Zv$6HwKrx*B_eSeIVwbFWM-toO@|XS(#fR=!(ksOVysHmcCqA3Os` zl*Y>y*9FKc6E$Wt2-xUy&H+XJ%f5OuPnj9ZG9{e2Gxn%w|Am2d3~h9|p!s`{HlJA6 zjeX@MZi~G6HfA##fm_ONGGEWrKX<^rSRp8GCPe?G_+(?G7+ECVPVpLuGZU8awq(tN zHo;w{6G!y7)XCBf=Ba^8#7uB&e+wcfU@Z8J_+hE}GF@d(KiGQvGNKJGSaZ!-;*Ylf za$Qg+#DQ6iU+|LKXfuv!`Yt?9)?o*_-VzGG-FRkIk9=VMp#L*OogD&zJdjU%5YxHy zd672fM91wNOPM)+rUkrKpZWH~613^7Qzjn7@G#UvO?MbC`{)~S1FtxYQ^)BQ)Cd~1 zIb@cLMCXe>$Eu*6zMjb9xDxmYABdrkHeZYYDrvqu4^+vyFF6zdWqx-+d7e8lU4xNH zjr|7fJK&xjqiTxno$S5qG*G9o-1b+5Ny1k+{am8Wr&1Xa)#z2xIHLXnWhZ>YhGsn}GvL1xa8BdNWN!irhos|F4)mL+x z?n)p`%?j{glX#ISoAu+^bPkl2@j3duGT1Q9v^UE98ho)3^H}~1yP=zY`O5t)OHVUM z%HJJDK%KfVaY;xec#JS@3Hst2Z5A6z4m&P9S1L5f(8~SnP1@Q<9r_q|Wm2!`*s*08 za2FjxxEiEolfi%?=@@am4cY!AAoN|?e}ePw`2$0Bi{H(upcZqZw2v*TAQ>ytV2sxK zD3F-FE74#+>Dj(8V}fEvg17$en<~h{`EoVCU6ooBpz$^FsY>G$Cvfc zgriiCVlW+%yy9~Si@WLG9M3~?z(8o}X-XC8w1r&1wnat@%|*lULiT7WY%LE?t)wJ8P6T9p?2Kbj@*#ejLP+2|cKi@< z<3pLSK5Y}g*!;#d?c6cs*+U;N026;>_0c$dsR5p^bS1o#ef8gLUlXuCs6W;az_QBk%Oe%ykC`6(iG-Y0~IqGH{|@~Lzy72 zZ8#kt3&GZh_$NoUwd1;tyKz}xgmG-N`iV#Lpe1lf*#xV=LuYfOO;z%rr}4to(8}7jpTq;V2`Xg4@s{dtM4Fz9ysfz0d^u zk@~%464ZTZG>-U(_qHx8{aJW9LwFM>fb#xo7oO^SOvLy@gO>cU@&W>K?6}egoV<#}CA*SzLdxcOSYAkKkW=&;u<4t=14cjY zxk)RcAJ!KcE)6!IhrwWL5vnUohoqF+$MS-8y=YBt+1Re^8|K()0En3`zN8$<@Mk5x zsQg^XJ^jH2%V8(V#U(-Ag2qS2z$=V><510UE zOK2P@$F&Y;n%C{bzgz@gNa$Vxnr9I8D<7ie2{rxnlJs>kQ~XXuA+66m@b|fA*$*9M zynJHv&O2PcDuforh@pqE6BoV{HPf#mQL#>Ge;@>y%@rNxhI8^K+h*T6RK z!PYi%C98+2!ajxiH_#G-0z8r1kq9r596iR94^oa*KjBZN$&M_*Xp5=%Xm14|MUV_x z@a=A_9bkZe>o7AI@?D@NfOP!5DX-v{IBI%3vXG&Be#c>CTT%U~`<#Zx5d$NZJZ_i|nayPL z>?zS7C42>+`fC)co{4*QPCu-;f_^D6Rr1na;W$Cf>VlS}=aXh5r*B3i;;*(56W62T z$pG~wA}G#vbrAmhT6}=}QhBu~C06#2XU@hcRM>iCoYU*1L|depQFM^)aguSQME>)L ziMFc8<1%E-of~*D5uvJ|CW4GXEzNz8fAZvLwU77db7lAgcV6O)n;w;d;}5@;8oLw= zSkLRCLTeM>EEb|zmHvtd-Id^0yRCG_gU-ZN%MU)l6b)B857+E_{2EJ-SW|ITU$fvw zg=`a;+8;K5nkWE$K|uH{$>BLsCv`l~19NYK>}8n4d_EJvX<{He+repU*-fC6ASwo7 zMybOj@mg(5R}*YMHXH@NXT;qh$*bx>hN}cX)Wq`VO7V*}e*%wXy^iS!e z0)YrNA+I!j|ZY<{>(*uM|NyVj9K)T_d)w_*%a1AGL0J}3CMLbL3&nZ~*_&$F3MP;I zqvcT2qIz*Id@bCAl3PvvL$x_tp)ivz(j)Yo^Y!b9uK{@cp^~)YizrM8eWto2a!el+YxG z)%-}2M!%%+tFRVUz7ZB-d13YnV2l?w+G+b)|tPN_wF_-nSs?q2>r9TP7PecY<6rgrqD6!2wZc-;#kyhnZc9u8g z#%G?Wt+#LXjZc6cD@sXmi~U5j?C{C~c3gwnLKi3`LeFSGlQD`@?}Mx)!zWtq7+&v= zcfik&5Wf}*TRs_8T08AlBIO4ctE&<1j;01jq18F1FG-x+9aYuUQVz3X)rz|TV#*dj zSOpN}og#uPT-%d&i(TX{i#M{O&Q~c1HDNOzxS8@K$uc!oFs;tc%xr>g#QT6Kij`J# z%DC`p_J-daP|hWf;AgoyHguGFKTE=<>B*J%?=Dk9+{!Zp9$F@g=jw&}#2eDw#OCU# z>B*M50-k2(`|_L^4w?(22U+lxY~~Tojk0lh*atjoO9vPo7$ctpKgBn<;vX9IZ&Htc 
zA(Lxwu8|HHWsWGM-b!zlPJeRs#n<`UCH;V&A^0>WvvZ}MKtCGNWXYua@;=G;%Tsbst+{6!&V;1dIG{8O_qha${NfKm^Mjm9j*@%M5yv^Sps_CY(SetK zPwY9TJ+20j{;LhJ8jt1qu9H2cRDafqYw;#dMB)>GO1YLS|JaW;k!^0i>*OK;HTabcZPl2jPz*u$;p8@_S zp^*`y-6-KptZPIDZS#D^p@_05Hf{!tNW@g|d`H_XG4O)*F|r(`U>SHxvm%#36!TU^ zDr-aL@wJrl@4F^bHi0v1Jk zAKW=q?jDw!?(L%;fcx7YdM>@kdx3eC(IlZ1?K zo*KPFTQf*d+`>;(Ok8pGc|P60f&q?@__+y5Rs9~DCO8>#ekHS=z!DhIg_7vA zPcKT1`>_XuUcRz?g}T$?7Fq67-fsEVSAlQ_?r2<6QBmIBX6xg#GpDXqVz!?90w5os z_$)A=M2vQ|r|A9%An$t*Y?tC!M2ebRfP4zc!QW01_rv)yZaQ-OIm+QbzXpC#o|7LT zbE=Sqj|+8)*u+$7d7tefDK3x;cgwY`F~y+UCw#3tro&oWg%lW%HMd5uc~0De?s*qM zyss*KVqG5{wcjNagJ2AN$7ej-Y`PlkqVW?sQjm!TF+Y5R8I{+g1FaMn&R3w*o4E$N zepmzO_Oim0cM3N^7vIcWSez{!dGQ#tqPk(} zGx`MAb>K(hgE9i4v_x-qQ`?NT_6{d|ZKHi?OX60Ym3V|!vPnX1ODsj9r!V2cG&%JN z@w7E7u^vO%Bg;OJ<1cx;`>FV9n+JS(49K?sb|q|}#ENIKsburXv0B>D7BS&&t!d^za-9wkMe?2%bgDmu$ z=b--tM`rb}_4qGa-!^Tlm#W*l-X}nT-bng^M^NE?+`wXW)kvAs{)~ic2&LQKdyvy3 zXWB{}SuBY2Cz)6Fdsnj!7t851@=l3Gmo3-lcl1YKkMXOO>zp}4*HvCrwKn>_E#a9PVKg$}}UooTobs2K& z?ou|i)!EH%?`YWi;x=`hv&%zWGO%?*(f0FM+}2}aCI}S12K?}6)a!+extr4g#phUX z3p5URlI{_b!L#70TEZwTHp<)q54RYlg^h zF*jp!d+kga*sa~mewxCC?@-sCfT$i3eQ;&};v1!(vf`Z|m-Vuj$h_j~BAvB({ZmWnCUA zMPCi%yuhSn$_sp1iuDPt!Bh~9gnfciECD*P$0&p_dpk&hgO~JCRPI08P6Lu57&>PO z7Vnp{%0)jAPk3@n5&Lf=3BCi@c+|Zl{*Sgn;imLqdLi=bOzQ2%E0H?JQs#6X6OHd^ zWhDkRs80`zwMKb#UvV^Hz1g&Df%9%Hhdq9RD-RRA|1k@4c|_t|DqkQJ{(|xY|LxU; z%LyD`q(ZU^4he;Wd#rf+Uf%`lDcCDa?^f6Q=rl)sWYTdqp##Bk^q|-I@U2}%Y`*&w zoMpi1-$H&eA>>eDHzE=)m0b{@#(_xJOXWTPbH^J55sSLSTZ199KBvd#{m}&`OMW6r)Ensyj_C@5=t2;2gr#`WCE012!Z;!f1J}| z1HwhXNV!t<&koj1GcS>Zz}Q=57@(TRfmn%N_7;(r2_j~Hdyl}K`;(LQaebpmR=;@b zj`&y{4+qbAz!_OFocPe3F85Y0@t;E9@-Ux*w;qE{2$4jgJ&YkOv&hI@Aeh@D8Yh`> zGz)aT8;6(7Fow6|>bMlA&mgcdA|2D~I7}6kz}I*kMrhdoEKLBHpXD&>u89j%x;xh| zeJP6cEjqJp`K(jPhjJ`-M+(v{lCrXQ%d><@mDV9NW}^Qq@qd6p$`aJ{0O}HCUMqn9 zsyY`8Ab$RLlkyR^oMQ35d8cJOj)t?Kax=weO+N-zUWf5WiRSz1ff#fuW@e`)vALmv z!_+buLee(nqx^iR&ZT0+(uq6xUy1)C43eROBU&4-2{{QP>Ai#aBVFm5m#h@+xlNz; zCj8_{dD+*}5lS*i@>TI{o$l?*MRbeEi|BzpIkZ02e2UcOA86?^2UqoB%si%Vn4_G% zaoKXqxZk2tr~LU|m&Dq?%~|-Zm?$k^zV?;H_aZ4CE=NX;_tzLQ#{XINd%(;D)qiH_ z>c+Oye#zVaztv#&p!1&@X2~f6@f5!HYs-^BFUV9s&P03$+Wu#T2Q;QylTV`*3Xxy+ z|4N`Vd-400|CvF}$QYgfO(dl$+n4VndU8({ncl%G|1-k@?)UEhd?stD;vByJ%-|nL zwD6zL6f^k$)CeIw^H4G#2TtjyivJe!=Uw?m69<0=HY`%g+u;9et^NOHm+^R?w;W@_ zzIrZ@Crkfd4vyLdisqD3W})q1qrj(!>152*mA65a=!&g&M z+6_!rQO`A$)U;jGeKdS>i=G?1It%{nAIO2A*7k)6)K||N3D>!F{PzPrbk8D$%-p*# zaj#GxmJ1&^E{$*L(FP;(PR#}o2=l+>8{&Z)x{>%*s~x4wnFG}=rm2pn$v?$f4;pU@ z{r1qQXR`(mb7nzrwU`HT+BX0{Xr2FQO_89|z1%pG#Rri}2}G?XA5$}t!S~Xy)7O(S zK}VS!RQK?mHImA`(^wUp)w8@cS`Y!e#H`Tut&V-AZ2(EsM4q-{Z;{s;ZirlbM?V3_oMY4E2 zqSjiKWv~5q)Aavca$WdD1Udn7fI~1;zoLA!^INtaUF|NFZEVUrUdKt5qKm16Ha=r> z6iTsdZ{11<;Ozc1-{j84ND{w{e=mhB20pXkGj~WM=Bd$o3Hl$btV7;DSebI2y=@6P%PBwLVwP4k{{B zS;R~d_2nvk7KW~mAh2Iq6RrC$!hMf5AO7*W~io$k4Q(iID6d3CQB+?Kjbp4#hN zibK`AWYy zDFO(2pwl!$R!E5cfaBr=3IfX(7joGy__|Fc;k=;1YDOAaWj=G^+~D2kwbT^w+I%cK z<9SM{bnR$WfVKeH#e%KWy@=9I5Q&>Z!Lmi|)rWIR z7_tnjh;G~mgLL{=d2hPD=uaZ~GPI^E`-rWU@*~}(FAvLfTXnmrL>FcoEsw zwp7PLwHZp7$Yf5({Ctdi%Kg|M)z93pQzH=uoMKxK0750OIU=X-j6A)|>>aHV5L+~l zYu3{Q%28&v@vBRPK&ayhB?rm3&M`JG4+~zI&*;!5b{+X%S!)B!0-dW!IeTFzWUl)7z`1hvF@sF#FI&8Q97o_&A+=XqZ2&9=|(Yu8!#b>7EuoOtu) zM!BB5f7xdEE`q-c?m8JY2iK9O^;~JKQBHQG<$uMT^7tzx!M!h4h|KbJ&KvGdDABVs zQxUn4^eVcOI;@mHKUFZJ%PxSMxBEWlFcixjNh@Q3E=`YtD$fTh_NTAqN$lZ$*w=F$ zZbs3TTYs;<1cOgUxH-badQq}9Ewp!gmTOt!Dy{HXik6`+!wEsQACL5f53>{NtL0YZ zNo4}ns*m6s`TwqPHyZvKJ&fq=<=tl5_{9PF=4{MOY+fvTve7;RtA1zS`EO*|qlP(} z71o{Dozz=47IS{0nPKcXQ0Lvi_sZ^C1Y?Bpth2iDGYONorJ}DBxQ(Z5RC$?LwOf5( 
z^YFay;~9O-40eO<`5!CeIYB-QZ#KA~k5fOf>pHnp1{394URw?rNnZq?eHiv5>#0}| zA-y-LvZ3*{1>MfGx2yk68j5>4tqzIODyQT+DkO###M0{HrA2ekV~ZrK3hs}Iz} zEh=L{VGl&Pk##h==p1wdk0uOHTsilor>kuVj!kEzQCG_7Wv$Ueb~3mIFz29O0*{(Y zbc6-Nku0F-%(<jq--f zPWutCMxc^8zzLL(c0bK5_0!$a>rSU98%=0w0Q}N@Z(7f!ON)DZ7}Ai3UX+}xEESY) zi@2bRb8`4jh~peHgZ|dAelL$yC|FU%lkE8!_tU*Eu4ot!BiDoslCqgnI$Q~6J^0@y zMEw9UJWN{KA}5-38hreIKse?1uJ#iT z%e9IZjO1J?hql@C9VpP@2ObD}Gm^Z~Vy&Xld*qohqXNd3cN>8Z*Av&9_r5WTu_2Z) z+jM{7+J`m+*fgB~JWb1_a}I(gd~+P=Fk3(U-9bUT`yyxVg7dc#l9VMo<8fK)HgDdm zJMTmUSF2)mrpN6p1E@vK$&Q>n1AUYn>`9k7by#e+@g&rhEF{r&f=^1_ASP9iYNdGo z0r(^gB8ihBq(o*?X~~J2@s=E-mv2a>9d(f9ko@AiM!#Ht+G)gHJJj^IF7f#|Hk{VAsWcrhb^HtE zW^2J`9~VJ;JD;wnjD-rR%ygz(l_!oq`k7q0$LT~~IJV@WhpBoU0!#gm)>R`sg6XvQdwBc7KM#QhikmUEFYmxYR~s-$@(4y2INT zZ#H#L(>x8rB>@Nwm$g3$XHJXMWm^m@45UQ9Qho*hoWwfSjNSPpE5+CL1&x}5vGL3v zojz>j6sM#p=gQV>N`262vfnj<4>{>)c*=wuZmV40c+LKR8lM&o$o?;0PJ+)2;2?lh zYECdRSLgg+iMLE4o3*^6Q%eZ%K~+B2RGL(mBB9xAlF^)qdCe4d8xk|xPrtNaQ=W2@4-7*6 zFK$?y^>p_pCNsyc6(5(FK(DksZ||GIpm%pA-a}~z<>*YVz}q0BZl5;-k~hvrD~H8E zh8LRKqB_CON0)|EFZ#~*gPo9tSBuU@3cmu)IVa`*AlBUi;wcvF2?YI2LFzF2QzIGD z`}K$1Qq5>jM5ah0+ne3gAPwO1oSy?? zDXmOg{Fzf@P`{dX#v*mq^l%KOd>a$3;@8Bh?Ilrs0WTVTR@5)lTr}347vd``YA;a1 zwmAGnZ9+Zl&#P}5L5rfSW{2#*m#;L9mm`=8?Pp^=mbU(JS7jja<9rkW8@bN&c$-$V zd;nTI)M7#tr5?0?{TyEDI6YWqNB^M3Zj^gS=~ba8CBut?%_6YHh;doLDO3e;vcP_? z7JN{*ABb`OE_8ptje9K<(?O^3@~xOjvhmfQdNe)SU2yfga9)er$FF-&Ym{7Hmdm8e zwOP~=^^I) zvzd8XlA$=q=?>96NqqdaCe)UHU+3JOqFodqh!q+G%dawo!VEjJw|!rrdm4_Gmw6vB zyYz_YbTVuP+GeZ5u6$WB(U8M-m%@D6Ba?5~vo7h!k>$*0c?}xVSMg>4K2_(CZtMRS4&VI-S&x$AoEZDfO$&1qqripleS2vPoH`ki> zS#qnLEysgHSL)bYhOJ=63firU_~8_{d%^PwChkPZQEQfP1Al1ech@Pbqmmrl-_yX( zrX4`Fi?dXg@N-!X!NI1I^ZP$gClBKD4UUNQ3lZ0ECJR>&?`fOyNV(_6{Mo(o{D-<| z*Pb>%>)nF09DJz0Vh z6oTFc>vXWWxh;$lXs1NBl zBLOOH{LE=}1RlARmHhc#Nq**NNY;)(i zWB~kbF7U7QTqwS8X(QI2$|7b&K=3_LeC92OJ$D`^l$I*%Fe``8gPMVmq{dzxcA2dj z^aWn4ulr2le{Q4cIt<|tcBN1?PNz~+9_j#> z{KOx8a{*|~KWTL{xMm}*S@De0P`S#7|EdGOHTK;jJP814gS!GSxlq^7p%~-e+@J7X9RuXYJZo9 z1U@v(E9V|^fF!J0=p~$a?YMF1G-6s?X-;mDQdCYK=@tCL9qH$4Bn`nZ#g{v#8!_(` z1!zZgJuEqO^uBot3E5uROY|p>D7quo%9ot)j!S!eiA!)apnspc*#*ET_wn|eeXzLi z;C!!+rC%4f!14GEC%H^U1e#KEN!y&}xxax1t}=BZNwvmZl@yH&ZHz04cr_v7>cG50 z>4wO-9shz=#goap*p=~H;Lj>sa;sqVWL$lcva-_Z{q(Z7kxPk?q=F$9xNuU)EqO$P=Ts#PY^d#Uz zw^CNfx1Wm|4~wS`22T+TBEfS9zwem5LuTcIV9nf#+S75&qamd3Coh=#_%t0dGYht} zds)JFC*j-lMjaLaT_a?07{qv`EHCAJ}S}wvfC8w6kuk^RnHBm|3f#;kQq6j%w!!oLQk@Dy34=eNB3st3d z+r@#RsV*Jh;e8_P(++_*Xy@RIbf;i=n}zbXmKD-V*Og``zV%H6i)d-^>Prf*t12~@ z8&dZIbfqx6f*F1Rd9X;Rwo<|6=H+l||LM8UP@oQpf`H%K>eCM+em)|@E4CGEbQ=_K z@h_K9Oz&ubYsWLNHQ(Bx`^22p#N`e0U&r06KxgxHxL!NL#4O>QHrmNcG<3+Nn9n7@ zpJ``QD3nRF`~#4*lwUP^0#qwlOMXotNtevJ)BONId|2F^x9K2$R(vEGQB+f5vH(e3-26U6>(x`KBv2md33$U~xa7_(&Izd+wnj3|fG!ZA?*c8&b zlx5nEmw4fV)3QE!I^>1W1RgZhwR=?Qu$$9HxKlz7cLu4;fZH?oMBJGpq!rT0&KOX| zV3ys}3d4GfC*MIVPiSq(mH@>J8o`_Uu6}{_`!)ai6Xc4ZA38et$-K>y<%$0ZQ?xCJ zBg~&g&98fgX7cB2cr&xOtBY}h+Mmtaex|;wV$Ia$7eCbA_3>r>x3408<4INa#}o0_ z4C1cqk!4b9!JTaW1%tVVOu0>)nG`sb&K-oU#&OLe#0l24vM`_SY2aE;)FtRGY+1nNsLGT*^uW{o0j;T*DNs zGbMp7H9ymT855riI4xxN=>t03JjsN_%^=M+zutMU@@dUjRx+~cLth!{b$c!ltzZ}h z&zUz5%12=vi57v(+UspMgc`k`s8&L4)?hgLgY}OTI!8qf14+{)&ohRqk=NH~jlt6& zS30Z(I39O>bbhF%9GNICdyyq0?8Y+{J6)i!e_K49qOpU#yBi#&b^-;Q7tQ-rmLk>5 zJpNidp02kU#hDl{)=uQD;^3EkTn@N!$MYUc!QACVb%Dq+>Rg;kXK4UA1SgyH8x816 zIZC@urV&diZOH=mS{N=yBHw)b+?y=c*tFD{07AlQT*!m{v{(~#CGF@DaSTs%8}V;3 zb*If=wcbiZ`4-)LdIL6i6SnYCs!=`8Dtt6YrnJw1tOjXJbY^i`^xPl<`F@v0qc=gS z+TGUT`)^8bw{lL8t9P%yA_-(|hv{GyMQ1DZ+95F|`bE_}dX(CHsE$o{+ z0SYKfe!5y0qonhxIE2Zmz#iu;!vtK_#2wA)Bi_D1!@SVSmFdjYdS^c~J 
z5C>s_en~~YGu*zyFME%-B?5izrJ7=!fjBpi(zoJ1Uu%7KCvJ7t2W^qwR??}~3mnPl zemVF&b2oZ5&Z)r~T+?Xr2%-~XXOntnV#AoiU5JOPE&^k{h1FiPRhH9JJ>H4m7!YT?xEj>1U>DZHT3@Gh zd&3_ZU2eQZpE~X6DRL%O)B(43iw0ojXMB>M(2XGGC?xISmiY8%k>LrKH)vu-xzXM~U990+zagb)6o&??To)51LFp*TI|O`s!m%X; zQB6KaVCm83H`AVp*MkXakYlHfM(%+0`-!a=3iMKQ%S+@lr_i_0}QfQdC994yxPygcO~ywo>KYZbgDlv@yONQ)V@UF?IH_C@~C3F+lU|0(`XXtVN`md_Ia_HsecC;S1Z^Og}Hfm?hT0O}dT}K_G z=|xF+guOmbhVv|ZR~y0%Wq`#FoFaqzSAK=xgC8}m9@Gxl{elnjcum*JB&^N{C6Of@ zaCtcImRKhjq_qI%lg#C3X+qG54H~HHme*!Sn+#73zf5o+P^=V$UiR?h;g7<+$O-REKFsOws7B^ zdn0e(xh)?Op;Kz~WsPm){HOGDs#2in_3iQ1VdD#hvGsJtCFM5J!JfAG6-~}Z&L4~# z6VHL}36U))*WpPA55u{dG@Xgf>Rs6EmDaP+k>cOlXof#sr%*c;whd$GHyubE_?s`v z^|M>J>uw1vdf3Il1j$qrml-{a#O_XbWxbBuLRh3-F;~1BMHr-bK6-#)%PsOcG7b5; z@x>u5Y(gC27H}5STenTh)mh!I`Bx|<9MJXfE`;z?f<7-A2EvrzX2T$}#R{Wps0+N& zFo2U%fH9w?m83n{`z0}hX#VJtvMOBaxAS^kMnJ<*RZvvSsgzTO#DTuEL7aw&IHcE$ z$nxDvODFMbyY|#?jpM_ z(~&*qTb&8fZE)Bf9GH8GSR`i6m!}$66c~a*(r78{ZtIQdSfsvT^ijYc4mZP;up+H| zLPgg)fUH=9gAV<{JC6Y0KJW8lmO?CrX}#Gn(mnPX zV+&4ULLE>YV7&xHimkki0=QOkqy?!hag8@T`=0Eda_(kvfi2I|R3?H(dSYdLMNilE z%U*obX-RTq{i)-Y z*`irq;l>WkA?|9~%AvCYH0@97_8g?ke9-2KV*%6{A~mSY!`Y_{T@ETg&+E(`7L7@l zef5EMV-h_6K`;vMKd#x-tKzE2Rw;L2;5k*`W>av)8u*F7Ymmr z%3zX9_WNhv3wAbc<~_=*{Twd@de<>qS9cEBL$2x?LFc5u-5mpMS_CMu&pOFAlB}g# zUreJB^vyneVnstsqETZF{@zoIfJz7d7{MZUbr~>v{}C+VA>s9N{N8V6FJxQmPtg2J zuR0$7DniPYynxr2LsE-FO&3~^UZROIKa;$gZBr)m-z_P3{1CFGN_#DnfoH@6v0k&A zjKvd-!Xf<0ZWzL&jgEm;LsGFHq}HG)X}Rm6scr!h9^ZEQ}zB@SjQUs4yNAKHEV-;OoAX7!tx9M{&=XVixZ&Kc>n>M62XXdAAo(~?(4%f#2$<)k z8-jrVnwCv)gV;r@O|!k?<@VuFOIGhemtr+m!$nH@W-wi~Sl}CxaVJdzYu={^Os1l+ z?L93MfysRl54W=xsQPGDX!;uami%_^wh879QR zWNO3kvPZU5Wyy30c^0lY*_l1!?4{*8?2gedIaeoPkMG3TPa8CSEpVCrQR-43&QPrL zXF7}=J3;8H6&tba{{>OE!Y*%XQP3}+U+;ok26Vl9h!%=ERCS7322MCqiIIp9^;iYeWRW$M@d9^Ez)0pBjNg#Q!8^*vd@)l^X;7O%)x_4 zG_#`?Q#||K7AB14jpMAcUpFC4EoQboG{^-TPp0C+`e7n%Wm*Qna!qr$3;i|fX0rDQ zBe5NNR#IW))4bR}f0%Z44)4FJ@n=YBjw0qg@t~}Zw!f_}pX)-JS{V$#7!I--pAVO* z9>Dh=4M)oInUtS12P<)bX7Ei)<|7ubITdfSuh(3j$`l4rBjc@VPvXD7KCTPfnD3`h zB}Ut$0W5JkuKYN2b`p2}LQ@#ZJ$SPs<1Bo5@fnwqEPSU6>3$HVo^v1WS@0{semvXk z;j{8eJ&DBP$0G+3V`UwQ|lb=kvc=BH3o58gy}=@;g}sJ^#fU)z{ITLmRByz_>H3gJ8 zc3_t!h~BP45klUWvrgclFg5(tStbbC>?thbNofPr9XY z3OA*Q%|BR%u(Hiib7d2EL6y7K%Wm#9LAls??^PM}0&4sV&Bi^|queG2wUp7VwaR0< zT;nZwV&#Ma0XHqAbL=w>&QLEc>VICiRGe1B|My)N;%6f^?rOjPub8om#4L1vdTQW! zA>D>Ifr`IL4)@k5CdKJYp>i?kgVo(xJ9CKVlgE|MW5oxgt!J0-_fxuOJZne#c|@^` zgp2bQeV6tH?!$%T^y0D4L-1*~N#R%MYHy`*Ll)gpBqp7ZD6C;i@-hsv?GK}izLXvt z%U@-rzV{-ZE^?^nTkxG;n`Pe&9CO*_Ff5%0Kd&;rM(cFD(;QgQpJw5>9~h_Vr1IZA zp>MRA*8v`IrW8+vU!a+{zfiQl#o;h2C%5{U6I%kvk7XvzlV5rkC6xAFFTWg!^jN2Y z`m7rD!+O&iW+(h{DBoP7#jm1-ZT5p@jrCQ(l`bUS$BZO3iU_B^qx5gC{)L{{H|#wq z-?dG=qO~)UK<)j;SKE$j7Bvp%g?ziD6gz6rF>fX5uy4gG7f<5M`nc^C)Ec(Fr-FiT9(t-?s`lvfw!lv-_GJa zAu;<8GqfJ|BoyGz%bP^I6Tl<+yhN*xRDO3;D&O6+`T`llFyZ6@T&9Vz`-c&SJZ&`xGExyJ z(%oh7c zo+Llw*94I*=HTCMhe9k8A1;)lwSV!cTk?eLj~L5#b}TX=8iDqCp0`(AXrbbXp{Lx} zocA!Mm3rHdzq=3JCoE|i8w@Gis^Zkv@$r_1WuJx;--D);oRQ$4;I=UM5Rgjho?^@a zTK0CYo{uv)WiKuuLT~&rjCEc<&X|WF^5Oitk2l@+nl=?M!IVtf{<`3Ep(7eb%G=!s z$oY95D8XY?(a0#Xi1`SPLIT4XCUl`o_fCo3pP<1d!_c7}FpXZ!Kdu*vWrqAVy#S!t zX{jyuckecgLB8t`o5+`fSnUKfEmTfP_|YfDWZW@oM=8#;p5?TEF&+D3#R zJ$}|TdKZ6~oDz|s9NRPNO@r4`?fYDmc;4AA;oFT)n_~9psgc(0SQkMMPYldsDIk(1 zy@hBUkCDaA{U7a6;FFkS2J41(*ZJ2-iXHL6W8`%>N%=!hAQ7$~mu|$j&O?WNjfrgk zy#0)*)o4F8tnR#NUGKZyL=Wrfs@EraNVXfqy(Vjv-&*|uo0TDu2`K1V>d~Ocpm!?! 
z+c2M?3wReOLv|4-XR<72)=nkfQJdWPx~Y0K6=#n{x*=69-=l#WH8RBw7kql^LVMf6 zc~N(boXy<`PdlQ@toug>q;%^Y^b{!b{&6)WmStGqm()rFpWSHY5JkjK%qhd-TCb#N z=F>J{dp7=vx$u^)!wVJ3D7*Q${iHd#>XVi3W&U~K*`=E&+2kRM9;7@sL043I7W&n; zh+Zde3Y%VzZ=b>VlXLyTL{Bu$Bmjp44@upzK5p7Lf|$q zx^+0R(SA7y>Un?FhPH6lw??y2UHoF>P;j1SR54CkJcg&|$T%h^EUGHPudxO4+Rl9BMkrugZ2!#KDd}EM3R|2RmRsLt~F3AIlY=0dP zF?7~l9S~uZhk+W+ z3S*YS&x5hqPO}3(HbAJmEyq*MlC;XShLbLNc1b(MytG3HJzFdcz7TD{vE}#`Q#T+n z8G?ViPajXVN@rsZj*v0%;jo08Ax|7K z8-jk5BL~>>xx($2SLCn3$z0rov?Zl({o{(tIpF&DwuWj zPz&dYW~*6-l`c%0KLEDnt&hCPa-IN%8sD%sS%V5Ob63ps%Qvr99}5QcHK$)NS+M6y zOO5yZ7N=tG4hWwuTRwCtGlf#xvv-a^;C~^8n?|$pU6`<{xg43r5o)0GU?M)^>W!7i zqT|fw4bA-fGmjWb0u>#1`wwBX@4{k654AnALIA@pa3&K_>}#_i}Kj-8zxWY2s06aeuumv(;%H!p&-tRK3)_=ae@ zlv1g(SN059G33;5O~P$vmYQuTo?=v!e>1+b_`!Sg6?(h_XmPV*c`(*#hRb`iiqr05 zH9Sj^!6d$xtm699v%?0&cV1vJOYzlGiuAK^17ZhD8*XE-z!&(Fe=yf2??x2eD2JzQ z3gXS+0r!3>HyQbbdhjPki2*GzZ4?2RlD0v2L{*AGvnEkZ!yejRy<27Q&B*b-cR`+L zeFT-IF-={5m17YNV+kcd68%JJs+B z-ZI-GMl7$dX-4x>W-2HYgR*$~^0W0L(u257vwMxFP&?XAe8QgClz0iJ;3uU{)7ah~ zy*mcLR24#%hx7ia!mfU=+^{r1nQ}D8)CR|K*XW%Z1$c)D^UkI4ChD}^ayzln|AuR( zrNE%iynbZ)-brTJlZ7JuuqU%!XKo*I6&w_$m zXkeS*n_YG~niBTG6Zzk2$ghP|<3v~)1cm+Jfi+BUTc{QpRnaKkCys_*DW*!bgPfvQ z9XcDw6UoelH|+}Hjmfz=ojhc26!w&__F`12(F zMxye&J2~>lp4LYW)-XavUUnC1Qmmt!D>F)2Is>2xBBI#g{^FX^#Zbh|?Ba8_u8ZnG zMUmEoMKErP9QfvPNMTy!P5%^+&R(RUc#4miOMdbT-p|bk5C7o32jM_}xmfAOctM0c)CrdDQ}S_X#3~6>u6cjpmK+kY-mchw+GHxtMXN9Torhg=r{%_cznj{iu4mNDACg;+ zJB0R&UV>Q}CNd@iV;0v#1mhTHx9aiXHjt_W>1=81joKocictJa&e~>@0^GBY${_ zXh`Z0(Dv2toXvvD#Q=6;qA<_X-R@A&pVpUGHD6xPv=}s5`B@U)Kf?Ao0UW@?;G+yO z`j-Q<+%C2Q5|*rXT~aq^Pg+Me4-rud41u`cspT?U7KN=QBGA>#yQd=c!xZ{;x}qSd!g(Fn^GGmm z)@mN-Pe_^frbxa0DNdgjxgOb+CbN1$X-(ptN_j3xd11Nzqs6VaLyHE!a~Lh7-A8u! zhp?NyayCg8d3}$%HL;Gogc%OxPu+}!-G!f!V>yXs0|Sl6deccY(@)LD=2rCr%vYEzO7&|?shGE#`) zvBwPfo0L-nFDptyW%lCvC%mL_Hx(e#9Qt;T5Bn)3u)*rrU#6IpZqIeAqsN)z`vXky zDWCfm*2>`|$hqk8yxJmz3_D#Uf zo)D3K*Xw&>LAc%}Tq=$*nkSKMI2?6t>pHVd+|6OpS2j>Qps9!l3b5yL}@G@?klk8eTv;v#k+ECZ5&z0PZ_$Y^5dzKZeiXD!o7YKN`*Fv9oe@BEMRP!xU~hUGjak{Qg*`P6@eCHrsE@-#qC668E=Cej zk0*M(!nIv)Ad~so5#9c|Oe9euywANOO(-(}lgIV47Q8)b9*Q4sOL)w1>E@5tV6TgL zcb+WV*7>$=TbI^?ui8i}M?!>5^+?Xi7DVsy8I}|m)G3D^v`SjFlzwD-r7$I^&6W!T zM8uc0{v`eNC*+cHgGgAXx+DbemkISs@Tk*sfHo1X*V%#Hr5GJ$XHr%V-2xF!;OLf5 zC$X8IZuv~cqIEX98Lr;%q)7~Ov)ggAl{VbeUR!rJ429MHt2-CtPf}fe(bveejvI7( z`{LJLHf_o)!OZhAX8N*S`UJVImbmuBq3MtQc7Sz@R+lG$)!(b1T3%G2{zS*dT~M47 zn}v2Y?sEHF`Ab-3O!TC)=fmTP`&w!tc+ zTifFpVhDOpLSp{+)Wsr*GZ8=4t843u{`7DAT!R40hxsU1qn9+e@1=H3@c$5ajXVFI zrlMtZY>_iaI%|AI`-XMyPo`t@SH&=@>O9#@M^#RN*Tps2V91Hc%e^Gt) zmGNJ#S0h66qJO9j)@$9@K>d`Y-Abx#{u_O&slAxQo6tmXA(r@(w!hYh zf@E4Lf6$|=ut-AlPZwd$#GRG67fLu&c%`+|cwry6?)T;?)@5|xx^lMkr>M&TBSSCj zE2ax@tLq*thay%*9MrWmgI-qp8PdLXzbgIWCDoa&xXrjNWjWQouuyX2=xDT+Yf8N~ zA&2R^ljEiwS=*k+E?efFZJmR>Z?DckR>kqTra9EIjS4s-pEgj`rZ2-7zCQN;DDU9d zrGLrqk6PDm($%k=*E4hWBT|$x>MgbC$(wRm_E$lBkdJUTCYqM}?^Mh>A^Hs+WI;tA zFNj`big!mrIeXVnV_PBGAGOQhdGo&hpEEosq|Y0MW+XD+%QtJqF zuiIu>93|UiIe_Zp-xkdrDDE%rW<1T4!#OO99`4@g1FC6v{56#ui^tny_{(|6n1XTKuG~%cn++4{Tu1y+Z#8)B&}1?t zTWf*t`rVNLKWJB_9TxIXW~z*!!{m~#B!D@tDe?oorB>>^D!w3hWBh;HsQ=BMtd}9c zywaDN#>0qrxsL0nh>#i8z$0#ZFhBm--3QT>aHU@7H8Sh|g1Y*Zody};yIuYEttW~- zh0=tbpsEf=B|I`RVJb7(n#1=1rvQQg4e@ky>G#jdm2nGQ6Us+&zG2q={Ff4lVHqt( zY05`bzfboC`xid>yt>tkEZ>*sQhz@WN5r@1WRMnu_N>7ii7CN=&|;jlhSL@+j-It3 zAl7r$oXMci3!s`J%Vs-N+O!*!C!GH#HB5i>FeQY-$>Ozk8U4@nbrkupT*J2oDdSe; zao;;pa0Q|B%~=yJ4Zab`)ybU&bq{>Kwvz5 zZ5_tkPT^{yx}J37dDQc=Z5q5Dt8zozkt^;cw;O(YbhW4Y%IiiVOeI>P?qNrp_jjvmc1B&$pCFg$lCi~Oh~mKzi#eF2ktlV& zRBK>{84UHMKV5D}I?7)AIOs#_8*_o{(fh17XHZBS7t8@inob(_Pz)=wng6b5Uc}^Z 
zL-AEIB*MPF>jqrb{s)9t9Z=-S@$y%xAdapX$KQWK%NcJ8fKS7P$MXo(ExmZh?6}ag zk_=f$HG)GeYIFoWZNXgv*M?qhJdw@0$3YbIuLdWm6c!mis16C!J5~=o{xz$)vlw_@ zBd($8tU)dEMA?3J(m@pZ@r2~m|#Bl!Qq zi;&}7$VA!RQTVB$LT=R94EKF^B+=AA*jQw$`iNhE)<^AWl9SkVOY>w2;t-j=Sq|st znp|qAs+qLUM+*u=47fXGSw5eA=FhrilTN5yFWpo4={2oT&0jiv<{TpJayR0|pVjoN z!XG*0Z2fI;aP^#3)*p4mBM{KU_2StL`Hvx*ui(;nIa(}ntxMS@6R^p?gAwyK@D&-V zyuI{L9@Xh4QjSJF3qDi$AW2D+iV`RA2b#}+SR0)p))6^`8WRUt;>?FAy0*>UhjR)` zs~<1EGD}^xX%pmHP2iWiI-wSN?8g7sOOnFHNdMj1Q8i!CPxUop!9wxQ&VG~o_kiJx z#iL=*pSFwWG%Y5YMaa4<%eV!bbA39U7dzW4jAen(kMSb-ZyT$YW75h!IC3mLGc$bd zSa=5!9 zkLnfQW6}u(Y@<7(6l{=~keSubqk?bkW%5vWgVgck!vRGyw3;TPnofBVf0lRx_7H6NS2K& zM=bM@0M|UrRR)pO9LlwGAmWt1R{@eUmnoprkaMD(!sxIG4Px1UNq=X|fFaW!1ESBQ zrkXjE;Qobdh>zR^5a-;uhJbpQ(b=O7_XI|?P|05`)=!a_0k??~_NSe0yDKRoyRxEh zHgZ|GjgE11QUIO~ogy()6A6~fJ`;1Cf4(_oK&A&;^@Bhsb4TekHh=?oX|4es->33ss zANH%eDcnbt8#GPDk1&2#Z6t(9oqYXH2Q7Oi?mF6gY&>)@(v~}G0_24b#@rAK8Se6+ zQMwATlgD#o+(8)CnB^c1QFx>J{xS374Y4h!QD^qmnjgh;W|(u?E_*XJxw3?7nqL0D zw)w&3d0nJym#JS;YcVi*`#ibm@nW}eL`xpi0Tn)O-^h|dHw`5}{}1Lw zS?O-iahVrY9xKnv`w<+yNOVd9NRau{Ceu{u&GVOz*zlY5XQ0RpFM#D}-X4e8@Q}8~ zgu~3JsY0NK;EbdU#gdHjd&qaj8A!4JgsDNq+6!$JX!-ra0XLmJEvwz0N{}V<>+Ue- zVVt9?{q0W{e!%F?r0mBCMI!at8e=7YXl~WSixRVh{6I1ki1Me2-kU7eF?o5`c8E9u zLOP-bA(R8^b}q68ensLhN;RHr8XGzPBUbIj5KFu{jVm5aDVs!_=f=TK8tIpn)N3%VyaXd^=e6nSxr+(2Zms(nf~HC zuAD*Ja2iulncM^ZETS+Q2{IK-q8a?so27CEu$%n6;T2&GqJ*({Kf(NF=c|NuR1Ms} z7bd9FUB~vH?2Qk*yU|!a)_mP=_Hvtq2&y4zgP<7wn>n&~rk5{&trDH5^Y&yvf;t13 zcJ>fBV3y85lzk#}Wpd({#K900V0ca!&U%Wz$)6%GbT(B*)+}*^L$QU|9Q@y`JNdEG^yl?fF$3R8htSvVc zN>JP(EoR16>3`<>d8I3%XD9ub@E=bs@ys1>;9NTuhKY(`y-M^K=$}22Vu{9ypKkW< zn2hYibCz<%I9X`5qa&jjH}CQ;giVW||Fb8E;l*Z(IHBM9yDyq7H*4QD0v@btigNqQ zqq0mA-(}|KVyfnBVaRjL*k6#XCU4Q`@)iHbnx!L3If{EDwcMl5`PJ@$B`hm7KHov^ zJl`aob;gh?={1*Pkp1eCaJP|lzim)(f#hK?Q>IurkWr0`l#V2K3y`PUaodK#vGmr zh6ws?<1{yV+B;%D?};$Jedanx5V1Pr+l&0NA~S+D%IsBf=tz=2se^}hYj$N4~Re1+-n43+oZpEX<96)JDNrQ z;t)Q|@Bc3G;oX&C=8!{0&Lix!(SQCWPjzbKkV_CJ_-wRgFnBpTPO50@;@@a`@0URj zuPkpsobZ5%+|@E*$a;?`xDln>N76Pa$WHTVw)=^BF5`vU?F5$L^v^4@8cxb?7gI6( z%P=>x#8p8*^;aHy{{n8npe&&W+*~i?%x(^jHqtnRIvRjd;;&_jeCaRtd_ev*{7xW8s6#_Ba z3ju2puw-UYgm5MPBo}@SRn$D;MXOKTj3|k(1(uaouBy)j_-B&^!iFKT2nJhkzOVCs zMvH0DI$#?buvnFCjn^Vi99*!mdoo|?$0JD-gnBAw;Sn3ZdWA zkSjF#ngC0W_wJir@Wx!7)-R`WqohFDt0pFt@I?6#iQNT_EV}(SI$Z))#?Ql|nV|(i zveEQ`QALka_u5b85&XKHxu|!8M8UGye>n$TE`BHjq^}3NQx_=&P#>|52OT7yyrSa9 zN#tGWquEjKM0Ae9ihC2m73WT@+Kn&7Z;2Qi76+o-VEj#@vq9W9MvlgyCq9?YEbxY3 z4_ee=(oqvqshTWiOjMcq#z?VRo^i|n(79zd0ja8njS>d9cYgu|tlB1un^i?;sM92!9rAB!tdYy^ zVqV0Q>K%P_vz!O1e%~Zm@Nm#e6PAdaDO0WQLiEk;_u5pT$>&^w zYjF6Y5tGh~e|P6s)G3^fdSwpKCdEwF4Rz{q8OdLk{|m4ePq~R2*pIQLlW5#)3>zMu z;CU-L!L_Ts&xXQ-r(UzgQ5sZ?3+?w;CDSJm$6-Srfxofvn=6~T#d+dpw( zklVY!1|y?Mw7g_ru=;HGVjx5x)css>*K|h~pA8?*DOSrzM(C4B@&aK~g5NG;s1jse zdpI%9liML`BdNZ@{H7(XpVG18%pEy+=5!EfDr7qbiRBk$L8AQ8;nN*XfFNIy`1{*r zIfGSKc4VG8+j7t9gX{^ja-$|q5I)1Z9AsTTv#UH0qr(scOyV$+_ge{I2D~% z)E7H#zxtq=qWdxKVPx~|{vdj^*X7h7Qc+a#2+R5c`tgGAh86DDVqkFGAl!};I0U?XS5uwRW!6a`Ic@uFXt4Fb4D50FoKMikJVWTrvM+A!tJ$I?i zt6Nl)?AWdK9m>W&v(-fhT_y0=Gic_# z$w0U3(teVTOkca~NaJoJw^`vM9!PM!yblWfYEL1`=ZB=e4jqwP&>y^^R6bZGrq>%f zM)UfGj1uJ?)~_Y6L8~LQ#T@j;9iP=ugVxtPT#6ZQWA3tw_t@6m`+T)6$We^Pu4|84 zmPUl8B2KMcdPB6`qdvCx6-78M-szKiowt!Y+@)Va&K=JW_K=OkY0$J(eWe!8 zG%Ud>vWRcXB(%kt?#i9W0Qeh-jMC&&XWHF(8AbI1XF+9~U#Ie{l+D+$xyj8Q4)o4t z-#rsc%gmh)qa7X0v|*AzaAxNsVp0}1aCrz7P5qYHdJOw_cQYRISq^c zENPFolsx7j`(U8dFr=59KRt#E^N*nj%N%=&|$ z-+zc24#NGS8UFTzM@In!cg6OTsEG9j8^*}OMIzR7vM_SyRP&}dh4B5wQdq<03GXb| z){bl)5UwhK^zaq-mV1|RS9B3gcXmip;8Eg!Fu6PAllaz>wpz#N_yH}*2f``;=Qq6_6m%_ zTVP)r3Fl>cku>TP8 
z))t}+U{)}P8W$@ckOdJp{U}+Dyc!!gLKHoX5RtQV7hLwrvB2e9)XD%DCI}UCA8Oza zXK}Zcx^tL~#bRv9t@kI7X8kBM^dj%SEWoV^c(JhUOd=_rVVtOYw$d1@I`J5k`2G{$ z#MEhqr{j*-6&7P@!r;x@3AwV_r{qrF&+rt+xsJr)EbNKbhyJ_-BV>JO#I&1rIPaz1 zy|1rEepwy8Q{|iG&l+4}FlJ$Y%&QY&C#>`M@*{F-;8nGuXz0|+H0IcU7j#N`m zqQ=1JFPM))XNq@bnDNxEC3Z57RfgAHw5sY74zf^nFFvyCQn!FTmb627Zy1t0)ptQ3 z`({=_B)QG{6vIDSud1(ZrY@#8v%m)Hg5Pb(v?U(uPqvl^e$mOHO||WTX!{I5*CKW8 zK5XEB^X985C1#2V9Di=;7Ej9U%Z#4XBDs1>_BPvUU;{}1*#mp?cc%Msl^%7 z6%rSvMI2eB?<3lK$S1JAo{tWZ0srKAa}q>5M3KafvtCMEktw|aqW&yFZ!2v@vUOpz zkx^qd>LtORo zOIb2J74K>ahi-LV2^iVAh#*og-VL1lxa*1F>n=SsoXFCmZ}Xz;!%e~jmms;4=6F1I zOu<-FD9%qPw2D#V5^XOpT2mM6&V2gh2P}zB)!4BFAm*=~Lu8iSb1QJDSFPhtDwe>}8$aNW!Fp8t~mnP6GC4+yQ%KptGL z6&v*Z=meqi{?_v6>CTykXLFlxMF4i%{szHf#vQ8dfvA#N62ivw43R&VfiaUGB2vm1 z>~wkocjj{i`PZ?ug1%O5cU?(6zX{p>BZ)BwNSB8)uXNU1G&DyuYuoruc*=aWYE6)Z z;oV}F4H?XJt(9m(wy`@^bQ2$<2AebhQfYPq@GOqRlq=k}iptaVpW6@j9XFJth0ooi zly6k_)OW!Mg7yioF0=0x4xsRM#Or;k_Ip}^6E40G+A=Q0Lg5M-l`UD>m%_5FEhu5& zts%R^$B%n(+S`dV>t#Ysm`Q|gpZWUrKMnr`0dV;at94TX`Wc1)vj1hrD}kYxyY=*rVI7e?woM5xXev|bv<$=eZua*W)%#@`@%$y zS!p6~%Z<7ss`D(G;7?oaQ_a=DBIH&$g`isrY}~-!tnJEHM!|^^t0Sh;e12ypW-EO9 zYrM6v7N#~%CED4u!VxIX>zvjtJ^yVenL}e?vv~f|wxmZeAIX%WES^nw{oBgIc-8fi zQ#=+0MQhwsGBdg1eZ#R38LqUencqI!_buKG(@7n8zY=)nk3(gEns?JTx#@yzK0ugU zH-`Z>R?eXFXGy)7vES+#KzQqsDe)G%Z9S6j#gAp5%PnquAYf_fV9 z!e6}HDkW>9#*dxK)Z960y+I1YBYP={SmdbHS7DW?04|00+faPVj38%D#@iLrZ(fp*$s`FQMug)&S8a(NB z)a9>pC>B9t!CBSkOg(_PkVgKV;rPRV1LCM`-r4HLkF52-=tPz0=}*Ix9cLt*VQ{nM za8Z94)56iWxiqL!8r|Mg=05HDDv(FWY}4fO9j1vVJT$Ri`c1kJGTEdUTZ}2~9)2!J z2fi}NyZ-Ak52aCQNoi%eL?f%4WDT_xb+cKIYzK3YEXZPWamX9)K>dy9+x6uYMDO7@ zZ?XE5gaom^k*cu!P8Btz&TorC&~K*~DU_8DZ-N?TUv1H^j+e*;IERXb>Ibdr52SK| zek{h3OQ_$sOe9pkX3i1`BYIwq1Qu?wwcxWIlG9PxZbe1v6&OB-4N%Ni5_8UV-B$zB6hD5u?9AwXg|79Cjj2iH53oFQC}BS8(mNgFxzRS zwuk1pOjb``)|-B}ilO!L>(@KIqg$(D&l_OM{)kv;w^1i-mFWJbE+-B^v>lezBgp{c z>qmXq=!6UJzKLbM6PJ@Hq8k=tQ?31dee?)n4bG)2*E^QEhnrUdOB2i8?PvbhagoZZ zmQNnU?gK|gzI=l}cPzGhZGPRF367Bij-5GQLby|&g1djW^zY7h|n#{(t>$ZOs zQK@uqsY{ci5UgGibQ{63YKKR^(zt1=Wq)5+3?y9x?!^^gwzRoCn6_*kIkCr34oL8e zTkP7eHKKNNf5T_HAQq?ELW2ry3h)~m z-B08Wi>*mdi!=|_9gQ>7E;sq|?}fB(HhpZ@<_QgrN5c6Zxar~{Kg_=D$Y z`B7MOTA)4Ic7BV8oSpxY{U zwv(Gm*E!Ey?*rI0E&O&W#>adU2ksxs!FmUL6l4stva`f&nl#C!m*wvj+x|Pz+5Py7 z;}QbT23*cz`vQL$h~#uXmvxX!H?g86Ce~I%3Nnc&P5V6dPbw$$I=Xz}a&f+GMyUaw zrmR^~!r%>=*3I@&2zC4ylE6Us!PQaE0fo2Z;lklnsC&}Nzn_08K#SpStV2!vR|&-A zjFoGazIw+{y8%6(h~J?WU#aA6-9PC}lAcz65>3Q0XELN{Eyhqe%p>2M^h zdpN(|>q(y8Y2T`x2~8$v|NaPkw=o8!0D`F>apwxw^BwjkG(-Za+|LB!N3!W`Jpzxj zPWh>FI(9PPR3~nzGX9MMC!OD7IXEN-Tj5Sd=SMdato`<|kLP0!wVTc~Yx<#Rw{O78 z(KnA~V$OQjQED$1(g4eA&q4#Fs{Cw-J*(R1WjgLI6i28v9F9@BlCuMdkErdf&S&G< zTdSfR&`YFEz!jx+?sd17YrXP1fG@UUe_^)V1rQv)i-?Foe(g25F57h@V zh;ynlQL*R+ILSyDT=GIiCa3>U^qqBj9nKqzrbxqGFfQfCrxckQ$e1Y| zs$N@08NvM3fg zh*g8#Uhiw#0N}6-S|)#woNwI*v;4ET>e_Lyp8E3mV}Nt4D4CK_K&iGfG|pGMc4G?G z7E08)TUm5Svwr|H9NY-721dzbGRxa-X!($14xy3E{uuAq7*RBjFjPdiRkX5d_bZLK za?_&3{K;L=k54A@G$0vM-E3CfW@VA?46c2x;_{zw*=&)lIS}i%b9)JjfP-v__i2g9 zeBP5rH4&C5pJYXn`^T$;@gfs>sWm-m%XD{;qQcd$z)yaWyK%<43PY%szS;z7bM~hz z1NK6P?8hfCF0`yR*+F!qiWsIPjkp7A-5;&j1$6SQa~VjoH)DMz)ppfN!j&kQ+$W8b zxn}QK*T-)xEBEHlWhoA-8c;6;0FQU2^1OUG9t)hd&aQ0BB&7msy=FZ&g4?0Gu29rY zPulb;oE?0_ia063L(NbbwyGYd+_h??5)CZgH#yWO-Tq@&EC|-#5m2f)E}i2Yi<#0+ zVq|E~E(V)Hz|&pj#Ak?rQkJ9u`GTcv+fNwahRB}@qU6u8`~cAOm4!a-^jY_*o-_| zsJ~PXkFJ~LvE=$;rD49JSlYAHd>5Q9jtx^z%{Xq`R$XBY-UM@6ueM={Y>A*Uxi^kF z_W7x0`vwyhMK+v#W~DT=O}qxj?xUxOX@Ce6@qNjn)NAMr<~Vh&*_J4@Uk`rJwLSB( z$d`$^OYg4Kd1JQI>z=$_z0J{oRw-9r^~y)Ajbe&0{&T;N#K02$EvfES^iM2uQ=bCz z0gW5BarLDbdutzTGwRCe_-hBm#;7r!A>ZACkm`hi0~KSho^2SxqfV=4 
z+VsdK+&z(j4NTikV|R`|A%x>+IZ_0v*M^FOU{c+hwrJmwBwQiblS#~(8HL`1(yhn% zU}(d}JR=v`xm5$a9k?=d{;7Q7zPzJTu*nNdoVU3zc4@3%E>Z>o4} zUui=%SsEDEqh$t&UvCUO&(w@X)V<_^Y_sA@yvsb>RAZ4S3arKrQEbmOgJzL05^O2W z?5#T!q=Bqg7Mrf*Ek-U!_F{CW(P;GyZ`QGAn+TGa-m=e)szT#G?~ijzzU2%N#{C@5mS}VR$JgTD z{;vcH`cSFOt0_RFgBDkT5%(vP$u`A@KO20uH?N-R5F=a~{r*8Z8@7NwSM!3JVlw93 z111nc;m~m(rzsjm{u{DNNL-ZQE6?>=+E_qn!=B@Ms&pCNC?S zIRLogp2f(Ox0Xk;VNR;3-K%BHg%m)i-~W^_;r}gt=XKBFxqlaGu6ZvWh({b>2}hT% z6ivEsu^*tJ@-p_V8!P@O@ zzPiJav4_h~PNCk)4pB2tyUEfID`gfK$JHuw%V%~F=?eBmuH*Sa);Pj84fYpzRfaW` z6#Zk>N7MS%z}Ei6+?(nTztt2yTL+^PPT7a@JD#pE1TMyRwwwoQ<+{6>Y~7aIP`A;j zFPa-goJiAoaGImbYFgiN?!kf#p9ZZ4^HuYuV_J~X8iQ3*#qa<(A;7E{mG-QzqfiLx zIZ!5*tFRs~=9VPL%)HkA`Lvg(6`6-TYbnZws~>%sTd+*}qDm}sMnx)_+ z$Y2z;Op=g0qv3tW&LL?jce-lY4(E{%%-6k6Qw2S6w3xT~F{ACbeP_AXQ`?-x>|gZS zDAqQ}YXqRzhhQhj9^Q)w_&hUler{FD%zm50TTselbXO@EDi##?O5o|`R~)`@``Bi8 z-QZE#JN&zR^hiH;Om^c+;PWf6;p5=;p{b+NZ4$SdIyL!3afS}^6GnxvoQ{9=%{hL? z=aIo}G}^9eD?q4kqBbTI3%~9S8GSF+seb2nPHotZ;>n#Eq9E`6?E{wUM_;@tG}eG8 zdou3EAEe67^&y%>n^7f4&RfEC?;D)+2^L!+Vgx!6-4MR6n8sw#p@Isg{DLb)eYG-E2xwY4r5xk}D_J=djQ~#}fRM68|;Y{Y;)&I){Pwe^$hf3?b=HJz>@Ak+QHQH<#V(u(WOk&yR5{%G)KjZOF%4{(Mo#YI?x zf|QZR`?+nq1DEE`3sEk5X;>>{5)1#(xU424f_imk>z#GTL~&Y!E)l+ET+y3Pp{zD2 zA{)tEU@I_IwPjj;4-&Pi-!*JE~}?dg(24DwOQ3%0TE zh6Ent&W6;5n{d}JuPwP?J9DNvx?k6sFv13-;tMs(UpD?Gt@l$Vhf;XCuNF-dQBgS( z>tbfH){aSG7e-N5;RB!E^8P-W#blySW1TPlHqG6qJf5o(h76{Z@keB_oDfC2=1HiZ zDZE`yO25}yTU(M)=gPAnrhvzN zO?CJ8Rbduo~Wy|hZR;b z$w#bE6KD=aX?}SNn#7vPrMd$1&!1IMcM{NzNR3n?o1y7u5>lxwTHA2=V40op1rh3_ ziiaW|{sgbwHtFL~e9o#|& zQ2LA-&?1xO-`RWQU+H_BZoHmgW7?d#2HW3CgiZmFt5u(nyBB}#R57|aX!;H%^x+ld zGwePSEnM?8a8}ydhE-xG>HNvPRodI*yezQyQn3c96k^QimT=M{?KR#f>EFnw6~bf5MH{ zLv9=0FO7@cIyE|9s*RFEULYy__6V@Ge115QHBE?iRK4uDWfjsy^uDr?_*W3!Z5IV> zMji}H%4q^cf0X44)l;Yt9XE5R?4|h!q%e&CoRCP!2o3-zL7qu^3we6_F?Dm%3K!`0 z_(dRLr_wL({r18ZRwu?@qLnm2{b*_N(JPsakEMqf!hdSB zl`B^hOtKzE1+-OfmIVa0!$Ac~%E!QNB8z8!jG(83Xw>_G;T`r{-aZ9eKVltE^Gb!k z&i#ZX)(mxTiPVO&vl#l{-uS{OLzAeUuLHU5A}+C>6$j>J9Dh19!Z z*3YAlOj&385$~aEvj~sDT?FA;Cn2SS%5Kc7`^GBD8vI|(T}NjTf;mW4bGFXoxX%lma?+HWI4q%ZzCqgI`&(8~33^~>%Ie%9KtLsMUN*Ks&H z2~W`-?PEIkS8v;QPSY2|V~&ea>v$^oV+t12nR$Tcyh~#B)6;0ITv|wELfa1^#hY>B zG6b7iXBKYePA)uCG|d%=#uUAyGTd?hMG-Cs)zbmb6oL|>iPt2wy|N1#=}y=icHDKx zLj9Ocza9aVs(Zj%)_em@&mJv1e8X)$X=zJj1|;rrJQ*lhiZ=rjxmSSJ2Z5T&9Id8_drzxhh~i2}e`Xm!vF&wa%Z^GfSH@*8(4{ z@4rlV#&URE@`2cv!!>cRvWc<1J65aBCARg)QxgIBI{s0}4t4Imjo_FHJS{qPsb+uS zj+!5g>iIm^`calS=1Fe=^k!OtpL8uB|v*o3uE)lGdE`{0CKvjo=8o4+39K z*hm6^&T|?7AE4GtLmq;i>Yc7M;!rda=9nseMbu*#wtPuv>HYZIe^ds}3tDNy*7g7G z6u~EA*(SBHE3f=c=-6Ed4F^p-8{K>>{}=6_{Ly!dA>$ZOg^W0uG0++948ItjKnH}%_rWDp z;I7jgtmZ_2>$+`rrVZ45v7d!lwyfwJZXhp=a6nz|h?gC>izi12;~$7yU~TVd?LP3^ zT$@+p+mS34tVx8qtgzIS$iznxkKDT6g}7?VFy1->Z6LJ{Qm0SE$H1Ews7|YM!iuPg z39!i3>8H*jKq%~CeE5zsBkeEoQnAwEnmK`T=_HnGj<&a;mB+sm>^wGCpVdIh)H!b%!M7nh4RSo( zU*AP{a>1q_N3b}fBAwweHXn*tgWd{n?t1){|DyQEXe%9IR*2i7qp~`BkW2*<`$6PR z^>$fOvnFkpw>8{Y-dT`>f^ROk;E_A?o(UO-73w@I-?6;8^9BTPkS%`>r4qits8K*? 
zCE_zxDV<|wAF1*TpdM%zQH;{p&Wv-O-H-&zzttSBQ%~Cow;l?1Q&zi5ZPas=zWM=d zffd7V=r?Ji0cN3WOnpQS+D=5hTgtspYErngU-4%QZ9oTg8mkJsqq~};a1pzP&vFqj z*^h`vjNQnsoFWBD?Rt&jvvA^HR}*P*29y}h@^ALC;NBbM`;UmiK+h3cGfF*^M*G(k|M_Tz0#r0guaH_DH1_&t=ZpM!!9q=4 z*jp)O(|CG3mrfdM1SU1WV<5+eXZcNoxB+Ud98F2Cy>H>ur?l2~x9%{Rd)9g#ektP_ z#2~pr8K+tM@XpES4P=lO>^*O`qaQhi%Pbsf+R#ELngWp|1CF1r3%M8rSyYBUvscX& z9=vs(&f4!pbV&ktroBOU&sdlRO!`JI6+yJIe|8~9tXaz1Q*CH)NIeR>!`)6KzDX4` zjs0w6%TBuFsPQ*{gson=s#HH2_gP(<;Z69N^lsTW4T!Mu@s?RB&u_``ITM_rsEUM< zU|~3Y5t`N*+sd(zNi(+(rh_J%3)qjF7`?kKfxruLsvkFYE%4dx zr;~>kxa-+6av%|tE#zVFnJ^Pw2Uwqx>>yI&2A?4$c)K%i;(Ra*SAy&LfjighN#{-A z6I3YH6LQcj=3fIqGPuQanqc|5(F`IiY_xd6z`1L{M{%HCkl`6VGv-`%>>A%Lwr2am zgvXL^SyM)EDw!PbaWcSK>ALMWFZVfpv-iY7n_n{E+x3ywM#o3#6rBpqj9f$hZb|N2 z!S2iC{G-&KJb;-kkA4ksK3eR%|d74`JCUW!v&;Fsr)#0-?5>79-eq3 zq6l|#{SK8A2)vCg$~E49N&nx<-66be7M}P2suL1)f(4T><@;p#RWl|(+UU#3Yo~yi z=2N(VQ%#YxS_8m}&EhXX|MHz9@^|`%!!ytj?rY&#tQBG^2iMhWbgYKWw%H3Nj;!UZ zTQj2X_i|7`G@J{b{{&crKl+bskQ_t>4MwB_fREEN&_dY@fDYmWs0m1`eGn$0rMo)levU8{>XOEPMp}#ZC z5613u_m(R*Yw7=-Q49ML2;a=pa9yT76nn%G)wvc%o1fgd7VOJt2`CWGMiK4bCbj*?oc4bfZBBx#N9LTwpcYf*wn2x4@Wd&YxK`(FE zl&y%$mvcC6YAlny33UXN>*4|A5Fq<0J{gDiyXdrArV1-*%Na`tO`G8HI``5BMQmco zklMwhq+X{ur|FopUQj@Tu=V`>a#8PkkBn?G(6;kH@TCL47ygzRYIkl)p(4Q3}F@rE>?WYP)i`JXWm~ky4bN4=P$9A?SQeTBPQ9|Ne^$;1o7vejK&&r-0jf@)u~S zJ^)s|#X`FilhIqa;mihQ0}BrI0&U=-g$j7-7t^mY163hlF&xf*V<_;pDeEZ8H+1SUs{;>G{d&X+eo}YK z=)86n!xr8_Arc0HF06x6j_lMs-wXQ;h4#eAX`H>>pSxN3e4BK0!5=%X=fSybyCbYX zS@$OuZ08GB^t_()I-EUEl23AJh$}E=4ok~tFg;eV*0#@ptz@;PQWdvlkZm@?MoUua z&QMxss=aQ%uE~e(GBf*84i)N)K=5u-gqIt2Vk?BeRF=zs`DcRFQYm-K?qA#t!x}P6 zIxUF?OsO*`J~azC=qMC2v1gUHJPVktOqe)eV0XA{kv*N*FFQw`iw9q= z!o1-SgIhL%>uVhcKssndPuvH)1H{94^CvP*eUZ!dfw~UrIrMtG0B(h^1fhux{>EJi z0I*)0HaHXol_evu!bFqyvlziT3wc3_G1&tXL=<)R)cfsh)){Y8ywd#wLG;QtiE z;qztWoVQgiVMeRRG3r};$VRr`i_=AiqUdCW!dl0yuYy%6g@BIL)e-;9Y6|QHh1e3h zFm;dxXQ4vOP0J)osjY0gs|dMl*u~MP7IvWx&uRkp+LkfXF)-T~ij*$4;7sS*<80lC zZu5o?o%I`dLlT}}>HC;Y#%=wRH~o^NUdx+*7iVIlaLpUdCqSiyIkn+W2Jc49C&b?2 zMEe7Y4Ah3>tnR5})~S#)qvLl3%=hdex`wDC1lEWGi&>|B78ol^{R-UxrWSPj>ufiT z4bN^95Ruh~fR`#nq1gl!gK}OSY%C)Z)7f2=ecA=SHXa2@k6AkJQbZcj?`Yl3n9a7eG z2JcqlvRUThJKZ`pJ@{q!Gism9bf_#qQ!e;lCVMurdUqOaeq6`k4ChBOk4>~`BcX9v z{|86~#J{m6^mg@cY;O<-oFy2O(};@oD~X|$xsylLx#2B2oWu5GdE>OHJ%?)8;b*?B zOy8ZCWlhkCPcoE}mEO-Ed^>rv%~)ad3n*cdo37yUD{d%YN&Td4xL6lkP&RgWv zNZ=}j%6aj=a2h-9G}_~ozhyhNws_*}!qh;iEyK2hZju1BKMUnCza?2Ku`G+Isjw4V*T?cU^N=UMZ< zC*S9ET|JI0;Dm-kSd|GVrYU^5*aPyBgr zm*M>xyEuVnF#qbm%lppDdH4*h>cc}yqRU1R^mXV?P5&;B9@YK$cU8;@_wD~(PM)!d zlg(?>XP}dNV-dsfXOeMr>)&Oz!kJGQ-#xk&Wi#JSQz?DRQAzIociBL;P(~p3I^bV7 zBwETAD)?`5Pr7WH^M^fTX?_o>?_NLgf>jN$-TqzX&-fn#&_~d@jVR$REcst!dj9}9~T!G_b;lorT>Kwu(4RPQ1+INTiw>zvGGx@?7 zpQna`w6dDhsl4`BG%*aYS@_$i=_?YRf56@+*~$T;Nxz2L0^goB?D=;<#X8_opjKCE z%SHY7$8N()_Lt0&-YU0L=~)M>^Z2&|_Q~x(nej@#a_p53M>vK4-{BWuo4AlL8jLO& ziD?-T6i~0I3;WOM{});w<709iBKi zz^b>$gYbNus~>Ps+PPR{y>7%hVu89~sv%%;fpM)8S4Tvc>=7XixLOu=p19$i&OPV| zS>BYPDsi3^hv(UzoPCEs#N>*)23~o)p4LMQRXmW6-HtiYVgq0SJMG<-L*5PYy%NqB zO9`_j8ccDe>HrLP&R*~>zr8M8UzWh;A2D_lW5^TyM9ck{qQeS?fI>U z)cr4CDqj=~)Xj-#1&PupdCr>W4BW2N72B`sN|KvmKW9THmV7tx=H4}Q|Bt;-TT^va zwX1Ux6(&xo@c-mx9|CBPFvoKE6Gric?4bo;mO!a8@N^wg^b}Q3_lkESCa)7hslt%E9A9NSW*&AtFtsbzwQg=soj zDmkyLR{pz3FIElaJ95K0Dvu|Yx&ibP6YN+;D7Eh$k?<@ctH`v`sj9@)1Ku~5ujdVC zs4=uj8uOaO6TFJxbX&c9uD{(JY5Yq!Ubv-XHaGD%R_*))JHOm2#5>&W#IfjGv9Axo z{C@GrN>9ow3}8cgG@*YN^dO{SS9Of2KpZzvrJ~7r{w+mk0r)|N->DOGKUdpk@oW zq+W-NnD1j$rq_uAS$Ect$_B#ny60oV6YwJ!@b2Df$F5yFP4u@7kL@JA znqnMShm0a%EeIlMLdqJcU0#E`zNp6yO=|^3HtPQ7(K52K3}`C3xqjHVC7rDC9n@H`jrthzQmZn<}LM&IY$Z^?}SH5u}PZD*65w{?l% 
z!N-Tgvga;diG;qbqu4Ng-v@?XzL-d&byR60!r%Jho6zxiu0+PYj|#Q{+x7XlLlbxl z$L5~}kI6a$sunhmf_Lt}#!8)&w2d{EJp(fi>B=Ul!&~MDI}y*8jCmag8nTE=B#|z3 zpL0PWIZ(zeMmy&vVV9o8QorD4>8~|hxpt!;5FZ_kTPyLlvDXg8q_qVzcxbU~uXapJ z@7z9SN_N1HtTa5%L}|aUJOQOfPizBXGUL7tBha4YN8@|bBjB=`$OKx8l z>ThQFv?HcHm~(>G|I{1k9(=|s5x%cvu3qNCaAv9dWiFrMnix-CV7vSx&cg6tG{<2mRZP^4RUUTDUzaP4B?sBD@Tz^jCq-G-q z0QJCKM_ikMW)>UeIJr0X7 zQ9k`NBa1BCP?eLXjbp~?4QLw+R|sWeyL-Ql852vBF8r(5Mq%v*Uur>D^@j(h#b5|E zoNI{^yIIVQZ)&RcZKl_O19%~qkTjY*a%fj}aEqQrWSgfiTYE*m^iatf?8+~q1M@gV zql>AjhcZ{cTy^hDUh|mwJir`LHhw|PKriUPJkU+ra?z}1NzguiuPuVR0Akd7jrwim zPMTcER5~F6hI+V5=jUK2JrD37oQ!P#QfMdT6@P}?2E%Qx_3yvn-sgoXT`@ktLjT=? z4QM;lQQiqo(2Z~Mq4{zXsqS}~>xHby1C_BtmjnJ6J8H){^+vXQ$^c&V1M02)W%wae zVC-H`3JL?yqEKGAp*L((YJ_Osrnho%7*+$81r^_eYi5~I|Q zuJ3!_{TnLhmhFZQzKYCWQGLgcJ%flba&CHZ`N6t-#=^rQvC7H zMJ50HqW{kj7zb78v<@3N0)wPBX$&Djk~P|U{!-UB9{@p4qa422ccK4hylomqSxa7J z;^MBCL)+h1Dv5ckwttW8Ka4cUnuLhyieav9nBQ8@UUqf3b$FcNsBJ3#R zeNMK3w@h7C9{7S;$K5G@dj0dT=O-(|wgwDdE1A$!`?rQSw&aBP7T0fLzSZzdA%Q@8 zTTOfNZ%S_@wEq5vL$B~euFkz6?2Dr|=q_FR;%Vip8zap%jKb%O< z_)S^OT8gHT%YNbFRi>JtCB#kQSw`0(4e%!s93=VSYEZ8^Ed&!DB{G0M=Aiy^;SH#c zmRL(7efzHHefeB9r%j|v_dLwiFWd=DR%z)4!^$M{bWmR}l5M^q%!~VWK5j1H`?SyPN;^x*(Mq+AJ5Pw1yPj8a|c*dCPiiW`@h>_k_el=Q|I< zk!|P%CMe8?H0*e?y`iY9Au4e+n~&bQ#|>1fGCX3Z`b4&E^7zLAlox`PO%BR!$8Z=O zKBK?dssy?yEmcWd!uj(>XHfggVZFi~3H5w_l>vCR0riil)wqX{j0G3h{q)nHKDR)( zNySJOB=SZ{pjLY(Te)Wf#~ zF_$Il2h~#06}-Uenihui?()6-IWsXp_NU}QM8!z+Wn>5IyG^k7^bXx<&*GR_ z3A})gHJ?tmGtG6;T~ENX(OP6DiZMHQS053bVs`VS;}|{Gc8I6D3@(~sG3q@Syn3%o z)jIQt*F9db%%?bSD3FaIms7WI^pr_bLJco)iOkExd7< zjL5mHzfWjCe%*Q=bO3tkG^47<07IYL@RogN+D(M67{TxyyPIyXnyxDrctsN)uX16B z=^bwCbC0g!G4?E8eeG3x=lW+~9Si=6XO<$W5>L~}*87F5Zkx;1{@h8$ubbfCX&Rj<%c~N#oFvtka$&JyXZTd% z!5E5+#h!$o3WS$IB;ku9Te0d*@A7{mE+SU>A6{gSCV1!3Z$qFtGUE9$sg$eupWaEz z|J}Pbp3jzfFnloOgXWKN>+=sn@w6neFdIq#ra4-W*6aU+y!VW1Dr&C&r6@4bX79i(?69qGM8=tvDkdIur&PJo0Y6W{myzBTh>)|xeIe$1Nr znUkDkpPjv*v(K~d`+A5^2&-Fslj0-o7!KwS7R6YQHF8&)rP}PcIoIhN)GD)`D7}0R zCJt6Z^gn%p+c!<@uiLLNItuhp;SQ%+KDyw{Gu|DtdL}E*Z0QBFDQSuij(wxK}-_RqgWIXi5G)ned5Ut&$`~aNRb>4q^Ni>xl{HuFwUwV2L_#H~uMa=^nXM%U(C{ZtU;>>D0d$Lu zs$*Nr&))wS%$3>60P*>YRx)c;^r4m-<#$5vhyl zhJf&1(i#LW2}q7p!&p(uUKuAfK+ z+h&Eb=2DiTn&}l5zthN7 zU!@|z+#TBT)O_QGoicY9`qk=9?{$Ne3`4f>8R6S(`h2bIoRZ%C)<@Z!o;lYq(=@$A ztUUv~y6K%Zj3d43lsDe3$1}*KQ%Yf0(Hf`GDi7*s6i> zXEK2V{R`3th9ZYT`nI$c4Xfl#V$Z`+;d&qsvR(cP2LMr4?~d{N)nMIe^q}H7{rStX zw62H|m$VsBb#=LYX+5N5M{^qv$rhzAPnO@X9J1($v6W2&xKaAw?&!On!C0qrf#Pg7 z;8_T7J8;0}mc!MUYHNt8(AmA11YRkwe06%BDrvpY;bhK0gdYFCI3mEFLx| z#)$9#C<*;~1cK;i6Inj}o@5V{iccc)PDHH;{*pydNphn;AAwv~GKsc7$A4t3f76&Z z`IdQ|)_s~I5Bjv24XXtvB;dvyBgKi3vW4zJoPs0R8XZ|H?q#104UGejs-|~WU7Ke`F8&AF>-#^T* ze~gn5+(Wb-o00gXSW;SsjyuYvGcIKCO{n7@)xDD=*n+jfPfTs=vz(ALVS|t(Q0f85 z6%<9`qfyGKy?$;}RYVn?(`2wI^)-CPWAQ&gcjkzhx!|O-CWHT zB-VO4_B>xk}(?-nM}6Q6$fDTU%t$01Pe-&o|u zj74--Ldbp`+8~x<#<5#S0(!m)2lQb#eMqOad#!u2=WIlE7yQoX{oUP|O$_HSZ;+^p zcgW+|nk|@r3A@g1*iCNCtMnyUYMk!|ftnj(W4;S8- zutjs6KTDFW3q5pH8d+YC&{6`bJWN0<7=X3m@)_|Ea0zu5sP520$aHe%PT#i+8w^(UecvxGiD6LoX5Lh9jeu6N>_6Q8k~k2;LgBiyI9h5~_ka6i zT)HqQrsmEm)=>)~c(|~xNs^M7^YfTnhY78tCtTm+s4lXshi^J#7iPg6scPy&;Nt};J zMnrqlWMgansa-8|W6)k`jg2uARgsK<*qjEO=q=HJXc=p_hkuHMxOV#9h$pTgF5E>- zC;A7!O#HH4Hh1RYkfOW6wNv!Cn)Usya&;lw^n$G2Wpg@3D+HHWXotbDM)VmThl|kC z5oNr)Ygv?t5l3}Pr=k?+ggDbvqU8D7mva}7fM2c`mnzG*W7T_)o~in zmoxtQ3B_zr5>@*T%vHf$=L3l1duUb8qn7Q3b1|DmI<)f_><2wUeb9xS?A2!G_r1(d z$ZA&Qe0wuy!~qgTHT(`zp;9u8lG;)_Rwd_e#i)dQv4v)AM>Zeb)&r$(;qz&;B^~TM zSAZ0)p=0_I(*b*0Bj>|7r$#=RM-3nJ%=$6a=Zjbje|9HQucHL|cRmcAcpl9BA!6e1 z-F#KqaiK`xTy)93d^d$^o@&ZEiX4CJYpcBuO?P@;#p|eBY>MU8*NzFO(#)g`Va6|< 
zMrnH{W=yS4fD))V{o3EoN>{wS_}Ou@H%bjgQ76IM+AdU@Yl+`Yu`5gqx_Vi!mOmoB z$bMll-s6`0H2<9Aq$EXZ8-`sb45uKYeixA2hLGu60`*+{V? zY`oqhj|{d)o3giCZ~MxxcT+s~`QqW*m}}z4=29UVI#B*`gF<5jXoQDAFb1P%J!|QQ zfDzQ?LBu_cx5{F#!pW=y#~KAPlyCBa&XrQ=#C5{h z?fPMa^cYd-z1hgPyeR1_)z1BQ@S`l^JzYQNa|od)4hsWt$e^Ce3?G;)>fk<584X-x ztW8D}t1L4(tu2MnH&$do*DmF*7=jdRRYkfSE}_`tyBQhQ4Eh9DJcIlcs7T+^|5|-;NU~s0nZ>7t@NyfYtrP1{w zAx-#13iPIT+Ag)ujZ$41&u~!05y1{4$uEwzx@PT0y-oh5O;0a&}xw z3U>Mn`8&E>CxZ{hNgGX74?Q4nhr6VZ3rwMU? zuI%imYUGCU2LbG>S2ILTvALW8pJ>bGAAGdfz zw7sGtRkPK!*y|=@w&KvZHt{hW#xJbG`yu`U?(JGvt4r$ZJ^WRwszE`6b!yFDD2XDe zIXgymmDE%4n@lT-_|71<^~NOeFDkl6;x8@w51)XXFb?mJNbj&E%+0ch=vccTL-+1> zv6*o?qnY8m6aKBhotcM>nB?=Ui{L(o;)!Q{(^IWD28ivd)$p!+l&kaW>yv$myDoU3;Br z;uHk#p^p1JekbGQ`=C}4@W{f><~XkJdO$-(BKSOXs5ZlZS=g^m#%uP%H0?NWwq)BB zTJr6#iGFX8mAklPt%b3Cj;aV7=-rPzGQk?I{d`jrMV+;uVbrN*V&g$@u`1w z^!?w%8V}Ued86$x<-jVF~7znfV&%Y;~b=Ro;t_5Jf-8>?T&6rpn9wGV%L4& z#-*er8`UDjj%x!$uj9Nc@3A0M-&DzlCBF01| zOmC{y9|VrjQW&cnDAv4q%=6IL^=xB{qB#%0CI7$(XhCJX9@sCa3U@!C!2d8PoliLr zSOf&z;z>eG6C!z?5|MXU0P&aGg%}GAqt9d1{9hEt1Ov!))BM2m-0a`}IQQf9Q{KM#cU0+xc0laT5ZDhuq;9ljH8)HkI91 z=JF-AwC$=xX|j!)O~REH`uo0o`$Nl`;b*E>Bo9RBd949=2b>sz+Rolzf9^u9G=2F1 zV2$j~>;Buy{xUukmM6j*BJ-7|4SGkrhgnn<#*J3U=rFp2{kDw5L<$+Z72q4ANQ*w+ zwT~xVuIxKxj$M>GlGC)HDgYPgj`0>^9!Y;N{=D%gH<0>0+WXMdhr!uFG_+s)@HCX7 z&VPfz@V72sQ~untaQ8<(F+vWwmn@@4g&#&&=C&a;WRcaW^|?S!xm0efwjx+VqwSD8wXN>VckWorQqz{D^F;CP$1-ANoJGnCN+B%!c1o&FG09u@ zoJJhDHbI+ETj?MlqvQan*6l(U(OTmega9MQpHEk0)bI6G=Zrt@_10%p8AGr?{#Y7= zNrM_rEU9TjbK`OjnhkvQTO9x~42l?eJaw(~dOz!*EOlYKxMV;V!#PflFSJ%>Eg-4$ zo2p&Sz;!}jgR$M0zH@--h;Dyg&bx4fHpwbO3PUelKtpSe0mGU_HRfO5RF^iTFbjx# zZ(#fyB?1>0D_3vW z?XP{A@&33(Qh2zZ@Q=9mF7bou2OJKdp+QHb$xnaovKYI5P*HVYE#?{$<)x&4VIHNT znWRV(AA060jDoZsJK4l7Oj^NO)USx?1w(y|YQ+uxulyS>Tx`9^B|yi%C#la=g3RSE zqb3z@?a-LW`0iu6P7w+-ECUyK4+s{oL8A3hKD~=~%U23@dJQ*NXqm;X`E)<$Lq8mX z{AV3KODa}XO8lFOqPAcTYka%F-$)aJ-WG}L!H_5IF5Jq!gJJD`dA=I})py2D=CRSw zV{1hTP31)T&eI`-<{_>8us*-0Bb!n!|9vtsp)a|zJWluDzV`uX-?;waWCirF z*TS134fGX$eqjV6iE@G#Fqp5ag<`7X)g33jQd8{x0xG6gBWX`IdyfInEY zdpXLv+t3z1+cT)TFN;jeIT2}mRTFmTPPkoA8cxzm8S)t}|NOJ?=kjEhgN*CV9Y|gg zxy^F20MWs$UpbBIT23N;sg`jcamb6;!Lz=o5eRq1j@7OJf~P6%G_rD7LurYf)GQFH3`% ziL}r@6@LHg%iZB5!Q-Ejh_~-FN2Y>jvA-7K#MgUynXX1tpQk-!&-cxMH6R6>GE`f66lD7inTPj!vqT$q-(*jxjvO zI9kb(BD=VmSXq%~=4i<#kw7Bp+eihC*XodRk0OlsIk`;m9b4F3#PV=0^Ib_pXyiMG zx%Gz6=10$gfJS4RqP^!@B}h=fCBu@-Bf9ycy*Cc%)F*dN>@re&*H&4b-?jRWJWP>C z@BeZfR&gz7Td)XJyV<4Ke-|bxZ{Ro)(OY7PGpsnsjuaWhn3CQY+DC?^1-qVoeJb4( z^_UPrEws$2{0^j7T3hju9tW`m>DNN%>Yu2F+X15%6b=`lD*dJmnvt1*`iNlB(Z@q3 zEP~=yEpxUWOFy_I?>(w)+$0{!!*d|U*jYkpvE!8A$L}J1)jx{2NiJ{6a0Mi|KwWj1 zndg60GZ=puWWw&g-WtDeT7SYpp73>uw%UL$cD%PGS?yVU-?cG46Bd_v;_~J$qY)v5 zx71sU6UaG9>G5@Ffm>ZAB~;0j^@vZYqT7a~cgv#<3D$`W)^=iO?vI)vHJ^-MqxW}RvJ~e30t8S&{-?~SEwNs}Vqy5gn`3%VL*T9tkNmR()k=~e<1d3MJft^Fn zY9ce}&W=fp_;OJ%9a7OTi^kb1%Z+F$^Hau*ug4X7YZ4sVMFxDBm{PVbo3ukGqPEU> zpJ{+#;judNdh;C3S!23Bt}ETiCiHYW1=-PSEU0L8@uteuKW04j4>5NNrJS7;k>nP9 z5%N+?0PUROwcVmr6O_#`&zuUccX|PNn5R%6mrlDp$u_1{MJ4veedR)NDTHfHrR(?P z`Fy~yl>Ntcb}$Y&e)TI*FgMNZMl1s+y}C9&w;|H7$@7&o|6erx5njEHS%g0dzqt9r z07DDBf4eHc`miVbgA5XE9}4@oz~~tobN{Vp~6tQ>ap*NBoqk@ zb3rn9Pnq-UU+xAm=$95g-np`s(y0)Y_tBM~1|5Om#?OHg#0&dj(C-51exX{JvsWfY zA-EIpGD>Sqaxib#&g+CBoNgbLwi<4a4$i~ajK>w`q=-QC+wtZr7oWd;c~et$)vopw zhF=MoxQ0@=dV-w@{fxCDy!An_b-3#ww!b&gPEmG6;(4{;oYN8))8$h^8`?4ZAVSo^fbNXWnKt{II` z9kww@-C2MC$$qVcrgF;E--i~|G9QA>610t*9hJ0h1|vb~-UQlbFy8~d#mON;5sxGi z4?|;Zp$x?C5cgl976BQOY|lOA26F9+gU^Ntpt#wS>rRBob3vc?o7avpvmWB+TG!Ic z3?Pcwg$h+;6-Wn?M_D$;=;~4H`?55q_za3vy)dr6$6u6d%;rVeBE7Z?Xmdpv%Y6XF 
z*8IiMea#BnsS)yR4D{``IE~hwUfMD3ME`t<7;Qt5fa!<`7X`4s)OvfjK@M@fx5`v4 z?VPfZH?+R^6=CH~zacWv5qw)vz5Wb2ox)3`*EifnBgDFdyGcc;cL&Fx*C^jZUrU+lr>nb+gim9WE6 ztF34=%eS_vso1SF<{SI-CnXsIeT`Hr15`$d80mwxh!|-q*0>?TzD8|Zw)xN!3>e3Q zO_yl$Nc+)pNMEU4Cu*XB1I{<}I<@bOU}<%j$fqyy;_s*^pS)Jx(1M_jTG}_($o49O zur1!YqzBE^vS;fs(~cA2Zc0@|0k^brA(QJr92PqSbJ~b^>$U!njtV|t_C&58oMrV3 zr7?JiYw`n;^rX;=Shj&hV@TW!GZoya+hOzlI^2oEo0e)NIZvcGXXkU}AlLhFX6<6wzb&Dj?uHO0=AwWCDD~#Hpy?2^phDGvp5D)cw&#Hb;vmzJiw>R`{ z?$hT=c>U8)G7fHs9R;Ck&JxMd7r6mQtTr;|l#)+b*|S+BDrWSHAAf8oGlu+lw9KR} zRQ(9*e7$pn{UPqKW^F`YZ9Sx_XjoU!Z(135a+1_|Vd;n1SIGn+q1xbf|6-_P4w~O}REsaw z^DiJ0dF@^6c^XxLxOc7sV>AYG&kCjnhROJ-93x|rF2`^D;w^SE1+3nPz33gi6ny-_ zAkVu1Z1Lx z!OtDU1Q=^nkJ|5mm9T`w-F4A%@M@cT0-{;WA>^1q+r_aybPE9hMBEHSqg>hHEu&lY zUKT+CC~C6aWIL!5{xk_#l#;4X8>i@TLdmF6TXuAqAp4EZES9UuN#BOp_gqfxgl!!) z>;xl1?(1hpkPy)UlZExR0`|F!)2Ei|bOB`t^`#T<*s4L|x7mZn8Ms3&FEav+1jVUA z8p3h}b8${Gt&pbZ>r3d&datzPV~{U5Ru<>F$A?hIjq&SG=8pue3$AhdJy7TKr?oD1 zq=WB#%}Ojq>~h)d@Ps2_`<Sf>8B zR1D?TaBlI?)37JC2#?-E9@Y|RUOF>fu2x^%WF(|ojBQNl7m51kIv9LhKM%YYT($mv zrn(nZaFW!rR_l>ecMxj#bY~f`1j9hrmF+*q>TUOPbjUH zw#(0`0Im<|RI%X=V<$DK$yy+9sm0iA7P#t zUp@B~*#LQ2*h4qE(VG@|%_WjY?$%P532;!o>;W(j#8#}vN&a(Nw`)i;fBO52QYB3J z;6E75#K<++p>7MKbXtMwNOzmaHi8Pgv?D7>-v3s(2oX7bwt}0b+|rk}0iwnpAg98+ zaQSpm$1o-F-)|tU1i$G?vvd(N1rk|)3*bH^r4V`~ZI}u1`-Un6EGY1XO{l|;L{d+k z<4Zf3o*YbL*cKQL!RE~o(D7wMueT?Id^pnte>J&fIV(cEy#%GRf_`nHg~2;D7sl!! zX|?FQD)B6NVoWiu3OptqH~a4ePu>d^?5gd(Iu=dNyX&lw*IfJW#pc~X3xU7SAS9P< z;x3v~l_e&Zode?Um9OVQ{C6fjj$eF#UE(vN-=hNKj{G*H zc#VY*WN9+j|F3j%3|0EkQd@t;BMSOKz?m3>6Ed2OK%db>2r8YBj=xlKki*s`BBG5y=eup+izvZA&j{`}253fHJR zxkOEb#ZDG8i>|>KQB{jqDf9b0E9zHg55F!1(=y+zXA1VL)a*TE6 z<=hbu*~^}*G{D{c%{z2}+YPWfVVQH?xpl{HSL8PZu2DGk+7rRVT)+T3tWffO-p(5a zFD%Sf%Vu&7A*KQknzA3Z9F{kyP&OZvXT%hiTH>{V^T``8OruvQ^tllxx2r`+pzJLe zobva7vH)ZzN;>V{6eMqjVL!}cUm1`!$nAdimIG5(ox$0^E1pHEF-3igPA|ay3XyGz z&uN*g?)L^ONNpMbJv=4Uo+~YiQIFzgdGSwsqGgUcr!TfEVgK)k6#m;|yI$xU?Bb)J z|MTd!V^F9&XnK=ajzHOVH3>N`3quJyPrv9L(ZxPXwKxMf7OYKtdk@iYHy7`^m-nI4$Ik!~m z+-FjgfR3FRqq^kJis!wU7H6Vo?g3yZ`tkMiQ0@kq)3=?G$;W^GHJ?+Y@9-yjC{F*) zO?Te+A1SOv z#!Q#LzQRYnH;Z2Aq*vQbLyAAbZs>bqm?J{0v1ZzF8MvcEUDb^2v zE|Uq|h2Js9g4eC?-4CagPjHbO96RS@aaM?X9Sy|gr5R&?%XOCWo)gLMlB;{2B)0ollHtai9(kFJ=ZHi{80#Z#qECt>Ff-3=`xQeJA zgA_Km#I>3ZrJC^yGlDRObcX8)$aA)xXx9(=!=JujDTw^%)35f!^8~8m#QH+q`4DnH zG5kwKSU65U*PsYSHFE&u(KrB}XH)se3!K%x$7&9I)%)MG{srLaTUo{@%r6J)rL6)} z#(v|x+Wa3xRAIH<>HrMAXXHCZo5Cq!r_3LO0ltQwq+fLCVhwTjvMf;AXoqD(w$xSJ z*f%JDyo&xWjI-?_%YJiH#3#m>=SicMB>!QB4x?%;d{B`?27C0*!mhs_I$$YS{O4jY zjr*su3zs61dmq!zYX#BfE5JZP{|E7Q?VSR=lCxrm-l&`N1#;?`k@CCt-%o0j3lFF7 z?ErMm^<`yeeeU|AE&21yR5*@TLQkxbnUEg1Hx) zerM}Mr&Ba97QXCO;YpqN@Fkbh(0|VrJ910mTkFe>!dH)Rv{f~`Yes}VUZ*@C&fN1{ ztfLbuXh~4c6MSdS>uQ5hH)0@5X{NT~DhEJ8YUkQgLG$OplkXdEErv3h)&D(eNi%js zgw|Oko)K5r6k|x6*7r!kA!Ff+mL1d|U|_6s)-?f!W9dIKonhkbT;fW+Rli8qvn$8@ z3yl;+FpB53&=wtns=!w`8FEJw=1^)Sh?t5GiZXh3_b(^@e>!aXcILAMlm?*K-Qbqm zC^4@8?-K6H;{h;k*zq!M+;WtO4)2d<^|%R-CJB}KqqEkau~=m6wonxY`TsZjWxHbr zrf?TIMTD?o%TyTF2u@ibr4injmwF=v!S;I{lZ>;X`>HVc@JXozIHO$xO<(L+&(ws9 z((a{rU}Ixy#vlfuw#51$vBWswq;)@D2m=+H!|~OcV_wUySjlT9f#8~|Tmn0Nu2*+0 z?q5uhg^?$o02=*KbFJD{(t<~)rz&PQ|8(DzxHWHd4uBY>WY!>POyVSY7bLodp1#kD zO?N{pKmfB|C=Qg&sH*VVJaaOR9qK%8IdQohX=%5s`a$V$N?s*_>O>G3A=_(?pxTvV zdidddwJOC~gieIK1o^hh?qBAn)N&qk8}&`PD_@AFW|LC#AK}E$ICx0`wSk$krgea^ zEe9bMq%{^bX0FJ`PqdQe?t<oDnJunHIvk|? 
zZaj}C7Ld+@L~V6_=F6QvRdtIMK#z2Mc4FV>PKZ(A!Q(G}O)&wzW0m06zTbocC5fD~ z#tql5)~bHKc)$g(R&fv;_x>EeAR#kG>N5O7{eR#b?Lkuh!5t<=tmE6F7|5)>jW4zN zCj9#Puj4tS{)1u6+=~y`U6j#G`$ou0Nh9e&PDlsGYP4zMshs@>2?Gb+iE6B%!{exB zs)_wFq!?jL${9i_OWcc*9!nLtoa;^I1}(oPE6ToHT@xEm4jF@$8U62sa)5iv`0o+W zUAQ@r$On34fV|;ht>4kq5;|s>awN6|ja{e{sl`j%QdKV*v!~8P^ zZC^J!EPnr|$#rzhlE(*q5ezX3K+`2_qR<}+dJc6Dju*{ zDrB@VMoe5lgh$Ec!FmVLq z>%Bv$rg0fs@V>KDC=Kn9#^5fi_hxA0mZfO$z5j_Mgr$5lxO_LM z{=uru+O_uc`y5MlmG#I4CL2MQ20m!(tL2#WhVn?ezQ=_73x&(YL(5KGWQ?hNWzj>UA{ODkI$@39mvMJOl0Nl5v4+7DJME+W}Y z>n$ViD4I*k*4`epT$Q;UpT|3uOlgc4V(bn-xC=I*K^epz0#B8Mr?|HR z3b#E^L)05O4SrU}4`hTF1Qxz82liL@6;|I%c)aR8C2h1>%A6R8j#i;|JSWA&6jw^K z$X3qc9oLFH%_v=#A*9SV_c&D6cdUsnyZ8{pa6^{kG81&Qy?0t)#zefdZn{q~u%=AA zY59=x@)o5(19Js={{=SBprzo%{^A!(C%o#=6X2=BKY)e-Z6Uo##^}IZmEV<}&wgh~ zK5X7mE#BwYMaUj}Zp3ve;Q+2I(0s8!pdyX zbjX*1KOWbTP!z^md9uD>Gl|BF3SNByDVb(@-6YliZh-5p8Y%DR&$T=ciatxfYXc$x zywI_X)z0J&R@C}x{K>D=Y<^b{4`7-rbfT&k38_8Hjx)6!MNo>epZB~Lq1p~tVvFRO zz(i|j|MOEpbg7eV`G~~1GYRT@k@!1D?6#}($}u563kQk0#OJE5#P|_U$et!q`@O(r zly!pe=}HDM4t^f|=%xG0)|{|lEaQ1qfZs)+J#t=V;5K_kWI6c-$Zph^;+Y~{_SJJ* z+Dv={piF`iOv&Nhml((${+CGpUTxRn{+?gyz@+4EN~kgFGL7$j(%UhCKLho#_Swp# z_1`B-jY6Q{CQd0J6U6`+C=km=CcwW3Q9M5dcS7GnMDs7jJkXn?!yA#?Uge>UH#dyy zkv}ongR}Vo2%}Oemvd#c!v<{a#Ds0v-5ETdeIy2O$`yZzCj+H(7rj~oSnelYxYLX8 z_|BiiX`6&>SzI5ll9@@cK^u&Y%DO;k@8O z5Y2Nd;$O*4>2P-|jzS);R_D;7kUTm?ewNLKIje>&Y#xwwWI2Lfo9FaA9@`K>DJh7{ zVEgbPctG;%d{!LHH{%qTsd@tCh`>UP+g}ZJidDGs|K6W6;Y_KKB6S z0mKnGyN{zGlJR6xGOLHPIhPhTd*}yB;XFM#IqN9Ly2+piLJ&bYZm z0~ewWlO^0OAEK$cqi!`;!iL#L9C zOyBykuM95kmIwL=kZCWi!3?;YjrONce&!H+w*OflCA_<*xIx}>n=bdMpG+u$+^=o# z`-pOG(O0+gbI5q+fr81~oj8+ZQ4LuUZS*q}=FyS7` z&qwO5SqBr%rAh%f3SO=~VOT-W zQiF*^2Hu-Tg_4u>H?%HNi1%fclHGh9>Tz&odslr70kK|-b>~d)J@o8l>S!GJ0WtOv z2F#3idaQnAV>8owCT593o=$*IdVIPD1nwOPHYRKB2dgXo*MUH65DzcV7ac$t^8=GIlWDnB3ia!(b_Qr$#c~ zIjy;47c3Ppqilv=zT+N@=T{p8K(`_H$Y}mdpW?62=nDA1YNkWimjY1zQyTsswvEX_ z&2tBY%eB3|liy&lrc0G!-I9>kCf@F}qr|wksda7j|_+Qx4^s`<=r6a;E4N zQ)8lh|4?8C?|^;NXZJJzCwA7Kx2k%2k13a1d`$)_UUf@1W7;x!d{{S#06Z? 
zrJc+J+=gWh%Xw=Kc`rDg20C4w62D!U$P~|MRxGVw#@j}Vxh5i>!Gw^{wG&b~GT-J~&KJi%*31S9&1AFP=Yw(I5)2q$hxffE^_H0Hyzh(BUr2u}Cb!z?zsoSw z8o2aAdra>5bGIy`lUP$mUb`c7>2sZr3U73S5ablX z^7m&!S>iy-4uZYHlwIIKOyo&$R-CI+8mD_2$fZ)8051{t&&wBj%`2&`T+bgbhv z_^Kvl;$cH9MS{8ysd0zBV$`-~OGUQNJgRm~p_QLefZJVVPr{?a zOwY-WFHmvUut56K|1l2ZZ8IqWfpA7QH|<Z`pa&(DalKMKKVcUG@FUY6!TkVQA8x zl9uP>nE z%Cc>Ij)8c8Tm@okeGF3{mZDU;yguzoYkx{eO+#zR*&pvwBn6s%LitM6p~7JqR8o{ zkqVRh`rUCSNrtvb~Lcb(||^s2s+*1a)+8No9SZdug{ig z&!>C`4FLsV8aFiLLrQ%Z8LP42N=o=_+rsHHwlgD-dFvC*8bs~9et(U15^pVXW~hfD zm%DeZROy?St{oW4x5kd;3Y_9(ZMmz7YFm=iZ;|t*-xdm{GV$j>alK*c_!M_RunmB3 z%$>Y^vtmOmS&z3plr2T*Ofn>x4C6ZcL#Ua()j_t-e5hQm#)fEqSm#% z4{6`YsB#Dyp>5BsnQr+yE9SsOUVhGEOcsEA8r38&TkG{;N9DOB+5ss1eDFf*h(1Qc z?v$R$gJM)WAE#L)%*i&MGQ7b(OEXAo33zcQj!NTo*wd+QEvji2sJb8IR+NJ8+TY-k z_nos?`cJgY+iQM+&8IN<;N@iDmB)*(`^F&%@h)5P5miKx9)E4=APm8UR{J&ZfJ?GA z&9+Syf7V>@6xhmbmvUYz{tVXI`M0IYCYm}4{V~25?*vRBR7BDY{CY1exwzOVd7cDn?B1fxj;Z*Td+WPDdp!dsVA1HK1qq$A=YAm|i z3syB8mi1az16`|o*K$3#EfZ{pp1z-=mS zwoWF6I=qj(&-7u+j8=Y`jPp#6o8Y^8Bf+s??{x|mC zGAOQYTNH)h7J|DZxCM9VK!SVlkl^l;AVC^;2?PsHAV_cmG%gJd!GpVd&_)~UzMXG> zXP^D5UY&Y%>+W;kt-3#`qSjnJsr|vlB;u>G8t`+1$+b zqmSZL5ob&_+H@|d1?JX!g?@CJ8WqOaszt4gn;=HncC2} zrmpcWNx<*ND~U^617!xXUs9KY!wQw@Xw(HoqKrx(Jqstok2Ulsu_l~b6=9x1Lub>a z!|TE1$0Ku;Vz(5oFtbce4|?k}9)3KOWZJ@(>kpnTH_%+m70_7dxS26_8QvQ=IZvcNBV?J6S3xWCIhkm}pUbrI%7OBNn| zsq)w!^ndU;T}mLZAm@~^rX-K$rS$dtxp@3Q2yn~uzppJftW7T0b5!-7904`qZg1PV zu6!0F{hknR9-0vzBg`u^FVOXY39;^)candhQ~Fqq@(=fryXwmbe0M+s9992QiaW(y zH`$Lh;vCT2-lxA58T_V3awI9L^mx37`TJo{@cH=Q_>6QE#x(zWQ2A ztzT0+sQQ(Z)9k+m7-Xpi_5Kz0Z|SCf=ZjzJXqJwj(AlOWXTZ*yuyArIcL?`X&(}{$ zpHLNMQu_=LGIdQP-@UZj;QOA(#}w7YWO;rrevEByPC!vA{YtouH}yR}(9wIN;MBLw z-l&B`!MgY-irV4b)X?~5MnzDbA7@&%r2w7w4B z>rN04H$Kq;`=j4&$YAkrI~68@d(&vstuy{b^$|T9fuOQMJMzZq?aezuQ}& z4NN+sYY5o7TwE4Bk=pvLA6ZEZ#|bxfo5sb>F_a?o^YgHZ2JYvSw2aq;TvN_%UV$wo zfSv}M=JftN z;i|#g^X_B{`)c(8O+Qc5i>CYc+U8^XqsEFi=~+848aUmO?We?WRq~eKSyOJaxp$0O zFb{OL`tIwGTg2A`*C#Ep2?rYqGWlebJ?Fcl6t_*YfI!SfOS^;7&`NQqRwX`o)5|1b zbkb}sR4!|@T*M)49E;p#H3EE87C#p0w~_p-EE`tcEq3B}BauNkWS8NSH62XW*kp}E z{;5o?b382@@VnaG!cd@_L*G|9MGm7P1!=yS0)FF$1YC25+br6+WHqmmc4#+vDF63X z^@>BcOJ$zXrhwxIMf`M4NW7j2@wY$tk!D;xY$BxJgDdjy1J<)k>JzE&iu_A7aT-LK zUEb9+bmz(;|6VvOfX{(H{AMDPk&7)(3wtOF>pB9=6ZnJ>>m@#4M%2*XBfnBKxnv%O z1*`Oi(yFY>(~is7^6HA!?yXO;D)cKek7rGROp7zdu15-qaP7{r88cqGyA z4)lC^X?{1mGT>!g)9AMM*DtXi{vtj;M`g#5iflePLG93xr{Tdys&9HcvP7JhEEA<4`?;jL ztCI5$_)`)RStb~tH)~W%xNK}iS1EsHJsuHVWj2$@{Ib2G{em3{Pt1z>U*Q!-)pmUX zcU7=d;?#M7vEyGV5WmDl`|%H6FbTy_;{A&#{ymdt_K;T-oBBL|W3PL`v2+!bu5R{@ zh1TBb4-@U|vezkJQ)V<22}fs11w0}p?r)M_E7`MNOSgu3ZtR~59=`j1ve|LFPvVm_ zmUcY)ShKmV>BQ!@5A>mb39WhsyIFNfMA*x%eY6EHP@|*l>%{oh31NrR@8N^B-6Ji- z?ex!IH2Ei#N;IE(ac4~T1EYE;>LcnRXR!Mk4A)SLh0i#Yc-8oZhYQOk{+Sy;_ z&y}tPJ0~1A;l(2ghD<{#8kD;;vr09en1IbM7B*fw$}4ugB?{0BYh>`C*XJSTSUuN;t#&-YosQ^EWA z2fLSr#(77_E3{BzQWwo&urmc;7(M47#gOWGT6F$k+aq-|^+Bv7UDBfSbkZN)_8t02 znL)Qc&iQ@JVheJwJWZh*lRCR~KBi*t5Lvxuo~@F) z-%s2gcfHx+&D@otzZ7xGb4NL%3pkiuUN$zxNB0Gau7mhpRzhx$HRcRj^cz0X&I-|E zU(dH!ui%4JaVm;eMlU0#zxw2DLx2u@XjOpM`FG+7eD<3CY$q}dwkT6)~ zn})Ampj9u)P&JaC5OwB)8QZl#IB+w>+~w+41MlojYdfdEJ7*q(P6Cx<$$65|t^kbl zMZZ~_Y`QH~ShSkLy#d_UG9hCcITB?I>`Te-Dlsh)2ZkW$Y2v+LU$Ry7Swn51p|ux;ku5}u zOWs4xF{98;5JR(TE+08{8bOx#)xwCmAS9;jO|A z;w!!XYeH;{by65EXR!l|0t*^3-RmA2{jUEK=l>RPcHv;>iM3ytUE6>2B@wws8Qv>W z&*#%3NJXgmqX<2?hPvFb%(`0qdR+Y6?KksClao*@R?9KQw*=g#1gd5xk4=XYhj zFi$Vyh`U+n=hak5xDhj|bHWMGe+Zay9T<=%WL_-AVQNoAcSAf|W(sw|$bs$~%~hwI z0oO`4FkLMgqX7vH= zU_aQwNsnp-JY7EMp|vQSzJ5N*+9mS8w!?7ESrS>Xkh$Ay;8IF z)6i%X75kUY9A}sZrXSg7M?;R>`sSZsQ?h_)5e? 
z1jq|v4NI|uTbqHp?IB7$+w9Q(56f9jg?)+m&ZV`NjOu@BwmQ>apeOa{fT#U3uKsm8 z6BlPWYYTA6_c$+z`PY(S8zvDjVeyB|J|Vbq?mj)zNn2|Ku)uOK?8DG&%weq4U8(`3 z;%oi?R<+3}E1cZ}#x2l;{}^}uKMnbxMn!}V5S4rOX6roBKg=$?&-te3>F?9cSn(HH zUywE%1?*>z9#M?ax0_l5ipei$LO?6b;Qf+34T`pRtyf#={1y#7jGX>rGFNk!Wk2T+ zf9?#V8N1*(tFrXdAqxz;KRa721dSIevYkE(FM7Q_IMC$2xq*@ixxXsULQo0XFbg|Q z7s+fCt$2>L6LU)yGSHYqHy{J;*DL8xbpw4Vc)ccG^VVe@EXw@}bOQ;Hp94B#eBF#zEu&1)dd@XvA@<)OC} zC%`Wh&{YQR1^R{jr<&v;*w1qT`h$v5X&i6`1{0g|5je7u#BI*qXPqhs>^utC(8>0Y zYI2&^mj6yp8{Q?dTV9l_{CI1@THCrRP!!!D(e3_%1T zwIGF+cM+H|fgv{VoP5I&931{;cv_pzTe*1bMvq{#I6>o9W?DEnz=MOLN0C6Arh>bO zmTuWMaSL)yuWnYYU)L={GT_-muUvx?Ei=}!&i-@Rsc!j9O>`p1{oN0%xu3Ji7_m(Z z0F8T*=A-#st*s?~@kcSAMP=s2jq2Xx5SaA!L~AMECE1|Y`V2gy21ZI6zZ|Cf%oe$% zILpuvupGT8{jvq@J2c(D-Fz$FJo{8=2kBNVJ|N^OHeU&sW2#6Fm)n#PqMyqWxpAJ+ zJ&T6+KM1fkZ#8~?U*SPkYZxouOo=-+l-2`VGkokxWzv@ZHqhIc8sOwCE3YOHB1lCj zfZPs97EbV+{c?tVGNl6i&vv4M2M9FZt3|x1HHF5!P^gzXMd{OLDg7CjY=&0H`QwLta)FiE)!U-c+h< z#u+DYJk+z;yYou-w2znm)Fd?EYXE}fxz7D-^gLmot|7AZmiFwf+(98Yo~tvesX+J! zsNPDEQyGT;m$5FX?Eq{f<%04B_&Fhu<5CizRBBD4%vtyksPZF{aWbDWma4_6K+y$W z%gEKssiT?2o@<^Z0}!X@7S*|d=62aFam=|Ep?2ZL6}K0AO}vR$8@PJeD7r*~ zNk$mfq*A|IPnI+!kM{;sI5zv@X-*tw!rDKsJt^uXyIpkcL@h2h?r>^u-5ix%+G7ZO z@4a&A>Hsjm_c5>>Y6{p=YnPb4ozgMqU1qaa4Z&jY`DAvb%idvuo*$Dzbgs;ngA7tR z7SF^}r2oKtI@WsD(&?~-$+9$9Fm4{d91?!cfnyqQ8NX{ms@-EaSSHaA91BFv1HA5jbTlKDA9b8FFE$Z&t24$7L))($* zihEX;AJ>KZ`p?vb7bxGOzWl5R5#Se7B>sc$p9}sk(OIwxl>bendpM`=zX^zsJTlm0 zU$9SWjuSQU&mueY%%3UUx6P3RUPbt&RmP9y?dZ6#uN5P^C>AUzNtDK9PzkTAdf&V& zX`Re`yY+bWz~^JZbkNi57Grb5@gs-L*sSa}6G>{_>8zeVNFQq2HvTdJtw89?fXI zB-arBPqn-kx)19_S+|Ar*x%!snNI$2wL7*M%L%`$YJPJH zE}}ZMcz%D=6vyz84{~0&*7L59bRK)Lx7s$@7jaaRpBRe;shq);yo23iV9^jqksP;gYc0%h>eNskbeDFkpmMCcz`P^KWW=?Mw<(z>_;qz@$QuzX<=gl4!ZGr9J+EFRhLgZ&HT;!{V&AtWPIF>r9Jo@ zHqwt{S{Ej+NVW%4!<}UuYTfaR&!?Ox z3{(Mcui470h=)*yt3B={wShL0?@EM*^;W^5WQ{Jnw|>o@iT4yv(NMI*-No^oz+RT) zElgV?TK!M0lj|~@BkN)^x#QtELOe7%2cvcixeH47vk`#ZWB*!;Al2rWZLSGeLD114 z*niJ+blGLL@|wl}HtZ<&@uyJTf$mYn*vqAyB4o zr2S`CdG?>XO8cvDV@GqJCH4vHkJiS!&~6HMH!vY#IGlOjP`4?3B-F3VIOKz96~e-w zD*j^s@W-eZA>d4@KJS~^*r!2Cn6BbE)ywBf1agw@t)*{*${-Xe(Jv?4f$B6EnWa?Jj|L$>s-{M}$aPn+wJ{ z;c9e{D^5b^9$`mR=wwR7IYl)Xw<{U$cAIK(H@J3)x6X8#dyWYbbPZN4n*jl zG5p7qNm;DG;ztGD$BHdnn$6u@>nwJsb@__iQ$KDqUi{8&kNzeN_`P-a+f8A_t4fS%7v+O46AU!NDX>C@X44i>h8?@9fqFsrF z=Ye|Y8}RI(LQo}zy>g5fB1`d~_J;KP+1^E(6}U3O2ZFF^B|gnk_z}>l_BQ z!RM5hIvY;HAKpSdQYq1PaXRBY5fSOT4`TaVcC9IWiJNoHE4ek-&YA`vdPbPh$3f<&qHw6`7)pAA}W3{x!*`$6L+*T}=vFXnI*^Q_JP= z*TK8+yA$u-T)!%rmpJBud-_>GhY> zf&n4xd>Se0OB3IQmTaa)Jb}aTo)7yAWHC_&IwVwx4HngRc`NPvKNOlhw(Z01PNTCW zVy6{C+TSk=Y68@MmgIgMY>;aHhRC7?H4yjZ3EDQ6zbmOK*C|hPoN7vHy-YJ+Uu}RT zVVmT?;BtZXsBfHZTV?4lPD#V}EL~uynoAU>fyZVC*jRo$Etmw+f>|B4{!}jCHwL20hvK(WSfIQEz_6qFmQPgxjK{XVW?E~ zwycgu$gai@5qh1^(6hWDAG=KNcewso24*%}w>=P6m?rV7_d_K3Xj*0$=25r6r!U0} zuiIuhp2>18hRwP|4k&B!+W$S6 z%DcM~BzA)DH`|U_xvTmZ|NAjwe7VhD5HupPxCaBkw@9urx0qRojsK$-zy-2LT7ka{ zXxZzSc5gBm3z|+^E`VraVAbpz%@&?)KfU)nj$}}5WiY4RWIg4)bbfkuwdlILP%qx2 zS5_x8hBG;puzv)KY=J*8xC256=Z0qJJbrao*-!2qe|kyi<-7nM8ZeWI09U$i%zslQ zZ|%pDS~u~%A-g_@xz{_~Kgr-?%bf!|H%yCEJ#Nc)ohTYWR$#96JFjd5^IY}88gc~h zqllQ63Loy8wO0d9?7qR%gx4jUH%9>{Z$anJTRA5_A`81oIBM1NaS7pKmv(cM(x%GPiA}^0D8dOoI}NMB2*1OL$%UPX`uiGIj(|d@zZLM2S49pyuTf3o_0lPh z_w0OtmM>a8e6P}ocYY_%o^V!T{FB^lVhqTQtr7MNntk(QX-BKR5{;LCGqGoa^VN`t z`~8RYTT7z5NaaFW^6-P%rC^!GA*G2;)28uWrib9_NL>CA12{eqXv2DM} zLDv-rbuW4;)~CxJ&jNlo;MmSZns!c0V{r{Gf`{(jh)DJ0PbmmF*cr2TxF5{V^#eWb z#Sr}(H=}x?dh^reGhsI&7^h-^jli$`V%DioKc&vRzbEEVjrX=r<>-sVpK~HT&kKOt z?vgq<&72Wc2>*F0DmCT?;dxVXYjNQ_Ze?I(JyUuAGKo#%&;&Ov@db0u2S~;*&UjYw 
zl3G34Vh<5Q@;v**uVW?lLIWZht~PpUksEXmoOq4OO2wKz?L~7A{32(H3;|&V+3?6` z8DIS_56f;T@_^kUPgib#_#-`h9B|ewr)2J6X&dKzU(elSf=xZ6PL)13&WU!Gd8`aU zZ;jWr7V^Ih>?s+JdcHY(ESHhJ<2I>hr(i|~0}*~4I86j-TrGPu+}A1gS4;WVHQyb# z4n&?LpK0A=`j=T%w_j;M%!<|=r+3;FXa-k&>7{*6&EmyB{7%_w^BfzAPi&UF%CP+W z<;O6+`--3@wDu+vXvYyvsL}L`;E~seN}&y|!UQG_OaHtxmagm0UV*Z02I|uO>fl z-OOZgIbcCcG;%WQ#h|a!%AD|L2`=oa>)^Ko-mCFn^+8J><&Du6U^xHmThI8b*q7l! zy%_xAs>$i5f#UQ2u{<7|Lq5N!%073EIX6^V?|`Mp>+N+Mczp&i{A)kGB?ypgzqGdk z@v{ARcWZu;sQhqyi5hBs&KG!rg|Uf#uBUU=BrVn2UwK4$N3aqdN@B8VnJAg|V2Mu> zd5aeyy_@YxvUJTSW2oV1srG5YF4>dXICVReZGEoABF@bEJBEt0W%%~YDz=Hy;cLa<8Ln1DC08GNQwC5){wKM+?9?lxf8sio$V0@s*e7K# zw3^pxnPO7_@OqTO?hwLoA6@QB*aCTfqR|`o1m^M(JiCpM(ii1*ZI4pS4`TRnO?FQJ z`(<>A+Yov?_xp@kvfWa*#?p5hmgRN+r_8iSLWn9Z1oM-6fc-tZt7zCwK~Xo@uO2b+ z$i(7g3T6JygJ^NsOhO`dsbb1lNpVReZH^$|$;q^!=VavJP6N0x{9=aO z(jt2E{7ZjlKYgo?_I6EQB-M5Jn^b1oJWr9e=CyMs(x5Ha&2C;kd;e_@AwX`5YmdJl zc(Wb+W{L`#V6>XBIhG@t=*&k{t~-z76n+~%(#tBOyyABfu-c&;5`1)3H7B z+#Wn2F|9UP-U8$CD!H`Px_bOBt1Nq=>>KiEnnY8IC=wG#Utdm=d+C_UY2kzQ1&ek~ zCYGD5CPax-EKP-4*x^+`DS`Y-EN=&UGyIk&nM6Y@*dxM|U=taG?4yl}#jV`pbslcq zrL{iOYWmnUQTpLMHynB?g@M~tBOwvPAi=9*}2+rce7z%hIGW~m5m&?o-ckx-jvU4iY&P?O;{#1xNoxZ^&KP!KHRdTn=Cvn z)32nSH^WexXt^w#c=~m&ygPK&Ofv4g3yqLDaEQl|x#p{A^VNbQ@m#;&=1I-NW96}2 zY2pjLa{Z&CcdcH!rOpfWRQr5^^L5+F3_yI(JhTYt;^Qdt4}>zOD2b@I+U>XG$|a-k zk`?KF#eDB;`#;MB+%hjdSo-TA3HWQRjRt&W1k)A$u zk9yRbYTYsc?ZcAz#DEMbUyr)|;;h@i6qU!3VD1W5&Ew|l+^rP-64sh$5*#|k?_YQL zi^qgE<0vi|*Mv12$vvz%gO*FBM>~%i7wLCiiBj%b;#5#a2D+3&d0P+jx}#eRSm&a+ ziWCdy!aODoEpiO!Y@Q5qTGF7HDkbvO;%pb!_Da;&j3AY-PXwjo6TP_w$)LJr^al&` zELryQIA;|9r_$hF)|9M1;)7nWNT`-R)glT^?qgWAI=g5*%S?2*GiE`BKqKmD_ysme zmX-^(Y6S`2BEFoFwG-MJ-aZklNWqd&njsr`BUM=4;2dTb#RQX@lU%bscdZ7ulOVQr zYArz!Z;@ig|H}evl8elHFqK0wBfsV=u9KI;Z$bgQSD1O>?HI;!(JV*gHx~a`rB%2h;1CCHH)}NvPG&AANSPt>I8L@lQxb{RsIE z0ER|d+<%G{VrwQEyfUnFj=9-g%E0F+k5i}ocW{NTHuYC$>o+&9g%l?6k3XVgov!!~ z$qlp&bs`GfvK+pC6_`=&?Z?JhdzwUq&U+9$Go^#BODk|MD^(TcC}_5k<_PHMMdo}L z^YXP6^#ER?e$%@5J>MB0G&n?{V$pU<3varjl*^4{sy2M)w^UH2EdH%aHPN&3y(4Nf znO^$iCyggf;PXt!stCO>gO{=w#VHJ=sQcZ%H<6V@Q`b2Cd0j`}g455UdTou7#WEl~ z3D*;k>DJUEYfFOBOef~<4$;aTrbqFsn?7hYEpxeGC(8v_>y@+Ktb?8T7RW%St#s_< zd1=FWC#x$0Su}Ft3v8eoEICF({4a= zexO${bm_|+^+W~&!&CWHGR_o(Y`(UQnd)BjVSQuQ9Dn%D?v~e~CV(Rm)~B9dg9{m) z_@>Hsy%eCy3MVy`Z$0U_Bf;Ekhwjh>;TTPkfy{cqTuwKixI@VxS7Hqx9O(pa?Pe;B z1Z+oX&zN%8O1{UeV{OLtyFOUrB2VYWF{U7wWvhjH`x!N#@N~RKVlPTa%fVBr;Z5@s4EP7^>i4*{vKAYaDS;!Y2^2W-SS~@E?AW!p>BDkzKJz%M^2yc$P%o_7<5UNP4Tf!w`rJ zQC!@Udnmg}9NREyuW`$wC+}r90cxmnE@x8KEwot&g=kST#f;^#GuU?zL3R^07{*>j z3rzBV(-cJ$SGW(@Ww^SZWLViI`kkLLK;?_8va#xYKaTQYZt#^2S8vn?tkAy*gO?%? z;Dz~=fn_j?BcR2o*xDT~Po;`yVuCEgH6v!mH3J)T=uC}1Qivf!EFV-OGr{A&RfW5t zk6wJ4Z`yUxD3`Z!7+XN6l#O;Qi@H`bQp;+Jrk-#}`m*pt1XV$+0VBqjLf`EoR zzSHsAmglD_&P@%r;FnpD`+5eE8!qs2?$~^ClP6kgw9(+iu6qU=_b~r=1vYG7= zZ4m*@R5j zU(NaD1^iU}rjnYxKUuQdhji2qQAgr|CWe^5h`P!Il!b`*^OA%>6qJ*PBOPRDIi5WVi<%zHxxw`+-tC7Fv8fBp{vCo8;2LG_z*kt_|rKx@X ziXdB_gh5o>^9_%{Br*HKhCU7f*;riuD`Ma2%^}UzqHpL=kWld+mHn^8q~kZk+JznS z;iFjaBhgg?bRtzndBV2P|>Y zt?xFhqUnKa!!gnmlSF||EVcw(soTvI6-WNdyfjx-WC9&)MidlRgAD0?>4}Dn2#C7A zAKFlLt?pdM1@m<7l4(>g&yPu^fpH zTVV2v_wBp%t&en{sJRnrMgvr@GA{Xj%F)vgk3cGnA2j^Amn>vz-Dn=OD?APfFn%ht zFe-V{W+p)Eg@E>BxtXFyF20^?`w(s+GAe(;?{e=m$9C=it}+_X&fEQ90gbxdCZnQz zTBuX5-?Y(4TCo;J2J(P;)oqY(x;D<+&^|lEd4!)IaCfzUfNp-vh!Jt8-1DuWLr=g! zFbLYjX{^eIVkJEW0h-qC@#;}q*Uq;%bWqT4(D8x6LHzztG;7w$jh!?)9y_8N$5w1% zkIZ-b8$P|eG%)Q^F)|Fs#c~{>nseXmPXf}Sx$r!F@fL^2nDMO}s*_*$$ehclp?~`H zQjT}ad!(5(ZV}cPIq~rl)U>Yx=hI$qM-?=? 
zXPX#nV9)f-_?K96}PZ+>7q&_doR-1 zX$h}&i$x68Hufa~@xWN_7muT3JXnYfRHYbx@8)Iz0q$dWoJ^E@pDbH*sn$9y?Pfu3mAhWfwe=a8vd z%uGu!%4qPNIrI`J|6}=;89*fkB{R-eJJm9e-d{^Wg4|qtPEJ5p#F;O&I)=B*@H0%e zi42Pa?Pw1$(MR{)OSZ?nR~ zl5MYjcech~lgG1B^UDjDbPLA!V)b-ba7;HZQP!N*GlO-*5;*6D8nC&NIRtgE<%@dz za1XdY?a__ukpV^IfhUuo&Sc^JuAAP!=al=!-tS5FRbQquziW9v4r>CH#`8iN>=KQs z3f-A_*&NaDcFU+K@6LgoDxbB9Xz@nT3Jkd*^T>;7nL!?Mm+e~yKPR80soR=LaQ!M& z{==2w!~JB8F?-lXY|q8%yA&`_9{}YlXPYSt!CHgGWg)U!@kS`H+qjbuK-x&`b2)7= zeG!2Sr#Wt2bZu9^oQXWr%$n5DG}qedn65TRyzL2p|M}$6&6t>g`maW(`3ZCouFp}! z?A|Ux240KFClHr;Lp?13eku)-U+g@&2N}@Vvcm04aunw5%F*rIPuj1O=<@)`&`#%> z$S*))_t?jKXghg{daZEp72lG?_2k{W$eT17wRDJUkX1FXF9P?nAa0gQTqv#?zn3!x z%0VN-lG%=Y9hbM)%HLR?9|5`V>8;g~+Z}HKAgICc#zwdDcOW$zC;cP<0(xTV8o(AZ zl4!45k>sp4WXq_Z#E_R&eRspl=YF{7$q0c)qLWnvMldB=gv22?)%F7dkTKJ6wuDhe z3nA~XjWHV!24eA$J55`aG>#P@a$w0$P593Mg5jDW&;Ff%4h&Hj~RAXuTBI0P$U1kRG+-+=5VFd&@|4*Bs&i|Bpl?us8#C);UhxlYM*GXEN}qPw*i z$yL;7-W=EY@88HNkVlPf_DA8cg@_7SVvSNlS*z20**EBmWF~3Mkrt@~hW?zCLAH2D zhXzX)L<9!kH_VTUf^?8^6vFUVKHX`wQIK>q^XHq=F7Xr=s>ut;ra++=NS#qok!=Jb z;LT9}mGbH?&E54|i;CUcV7+NKr4*N6fRa--c|hHxHy3g*8Nw}N=O}ngQlwy~FO|#F zDwA{8wk_)0UuwXi5^sIC$+plJWiBr!7dnG^+u27OOg9(^a$4UPOU+dh{|X!rr#O2X zs1?WJn-4o+bGeudWovAi1M~36(6(f6o5j z_o9+7r1HTU|JFKeEd<%x>?~52Xh;Cgvq#m=BDNMr!kPQXI<|qUIxasx#4ru&#GSg(t60c`D!eTGx=#E68#W`G}+` zA+79!nBgzAOCo2%A#>5Ij>Gy%ZSrKgC{B#%@cYk+xyg(S#zQP zwEW{D1Ef~(d+C71baWEf)g!6$`(JT_MGMZY zX?Bs8Re^m#6}O6}mL?jug z__LQv(enmj4&IEzR0C`_Hk9^mc8?893u}yUS#4o3^yL8AGr>1zS{ z9l#E5e`51TT0D@OQhNAVjPdihM%P&xF#7xCpZYjwpA8*&HA+)26N2Yp58kURG|u%z zm#BB{_)hBXCjr;23;{w?UnE_t8hgcK5L$}EWPuQ#KHxpH|Aj$JUvPQE_Rm-#gscq% zx3Wq28X>-$T?hrzizd9aYB{Dup6FJ>kXgRM#>B$E-rpl{?u;$*sRUm2GrZwra}O^* zV#hLGn-HtB8dfmxb6v!hD{a7Ga0*6VRR4LT9miW1Q);DTXLTW0gYi6`Nwv?36sc4? z3U$bb^z3tw()*HFEJb1v1^4GM#`lu#Uj4|g2LV@{D~hif2(jhtw1zCi-pAWEJdT~t z44_{t((`1K?}LC!b?IijEfuh0Umff+!^?J;&dD)#<*#&9S<^Ik!k-C}xDZjv50C^Mwst zT0X_7Tdm z>@hS{lWPI-(~sU%W;P^$8%ncZ)X3>+BpSzE*Y%&@B;VA`ME6_DG*_1Ouw_@XA;f(C zt}f*)D!dI@%+V*&_>FuFGKNh1aI@uz2I(275gDa536su|0r4`w+H(Cd+ zSMK?ULDxHE+Jb}cSlb0&r}~hzzyDGMY5=ka(~+kh15X+ujR<#UVr^%#m8TC5jS2Ai zQWc5=joiKH%Ey-{SxBdnkz?P$bU>Ac#D~KSDNOl&*iTm0@@V5vTm5;K=6VatZ)PHl zUg{UFb)ndO$nCuHeUiFAO*8E!k_DO~)6B%s=0(vuxOlBG= zARr{TQS#jPFR_O7WBNZHqIkd^u!Q%YC+c*nqkOo|bF7nW5+oBX9*$ zMZQ^&Sj~KzW*6f_;oJdYd{N1osIw7p{OLeSONl~u-Wj6yP2Ju|@C#B)a3A9b#oU<6 z7XJbBW%zGZg4|a#>e%bNnNhPLFp3XBLt`8U1e#2l+F@`}Z!+v$z>yT2IEg*i-@DypcqVV!yq5D&wxG0&lJAE?FgsE7hW+Kn5UjQaEU%v4 zb~BUJ3&w&&Utxr5p81O)1fy1@SNDV#tG1gI3jZBFSBbf3&SvEf*##|iA{f_>~&u5*y zqA6g*T_f7nR`L2br%NiIy9N8wbK{n{tCdvfrpnCTCzMyvlsw?dW*SK46YCO!m_225 zyjI?rgt05A6&B#bzxu%7my%h&+}jVn^+E*c4F_=am|iZ_OzCfpB0dUU{^v>Xeve|>QQ^B1x3c<+~e4}Sc>NEH9jC}`?>nOGx zbIv=e426GY)0Pn?M`M00C!>=<@r|t9Z}&^MI>_5-oo?&xwVhh~kESki+oCQyTHZ&bBkQO_KMv7@h_&?# zzsBPxTbw4HvN$g~gXkI{a>lw%7;&U7<058VBps=28+dleHuJ_TQ8-EOS|igx&y#`d z?tYh7+d){bTm{*mYpQOe@M91%{P87Ez&=2Dgg(LQ!PC1t#7msA_#7pj_h+hg>zFiJ z^uLky8M#`Ab{>tPD}#?>_!h&s_O(1U*2eR&^yqoCyYP=~)yJXG9lG{)7LmB%M_yk% zsD6&N9?D$_oLUcq3OjJTR#UfGhLDxMsmlnB+-ZWic26N*6Jwe(56ljXOn>5Ie$1L8 zP&L8#WG}-^UdsC82)kIMCgD4-Irhcy7WAoB2qc>m`2Rz8(RvC36~Wb1TpeN5ytRW$ z|CAz&KGi@CW`|7$jM$;Amy?NVrfW8i%v3Hu-O!r3hZ>eyyP;YNB`%M%GDI^K)JGyQ z`3I76$i`~eX*j*m2$TvrI`>}do`!+UWiqIq8;Qg3|2uNn^N2*n_Rmt1hwEFlq9-aQ za5IzvJn}EzJV9f;{r=e(4E~B|zZ^keglG=}#N0U|)Ojc%^9}D06=a9hI!M(Ni}?q6 zDGv{crl0?$oaG^4vUN_)2)*n-bmcoB3#qYPw=aqAyUmsnm@{-TcHnb#mTdd7}U1qk)&i z$d6g`+yLzJ;Z833N{+reYUl&jKtT0{I{fBjMrsUmIc+%Cb@vO7!(0kzn&OYSJ^f2i z8iz%zff~7#9=y_~^bxlBGPWHdzPhJROkb=bcGSTD_|7sq1qnzv*&Ofoon9!C_yx#c zLiLz@uNH~QSH#4< zoPa> 
z@%&sAuw+$}>AdEGWB8gp-Y+$|qu|Jdk;EK@o7Pz=*Kv^EpEA(>5ANPOs;MsM7giAv zX(|d*W1~uyULzu)^rF(6NbkK9k=|9Lcj+~B5P?vnmr$hF&^v@sLP&CMp10ih`PTRS z`>l1?{U>WBIeYJOW@gWsJ@cCxv88w!?5OVD{@yCDFirnIQsz3IU-AejxOU|vQxkPG zB$$g4O?&V0pWA6UG{BqWj@>^=#|e;_Ae<~jHC?1Y+C(t`#Z#V^nbz_T>WxQ3W$s4c zlQ^-V{;;ayF41N5Y7z-&P;x@I0c$bYM?Dr3HMX@a>5yQL(Hz;i?9%t5q0%ZKb<0=P zv^r5A$NaKeFA5qGxsZXixf*>kw;w!}yE)Qc20i{jHWP1^9T1j?kGZ<<1}L!-V*RNd znvIocSK+om9{sxGidQ!gAy3**(5^2|LAM(_xD04=S~Himr$|j-Y4m7+gI}1ZS2QX) z&tGdPwC=tkS~<{d7Zp%J&`14*sQ>+6HGT`b4fnnr8+GEHdHoSnf7pwA3GU$J)s?ej zu7U`f%qBWtq&(fuS52j_1rA#y$Kz5a6y1i~gZ=q5pRsoi<@z5SW=kCtT$3g?cv9+- zu|n1OMd%bfR5z3FmybS^TGum8X28~?XNizpkZR6Jy)Y+oxcy5qGnc4_|obM-9 zXT)nV-w>2Rm4`BX{YPpPT8UK!+J*_vpqk2Dr2{ATq6wpF$3B;Sd@s)dP7D{M!6P#)tvGEPXt@-&oquf+0dQ_f}PlN3VaJ zu%f)~IAdPTfOUD!r5mzH1w>68=k{H5t?f82#>yg5Zx5O0ubFa3Iha^MJXp(UU70xg zbl}Hr@^4#9-YWm3Rwr2M?57B|eq_f3Cz;=)ouuzFxDataV_6OW*DybRn6n%gzVJXw ztK&{`-p|ed7HygZv%B>=u=9Sqt^;U2_qf*!#rjet7;X8+)jqlP`zEtjfmGdZ-1?`( zpT}+2K(cS&)<|`9CT!k6nWQ+GJB`XrNNS~03rxdie)Mr>?N`@sQ?Cp>t;v_az?Ebg z(ipBiGop)%t+K!UPg*(uS(!1EMpMpw05Bk%3Kp?PH7>Si9vp1aDU%+q=Dg_{Zd==(aBe5B~cKw-M0 zk}WdibS)l?lxI>jAblfQ_~mu~$rrA-67J|C+AcM_2?YwJ>Pg{gV>tn5Mk4O4NcMcU zT#nF$I{@^CdGg}_MVsGrA>5IN7`5~~A+2?Ndb3T`lsgT5mqnDcEN81Zzw?DO+ zwDdXzVn;IN5T2`_&>!YmMsHqJ)FWL?R?e_X?c2b~AoR_M%oTT3qOpslH$PqA-Eafe zPt+{1`;OIpmL_Up=*q=rJAclFfj3`y^;>jxa33?g2XD&$WFO3nQG7dI`krH<@U9yn z-ZKKQR(*d*)j_B`6G-MP)3UDS8aGWS@22#2vDxr-BGZQoePN%yipG{QXIx3&0r~F+ z=jQ@nl^zq?2TR>e_bR;EsML-|lOUbfbLIU1s9xbN@_o;Cg-B8%XS+^``%M#Z@2+(> z>E^?(pVJX?2Z`EE7ud$FgS|wC0L0o zS&J`^fEtbBMc=)O1VW9vZ&Yqgm$Z{+$mL}S(^EOog*Qv z$%`JoT^oTTjQ2GnRJ`Ez?pdJ{MQ`J8USNN_BSMvdEd!xj!v-|p z+?PmL-r3n5`dDs+b?NL#pEml9KAIyJVmuf54pt=M^meQA!9$ucDv@Ineu2$iFdEo| zAKCDzRHP-Blglx~&#mjJ0!}d2kriNc`H)wH-hbYFN9q!8j{!BX(@ zx4VUSqILiRPH{e6C;J=IglN>fHM*1+KEph^tBm#d15E4r%u#3^HV<}+*{hbmC~X&Ti@nmO!BJcy`NIlLa`5L^AZ_n zQjlJ|Uq7ThTDZ+pn1G@`KRLMry{M)t3CM^U0Au>hm!ieOfdHExp6Nk= zC2-rUv!heq`us6YtBsHHaD=}rW)pH}^71VMA!(&AFMo--VOiE*E}V*nUXnRxy_vM% zUX^f@Yh?b2P`WohGGYEb2Qy2!^9w;o(CbNhT<;_hAt*$8Hlk!#B=gperO{z?;&M60 za-qf~)#dp$xwAI(I5{mO(?`-_~yll;f-E+zPoZ7HVNQI(R z#HvJo(EK*vcRCY)Sie`}w_YPjBK!h}gP7qRf>xGrK-c=%-yp#)-O`W$}31*AQ(PKi!)HL_0 zni@E4W~@)P;OW);1IU4qPhw&%Halu+c`?QE^5le7>d_z`R@amKw`ATV3Pjo0t8Z7q zL;DY_{AHwqlS%FFLJAYc=YIbV^rB0X#Two);M^((ar?Up_gp=tDQJAX^seW$%kIlR z!U4r|-Vk7T<^QQAi)h2)UQ^&fxsX7QGNtCdRUTvbOQ*eLyCeD1e!G*vmhQbWApRBB zVkJq2&|R=5Gnu2+0t*FSq!RjzbXf7d-)l{IrsNdaQWCQRIZ=NESdG_Qvm1Lkj<7mf zJTkA!WAE`UVO4rXn1_Db0q2>mq%G{Dx)k5&9dH6eIA`ANhkm-SnqU9?_(j>#`IVEm z4*qIw!u((;!VI5ug@4*xyU~ICWqd8{$?J7hix_A_0_MnJV`aM?J7F?@Xf7cswhDZYeAXLu)yU?(X}5*F4CQN``q|CA zs}jQGUu3-f^N=rsgA7RvwaRI|8`0=3P1IU3f2j`t33{=O_UWl0^pFzhA%FDW1a8|z zu2uZQ%CY}`rur-Y!-67TSoN}!D}E12O>sY*hEJ-~@0;yOS_{Yti9w>41K*X_r$q`Z zfl@hISx*XI6!M8?iab}1K~93@A1TO1#K%m#<+P~Yn1*I1vV~>SDbA4&wSweWA%sg! z04=nS6oPMb|BY~B%F{5hPiK<<&$)AfMu23_P@j) zwQe0c!xt|?;Qp4g{BWu4&ygq5*ZN*my&}zmM1V6|mM;NE91Js1hB%jWB#)QI-!y9U zxluvPRG4bXAi;d|%}BY&^vkcSI2nk(pt$oP7&RnlMmP(Rlk+yeNpU|mZDn$oTAK$5 zRq`efBLH5s<#IFKtRV5jZNVXa>B(>Mjdz1jut%_qHs`I@i;z^yjK|!tg%h)yem0O3 zKo~f-|HXOQ$V&@^g7F_=QK8**2Hc~jZ7lPzBOtW7yFdUOV1P_>+u;K~oj7e*Jw6qj z%i*^@we|{fOXBrfl8f&>0};|TW%#I$#yYk7Nw4q#x|TY8uu_^&3UexlI1@FYtc8nN z>6#%YZw%{`XB*h+ra11>3-P=2!duS`(Xjc-TRMyWdQCf7KuGA8F}FW{!X#;HuOql; zyw&C8I5kw~HaX;DWY6V9S|y^;4@xGa@sKUsKdNfz%X6CZTC>6(%V8q*aR{i1a{<#% z8I(kSXP)7CIb&hAE5;ci47uecmuw4Of9YkD=675%JJUTz(+Z)5EJX6D>|e7jh~msO@Zd<+7! 
zv%yX+7hnX-K)*6nTvST-!1RH{VfUto*2>e0o~55^iS*^0l0* zi1|+oc78|kJsqfo!J6>t9Mq1Np!(Xkg+D}k99%ccGYjTyq*t5=aRAu`*i*+FnYh*Y zS6hv4Gc%y0>U@APRL?3!npwLAFS>XIap!FePvE;GmWG^%QGS@`QV~2;`3|)HcvPzi zRPTPb3*jtEQDQ-XAcwhCT?muL2JQ!1;)jq+4)g%BI`h}^a-J#_+u@XO>N9V1)8Q%Jq5o_749RH!2ibj;=pmN2zNBf|V*9oOM67v3Avv zS1_@7pBy^l76C=vaJpHEIxooBdt>%ETM|ma*M{{^u{u}R`bF94@5aqhi5tm`EzN{{ z_%XfQ3q&j*Fjh7pJC7q%Hz}2Q>X;Un@%Kdx9rmiabRbMdLbmak*9;MX=Laj@reOO0JSpKhVylR@j9~m<5i9cgt5uN-3dQ%pS}AdQMQ@$T+Z07yMHU4l>^qZ zjqRwUpW3V>f&N4v;EtW7-A)6>-?E1?do2hIiLJ#5Jv`Sg1;#z$gJ&bz~zG&qEn7Yb?R2Dl!)Mm;W`S989P*$#iJYe z-qFwG6-V?-fqOgwBhNtT6jvV}df8}1BE8?s_O!T=tPnzRa|;gwJsX(#m=f6JDa!mJRSWRm|~KWT8n z?!xRemQGOFAQ101BBhSraI^~FDNGxq+B*_8VW7G^I1F(>!nvA1WWalm0%cP~ds6Ir z!uE*8i{3aFoOa~DtgY+1;l_P0EAW!6)K3GwaS=#onH4zYe2@v;I_;>gn&M)#+)BV% zusnx-ab1aUJAbfK7rCf^8D-Pd& z8e|qEPDcfALH|Gl8;)BVS;MrIzfrp>R*Z>&qE4Rs+>3tqi{jZ);~D(Z=f5S) z^U4l>(Bq4AUMd5KMPn5i#u*)3*Laax)p7iOgH#eCqj=zJsrzwBUAxj+%CK9g9bzSh z9s+5DuU_T>TT*^knT@w9ad5pWle>ZbLn4T3E712oHQ`GkM17-i{T{rjf19*kT2!)b zESqv2gK`-U2Rh^9X1|mc%2qqX7s{@d5mhYn5H-$2<%k;nl_KOmMkr5b@6i`SIPK2T z@~Av&bN~Ry zUN7ybRjA3|+a^yvN-D4liT zVw>6QbQnVBG*(^;t!REbv$H4hxg+=2(b#XBsTFY{xjoGBsP;vuxFtShh-sb1+$&)W zbeL^qUtfi(v705!7R2N`?-szG`9X(h4J;9f7E>D@UO2&H)4gXxgNME8*k=?*p3I() z=evQ6aOZj4bSPxuTt^~KK;q@gB>JLEp7P7tvrdQ&EdzXkxq6t-Mq&F7{V1Wg-ZBNW z<@|=3CFOHoq!0bG^;zT7ZtpY%hH1x&iXDrF+mHxMvOnfp{X@F)UIx$CdafmQF|a(U zGNah_zR{3vjbV@vDrDS^U@I5n2 zGWj%F#ninY>$Q|4I|2!lG#?6l_h zEBS>CufFn}sD0=cQVqwZD^QSC$_cj}X4S7L$@2cVV#ued^Ch9r@xyfTSm@tPgrIx2UtW$H6+Lkq2ggddcq3WdtnHUFA7kHz@Y8F-wH=d2z zvkR>DssxInc6=XaaN|N48$3#9y@^{&2bpc;Q`}ZK5cbl@%0$Wy)HZgGZcG)XFn`Yk zny_1m=|3`Q>KyR1Js9&5u-F2Ah4O_TL?-RJG)_n`6p#*dYQi?7is3AbRC7pl)xytg zx%aUcYH>U2W`*RiiZ-kfeG`wG_CU4cUk|!BEj?P9Ky$#4p%`h`E_Wk5#-d_CV)rGHXg2Tr=5VxnK?}Rh; zVi#IQ$o02LxJf4D4&?$qYbj=iYu{SXvA#Izaa&XIk~$1~_4Fk1Yug=SJ)HKo!Hu=| zpbfMeOaW9|VAaEtEv{GTuK{1ST-g0~rQE~7=O>e86@A;{?`uCWOoDa~FU$(nxDic? 
z+}B+K0b+D#g}t70B}JixtZz1M1!(57?_fpHRPG-=3sT|7BiT5R+fcPxvXK|rAkT06 zrt`VIPqa1SLtSTCX!Tax=vvwbA4PIUV3=N`3f38(QU=JN6wVD{bPA3r5GhYGZ7kO4 ziTjCBSAEEwoIZTxyF`pu#a z4j}xo_s?P;su49lU7c7Zy}&F3=tZ;SY^OJaj4Jm6Rf0_iM{9@$Ra6CEluVvz5%(H9 zznL-11cj;dozC|3XI)O4cj`f;Mf_c6p`jorOoBD(m>4okM&W_Tp!6ND$b{QmR!M6DA zcZO^&`(VL6Uz^CNzwU*N7vvZG)(+dK6(kU zHmTP}_KZYwW-EUP+WySQP092|A`t#*9nuHy1T`doM=gvC^L{*8wS$KjW%PnQo!=)iH_;+uA%tYDTX8z*aE7&^ZwFjp`_g{=9LrxHuQPH13h-rulaOR zwM5rIfeofINtT11F)}n6S5GRktSrFpbG%~;hfJ(IrKUB8T)=j?IpMVWtp0dGyO2aq zxMdD2a~;zI?vd=XWD~zYbT>%x&5$>LCd%G5o!Cfh?FJ7Q$K-22FWj#Pdt4vb7{AfK z1tNDEeIX*X?elC{42H~tgdY?;EjiaaY-s(Q(-r?FiztE>1kI=Ok{Li!Ef*@kEv0P_ z0&IVmPvWJ(c|r5tTY!)(j_f?l4G6RqOC>S0O0n!YE;|Aq552%KZDz}yD&Q{7+?&ly zmKk^hcrdUo`0VqTX2eJRdu9md(evJI3d<-!87c!3k#xS*iWCsZs1e<`d$#-`m!&uX zR~;b$meMlv9QHwML6~@Jz2k9XmqOdpPUuM=(s1SN-kvj`lABosU=VmUJI-nWZ@%$* zZXPyQM)27TunqwJ{@-fzh6D&o$5Jc$wnuT~`B4q9GXSA)I?Ef_O`+zNdD(Y&#;otI zV)SAL#k`n{*B5)copSoI4U;MEyfb#RSw_II=J9#pC7l(|7HmYqE?k-Kmk^U5)&(KC zys9P+e!{us<+_3c13K2RuLST<^fbj5pB+{T9H^wymokf1U@Lj|GK zn7`!lp}#CM`7&CzSoLQPNXG$W`++XzXnoO!r(2IMqJCq`iC340ROrn}a(uXzdLY+G z)+ruP3+4&WXeAvkHHGR%_<0il5uE7xlpt!a1Q zURM<(-`Sr--TV=-5h@r6$4LKrt>MX0b#$=oY2L^uL}&KG%iPcG72k)YoCnQs6lwaU znQ!_`-dO{MzAi>z*s!e|#V=Tb6`0Pha65{j;7};UOgqivx7XI$G*>$Q*GWF}Yvm^F zW2P(ik=p)F*XV$_z=)qxeiECrOh2rIt`epRWhlh(Rs?7zq8yg_aw9w2kz+Y&U@y9diND2N;mlR8~!<^hw`+wD9>P(C2d+=U}kWx)51T4h-slaEu#y{ zUz7IMczWKI>VQI7QNZpVG=!PTK$#@VN7da}J>5<$785@vWIa?H?1k?M)N=b}N9Z7p zv&;0&u)z5$ecJ0igW|8eaNrnZZ~MV9qHEa$xCqRKjWi|tPv?s!#XmEalrC<&FpLxB z{7zPSaTPY3(c{Q}1ATVSm`&C=0QI2mST8em^rq-gxwEKZz^JKFEw>#DWPj|qgf*E; zTTPfafO}5{vy#wO+Sx$ngO>`u`bs2)jQ@zmt|+sx zIqAKJ9$2c=pmMuTM@C9&y8zFpICH5RA~L4J^ctAiN+BY9nXKzm3*HS=%~#xXOL3qV z6NC-=of{LH+x3iRrhJsrN8UKha#}- zhvwL1C2!tJ9a~VoD_*^DdJ*8?edgnV|IL`6@{6l;Nl5OT4dXrS$mlvGLcuq|{cAC> zo<2b60D;DW-&iW2oZJ=zHmo1*tuXtTa=~;?6x+|G9OwDxhulVE=3y2{_3I}~^OyTS zGLQQvs7Fbo4#@`jnoB8`0x#$o!;7rX1^M7ECQ?cpZg;g-m9~UHY=X0zwpUD1^1+u%;l+z2z@qrWc_zMWy%C; z?gc_=K2|!7(oE%3_voFi9rJFjx!-bN;5^V;~Dc zM(}2N9co4YCZX+=Q(sc_apoJ&0HRENc>cmB>Nlt#kMs#vUd`g8y2z;Avc^uRo{z5J zp9Ql~F5|jIZA!lOT7-#EC&2~Q7HTR_*6RvO!BYLT%i39s{sP@*8V?nG&VwlxZW?=u z8Rd){XrFsnQl*U+rjq|g!B9czHhPf<7CxlBBg=CLS$BMU^2@i44|x1)`m2~{xD9r) zNgbj21@{^vm07R#3U$4b#rv$B6KA<7gSmJ*lv#DZ*zQQj%_}7JUtX{8;)8e+z3-8p z?iYZ?CoRt;VlW=w3O?MBnIr3!Bog&T^!UgN=RKU07}gE(9&=}Y3@jEdV|&Rd4W{mE zaVLjdQV%ysH!2#P@ZEl=JqtkFhV;##!`2_1%X`$~AMoNkU-Vq7ehGR;U`|>n!{#nv zP7g8Cw|mCZs}yh^&JB=^-D?~l((+ZNRU9kue&_c*_}wA@PpucIGCd5}2QJpwd7B4W zQri(?!%YYD*A>S=Tw$12B(%PkZ&&s<9id;3o%eQbc<1fshEs_+t!O*bfh{g?bmP@9(ZR>oSWxg^p(h$1%7>$(@Pw|Q?pR%ZFdeU zRcthz&yK(Q(Y&q!HisuU#-16UfeFF;T=xR{#yo|wOhM@fwrxEP+^>K+u%(7j;5Kk= z6Ic^^?s9rY7v^zUcC4+U9DLK%zY_QSxt0goJHD&S1tw?)UhV3jV)&{mWd849)Hlm(ey@_7FcpKfGLdxS;I zfHIV!wiDm49sbEKnvRAA%t+MH?hGQ(AROnAaxGc>6-vF67?6@wQHOqUlCMR4Fx*F( zou|s&H3p$<#j>Y#q){~{;TE(8;@D5&G;5lOmwq<k z0XvLcGIj56P#f>IJD@OwT4e8VPUlS_u8GM?Z&J2JzIo$0CoYT0z*$>IdkSygRl5FF zIMLBRy4?C<#lG+IxRnLT@zQyt77Uy|{9>D3EVK-p&$*GgW5LU9gXJ z>NPi~Hhk=~v+$Gr{eou>{5-QZI9=Z)OU*yd#c}RDE&z$qe_tkz&3QNA$ie{IVBc z8;gxs$JSG13xdR)hEO7hsEda@Yv+>y=?gqo4scF;DDh=!SA9Nw4Koq%cRE7LTF6uh z-gIZPb#sNxmh01c%gsmBH(n;|EGxuOyhzkjrnClY0M1E?;WZ$k=kOa&YcBZPe-FD> zG|q!R0P;=Zp0paVar?C|3$A$;)0i?De#jzTDuu+#feyeo*z^HyoZZbDth@$Bw)R9A z5O%G{^-_%Qe+@|D;Ovn2OU6&d`_H7pc!u!EQGkkBB4%}2S(!Da(0sJ)sh`D~3+s5U z+UGtG$_2{}y%mQiCV)9!1rjoG?O;HeicAvv+bPAPM|^{NVY;{F9$tyaz88Oa2QxF1 zk5C8yGB#eT?JAH1C(C>}OOKAfO+Hd@jizt)mA-tY9U|J>qDJJ<7-9f;1$c*$d?r|2 zGAE@1&~!AzY*G>Rj(@zR?XXOjp({q}>acJ5h~}`MfMGLVr$W83{>3G=eH=NweKp?d z;CxWE^SM)tiXbHZCwP}GaV;fmsy>qtThFK3_ii=HB9`(VeI`Wi5t1=+T4PKwKa1hv 
zq}!8N(w#|j<7hWxOiGHq=d0a!=VRbq;xk)g#f0SNGg&0+mk;K%M^A`aM4sx1GGAs` zmhNZ_Z42^Avx5B0yQ$-r42`!I{i3_9O)cMwn`BY;*-f5YqRN|{w1$>)sIT*=@3kK` zjJ?sL`MwAF`qA^x5r?HvnS&bU7Cn@2fDs<@J67i5#pHVtO38p=W62_5$$yVBZNu|h z3kmR$@)Pe{=oqN3%cYLHu)FKH0%G%i4Xrw+zNwnljbYkoCm;hK z9R8#Zb$APfQtNrDmV}aSTBQbh9)xD(S&@Om29Tehm}Ki12k`Ez3a-#)`Mr(o+7}G$ zY!mGibF_lOE}OY&j-}HCBqS;Spw0=t-Y|meK+*D*(#U>^xEl!%SrJ;q#}cRp zj!}69qTMf`zYtxzI6D6QTzNNBBou;TaEaT~9jl=&wT zW7=vmtd!*_iOvaj^YxmEk3phk8WlA~%uolMuy63^D<>_ur6p045cp1&iFOD~A{#|EhB+B=ffr38@Tmwm}HE&1oM98QEm)KP(JX%8F9Y4K>SqJM0lXE*AB5}f)0glz`3tf9yC_)n0*sle;sEZn`dxSe3{wz?0#<@*52 zO6rA#Q>n!b6QFBkDS_mi;9Qij%F1a6wWWTM`O^gnS)*QgJXpzmzHmAk2Ka$~Ws)R_ z6m|J3T5cdg7wR$>9F$*MD#BGfSk4c$JIBc@?>Odlz`hmv$4?3P2Ng=I-rLN-&K{_W zNu|sFvDZ(eLVDyZHLQ=zXCjAO&#>}DmMyd(SrON@GQiB-6-Rxwecp=-i{3fN-d=kL zL7`uxhC&aA1b-Rl1R2nK=3oj_!@yt8)e|)z3K?-&-;2e=b6F|WT4P8~b*G*txZO#z zD-b(8p~Q>*pwf3HCLQ0~#-6!jR`0epD?j9o;`tybP5Bx%w79dA2X0wZtIbr*I5DDs zv?-f6Yn2gABECcgA2nqIaDt@$qzBHgQoPQIZvuXp;|b1WYzEqx{!@CG%=!7B+W0pV zlf)0600}B0dNWqFPd|AU+|OTp$iT6*$!cUR{0V()uuR`Yo_WI$SJHjw z@*#bZ&x2sJ=)72p!68g!-)me=NbFo;>kRZC5iUn7Ci^)dD?FQ2+1yArMG_`lZd$Zvp#)LwBjLQ)Ff`j#hPx$Q4yYFKj8TQ|Lp|WJ+eB zl=o_=VyA(5&x{Ly91gXzOl2}J{Tu((TH#dKD~5J)XacT46lYElIJoOw&Qn6O4T|u< zt53fueD#k^0Iyy-cOLVs0GsYJ>%}e9Hcyawm*(lyiF+AVb;x^6;bWM2|1bXjZEvGr zX`CYD^o>8whU&20$>m7Jo^1u%=ViSwsIV9M|A-AEw)=cI5%9o-K;s7Lsf179G8BD55 zw5kWvE6q%{;Gl9TmkV+moZD`CagE6Ybs*gsb>MzTvS_NyZmPxG+ehxxl-4Vr1x{hp zOa83(Zhdn#KgcmK;w{5>Q?(l#A3de8dqUP%r3Amu;`!2UL=^#*v>kMXyEX@+E;ez& z5&QSO1DpJ=Hml#5qVjX{O@WFdcV4kFH(f_J{`}+Yf8~ZFKJ=Od6IajT{Ue&u?s~m2 zobax1{A#u|Um=l0Q}W0Tktd!#}JE_c zN)C#EGgv)ER@HK|Lv%b?B*5d%=R z{Zz0w6VPTD$_TRT!^czoaQzaWcnhv#=ruXKvnDFp1T97N=O28x!@o3n203eKDH z{_5hQ&xkBLeeajW%E>r+xsq@(L0U>h0Tj^xa+-K_6Ce|Qzym+tQLnt?SuovWRdD8B zjif_B0+xb=zP<^5c~6qkR7xn-;rE&Z__p0>d$xYvGq?nHv6#j}gdfrrEs77$G|}A9 zv%%hlLMOy`_S=ItfQ?guf&^ZfFC@Lj`H0HRg3NC+hYf?6t5Env*`FD;W^|P^g3zCe z$g7$n8}`HU)zc_!)UlalrV?l|s@Cl7uplc$6=cv%0C-ufguy)qI19Rd)?Wh z=tr_i?bKSf`<$;+F15%7?`TOQeA>OHQaAH0_#9W?j9!B?3EhFb(Y5W^=vOM}2QNba z<={ZYtXdQA{a@j)d(Tcx5rj|_$lE~=|KvCC?8~sGZo$tf+9DsjWEE1OWAw=&9>Lq< zCg{YY*zjF$nJ~kCBHPovFH|3D6rO)I|M*R0(+9U)|LNCt&HmmJ?_*4&sPT6DZ?^ny!~kWM?!eO)_tm%?iKi7GO%uj7A~ZH%sjvX z6J&D6Tx^6>{yQ^Ep3TX&sQdr;0W(O=Tu5~@UFua99-xB-F5R=VPv65T&~vF zrz#!=8Q?4`%+UMhY6qwY4dQ>a;=fzY*W8`IHj=%3 zPi%j`wadc`wNmk+-4E|g2UiQ&PbDn?2qpl<_B$i8*u80f9*-d51TIl9@XG7T`r-WK6NTXj7~@aEy}3n9)x7p2XDls(!IVVUuh996IsKrW z8D-I#gSP3@YhoH@4`T@bgg$^%a!Hn%2Mxd1{BrNUX`bz8`Z|8aF+FB-`^u<$KXR@y zz9$;W2^EhT+0`|5d;cTnCh<%$VIa?tru&E98|h^~Z?bZa%@EpUaGYxcR5B;~8`TJ*s!Z&F=eWqSy!X?GA9~ z@neltnUOCFx|=U(7}|%Fy8q3Y$>Ig&T6+*#%jUsDU&?^ERIsewhY|K#r`fFJSgmJJ z7}D1JtFZwjXzJMp_5EShy&5$UPRJ*v>m1|*pc~E6#=~3nj&x~OWT`vk!S2EH*M${=c{^88)(Y819sy_YI4wXW>Wf&W*S9HxW%jRay*?9jk;ozGyRYWVZSM2= zTo$7l3lTz4KRVeruzB7dq~C4&_nII_In65elJc7Iy{*~g?ELz~Eph^E6Uy!2M%@}) zeZ@u^YBpG5=eR@;xGpk2Sfe0p9gcEO)1S`&zilT=A33W(ZwPXTz9lUDdHb!#iyNPb zLTCS<+xYdf;U?l6_gM{!Uo{7*Dwn*iaWQ?_59MW%`cC3_DJ$i0+ok=}y-FKS;kP1G z6xNzWbPvWU?e&#ZzsC|0?K0E0{A(C_w(+|+qE-KOZLyc`@$DTF5uO6J{+^)gJpR6# zj~Cku3;sTv730rYGV6Gr&p1=}BkdU@DM{V&j(n&)QF@YA!l$+N&%WWr(65fB6I!<4 z?U^2qcK#o`Xuq(?TGCXLF!@7`qpFn$<2OI9c#5^=Cc?oS)&D^+#6-&3Z{h_@dW^1gdEL5gu2or{ z1Jmr)X-krthc_eYzd1Unlv)0-*|wmc{>4Up36Gk8u`v_&{rwxKnT;(*g16CE25Wf~ zeGtyuBT~lkpE9uXxBp+yZEp;!~3l%6KmQ`jQEt6_~cY`!3?kUUdjLm|_ zLCsvub|1`CY-Our8za34@7P$#FyUa$_5m=s%AN!M;)~9~G|j-}YE>?F)AC`!28XRF zlL-uwS`NTXFfGNP^kZRPmioQm=C@Qgi~yX0flY>XDNDT$aGyrc*|XCKU;t)x0{Tvl zOd3t@Qpv1^Bc_+Ho(_u{MbHG}C(nyo6=LLZSE2>Pp&`l2v(A6Ey1WkL47c@Ug?%@c z$~#lmO-BVb4ua_b_AJP=49Mh8%^|N>E_}3nUJ}%z7w6%kABM0xt^Qp}X{B=nX5x(w 
zBd2p$@wr+tg18@vX-TWS$tmTzuzb+-j5Iio(+RYQ+A4VC?q98_{{9>khXVbTOd@eK zpqa@euNKSnbO=u@+msI|5pwBk+&}t!NEM=`@Q*fVvHFR=*qU<*yBQgoIWiK3OhNw^ zhm=L6?3z41F#rB@{B9-#kOn#W2_Sz>eKWgRasaNE0||^zo&N_W|1Ss0Id;upLa_v( zmk1$oawCI1i94iJ{*?DLBgM)-u? zGWEngG7O@$hNUFh)1U1!ERd+KYHBM!eji0@g4{c7;VN%J+x`h?sRTno{&ObQmdzEX z6APG<(K8Q5%i5bUUW9EU8cvn{=s;6p5YE(1n_7BpGUu*;3X;lwqHwmV)deyY+;*!t%03S;|<3RSPltM`+f0*R8@X9 z(#AJ0JNb%buk*`~e6pm8>F?mV_w|_ER^yt=H^J1||Ha;WMpe~pZK6gHMUb4?2uhG7 zIVoGoNpg;obCis*MNl$G&N=6t1l%GyXOJKuS&*D}T6f|5o<66)zGK|mJ-U1J`1<_W zFsf?Jnyac-)eO&^b<5PnDeTrG)O$+eYier>7X0l=)C=_2aX=Ec|1T!-u83UNwdfgR zR8Bsj=fEH%Igz{@5#6O%G~}Dt5cg%n=GwSwN{s{)}W z$s2@zFYn)d$Kt}slloc#&msCRdxNP2{!TZcQgyr1TWCG4dKe5;#KfF$U7n(J!$?BNJwR-`^q_AHYKs-_7P8DINf zvc9r$@okR7=k2W7J1EuSY(lS=^22{FWtA9zJx3K!L*@y-$o@o!RW*ZeV|b3fuDSK$ zptE$Py~A34A>jfBL9bq%Sh+4y$77X*b-S36M!K*1V2V_1gHI=qXN<5Pq&bh1H(qTL zaW_#eFYRGJqRe!Rh~_m5O0Nuc0p?Bp-CX&T< zUa)r?>L$#GA-?w|QN`t?Z7yVl(oB`gP0%zV`i5Z%h+V0?)SAyc!TDlp<;wo|^95&6 z;_HwId|N=ob?a%faRe>joPm$Uk68-a@YikX89_%H^^>$%e(jN3ZEr-@HgBVfLm(h*qo)Wyd!E0cfp z*Z>WE7bUd|vjL7B5D*HZ!3;73cM97<>=HD|%|^+NEb;hiaiKMrLZ-M(%@A3a5Yp!S zDlOwDZ$txbxFzt3Eh);^^)BkLZD?dTPM1u7;VM@DtBIHB)GftwU(jV0v>LPBwD2KeW5Tjm{HEHY_TqX++$uhx z-zsE*@LA>2+t5dX^=A>+8@l1)3BBy?$4^EIW!|fS)Lw(JU;p2YjxRvxx&EFUZ4*1~ zQ(xZ9OqR3bGNXV5xI8uMNCvJ?@D<6z2aj zHg#Os+Y5PXpj9qELh#`}u}Y*y$=2B^Vg^;ZTX_bpfIpokpD}tmpMTy!9V`Fk^OYd8 znL7pGQt^i`CT!o&l{T^t5q)@M1jby0cn73fTUW4ChuR)jBCdkMRS2DL-CSlv#Iw~m z#T&5D7xq9sdgjb16EMKHF5@b0c~BugBK7@6y%k3>Ucww^G4)KqYV=WEH#>Z6$yBRL z8WTVbqimv)7h0utVGjtm&XAv6yR)!77akaXFYL9qPYG}3W64kBZqyK_0eMAFKu3T4 zI+Ye0d1vav7F*=!(USB`%rF-p@HWr}Glg2EdTS3O^e=luXMFA4XzB9e6 zAL@ioVWQ}j_u&a#Ixbh@9XyWW1hxxf!JU?AVjZFkCA!yk`Z=VJ;B+582b}F=a^)Yd z$yXP+4%V`o8?3l=B2!`ftJC%5LN%`BA@V`g&8XfX{gQ{Qr#`G=xXLA_l>z1wZ#~nT zcdDaD!pO7K#WlCjH3Pwa5{HGIY1!Wzb`IuK)~Lc8={v9Ge_rGJciI2SF&JO`YP)a* zq0ow?H8*DrguS(+N6J0*Vn!bujgIEu?B)AaMwD0;JrI`E{4X=r$~c_+VdURs|2xMZ zjRAmg)$uVc7UW!MVzNbd#o8>o; z&ct2Owac-}t5E}+8w$;~WuOw4s{b~6X9-%5c7`tNkv*2$1wxZ9E|`t3go~}++A}!6 z<otSRzvfy9|x7Q5g}88v;8Ev zFR|{^+Sw{A-~th;SuBg0#{wRNl#tqOqm(54ko=Lr0XQY>-V-j-?^jaJ82e&jV7{C^ z%pKWyK|WE5G}?u+6@w={!8R=DK9!gyPcN1OT7!R)k7CM85{V9ibV`roqmcsw1VwQq zTm=nKoDX3^w_9IRKB%;cf>x;Q0xOwtQf^Zlj4ZSMv(P%8>g$K#DR_EWD+-(5blhgI zfjntCFM`1)k^2`vU|dcEwhR3Huoi+`Ed>-ZimGYq<^Vot!@Wv2B{92Vwp2Aex!djj za(uHKQ~pP-SY5pt(Z&}}y0y4+EUPvI{PE^*-ZN4QjtS1hY!LXUk#twO!(GC4)YTHW+K_&qt0) zgH7xH^Bbx-95rWypOP%vY$jAubi+nCw8r==#r#fLMJ)gIbK9pgg02PtF=RQZfc#qp zC67>*j!Ct8!b|2TVVxkttg?Ydb?knsY4SLyzq!r?cb0hds$Df2W=Cz)YwrIpc4Ypa zHzs)@*&w(tg0o+MK-6pm0$O?$r!z^UukAm0I{(3#+7U~++5n4rJdJnzfMJDswn*T! z?N<3aC@u4wo>BfCx_cwph2=yUsmwt}q6apeX?@~dhG`6Jo9{4W}R zkt&M4#s@qi57r0oyTj<*Vtdg`lk*Elcq8HcM?4z(?y+2{d)=Xw0m3q4Q79?}N2|ke zUG9J!6|$B8w8zXU5c z;AXk`1za-YGV*_OwF~+A(#jSI$TUJJjHrd)Gk!eVDnAA{vx#uM#Jpa;0zoom$D5gU zAp!T*+3we>)m@zPFjEt1P`+yDOy=S?u}(_Q>4K8ozEM{;V@mirB<3hWE-eSqsQ679 zpWwzR|3P1Z;P)3@<4;8-BR~V%TN#Prr;;Dvc~f1O$y3{9{ds=FXb7GHv*M5%6MHeV zoMa}S#&eG~;o~fe4bo?6&PaDP!E&K{><7*R7&;q%A?KtJ=#6F>=lH?LrpR@DC37!< zYb$aRjNYC>g_`Bp%3!LyJ!F|~DYhkK? z6z*0wz(c-cKH^bOdSjn|WR*5Zb{T(TPMBDOI7znnM=k*4Hyz6_13H{ioKq9zp1vNQ z1Y?z<1jm_xOz{Y?ib0N0+Aj{i`f)a6$TS%bs*#`)2?C;5DPc7nxwUH6lqr;@kGXD@ zTnZc>JI1_>%@e2_pR!)m6y~%6dAkT@fL^bFi8`^8eC8UDG|K+1yrA-AhZy%gbDT$+ zTv%ua8y26c7!wfii|kVdfp)+*p*PCv?P6jhL!9%z)x{F2WE=Hq<|#`6G`Tn``TyD{{Vd4K!Z2$lgyQUM55P+%Cv{pt;)D~%NdB$S_L`b7Yf zNc3*hHmMYiQNf;(KOjle!^^2QyvEY zcyEvM>!MhhtcbO%xeV81-Fi1#U)1@? 
z+md&h+~#Ca&TooytD*3NK zCsid}M5VqHM1^mY#%oLLhj&w&`o$I?&a0jL_wn4du8)c2R(DTcsHxjxHtjjgrFoDp zW*%~(l22UfDQp$4HbgKz4PG8;^l|kEp_OQrA320!Tj^ukrh}ZkAOxI;s$$K5e)_(Q zrJYKM#)(bbW*04GQ~o+3iXw-ZIq|r4GIm$DjQt4<{U831<9VmxQIezx;z|7<8ZSU9okVJ`{s!tgqYG&Ck=|SKuT)&Bhubb9P^!(fLyOZ(o3wMC> zd$ci-@iU#qNH3yD2TbfqTjT)XjG>n&G#UCe0P5?Iha(Jfpuy@glV1I}atA-EAzW-B z&O30^zvOBwMPp*Q^RD2HbNgoY&+i_AX0oei)=yW>+Kf-OSi3~C5t+Y({M^~~CZKFp zO3Z*)+ouxP?sFkpQBoFBKFlfR_$3O-V{X+3%phTlKG{q+B`dBnjMDZZ&ZTz&TL8+1 zfcYB!#_0%(x_A)@E1=Es0-SjqObh8dJ)RnlXn0YT(86r;L;9v;U$os&jw^c!#)o?| zS3MQ6K&1GWBlz^#Z+ zj+UhPa~RO}DD(o>pd&{fD^PYA2WHm;jV#aRm!5+0l$X5Qa`0~ZLEMMYiD^gPnx1+Y zt_Zrt9sz>4)+GfSOFcY*=Y!CRB16C%{g^C|>JS~iZ`vx#sOXDH2)=J-8U486YP4*S z;;cvL@OnaS?|LHVkq<(p`@i|KcAzhei_NJf@)c|@P8-Ygz#weJl?@5%B@cq+H~4 zDQ=wX6lJ6Sjj&Eix9KyZV|*c%KN*;SvCRT}JJbTS2gwwt@1+i=0=N?mcA%`VuY*o7{|wQxlrm;^g5W#I4)N zl(_cuTKR6&H&p<$vS3wLh&X4;2Tim2OIEXdM~^$Q0&`xBkg6xp}B+(nD^$TF0S&Sx&>_=jj&m;z6s3}<5GvW+6(Z5 zkKFVJKO;c^JuWiK)40<{goWsyOSHB|N!SiV{$}r5u>BMie~M0St`aoA33u_TX8)BP zrx@+n26Q6dv;o@z6^pc0i9S-T&TrE)pi+_4EDx6{8hIa99xDIRKzm`*0y8GOrNZx& z;_I|6!oXSj`=^N~9zJ(bIrBEwkKerkQi#OQQ69g!kv+UBYBon} zR4}XZ_sB6*zs##@gY0QBg-@gbT5gw(6ph1eYI}K;zc%lRdjCui4`E+&Qp8#+HN=na6#_>LLkGr zR&W{v#Zu1X+D3B@;0PPqN=GYFY|Z?uZZ1J zZ>kkI?jC^1d%vyyU%x`4pU^U!PrjpqIOdld+)m4}1~|H*Dv3>;sDZELkSUF);;YvO zz%YLqn#7Wibb1BZJBbbW7x{S*ZDhkOS+_z{4ZuG z8uhDv1W6Op8<58Grm?0B<-~n~SiE>zP1|771km#|?p?;>R9n1TIqT(WRl~tVlyaNW z;@)S`)=3O!ciDy!kC!m7j-t6(?TZfmt6AShM^Vb{!pd2f9dgg$L&LS=v8Tn!qimy3 z^+s(mI?RH0d_YRyM0TIclfOuL4?*wU4l4hzDA~QNytK?-u9`yGGAacz4itlt&Q?k6 zTYLZ;F?ECJk>dk)=K*+W)2mRa*$;vxZSgOkc#x~GCj0!tC(7tAr?MHN@E+aZPayB1 zdMK=4^tq$xL%!Xq(JHvzva>gk*{F-M)-`|aCOmGPIs;u=Z1*3rdKS(SzUeNEd`PU0 zG+G*e07gu|Rp&K-K$VcRS$+ns2l~ce)V%jdVdt`ZyG<_kvK}~Ozwcjyj*1E`z%}W( zB6YPQbtq*t4?yqQMz|7Zv#t~s>xDiSRBi;O^hzv#_xk!uPru;#0x zQVbv7mtU6tp%(Ns+qcNE6XPonXRM}BVLG|950Pvts;X}PB?!z0#fMjqmgkm7ppLuSs*wMIuurS#O@fRJ+ZpOJQLB;$d)Ijqg%^e4f6X1J9A1rH zrxs|Fe0g@blKc;iLZs+5YFm{r-2NqX}er9Q?0I_K}wS3qp<*uFUdJP$2QJ z?!N*v^-yB@|7@!)Xq){n2s!)1d-4B*ru$j;;a|{&bzPk5|I+qD?Y}z(ai{+`5AWko zhI3iUp)tzG`aUbwP3(guZk4uQucge~?E?Xaw`xn{?ZHq+K1|#_KA?;xYizw7o3N-M z!^IHZ&)uVOPiguNJuuh1Z<7}rA&Onr5-mNN+x8J)#ARnmlHY+0` zws-3RP$u1fPUrumU?Ek5JeC?)xZp1V10(@d7l4g+DMr=SR4X*}=B^qLW{CpIY$lnk zDxfI53oA~a#0c=+vmp%d?iv5Oo7I5VdHT4sfg$TQd%x3EYY}(+FzE)PU`2rDg5l!K zbmTkI$wWHHKm30-fEv=sn|w_q+RnA5=p5pUS9EUFAB;eCyc}&9tKRqg7P>B6c6wPV z3Y*&Gv|%{i&qSPqhR7-rg+Eeg9BvH$(bgyiwG5G*BN%;;Aap|&M#6kudmz>kdS6=i zs(;)vJ(?b+zurgE*RxOm4}{qbY~QMD&HlBzx}TrsMADyM%d@-59PSjblB*WTR~jl~ zJdfl=L0z(4cu#cAA~5ebT1_&& z3HlYm!#W7h7DT8T!S;h`%wE3DsOmE_t$P<*S9zZ44HU29f62GH8G=NhR5GEWMI(?N z2<+|mL%pRKQN!{0c`lFNN7!vobM|b5=!v{hJhU>UwqVvU(&u-mLv^`nB8SPgW0l-+ zq(xG9Csl`$EB^BY3_eyHY?<#d7OWkEN35bG(;|h_EBl#6TEBG}N&|CgP{Xj~!E@s81UaELbc=6*?OvK#TFp~PWw(zn8pJ4t zkLbz_!S)!6T0W7c2uvhp)A>?fr{8w!reqN0oI?Ez9?a=^=sy5ue*?nifN4zQ8E6QP z7j5^zt&rAeP|6h-NvrO@Gc5miz^@I!;QM}`Tt7+`35k4>+3q~%0z%CQ(h?W#0oK(p z-i9PB2ZJlTSUb9fD5~t(yo@UuFj?lucQ(v(y}~LLEBuAqIz%)vgc2O?QO4#+PzA$a zY(v5Wl&N&ysD3YfmT7aN5PbYAeI=o8(<4d{Q0;il zbaGCh`)l=%%zprWfgtG66M38Sp~aqwW0sg@8`N*^?0gTaEuW($kp2 z{~^%V|1V_N|GY-fq{Ga>ZnAmHg>C0^Uk9Rdl>Kr8lFrTTE_5pfYqUkq?UxW;mA^Z- zEVhHsHz!JxOmd8~nR7H2;e_3C!)#XQrmvra?7qDaW8&M85HbL;+!wjV&JtR0*(b!} zd0L zYgvk@m@9*0=`Ls818JI&`%?c6xd!;C7`59Ye)9I9!)z&v?Bs;FLuRjrxuquO+Cl+@ z^fRrok8Sfbqfa{y80YqnWJK3VRz80M-F(|7? 
z&SAP;z!3SUM#hGc8>AX#SK4p<;*70=iqO@`04b(8f?0F}^}y-0`bo>9P)5xwa0vMM zEyA$e@lG*%e-s`X*mq5wbF+t-Rg<0$n?`TDb`Pva6+fDs4WzZ?4KUh2 z&G;elz=i_f&~k#}Fkm8dz9{uzyp!a!-8k0j?5kVZ35PDlJf!s`oUim3%wze@wiD4mkYH_+ELybJn&yr(q>OB&_A19oNi+OWJqI6Onn$=H2hE zO}!(=zG_}c5jGc^$jGr*BTE%-DL@dY+dW|TZwCK8f`69r|Nb>NxH%hIu3db(aLEZ7 z7*2zwH?6B3&`Uf_*PpC={j8UFS#H(&Gx6u%h^GaoIQ7Ka#u&@ksMv_!p1u`Lob8;0 zn~($KAktE2&_Ix?e?beBVe3IB*LnCk3=}ju_)l7=r)Dq$3zm<`K_~N-YzYccCt+J6 zV*-MR*-JLG42yK*A# z{Y|=BVaA_jSMc>vZ+}L{a8NQs0-p}?;n{8n?F_|8Ps|C_xXa#|lVSnqf`<9k;TdHp zA1UXT0UFs~Sf5v?WmPBy=Iy>H;N$)yz8ck4x^<#pjUw zlEKogT@tN@gcNQlt*c$^&sL__134_X-WmzFa7A^6%tuE@J3Q!cv<$T-}o)rZH= z%C8>vPP*Lsm?O0s&JeX;OUorT5pEePA#|1{XtwrgV>tjnG9*-GRF!w0zRR*7?&BPI zc^_9YqMJhc)X_P3CWL??PwJ!4$MDP@VX@t>%9B=U&ZA!tBhK_vR?(p#q2|>{W$064 zK1xe!J`8D)F5vR~p;Yf3bE_C;j|Bo7>_0t&;~gem4r|VMz+e{0%_|!xvUjKcrnpM3 zbGOP)W>@w!Twf`efLn|r(81yko8ifr6hgzJ$a|6pMf zVd-3S$~i-15*V)hkuk)jhbG(oHM0%xu%HdPR&WxLxP!wbCdDlN7lR^yF<6h$p|&_h zSo$o`j)#Z$qD)fMaJBeHxTOk6+5c)W(skNcPPoOkCjI8ChhU&nG($e z2Wi?h&QfU!LJYI@CKaD9Zd!8_vArCDp<-^Zm%>6EE0QniJ>P*A%^NpYUs}9>Tyz@s z)3_Xge-5oj58Q5NwQ{-0cMc?DY3G2^y#nj+C?*v!s{@p{QXEFXlmPt=TDoZ(|3lGy zmnUH2KwW^G7OUY(Y%E4sHefkYMpi{X`o4~z*QZc*s~#}+B%ixd9YDiwJrI4zb7uLD zNXV@gls|7WP6C~vaY~NzP9|1B^Z?n)B?Ojk^vtY=4VR`m^R8j94uIGsY5d7Z!HF{< zMsD6&%Vru!P?E=l7k(W>@IAUOHnp!NPRNQ|Hb^Idi;Pj?@pI?bxCi)ZHV@n%U?MaNKXPo*~PrES84kEMZDKE!(=>2fMz^DDemO9gr^J^bZ;(p{CG;(ge(HCOKnd~=swi_6K*{MoTtDQ{1sX3xtnKnj3Eg+r>YPsk~pcC;5B4;s@AAMH+z$!ehrs$hj2f zqtN)n+OHV_pX9;IapWG6tlZLER8Kb{Nm9NWhaO>6T9#sbgMW9XUq4=6eZl{a%YKbQm&t2p-JqQ8`N+-w?UP*JUG*0Mi0@Ywsw_JGcde8mRQ+{##GgE7 zRla_I+f+Q6)L16p?Dmtam%et)6ZE*nab+)V^A<(jH`fh(7=P6r)@gONLfF$h)tVb5 zqgkT-abVx-a{un+|DfSwbk%h{Npt|`hf63z3Y@H7aDE7NE1KLovSb#oC4|O70waZ2 zcAoMuLBmat9`nA!XG`FgUM(s^(Ya0%2n?nD?9&-n;Sr+xsv2?>FsdS50>e@F$gHAH zCtkXN`w8#$)P?*lUXF)?k;1AwnSsUA@S2f<`dll|5OO-2ud3$j*x9*}lL)OH&?cse z?Gna(IJtErI3?uyyL`ewbxliy#TZjIvE?9apCtc3U4X#R67WE7Xx0}&)K^YY8$Q;^ z!TB&r?e^Cufs7UYq$d$lNJ{!*QsMV?ws>af9U8Yzw1wg+#FXF$^x5|g+&{0Sy2`0i ze!f&Ql)$b?#ct_ak0c^bYSm8<=o?CIi*G5k^T+ zsL9G2gNvAv}D!?pRVE5EUB2BjgZ5i>C1g0EXSBfFa)6ZUa z?NY86dKf6tNl3Ths4}k+M$>G@i2m2)L1oE&rMYuQy!eehSa((qN1v@1*tGr!9pn zuH;|waD+6NC#`-yl%#MPo^0iuZ-Z+J!sEs3?sU0tsFYmANU374S(a+yNcVr_3y9OZ zr+Q`5OnOkwFok>DP+o%gax|Q~HZ8dptl2TT5(BGGJKZ&LINmeDT*BHj{nRpk)+s47Fp2! z1S`aR2$|=;x%cjOWbVZe?uq-P5+waZe3w5HMXVml+-6)18f8x4zQ;g%(tKdF){Lk; zOuXAJ42nLT^tU^7Yt!o=6KM#ZosDSo49#I|(WdD|S9sGo;CoZ?8GMV(Ya%-7LHRiZ zN1+XfJUz$W=W|0D+m}Sbw=cr%bFTiBUj5u7@ca@94J+5)RYfF=_x2{m@4-Z!=cyLE zGG^aoMrZhu;&hu3F+P+Pn+cG#^n8r5Oto*Ykg({Kc9m*%8lBmLK0glFL0nH_mKF8RvtbMo2Wu3b_2MhU|S?`oRKu=K}NP6N6?m<3I+ zq3`_~%WaI_)i6ue-<2qbfgVPK$mmjRZ1O-^v6!kz=%c4@DF3;!9i{CH))Cx-4WgHv zhALaT{ZuKkvV@-vBVe2vSBtSH`B#p8HOmwdI7VY2Ob(f#fKT1fbZQ=0{N5YRQD^s# z6}}|cwEQ&JC^k1jP9z*JSp(}%?>AKog*nBHXF@xy)6lW*$23wwgTL-SkeG(mpnAH>^tT@Im)$^X|F%nw%)2L1f|hwPc)8*h@z>w? 
zmY_#!XMijNCEDl@*)=q_&ob~2&q*2+4wKBC=xGks2d}fuCCLms`#N*S&h|c3deeW4 zuZr7RgqwT3Z{W>pra70R`C^>FJ4q^{zRM+M=tK$lWnOQm{hDJK0d?TM(s7u?BlfIJ zv5-a$`G2@8yQj&3<<>V@dd1z&6(Hv?SMmkJ4^Euz3)_!=z#BGW<645-;zdr^!uZ`# zDm{=tKNFO&xZ|)bmCrPCc?=oe+1J?$jOm4-^2ab?XDZ9nAujF&zg=6rXH+Ss!#B+w z`eeKMuZ7;}|CGPkfr_FMNF59w=m9SvWjkvKqfmX)ro-{oaugSTlM69J%eD1JEX{evj}J6RR}^ddLeCr}i|I=326b zorq(b9|xWGXEU-C&S9QCA^^c34bX~`a&(kC>G-qleaJ3DUF?+FE=;XZXVIRGpE1-^ z73-Q0DK+4{at5k833{e%Bu65)GcY~p(!+)<5y~SxSMMN0DwXjSQi-J+Cig;qp;CBk zbb?*{J9|RAE_-NtFUvSvaFMyE(z8xzr2+JVXpr@y`%d}LEKIae$F5ou{D$TWl1gc6C1)4?Ba8X9}Qg8&FGaM1%@`%J%V&(mX9E0Bp9w9Gg^1eqk`o6+~M zukE4rd6v7X6ba{O`#ruf9>!^}X<$EPksHB3?&+>1nNGU$?H;FHB=YBZ>b(*8i`|AL z2js?SC9KV_F?&=}?DaEwG)y4V70iEYOL`2ONmomC^gB*XX*W;MDVTMHqp?VGmr1zT zgT-EjWL)U50dUM|<;D#B((O&hslZlLM;DnpF;vm`hHY{6_+oO==Q@UM~rh< z{5Zb~$!{ch*OS8B1}?2+^4*-w7fQI@Z0dlb=&#ND6-Y+T#k|bRhiO-{Bd`@>S$zau z+Jgo=5lp- z8(4kOSd(b&VL)!b8LRlLINkBf<($w)(oKzO16arXIpjVZ5IQyrOftug`#ofL2|xHS zVoRY~tNONOw_&F_czquyIf&~*KrPRKR`?wYlY8;17=0hWsVLg)dpTgVxac-B`G_Ug zWH2s)@asTbF7XqoL}43BC3=qMT=6lsjB)-qKt7X7Zs;M(@}Ih%zkHnW1HwZ)c0IQ$cbzl#S>5<354 z`Q_0xQU|0uHa~7O_F`EgbQsZ zrRNWHaSEEU!ej=~N|h)j(Np!%*~a&m;g?1pKiul{8<*UDs_t*W2>^W9g?LKCvv*oe z=Uqw;($LUWL;u0T>X(DUWRYvf%B7e=!*2PA=!-l+l8fs8{5_clf{Ql_q`NEHW>Ee0W zSWSYv-a_`h);M79&-_T)Z#tbk+nFpWeB*_@jCMsCefb@?f~rJxJNmxSN<>9#--b{5 z53h;iu}lTEP&{FYNyh;cs;Zc3Z$f<6qTQoEkXf5+Nc!wQKX0g65rfrz0etsaj{&yJ zeX~NkFw{sEAhOpa&N#%>i2Sl^zgv)MUoc~W+-+nXPggNv!P`tC??N)@lPhWnlL?Xv5U_Os6@|&VIE+$C!imb?hOa_cNY2d1 zc_hX#V02NdA%I@;dr`&nAE_MvHUM%XpQ+As`r)o*e?Vl&0!9}rWb-t6yZfJaN`uI_ zWk!dW_j{n2Y8Rh9?{LdXRPT!sgQ&wn1c2K7ojrp=?gOoCK(wkQ|GeEBdsfaR=N@i9 z-JD;UUzpvn@MYb7PfO)}I{jXtpiK5eZX552YqzqMqux_=-a|gbXcQ_BhTt1pe!z48 zd7I2eN7KmHEuQ#{&O5w%2A=M|DC^5!bMWn`%B<&%uhQ#P%ddjeh5=6aTO@z8$LhP5 zLNk+-QX{lY%hDG|TaP7s{QCBr78X*q>TCBB+;NT7o*8I=57^?GJG()_`4dqYZ9+=K zKAB7RGl{x`4uyiJL0<@eHHgk4@aDL%_t|US+zzBfK*RUHAb*(hMZpnMl~{=DYMR;& z_Xg@;=zvFa{27%I;EjA@eKRTi#(rLAx*(l+$kd?{llXV;?$#Gf%Q_=POCLTw52iJ> z!?gH)9CP*9NVaT9wX1U$>G}PpA!(tZBhNcw;&L-{soyq?u+-W6mB4qW4(^tYG=%^q zpyWnQkLSh!&`G3J$NBc>y?We%NXXJ&!@L9HwMp)Me!Cvd7JZ*VuAV&VD4TgyXx&g_ zw*B6hmg!jV5SWlxH=AkZiTpEs)`<{%qFvhvzA`A)#^ty?r~PKKWrq6@-;5pXFwCe) z7o5Xrv=O6qR{EgG{@Z}vbdgn=KV|rG^~#XEr!?uuI7511Z%A*@{PQigY^@-TI?9!W8p%QC;#wV`2sTu2>z---XB2oxmWE>R>mgfB`^mT3$Q+d6#PO;3KJ{kPdEl{Ss;HcjG_Fm%`^HSwE)fS={qsxA+HDlDb-3jhrxttH29+cW@ z>Dluc6w3e*#4>g~rye_QL;ecguTK}I^u+eWr*_|?W798>d{V?wEpI9Bpp3lMHFgyN zBwW{T&12DP(;fit;*QtM38r>nBWf8H#ZL-H$4!|}jEVZAwIXTtHZ3 zzuUsCcF%t7`+0-~eQd%cUF%-kPd%S2HL%>RKYe&NO=$oIl~#c@!Rk{rzm0StdbHO} zrj9nXp9)`Bl-I6=-MQzXo_{=kCGW2yr;0-Q42 zS0tX-nD8_5-|L1^AF(Q_A*ET*l}K>o<{F57Syz{*2TZesh$ZN*4P%$~3Oe#%3eJ9d z?x1$}MD?3xFEAJfd_nHnlEQwj+~*hWUSf!PHeOag9hSo0u+VzB=KvB+wWA1{s;knCe)Vn5Q&!l*n;o* zk0J1r{L$N4^4ne@yWP+IySO|)&R(pN=v10B0+RA(__@}3d&~Y8pVY^R`X$jXU1~uI zf%kYsz=+c~dF*@#)=41Ei-eD%apEC3_BlV(QTOy%bd&t2FM}=;j7pBj>MN=(V6(}T+w}RWF4gdGo;>B;vC*jT! zWpNJfAVmw*#bF(HvVI|a0OMtu<}CFlqtl@)Uel&w)?(gbfSuoQL+`bJ`za}lD{50B zq&`C7M4%A(i9ckj^ep?=GUa~jz%V#C>_7B9cI$_(-2N0@>x0Z zas*8L+@|Mdh@6|@uW>070;4}t<{jYm$-LGY=5UE zR$`EPpI=|8tV-N{g6)wTRy8%07EIFhh`StP6!H5JV*NT`6qO`YXlP!67DryaDk+e-4ni!$B z3>}4TRlk#B0ezSOPf4-v;e)W12i^A!4{o$ERiQpmM?QS;lnsQ4Re5c;EKc=4*_C2! 
zPce-Y{oJfjQK|5mcc;9?p>~sMX_T!cygnr9>~>AzCIq;UF6Y@cT_~DN7VtrG{rC-J zs*Ul(#!zcaP(?01t+|c87_PBswqd@R17 z9*82;U)UBpBJ{vF7*XKr9Sgq@zelyBSEl0^`FJt=1!w{!~qr zj^F(oG>{`+bvQ83i%oX3~*w*B}vd!sB$?q8458?0tZ2o0=c0WG69 z8ntsJr8~5#O3zLwKRq!ylqRoQ;zEdrLq-c4;=j1mvNwD}znRV4#Lc0}zywWD>6AY) zjVLw8H8qWR#lI+X-qX{jz-m|mixS0Xzo|J%7qYk~06{HdqYuu!+%}C%Pml)?B zKF1h8E8QO2ari(e;J+S)37Ud+0$Om4M(u>6h^_CUSbtdcbeQ2ujuc}`ia^`t(9KnF zcS+^jOnamFrn521%B-sv2}f46q;w@}i`70e@4v4q&1A!0u)ai{cbEDuvT2e>AC~;v z;ma{ivE_KB?w|q=WbPozZSZjSJx{))@bO@6N=t#FeZzMf*5-_P{+b_B2Xs~?fZ;pU ze%ic_SC1$QsXhA8PDc+JhN7iur=GE&t5zAYuLh6!?NwQ<$LBfR1z}C!_i5)_PuU&q zH^2>j!xVx{`VD<_P?GnLTmS+(-PTmRO%MTo5mg zf8xWmk3;R)>(^quqjYiY>(%_$&QnS&r;)(#8i2hU(jiwY8D|^V5YiBE)-WE*E&9Vf z)go*Nyp;3xJnqXF`+2*!ei*Nz2sMWopJj%ji}l2rCCVm_%bV;gn&W<;U1S92S5pA1 zi@Sa9ILY%!=t*+stGR+x%VvAf>}5a4M8jmS?x(ly>o)RyY4q*JL=gY#x<>>}Z(CkW zKfLjG3+#7wkFLsxpp$F@{BZ2^*d@AWboi1C^RIbni{i>|cOtYN9pO&GUR5GEzol?~ zx>e8(pN1A=1*8V6v5-bAz{tX zpyG{p2s0jn53>H=fgkYh5GgBc+1zG$aQ#C?XMOzF&lD4Ld^eOp<>JV6cGy>*kd?m%EM;h+2*S!eSs^8i;_1Q}ve}%L%X4AueXQ?dgxUB5bKE;J@X@DqK{g!(S3Vo^_9L6 zi42LB`m98xHKA+$Vo)TOCc;uwUz(gdxV}!j?4X1iFWHq$hXeX{Ag@?vtzNiq#(!V1 zD&F6h)6tWLsL~R8I!Tm*hrVHQ`v>Zn^ON1tspX})SdQs-~{=`LUzAoxA(FW<0F=S{%-|GlP9^ zt#&!muc9t$nS%ivq%`2>A_!OMCjaZcEn!Q*zs4Ct(Kuz{+SYN!>@IeR__v? zpsB=BH~Ou(l4T&uSH@gyDK>~Hxypu2*Q(v{%dp6KYgx~=ju&{B7s=Z^4nK|<_F6pV zbQBc`xAKB`Ddtk2x&iM!2Da~XxRF~{;ngHYQ_SF z&w^_;Mrn6sqgKOF$)4&Y?>}q(Qp7Lz zkUa~&JBM^TX+;z49$ywA!&xe)R)VUfPS5j*qB$mt?5Qb4r5Z#5u1+B6yV?yGgjeC?EwaKGkt>X= z?CFG`TB9jlxPmyd(A)bxfs~Fk0dN2NZK`?&Z@aUmcIYm?T7L0EoNTJPOzPhXUhhv3 z`p*f--OCDJcpn&xxIgwWnB|I2U#XCiR-Dn-aWVzke(7Y${JuJGJjF#I%`NhY5yP;| zw?RWmZKnA_IgDcI#LND!c@$Ea#4r!b4fk|i3q|f8axP+4>hzgoj(i%XlXdHX$T{gn zsL!P}TZ2~Yw6_il%QT*yY7QI~P>9dJP=(y0&Yl`z*B(fIyv^t7*atdss5OZZkzz`R{SqeS!DBCwND z>vOO|$kU7po$n4KngY=qvNu2E;$djg044yrIK1YESRqEgurBwAV6RP+WXl?AY82^J zk1w>8PvD@7Q`$Ed#HI~p8fv>3WzH7TUYgekEp|LKmU0p4~EAd+BcT9 zQ{j?VPEIN>mNoJ(yX1MdZUrUJ#LEvzL^Rfc;TtIzw0t)Ncxd@2V2#4bwqI#%par3G zGtapiD2&@JHV{DJC{+>uPe|o7A_4~ zsoR>@JkVy?8V6Np-rsRcwgo1HJX&xADjYrHtWOFd5iJL>#N$0WNt(^A7ghrsv<;Ol zjzZS)*r*;*H+bhzgZ%*Xd2Dxu%kBy7e>Q;_a`LFC*HDa&*FC5G6SdXE8n?b9_qy5w zY#Cko79g_tVi82Y`i^MM+J*^91y;$- zKT&`~8GPsvqdcQa4|XkgGhf~a_pC07$AIm;?`Ut=ZKe6g6@;YTyjO_)j>?5R_+SHK zaovek_WrK#Tv1<#s2%z+bGy6Ua38!nDR}-R?KDA<*7HNghzP&pT>erOGN4ta;l)Hl zb4#6Kx6Lgvk0)c`m&Vohn7cepa6J1WWo$|?Pf&Oe?_-YC4mFYV5SoApS8`YO^Q7S** zJqe&yW1`l3KIqc539=7~R?kKxE_G$S1v|O9e|Vyw7qi5M>uLUbY_Z$EP%k2~cbbo5ypEK)H8pw-eGfT0Zw zuf?Or`iPEuCNHMc?-KZd;`;@UN?rg+7o#$a7CijCwg{xH&|kgg&7TRCFDp=$8!OcQ z^vA0M-%e;c)%bG(VOJm5bic#+A_vV}n}?Pt3B+gjXj2irmwia!EKnD4y1?>5MChY& z@-Yu12UR=_b@837q5%421FP=Gam$a3uY975Xg}oQ_{f(2s!-nSD5HTCq7&dd#?ElZ zvZ=EH07$~`HqCX7Pdu-^K7v=?Spj3iW`NnJCP(fwtAO{-Y$tBNNnRr8qH ziw}BRT{*!k$6Q8~nEvuG00^&&lq|57%TgJAhmuG$pv8On47rsnx%eSVg0P~MV6w7) z<{Onscfv`4wTs|=wr8qG?Txas($U^m#J-{||pQd3(%7 zOKa&H#~fHNqrq#8kaO*R!6R*{1e$ye9R1Qz_4i?|s%`U&RK&lA zY-Z_(UgST~uMpGw`$dTGPe1I$K~r*((lXg-ft@{4;BK0AW%Gi-Ha0-R*NDbqf2yQ( zhtJ=`%jH|Rv zC6|U)^G9;8$u^eSE@ak)+8V!0i0Q;-)d?o!)r~Q6uq>#6sf8 z{WmMTn&8Pnmh+>d#??xq_I@(@h+?x))zzEAqfVSmvUbC2(uCC=ua69lSspOQ&jrXM zd!=#MV*Uf)@`0FV2$|=)sC6vIfM~YHB{f%vsPAYs_qv`KP<{}OyZ4!9l6o)N6^#Y! z8Wx>tNKo+Jk+?EnoVzkN5q;cQW>%vEkOdHVv@EE>V!{~6p7;aacN%~ggMJS}JV}-Z z8R6OS)4ojdb|DQ9UMkcKG&~6#oeqvt`tFsfx;cKus@)uTlRGj;_BD-p&0;8(5=j|G z{w!Snqp_PB!{^cp2JfmzEI(uUW3*v_pdqNtHsS57+R+(bjDlolTFD{!lQ;T{*j1>! 
zFVOR5{Eq<|qo$x20=%GaiVNXy`xC{~rt)HzXXVE#5>gt0V>%FY*Kx9Pw-7|8tp)i= zMSWMjZSoQHB=CYu_A4A1GpP*HXPAARqUpoiw!Do^Y=JOr#k>uPZ@KNb-AR8VqMOvV z>RUs2hfz0Sbja9{RF83q$(PefmWZDOgZt4_Mo!F4HduLAyIBdE%4!sZAHASFSXc3G z>06lF6zhV$-E6Z6&~u6)i>;hALvjDfMLSDE;B8eRK%jf!-oi!>7U&DKd1rr$fBCad z!W85}(M?Nmg@0&^RD();m6d`<^zN|M?3Y8=RV2@-0^hyQ*!^GlEI7=p(puE9Cc`X) zxAYt9=p`{r1{-RU8MuPI5eZ1(jhI#x!v;f^_xEK_hC9X?Os0K;4MtRa02tijHBeqH z2!FTD+$g$C8Dc!3*Z8_NxIS+&Vq%M+f{^>^_L<$_~LPAJx zA0S_@7}}GtHOzg7nZaT2$u94wMN%t0B7xnLUOcG$p+trIs?ZB+>`w&D@yS~PnI2Gj zs@z$OMk5bhXAQ_qgPk_6ieO`IQpogzExOMv1(7K1ZLrcY5f({2)jglhBNiWroIl#% z8M0iy&tPvMzek|5%3tFtVSOXk!(Hetd-+9B%M;zh;a#m2_e_M|BvcjM1Ov90qMJ6$ zmHSLNV6?h)sU-L~COR^6>d$ZoM~hf0Z$g9>(sNjh$@C06uh6Iy{i_`1g;C7hYr+^| z`-h9|>wU9uQ*y<@*H@>Ks2?T-Ry{Btk)LJA+`&U}Rb4|dF3}z&;BNOju(pdBqk*{d zDG$OvU&hi=7CvjbiSt@pMStErKWdz8^*yd_!hS4`lTyL^u@Xt%^M2A>Ma)7k<816v z$Jf(pzV3%CC7KERElL7Cq_kq#Cf#(~_sIu?0CkQvWPqo<$_mM2O`M9@j{>(+#xNs? zB(WdssfSpJLh^Y2laf#3Da;!mRcHWz3n@l)@gIfszv`u8c&rYcO9_qABRbeUEGiY| z1hcnH;#Nujoj2ycW?*_Ap&CNrcOSr5KfwKsn7UqW@9z~Rw{`!qaE^+df|Om=NlpN7 zKOJk7jVrZBX4F^7Wj12RQki=JYLDHj9ybRDHMgNk%d2l=O7PEOKT5d@-7kNep_x@R zw32-^+i~Wqtq704`UL&*$*Zbdv<>WgctagVErJMLGDPCLyv`$vRf{udy1`pyGrPnQ z^`pMqks?Cg4;&0m;s)$@vpP%8HDHH9<1Wsg_M(-veZ2h2pLI<_;Hs$_I)_yO75T2_ zl!3Z*4n4M{lhDoxv-(aJgK85Yq4pfs0G_Q+*j9exz%P5bpPBdT9>Rw0jg2ic%716f zt-K-92za({^57|!@O+*oDp|(EU!JX?EN_LH1>#tRIDLG_=DOjg7a=QCXdbF=b=`nm zsk}wq^a0c;ii*EZL;InJ;AD+wsl*_;YC$}RJbQh9`Cz>_8{Ms*xW(38@XJCAV}%p1 zRb-X{I%pf48p`i$=}FTmwv$0uO}u(e|SGxn-nR z$n+d?HLhHS&xp~QdX8zmM9`M3%f4C*1F@ZJ)!S;}wgV0?hf>=MfLFT8%h`5SjWaG0 zi43P~bmDZ>d4XniwO|NEOIt{y$woKHC3Y-%s>>~j#+a5#!Mo^6h*y$tM*9XHp0_Op zR~o00XLE2&@}kEBOB4puvm~RF7)23!EMuH!-#}TxxQKDO>DwX z*6V0e53|bu7^&Hez4uXQ*!Z$mb^s8)5mPIe>JQkWp6jH=Cfn}V91*(kW%ZIO^4|QK zjgZx8LSdwL5~D}54D(%#29PE9vvDRHY==Q656LWF9Ja7p&RlxS!K^<_!{*2El(biu z(@w?M%Xy1v6}vKc@1Jx6jX-Ynm>JHTTSf8COrfsx ziIhK97;T3*D|6hAW^rHj^0&*L2Y!hR!Qd!E`9s|nZTXk4!tIp;r*!78HP8J*;`GtQ zLVNC}%Dd#SY-d6dQ@bn`6Okcq!wX1Axq=je3*{xV@|fZyDvE#Q!w`Ia)Tcq=jVcvX zT<+My>&~|Ezz5$1q<7+2LqmSFMeix|AY|&JGj#^(!%gLGv&(5sd*}~T%N6{-a2PAWxCx;yj>`} z$PKSk{A0aAfZT1ktme{f}bm~YwIh-E3ZW{eK6WIbucPpflu&v2r{#%c=(87Zw>orZbL05zEi{;+=exV`nv^LOEjCx_f#=>H$ z+!1K^{*k%qLrWtt?~TyIMgP#_teSYdI&V%E7Tn;0C%t52mDG>dG{7#L?B4x10WXP` zLm`bL-GLE@0!B~hOV^5C)>evKRRy3oAug>;oZmk>)7yC#1j#Z0kRE<1u6YfTh_aNq9z6h9244Z3=dO__hYWDRZ0 zP*zM%kHy{1a?VoH;jG+C-;vb?aUP@clEP$>605Ah0n6F=cHO(r_U1GEL)c|gj}-UF z0PkZfXL*=8B`CE*Gab6?p-Iya#Dd^Nw#Ho_z`zEWBMo9wCX%66dZ@W%T!5%DQLW(F zT)?ZMQGb@49SiNFMNJB>IlZX}?vo%93fF5Efb%4ZT$xYhgT*P9c`uJi0U!^g5eIN< z$_1ki^IE}5(+bDE(o6&dHNdJH$40*m>#o8`_0Qyq;3AumJiLn8dpQqyroINv;YbTE z(3Jpa@(qkPl2JJNwF$(%%%P7MLU0{v3=qcY{pm9z&vhAdMmv}*t}>Y&4IJryq;OU8 z%~TCg++4i#+9^lfZjIe+$LIHd8XMWOEXrEQqP4%La*mM(TgTj?9BcaaH??tIkjPc4 z2hD|zle#L3BBn&y0FG5LyWZ!cMG0px0+me8Rr8ZJUi@#bk6+N{9(EvlID~y}`eKSb zCs9*#It)~G9E9lxg_7F9=Q4=CgYZS)ySx~6Wr0uUbqo2Q2AE$X&hMT=y^apz%lkj& zuF@5U!N6bs6u_Sc&H@v9fVX4PHujU1xno-63i`k1lseO2Z|VzIQMi3TkELlK`vR10 zz@$Fj3RD+2F?#fTSZC(G7UCU#J>j~{V>wuPF94s)ZH;@5*1CFiqU@<9yx z@VP%)^283SpOIKVz>R>Y`4XV#CL_%3%`}f^!0YK+yGjdjbl?qTU}F)f(xPLo9*5_E z{1=lab0*$5$jx@B{R16%-cb;l!JclQtB=%|RLE?uKaX7gbD?#a5@EmZ^h1h!r5_!n z!ibF-UNI$MtNiV6&4T~!CrH8;W|9n5n!i0Mw|$?O_fN{$r;`7uI>oY>Yv|vV=~tYZ z=RHY9IGxKU=687?aI1-;69+NHYl0)UOs$igP7O0S^FLCFsV4a1=qKThFvk5I`~|Rk z;omOg|Gyr}QwfLFu3uw+w?tVcD*JcuivRE4!zRKO8M!Veiq|fkNGh<`1=xDF5(RvG z66VQ4xb#D1&^U}(<8qQwU_Ra0X)r^LC~-h8%AX%=8+0{i;%aDrml3tUh6Bp)NHK`} zyno|B&iu@P=-1hvNz8y~SW183`WZ%1Y{!fRb%g7PB3G#wpXJmHv)_ULNQ+KcEfl#A zil$d}md4pr`Zhg_jHS9F=%V@_+XGa;AW_HY+#C9iuI4t~WNyKAR}n5h!!#GG#0e6p 
z-W|7CmsYw2WGfr>RJ`K^&Ifo08Jtst+aOXK+3Jsz+ia`1llRR}_f>2C`3&%-4!5nF z2J48`1l3UL$|8!8ZWGKh>S_9|pYs(oHG{ivDkf3$m>^q{Ot-vE#el6xj^?J1`OT|B z&;shP2=qy$oI(hrxj&#vzA>tRzyz;x)a+$UTpvX*Rxe0R^qP`@o=#pu!~c8$Z`n4J z7}J*}#-O6BgZ0n=?^CQV$m+pDVOGgQ`$%tAYOfv9W!V5-F+=;2N=W{vQ^AG;==(%% z2`7hduf7Dd?g$@-wim9rHXfUV!XU~9>xinuo>1=cRv2@I_gQmFH!iH}F^qRR7ykg{ z3^E%eo`<^0CT+fd^i1K2IM7S^oPr8EJ6XAT1`>P^bG>iI@UN6w87iJ9akP1@c8BT0TaMue1@@vJzztJn|}5J1p75YAL?F$rphuB|-q z<{Kb#P`JqJkj{)+c}ppfRFdME!{L=m&tjLNrDuQxVJ%A7UnQPO+X4VdDO`S%I_{dH z>ktRs6{t=k-3)npseHtoP@UJ!n|vjgtr|x9 zqThdX)HJMKBza_B#WWJfqODB}F4t&_3Q{vF6T@`pe$Q)9V|z{RNU$_x50XCIvV;$S zuJS(Io@wBmd_9qKGtBYXs*b_*cE1 z4dvO6$lOZqzPqgm2s7x_ycSf_4OA#YNNTIEEgFhjYs?Cb>F_FKj)`F|>$L_!k9BWQ02 zu+GxSiIpUQNZ>`$S(Gom5x3Qg#T@>;R7Xvby35PY1cCNETAlI4m#+ONfOmFg%-oLFR6z~qyX=w$7bCIAFr@} zs`Y(f7gc*azXZeL@=Y~SgFD^v>Dsdetd#KzBm;qc@H+2dvi2DC`Wf>gO@(ilG(nRg z!symxGESQEPniiPf$Fh6GZ6`QXTbFH*&%7ZQwb8!PEo_z!7`h*&8|nRXd)P#SJW=B zb1R1HojkVaX~5Cr8`m%1&SHd;Z^+r5S0Q8fIzHr0{njK#3|(gHn-t##hg7 z9jHW|uvfBCHwK(B{1_E~YJv3b<- z9~eU)&>xQl-e|oxZXFRk7>qGr%zi#g%2K9+4Q2o?ibY<^{P8)=1CGO#wT%3sZr#|a z4lYOz4xos{?O6y8e)){>MksClvp;hBOd$G|vAu}%r5y$$J0uD(~aw)^agWJN!= zuu1mU8%@#X-67JfpJzd?G~W9qMfHGb?=Z-r;+L5;$P{4q_2Ul2Hiv7(<9`0di-P%& zua@bFK3Slf=pzQCALO1Bd3xj=od+M7B}F1rXM}?yjXG5JjyZttUv#ctQ%(yy#~%hD zlq}6pd`9nTvLE1uz1t79!mQxXwJhilI<0*@nL@IDqAgjbzqGX1#%a8mkUz-32eSZ( z$6k-|4_tBG_|bqqUy9zY&8sUfa5uy8OWr|?HbVR_j?%xaa z#`L!86B3KUn-?Ih#l)D8xwgNCOPv585RLITl;w;OC=*EX$=7sigR^QezJ(B==iKqv zKepJ3o7Agca;K5(TsZBqHX+Wf3Nw?(mS@2O^xkVY+kI_ZaOrvWrnLu2x%#V4LlwWO zIf~Gi_7K|+X5hQU%jqBWWO^R=KMI8unhHx(s){msSDotop^cO7zPTg^e(Y$KgQ`Nn3(tFaSoT76I8;G@lTi?>KK^(5G%f{3J-_>|3jddf4zBjSmtma9n z;64kvi0&mg!CgA|@U^a`;fZ1m$B#obwgZe_o{R%@IHEjD1&}+_c&q_J{ZWQov>k$8 z1IZ|TIbFvcrU-Z=rfzq2Mbh=@Z|$13jp;*r4xyyA`UE`X!NBQ8_vh#&v+dh?h^Vxy zV33_jul;?7Y!cznPcnpG^`;EOE)b?lm^w(QRAeG^q^h&y&-angpEs00YzHp@+2W{ z&I=L14omjdvHkh7?XDIw8U1d@86bMD5+l5)nv^tKI_=@`HTuRzzlSRd^>F<#0soRPCzOgr82;k8s=@2XXF@0-W;n#F;N>M!d4 z`K3X=6@tx$BH=~q^YNVV%`0+h0?gRCl$pK>+3Wpz@jyr`^v+1Y@#V!USJUKhi1|jt zB-ho0=j0kWHCn0$8~U~d*e|3>AJdx|ntWR(&MS{`uTP2|iI+8%^oBEi#y^xFY~7@l zzy_K^o3h(-Sa{isZhqqo6^x;sRlgdlOK5a zi2liGC+V^FBXZv-o9_;*M)wR!TFLrDIyB-gn&q$qv`*K73DgXk9`P$LMmyFJbwIoL zA%MjG{a??pq+1()zi87FUHdLblg?@6@0<9w?v7yo7arRiFRPW(vX}IOrdkRq@P5E) zj~OhYEY9TVQ~uG5)s&Q1dq(#LT3KI1TcPiajMpLUp1an-nE~;aVDKi`0@ zI8KMasG3DR8lu+oB0(%&1*`R;z0HF5KL9kby>|u;{9!bdYjK}Uo2_olzXiZ$@&tS% z)|*HDxdB39%$L$*GcGBz6H2bpye_s-jpA8l1+D491cgMi_-?8)dJ)fNc-Sx469)*F zA=(qYzEcUN+pP4sTB~d4zt;eKT50~skTebjiuo<|)ec|9mt1jBj!G-9Z|I-Cs|nCPZI%;A?CdzHjWx3H0~Gj?H-5x#?x43f7fwI z%Lp>)oSm&m;G&`*2SGub-500un`QUwYyNsc6SKa+%WxufA^GZ`2a^jUtbxBPfAxoZ zd=%8V8ZldOpd4Q3QYzivE+s;f<5m&As^#<2~3WezmVyXT@vKFt!Q) z^)=Jr{Q=xrvlPlzW3)&e*}{8s6{@pvhC8i`vILE^H!L59Y#r*d@HwN)L#+$L6J(1j z67X9V1{1@@UJz#=dS?m5t1E+6w-H6bB~f?vHWmE>0y-Z zrANl)?gWHQ=VEtpFx=&sXQ?3@$5Cq4?_F2>0gP%Ja6c@QH@#1W-rdG|kytY?1^RTZ`%&+zLFwQI{9Fz(;XqWa4JC0O#*lseqX627 zow!_9@RpJ@;Q=kn#(TP4`pgEUhJ7m*k)Q8sXtLC4K2KO4sHAKUE`*y+kYIjNxfY5U zcAJAtmD;-13N6N_l1n77xc=0+>NUe>Rkkdcl4SuO|Kr16utZ#5ffl`pN*sx!rbBoB z>Nh?^%Pv^CuQtQHJ+pE8~8B7oX5 zFNul~Mp$2ZNHpU~QozJ7C;KK5*R8uE-0$h2Zs8di30*bDEKHZBKoJ8xNifya_bfo| zefL#ZecZgcAZf<^=d`P1+p!O~Gv#276X~FIs{=t^F3PrSOy1m_`Av%}gOG^|zINuI zSBO40cF^O{beo#6_U}t<5aw?;BV1>ZjNGX)TCK>VJ6YD7uAhiu7NUk9{nA?_LeQm% zv!a@o=eM>Kfe$vS>ssMg(OA!Hvd4gg0m!GLWZcp(#fJpiRoX5l_;TyQyiHkwt?}Au zjQCO#?73MVIg1T+Tktw*Y;YgMrrvPG*plmYZH!pHmyc|H2@pK*;qbBix%Jv0&%1P4 zS!2;@ng$A@Zw6nM>YF;1zpTxk2Vfq6hrKO-o4QsWsrNZWHUMw?rMLTD`QDpzeZ2(; z593~UAD4tE6Xazk4q`CBs(IoetAn_6_CvmvcwTMNPsf$OEq@BxME$2hQf3iB1F{{K 
z7*K(B(xDxtYVCGPU-+UUynf51!>jzz5l*lDr>?$I?r!BeTApRs(&Ra4sfw90VQLw+ zDSat#05hKFzj-K|?M8s#O0(J9XVr+A4snV+sfj^Xun_4jA9Q54wtIgvr{nW=*~YOj z0G9BS=x<^#1i!S)0b;S};e8c6ze27aBZ0QDSu^Ce=4LDM%!PAM^pi)T`;8NFELU() z_h|u#PgwiL$j7SE!)+s2@-`TP#J8c#n9HFmZ@UsiNq(`p_Kztrs>z&vB*-{~x#!XV z{a%f?r-{nFTcDyYd%772n)rFS^ihKst#Y8Py%8T?g3sPz`LvdzH-yA2&`j>8e^LtL zZ9{)=C1OyrnAPTo=4{k=vz%Z+5J}`kwbN-A0ir7@gwb!EGymN6?O(hbe}vw1H(

jZeI@D7F40KJBAD{7e`0!N77DlmeHSdQ=1XrWF|9jr&2? zFW@tD(BS1i#QJ7232Ag$_pC9m;lD<{1KhVZ_2V}`(eDCIGez(7R*5wn zO50AAsZRk==+Pcqmy-?md;r=@;p5C%F-P~n+4-NhAGp*XYG;Dfu`z)oA2X)bSd9L# zHhwa~YQQlUU2bvD3L_ep=0}i7%8dNwh;b#yw~KH}j1!1URp+Rb89mDuMQ$t}o}O@} zfH2`NS!4>CaWg=|WhVc7;Je?yUjNL-}1j7x4L79VW5|K@*inyd{HNa~O!+5eec3px}wt?+meD3;9!{8*ewTlU9So zcMSnwY6wS2{8QUDNo6Rs zF&-4em&raC(*F|CH#vilg|{Ga3H)hsjcA10v@z!4)zzS3tJ8+iNd~t><|-v#7A`*= zeJE#cL!s6A;k7xdO4N|+L)TZkZg&Xf} zpuHF$|CF@+un-6ji*t~s9WvHvZD-Rte0Dqu(@N*Hd-rDKdpq_^=0?xXWs(J6lotA7 z&a_YhtL#)zJ_wW~U@`bH_j!or;F+M+N?nI1ahqIyO*(|`_--oEqr|cH*szyNFyX z=Su`R@(jfg=2|Ek{Zs?9DNlaEI4B-GVe}7bR<$??qWtB4FOn(bpR(KYb*_4?=#|=9 z5q=pJOuRG;wOyt^P!WYXsEuw}_`Rp}lO+^3lHRxY1rV zsF|_>OxCS=Y~*H#Tz?tNOeA5pRq7q|eb`22GN57eL!y6|AW(DS0ICoL9(-Y}Z$(2sNW(HveeW=Ncg_yc#^6|XerI`GFmqxWm z*0D)Bhmv@EaU8O=@NXu?IuRJ5gN~uB%boWx8+>7chj#}qbx6?Lw2acFwyjct;96PF zJ>69r=oEYc%)6iBVQx8oj^vO%#zWcTOQaWqx>9A>BGn-0C#7|M@mBBV*L1VFL+sb_P)ymPvz`l6tq8^yP8GAMegCmgIwQ!99f~9*P(A#CBI0-%C=M6Br_{wQ zCXBxQ{Rrr+PzJrKR+G%xO6IP&VN{StgFn0KB86stPXNx46Yx1L}w+KsxC~xx~M91bF8KXejgB>Or;m1Wgd3*xtMnwo8`w`i{DjP?SANQ3B|JyR++1$s{G+v ztXEp%tckvA3;DAT>_cHJ_{7-TB=RO^e;a>e{5!$x6TEZiTUy9kOweb+yQBy!Q7FT6el*!q)+kWLH>`3HbjfUwOCSka99l4YZw@S zN7lbMZ|-$Rw~37$8{r>^(#H3_yhe%qt~=}Pje!=+SKL`GXYGs-+cX}EzG>}g<^?|A z2+s8j+lY@&;5XqZtj86nGn^5+o?GrWUH%qsIn`O`p;uW`ZPOHsf@(~zeFjEGBw^2p z+8-P1AKDCq>Xw?V`q;g~q%>LW67*cD8j`6$@2lK_#ouJHHXH<73B}}Jm7reMiARi6 zPO^DtqQ3SSK~XKdExIT}QI!+!sx|VMVe!ex}TJ zHg~M2PLj_o_b-ub6LD-x4a*Q>CE$dt8h|rR(**f^+$lIdtq8`t8QSo5Z6kXbwZ1-q zUOypOoNmXC9;gv#>K3*NO4hkLz7*-2#z3i29Veaj=&bjhU_~!?$TH1kivoJ2dgTT@ zQ;#r>?qKTRjg_YQ+gg%cB92ccm+M@q=CAU7kX^4>Yz&7+yHFgGmyX())Itpnd%)kj z-=416Mg6*3s!D)1NT@fe@Ypi7ih}3D+QiZ%3hUR|yT;Xo-4`QOCB19STjJ)5mA4=V0NSVbwe1lq}k;v0Q z$M@P_)>i+mG<~cybJ-3Y1gik!deHO}75!3fFMIUH43|*amDQEP}N1ShRT1keufPU?q6$A+=(fV|+~G z{CTE7=%cN6{3#^MISi5(es4>^TW@x$f8mKMS#I<6(KT{>rW#u4ve)R%YyR-uX!Lc* z`uOZm5s}-?``jC?f5fx^pv?i3j=^vEIAf3kKoTO+FY?3#z9c;fZdZ3XkR2I)u#qmd z&BjA9>~!1%(B+^$(L>-lP|#PP@7{|`w#6r_-tJ%~$4}^L^Qr%HvF`Ree(0=aUO3bG z8q>MV$HWs#!xKQ~K9zc|Uoq@jR(9utS!}9&ZbGnf!7-T6p8H%K)g>Nd45d}Rxae9Jqd06T zFPPzRZ`FHThxCbJ$`hw4d7ErOPhUKJ#>wDT2sjHgA;L>|QDyx-{L3r%ZULo`iY>c^1|KW3FV zk3K2Cr#9$6tS!=Z%6P^6+f|0)(FlCV#n*hWf0z`3?xT6!%@L}$=T9*MZBH8;tkm28 zou1_S^%i%NFYO|Mu3)<5($svNSvxI-<`YhShE~=1=$e0_nxOk6#KnSP#;k=AjumS) zcX#m%^&%7t^dO+|1SOx5UlZ2u)T$U(Cu?ygU2IHhjHOga!KIc0%@koP5q-ZocfX41 zms1}2VQz|mmcRZJp44Jd66w%inYlo~A~Q`<+*=a6y*okO| zQ&k#W!Ez(z-A`^${I4^H0Weg%2pr=Q!gcUki(3NjNE(aNPc7d^RA6dhe&ym@TsJaN zTqmD8fJUq;B677WydF0T2k^OoiKuwqX;TIKxgvz?7-zkl{-TV7HNHYu#;Nur7uJCJ ztaB2BOzW>6CE@xq)@rW3`B`b+s)DPC79ZqtDX$q{@4hgW>0{cyT!j}-o~!zKciIvr z^%gP!dvK~u#LRsEQs>sCWRN+!)s@)}pfTm>sAuB^nNIDm3Tq3SOQKiYC-t=If}ZBr zlMor zjv`M&j(BpAQm^*MujNq*>-kPAR5{#3ctLJnKyDZF`LVS{CeTj4i9}|Zwc8Y+8 z{t#1x1N~|93RXY#Jp4LlDFhG^k)P0Vu{q2P>8v<QVUrAnGl{;%b7eQ3%1^-Gf_j_u%dh0fOrU*TLO_JHg$81sz<2 zLvVKs?##@`d(J)2{kwnc?%mZ@y}N3yRi}l!1xp2O{+FA+sbvX9(sH!Z>iCX6Q)Y~$ z-a>c`s*cFS8o$4I>LDZtO*9efuG0|I^@Om=4(?gO?bn*&sz&1s4^{nJ*DiY7yOSrm zGe-iaP4=5N6}w}Yizd?zT_T^aZ4dp5>PF2OE2R@e+QOFYy7%i{YO+dgvo*oLw!|eq zE0N3Htl};B85WHTQ+^g44U4{g^H)LHn|;YjNXO%RovkL-H10Ef4RqVIsZm~(pv6|_ zhh~W)voC`pQNg(0q?*BZk&tl1mz{!;m2iRd`U#jz z3^&9}AxxCX6vI<>Vmb!;42mZgI?IThO_;=X$J!ySqcWCYuF+(_`bJQ-)XYPtjAwRw z?77heYjWr!Cmu1&6PHzjG;lMXqn7h1jxm%jY(~$l$6GMFPoxW7s-Dy8Z)1G^w*`C? 
zxn-fHIBE7qIIOu;^zT)>IyfP4%aWcT{8yMzk5RsZyEWXEp{4tSuj+x#<8mYHv`c-i z_H@ZCiT%~`hsK?8IL86lX3so-x|s*$FHmT=HY~kxa7I`W$c@oA5{8i{PO73t`26LD zTNfymwVpYh9jj=Wei=^(jL*DQE?+B~+moBd7DJg~7eUlu;ONV5WOVzxpxr5nEV5(} zCUy*-w(8rffgLtD{|P9ws_+CjuY{Q30k*^Lgr{Fwj}BVZm_Ga{l8cj6c*4oIdAJl* z;BP1v@Y+;fI8M81{)zx1+`TOO*jQ}{z4Zi zEeZ2p$_=#{L~!5|Km7gW@d1>6C;rTNzr1Aw)o+(VA`>HJv zcse|MetL1-l4aFn(S~NM)2CL4jbp1FHO(2yjs)odM-q%l>#ZP5->lP%Ad z=2A0AXz35F)MWGmqxm<>6bl|2CKZLTlPw^AND9G}ED0%Pf!p|4{DJtgrq34qt_7dg4SLTa^N#RiW}OKQI!k*1X$ z2X&l4m!F3eM#X(0v9G(4G(J1$V)0ivZrkVVP67r4DB2k01HnF8rgk4js2u5cjitAP zS=sqsl{vkHy+?i**uk!`Ztw1TBduKZ4-%)s^3Ah>Vj3`{uXch=;w-*oU^0-^>g^8p zL(~FRa!^3)bGz@tuW@4}5*Z&$17k8TfM5UeWY4}VCV@F{aaMDY5qleN*P3!hz{cK0 zWcGB61fF4vGZ}JyM0CIjwEaTJ; zF`LAQd~802-i4!R<#*g;&6N`GZ)0K)EWofQZV#&Mj24omW1T$1T z-wC>d-(*liE%^Fc*~YexAb9wrNWGi)3-ZYB&BfAbsGGfTNY(*?&*h!1As zPr`=RR$j;^H>czAJcR(0FeX8mkqd#al`Ao+-{|)(GCPoL{{cOgWB#8^t+3L}qtG!u z;MaD78)8J_YG}1?ElN>m30a$%)ENM&cP#>w?gO}VlW+yFwzxYm82G$N`Q)7SxS)Se zVGIGE)*oQXb*&Z#9BI~d5nIIyLnXlze-cvZ1HK!!I3^A@JI!>}R6Dt9R=Z;g#n)al z4_{b1!m8n#-KIfCwm(%EJ)mX*MdTtAuY&IrMWVbCPrHxl`}vrjMc^7mKK=VPsLBt1 z-Fnzqz4P_Ehg9+WLLxT*g`ob8`rl%VN3oj?dpoCdQ?b1*xXlTvyv^+8N8RpMrb{s9 z+iVX(EsItp`?Sw`a6Vrko0MjpoRKknmuA=NV5xZ&lW_&n1UECzNE5huk4VVloTh=X z5k;#+J`66nP9tTBdCqU+bVvXiOQbaoBl{vqCh_1ttJY%RgiVujpBR}KIN=49L3aJx zNS*p1LHK4?YwIP@xnh0(d$Mr#6g%59k+ePTg$ggUvu>oBnq7+fj_fJfk*&>WRJCtf z$c+fIl~}Q+|0DA5=t#JfYuy^sy!ssR%SHPu4_%cx{_na-7*>B?kibH>CIXqbq{34Ljo}^5r0DHf z|6X4aLrSVsdli&~v@rZLXC4y2RFn~eoPNS{w?)AjwFT|D>eXvy<~tb2B$q4VXhOG# z!~asUj33upOK&$|6#J`Lmvsv#Xy*H8*$p;(VI{sYv*mzdxxOjNSTP?g?gpG_2>dNz zfrf8;i|rxC>LgFJ&<-qa=ZX?~q{&Vm%fq8q!{y>*QyZ)3#Yi0o^;xcIGWn;@ych-1 zqbssuGv2bNF^G|s6{!%i;E zRLzjiHu5q853CPUr%>-}i`msHv9Fcz9qSwCUBo%Z)tC$lj*dp`1)w0Ei4co9G6D63 zyeB31^Li(&p0XLZ|8hJKOvcLIr$LU z&wEpeeakJo90W{U*8OZn484aG0)hDAl!ZDRFfOQG=Hf%4k5K4-6a1U*-7i@0r9;o% za&OnyYIS6YdJVM1zckh?_BG8#cO8LbhC38-MHW{;qKqB$%UPG=MQ2@KWtpZxI*F0U zuE_>|8(%HzYc)}bMDtYjLQf7e3{P6WkD*_Z3R?px=I2xMOH^s>0gFD4fTNo8=9e9% zKn6q>3pdeQ)xeyF(8p)`Z!?h1c3dZ@N2&nR?pGN~ zwUTqzZJ(-s2GNEO(}9IoJWafqbOlCu0f#&)cC(p980YZ7LvO-MHQVUkiXq0jI2KMa zQT&cTS)Uz|)_RFep?b~NfAzfDy6eAill9iq(JF!aRaQLr2iESdtGtRi4C1<>i!vfx z-`p+ceQT<6qY3dMQFa>2?9FzR+JvPG1|_vXUXF6en&MK4`@V6Shks<2IE;%L+PRrf zs;8Ij*4&3hcp}mU#(4&3$#hy+F;qG~$Y~*rFEudH$!=Jl>C*BYvlatq(*<(TIGy1* zYU_v3x@y~pok;kISe*H__Nxx?$nohg>Qh&K4Ga=N^>?yL-HClauo&x1@W}DQrxePp z08nlC$=b5kb;h&X%4CnxvLTAx9mqMQ19IW(M3DgVrj!O^%7|f?6#w4Ed9fU)Qh4$; zt)hiWMytAY*O~p0`J<3y%9fFqmAmKFZ#4Gvn4!lX-B*MyWA02a2K`in^e0@gV{VxN z5z5*(@okI>FX}A0;M#|LAgRqi|*dOz;9zFA zHGvO(cik6avplc!&GPYRxpy zgqqx%xjmLx#i}R_&PnkYnu?g8E6Bes-|F#N{0Mcq7>29f26Q%9!+7cTLc__P^z0(>AZ|7d#_q|(X z4^P^-pQfn1R`!Bf_`mJv?1e6BRS}RvVlQm=X`}JY&cWxy8u>WbTxYq9N7=7kbe#W< zAjrVtyoeZr;krKj4QN{0{qMEM9fKvielMwCS}9{Y@r#(iWl007EkRx%8Goy_4q1r1 zRzCDCKHI`^usC3zem0uzG*O}z=W>oQ=$A3C{d|Wc*v@IobXgFG&5}6)BE_||+2I@h zpqe9(o#?=G$7~0{9FB}f1X}tfF8a;`ePKz5Hhvs)$9egzr4L1EptXcY=}7y zWtQc-VPY188u!EJ5Aa1Eo9Q(GQyuZ)GY`xgTa!_av zy%6Ry0j%0v=9bGc6d09K*L-cvUzP9=dYnVSw(sLflkKG+6_hFwJ#NUIN&WvX`BLp_2tFo!d#JCz?G>Cj+25@lr z6}SjXrg%rY8Q`or)kR!EVqrl4rE88n2V;J>eQiIW(LDD1-3}r8m0z)8Ga<`JN4Idg zW~rj=3Wbui!OnZvj~yYlthV_6hrFLsGEMWNm6w^p9;Z(-_SzQ?9lp;(r31{fHVuaC zI;WqSeKsT)R@k$zISgpUa{s(@Punz34*pvitaZSs7Tm!`Y z@zJsPZ8D1DviCdZZBEsKZicwksl`)Y)3`?96U@_YMkQlyd-?t4*>x9m#%Qa>UA}A; zEk0%T0u+4G4s7)CP0yY!4#9~|fA=;3t|auq-E$T#0HKaB{rVbca5uvzgln~itiYS! 
zY#@wKcKwwl0CVp`O7B0iA%HRaa&R{&2=nOe4}bpPUdi(#zGR$C_O3N;TWUGnTpK) zM3Uwtgg|biY|}bLQ)Z0^Kt}?ggM{UCo0Cu!ekZ{Eh+ezN6hLczSaN@Nn>3WlWg96zJt0rK@?P;B{}Tkza>FWU9}tQ?I2O76qAnrSHw0 z4nHb7$k91)(QV6Krp$4u@YsWG_K^;Ieyy*3y_P&~w&+OCLHM67p4UKg%dT5lUkvU{ z)O_C*LnmF|XDL=Yu&cYSo!1_^Nfz!}#$_pu7On?uDxKjy*J-TVT6T3O=9^`jJO}0J zvp*o3{XN&urOK|Zb1-Zo{6<;s64CWVO*}MoX??=)R75@_i$^`FvfA50NX6P=>I(hN zqmw|K>fw%jJRNr$Z5Q*l>KVOyz;&fUx~XGfZmQP)9jCxdMs$u4!1VGd!huFVLtQo+~`WsjvZ6vOS?qEU0ee9=3>lp~D9O`RarnL%mzDRJhUrjUx9 z7;N$FAJyG#_6N#sowg|0-5qJP7-x1=9eS_W^UKtFQac4*r)bH*yOk!4T!N`o0+rmr zl{vwHrZNwDUWa2x%G;*U#k6b_^{RVH!>*vg+A!Q{n>LB(wZP|<9f2g~bX&zik3sPS zbRxVh0^R^8bR0G$HmLXpG!#{w-IvTHk*H0#(+MmmY69z9=4lPuSnV@6w-rCDV0@U) zK3Vy9l) z^n81E1SS9+Cfe3&O1`hX-AEt&e#SN3{$0$J=LAaN)8VbPFyCOymtM9$hDziR)Bhwl zsVzE=4*hAIV>gx11bk2~CmdF}jH}E`5pMH=Pmil^SgCZ%F>eZ0{kVqq@;F`UNT3Kj zTqNx<7I44nxwq+q4nCcwJYz@aje+ay#YvzV=H{xD#{<(%nM1$g|5BPW=89!eU<2_+FZ$l~2qEgVgSa87FeZQycgDHt2lO?AM< z8Fx!PfrA946M>5Ydd;8yEKk%^-sLTv6zpjrVps?J*(kO9D#%EIi(GA92xS>3J8gPb*+{>4{SfRb100Bi}j}hc5)%a|h-zplwQnZ}T zY@}OC(24-R{pZUJwg4m(4P2<2^Q`_81mubwDI@)7c1j0SW_&EGc|37(yI1Hy_V16?ynJbhhnL4LQ5#v?_knS6%-^A$m~|@DNVu6#WyOPG%~wz+u!k_T z#yraa_!{M~2T3+NZ;6zxI9(;fELfHc)vD$?rxrLJzKE^8y_X(6=*(Vh z?p)@-C5`5CS>wMfJJl@uvu)`{1IDe%b8B^tzt#(FN=})1D1N&<#tT=W2Y=Z7jc25f zxFZRY>DnyH5*A7;cp^Ho|BejtK?|c`3C-JB!xUZ~zt~pinFXEiQOX-skggT%jZz3Y zg5$rP6dkyk1lRsjzY!&V4-LL_eEYmxG@gCCD?;)GZqQpg+gR4rH|d@7QWwSjW1?dJ zR<~=%2JYc^Memk8e9J(y^YF(0>k1aaH>9I|k}+Aak#JGKUGDO;_rAeVWY2BM^roz_ z;pGSMwpvoUVJF1s*o!n4MtZOurfJwi_g?DweLnXwDL!U;L1}z-KPv(20RAq$DRqWV zyZX!zX`9?1p#;Kgrw;=HC)?W0S3l-k2I{AheS~v!mYy$#C#5)(J14XzZ7wA)y|i&d zzQG1V+b125Mv)x++vDvt_lzP(wl14mLl!KsJ~D@`*K%QB5#AAkdoNGZNT+%>-z8f9Z1FwA_d=LK%oT*jsy4D2T9%$Hdk@l1q`jkVD#SA2k z@*8W5GY{6;q%-OB+Z$-fx?cDzw%2%9AN=|!l@PAqWb6(q#(2PG@ z)ZXM~D@dNf63-#6?ixS6E^i*{xpYtUAmi;rVlba=)l18pS19)gJXpnozk5I=4_-zx z9fq8O#8)nZtjVv{fFrKmGkoLPZ6XV0>oDx3EH0<;FjPBFK|%;aK&c)Ly=?0?L4BYp zY-hy;;>{4N!$CQdU$~+>J1g3?oZ9xjm%qx;$;EXFU3WRe-%zL^dlPAGrYV)_GHWdv za|9#auN6UeWiuVW96p<`*hr6kehPW70bVE>r&L~&jiA2lTh!XAN7kYzY^{ILiXdtv zqsed#vuReJGG5DKeaJvMQW%>2yW%(uV(e5DXRqk}krPz!+Zt44T$`*A=gt)u8Ka*u zcNk^fACF(s-@fyfYG_1JK^&brZcxkJe}bTDy;NA2g7%QsdtK6arnXpi=A{qJL{hCK zSeG)V!kagMT~Ry6EMB5cQ4=T{pM=l7k%S-j=uQMj%s^GFKZ@T5RE0*z7nF~tO;oP7 zfh*N-GaLa`8tMLv3{Msb^yqA5(sWc^UjPZH=NZ$Sp*}j!>mQL%CQWwmIH=eD%!g+s zN*<$|-Wy%-{i9bs+#uz*n>a4871iDB1E6vn`ooUAL;Zxz9Xx-64FWHf6GsXEE$KRy z;jJ_&<)9TtJxm<&eKe8$#Mkc>8_$JSaj@B5lF%*@luqdhndi z16X?m=G326g~npmzCL%2dwZ@eF}aOtF4&$LTM-NlNZO(g(hSDj2rs$J#5MuT?_?Sb z-+R_(X*HJtDOUv{gf-5C4>awcZLQR79LIGVad^r2;q?cUw?`MNZ7l|Dm<$e;7kt4O zIx7D-&&12vUwr1Rs~$W!Y7~L-uHHp8Pi!GclsYP-m>FqoQwI(&>WSen+3lCjrJ^ar zmR5cwzGRKrlzlNz6#<>dk)Nd!nePT;Ir`Z3U$H(z-1h{BFI)`(q_LZ#A?nCIbmpy} zL0}GQ_NOaNHAU|jy^Q+eRQ+~Vt;k9Xn58&fQ&ddlPyzlx#wwxV$_T<^<;Ulh;kjDH<+@;}AuV{s0B z11{ zgBXROTw?qW3PqsV2F$arj5yKYnymK&=D%UY`7Mrx@s-_=PP7o0iF(5pF5k}tQgp11 zco;BF9;KtQVSfws67MDc{8Bqm{;18RgSNd};~%7-f=<(jU3gI4@wfy{8L26B5zAw} zjP1hDY&)S4^F}CNG%GRo;hIr;&!g^sLsoHPwF=VWI=zZnOL49U<7ziv%{)+wTl-GN z+}9Vg29alb2>w}Hm%4%kf^-#~ebUw5N&IrhEOod&#j^h42!zLkOB=zoyEgI}`kMVU zHda!s{`*UFPp-CNdY-@v&*C4vp}p)qOjtQrLOD**G28Vup4jhk*QFshueNnB(xmE`(DOjkR_JAW<$JTkXgROi68kbh3 zv7An^41I48MLyz(&u8lQn+t;wXVJPwB3HxlF=ImQLu&k0qPULa)@3b_@wuan!(%9I zbTfLppv-PUtf)qCuKm+vIuL0ZAYFqL7r&TtM*E?)7}X5Xa}z}vs!aBMhat=usf*om z(dB-3E&IspPxsBGA+xDGHYR#_n`b+E9%jCEpqhn?I9L35!_*<>yK5eYUEk=%H&e_Sb^bSl z%$|SmNQ9ZWcft)Loo>0FEbE?eqD>P1g&w`}xO^1#rMF%)+na947&>+Y8BzKFUZGKy7bjyDE5Wy?pZ{=q4>^IWoGU&e`}KN+ z-nX4mtOJutM8PdJzfDLJjyMPJ-G9`%QwS4((+P|wV-HWB25AWFtL&>8y!{w#_{(aa zcCKo3<;UaRzot#b0+sw!wXLpfuUm~tr2o?ST 
zz0TlJKNRJPzxQ)@?PN?sUh64A%T=c z%Orkvf$MM7*4%BngQe^=?D_{J4B~2eIX$-W5`$lwvF*!qF;73UL5F1rm@LmFgW679 zu#qWg)!TM<(MBt`+=l!Olo(xOlJ)iLFjkG^B)B{10uh0X2J#pSRkN?czD0JZ4fo9U z^5uamxmtAk_p%_}>Zn?)Wg1jR#~Qz3J^WFLn1~UCbzVn_?rv{iCS}4utp&7%`Z(7hVnafKW=CWdyT33-NtDIu6NSQNnq^2&3HV-7 z+;6-G0X?vy+_sr-oSaRI6U{(~jfM!@u$Dx-!qTI87vO02skN6km;iZCEpl0#WF%||!^@9q~H<`Tccbq;P(kgpl zVffDP@Z0(^9#5o7xDChWu6SRRR+sWWX|bjzt($w52GlxyX9_;2yd|q}ky@-@G7C0e ztioh6on8vsy0^LlC+~oc0x^T_pPaPN_l3;@xS4DMs~2+^1&5i;4B_)o=Dcpbk01WC z63V-k(vF19I(iruLtnm00;NEDLxAelGJ78p_keK8a}EC>Vguwt;fC%CTnnJjI?wU8 zMCTANd(W8&u zM&25~-DT=nYSZa_DkHi}o3_Y@d;UCzmZ!s$Qi(M2IG+R{T2m@>taMqw6^3#q7jLO# z%|;ad#7Xq@)?zPa~2f$PCQTA8W9bNbi=jwZNoC?A2jIA94CH`1?Tc4+S%k)>H4J$yD6a-YrIV zkA2FU^D)F~^&QJL7bIdZV;yNaoSRLuL+&o<8KlmGq3`f1(rQ<_$ggqlA%i3h8v#CH zt^0=}=gv<{tQgs8c z9M&Q7-y+a8;Qa3*WUoshRowU=DJ4>hmJyXZ9H&BIFa{{kteE$Q_!gEIx;#0A5<-V# zGNxp58qme0(--b0&b^o<-+r4~S%1pdy+N~B&lf=$xo~mmH_tTs<_b{0Y2->grJ?_n z5nDV!`%(6K;lff3fe*&=`36glrl`WM*5yQ~b#9MMgMBi+z)m#Mjo3xS@zI>zx70}| zV@?%MEwPDiNLCR)ydwDhn#$rasO>RBVEH_Evt;1_HDZe}NBwS?-Mnwvhxm(R6Bs;5 zTz7Ab{bJFpv|DnvZvBbPph3EM|8eW%vgwpjP!{a;PFJ2!p1{6*KdmlHmsnk5ilP`gJDc#~oumxqneaJ38Rl%Nu^I))5fj|0+D~V+( zjd>b5MxQ2Sk^u%~vd12Y1xhH~(est^<`j7^3f(whqPC#*RBJr-0DC@A+5v`oWb%K_ zac25r#Eak9?)UDW!>QwtIKa`7=!;-N^>i&<1`aoD#rm2|EnW1 zam!NX{X%NXO_;%q^oS#JbDUKkicf_ihjkZaF#(TXpLYb_SS>D@!mxVf06eDz7-8As z9<*sg!P=Dv+dORj)f229uTIY$bV61_7>|7g-Z5$8l4O@H2IOxhIW(N2>B6G51LjU1 zq-p=LxLQ|Sd5vRY|JXcX4PhS8`$z41A@qDW0Ku1UX5+tBY^l|2$b?1D9Q-MD{D*mb z7;nUL{rJdalOLBp)QpF<5^oL5CpC@-yY)Qx<|H&JRj$l`SYEo&Hi9PcmP)mNysLRz zXp|ZD=XJV*i5x4NE@x8vm>3&5XR&&;G1YQs<&>q?qUSN0GQkx&KK~_RcgH2GWv4rH zy<2GF9pDlW~@E?lHS2!$>i_ZvCfsi$`c;G!ieUj^j6s|zy zX!?wnoexy+i_iOxZtUx+#_3$|MVPt!|AO=5FOZHFcFuqP%-@VKGx&co8v?z1JTIZn zi^=<}+c5xQw4wQJImFK206WDjl~7x|EYtNldH zsX~}9n9D$&`ubcgVJr+7KQZU6-#fk7tVV&a-&D%I+tG>7Tj3fN;?Xu_H~YbElg#~E z;fk&bXnox`*`q48VbXM0x-PiP!7Cq+IdXYX_5LLF*G>Mdb>np2or|7q7y_x4@ib(0 zmTD%_mj$y858F0rHyEfj61!~8@$FqN>8&X%wXD(=;{2R=md5VD`%bj*<^6lREKJVC z?v_d-T4ZfCdfrST--mf{K~8fmxbdlB%9}@Mn>_%!?p$*H3uM+?Z}&i_hW)Tli_yqU z<-$kI-_@O0FJmG!@kAPR6g9wN92bWt;?iWc@kqMzLA+5xw2#=69jje2QjwAIAoxb= zh?kK6oxwylJ%^e( z(6Ykyd(}MGCEBTe_0u-xpX1QC3_Z)lsUMa0`2{0W5Z%A5NPAK6JnS&FbD)HszFh`4 zQt9?Q2(VFy7#Bes(cf|-(t!`Xu$9x)g8Lnq;NpjiG$HUWv8vPK&PCO%Y4ZX$+Q|0O z{5Nn@+1T=-&BJ-1s5m264Hn_1oNoOBTY6c+7b22>v>4E@>`M9DQir^ADdMk_OL6}B z&;{d*G9*~;&p_2Mx2&BMmv5^<*0liYTauG8oA2@WFd1V3%Vx2tbqs&Vn^My;AOp|0f!)~jek_rE@c4^NMzlDmt)9?L{%1I1s;_IH@QjyHvmSSoezmLg zJ8AQB(e}DG;kHYmdcG&q)_ygjcvs1a;sFNNoXETUP%IR=65=J7b*YMvj;|xFr^)Yv zt;4KmCo~1)59YZT97P9*t&uvmrk#nYz%`F#qdHWbzmuX#5Z8VMGgaayp`d89upBp^ zEsy_FHXkDuulS{JSO_l(dZzW>=GrL&2F%FYo60#RJUuDox-POg8j1TET%njJ+*e*bUJtRVSPUo(&_3`%Qk($7c>6F<>a@fuJP2O`AIOBqwE% z{oohMYNm||u4GvN3Dr0|S3}u#g~|`uuwh#FYXX`ue?C@Sb);GuJ8%15mRl;5GA$-# z=2k4~eK!-8LaoVZ)ow2MT5wN2Hy#FW0`=(9hHzV+q}smcU-V-ay*->G-8T&_eIyV~q-FGuo;FHCt?TStha zfcqSR>#X+H`FkzkAcY|ga*lK;zW%6MK=Wc2gpP$H#eAG@u~>ARIDeF9 zNrp;Dm?2XT|J@pN-*G&RftRb!VRi1k@7-Knt%oyGOx2f?_iOL0F(o4}$!^CRVD?Ai zbG?=@fF6xQYj8!>BiU{5SB#tA_g4a&KEDeYv84ff&YjA&LqmfvA2kYtB zEf7$+WlRY=5M5V&oXTRK(-^R9O-7pGkb+f3=&@4e$(QVzGmEh?6lue zdoTA8X4@#WrRLfFh(aA}q;A~Rh6D|vd>WI>({(E?F9>1J1<`o$)re53Dng1g_CzTJ z-3@bdr0%iKFVCifWJOqCEm`Vjp_L5c*kHhiclSs*5&m~sLBN5bDv;7RN7U$wY2O?= zgD^GX!omO}?Agw4Cha$kpD)lnI_UK+U*OA>Pwmd5D+Z$+vasphBDlLzv)s9TI=6kE zpJu8y=AehP5g`8hwQJDw+tGgc1u#5Re&|Bm!=s+YtKeZIShff{F@29K2QOSoGn*(v zF_qizryO<0bH!ynbELC)35?sWRi`c8&($V!AF<1c5!KokOp9`AwCV>#ySOlK6!Aa3Pmp4F z(mFH`bCbxbihSA4*fDhA>f@2dZre) zzN^E6ZpV966LA`<_{Y*XlDtiM`u>}U{tM=Xu->^21;&Mpo9Oe~&Z*rSJ0JiOInWy> 
zUOhyKS%p7@$a>uT4Weq))#RiVcEu8TlxunDn_-2e)T8Ho;zXULJ^JIu&FkazMpNdN z!9%~G5t||=oV@u1gF}27YpyF zUT3cP|D1Z56+l*N6!^Cc!{@V|54UBXep3-yMv20F!AM`K9{NYID#4D68(QSR?+0$A zh9m6V?El7&G_1c*@^KbCy0p*9jh;UI4p&~9|9xbyx$-Ea)i=z6gTYYT0`}8r%0gw7 zp52|3Pp52mBvuJ%N46N>)V6oE`WxB#b*s~xffz3MeM4zd#|2m~ z-5nNibl)vcOhtWS=t0X#Q|BveFANG96paKav=kO$j-1i)Pi@m*JAwaJJ;E*kcSBFU zE!bQ;?5%|iCHqJQfa@7^M}_tTCun1zbXd&WZtYf)qKSA$18#A3np{P!Wj`&(ZaaP8 zCtee=_}bAfN@=|PNj9&csY&ZMRlC~*Lc>E(&7tLAqZ zoSRJyUi$QO)Ood}!-6N1-0)yEQt=N)9ecZZov?z;I9Fs>8U-;xwmY7DMNN2IAK9s7WpxnYVAt=LD<7KY`Kwt2>{bro%b5^r0v9JhWuY zpPW>a`MQ@WBKwK6SxYD8QB%s5{LpcFJ1$@6#$9^wa#{`B-~*=C9sxtP3)>-I(Vj4l z5qp+3OUs3@1f!k{Ukfbl#P4-De1FL(VUL*e$ZE5yzSWGu`&wEnL6xn>)l(Fj@X-H_ zEY+7X&47o8P;m-`eg4)AHik6rYT72-j4PuLtCTGWx=q}+0cKzfAv#`6d)IkXK8m}p zu->Gc)8d4~3Al;*;2j3vKNpn48wguFbf`3*c6eIqRW&PETzfnlNj^bL4d`gMYFPV{WI1?U3iw@E_*ddZN~Yp@ zJ>32AMP-4hY#33Eg!=AC=s#-caLRL*9Edh?U0cP0{G^B%GgJY{Br2 zmP2KAQGm6LFvZVdf!k)*P^TOWh4mgja{D>Ac%}J;#%jB#Ab1{odqm%mwy_TTT}+$P zs~cBGgW=0iH1#*4ok(P@b|oCta{r8<|GINX0wN#sA~Q2lKbLahK3e!bK62>h>fbT7 z(u&8<_e~y*$8rJ_Lwc(s77+9y1y5ifX#9_6Z@$CNKKsuEHNd&lXxo=xi2$JhoFAiw zcu)JD0wJFYgjBc`vw5h=1!J}**S=h1K_4bAjno$n!%O_v$_DO)EEp}CxafNS9>is7 z&2tXUb%hhv{lZ^+0lREg;ds2<33hxx5#p_apy!DNYMc%w5+VYGQ|7Yf33W!0Y&`a` zJ`|VrNVnhE1d%}6Cz9YvW+z$LktJY}5@FG`5kV{$)g69rCyrW{3dHs>mFIYgA3G!j z$Zt988D}P7d!3KyM^p2KzLmKDyS#Aw#ov32GvLuV&8Yvr+_sEF{v$*n!Zq7{ ztW?<vW#XH_Av%>O0)(3FpN%*ez zTw}Ei=2nhSJ}U3&G0z|tEHWhQyh^TVWQzZ$(b)ZjY|CY_S2mesd<;+Qjml&3b+dE? z@qMZX4nh&jtuY^yclb}-xd;LVcH@JA6XkZ zvtUvWS07E8=4|ZnSmIa;a?=&8MTyc?ztJelx< zWGc3vZO=moTPJy{@8A2r``FUwMm4n60{sUq*dAp40KM{i8>GU!hh2*?Seg=KE{=@? z$hhit3Ts}%oAcfCKJ|nAYthXV< zcEg8sf`BJ>8CKITOB|Jyjr!gO3eYdqRd-`HuI^WoLcfilI~fwn={CfNVKoFK;1CkR zf$uP=!no4$nL-)VFdoTYc&^_ABxKgQn4}R{TI!YLak3HBNo-=c0Y33tgv4I8j@ z-bx|`A~RJJK^p?!?Cq8kPe?2n9Kd@FDvDWOo1$busB7K|DH>>FdGfHiEeEV0;|m;6Kaf9tb{!XdWyECGl30r8R-F# z%6e$KXc3EJx`2vd)!!zLRb4q({TK;cY`0AE#?y_w`|lh}`2g%a{^?uvN^5Aa=1(5E z+_`|)<43j4k&&Eu*VSD*w7ct+Po;kZ*54sl0Za-najb?{WTei|HU)F(*il1qgrwj% zhUv`c<*|tyask@pKHV0?Hy8>?2DWFzfkfF`7==nJD9V}bKog>1R-{^?X9NT3SMM}Y<>hIg7Yf{C1 zCpS3Nsgi4uccq!}m<<5N%fk7HLwqY}RA# za+cQ#^|0iShl*K*Ncx>(GRnD8N)CPKm#3>+_i587tdRgxSA+k2H&fe&?qH2_6miV# z1PZEw0;Aum(yGV3^}D`BgPRqBVEotHR|~Jf-@F{VbC zK}w-G#WlFQdvJGmhu{`mk~jBr|Lnv6)B7dYl}s|TXYD;_Cj0EOmJz=%Ebiqj6hsk#Wf zPxVUo^D_Z`sO+e=Ms5S1?01gyZY#-@yN@=%aocp6E8&Y~9+)QC)Qt80o3y{*~`%e|WI7iuPlJzVm8ki`!2*CIy?b#)4!EA2-sc`CZH67_EX zS`QzAInMY?T=pw2%MF-a%N*(9em910js?Ot;45i;aYH#B7rFDIMy10U-3~h zb=v|$SEU7QB4hK7Eaul z2VbM#eWE>*QNN8c?whuO?y{*4l!VypXI z;T6g?#jm?%_jT)O6Fl2xJX&fZ$pdO?u1Radw84Ji5EF>mkxZcF<6fY9n!}(|3Uq%LQF#v-NlifXctc!oY>LH=7*KD)vGbTUO;57{R z1AXrV@t_!c9F|Od1-y-C)}#Wjl332MJ&8tDZM5(JHy`JBouSk{u=?g=W{tqctba}g zr)6*eJ7|1jE9z;r*)_5LdHS->BhGJq?q!bl&wR;AjJf&Y>QRmLys#`}bEK@}O!+*k!pp;Cre5W%%TZ zb@lO)KyXx{fEWufG^CX&O@*@7<`Em&9n;U6Fx@RU#wr`gkvKoAYVKgMpjes)mK&^U z7W|sgn$&Txu4s^;jJ5MkKaVuXeB<)MI1UA8U#*F{0A;Xr7CA+_Pz`~$^!C5V8l#M# zA{5E1SngE3zl2aPaQrhS$-iUzu5Wy8CB$aABc<;V@Up!ou4T3y;MyfI-evm`r|c$h zndZH_pdugr@P=X}AsD}zbvLvGzuOD^<%2$$s})1w|$FYR#)f*z>GS7Y1G&wc+#dKu^0nfIsTT z9UsrE4l;1^Cx6MUtG!dv7)N5iiSy4^&{jZ%#5LQ5krEkG^Zz!5A#HDM&D7am?l421 zpJzkabBIZKim9)3{FiB@Dmf9~-1m)}Ma)jrG;`lc2Udx7NL1Jf|edZ7f$>cOYT60k45czT&nLBFj610Dn zQ<{PL;F6b1hp@>7BlIKhMX{i1N2-mq-Dn_q(BbT@snFAAc3t_nlYVNDT}OWg;=vyk zey*+hr%ti4nXh5tyGVE9v$G6X-$=6tQKKk@InZA zGOQEJmHsl_R;{wCcNn?Qb<&K~pf_eOPP?U$#%LgH4*JIP>%Wd|qXZYL9L{m`NTX-H zqN`2EO9K+0@VdD3ci6+03P0Sv8e9_MRB@5Rdg*KE$IakEK-FO)lWAa29Eh5s3B>4T z_BM$|jz}Z&#YXR4w5Jt94Z?q+*4AU~V!HR`3d&Ym6aL+tWd8^c>Pr~zpzLxobW-cw 
z|8gj_0j0dZ8Pb5-(@tFJNxZL_5RmZ?_Oyir{YnJ#xf?$gwOGCjkeK%9V9eV2sH6J# zj~;$%x-_ZD<-jMTo6_F;U$_39oBA)8siK8)G*F6q@`#4m=&UXM+!q&Z{;BEju#QBO z9H(`q@Q1^az4-qXMuh%P(GsHjxE?v{_I-z1hN0n0l8BfK+e3r`$&9~_f_NZt&V;0L zXzn@jq#xP`5?b7Colv7rE8YS}wgT?uF5-edolnT*9U-1c`-inwsnfi~0pAFN8bZ)` z#7D#^h@O0|TDKqO=YJ_nDRaTQk%u@FoL^h`!AXwn>GV6PB96BkInI<1G?a{t{9EYk zh1%mU*SX=*a_J;_q;Upp5K;swC}$+xgYoDqc&<+r4SVn4J9h+ZMx-_piH92}^qI#gR!>aG_uyc&ES4*m2U3|MJA>P233t&#brj ze)joYHRIhi|8jTsSM52Y?ig*ULRu#Jf`6f@7x^;MeiD`h52V7lP{cR{dOTmSE^yY8 zhkPT#RgTqA-^9F4oV^=-Q3V~pj)cT4T=(BdpC8w>ZI{(Fe>b1asnlNj2btn){@QiI z2bAPS!ScZ`V0QOm*LZ%-k6O4NqI*ppVsK2uM-QBSWfDqnvS+`I@o#y1AdoL?d ztw1aZ=9I2~JV{gR!^u@!!*`*YdokM1ra9Uxrc z$R2YD=x| z*=f=CG9Oq6!r*Vu07lSxs_2sZ+Hj)p+8633*3K45$PJ}GhT@8f+teVSGYK%+=UgAD zZyoTl`!h=3W#p=RgW_v*uh~H3qj41VQT1-9qC5}?I2OUIXyYsZ!MxL_a`e5f*!SSz zZupkEC0yruy$x!SBI1}_{4$-`wBr53J7~fU*b#>*Nw^oL_>1^rq z&h{_Q=9(-2-35EVoW7VwPI2oSS_bh~*CDVJXj@;0JwK|Z*4t*SWC=R&Dc@i262*R& z8~*H{M`RoGA7Bm^&A$lr10m1gc7x+D=|ZmentgyaI>G}W>-L zCx^_hBI7q3S#M^O8a5^e)$4xDMWiu%wqM&u$JUOTUBoUYl5nHgtYp2&% z?D!*ZweMqBJ~LX)H2>D1H$i8@?-Y0)6hvv-N|FjZzS|~DtXV02T_--ma-k6}LFSh@ zEp0DD-eDjJ-x2T)Ers;XZ_El`&Ho0T_#to@qEIDPu8t3lP;TYkGw()R5^d*i9NUf}5& z0AmTRdf#kF4mqzCg2Q0#rzT$(&lV>*yc_v`FfKY;oIgwdY}NHN-)uBx&_NPHT?+?2 zJc72v5cGu-8}%Q5Z;ZkLrUsEf`8|77y9j%t4MoKE_~Y%CrwWXXOhHn7_FeWkV|&4? z=nr?%?KsYN-;(|Bl$LKJ1kC_@KsF350{EY6&ia`@Z*+iF*CdD+==gK|m%(eu2Qj|- zSJcKL5xDUe=q%McoZ|N^nt89a=#Jtp&g_pLwuMqt7p`yr>H)W{)HII}Ebhd%3WE;{ zLFl;cDW>vrLuUZvqje#ns%zlj96Sj2Oz_VL<|25)a3jb%@D2t<7vtkIo?;OiBWw5$ zY{Jr&-en=mX4B$!{IFqCe zIvXqh?(=e!vu3dYTmrf&5ZNn=kY~S|Rss}(5~t0_Cmzq*WmmvAEn7p_&rp z=aepVJMreW+sexE{9UI;iz4NN>xw|;_uu?|SfzMxS}<^EI9#7NpRn0QuL_(2@3HD7fNX%P<-&gJ#pQt&Voea0#WR6zj`QPw>dmk7=R z!H%isPY3eWcSikyE>P8Vl!C0y^SvT#2|<-U+Y$BC6 zpH9~)SP?goE_o4pK#n4KU91NuPQqN?XHD&Hk`|aj=ruJI>@H56R9kgjAVPB*&F2Q| zWPhNdbA$E5V3*ILpnyl*rU%{QGwS(zy%sxmcD?F22AYzD#2sUd-+P0xvZxBnwRT@o zF$ptq?hLgm^xFyzI|G;u9K&yp*S;Q1<@M`07roI1C_XvXG`k)dk>a=>-{D{HPwHVG zu$V0JS*|3>$FoVd+??jift9lVfR)s0zqmg-+#D~emAZ2!%|g10rVGxwhF%wIRyon9 z2pThp5hf*D%@zk;V((gvrqo^WBGu!Fl_qi-GaSy9y={BC9F{le{*7Jqf!j8bT~E;% zNF^m3MWuSznPgmJ7IidzV&L?AaY`cGZ43v`0a-X4kEl3z4cExP+#@EpxJ+;Dlk{i; zh{aKw<>faeF#@*K%y5>Rh*9kWG}Ajc{%z1kH1HjQ(g3*JW^j-WkPF^FXK?%!mJWo^ zoc(k0Y=J%T>jPaPbmHX2`^qX!L&5Jkto@}%zZ;Af1q~JsM1y(Ed zmN{eCMUK`L@Eb~(K6X%qcjUP>3?Tw6Vmc7*pZ_IYzRqe!c|AB;%veahLYq=VPOV(m z!(=ovS7d(oBIy3%mRkHI!z#EM-CVsXkZjsKDfV%vNHy+_Q2dM6yMyVe#B92s1)X=f zh1#Dk6|7d9@tY8zIQTr51Z<+S)KWQ<;=?r_$^Y6F|qHKT;m z`ke2r>G*n|*9v)PCMuqPIsTUGY_Ipo+_VK0mVOgYxtt`7n9gYXU?VoDT?zI373#{D zNe1-jlwWP>>ue$_AhSCnmj0oL#xu7y^&RtDTFxM!+qty@&HT)zq~{QNTCuQukRCGU zQ_wYM`;0Y*NmWqhCx$(E65RNR6A`}UfJXCIEA@A4P=y=x)%K31N>;*vP(5Is>WJ9R zZH4FcAq$zI{DzFaH-r*@xs#9L6h5rXlI5JJSS+UFbYt94z2+FpC1UWA@TzLHVbcL_ zUAp)2F?yRC3TP#Of?Gn485;YJ+p+uSK$qen3Of{Y?~ni4Id1`OaAN}#2gQxnI}&~1 zif?$6fZ9={HqS-Fx@dKBmXvhp$`^!6X&#T| zx9OBw>o;H8I+ckw#=yMX9_X)0GnJ*hxKm)rlfEIC=0qCnS)uy>cx|<<5UJxIjQLx-m|O$!{E;s(o`u_EmS6z z)VV>GtP=1ic==HU=E{dN(Wokw#s+flm)Z3g@IM>&7VUoMY?)6U}$QtAqR(r<)Cj$!)HJU;GoU;$k zBqfv9ehUwQtQ)@;9zH5`p&1>G53|fd@VxrlhPRv{NEPrZsFL8YzBYNJ9&jTV2)_=l zF-EK6t~YLu=o^?^@Q<>8CT6rqUmdIBd?m&ERKA2o|^Ne2Cq(V4w)yg%bk+xp|C z`2q**V}F1?v8?@{zkufw1{Qkxk|N<1F04LdQC=~h#*DLP0}dSLiOHR2L~zR z|JZK6zRSNbEsnHncCjR+b~Ce{Ddh5jKmRqY_Q<~FM~rIjGB>eDT^9f0_l274 zHmJ0fN(B`JgA@`P`Lko@a8L$t#CNAE?DqwwssZD-cLAk1ZA-n8fR(({*acho`8{ofdC$RJbf8?-APQ)-FG#i(>()lrm?*GN$Zqe2{1bBwO#I$ibOo~yYI^Zh@4_;iTz zuN5C}9;r8}WP0WR!nAI1Mg@PRRD@Fv(CwCuws1z1MT3Vl#W&`ba{Jg@Ot;Q}nR_;0 zrQ)5fZc4S7e3Z%DgFSB#zADxGOr2xUw3Z?irk-k1soC4hY{*go_Ws%kg2C)QTbVfd 
zj++i-+=)V!972kfcn;MRVoq(z`^&w&U!#9QOL3A-Go{0bc`VLpD2CMbq^yXwN=$kH z4zQysZ9SDpqCZ)1Kc8LJ8*$7%5GK9dy(y-(s`g6zt`Ui?YO>sI4xTjY01gFRZNTsSq|88bSbN_bz_6KrmE-4gaqW2K}6YgxPQo%-(Dsw1WUY+s13&;8cAD!|2k zwmLJ7s2-6l@ihKAl<@s?Br(OCi~bNUp;~6K3`|0%4SgqFAB%SKJEA(9#c#>?u1^(e z1#XmioZ3%CR&|FwK1)19-n;QGcN4S&f-Z+Mmg@Y8@jTWyq}WsyXD&Y-d&a6;zl1oRGNpVSTEYe zu_rM!_~<$O<$+|m?-+e$6es+CT~Ef-wXn@T%e{mRbIDi$cPAEtW6uS*1~dZR4athh zHkt6KOi*=u(11jKx`*8xJkss~>quwFFW3p^MJ=*3MSnxZ#XL^WiFJTWxQL?^aiBI` zd(c)h`d7l*eg~=n#wbNyUF=c-dLDj0=}BmR|2}g>zzwM`llI0O<2dD-(GjA{df2Ry zvUN{`vhHo>A4KaI^8HRzw~o!TC7@>xr2X0|;*L1M3d2Q!m7aV4KDS(Z=9hP-XsCvo#pn^mDTKjN!WL_rZ#+%p=eenS70_3-IfpMXiaqP1ED}NZZ z2W>Kzu%PhBAS{di>RuP4GktjwJ(lTMa+p&hyHc_z5Nw@Qcu`&nwp@9)8)r7 z5m?K7X>J>@&eVZaKJ&9y{tQ3R#TT^`m3Bu}ae;eQIQq4kvX)N*L zb{lioNsz)!1OY3a{#oIJUd4ALzH@#W(^%YWYT$3PoJl>FAu7A=t1bTL?QLP8a_h>qTH9Qi1mEf{8o8WOAAFdEVQOeM^8*v5VBE z^+ASbgU36noM3`|10HxK6u31WW;y+^N?MjPX8}8^NQlu1zMi@RjI?tRBe~m&f}ddq zHwAoU*FyEk4NmasEK&0n`3*M!Qp4gqwnz%WZ@~Aje}kWIbgX6?9jg)Y+tOiM$Yv!6 zDb`L?icfk2sW>Z#-(HpJ)P3_`u6*R=+~)J#sP_ol<8hzBv*@+z6$*6=)k2q#=xlgQ zodccuP6{iFO&3G?7jMa$KR?pu;p@6uRlW{x5fT(Wq!}ob?A5d6QBgt>8({LJDC;`` zcC}}C?xRS~FaEv&Pc)h@DNh}I#3)YBOY2oo38a=)X&%uG>|% z2({XVx$%C+_N9>Wu$N88Hx-(K4f=Q;^fTa2pYAI7cut!&r;&$~Dl{3++r(_qG_6U+ zronR-%~#9OjQh|(sMMPrJ6G=2=wMEfp=8C&>;)au<@Bp&EDuSi14`NnZ?pz`M}Whn zWIRuskMTO^v2uAjwfW@sz>e+i)out&91sQjV^et|br2 zHgMc?C<7G;a7Qa9#PgNGjvOnM; zh~4X+lUkgX;Kkf*<}B(hBpDooww|X8q&SART1W`_+=U3g3#SA!KaPEszSy&0hF5>H z6pBNn+h&^*Pv3s%Nq>NY?o+!r9@aa_DZkacJ6UdgBRiSJ?M|arHoa_!1p>#I(*zwU z9XOn@3e5O$Jz6<-V&6Qd!v3zV?*r9YUa}eu?Jy_Kv^Tv;BySK&1YTT#xP1lN5Geqc zn-MLcVDX(%jP{$ya!S1nG9XXT9lWDGhi4=956{UhQLWGN%a3)w>j3En^;VV0G_kgo zwMjb0{i6J5i`}hHu5-*hvS$t((=>(`W00>0kzVqi)kr}LB=`-dWF|QxNq?&KB;kyk zYXuKw6_Xb^jDjXV_5kMv=0!cWsJ+?tu-}6@NKUd9t9U`EpLlO3UWK56G^QOPV22wS ztLFg-=BPF~Uxg&6TN(O(#WEnw3!<4p5#ZMvf3Q>h6(Kwpkvg4RZQzQU5F>@SvqdQ; zoKUwB7$W|3M#?y8<6FENoo& z6~J;AUuc7VsIy)P#^lXG1%YHd{&ciwcTqIo>x>{Nbn zG-w&D28H0_wJo@PSk*7X5WU(zA$@J;|3i1+8Z0|Md9|tuoDv4{<}JA@l>BM@Sjo8C zdHO{3Zl#IF@)6|v7=(74@hcoWyid`0*Fi_LKWg_KU8Njgt!bAd0eV)fF)z)@305La z#lrV0TKFr8a!f7k8LDq38oehjV|#iX1u7>x}9_U8J~IjoMJF>aZBhG zEsTu&Luzng-i8?jPI~j0x_zr0sJ*&BlI%QGMvZGHr*5_K&gPWssyT7VSGne}zybD* zzTd%KQLoQGiko!L!)-ThKHPZ*oW2h-j9+W>1f5uNzW?QyN zR6?G$hO3MLLu={9;)%8zmdQJS+~LfkTj?;3_Y`?Al0`st9#OF_zw3IocINqmik(vQ zRKWf5VC+o8%&3dcsBqz)*Xq8wH&TR!8YmPn^{aRzO8x8#xrVjjc)8->HW4%}t3_ZP}3T`&xMzaT`wBV?ZCEvKNP(kx3KHZ|qb_<{tG-HECtJx=ZerIxb) z(?uw8C+T;VErHG0Ii|S|bN=0|>3&7!W%`nX%M$0%xc=SP8X;n)5#P9Fg526J8||-n z)wN%!W?Ns`KG-5vCs*XIG{pBx#k213-%zql=w^C;ZQho8{uE2i!bZmJcT=h-Re(d4 zTL%u8_`uEU`g!M=jA@HNTU+CJnMYz^ikz6b>+t)3JcEmEZVJ>XVAyRuV6YN%jjU`b z+au7j46?Kt?zepY1u8nUPFGK8Z)$nPE!puOMCa67eKx%xPGK!f<-^Nrtp^0PX2+jX z7GPw>!aKsvS#x?uYXw*H=GwG19=QtB^MF&Am1YC%Vf%L;j}pl5Sj(~=@&2!WChhJ{ zG?zqdxz}g}6j)_(3f`xsVxQB^{ zIV*@?5}8d~Ap#UTdbM1-PJHwF|KEp1PP(olcv61}<{Pisf79fE4zGVHX=_%!T5&r? 
[base85-encoded binary patch payload — not human-readable; line structure lost in extraction]