diff --git a/.gitignore b/.gitignore index a9e3cb97..64bc7fc8 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,6 @@ Cargo.lock *.out .clang-format* .cargo -.vscode \ No newline at end of file +.vscode +polybench-tests/ +notes/ \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/AddressRewriting.cpp b/src/dios-egraphs/Diospyros/AddressRewriting.cpp new file mode 100644 index 00000000..fcfce044 --- /dev/null +++ b/src/dios-egraphs/Diospyros/AddressRewriting.cpp @@ -0,0 +1,531 @@ +#include + +#include +#include +#include +#include +#include + +#include "VectorizationUtilities.cpp" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +const std::string MAIN_FUNCTION_NAME = "main"; +const std::string NO_OPT_PREFIX = "no_opt_"; +const std::string GATHER_NAME = "llvm.masked.gather.v4f32.v4p0f32"; +const uint32_t VECTOR_WIDTH = 4; +const uint32_t FLOAT_WIDTH = 4; + +namespace { +struct AddressRewritingPass : public FunctionPass { + static char ID; + AddressRewritingPass() : FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } + + using chunks_t = Chunking::chunks_t; + using chunk_t = Chunking::chunk_t; + + std::vector get_gather_calls(chunk_t chunk) { + std::vector gather_calls = {}; + for (auto 
instr : chunk) { + if (Gather::isa_gather_instruction(instr)) { + gather_calls.push_back(instr); + } + } + return gather_calls; + } + + Instruction *get_first_non_phi_instr(chunk_t chunk) { + assert(!chunk.empty()); + for (auto instr : chunk) { + if (!isa(instr)) { + return instr; + } + } + return chunk.back(); + } + + std::map> + get_gather_addresses_from_chunk(chunk_t chunk) { + std::map> result = {}; + for (auto instr : chunk) { + if (Gather::isa_gather_instruction(instr)) { + std::vector addresses = + Gather::get_gather_addresses(instr); + result.emplace(instr, addresses); + } + } + return result; + } + + std::vector join_all_addresses( + std::map> gather_map) { + std::vector result = {}; + for (auto [_, addresses] : gather_map) { + result.insert(result.end(), addresses.begin(), addresses.end()); + } + return result; + } + + std::map> get_base_offsets( + std::vector addresses, std::vector array_bases, + ScalarEvolution *SE) { + std::map> result = {}; + for (auto address : addresses) { + result.emplace(address, + Array::get_base_reference(address, array_bases, SE)); + } + return result; + } + + std::vector find_minimum_cover_for_offsets( + std::set offsets) { + std::set minimum_cover = {}; + for (auto offset : offsets) { + assert(offset >= 0); + int remainder = + ((offset / FLOAT_WIDTH) % VECTOR_WIDTH) * FLOAT_WIDTH; + int aligned_addr = offset - remainder; + minimum_cover.insert(aligned_addr); + } + std::vector result = {}; + for (auto offset : minimum_cover) { + assert(offset >= 0); + result.push_back(offset); + } + std::sort(result.begin(), result.end()); + return result; + } + + /** + * Rewrites Load-Gather Addresses in a Chunk + * + * Grabs Chunks + * For each chunk: + * Gets Gather Addresses + * Gets Gather call instruction + * Builds a map from Gather Address to Gather call instruction [many to + * one] + * + * Maps each address to pair of an array base and an offset from the + * array base + * Remove addresses with unknown array base or array offset + * 
Build a map between gather call instruction and the array base(s) + * and the corresponding offsets + * Use the Gather Map from address to call and + * map from address to pair of base / offset + * + * For each gather represented + * Generate a "cover" for all the unique array offsets using + * aligned & consecutive load operations + * Build a map from each load to the array offsets and base + * it corresponds to + * Generate appropriate shuffles + * Stitch in the shuffles to the old gather call instruction + * by inserting at the beginning of the chunk + * Remove the old gather call instruction + * + * + * E.g. suppose we do a gather from A[3], A[5], B[2], C[7] + * We build a load(A[0-3]), load(A[4-7]), load(B[0-3]), load(C[4-7]) + * We then build the appropriate shuffles, here 3 shuffles are needed + * Then we stitch in the instruction + * + * We would have gathered + */ + void rewrite_addresses(BasicBlock &B, std::vector array_bases, + AliasAnalysis *AA, ScalarEvolution *SE, + LLVMContext &C, unsigned address_space) { + chunks_t chunks = Chunking::build_chunks(&B, AA); + + // Define floating point 4 wide vector pointer type, e.g. 
<4 x float> * + Type *float_ty = Type::getFloatTy(C); + VectorType *vector_4_float_ty = VectorType::get(float_ty, VECTOR_WIDTH); + PointerType *pointer_to_vector_4_float_ty = + PointerType::get(vector_4_float_ty, address_space); + + for (chunk_t chunk : chunks) { + if (chunk.empty()) { + continue; + } + errs() << "This is the chunk\n"; + for (auto instr : chunk) { + errs() << *instr << "\n"; + } + std::vector gather_calls = get_gather_calls(chunk); + std::map> gather2addresses = + get_gather_addresses_from_chunk(chunk); + std::vector all_addresses = + join_all_addresses(gather2addresses); + // get offsets and base arrays for all addresses + std::map> addresses2base_and_offset = + get_base_offsets(all_addresses, array_bases, SE); + + // start construction after the first element that is non-phi in the + // chunk + Instruction *first_instr = get_first_non_phi_instr(chunk); + assert(first_instr != NULL); + IRBuilder<> builder(first_instr); + for (auto gather : gather_calls) { + assert(Gather::isa_gather_instruction(gather)); + errs() << "Gather Instruction\n"; + errs() << *gather << "\n"; + // get all base arrays and offsets required for the gather + std::set> all_base_offset_pairs = {}; + for (auto address : gather2addresses[gather]) { + all_base_offset_pairs.insert( + addresses2base_and_offset[address]); + } + // if any of the base array or offsets are negative, skip this + // gather + // Also get all base arrays required for the cover + // And for each base array, get all the offsets needed + std::set base_arrays = {}; + std::map> base2offsets = {}; + bool continue_loop = false; + for (auto [base, offset] : all_base_offset_pairs) { + base_arrays.insert(base); + if (base2offsets.count(base) == 1) { + base2offsets[base].insert(offset); + } else { + base2offsets[base] = {offset}; + } + if (base < 0 || offset < 0) { + continue_loop = true; + break; + } + } + if (continue_loop) { + continue; + } + + std::map>, + std::vector> + vector_load_addresses2usage_flags = {}; + 
std::map>, Value *> + vector_load_addresses2load_instr = {}; + // find minimium covers for each offsets + for (auto [base, offsets] : base2offsets) { + assert(base >= 0); + std::vector minimum_cover = + find_minimum_cover_for_offsets(offsets); + // build the minimum covers + for (auto aligned_offset : minimum_cover) { + Value *bitcast_instr = builder.CreateBitOrPointerCast( + array_bases[base], pointer_to_vector_4_float_ty, + "bitcast-for-alignment"); + Value *aligned_gep_instr = builder.CreateConstGEP1_32( + vector_4_float_ty, bitcast_instr, + (aligned_offset / (FLOAT_WIDTH * VECTOR_WIDTH)), + "gep-for-aligned-addr"); + Value *aligned_load = builder.CreateLoad( + vector_4_float_ty, aligned_gep_instr, false, + "load-aligned-addr"); + std::vector load_addresses = {}; + for (int i = 0; i < VECTOR_WIDTH; i++) { + load_addresses.push_back(aligned_offset + + i * FLOAT_WIDTH); + } + if (vector_load_addresses2usage_flags.count( + {base, load_addresses}) == 0) { + std::vector all_false = {}; + for (int i = 0; i < VECTOR_WIDTH; i++) { + all_false.push_back(false); + } + vector_load_addresses2usage_flags[{ + base, load_addresses}] = all_false; + } + for (auto offset : offsets) { + std::vector::iterator itr = + std::find(load_addresses.begin(), + load_addresses.end(), offset); + if (itr != load_addresses.cend()) { + int index = + std::distance(load_addresses.begin(), itr); + vector_load_addresses2usage_flags[{ + base, load_addresses}][index] = true; + } + } + vector_load_addresses2load_instr[{ + base, load_addresses}] = aligned_load; + } + } + // the correct order of the base/offset pairs for the gather + std::vector> + actual_ordered_base_offsets = {}; + std::vector addresses = gather2addresses[gather]; + std::reverse(addresses.begin(), addresses.end()); + for (auto address : addresses) { + actual_ordered_base_offsets.push_back( + addresses2base_and_offset[address]); + } + errs() << "Actual Gather Ordered Base and Offsets\n"; + for (auto [base, offset] : 
actual_ordered_base_offsets) { + errs() << base << ", " << offset << "\n"; + } + + std::vector< + std::pair>, + std::vector>> + to_merge = {}; + for (auto [pair, usage_flags] : + vector_load_addresses2usage_flags) { + to_merge.push_back({pair, usage_flags}); + } + assert(to_merge.size() != 0); + auto [first_base, first_load_addresses] = + to_merge.front().first; + std::vector first_usage = to_merge.front().second; + Value *final_shuffle = vector_load_addresses2load_instr[{ + first_base, first_load_addresses}]; + Value *initial_load = final_shuffle; + + // merge size of 1: + if (to_merge.size() == 1) { + std::vector shuffle_indices = {}; + for (auto [actual_base, actual_offset] : + actual_ordered_base_offsets) { + bool found_and_added = false; + for (int i = 0; i < first_load_addresses.size(); i++) { + auto first_load_address = first_load_addresses[i]; + if ((first_base == actual_base) && + (first_load_address == actual_offset)) { + shuffle_indices.push_back(i); + found_and_added = true; + } + } + if (!found_and_added) { + shuffle_indices.push_back(0); + } + } + assert(shuffle_indices.size() == VECTOR_WIDTH); + const std::vector mask_vector = shuffle_indices; + // build the shuffles back to the gather + ArrayRef mask = ArrayRef(mask_vector); + final_shuffle = builder.CreateShuffleVector( + final_shuffle, final_shuffle, mask, "one-shuffle-only"); + + // if the shuffle indices is the identity, just use the load + std::vector identity{0, 1, 2, 3}; + if (shuffle_indices == identity) { + final_shuffle = initial_load; + } + } else { + // merge size of 2 or more + auto [second_base, second_load_addresses] = + to_merge[1].first; + + // do the first 2 together + std::vector shuffle_indices = {}; + for (auto [actual_base, actual_offset] : + actual_ordered_base_offsets) { + bool found_and_added = false; + for (int i = 0; i < first_load_addresses.size(); i++) { + auto first_load_address = first_load_addresses[i]; + if ((first_base == actual_base) && + (first_load_address == 
actual_offset)) { + shuffle_indices.push_back(i); + found_and_added = true; + } + } + for (int i = 0; i < second_load_addresses.size(); i++) { + auto second_load_address = second_load_addresses[i]; + if ((second_base == actual_base) && + (second_load_address == actual_offset)) { + shuffle_indices.push_back(i + VECTOR_WIDTH); + found_and_added = true; + } + } + if (!found_and_added) { + shuffle_indices.push_back(0); + } + } + assert(shuffle_indices.size() == VECTOR_WIDTH); + const std::vector mask_vector = shuffle_indices; + // build the shuffles back to the gather + ArrayRef mask = ArrayRef(mask_vector); + Value *second_load = vector_load_addresses2load_instr[{ + second_base, second_load_addresses}]; + final_shuffle = builder.CreateShuffleVector( + final_shuffle, second_load, mask, "one-shuffle-only"); + + // do the remainder + // finish after first merge + for (int i = 2; i < to_merge.size(); i++) { + auto [remaining_pair, _] = to_merge[i]; + uint32_t remaining_base = remaining_pair.first; + std::vector remaining_offsets = + remaining_pair.second; + // do a shuffle into the correct positions + std::vector shuffle_indices = {}; + int j = 0; + for (auto [actual_base, actual_offset] : + actual_ordered_base_offsets) { + bool found_and_added = false; + for (int i = 0; i < remaining_offsets.size(); i++) { + uint32_t remaining_offset = + remaining_offsets[i]; + if ((actual_base == remaining_base) && + (actual_offset == remaining_offset)) { + shuffle_indices.push_back(i); + found_and_added = true; + } + } + if (!found_and_added) { + shuffle_indices.push_back(j + VECTOR_WIDTH); + } + ++j; + } + assert(shuffle_indices.size() == VECTOR_WIDTH); + const std::vector mask_vector = shuffle_indices; + // build the shuffles back to the gather + ArrayRef mask = ArrayRef(mask_vector); + final_shuffle = builder.CreateShuffleVector( + vector_load_addresses2load_instr[remaining_pair], + final_shuffle, mask, "one-shuffle-only"); + } + } + + // // for the first pair, do the shuffling into 
correct position + // // do a shuffle into the correct positions + + // std::vector shuffle_indices = {}; + // for (auto [actual_base, actual_offset] : + // actual_ordered_base_offsets) { + // bool found_and_added = false; + // for (int i = 0; i < first_load_addresses.size(); i++) { + // auto first_load_address = first_load_addresses[i]; + // if ((first_base == actual_base) && + // (first_load_address == actual_offset)) { + // shuffle_indices.push_back(i); + // found_and_added = true; + // } + // } + // if (!found_and_added) { + // shuffle_indices.push_back(0); + // } + // } + // assert(shuffle_indices.size() == VECTOR_WIDTH); + // const std::vector mask_vector = shuffle_indices; + // // build the shuffles back to the gather + // ArrayRef mask = ArrayRef(mask_vector); + // final_shuffle = builder.CreateShuffleVector( + // final_shuffle, final_shuffle, mask, "one-shuffle-only"); + + // // finish after first merge + // for (int i = 1; i < to_merge.size(); i++) { + // auto [remaining_pair, _] = to_merge[i]; + // uint32_t remaining_base = remaining_pair.first; + // std::vector remaining_offsets = + // remaining_pair.second; + // // do a shuffle into the correct positions + // std::vector shuffle_indices = {}; + // int j = 0; + // for (auto [actual_base, actual_offset] : + // actual_ordered_base_offsets) { + // bool found_and_added = false; + // for (int i = 0; i < remaining_offsets.size(); i++) { + // uint32_t remaining_offset = remaining_offsets[i]; + // if ((actual_base == remaining_base) && + // (actual_offset == remaining_offset)) { + // shuffle_indices.push_back(i); + // found_and_added = true; + // } + // } + // if (!found_and_added) { + // shuffle_indices.push_back(j + VECTOR_WIDTH); + // } + // ++j; + // } + // assert(shuffle_indices.size() == VECTOR_WIDTH); + // const std::vector mask_vector = shuffle_indices; + // // build the shuffles back to the gather + // ArrayRef mask = ArrayRef(mask_vector); + // final_shuffle = builder.CreateShuffleVector( + // 
vector_load_addresses2load_instr[remaining_pair], + // final_shuffle, mask, "one-shuffle-only"); + // } + + // replace all uses of gather with the final shuffle + // then remove gather + gather->replaceAllUsesWith(final_shuffle); + gather->eraseFromParent(); + } + } + } + + virtual bool runOnFunction(Function &F) override { + if (F.getName() == MAIN_FUNCTION_NAME || + (F.getName().size() > NO_OPT_PREFIX.size() && + F.getName().substr(0, NO_OPT_PREFIX.size()) == NO_OPT_PREFIX)) { + return false; + } + AliasAnalysis *AA = &getAnalysis().getAAResults(); + ScalarEvolution *SE = + &getAnalysis().getSE(); + TargetLibraryInfo *TLI = + &getAnalysis().getTLI(F); + std::vector array_bases = Array::get_array_bases(F, TLI); + LLVMContext &C = F.getContext(); + unsigned address_space = 0; + bool found_address_space = false; + for (auto &B : F) { + for (auto &I : B) { + if (GetElementPtrInst *gep = dyn_cast(&I)) { + address_space = gep->getAddressSpace(); + found_address_space = true; + break; + } + } + } + if (!found_address_space) { + return false; + } + for (auto &B : F) { + rewrite_addresses(B, array_bases, AA, SE, C, address_space); + } + return true; + } +}; +} // namespace + +char AddressRewritingPass::ID = 0; + +// Automatically enable the pass. 
+// http://adriansampson.net/blog/clangpass.html +static void registerAddressRewritingPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + PM.add(new AddressRewritingPass()); +} + +static RegisterPass X("addressrw", + "Address Rewriting Pass", + false /* Only looks at CFG */, + true /* Analysis Pass */); + +static RegisterStandardPasses RegisterMyPass( + PassManagerBuilder::EP_EarlyAsPossible, registerAddressRewritingPass); \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp index bd98073a..25fa2b89 100644 --- a/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp +++ b/src/dios-egraphs/Diospyros/LoadStoreMovement.cpp @@ -5,17 +5,26 @@ #include #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Type.h" #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" + using namespace llvm; +const std::string MAIN_FUNCTION_NAME = "main"; +const std::string NO_OPT_PREFIX = "no_opt_"; +const std::string GATHER_NAME = "llvm.masked.gather.v4f32.v4p0f32"; +const uint32_t VECTOR_WIDTH = 4; + namespace { struct LoadStoreMovementPass : public FunctionPass { static char ID; @@ -23,229 +32,1271 @@ struct LoadStoreMovementPass : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); } - void rewrite_stores(Function &F) { + bool isa_gather_instruction(Instruction *instr) { + if (CallInst *call_instr = dyn_cast(instr)) { + Function *fun = call_instr->getCalledFunction(); + // Source: + // 
https://stackoverflow.com/questions/11686951/how-can-i-get-function-name-from-callinst-in-llvm + // Fun Could be NULL, in which case indirect call occurs, i cannot + // get name. + if (fun) { + if (fun->getName() == GATHER_NAME) { + return true; + } + } + } + return false; + } + + bool may_alias(Value *mem_addr1, Value *mem_addr2, AliasAnalysis *AA) { + return (!AA->isNoAlias( + mem_addr1, + LocationSize::precise( + mem_addr1->getType()->getPrimitiveSizeInBits()), + mem_addr2, + LocationSize::precise( + mem_addr2->getType()->getPrimitiveSizeInBits())) || + AA->isMustAlias(mem_addr1, mem_addr2)); + } + + std::vector get_gather_addresses(Instruction *call_to_gather) { + assert(isa_gather_instruction(call_to_gather)); + + Instruction *insert_element_instr = + dyn_cast(call_to_gather->getOperand(0)); + if (insert_element_instr == NULL) { + throw "Gather Arguments Pointer Vector was NULL"; + } + + std::vector gather_addresses = {}; + // hardcode to gathers of length 4 only + for (int i = 0; i < VECTOR_WIDTH; i++) { + Value *pointer = insert_element_instr->getOperand(1); + gather_addresses.push_back(pointer); + Instruction *new_insert_element_instr = + dyn_cast(insert_element_instr->getOperand(0)); + insert_element_instr = new_insert_element_instr; + } + return gather_addresses; + } + + std::vector get_gather_insert_instrs( + Instruction *call_to_gather) { + assert(isa_gather_instruction(call_to_gather)); + + Instruction *insert_element_instr = + dyn_cast(call_to_gather->getOperand(0)); + if (insert_element_instr == NULL) { + throw "Gather Arguments Pointer Vector was NULL"; + } + + std::vector insert_instrs = {}; + // hardcode to gathers of length 4 only + for (int i = 0; i < VECTOR_WIDTH; i++) { + Value *pointer = insert_element_instr->getOperand(1); + insert_instrs.push_back(insert_element_instr); + Instruction *new_insert_element_instr = + dyn_cast(insert_element_instr->getOperand(0)); + insert_element_instr = new_insert_element_instr; + } + return insert_instrs; + } 
+ + /** + * True iff the gather instr can be moved before prior_instr + */ + bool can_move_gather_instruction_before(Instruction *gather_instr, + Instruction *prior_instr, + AliasAnalysis *AA) { + // If the prior instruction is a phi node, you cannot move the + // instrution back + if (isa(prior_instr)) { + return false; + } + + // If the prior Instruction is a call inst [which is not to a gather + // intrinsic], do not push the current instruction back A call + // instruciton could have side effects to memory In addition, a call + // could be to @llvm.memset.p0i8.i64(i8* nonnull align 16 + // dereferenceable(40) %2, i8 0, i64 40, i1 false) or + // @memset_pattern16(i8* nonnull %2, i8* bitcast + // ([4 x float]* @.memset_pattern to i8*), i64 40) #6 which + // require alias analysis as well + + if (isa(prior_instr) && + !isa_gather_instruction(prior_instr)) { + return false; + } + + // If the prior instruction is a gather instruction, do comparisons of + // the addresses + + std::vector current_instr_addrs = + get_gather_addresses(gather_instr); + if (isa(prior_instr) && isa_gather_instruction(prior_instr)) { + std::vector prior_instr_addrs = + get_gather_addresses(prior_instr); + for (auto curr_addr : current_instr_addrs) { + for (auto prior_addr : prior_instr_addrs) { + assert(curr_addr->getType()->isPointerTy()); + assert(prior_addr->getType()->isPointerTy()); + if (may_alias(curr_addr, prior_addr, AA)) { + return false; + } + } + } + } + + // If the prior instruction is used in the load's + // arguments, do not push it back + + // In this case, the prior instruction could only be the geps in + // the current instruction addreses accessed + std::vector current_uses = + get_gather_insert_instrs(gather_instr); + for (auto current_use : current_uses) { + if (current_use == prior_instr) { + return false; + } + } + + // If the prior instruction alias with the load + // instruction, do not push the store back + // We do not rehandle gather instructions, which were 
already handled. + if (prior_instr->mayReadOrWriteMemory() && + !isa_gather_instruction(prior_instr)) { + Value *prior_addr = NULL; + if (isa(prior_instr)) { + prior_addr = + dyn_cast(prior_instr)->getPointerOperand(); + } else if (isa(prior_instr)) { + prior_addr = + dyn_cast(prior_instr)->getPointerOperand(); + } else { + errs() << *prior_instr << "\n"; + throw "Unmatched Instruction Type"; + } + for (auto curr_addr : current_instr_addrs) { + assert(curr_addr->getType()->isPointerTy()); + assert(prior_addr->getType()->isPointerTy()); + if (may_alias(curr_addr, prior_addr, AA)) { + return false; + } + } + } + return true; + } + + void move_forward_gather_instrs(Function &F) { AliasAnalysis *AA = &getAnalysis().getAAResults(); + for (auto &B : F) { - bool has_float = false; + // Grab all instructions + std::vector all_instrs = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + all_instrs.push_back(instr); + } + + // Perform Pushing Back of Gather Instructions + std::vector final_instrs_vec = {}; for (auto &I : B) { - if (I.getType()->isFloatTy()) { - has_float = true; + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + + // Place any non-Gather Instructions at the end of the list of + // instructions + if (!(isa_gather_instruction(instr))) { + final_instrs_vec.push_back(instr); + continue; + } + + // Handle Load Instructions + int insertion_offset = final_instrs_vec.size(); + while (true) { + Instruction *gather_instr = instr; + // If there is no prior instruction, push back at current + // offset, and stop. 
+ if (insertion_offset - 1 < 0) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + gather_instr); + break; + } + + Instruction *prior_instr = + final_instrs_vec[insertion_offset - 1]; + + if (!can_move_gather_instruction_before(gather_instr, + prior_instr, AA)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + gather_instr); + break; + } + + // Otherwise, keep pushing back the load instruction + --insertion_offset; + assert(insertion_offset >= 0); } } - if (!has_float) { - continue; + + // First, insert clone all instructions, and insert them into the + // basic block at the very beginning + + // build ordered vector of cloned instructions + // build map from original vector to cloned vector + std::vector cloned_instrs = {}; + std::map original_to_clone_map = {}; + std::map clone_to_original_map = {}; + for (Instruction *instr : final_instrs_vec) { + Instruction *cloned_instr = instr->clone(); + cloned_instrs.push_back(cloned_instr); + original_to_clone_map[instr] = cloned_instr; + clone_to_original_map[cloned_instr] = instr; } - // We also skip over all basic blocks without stores - bool has_store = false; + + // Grab first instruction to build before at. + Instruction *first_instr = NULL; for (auto &I : B) { - if (auto *op = dyn_cast(&I)) { - has_store = true; + first_instr = dyn_cast(&I); + assert(first_instr != NULL); + break; + } + IRBuilder<> builder(first_instr); + builder.SetInsertPoint(&B); + + for (Instruction *cloned_instr : cloned_instrs) { + // set insert point to be before beginning if inserting phi + // instruction + if (isa(cloned_instr)) { + builder.SetInsertPoint(first_instr); + } + builder.Insert(cloned_instr); + if (isa(cloned_instr)) { + builder.SetInsertPoint(&B); + } + // The cloned instruction has arguments pointing backwards to + // prior original instructions. Some of these prior instructions + // themselves will themselves be cloned. 
We need to replace the + // prior original instructions with clones instructions + int num_operands = cloned_instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Value *clone_operand = cloned_instr->getOperand(i); + Instruction *clone_operand_instr = + dyn_cast(clone_operand); + if (clone_operand_instr != NULL) { + if (original_to_clone_map.count(clone_operand_instr) > + 0) { + Instruction *replacement_operand = + original_to_clone_map[clone_operand_instr]; + Value *replacement_value = + dyn_cast(replacement_operand); + assert(replacement_value != NULL); + cloned_instr->setOperand(i, replacement_value); + } + } else { + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + cloned_instr->setOperand(i, + original_instr->getOperand(i)); + } + } + + // Furthermore, we need to change all uses of the original + // instruction to be the new cloned instruction + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + if (Value *original_val = dyn_cast(original_instr)) { + Value *cloned_val = dyn_cast(cloned_instr); + assert(cloned_val != NULL); + original_val->replaceAllUsesWith(cloned_val); } } - if (!has_store) { - continue; + + // Finally, delete all the original instructions in the basic block + // Do this in reverse order. 
+ std::reverse(all_instrs.begin(), all_instrs.end()); + for (Instruction *instr : all_instrs) { + instr->eraseFromParent(); } + } + } + + void move_forward_gep_instrs(Function &F) { + AliasAnalysis *AA = &getAnalysis().getAAResults(); - std::vector reversed_instructions = {}; - std::vector all_instructions = {}; - int head_pointer = - -1; // points to head location in all_instructions + for (auto &B : F) { + // Grab all instructions + std::vector all_instrs = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + all_instrs.push_back(instr); + } + + // Perform Pushing Back of Gep Instructions + std::vector final_instrs_vec = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + + // Place any non-Gep Instructions at the end of the list of + // instructions + if (!(isa(instr))) { + final_instrs_vec.push_back(instr); + continue; + } + + // Handle Gep Instructions + int insertion_offset = final_instrs_vec.size(); + while (true) { + Instruction *load_instr = instr; + // If there is no prior instruction, push back at current + // offset, and stop. 
+ if (insertion_offset - 1 < 0) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + Instruction *prior_instr = + final_instrs_vec[insertion_offset - 1]; + + // If the prior instruction is a phi node, do not push the + // current instruction back + if (isa(prior_instr)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + // If the prior instruction is used in the load's + // arguments, do not push it back + int num_operands = load_instr->getNumOperands(); + bool break_while = false; + for (int i = 0; i < num_operands; i++) { + Value *load_operand = load_instr->getOperand(i); + Instruction *load_operand_instr = + dyn_cast(load_operand); + if (load_operand_instr != NULL) { + if (load_operand_instr == prior_instr) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break_while = true; + break; + } + } + } + if (break_while) { + break; + } + + // Otherwise, keep pushing back the load instruction + --insertion_offset; + assert(insertion_offset >= 0); + } + } + + // First, insert clone all instructions, and insert them into the + // basic block at the very beginning + + // build ordered vector of cloned instructions + // build map from original vector to cloned vector + std::vector cloned_instrs = {}; + std::map original_to_clone_map = {}; + std::map clone_to_original_map = {}; + for (Instruction *instr : final_instrs_vec) { + Instruction *cloned_instr = instr->clone(); + cloned_instrs.push_back(cloned_instr); + original_to_clone_map[instr] = cloned_instr; + clone_to_original_map[cloned_instr] = instr; + } + + // Grab first instruction to build before at. 
Instruction *first_instr = NULL; - for (BasicBlock::reverse_iterator iter = B.rbegin(); - iter != B.rend(); ++iter) { - Instruction *I = &(*iter); - first_instr = I; - if (auto *store_op = dyn_cast(I)) { - if (head_pointer < 0) { - reversed_instructions.push_back(I); + for (auto &I : B) { + first_instr = dyn_cast(&I); + assert(first_instr != NULL); + break; + } + IRBuilder<> builder(first_instr); + builder.SetInsertPoint(&B); + + for (Instruction *cloned_instr : cloned_instrs) { + // set insert point to be before beginning if inserting phi + // instruction + if (isa(cloned_instr)) { + builder.SetInsertPoint(first_instr); + } + builder.Insert(cloned_instr); + if (isa(cloned_instr)) { + builder.SetInsertPoint(&B); + } + // The cloned instruction has arguments pointing backwards to + // prior original instructions. Some of these prior instructions + // themselves will themselves be cloned. We need to replace the + // prior original instructions with clones instructions + int num_operands = cloned_instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Value *clone_operand = cloned_instr->getOperand(i); + Instruction *clone_operand_instr = + dyn_cast(clone_operand); + if (clone_operand_instr != NULL) { + if (original_to_clone_map.count(clone_operand_instr) > + 0) { + Instruction *replacement_operand = + original_to_clone_map[clone_operand_instr]; + Value *replacement_value = + dyn_cast(replacement_operand); + assert(replacement_value != NULL); + cloned_instr->setOperand(i, replacement_value); + } } else { - int current_counter = head_pointer; - while (current_counter >= 0) { - Instruction *curr_instr = - reversed_instructions[current_counter]; - if (curr_instr->isTerminator()) { - ++current_counter; + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + cloned_instr->setOperand(i, + original_instr->getOperand(i)); + } + } + + // Furthermore, we need to change all uses of the original + // instruction to be the new cloned instruction + 
Instruction *original_instr = + clone_to_original_map[cloned_instr]; + if (Value *original_val = dyn_cast(original_instr)) { + Value *cloned_val = dyn_cast(cloned_instr); + assert(cloned_val != NULL); + original_val->replaceAllUsesWith(cloned_val); + } + } + + // Finally, delete all the original instructions in the basic block + // Do this in reverse order. + std::reverse(all_instrs.begin(), all_instrs.end()); + for (Instruction *instr : all_instrs) { + instr->eraseFromParent(); + } + } + } + + void move_forward_bitcast_instrs(Function &F) { + AliasAnalysis *AA = &getAnalysis().getAAResults(); + + for (auto &B : F) { + // Grab all instructions + std::vector all_instrs = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + all_instrs.push_back(instr); + } + + // Perform Pushing Back of Bitcast Instructions + std::vector final_instrs_vec = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + + // Place any non-Bitcast Instructions at the end of the list of + // instructions + if (!(isa(instr))) { + final_instrs_vec.push_back(instr); + continue; + } + + // Handle Gep Instructions + int insertion_offset = final_instrs_vec.size(); + while (true) { + Instruction *load_instr = instr; + // If there is no prior instruction, push back at current + // offset, and stop. 
+ if (insertion_offset - 1 < 0) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + Instruction *prior_instr = + final_instrs_vec[insertion_offset - 1]; + + // If the prior instruction is a phi node, do not push the + // current instruction back + if (isa(prior_instr)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + // If the prior instruction is used in the load's + // arguments, do not push it back + int num_operands = load_instr->getNumOperands(); + bool break_while = false; + for (int i = 0; i < num_operands; i++) { + Value *load_operand = load_instr->getOperand(i); + Instruction *load_operand_instr = + dyn_cast(load_operand); + if (load_operand_instr != NULL) { + if (load_operand_instr == prior_instr) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break_while = true; break; - } else if (auto *other_store_op = - dyn_cast(curr_instr)) { - if (AA->isNoAlias( - store_op->getOperand(1), - other_store_op->getOperand(1))) { - --current_counter; - } else { - break; - } - } else if (auto *load_op = - dyn_cast(curr_instr)) { - if (AA->isNoAlias(store_op->getOperand(1), - load_op->getOperand(0))) { - --current_counter; - } else { - break; - } - } else { - --current_counter; } } - // Do the insertion - reversed_instructions.insert( - reversed_instructions.begin() + current_counter, I); } - } else { - reversed_instructions.push_back(I); + if (break_while) { + break; + } + + // Otherwise, keep pushing back the load instruction + --insertion_offset; + assert(insertion_offset >= 0); } - ++head_pointer; - all_instructions.push_back(I); } - if (first_instr == NULL) { - assert(false); + + // First, insert clone all instructions, and insert them into the + // basic block at the very beginning + + // build ordered vector of cloned instructions + // build map from original vector to cloned vector + std::vector 
cloned_instrs = {}; + std::map original_to_clone_map = {}; + std::map clone_to_original_map = {}; + for (Instruction *instr : final_instrs_vec) { + Instruction *cloned_instr = instr->clone(); + cloned_instrs.push_back(cloned_instr); + original_to_clone_map[instr] = cloned_instr; + clone_to_original_map[cloned_instr] = instr; + } + + // Grab first instruction to build before at. + Instruction *first_instr = NULL; + for (auto &I : B) { + first_instr = dyn_cast(&I); + assert(first_instr != NULL); + break; } IRBuilder<> builder(first_instr); - // we add the instructions at the end builder.SetInsertPoint(&B); - // here we are going to add back our instructions - std::reverse(reversed_instructions.begin(), - reversed_instructions.end()); - BasicBlock::InstListType &bb_instrs = B.getInstList(); + + for (Instruction *cloned_instr : cloned_instrs) { + // set insert point to be before beginning if inserting phi + // instruction + if (isa(cloned_instr)) { + builder.SetInsertPoint(first_instr); + } + builder.Insert(cloned_instr); + if (isa(cloned_instr)) { + builder.SetInsertPoint(&B); + } + // The cloned instruction has arguments pointing backwards to + // prior original instructions. Some of these prior instructions + // themselves will themselves be cloned. 
We need to replace the + // prior original instructions with clones instructions + int num_operands = cloned_instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Value *clone_operand = cloned_instr->getOperand(i); + Instruction *clone_operand_instr = + dyn_cast(clone_operand); + if (clone_operand_instr != NULL) { + if (original_to_clone_map.count(clone_operand_instr) > + 0) { + Instruction *replacement_operand = + original_to_clone_map[clone_operand_instr]; + Value *replacement_value = + dyn_cast(replacement_operand); + assert(replacement_value != NULL); + cloned_instr->setOperand(i, replacement_value); + } + } else { + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + cloned_instr->setOperand(i, + original_instr->getOperand(i)); + } + } + + // Furthermore, we need to change all uses of the original + // instruction to be the new cloned instruction + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + if (Value *original_val = dyn_cast(original_instr)) { + Value *cloned_val = dyn_cast(cloned_instr); + assert(cloned_val != NULL); + original_val->replaceAllUsesWith(cloned_val); + } + } + + // Finally, delete all the original instructions in the basic block + // Do this in reverse order. 
+ std::reverse(all_instrs.begin(), all_instrs.end()); + for (Instruction *instr : all_instrs) { + instr->eraseFromParent(); + } + } + } + + void move_forward_insert_element_instrs(Function &F) { + AliasAnalysis *AA = &getAnalysis().getAAResults(); + + for (auto &B : F) { + // Grab all instructions + std::vector all_instrs = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + all_instrs.push_back(instr); + } + + // Perform Pushing Back of Gep Instructions + std::vector final_instrs_vec = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + + // Place any non-Gep Instructions at the end of the list of + // instructions + if (!(isa(instr))) { + final_instrs_vec.push_back(instr); + continue; + } + + // Handle Gep Instructions + int insertion_offset = final_instrs_vec.size(); + while (true) { + Instruction *load_instr = instr; + // If there is no prior instruction, push back at current + // offset, and stop. + if (insertion_offset - 1 < 0) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + Instruction *prior_instr = + final_instrs_vec[insertion_offset - 1]; + + // If the prior instruction is a phi node, do not push the + // current instruction back + if (isa(prior_instr)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + // If the prior instruction is used in the load's + // arguments, do not push it back + int num_operands = load_instr->getNumOperands(); + bool break_while = false; + for (int i = 0; i < num_operands; i++) { + Value *load_operand = load_instr->getOperand(i); + Instruction *load_operand_instr = + dyn_cast(load_operand); + if (load_operand_instr != NULL) { + if (load_operand_instr == prior_instr) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break_while = true; + break; + } + } + } + if (break_while) { + break; + } + + // 
Otherwise, keep pushing back the load instruction + --insertion_offset; + assert(insertion_offset >= 0); + } + } + + // First, insert clone all instructions, and insert them into the + // basic block at the very beginning + + // build ordered vector of cloned instructions + // build map from original vector to cloned vector + std::vector cloned_instrs = {}; std::map original_to_clone_map = {}; - for (auto &I : reversed_instructions) { - // we clone the original instruciton, then insert into builder - Instruction *cloned_instr = I->clone(); - // when adding, need to take caution about the users - original_to_clone_map[I] = cloned_instr; - for (unsigned int i = 0; i < I->getNumOperands(); i++) { - Value *operand = I->getOperand(i); - Instruction *operand_instr = dyn_cast(operand); - if (original_to_clone_map.find(operand_instr) != - original_to_clone_map.end()) { - Instruction *clone_instr = - original_to_clone_map[operand_instr]; - Value *clone_value = dyn_cast(clone_instr); - cloned_instr->setOperand(i, clone_value); + std::map clone_to_original_map = {}; + for (Instruction *instr : final_instrs_vec) { + Instruction *cloned_instr = instr->clone(); + cloned_instrs.push_back(cloned_instr); + original_to_clone_map[instr] = cloned_instr; + clone_to_original_map[cloned_instr] = instr; + } + + // Grab first instruction to build before at. + Instruction *first_instr = NULL; + for (auto &I : B) { + first_instr = dyn_cast(&I); + assert(first_instr != NULL); + break; + } + IRBuilder<> builder(first_instr); + builder.SetInsertPoint(&B); + + for (Instruction *cloned_instr : cloned_instrs) { + // set insert point to be before beginning if inserting phi + // instruction + if (isa(cloned_instr)) { + builder.SetInsertPoint(first_instr); + } + builder.Insert(cloned_instr); + if (isa(cloned_instr)) { + builder.SetInsertPoint(&B); + } + // The cloned instruction has arguments pointing backwards to + // prior original instructions. 
Some of these prior instructions + // themselves will themselves be cloned. We need to replace the + // prior original instructions with clones instructions + int num_operands = cloned_instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Value *clone_operand = cloned_instr->getOperand(i); + Instruction *clone_operand_instr = + dyn_cast(clone_operand); + if (clone_operand_instr != NULL) { + if (original_to_clone_map.count(clone_operand_instr) > + 0) { + Instruction *replacement_operand = + original_to_clone_map[clone_operand_instr]; + Value *replacement_value = + dyn_cast(replacement_operand); + assert(replacement_value != NULL); + cloned_instr->setOperand(i, replacement_value); + } } else { - cloned_instr->setOperand(i, operand); + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + cloned_instr->setOperand(i, + original_instr->getOperand(i)); } } - bb_instrs.push_back(cloned_instr); - for (auto &U : I->uses()) { - User *user = U.getUser(); - user->setOperand(U.getOperandNo(), cloned_instr); + + // Furthermore, we need to change all uses of the original + // instruction to be the new cloned instruction + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + if (Value *original_val = dyn_cast(original_instr)) { + Value *cloned_val = dyn_cast(cloned_instr); + assert(cloned_val != NULL); + original_val->replaceAllUsesWith(cloned_val); } } - // here we need to delete all original instructions, going forwards - // with no reversal as they are in reversed order - for (auto &I : all_instructions) { - I->eraseFromParent(); + + // Finally, delete all the original instructions in the basic block + // Do this in reverse order. 
+ std::reverse(all_instrs.begin(), all_instrs.end()); + for (Instruction *instr : all_instrs) { + instr->eraseFromParent(); } } } + /** + * Move Loads as far forward as possible in LLVM IR + */ void rewrite_loads(Function &F) { AliasAnalysis *AA = &getAnalysis().getAAResults(); - std::map original_to_clone_map = {}; - std::vector all_instructions = {}; + for (auto &B : F) { - std::vector instructions = {}; + // Grab all instructions + std::vector all_instrs = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + all_instrs.push_back(instr); + } - int head_pointer = - -1; // points to head location in all_instructions - Instruction *first_instr = NULL; + // Perform Pushing Back of Load Instructions + std::vector final_instrs_vec = {}; for (auto &I : B) { - first_instr = &I; - if (auto *load_op = dyn_cast(&I)) { - if (isa(load_op->getOperand(0))) { - if (head_pointer < 0) { - instructions.push_back(&I); + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + + // Place any non-Load Instructions at the end of the list of + // instructions + if (!(isa(instr) || isa(instr))) { + final_instrs_vec.push_back(instr); + continue; + } + + // Handle Load Instructions + int insertion_offset = final_instrs_vec.size(); + while (true) { + Instruction *load_instr = instr; + // If there is no prior instruction, push back at current + // offset, and stop. 
+ if (insertion_offset - 1 < 0) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + Instruction *prior_instr = + final_instrs_vec[insertion_offset - 1]; + + // If the prior instruction is a phi node, do not push the + // current instruction back + if (isa(prior_instr)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + // If the prior Instruction is a call inst, do not push the + // current instruction back + // A call instruciton could have side effects to memory + // In addition, a call could be to @llvm.memset.p0i8.i64(i8* + // nonnull align 16 dereferenceable(40) %2, i8 0, i64 40, i1 + // false) or @memset_pattern16(i8* nonnull %2, i8* bitcast + // ([4 x float]* @.memset_pattern to i8*), i64 40) #6 which + // require alias analysis as well + + // TODO: discriminate calls to llvm memset + if (isa(prior_instr)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + + // If the prior instruction is used in the load's + // arguments, do not push it back + int num_operands = load_instr->getNumOperands(); + bool break_while = false; + for (int i = 0; i < num_operands; i++) { + Value *load_operand = load_instr->getOperand(i); + Instruction *load_operand_instr = + dyn_cast(load_operand); + if (load_operand_instr != NULL) { + if (load_operand_instr == prior_instr) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break_while = true; + break; + } + } + } + if (break_while) { + break; + } + + // If the prior instruction alias with the load + // instruction, do not push the store back + if (prior_instr->mayReadOrWriteMemory()) { + Value *prior_addr = NULL; + if (isa(prior_instr)) { + prior_addr = dyn_cast(prior_instr) + ->getPointerOperand(); + // prior_addr = prior_instr->getOperand(0); + } else if (isa(prior_instr)) { + prior_addr = dyn_cast(prior_instr) + 
->getPointerOperand(); + // prior_addr = prior_instr->getOperand(1); } else { - int current_counter = head_pointer; - while (current_counter > 0) { - Instruction *curr_instr = - instructions[current_counter]; - if (auto *op = dyn_cast(&I)) { - ++current_counter; + throw "Unmatched Instruction Type"; + } + Value *load_addr = dyn_cast(load_instr); + if (isa(load_instr)) { + load_addr = dyn_cast(load_instr) + ->getPointerOperand(); + // load_addr = load_instr->getOperand(0); + } + assert(load_addr != NULL); + if (!AA->isNoAlias( + load_addr, + LocationSize::precise( + load_addr->getType() + ->getPrimitiveSizeInBits()), + prior_addr, + LocationSize::precise( + prior_addr->getType() + ->getPrimitiveSizeInBits())) || + AA->isMustAlias( + load_addr, + prior_addr)) { // IDK WTF is happening, but + // apparently, the same pointers + // that mod / ref causes no + // alias?! + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + load_instr); + break; + } + } + + // if (isa_gather_instruction(prior_instr)) { + // std::vector gather_addresses = + // get_gather_addresses(prior_instr); + // Value *load_addr = dyn_cast(load_instr); + // if (isa(load_instr)) { + // load_addr = dyn_cast(load_instr) + // ->getPointerOperand(); + // } + // assert(load_addr != NULL); + // bool gather_break = false; + // for (auto gather_address : gather_addresses) { + // if (may_alias(gather_address, load_addr, AA)) { + // gather_break = true; + // break; + // } + // } + // if (gather_break) { + // final_instrs_vec.insert( + // final_instrs_vec.begin() + insertion_offset, + // load_instr); + // break; + // } + // } + + // Otherwise, keep pushing back the load instruction + --insertion_offset; + assert(insertion_offset >= 0); + } + } + + // First, insert clone all instructions, and insert them into the + // basic block at the very beginning + + // build ordered vector of cloned instructions + // build map from original vector to cloned vector + std::vector cloned_instrs = {}; + 
std::map original_to_clone_map = {}; + std::map clone_to_original_map = {}; + for (Instruction *instr : final_instrs_vec) { + Instruction *cloned_instr = instr->clone(); + cloned_instrs.push_back(cloned_instr); + original_to_clone_map[instr] = cloned_instr; + clone_to_original_map[cloned_instr] = instr; + } + + // Grab first instruction to build before at. + Instruction *first_instr = NULL; + for (auto &I : B) { + first_instr = dyn_cast(&I); + assert(first_instr != NULL); + break; + } + IRBuilder<> builder(first_instr); + builder.SetInsertPoint(&B); + + for (Instruction *cloned_instr : cloned_instrs) { + // set insert point to be before beginning if inserting phi + // instruction + if (isa(cloned_instr)) { + builder.SetInsertPoint(first_instr); + } + builder.Insert(cloned_instr); + if (isa(cloned_instr)) { + builder.SetInsertPoint(&B); + } + // The cloned instruction has arguments pointing backwards to + // prior original instructions. Some of these prior instructions + // themselves will themselves be cloned. 
We need to replace the + // prior original instructions with clones instructions + int num_operands = cloned_instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Value *clone_operand = cloned_instr->getOperand(i); + Instruction *clone_operand_instr = + dyn_cast(clone_operand); + if (clone_operand_instr != NULL) { + if (original_to_clone_map.count(clone_operand_instr) > + 0) { + Instruction *replacement_operand = + original_to_clone_map[clone_operand_instr]; + Value *replacement_value = + dyn_cast(replacement_operand); + assert(replacement_value != NULL); + cloned_instr->setOperand(i, replacement_value); + } + } else { + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + cloned_instr->setOperand(i, + original_instr->getOperand(i)); + } + } + + // Furthermore, we need to change all uses of the original + // instruction to be the new cloned instruction + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + if (Value *original_val = dyn_cast(original_instr)) { + Value *cloned_val = dyn_cast(cloned_instr); + assert(cloned_val != NULL); + original_val->replaceAllUsesWith(cloned_val); + } + } + + // Finally, delete all the original instructions in the basic block + // Do this in reverse order. 
+ std::reverse(all_instrs.begin(), all_instrs.end()); + for (Instruction *instr : all_instrs) { + instr->eraseFromParent(); + } + } + } + + /** + * Move Stores Back As Far As Possible in the LLVM IR + */ + void rewrite_stores(Function &F) { + AliasAnalysis *AA = &getAnalysis().getAAResults(); + for (auto &B : F) { + // Grab all instructions + std::vector all_instrs = {}; + for (auto &I : B) { + Instruction *instr = dyn_cast(&I); + assert(instr != NULL); + all_instrs.push_back(instr); + } + + // Perform Pushing Back of Store Instructions + std::vector final_instrs_vec = {}; + for (BasicBlock::reverse_iterator iter = B.rbegin(); + iter != B.rend(); ++iter) { + Instruction *instr = &(*iter); + assert(instr != NULL); + + // Place any non-Load Instructions at the end of the list of + // instructions + if (!(isa(instr) || isa(instr))) { + final_instrs_vec.push_back(instr); + continue; + } + + // Handle Load Instructions + int insertion_offset = final_instrs_vec.size(); + while (true) { + Instruction *store_instr = instr; + + // If there is no prior instruction, push back at current + // offset, and stop. 
+ if (insertion_offset - 1 < 0) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + store_instr); + break; + } + + // If the prior instruction is a terminator, do not push the + // current instruction back + Instruction *prior_instr = + final_instrs_vec[insertion_offset - 1]; + if (prior_instr->isTerminator()) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + store_instr); + break; + } + + // If the prior Instruction is a call inst, do not push the + // current instruction back + // A call instruciton could have side effects to memory + // In addition, a call could be to @llvm.memset.p0i8.i64(i8* + // nonnull align 16 dereferenceable(40) %2, i8 0, i64 40, i1 + // false) or @memset_pattern16(i8* nonnull %2, i8* bitcast + // ([4 x float]* @.memset_pattern to i8*), i64 40) #6 which + // require alias analysis as well + + // TODO: discriminate calls to llvm memset + if (isa(prior_instr)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + store_instr); + break; + } + + // If the prior instruction is used in the store's + // arguments, do not push it back + // int num_operands = store_instr->uses(); + bool break_while = false; + // https://stackoverflow.com/questions/35370195/llvm-difference-between-uses-and-user-in-instruction-or-value-classes + for (auto U : store_instr->users()) { + if (auto use_instr = dyn_cast(U)) { + for (Instruction *older_instr : final_instrs_vec) { + if (use_instr == older_instr) { + final_instrs_vec.insert( + final_instrs_vec.begin() + + insertion_offset, + store_instr); + break_while = true; break; } - // else if (auto *other_load_op = - // dyn_cast(curr_instr)) - // { - // if (AA->isNoAlias( - // other_load_op->getOperand(0), - // load_op->getOperand(0))) { - // --current_counter; - // } else { - // break; - // } - // } - else if (auto *store_op = - dyn_cast(curr_instr)) { - if (AA->isNoAlias(store_op->getOperand(1), - load_op->getOperand(0))) { - 
--current_counter; - } else { - break; - } - } else { - --current_counter; - } } - // Do the insertion - assert(current_counter >= 0); - instructions.insert( - instructions.begin() + current_counter, &I); } - } else { - instructions.push_back(&I); } - } else { - instructions.push_back(&I); + + if (break_while) { + break; + } + + // If the prior instruction alias with the store + // instruction, do not push the store back + if (prior_instr->mayReadOrWriteMemory()) { + Value *prior_addr = NULL; + if (isa(prior_instr)) { + prior_addr = prior_instr->getOperand(0); + } else if (isa(prior_instr)) { + prior_addr = prior_instr->getOperand(1); + } else { + throw "Unmatched Instruction Type"; + } + Value *store_addr = store_instr->getOperand(1); + if (!AA->isNoAlias( + store_addr, + LocationSize::precise( + store_addr->getType() + ->getPrimitiveSizeInBits()), + prior_addr, + LocationSize::precise( + prior_addr->getType() + ->getPrimitiveSizeInBits())) || + AA->isMustAlias(store_addr, prior_addr)) { + final_instrs_vec.insert( + final_instrs_vec.begin() + insertion_offset, + store_instr); + break; + } + } + + // if (isa_gather_instruction(prior_instr)) { + // std::vector gather_addresses = + // get_gather_addresses(prior_instr); + // Value *store_addr = dyn_cast(store_instr); + // if (isa(store_instr)) { + // store_addr = dyn_cast(store_instr) + // ->getPointerOperand(); + // } + // assert(store_addr != NULL); + // bool gather_break = false; + // for (auto gather_address : gather_addresses) { + // if (may_alias(gather_address, store_addr, AA)) { + // gather_break = true; + // break; + // } + // } + // if (gather_break) { + // final_instrs_vec.insert( + // final_instrs_vec.begin() + insertion_offset, + // store_instr); + // break; + // } + // } + + // Otherwise, keep pushing back the str instruction + --insertion_offset; + assert(insertion_offset >= 0); } - ++head_pointer; - all_instructions.push_back(&I); } - if (first_instr == NULL) { - assert(false); + + // build ordered 
vector of cloned instructions + // build map from original vector to cloned vector + std::reverse(final_instrs_vec.begin(), final_instrs_vec.end()); + std::vector cloned_instrs = {}; + std::map original_to_clone_map = {}; + std::map clone_to_original_map = {}; + for (Instruction *instr : final_instrs_vec) { + Instruction *cloned_instr = instr->clone(); + cloned_instrs.push_back(cloned_instr); + original_to_clone_map[instr] = cloned_instr; + clone_to_original_map[cloned_instr] = instr; + } + + // Grab first instruction to build before at. + Instruction *first_instr = NULL; + for (auto &I : B) { + first_instr = dyn_cast(&I); + assert(first_instr != NULL); + break; } IRBuilder<> builder(first_instr); - // we add the instructions at the end builder.SetInsertPoint(&B); - // here we are going to add back our instructions - BasicBlock::InstListType &bb_instrs = B.getInstList(); - for (auto &I : instructions) { - // we clone the original instruciton, then insert into builder - Instruction *cloned_instr = I->clone(); - // when adding, need to take caution about the users - original_to_clone_map[I] = cloned_instr; - for (unsigned int i = 0; i < I->getNumOperands(); i++) { - Value *operand = I->getOperand(i); - Instruction *operand_instr = dyn_cast(operand); - if (operand_instr != NULL) { - if (original_to_clone_map.find(operand_instr) != - original_to_clone_map.end()) { - Instruction *clone_instr = - original_to_clone_map[operand_instr]; - Value *clone_value = dyn_cast(clone_instr); - cloned_instr->setOperand(i, clone_value); - } else { - cloned_instr->setOperand(i, operand); + + for (Instruction *cloned_instr : cloned_instrs) { + // set insert point to be before beginning if inserting phi + // instruction + if (isa(cloned_instr)) { + builder.SetInsertPoint(first_instr); + } + builder.Insert(cloned_instr); + if (isa(cloned_instr)) { + builder.SetInsertPoint(&B); + } + + // The cloned instruction has arguments pointing backwards to + // prior original instructions. 
Some of these prior instructions + // themselves will themselves be cloned. We need to replace the + // prior original instructions with clones instructions + int num_operands = cloned_instr->getNumOperands(); + for (int i = 0; i < num_operands; i++) { + Value *clone_operand = cloned_instr->getOperand(i); + Instruction *clone_operand_instr = + dyn_cast(clone_operand); + if (clone_operand_instr != NULL) { + if (original_to_clone_map.count(clone_operand_instr) > + 0) { + Instruction *replacement_operand = + original_to_clone_map[clone_operand_instr]; + Value *replacement_value = + dyn_cast(replacement_operand); + assert(replacement_value != NULL); + cloned_instr->setOperand(i, replacement_value); } + } else { + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + cloned_instr->setOperand(i, + original_instr->getOperand(i)); } } - bb_instrs.push_back(cloned_instr); - Instruction *instr = &(*I); - for (auto &U : instr->uses()) { - User *user = U.getUser(); - user->setOperand(U.getOperandNo(), cloned_instr); + + // Furthermore, we need to change all uses of the original + // instruction to be the new cloned instruction + Instruction *original_instr = + clone_to_original_map[cloned_instr]; + if (Value *original_val = dyn_cast(original_instr)) { + Value *cloned_val = dyn_cast(cloned_instr); + assert(cloned_val != NULL); + original_val->replaceAllUsesWith(cloned_val); } } + + // Finally, delete all the original instructions in the basic block + // Do this in reverse order. 
+ std::reverse(all_instrs.begin(), all_instrs.end()); + for (Instruction *instr : all_instrs) { + instr->eraseFromParent(); + } } - // here we need to delete all original instructions, going - // forwards with no reversal as they are in reversed order - std::reverse(all_instructions.begin(), all_instructions.end()); - for (auto &I : all_instructions) { - I->eraseFromParent(); + } + + std::vector func_to_vec(Function &F) { + std::vector result = {}; + for (auto &B : F) { + for (auto &I : B) { + result.push_back(&I); + } } + return result; } virtual bool runOnFunction(Function &F) override { @@ -253,14 +1304,27 @@ struct LoadStoreMovementPass : public FunctionPass { * In this pass, we walk backwards finding the first load from the * bottom, and push it up as far as we can. We continue upwards, * pushing loads upward. - * - * We gr */ - if (F.getName() == "main") { + if (F.getName() == MAIN_FUNCTION_NAME || + (F.getName().size() > NO_OPT_PREFIX.size() && + F.getName().substr(0, NO_OPT_PREFIX.size()) == NO_OPT_PREFIX)) { return false; } - rewrite_stores(F); - rewrite_loads(F); + // Might want to iterate to convergence + const int N_ITER = 1; + for (int i = 0; i < N_ITER; i++) { + std::vector original_func = func_to_vec(F); + // move_forward_gep_instrs(F); + // move_forward_insert_element_instrs(F); + // move_forward_bitcast_instrs(F); + // move_forward_gather_instrs(F); + rewrite_loads(F); + rewrite_stores(F); + std::vector rewritten_func = func_to_vec(F); + if (original_func == rewritten_func) { + break; + } + } return true; } @@ -275,5 +1339,11 @@ static void registerLoadStoreMovementPass(const PassManagerBuilder &, legacy::PassManagerBase &PM) { PM.add(new LoadStoreMovementPass()); } + +static RegisterPass X("lsmovement", + "Load Store Movement Pass", + false /* Only looks at CFG */, + true /* Analysis Pass */); + static RegisterStandardPasses RegisterMyPass( PassManagerBuilder::EP_EarlyAsPossible, registerLoadStoreMovementPass); \ No newline at end of file diff 
--git a/src/dios-egraphs/Diospyros/Makefile b/src/dios-egraphs/Diospyros/Makefile index 1495bdcc..1a6a0a94 100644 --- a/src/dios-egraphs/Diospyros/Makefile +++ b/src/dios-egraphs/Diospyros/Makefile @@ -2,62 +2,50 @@ ifeq ($(shell uname),Darwin) EXT := dylib CLANG := /usr/local/opt/llvm/bin/clang SETUP := bash set_up_mac.sh - LIB := src/lib.rs Cargo.toml .cargo/config + LIB := src/lib.rs Cargo.toml .cargo/config else EXT := so CLANG = clang SETUP := : - LIB := src/lib.rs Cargo.toml + LIB := src/lib.rs Cargo.toml endif .PHONY: target/debug/libllvmlib.$(EXT) -run: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) $(test) - -run-o2: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -O2 -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) $(test) - -run-out: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) $(test) - ./a.out - -run-out-o2: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -O2 -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) $(test) - ./a.out - -run-all: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - bash run_all.sh - -emit: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -emit-llvm -S -o - $(test) - -emit-o2: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -O2 -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) -emit-llvm -S -o - $(test) - -test-opt: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o clang.ll $(test) - opt -S --inline --mem2reg --indvars --loop-simplify --loop-instsimplify --licm --loop-unroll --simplifycfg clang.ll -o opt.ll - $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) opt.ll -o finish.ll - opt -S --adce finish.ll -o final.ll - run-opt: set-up-mac 
target/debug/libllvmlib.$(EXT) diospyros.cpp - $(CLANG) -emit-llvm -S -Xclang -disable-O0-optnone -o clang.ll $(test) - opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce clang.ll -o opt.ll - opt -S --cfl-steens-aa opt.ll -o aa.ll - $(CLANG) -emit-llvm -S -Xclang -load -Xclang target/debug/libllvmlib.$(EXT) aa.ll -o diospyros.ll - opt -S --adce --dse diospyros.ll -o dce.ll - $(CLANG) dce.ll - ./a.out - -test: set-up-mac runt.sh runt.toml target/debug/libllvmlib.$(EXT) - runt + @$(CLANG) -target arm64-apple-macos11 -mmacosx-version-min=11.0 -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) + @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll + @opt -S -load=target/debug/libllvmlib.$(EXT) --basic-aa --lsmovement --dse --adce build/opt.ll -o build/aa.ll + @opt -S -load=target/debug/libllvmlib.$(EXT) --diospyros -opt -print=true build/aa.ll -o build/diospyros.ll + @opt -S --adce --dse --gvn build/diospyros.ll -o build/dce.ll + @$(CLANG) -target arm64-apple-macos11 -mmacosx-version-min=11.0 build/dce.ll -o build/final + @build/final + +run-slp: + @$(CLANG) -target arm64-apple-macos11 -mmacosx-version-min=11.0 -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) + @opt -S -always-inline --inline --mem2reg --scev-aa -simplifycfg --flattencfg --indvars -loops -loop-rotate --loop-simplify --loop-idiom --loop-instsimplify --licm --unroll-threshold=1000000 --loop-unroll --simplifycfg --instcombine --gvn --mem2reg --dse --adce build/clang.ll -o build/opt.ll + @opt -S --slp-vectorizer build/opt.ll -o 
build/slp.ll + @opt -S --adce --dse build/slp.ll -o build/dce.ll + @$(CLANG) -target arm64-apple-macos11 -mmacosx-version-min=11.0 -o0 build/dce.ll -o build/final + @build/final + +run-baseline: + @$(CLANG) -target arm64-apple-macos11 -mmacosx-version-min=11.0 -emit-llvm -S -Xclang -disable-O0-optnone -o build/clang.ll $(test) + @$(CLANG) -target arm64-apple-macos11 -mmacosx-version-min=11.0 -O3 -fno-vectorize -fno-tree-vectorize -fno-slp-vectorize -o build/final build/clang.ll + @build/final + +test: set-up-mac target/debug/libllvmlib.$(EXT) diospyros.cpp + turnt c-tests/*.c target/debug/libllvmlib.$(EXT): $(LIB) - cargo build + @cargo build set-up-mac: - $(SETUP) - + @$(SETUP) + clean: + rm -r build/* + +clean-all: rm -rf target + rm -r build/* diff --git a/src/dios-egraphs/Diospyros/README.md b/src/dios-egraphs/Diospyros/README.md index 3f3e537e..cf5b41ae 100644 --- a/src/dios-egraphs/Diospyros/README.md +++ b/src/dios-egraphs/Diospyros/README.md @@ -7,6 +7,8 @@ This directory contains an experimental [LLVM][] pass that optimizes programs us To get started, you will need **LLVM 11.x.x**. Using [Homebrew][] on macOS, for example, try `brew install llvm@11` to get the right version. +You will also need Rust, for the main Diospyros library, and a version of Python3, for testing using [turnt][]. + Because our Rust library relies on [the `llvm-sys` crate][llvm-sys], you will need an existing installation of `llvm-config` on your `$PATH`. 
To use a Homebrew-installed LLVM, for example, you may need something like this: @@ -21,47 +23,43 @@ Add a file `.cargo/config` here, in this directory, with these [contents](https: "-C", "link-arg=dynamic_lookup", ] -Then, build the pass library with: - - $ cargo build +Further, add a build directory in your current directory with the command: -## Run the Pass + $ mkdir build -To build and run the [Clang][] pass on a test file, use this Makefile command: +If you would like, you can build the pass library with: - $ make run test=llvm-tests/a.c + $ cargo build -where `llvm-tests/a.c` is the path to any test file. +Otherwise, running with any of the commands in the next section should also work. -To build and run the Clang pass, with optimization, use the Makefile command: +Finally, note that the code for the Diospyros pass, in the `dios-egraphs` directory, must be in the directory immediately above the current one you are in, for the LLVM pass to build properly. - $ make run-opt test=llvm-tests/a.c - -where `llvm-tests/a.c` is the path to any test file. +## Run the Pass -To build and see emitted LLVM IR code, , with optimization, use the Makefile command: +To build and run the [Clang][] pass, with Diospyros, use the Makefile command: -$ make run-opt test=llvm-tests/a.c + $ make run-opt test=llvm-tests/a.c -where, again, `llvm-tests/a.c` is the path to any test file. +where `llvm-tests/a.c` is the path to any test file, for instance `c-tests/add.c`. 
-To emit the generated LLVM IR code, either unoptimized or optimized: +To build and run the [Clang][] pass, with Diospyros printing out the vectorization choices, use the Makefile command: - $ make emit test=llvm-tests/a.c - $ make emit-o2 test=llvm-tests/a.c + $ make print-opt test=llvm-tests/a.c +To build and run the [Clang][] pass, with no usage of Diospyros, use the Makefile command: -To build, run the [Clang][] pass, and the run the associated program `./a.out`, run: + $ make no-opt test=llvm-tests/a.c - $ make run-out test=llvm-tests/a.c +To build and see emitted LLVM IR code, run any of the above build commands for the file you are interested in, then look in the `build` directory and open the `dce.ll` file, which is the final pre-executable IR code file. To run all the tests, run: - $ make test + $ turnt c-tests/*.c -To run all tests and get output, run: +Or alternately: - $ make run-all + $ make test To set up macOS settings, run: @@ -73,19 +71,19 @@ To clean the repository of build files, run: ## Testing -Test files provided in the llvm-tests/ folder can be run with [Runt][]. To install or update Runt: +Test files provided in the `c-tests/` folder can be run with [turnt][]. To install or update Turnt, run the command: - $ cargo install runt + $ pip3 install --user turnt Then, ensure that the test files produce the right output with: - $ runt + $ turnt c-tests/*.c -You can also pass the `--diff` flag to compare your output with the `.expect` files. +You can also pass the `--diff` flag to compare your output with the `.expect` files, and use the `--save` flag to save new `.expect` files. 
[llvm]: https://llvm.org [clang]: https://clang.llvm.org [llvm-sys]: https://crates.io/crates/llvm-sys [homebrew]: https://brew.sh -[runt]: https://github.com/rachitnigam/runt +[turnt]: https://github.com/cucapra/turnt diff --git a/src/dios-egraphs/Diospyros/VectorizationUtilities.cpp b/src/dios-egraphs/Diospyros/VectorizationUtilities.cpp new file mode 100644 index 00000000..7f8a770b --- /dev/null +++ b/src/dios-egraphs/Diospyros/VectorizationUtilities.cpp @@ -0,0 +1,252 @@ +#include +#include + +#include +#include +#include + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" + +using namespace llvm; + +namespace VectorConstants { +const std::string GATHER_NAME = "llvm.masked.gather.v4f32.v4p0f32"; +const uint32_t VECTOR_WIDTH = 4; +} // namespace VectorConstants + +namespace Array { +// get all "Base" Arrays on which vectorization can occur. 
These are +// defined as argument inputs with a pointer type +std::vector get_array_bases(Function &F, TargetLibraryInfo *TLI) { + std::vector base_of_array_vec = {}; + for (auto &a : F.args()) { + if (a.getType()->isPointerTy()) { + if (Value *arg_val = dyn_cast(&a)) { + base_of_array_vec.push_back(arg_val); + } + } + } + for (auto &B : F) { + for (auto &I : B) { + if (Value *V = dyn_cast(&I)) { + if (isMallocOrCallocLikeFn(V, TLI)) { + base_of_array_vec.push_back(V); + } + } + } + } + return base_of_array_vec; +} + +/** + * return the index to in baseOfArrayVec that store is an offset from, or + * NULLOPT if not matching + */ +std::pair get_base_reference(Value *mem_instr_ptr, + std::vector base_of_array_vec, + ScalarEvolution *SE) { + for (int i = 0; i < base_of_array_vec.size(); i++) { + Value *base_array_ptr = base_of_array_vec[i]; + assert(base_array_ptr->getType()->isPointerTy()); + const SCEV *mem_instr_ptr_se = SE->getSCEV(mem_instr_ptr); + const SCEV *base_ptr_se = SE->getSCEV(base_array_ptr); + const SCEV *diff = SE->getMinusSCEV(mem_instr_ptr_se, base_ptr_se); + APInt min_val = SE->getSignedRangeMin(diff); + APInt max_val = SE->getSignedRangeMax(diff); + if (min_val == max_val) { + int val = (int)max_val.roundToDouble(); + return {i, val}; + } + } + return {-1, -1}; +} +} // namespace Array + +namespace Alias { +bool may_alias(Value *addr1, Value *addr2, AliasAnalysis *AA) { + // Both isNoALias and isMustAlias have to be checked for unknown reasons + return (!AA->isNoAlias(addr1, + LocationSize::precise( + addr1->getType()->getPrimitiveSizeInBits()), + addr2, + LocationSize::precise( + addr2->getType()->getPrimitiveSizeInBits())) || + AA->isMustAlias(addr1, addr2)); +} +} // namespace Alias + +namespace Gather { +bool isa_gather_instruction(Instruction *instr) { + if (CallInst *call_instr = dyn_cast(instr)) { + Function *fun = call_instr->getCalledFunction(); + // Source: + // 
https://stackoverflow.com/questions/11686951/how-can-i-get-function-name-from-callinst-in-llvm + // Fun Could be NULL, in which case indirect call occurs, i cannot + // get name. + if (fun) { + if (fun->getName() == VectorConstants::GATHER_NAME) { + return true; + } + } + } + return false; +} + +std::vector get_gather_addresses(Instruction *call_to_gather) { + assert(isa_gather_instruction(call_to_gather)); + + Instruction *insert_element_instr = + dyn_cast(call_to_gather->getOperand(0)); + if (insert_element_instr == NULL) { + throw "Gather Arguments Pointer Vector was NULL"; + } + + std::vector gather_addresses = {}; + // hardcode to gathers of length 4 only + for (int i = 0; i < VectorConstants::VECTOR_WIDTH; i++) { + Value *pointer = insert_element_instr->getOperand(1); + gather_addresses.push_back(pointer); + Instruction *new_insert_element_instr = + dyn_cast(insert_element_instr->getOperand(0)); + insert_element_instr = new_insert_element_instr; + } + return gather_addresses; +} + +} // namespace Gather + +namespace Chunking { +using chunk_t = std::vector; +using chunks_t = std::vector>; + +/** + * True iff an instruction is a mem intrinsic. + */ +bool isa_mem_intrinsic(Instruction *instr) { + if (isa(instr)) { + return true; + } else if (isa(instr)) { + return true; + } else if (isa(instr)) { + return true; + } else if (isa(instr)) { + return true; + } + return false; +} + +/** + * True iff is a special type of instruction for chunking + * + */ +bool isa_special_chunk_instr(Instruction *instr) { + return isa_mem_intrinsic(instr) || isa(instr) || + isa(instr) || + (isa(instr) && !Gather::isa_gather_instruction(instr)); +} + +/* +Build chunks of instructions + +A chunk is the longest contiguous section of instructions that ends in a +sequence of stores. + +A chunk does not need to contain a store instruction. 
+ +Assumes: LoadStoreMovement pass is run before the Diospyros pass +**/ +std::vector> build_chunks(BasicBlock *B, + AliasAnalysis *AA) { + std::vector> chunks = {}; + + bool has_seen_store = false; + bool stores_alias_in_chunk = false; + std::vector curr_chunk = {}; + + // Track Last Stores seen + std::vector last_stores = {}; + for (auto &I : *B) { + // the first two cases are meant to create chunks with non-handled + // instructions + if (has_seen_store && isa_special_chunk_instr(&I)) { + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + has_seen_store = false; + stores_alias_in_chunk = false; + curr_chunk = {}; + last_stores = {}; + curr_chunk.push_back(&I); + chunks.push_back(curr_chunk); + curr_chunk = {}; + } else if (!has_seen_store && isa_special_chunk_instr(&I)) { + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + has_seen_store = false; + stores_alias_in_chunk = false; + curr_chunk = {}; + last_stores = {}; + curr_chunk.push_back(&I); + chunks.push_back(curr_chunk); + curr_chunk = {}; + } else if (!has_seen_store && isa(I) && + !isa_special_chunk_instr(&I)) { + has_seen_store = true; + curr_chunk.push_back(&I); + last_stores.push_back(&I); + } else if (!has_seen_store && !isa(I) && + !isa_special_chunk_instr(&I)) { + curr_chunk.push_back(&I); + } else if (has_seen_store && !isa(I) && + !isa_special_chunk_instr(&I)) { + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + has_seen_store = false; + stores_alias_in_chunk = false; + curr_chunk = {}; + last_stores = {}; + curr_chunk.push_back(&I); + } else { // has seen store and is a store instruction + Value *curr_store_addr = I.getOperand(1); + for (auto other_store : last_stores) { + if (other_store != &I) { + Value *other_store_addr = other_store->getOperand(1); + if (Alias::may_alias(curr_store_addr, other_store_addr, + AA)) { + stores_alias_in_chunk = true; + } + } + } + 
curr_chunk.push_back(&I); + last_stores.push_back(&I); + } + } + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + + // Filter to make sure no chunks are empty + chunks_t final_chunks = {}; + for (auto chunk : chunks) { + if (!chunk.empty()) { + final_chunks.push_back(chunk); + } + } + + return final_chunks; +} +} // namespace Chunking \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-2-by-2-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-2-by-2-conv.c new file mode 100644 index 00000000..96a530ec --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-2-by-2-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 3 +#define I_COLS 3 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float 
mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-3-by-3-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-3-by-3-conv.c new file mode 100644 index 00000000..693ac896 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-3-by-3-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 3 +#define I_COLS 3 +#define F_ROWS 3 +#define F_COLS 3 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; 
outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-4-by-4-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-4-by-4-conv.c new file mode 100644 index 00000000..f29588ba --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/3-by-3-and-4-by-4-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 3 +#define I_COLS 3 +#define F_ROWS 4 +#define F_COLS 4 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - 
fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/4-by-4-and-2-by-2-conv.c 
b/src/dios-egraphs/Diospyros/benchmarks/conv/4-by-4-and-2-by-2-conv.c new file mode 100644 index 00000000..a7ec8612 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/4-by-4-and-2-by-2-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 4 +#define I_COLS 4 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/4-by-4-and-4-by-4-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/4-by-4-and-4-by-4-conv.c new file mode 100644 index 00000000..af54c8eb --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/4-by-4-and-4-by-4-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 4 +#define I_COLS 4 +#define F_ROWS 4 +#define F_COLS 4 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - 
fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-2-by-2-conv.c 
b/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-2-by-2-conv.c new file mode 100644 index 00000000..8c1089b5 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-2-by-2-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 5 +#define I_COLS 5 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-3-by-3-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-3-by-3-conv.c new file mode 100644 index 00000000..907cff88 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-3-by-3-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 5 +#define I_COLS 5 +#define F_ROWS 3 +#define F_COLS 3 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - 
fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-4-by-4-conv.c 
b/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-4-by-4-conv.c new file mode 100644 index 00000000..47d28610 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/5-by-5-and-4-by-4-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 5 +#define I_COLS 5 +#define F_ROWS 4 +#define F_COLS 4 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { /* Full 2-D convolution of mat_in with f_in on flat row-major arrays; each product is added (+=) to mat_out, which this function never clears. */ + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; /* NOTE(review): filled below but never read afterwards */ + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // These Stack Overflow posts explain how to measure wall-clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // run convolution NITER times; mat_out is not re-zeroed between calls, so it keeps accumulating + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report elapsed milliseconds; NOTE(review): diff is never used (start/end hold milliseconds, not time_t) + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; /* NOTE(review): exit status is 0 even though no result file was written */ + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-2-by-2-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-2-by-2-conv.c new file mode 100644 index 00000000..45029b62 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-2-by-2-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 6 +#define I_COLS 6 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { /* Full 2-D convolution of mat_in with f_in on flat row-major arrays; each product is added (+=) to mat_out, which this function never clears. */ + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - 
fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; /* NOTE(review): filled below but never read afterwards */ + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // These Stack Overflow posts explain how to measure wall-clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // run convolution NITER times; mat_out is not re-zeroed between calls, so it keeps accumulating + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report elapsed milliseconds; NOTE(review): diff is never used (start/end hold milliseconds, not time_t) + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; /* NOTE(review): exit status is 0 even though no result file was written */ + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-3-by-3-conv.c 
b/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-3-by-3-conv.c new file mode 100644 index 00000000..42e82adf --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-3-by-3-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 6 +#define I_COLS 6 +#define F_ROWS 3 +#define F_COLS 3 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { /* Full 2-D convolution of mat_in with f_in on flat row-major arrays; each product is added (+=) to mat_out, which this function never clears. */ + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; /* NOTE(review): filled below but never read afterwards */ + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // These Stack Overflow posts explain how to measure wall-clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // run convolution NITER times; mat_out is not re-zeroed between calls, so it keeps accumulating + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report elapsed milliseconds; NOTE(review): diff is never used (start/end hold milliseconds, not time_t) + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; /* NOTE(review): exit status is 0 even though no result file was written */ + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-4-by-4-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-4-by-4-conv.c new file mode 100644 index 00000000..521f343d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/6-by-6-and-4-by-4-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 6 +#define I_COLS 6 +#define F_ROWS 4 +#define F_COLS 4 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { /* Full 2-D convolution of mat_in with f_in on flat row-major arrays; each product is added (+=) to mat_out, which this function never clears. */ + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - 
fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; /* NOTE(review): filled below but never read afterwards */ + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // These Stack Overflow posts explain how to measure wall-clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // run convolution NITER times; mat_out is not re-zeroed between calls, so it keeps accumulating + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report elapsed milliseconds; NOTE(review): diff is never used (start/end hold milliseconds, not time_t) + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; /* NOTE(review): exit status is 0 even though no result file was written */ + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-2-by-2-conv.c 
b/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-2-by-2-conv.c new file mode 100644 index 00000000..29a4d1fc --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-2-by-2-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 8 +#define I_COLS 8 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { /* Full 2-D convolution of mat_in with f_in on flat row-major arrays; each product is added (+=) to mat_out, which this function never clears. */ + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; /* NOTE(review): filled below but never read afterwards */ + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // These Stack Overflow posts explain how to measure wall-clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // run convolution NITER times; mat_out is not re-zeroed between calls, so it keeps accumulating + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report elapsed milliseconds; NOTE(review): diff is never used (start/end hold milliseconds, not time_t) + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; /* NOTE(review): exit status is 0 even though no result file was written */ + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-3-by-3-conv.c b/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-3-by-3-conv.c new file mode 100644 index 00000000..ec9e557d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-3-by-3-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 8 +#define I_COLS 8 +#define F_ROWS 3 +#define F_COLS 3 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { /* Full 2-D convolution of mat_in with f_in on flat row-major arrays; each product is added (+=) to mat_out, which this function never clears. */ + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - 
fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; /* NOTE(review): filled below but never read afterwards */ + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // These Stack Overflow posts explain how to measure wall-clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // run convolution NITER times; mat_out is not re-zeroed between calls, so it keeps accumulating + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report elapsed milliseconds; NOTE(review): diff is never used (start/end hold milliseconds, not time_t) + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; /* NOTE(review): exit status is 0 even though no result file was written */ + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-4-by-4-conv.c 
b/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-4-by-4-conv.c new file mode 100644 index 00000000..465f73b6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/8-by-8-and-4-by-4-conv.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define I_ROWS 8 +#define I_COLS 8 +#define F_ROWS 4 +#define F_COLS 4 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { /* Full 2-D convolution of mat_in with f_in on flat row-major arrays; each product is added (+=) to mat_out, which this function never clears. */ + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0.0f; + } + float expected[O_ROWS * O_COLS]; /* NOTE(review): filled below but never read afterwards */ + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + // These Stack Overflow posts explain how to measure wall-clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // run convolution NITER times; mat_out is not re-zeroed between calls, so it keeps accumulating + for (int i = 0; i < NITER; i++) { + convolution(mat_in, f_in, mat_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report elapsed milliseconds; NOTE(review): diff is never used (start/end hold milliseconds, not time_t) + double diff = difftime(end, start); + + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; /* NOTE(review): exit status is 0 even though no result file was written */ + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/conv/test-utils.h b/src/dios-egraphs/Diospyros/benchmarks/conv/test-utils.h new file mode 100644 index 00000000..8b4a8d5d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/conv/test-utils.h @@ -0,0 +1,6 @@ +#define NITER 10000000 /* number of calls made inside each benchmark's timed loop */ + +#define MAX_FLOAT 100.00f /* scale applied to rand() when filling the input arrays */ +#define DELTA 0.01f /* NOTE(review): not referenced by the benchmark sources visible here */ + +#define FILE_PATH "data.txt" /* file that receives the elapsed milliseconds */ diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/10-by-10-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/10-by-10-mat-mul.c new file mode 100644 index 00000000..7cc2ca55 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/10-by-10-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 10 +#define A_COLS 10 +#define B_COLS 10 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { /* c_out = a_in * b_in on flat row-major arrays; every c_out entry is reset to 0 before its dot product is accumulated. */ + for (int y = 0; y < A_ROWS; 
y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in; NOTE(review): sized A_ROWS * B_COLS, but matrix_multiply declares A_COLS * B_COLS (equal here only because A_ROWS == A_COLS) + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected; NOTE(review): expected is never read afterwards + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // These Stack Overflow posts explain how to measure wall-clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // compute c_out NITER times + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report elapsed milliseconds; NOTE(review): diff is never used (start/end hold milliseconds, not time_t) + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; /* NOTE(review): exit status is 0 even though no result file was written */ + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file 
diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/11-by-11-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/11-by-11-mat-mul.c new file mode 100644 index 00000000..77b9ee13 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/11-by-11-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 11 +#define A_COLS 11 +#define B_COLS 11 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { /* c_out = a_in * b_in on flat row-major arrays; every c_out entry is reset to 0 before its dot product is accumulated. */ + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in; NOTE(review): sized A_ROWS * B_COLS, but matrix_multiply declares A_COLS * B_COLS (equal here only because A_ROWS == A_COLS) + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected; NOTE(review): expected is never read afterwards + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // These Stack Overflow posts explain how to measure wall-clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // compute c_out NITER times + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report elapsed milliseconds; NOTE(review): diff is never used (start/end hold milliseconds, not time_t) + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; /* NOTE(review): exit status is 0 even though no result file was written */ + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/12-by-12-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/12-by-12-mat-mul.c new file mode 100644 index 00000000..886ce9b3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/12-by-12-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 12 +#define A_COLS 12 +#define B_COLS 12 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { /* c_out = a_in * b_in on flat row-major arrays; every c_out entry is reset to 0 before its dot product is accumulated. */ + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + 
a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in; NOTE(review): sized A_ROWS * B_COLS, but matrix_multiply declares A_COLS * B_COLS (equal here only because A_ROWS == A_COLS) + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected; NOTE(review): expected is never read afterwards + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // These Stack Overflow posts explain how to measure wall-clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // compute c_out NITER times + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report elapsed milliseconds; NOTE(review): diff is never used (start/end hold milliseconds, not time_t) + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; /* NOTE(review): exit status is 0 even though no result file was written */ + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/15-by-15-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/15-by-15-mat-mul.c new file mode 100644 index 00000000..35e5c95e --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/15-by-15-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include 
"test-utils.h" + +#define A_ROWS 15 +#define A_COLS 15 +#define B_COLS 15 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { /* c_out = a_in * b_in on flat row-major arrays; every c_out entry is reset to 0 before its dot product is accumulated. */ + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in; NOTE(review): sized A_ROWS * B_COLS, but matrix_multiply declares A_COLS * B_COLS (equal here only because A_ROWS == A_COLS) + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected; NOTE(review): expected is never read afterwards + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // These Stack Overflow posts explain how to measure wall-clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // compute c_out NITER times + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report elapsed milliseconds; NOTE(review): diff is never used (start/end hold milliseconds, not time_t) + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; /* NOTE(review): exit status is 0 even though no result file was written */ + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/16-by-16-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/16-by-16-mat-mul.c new file mode 100644 index 00000000..1323e38d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/16-by-16-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 16 +#define A_COLS 16 +#define B_COLS 16 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { /* c_out = a_in * b_in on flat row-major arrays; every c_out entry is reset to 0 before its dot product is accumulated. */ + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + 
a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in; NOTE(review): sized A_ROWS * B_COLS, but matrix_multiply declares A_COLS * B_COLS (equal here only because A_ROWS == A_COLS) + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected; NOTE(review): expected is never read afterwards + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // These Stack Overflow posts explain how to measure wall-clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // compute c_out NITER times + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report elapsed milliseconds; NOTE(review): diff is never used (start/end hold milliseconds, not time_t) + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; /* NOTE(review): exit status is 0 even though no result file was written */ + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/2-by-2-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/2-by-2-mat-mul.c new file mode 100644 index 00000000..517e4ae6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/2-by-2-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include 
"test-utils.h" + +#define A_ROWS 2 +#define A_COLS 2 +#define B_COLS 2 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { /* c_out = a_in * b_in on flat row-major arrays; every c_out entry is reset to 0 before its dot product is accumulated. */ + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in; NOTE(review): sized A_ROWS * B_COLS, but matrix_multiply declares A_COLS * B_COLS (equal here only because A_ROWS == A_COLS) + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected; NOTE(review): expected is never read afterwards + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // These Stack Overflow posts explain how to measure wall-clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/3-by-3-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/3-by-3-mat-mul.c new file mode 100644 index 00000000..fd58b5ed --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/3-by-3-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 3 +#define A_COLS 3 +#define B_COLS 3 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = 
(float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/4-by-4-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/4-by-4-mat-mul.c new file mode 100644 index 00000000..67c745b0 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/4-by-4-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + 
+#define A_ROWS 4 +#define A_COLS 4 +#define B_COLS 4 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/5-by-5-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/5-by-5-mat-mul.c new file mode 100644 index 00000000..48d0b2ce --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/5-by-5-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 5 +#define A_COLS 5 +#define B_COLS 5 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = 
(float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/6-by-6-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/6-by-6-mat-mul.c new file mode 100644 index 00000000..824b8c5a --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/6-by-6-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + 
+#define A_ROWS 6 +#define A_COLS 6 +#define B_COLS 6 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/7-by-7-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/7-by-7-mat-mul.c new file mode 100644 index 00000000..637e6d8c --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/7-by-7-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 7 +#define A_COLS 7 +#define B_COLS 7 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = 
(float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/8-by-8-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/8-by-8-mat-mul.c new file mode 100644 index 00000000..7eb44ae2 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/8-by-8-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + 
+#define A_ROWS 8 +#define A_COLS 8 +#define B_COLS 8 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/9-by-9-mat-mul.c b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/9-by-9-mat-mul.c new file mode 100644 index 00000000..1d0538f6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/9-by-9-mat-mul.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define A_ROWS 9 +#define A_COLS 9 +#define B_COLS 9 + +void matrix_multiply(float a_in[restrict A_ROWS * A_COLS], + float b_in[restrict A_COLS * B_COLS], + float c_out[restrict A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = 
(float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + matrix_multiply(a_in, b_in, c_out); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/mat-mul/test-utils.h b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/test-utils.h new file mode 100644 index 00000000..8b4a8d5d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/mat-mul/test-utils.h @@ -0,0 +1,6 @@ +#define NITER 10000000 + +#define MAX_FLOAT 100.00f +#define DELTA 0.01f + +#define FILE_PATH "data.txt" 
diff --git a/src/dios-egraphs/Diospyros/benchmarks/q-prod/qprod.c b/src/dios-egraphs/Diospyros/benchmarks/q-prod/qprod.c new file mode 100644 index 00000000..7185a9ad --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/q-prod/qprod.c @@ -0,0 +1,128 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define SIZE 4 + +__attribute__((always_inline)) void naive_cross_product(float *lhs, float *rhs, + float *result) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +__attribute__((always_inline)) void naive_point_product(float *q, float *p, + float *result) { + float qvec[3] = {q[0], q[1], q[2]}; + float uv[3]; + naive_cross_product(qvec, p, uv); + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + float qxuv[3]; + naive_cross_product(qvec, uv, qxuv); + + for (int i = 0; i < 3; i++) { + result[i] = p[i] + q[3] * uv[i] + qxuv[i]; + } +} + +void naive_quaternion_product(float *a_q, float *a_t, float *b_q, float *b_t, + float *r_q, float *r_t) { + r_q[3] = + a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * b_q[2]; + r_q[0] = + a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - a_q[2] * b_q[1]; + r_q[1] = + a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * b_q[2]; + r_q[2] = + a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * b_q[0]; + + naive_point_product(a_q, b_t, r_t); + for (int i = 0; i < 3; i++) { + r_t[i] += a_t[i]; + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float a_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + a_q[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float a_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + a_t[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float b_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + b_q[i] = (float)rand() / 
(float)(RAND_MAX / MAX_FLOAT); + } + float b_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + b_t[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float r_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + r_q[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float r_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + r_t[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expectedq[SIZE]; + for (int i = 0; i < SIZE; i++) { + expectedq[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expectedt[SIZE]; + for (int i = 0; i < SIZE; i++) { + expectedt[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // calculate up c_out + for (int i = 0; i < NITER; i++) { + naive_quaternion_product(a_q, a_t, b_q, b_t, r_q, r_t); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/q-prod/test-utils.h b/src/dios-egraphs/Diospyros/benchmarks/q-prod/test-utils.h new file mode 100644 index 00000000..8b4a8d5d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/q-prod/test-utils.h 
@@ -0,0 +1,6 @@ +#define NITER 10000000 + +#define MAX_FLOAT 100.00f +#define DELTA 0.01f + +#define FILE_PATH "data.txt" diff --git a/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/2-by-2-qr-decomp.c b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/2-by-2-qr-decomp.c new file mode 100644 index 00000000..3110904c --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/2-by-2-qr-decomp.c @@ -0,0 +1,176 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define SIZE 2 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float *a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(m, sizeof(float)); + float 
*e = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(m, sizeof(float)); + float *v = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(m * m, sizeof(float)); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(SIZE * SIZE, sizeof(float)); + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_fixed_transpose(Q); +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float expectedQ[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + + // This stackoverflow post explains how to calculate walk 
clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + naive_fixed_qr_decomp(A, Q, R); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/3-by-3-qr-decomp.c b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/3-by-3-qr-decomp.c new file mode 100644 index 00000000..66aabfb1 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/3-by-3-qr-decomp.c @@ -0,0 +1,176 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define SIZE 3 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float 
*a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(m, sizeof(float)); + float *e = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(m, sizeof(float)); + float *v = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(m * m, sizeof(float)); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(SIZE * SIZE, sizeof(float)); + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_fixed_transpose(Q); +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float expectedQ[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + naive_fixed_qr_decomp(A, Q, R); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-fixed-size.c b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/4-by-4-qr-decomp.c similarity index 58% rename from src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-fixed-size.c rename to src/dios-egraphs/Diospyros/benchmarks/qr-decomp/4-by-4-qr-decomp.c index 85807197..bf1e8ef1 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-fixed-size.c +++ b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/4-by-4-qr-decomp.c @@ -1,9 +1,13 @@ +#include #include #include #include #include #include #include +#include + +#include "test-utils.h" #define SIZE 4 @@ -18,7 +22,7 @@ float sgn(float v) { return (v > 0) - (v < 0); } float naive_norm(float *x, int m) { float sum = 0; for (int i = 0; i < m; i++) { - sum += pow(x[i], 2); + sum += x[i] * x[i]; } return sqrtf(sum); } @@ -37,7 +41,7 @@ void naive_fixed_transpose(float *a) { void naive_fixed_matrix_multiply(float *a, float *b, float *c) { for (int y = 0; y < 
SIZE; y++) { for (int x = 0; x < SIZE; x++) { - c[SIZE * y + x] = 0; + c[SIZE * y + x] = 0.0f; for (int k = 0; k < SIZE; k++) { c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; } @@ -49,7 +53,7 @@ void naive_fixed_qr_decomp(float *A, float *Q, float *R) { memcpy(R, A, sizeof(float) * SIZE * SIZE); // Build identity matrix of size SIZE * SIZE - float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + float *I = (float *)calloc(SIZE * SIZE, sizeof(float)); for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { I[i * SIZE + j] = (i == j); @@ -60,8 +64,8 @@ void naive_fixed_qr_decomp(float *A, float *Q, float *R) { for (int k = 0; k < SIZE - 1; k++) { int m = SIZE - k; - float *x = (float *)calloc(sizeof(float), m); - float *e = (float *)calloc(sizeof(float), m); + float *x = (float *)calloc(m, sizeof(float)); + float *e = (float *)calloc(m, sizeof(float)); for (int i = 0; i < m; i++) { int row = k + i; x[i] = R[row * SIZE + k]; @@ -70,17 +74,17 @@ void naive_fixed_qr_decomp(float *A, float *Q, float *R) { float alpha = -sgn(x[0]) * naive_norm(x, m); - float *u = (float *)calloc(sizeof(float), m); - float *v = (float *)calloc(sizeof(float), m); + float *u = (float *)calloc(m, sizeof(float)); + float *v = (float *)calloc(m, sizeof(float)); for (int i = 0; i < m; i++) { u[i] = x[i] + alpha * e[i]; } float norm_u = naive_norm(u, m); for (int i = 0; i < m; i++) { - v[i] = u[i] / norm_u; + v[i] = u[i] / (norm_u + 0.00001f); } - float *q_min = (float *)calloc(sizeof(float), m * m); + float *q_min = (float *)calloc(m * m, sizeof(float)); for (int i = 0; i < m; i++) { for (int j = 0; j < m; j++) { float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; @@ -88,7 +92,7 @@ void naive_fixed_qr_decomp(float *A, float *Q, float *R) { } } - float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + float *q_t = (float *)calloc(SIZE * SIZE, sizeof(float)); for (int i = 0; i < SIZE; i++) { for (int j = 0; j < SIZE; j++) { float q_t_i; @@ -105,7 +109,7 @@ void naive_fixed_qr_decomp(float *A, float *Q, float *R) { memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A } else { - float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + float *res = (float *)calloc(SIZE * SIZE, sizeof(float)); naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A memcpy(Q, res, sizeof(float) * SIZE * SIZE); naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A @@ -121,26 +125,52 @@ void naive_fixed_qr_decomp(float *A, float *Q, float *R) { naive_fixed_transpose(Q); } -int main(void) { - float A[SIZE * SIZE] = {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4}; - float Q[SIZE * SIZE] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - float R[SIZE * SIZE] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - naive_fixed_qr_decomp(A, Q, R); - for (int i = 0; i < SIZE; i++) { - for (int j = 0; j < SIZE; j++) { - printf("%f\n", A[i * SIZE + j]); - } +int main(void) __attribute__((optimize("no-unroll-loops"))) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); } - // naive_fixed_matrix_multiply(A, Q, R); - // for (int i = 0; i < SIZE; i++) { - // for (int j = 0; j < SIZE; j++) { - // printf("%f\n", A[i * SIZE + j]); - // } - // } - // naive_fixed_transpose(A); - // for (int i = 0; i < SIZE; i++) { - // for (int j = 0; j < SIZE; j++) { - // printf("%f\n", A[i * SIZE + j]); - // } - // } + + float Q[SIZE * SIZE] = {0.0f}; + float expectedQ[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float 
expectedR[SIZE * SIZE] = {0.0f}; + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + naive_fixed_qr_decomp(A, Q, R); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/5-by-5-qr-decomp.c b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/5-by-5-qr-decomp.c new file mode 100644 index 00000000..69eb4448 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/5-by-5-qr-decomp.c @@ -0,0 +1,176 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define SIZE 5 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * 
x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float *a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(m, sizeof(float)); + float *e = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(m, sizeof(float)); + float *v = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(m * m, sizeof(float)); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(SIZE * SIZE, sizeof(float)); + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_fixed_transpose(Q); +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float expectedQ[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + naive_fixed_qr_decomp(A, Q, R); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/6-by-6-qr-decomp.c b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/6-by-6-qr-decomp.c new file mode 100644 index 00000000..6716741a --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/6-by-6-qr-decomp.c @@ -0,0 +1,176 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define SIZE 6 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float *a) { + for 
(int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(m, sizeof(float)); + float *e = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(m, sizeof(float)); + float *v = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(m * m, sizeof(float)); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(SIZE * SIZE, sizeof(float)); + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_fixed_transpose(Q); +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float expectedQ[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + naive_fixed_qr_decomp(A, Q, R); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/test-utils.h b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/test-utils.h new file mode 100644 index 00000000..8b4a8d5d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/qr-decomp/test-utils.h @@ -0,0 +1,6 @@ +#define NITER 10000000 + +#define MAX_FLOAT 100.00f +#define DELTA 0.01f + +#define FILE_PATH "data.txt" diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/12-by-12-and-2-by-2-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/12-by-12-and-2-by-2-stencil.c new file mode 100644 index 00000000..feee7103 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/12-by-12-and-2-by-2-stencil.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 12 +#define COL_SIZE 12 +#define F_SIZE 4 +#define STENCIL_DIM 2 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], 
float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < STENCIL_DIM; k1++) { + for (int k2 = 0; k2 < STENCIL_DIM; k2++) { + temp += filter_in[k1 * STENCIL_DIM + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/12-by-12-and-3-by-3-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/12-by-12-and-3-by-3-stencil.c new file mode 100644 index 00000000..6bba3f8d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/12-by-12-and-3-by-3-stencil.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 12 +#define COL_SIZE 12 +#define F_SIZE 9 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + 
time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/16-by-16-and-2-by-2-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/16-by-16-and-2-by-2-stencil.c new file mode 100644 index 00000000..b4dfbe46 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/16-by-16-and-2-by-2-stencil.c @@ -0,0 +1,85 @@ +#include 
+#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 16 +#define COL_SIZE 16 +#define F_SIZE 4 +#define STENCIL_DIM 2 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < STENCIL_DIM; k1++) { + for (int k2 = 0; k2 < STENCIL_DIM; k2++) { + temp += filter_in[k1 * STENCIL_DIM + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/16-by-16-and-3-by-3-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/16-by-16-and-3-by-3-stencil.c new file mode 100644 index 00000000..1a784419 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/16-by-16-and-3-by-3-stencil.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 16 +#define COL_SIZE 16 +#define F_SIZE 9 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + 
time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/4-by-4-and-2-by-2-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/4-by-4-and-2-by-2-stencil.c new file mode 100644 index 00000000..1a5a123c --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/4-by-4-and-2-by-2-stencil.c @@ -0,0 +1,85 @@ +#include 
+#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 4 +#define COL_SIZE 4 +#define F_SIZE 4 +#define STENCIL_DIM 2 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < STENCIL_DIM; k1++) { + for (int k2 = 0; k2 < STENCIL_DIM; k2++) { + temp += filter_in[k1 * STENCIL_DIM + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/4-by-4-and-3-by-3-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/4-by-4-and-3-by-3-stencil.c new file mode 100644 index 00000000..5238d224 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/4-by-4-and-3-by-3-stencil.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 4 +#define COL_SIZE 4 +#define F_SIZE 9 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = 
time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/5-by-5-and-2-by-2-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/5-by-5-and-2-by-2-stencil.c new file mode 100644 index 00000000..72d69d35 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/5-by-5-and-2-by-2-stencil.c @@ -0,0 +1,85 @@ +#include +#include 
+#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 5 +#define COL_SIZE 5 +#define F_SIZE 4 +#define STENCIL_DIM 2 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < STENCIL_DIM; k1++) { + for (int k2 = 0; k2 < STENCIL_DIM; k2++) { + temp += filter_in[k1 * STENCIL_DIM + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/5-by-5-and-3-by-3-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/5-by-5-and-3-by-3-stencil.c new file mode 100644 index 00000000..a36889ec --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/5-by-5-and-3-by-3-stencil.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 5 +#define COL_SIZE 5 +#define F_SIZE 9 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = 
time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/6-by-6-and-2-by-2-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/6-by-6-and-2-by-2-stencil.c new file mode 100644 index 00000000..aa2cfda6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/6-by-6-and-2-by-2-stencil.c @@ -0,0 +1,85 @@ +#include +#include 
+#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 6 +#define COL_SIZE 6 +#define F_SIZE 4 +#define STENCIL_DIM 2 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < STENCIL_DIM; k1++) { + for (int k2 = 0; k2 < STENCIL_DIM; k2++) { + temp += filter_in[k1 * STENCIL_DIM + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/6-by-6-and-3-by-3-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/6-by-6-and-3-by-3-stencil.c new file mode 100644 index 00000000..0e048be4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/6-by-6-and-3-by-3-stencil.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 6 +#define COL_SIZE 6 +#define F_SIZE 9 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = 
time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/8-by-8-and-2-by-2-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/8-by-8-and-2-by-2-stencil.c new file mode 100644 index 00000000..76b38531 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/8-by-8-and-2-by-2-stencil.c @@ -0,0 +1,85 @@ +#include +#include 
+#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 8 +#define COL_SIZE 8 +#define F_SIZE 4 +#define STENCIL_DIM 2 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < STENCIL_DIM; k1++) { + for (int k2 = 0; k2 < STENCIL_DIM; k2++) { + temp += filter_in[k1 * STENCIL_DIM + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. 
+ // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/8-by-8-and-3-by-3-stencil.c b/src/dios-egraphs/Diospyros/benchmarks/stencil/8-by-8-and-3-by-3-stencil.c new file mode 100644 index 00000000..188adc54 --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/8-by-8-and-3-by-3-stencil.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include + +#include "test-utils.h" + +#define ROW_SIZE 8 +#define COL_SIZE 8 +#define F_SIZE 9 + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) __attribute__((optimize("no-unroll-loops"))) { + time_t t = 
time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + + // This stackoverflow post explains how to calculate walk clock time. + // https://stackoverflow.com/questions/42046712/how-to-record-elaspsed-wall-time-in-c + // https://stackoverflow.com/questions/13156031/measuring-time-in-c + // https://stackoverflow.com/questions/10192903/time-in-milliseconds-in-c + // start timer + long start, end; + struct timeval timecheck; + + gettimeofday(&timecheck, NULL); + start = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + +// calculate up c_out +#pragma nounroll + for (int i = 0; i < NITER; i++) { + stencil(orig_in, sol_out, filter_in); + } + + // end timer + gettimeofday(&timecheck, NULL); + end = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000; + + // report difference in runtime + double diff = difftime(end, start); + FILE *fptr = fopen(FILE_PATH, "w"); + if (fptr == NULL) { + printf("Could not open file"); + return 0; + } + fprintf(fptr, "%ld\n", (end - start)); + fclose(fptr); + printf("%ld milliseconds elapsed over %d iterations total\n", (end - start), + NITER); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/benchmarks/stencil/test-utils.h b/src/dios-egraphs/Diospyros/benchmarks/stencil/test-utils.h new file mode 100644 index 00000000..8b4a8d5d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/stencil/test-utils.h @@ -0,0 +1,6 @@ +#define NITER 10000000 + +#define MAX_FLOAT 100.00f +#define DELTA 0.01f 
+ +#define FILE_PATH "data.txt" diff --git a/src/dios-egraphs/Diospyros/benchmarks/test-utils.h b/src/dios-egraphs/Diospyros/benchmarks/test-utils.h new file mode 100644 index 00000000..8b4a8d5d --- /dev/null +++ b/src/dios-egraphs/Diospyros/benchmarks/test-utils.h @@ -0,0 +1,6 @@ +#define NITER 10000000 + +#define MAX_FLOAT 100.00f +#define DELTA 0.01f + +#define FILE_PATH "data.txt" diff --git a/src/dios-egraphs/Diospyros/build.rs b/src/dios-egraphs/Diospyros/build.rs index 5aed4b51..2b73e6db 100644 --- a/src/dios-egraphs/Diospyros/build.rs +++ b/src/dios-egraphs/Diospyros/build.rs @@ -31,4 +31,16 @@ fn main() { } build_diospyros.flag("-fexceptions"); build_diospyros.compile("libdiospass.a"); + + // Build the AddressRewriting C++ file. + let mut build_address_rewriting = cc::Build::new(); + build_address_rewriting + .cpp(true) + .warnings(false) // LLVM headers have lots of spurious warnings. + .file("AddressRewriting.cpp"); + for flag in cxxflags.split_ascii_whitespace() { + build_address_rewriting.flag(&flag); + } + build_address_rewriting.flag("-fexceptions"); + build_address_rewriting.compile("libadrwpass.a"); } diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.c b/src/dios-egraphs/Diospyros/c-tests/2d-2d-conv.c similarity index 84% rename from src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.c rename to src/dios-egraphs/Diospyros/c-tests/2d-2d-conv.c index 68888c97..372be34f 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.c +++ b/src/dios-egraphs/Diospyros/c-tests/2d-2d-conv.c @@ -1,3 +1,4 @@ +#include #include #define I_ROWS 2 @@ -7,8 +8,9 @@ #define O_ROWS ((I_ROWS + F_ROWS) - 1) #define O_COLS ((I_COLS + F_COLS) - 1) -void convolution(float mat_in[I_ROWS][I_COLS], float f_in[F_ROWS][F_COLS], - float mat_out[O_ROWS][O_COLS]) { +void convolution(float mat_in[restrict I_ROWS][I_COLS], + float f_in[restrict F_ROWS][F_COLS], + float mat_out[restrict O_ROWS][O_COLS]) { for (int outRow = 0; outRow < O_ROWS; outRow++) { for (int outCol = 
0; outCol < O_COLS; outCol++) { for (int fRow = 0; fRow < F_ROWS; fRow++) { @@ -37,9 +39,11 @@ int main(void) { float f_in[F_ROWS][F_COLS] = {{1, 1}, {1, 1}}; float mat_out[O_ROWS][O_COLS] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; convolution(mat_in, f_in, mat_out); + float expected[O_ROWS][O_COLS] = {{1, 3, 2}, {4, 10, 6}, {3, 7, 4}}; for (int i = 0; i < O_ROWS; i++) { for (int j = 0; j < O_COLS; j++) { printf("output: %f\n", mat_out[i][j]); + assert(mat_out[i][j] == expected[i][j]); } } // output: 1.000000 diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-conv.c b/src/dios-egraphs/Diospyros/c-tests/2d-conv.c similarity index 93% rename from src/dios-egraphs/Diospyros/llvm-tests/2d-conv.c rename to src/dios-egraphs/Diospyros/c-tests/2d-conv.c index e1614e05..3d1be0ea 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/2d-conv.c +++ b/src/dios-egraphs/Diospyros/c-tests/2d-conv.c @@ -1,3 +1,4 @@ +#include #include #define I_ROWS 2 @@ -38,9 +39,11 @@ int main(void) { float mat_in[I_ROWS * I_COLS] = {1, 2, 3, 4}; float f_in[F_ROWS * F_COLS] = {1, 1, 1, 1}; float mat_out[O_ROWS * O_COLS] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; + float expected[O_ROWS * O_COLS] = {1, 3, 2, 4, 10, 6, 3, 7, 4}; convolution(mat_in, f_in, mat_out); for (int i = 0; i < O_ROWS * O_COLS; i++) { printf("output: %f\n", mat_out[i]); + assert(mat_out[i] == expected[i]); } // output: 1.000000 // output: 3.000000 diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply-new.c b/src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply-new.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply-new.c rename to src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply-new.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply.c similarity index 70% rename from src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply.c rename to src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply.c index 
371b7967..4b7f7961 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply.c +++ b/src/dios-egraphs/Diospyros/c-tests/2d-matrix-multiply.c @@ -1,11 +1,13 @@ +#include #include #define A_ROWS 2 #define A_COLS 2 #define B_COLS 2 -void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], - float c_out[A_ROWS][B_COLS]) { +void matrix_multiply(float a_in[restrict A_ROWS][A_COLS], + float b_in[restrict A_COLS][B_COLS], + float c_out[restrict A_ROWS][B_COLS]) { for (int i = 0; i < A_ROWS; i++) { for (int j = 0; j < B_COLS; j++) { float sum = 0.0; @@ -26,6 +28,10 @@ int main(void) { printf("second: %f\n", c_out[0][1]); printf("third: %f\n", c_out[1][0]); printf("fourth: %f\n", c_out[1][1]); + assert(c_out[0][0] == 7); + assert(c_out[0][1] == 10); + assert(c_out[1][0] == 15); + assert(c_out[1][1] == 22); // expected (7, 10, 15, 22) return 0; } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d_new.c b/src/dios-egraphs/Diospyros/c-tests/2d.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/2d_new.c rename to src/dios-egraphs/Diospyros/c-tests/2d.c diff --git a/src/dios-egraphs/Diospyros/c-tests/3-by-3-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/3-by-3-matrix-multiply.c new file mode 100644 index 00000000..1fde8829 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/3-by-3-matrix-multiply.c @@ -0,0 +1,43 @@ +#include +#include + +#define A_ROWS 3 +#define A_COLS 3 +#define B_COLS 3 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + float a_in[A_ROWS][A_COLS] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + float b_in[A_COLS][B_COLS] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + float c_out[A_ROWS][B_COLS] = {{1, 2, 
3}, {4, 5, 6}, {7, 8, 9}}; + matrix_multiply(a_in, b_in, c_out); + float expected[A_ROWS][B_COLS] = {{30, 36, 42}, {66, 81, 96}, {102, 126, 150}}; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + assert(expected[i][j] == c_out[i][j]); + } + } + // output: 30.000000 + // output: 36.000000 + // output: 42.000000 + // output: 66.000000 + // output: 81.000000 + // output: 96.000000 + // output: 102.000000 + // output: 126.000000 + // output: 150.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/4-by-4-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/4-by-4-matrix-multiply.c new file mode 100644 index 00000000..4e9f294a --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/4-by-4-matrix-multiply.c @@ -0,0 +1,53 @@ +#include +#include + +#define A_ROWS 4 +#define A_COLS 4 +#define B_COLS 4 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + float a_in[A_ROWS][A_COLS] = { + {1, 2, 3, 1}, {1, 2, 3, 1}, {4, 5, 6, 1}, {7, 8, 9, 1}}; + float b_in[A_COLS][B_COLS] = { + {1, 2, 3, 1}, {1, 2, 3, 1}, {4, 5, 6, 1}, {7, 8, 9, 1}}; + float c_out[A_ROWS][B_COLS] = { + {1, 2, 3, 1}, {1, 2, 3, 1}, {4, 5, 6, 1}, {7, 8, 9, 1}}; + float expected_c_out[A_ROWS][B_COLS] = { + {1, 2, 3, 1}, {1, 2, 3, 1}, {4, 5, 6, 1}, {7, 8, 9, 1}}; + matrix_multiply(a_in, b_in, c_out); + 
no_opt_matrix_multiply(a_in, b_in, expected_c_out); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + assert(expected_c_out[i][j] == c_out[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/5-by-5-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/5-by-5-matrix-multiply.c new file mode 100644 index 00000000..bab5bad9 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/5-by-5-matrix-multiply.c @@ -0,0 +1,59 @@ +#include +#include + +#define A_ROWS 5 +#define A_COLS 5 +#define B_COLS 5 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + float a_in[A_ROWS][A_COLS] = {{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}}; + float b_in[A_COLS][B_COLS] = {{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}}; + float c_out[A_ROWS][B_COLS] = {{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {1, 2, 3, 4, 5}}; + matrix_multiply(a_in, b_in, c_out); + float expected[A_ROWS][B_COLS] = {{45, 60, 75, 90, 105}, {120, 160, 200, 240, 280}, {45, 60, 75, 90, 105}, {120, 160, 200, 240, 280},{45, 60, 75, 90, 105}}; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + assert(expected[i][j] == c_out[i][j]); + } + } + // output: 45.000000 + // output: 60.000000 + // output: 75.000000 + // output: 90.000000 + // output: 105.000000 + // output: 120.000000 + // output: 160.000000 + // output: 200.000000 + // output: 240.000000 + // output: 280.000000 + // output: 45.000000 + // output: 60.000000 + // output: 75.000000 + // output: 90.000000 + 
// output: 105.000000 + // output: 120.000000 + // output: 160.000000 + // output: 200.000000 + // output: 240.000000 + // output: 280.000000 + // output: 45.000000 + // output: 60.000000 + // output: 75.000000 + // output: 90.000000 + // output: 105.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/README.md b/src/dios-egraphs/Diospyros/c-tests/README.md new file mode 100644 index 00000000..c4328512 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/README.md @@ -0,0 +1 @@ +C tests contains tests of simple c programs that are run through the diospyros pass, and the outputs, which are printed out, are compared between optimization with and without diospyros. The "correct" result is taken to be optimization without diospyros. \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/add-mul-interleave.c b/src/dios-egraphs/Diospyros/c-tests/add-mul-interleave.c new file mode 100644 index 00000000..f200b105 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/add-mul-interleave.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void interleave(float a_in[restrict SIZE], float b_in[restrict SIZE], + float c_out[restrict SIZE]) { + c_out[0] = a_in[0] + b_in[0]; + c_out[1] = a_in[1] * b_in[1]; + c_out[2] = a_in[2] + b_in[2]; + c_out[3] = a_in[3] * b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + interleave(a_in, b_in, c_out); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + assert(c_out[0] == 6); + assert(c_out[1] == 12); + assert(c_out[2] == 10); + assert(c_out[3] == 32); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/add_new.c b/src/dios-egraphs/Diospyros/c-tests/add.c similarity index 86% rename from src/dios-egraphs/Diospyros/llvm-tests/add_new.c rename to 
src/dios-egraphs/Diospyros/c-tests/add.c index ebbcdf35..21b3f656 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/add_new.c +++ b/src/dios-egraphs/Diospyros/c-tests/add.c @@ -2,7 +2,8 @@ #include #define SIZE 4 -void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { +void sum(float a_in[restrict SIZE], float b_in[restrict SIZE], + float c_out[restrict SIZE]) { c_out[0] = a_in[0] + b_in[0]; c_out[1] = a_in[1] + b_in[1]; c_out[2] = a_in[2] + b_in[2]; @@ -14,14 +15,14 @@ int main(int argc, char **argv) { float b_in[SIZE] = {5, 6, 7, 8}; float c_out[SIZE]; sum(a_in, b_in, c_out); - assert(c_out[0] == 6); - assert(c_out[1] == 8); - assert(c_out[2] == 10); - assert(c_out[3] == 12); printf("first: %f\n", c_out[0]); printf("second: %f\n", c_out[1]); printf("third: %f\n", c_out[2]); printf("fourth: %f\n", c_out[3]); + assert(c_out[0] == 6); + assert(c_out[1] == 8); + assert(c_out[2] == 10); + assert(c_out[3] == 12); // expected: 6, 8, 10, 12 return 0; } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/add_mult_new.c b/src/dios-egraphs/Diospyros/c-tests/add_mult.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/add_mult_new.c rename to src/dios-egraphs/Diospyros/c-tests/add_mult.c diff --git a/src/dios-egraphs/Diospyros/c-tests/all-arith-ops.c b/src/dios-egraphs/Diospyros/c-tests/all-arith-ops.c new file mode 100644 index 00000000..a3b81c76 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/all-arith-ops.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] * b_in[i] + (c_in[i] - a_in[i]); + } else { + d_out[i] = a_in[i] * c_in[i] + (c_in[i] - a_in[i]); + } + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + 
float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] * b_in[i] + (c_in[i] - a_in[i]); + } else { + d_out[i] = a_in[i] * c_in[i] + (c_in[i] - a_in[i]); + } + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/array-reversal.c b/src/dios-egraphs/Diospyros/c-tests/array-reversal.c new file mode 100644 index 00000000..e6b9acd6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/array-reversal.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float temp_array[ROWS]; + for (int i = 0; i < ROWS; i++) { + temp_array[i] = d_out[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = temp_array[ROWS - i - 1]; + } +} + +void 
no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float temp_array[ROWS]; + for (int i = 0; i < ROWS; i++) { + temp_array[i] = d_out[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = temp_array[ROWS - i - 1]; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = d_out[i]; + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/array-symmetric.c b/src/dios-egraphs/Diospyros/c-tests/array-symmetric.c new file mode 100644 index 00000000..61c8f47a --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/array-symmetric.c @@ -0,0 +1,66 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + d_out[i] = d_out[ROWS - i - 1]; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + 
float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + d_out[i] = d_out[ROWS - i - 1]; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = d_out[i]; + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/branching-add.c b/src/dios-egraphs/Diospyros/c-tests/branching-add.c new file mode 100644 index 00000000..229453aa --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/branching-add.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] + b_in[i]; + } else { + d_out[i] = a_in[i] + c_in[i]; + } + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] + b_in[i]; + } else { + 
d_out[i] = a_in[i] + c_in[i]; + } + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/branching-mul.c b/src/dios-egraphs/Diospyros/c-tests/branching-mul.c new file mode 100644 index 00000000..6c42c7ed --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/branching-mul.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] * b_in[i]; + } else { + d_out[i] = a_in[i] * c_in[i]; + } + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] * b_in[i]; + } else { + d_out[i] = a_in[i] 
* c_in[i]; + } + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/branching-sub.c b/src/dios-egraphs/Diospyros/c-tests/branching-sub.c new file mode 100644 index 00000000..bf7914ca --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/branching-sub.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] + b_in[i] - c_in[i]; + } else { + d_out[i] = a_in[i] + c_in[i] - b_in[i]; + } + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] + b_in[i] - c_in[i]; + } else { + 
d_out[i] = a_in[i] + c_in[i] - b_in[i]; + } + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/break-w.c b/src/dios-egraphs/Diospyros/c-tests/break-w.c similarity index 85% rename from src/dios-egraphs/Diospyros/llvm-tests/break-w.c rename to src/dios-egraphs/Diospyros/c-tests/break-w.c index 26581410..8afaed68 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/break-w.c +++ b/src/dios-egraphs/Diospyros/c-tests/break-w.c @@ -1,4 +1,5 @@ #include +#include #define SIZE 8 void break_w_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { @@ -16,8 +17,10 @@ int main(void) { float scalar_in = 10; float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; break_w_test(a_in, scalar_in, b_out); + float expected[SIZE] = {10, 0, 0, 0, 50, 40, 30, 20}; for (int i = 0; i < SIZE; i++) { printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); } // 10.000000 // 0.000000 
diff --git a/src/dios-egraphs/Diospyros/llvm-tests/break.c b/src/dios-egraphs/Diospyros/c-tests/break.c similarity index 84% rename from src/dios-egraphs/Diospyros/llvm-tests/break.c rename to src/dios-egraphs/Diospyros/c-tests/break.c index e9e7f628..1790765f 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/break.c +++ b/src/dios-egraphs/Diospyros/c-tests/break.c @@ -1,4 +1,5 @@ #include +#include #define SIZE 8 void break_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { @@ -14,8 +15,10 @@ int main(void) { float scalar_in = 10; float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; break_test(a_in, scalar_in, b_out); + float expected[SIZE] = {10, 0, 0, 0, 50, 40, 30, 20}; for (int i = 0; i < SIZE; i++) { printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); } // 10.000000 // 0.000000 diff --git a/src/dios-egraphs/Diospyros/c-tests/calloc.c b/src/dios-egraphs/Diospyros/c-tests/calloc.c new file mode 100644 index 00000000..b07ebe35 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/calloc.c @@ -0,0 +1,31 @@ +#include +#include +#include +#define SIZE 4 + +void calloc_func(int m, float q_out[SIZE][SIZE]) { + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + q_min[i * m + j] = 10.0f; + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + q_out[i][j] = q_min[i * m + j]; + } + } +} + +int main(int argc, char **argv) { + float q_out[SIZE][SIZE] = { + {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}; + calloc_func(SIZE, q_out); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("q_out: %f\n", q_out[i][j]); + assert(q_out[i][j] == 10); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/cols-greater-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/cols-greater-matrix-multiply.c new file mode 100644 index 00000000..0e3a93d8 --- /dev/null +++ 
b/src/dios-egraphs/Diospyros/c-tests/cols-greater-matrix-multiply.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 5 +#define A_COLS 4 +#define B_COLS 7 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_COLS][B_COLS]; + for (int i = 0; i < A_COLS; i++) { + for (int j = 0; j < B_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git 
a/src/dios-egraphs/Diospyros/llvm-tests/continue-w.c b/src/dios-egraphs/Diospyros/c-tests/continue-w.c similarity index 85% rename from src/dios-egraphs/Diospyros/llvm-tests/continue-w.c rename to src/dios-egraphs/Diospyros/c-tests/continue-w.c index eef0cd58..d7b42a84 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/continue-w.c +++ b/src/dios-egraphs/Diospyros/c-tests/continue-w.c @@ -1,3 +1,4 @@ +#include #include #define SIZE 8 @@ -18,8 +19,10 @@ int main(void) { float scalar_in = 10; float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; continue_w_test(a_in, scalar_in, b_out); + float expected[SIZE] = {0, 0, 0, 0, 50, 40, 30, 20}; for (int i = 0; i < SIZE; i++) { printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); } // 0.000000 // 0.000000 diff --git a/src/dios-egraphs/Diospyros/llvm-tests/continue.c b/src/dios-egraphs/Diospyros/c-tests/continue.c similarity index 84% rename from src/dios-egraphs/Diospyros/llvm-tests/continue.c rename to src/dios-egraphs/Diospyros/c-tests/continue.c index 37206464..40bc13b8 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/continue.c +++ b/src/dios-egraphs/Diospyros/c-tests/continue.c @@ -1,3 +1,4 @@ +#include #include #define SIZE 8 @@ -13,8 +14,10 @@ int main(void) { float scalar_in = 10; float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; continue_test(a_in, scalar_in, b_out); + float expected[SIZE] = {0, 0, 0, 0, 50, 40, 30, 20}; for (int i = 0; i < SIZE; i++) { printf("%f\n", b_out[i]); + assert(expected[i] == b_out[i]); } // 0.000000 // 0.000000 diff --git a/src/dios-egraphs/Diospyros/llvm-tests/cube-new.c b/src/dios-egraphs/Diospyros/c-tests/cube.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/cube-new.c rename to src/dios-egraphs/Diospyros/c-tests/cube.c diff --git a/src/dios-egraphs/Diospyros/c-tests/div.c b/src/dios-egraphs/Diospyros/c-tests/div.c new file mode 100644 index 00000000..90c19534 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/div.c @@ -0,0 +1,28 @@ +#include +#include 
+#include +#define SIZE 4 +#define DELTA 0.1 + +void sum(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = b_in[0] / a_in[0]; + c_out[1] = b_in[1] / a_in[1]; + c_out[2] = b_in[2] / a_in[2]; + c_out[3] = b_in[3] / a_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 6, 8}; + float c_out[SIZE]; + sum(a_in, b_in, c_out); + assert(fabs(c_out[0] - 5.0) < DELTA); + assert(fabs(c_out[1] - 3.0) < DELTA); + assert(fabs(c_out[2] - 2.0) < DELTA); + assert(fabs(c_out[3] - 2.0) < DELTA); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/dot-product.c b/src/dios-egraphs/Diospyros/c-tests/dot-product.c new file mode 100644 index 00000000..e61d8cbb --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/dot-product.c @@ -0,0 +1,76 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float sum = 0.0f; + for (int i = 0; i < ROWS; i++) { + sum += a_in[i] * b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + float temp = (float)i + sum; + d_out[i] = temp; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float sum = 0.0f; + for (int i = 0; i < ROWS; i++) { + sum += a_in[i] * b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + float temp = (float)i + sum; + d_out[i] = temp; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in 
c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/fft.c b/src/dios-egraphs/Diospyros/c-tests/fft.c new file mode 100644 index 00000000..aee3b33d --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/fft.c @@ -0,0 +1,167 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 8 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define MAX_FOR_LOOP_ITERATIONS 1000 + +void fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], + float img_twid_in[SIZE / 2], float real_out[SIZE], + float img_out[SIZE]) { + int even = 0; + int odd = 0; + int log = 0; + int rootindex = 0; + int span = SIZE >> 1; + float temp = 0; + + for (int i = 0; i < SIZE; i++) { + real_out[i] = real_in[i]; + img_out[i] = img_in[i]; + } + + while (span != 0) { + odd = span; + while (odd < SIZE) { + odd = odd | span; + even = odd ^ span; + + temp = real_out[even] + real_out[odd]; + real_out[odd] = real_out[even] - real_out[odd]; + real_out[even] = temp; + + temp = img_out[even] + img_out[odd]; + img_out[odd] = img_out[even] - img_out[odd]; + img_out[even] = temp; + + rootindex = (even << log) & (SIZE - 1); + if (rootindex > 0) { + temp = real_twid_in[rootindex] 
* real_out[odd] - + img_twid_in[rootindex] * img_out[odd]; + img_out[odd] = real_twid_in[rootindex] * img_out[odd] + + img_twid_in[rootindex] * real_out[odd]; + real_out[odd] = temp; + } + odd += 1; + } + span >>= 1; + log += 1; + } +} + +void no_opt_fft(float real_in[SIZE], float img_in[SIZE], + float real_twid_in[SIZE / 2], float img_twid_in[SIZE / 2], + float real_out[SIZE], float img_out[SIZE]) { + int even = 0; + int odd = 0; + int log = 0; + int rootindex = 0; + int span = SIZE >> 1; + float temp = 0; + + for (int i = 0; i < SIZE; i++) { + real_out[i] = real_in[i]; + img_out[i] = img_in[i]; + } + + while (span != 0) { + odd = span; + while (odd < SIZE) { + odd = odd | span; + even = odd ^ span; + + temp = real_out[even] + real_out[odd]; + real_out[odd] = real_out[even] - real_out[odd]; + real_out[even] = temp; + + temp = img_out[even] + img_out[odd]; + img_out[odd] = img_out[even] - img_out[odd]; + img_out[even] = temp; + + rootindex = (even << log) & (SIZE - 1); + if (rootindex > 0) { + temp = real_twid_in[rootindex] * real_out[odd] - + img_twid_in[rootindex] * img_out[odd]; + img_out[odd] = real_twid_in[rootindex] * img_out[odd] + + img_twid_in[rootindex] * real_out[odd]; + real_out[odd] = temp; + } + odd += 1; + } + span >>= 1; + log += 1; + } +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float real_in[SIZE] = {0.0f}; + float img_in[SIZE] = {0.0f}; + float real_twid_in[SIZE / 2] = {0.0f}; + float img_twid_in[SIZE / 2] = {0.0f}; + float real_out[SIZE] = {0.0f}; + float img_out[SIZE] = {0.0f}; + + float expected_real_in[SIZE] = {0.0f}; + float expected_img_in[SIZE] = {0.0f}; + float expected_real_twid_in[SIZE / 2] = {0.0f}; + float expected_img_twid_in[SIZE / 2] = {0.0f}; + float expected_real_out[SIZE] = {0.0f}; + float expected_img_out[SIZE] = {0.0f}; + + for (int i = 0; i < SIZE; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + real_in[i] = n; + expected_real_in[i] = n; + } + for (int i = 0; i < 
SIZE; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + img_in[i] = n; + expected_img_in[i] = n; + } + for (int i = 0; i < SIZE / 2; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + real_twid_in[i] = n; + expected_real_twid_in[i] = n; + } + for (int i = 0; i < SIZE / 2; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + img_twid_in[i] = n; + expected_img_twid_in[i] = n; + } + for (int i = 0; i < SIZE; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + real_out[i] = n; + expected_real_out[i] = n; + } + for (int i = 0; i < SIZE; i++) { + float n = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + img_out[i] = n; + expected_img_out[i] = n; + } + + fft(real_in, img_in, real_twid_in, img_twid_in, real_out, img_out); + no_opt_fft(expected_real_in, expected_img_in, expected_real_twid_in, + expected_img_twid_in, expected_real_out, expected_img_out); + + for (int i = 0; i < SIZE; i++) { + printf("Real Out Output: %f\n", real_out[i]); + printf("Expected Real Out Output: %f\n", expected_real_out[i]); + assert(fabs(real_out[i] - expected_real_out[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Img Out Output: %f\n", img_out[i]); + printf("Expected Img Out Output: %f\n", expected_img_out[i]); + assert(fabs(img_out[i] - expected_img_out[i]) < DELTA); + } +} diff --git a/src/dios-egraphs/Diospyros/llvm-tests/five_binops_new.c b/src/dios-egraphs/Diospyros/c-tests/five_binops.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/five_binops_new.c rename to src/dios-egraphs/Diospyros/c-tests/five_binops.c diff --git a/src/dios-egraphs/Diospyros/c-tests/identity_matrix.c b/src/dios-egraphs/Diospyros/c-tests/identity_matrix.c new file mode 100644 index 00000000..9e5f062e --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/identity_matrix.c @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1 + +void naive_fixed_qr_decomp(float 
A[SIZE], float Q[SIZE], float R[SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } +} + +int main(void) { + float A[SIZE * SIZE] = {0, 1, 2, 3}; + float Q[SIZE * SIZE] = {0, 1, 2, 3}; + float R[SIZE * SIZE] = {0, 1, 2, 3}; + float AExpected[SIZE * SIZE] = {0, 1, 2, 3}; + float QExpected[SIZE * SIZE] = {0, 1, 2, 3}; + float RExpected[SIZE * SIZE] = {0, 1, 2, 3}; + naive_fixed_qr_decomp(A, Q, R); + no_opt_naive_fixed_qr_decomp(AExpected, QExpected, RExpected); + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Expected Q: %f\n", QExpected[i]); + printf("Actual Q: %f\n", Q[i]); + assert(fabsf(QExpected[i] - Q[i]) < DELTA); + } + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Expected R: %f\n", RExpected[i]); + printf("Actual R: %f\n", R[i]); + assert(fabsf(RExpected[i] - R[i]) < DELTA); + } + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Expected A: %f\n", AExpected[i]); + printf("Actual A: %f\n", A[i]); + assert(fabsf(AExpected[i] - A[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/if-else-new.c b/src/dios-egraphs/Diospyros/c-tests/if-else.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/if-else-new.c rename to src/dios-egraphs/Diospyros/c-tests/if-else.c diff --git a/src/dios-egraphs/Diospyros/c-tests/inline-float.c b/src/dios-egraphs/Diospyros/c-tests/inline-float.c new file mode 100644 index 00000000..2b0245d8 --- /dev/null 
+++ b/src/dios-egraphs/Diospyros/c-tests/inline-float.c @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + return sum; +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = no_opt_test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +float test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + return sum; +} + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +int main() { + float A[SIZE] = {1.0f}; + float expectedA[SIZE] = {1.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = 1.0f; + expectedA[i] = 1.0f; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + B[i] = 0.0f; + expectedB[i] = 0.0f; + } + float C[SIZE] = {0.0f}; + float expectedC[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + C[i] = 0.0f; + expectedC[i] = 0.0f; + } + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("C Output: %f\n", C[i]); + printf("Expected C Output: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/inline-void.c b/src/dios-egraphs/Diospyros/c-tests/inline-void.c new file mode 100644 index 00000000..71e7aa89 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/inline-void.c @@ -0,0 +1,54 @@ +#include +#include 
+#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +void test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +void no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } +} + +void no_opt_test(float A[SIZE], float B[SIZE]) { + no_opt_test_inline(A, B, SIZE); +} + +void test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } +} + +void test(float A[SIZE], float B[SIZE]) { test_inline(A, B, SIZE); } + +int main() { + float A[SIZE] = {1.0f}; + float expectedA[SIZE] = {1.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = 1.0f; + expectedA[i] = 1.0f; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + B[i] = 0.0f; + expectedB[i] = 0.0f; + } + test(A, B); + no_opt_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/load_reuse.c b/src/dios-egraphs/Diospyros/c-tests/load_reuse.c new file mode 100644 index 00000000..3a447db4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/load_reuse.c @@ -0,0 +1,81 @@ +#include +#include + +#define I_ROWS 2 +#define I_COLS 2 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void load_use_twice(float mat_in[I_ROWS][I_COLS], float f_in[F_ROWS][F_COLS], + float mat_out[O_ROWS][O_COLS], + float mat_out2[O_ROWS][O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - 
fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = + mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + mat_out[outRow][outCol] += + 3 * v - + 4; // try something to use v in a different way + mat_out2[outRow][outCol] += + 2 * v + + 1; // try something to use v in a different way + } + } + } + } + } +} + +int main(void) { + float mat_in[I_ROWS][I_COLS] = {{1, 2}, {3, 4}}; + float f_in[F_ROWS][F_COLS] = {{1, 1}, {1, 1}}; + float mat_out1[O_ROWS][O_COLS] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; + float mat_out2[O_ROWS][O_COLS] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; + load_use_twice(mat_in, f_in, mat_out1, mat_out2); + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + printf("output: %f\n", mat_out1[i][j]); + printf("output: %f\n", mat_out2[i][j]); + } + } + float output1[O_ROWS][O_COLS] = {{-1, 1, 2}, {4, 14, 10}, {5, 13, 8}}; + float output2[O_ROWS][O_COLS] = {{3, 8, 5}, {10, 24, 14}, {7, 16, 9}}; + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + assert(output1[i][j] == mat_out1[i][j]); + assert(output2[i][j] == mat_out2[i][j]); + } + } +// output: -1.000000 +// output: 3.000000 +// output: 1.000000 +// output: 8.000000 +// output: 2.000000 +// output: 5.000000 + +// output: 4.000000 +// output: 10.000000 +// output: 14.000000 +// output: 24.000000 +// output: 10.000000 +// output: 14.000000 + +// output: 5.000000 +// output: 7.000000 +// output: 13.000000 +// output: 16.000000 +// output: 8.000000 +// output: 9.000000 + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/local-array-1.c b/src/dios-egraphs/Diospyros/c-tests/local-array-1.c new file mode 100644 index 00000000..1622fb14 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/local-array-1.c @@ -0,0 +1,35 @@ +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +void test(float A[SIZE]) { + float x[SIZE] = {[0 ... 
SIZE - 1] = 3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = x[i]; + } +} + +void no_opt_test(float A[SIZE]) { + float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = x[i]; + } +} + +int main() { + float A[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float expectedA[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + test(A); + no_opt_test(expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("expected: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/c-tests/local-array-2.c b/src/dios-egraphs/Diospyros/c-tests/local-array-2.c new file mode 100644 index 00000000..b7be1f82 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/local-array-2.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] += x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] += A[i]; + } + for (int i = 0; i < SIZE; i++) { + B[i] -= x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] += B[i]; + } +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float x[SIZE] = {[0 ... SIZE - 1] = 3.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] += x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] += A[i]; + } + for (int i = 0; i < SIZE; i++) { + B[i] -= x[i]; + } + for (int i = 0; i < SIZE; i++) { + C[i] += B[i]; + } +} + +int main() { + float A[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float B[SIZE] = {[0 ... SIZE - 1] = 2.0f}; + float C[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float expectedA[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float expectedB[SIZE] = {[0 ... SIZE - 1] = 2.0f}; + float expectedC[SIZE] = {[0 ... 
SIZE - 1] = 0.0f}; + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("expected: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("expected: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("C Output: %f\n", C[i]); + printf("expected: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/c-tests/local-array-3.c b/src/dios-egraphs/Diospyros/c-tests/local-array-3.c new file mode 100644 index 00000000..3c334c5e --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/local-array-3.c @@ -0,0 +1,59 @@ +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +#define SIZE 10 + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } + C[i] = A[i] + x[i]; + } + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } + C[i] = B[i] - x[i]; + } +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } + C[i] = A[i] + x[i]; + } + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = (float)i; + } + C[i] = B[i] - x[i]; + } +} + +int main() { + float A[SIZE] = {[0 ... SIZE - 1] = 1.0f}; + float B[SIZE] = {[0 ... SIZE - 1] = 2.0f}; + float C[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float expectedC[SIZE] = {[0 ... 
SIZE - 1] = 0.0f}; + test(A, B, C); + no_opt_test(A, B, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("C Output: %f\n", C[i]); + printf("expected: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/c-tests/local-array-4.c b/src/dios-egraphs/Diospyros/c-tests/local-array-4.c new file mode 100644 index 00000000..27a65675 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/local-array-4.c @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +void test(float A[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int j = 0; j < SIZE; j++) { + x[j] = 1.0f; + } + float sum = 0.0f; + for (int j = 0; j < SIZE; j++) { + sum += x[j]; + } + A[i] = sum; + } +} + +void no_opt_test(float A[SIZE]) { + for (int i = 0; i < SIZE; i++) { + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int j = 0; j < SIZE; j++) { + x[j] = 1.0f; + } + float sum = 0.0f; + for (int j = 0; j < SIZE; j++) { + sum += x[j]; + } + A[i] = sum; + } +} + +int main() { + float A[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = (float)i; + } + float expectedA[SIZE] = {[0 ... 
SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + expectedA[i] = (float)i; + } + test(A); + no_opt_test(expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("expected: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } + return 0; +} diff --git a/src/dios-egraphs/Diospyros/c-tests/local-var.c b/src/dios-egraphs/Diospyros/c-tests/local-var.c new file mode 100644 index 00000000..45e2c951 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/local-var.c @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + float local; + if (i % 3 == 0) { + local = a_in[i] + b_in[i]; + } else { + local = a_in[i] + c_in[i]; + } + d_out[i] = local * 2.0f + 3.5f; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + float local; + if (i % 3 == 0) { + local = a_in[i] + b_in[i]; + } else { + local = a_in[i] + c_in[i]; + } + d_out[i] = local * 2.0f + 3.5f; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + 
branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/loop-inline.c b/src/dios-egraphs/Diospyros/c-tests/loop-inline.c new file mode 100644 index 00000000..5a817002 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/loop-inline.c @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float x[SIZE], float A[SIZE]) { + for (int k = 0; k < SIZE; k++) { + float alpha = -sgn(x[k]) * naive_norm(x, k); + A[k] = alpha; + } +} + +void no_opt_sample_test(float x[SIZE], float A[SIZE]) { + for (int k = 0; k < SIZE; k++) { + float alpha = -no_opt_sgn(x[k]) * no_opt_naive_norm(x, k); + A[k] = alpha; + } +} + +int main(void) { + float x[SIZE] = {1, -1, 2, 3, 5}; + float A[SIZE] = {0}; + sample_test(x, A); + float expectedA[SIZE] = {0}; + no_opt_sample_test(x, expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mac_new.c 
b/src/dios-egraphs/Diospyros/c-tests/mac.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/mac_new.c rename to src/dios-egraphs/Diospyros/c-tests/mac.c diff --git a/src/dios-egraphs/Diospyros/c-tests/malloc.c b/src/dios-egraphs/Diospyros/c-tests/malloc.c new file mode 100644 index 00000000..fb91400c --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/malloc.c @@ -0,0 +1,30 @@ +#include +#include +#define SIZE 4 + +void malloc_func(int m, float q_out[SIZE][SIZE]) { + float *q_min = (float *)malloc(sizeof(float) * m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + q_min[i * m + j] = 10.0f; + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + q_out[i][j] = q_min[i * m + j]; + } + } +} + +int main(int argc, char **argv) { + float q_out[SIZE][SIZE] = { + {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}; + malloc_func(SIZE, q_out); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("q_out: %f\n", q_out[i][j]); + assert(q_out[i][j] == 10); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mat_mul_new.c b/src/dios-egraphs/Diospyros/c-tests/mat_mul.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/mat_mul_new.c rename to src/dios-egraphs/Diospyros/c-tests/mat_mul.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply-new.c b/src/dios-egraphs/Diospyros/c-tests/matrix-multiply.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply-new.c rename to src/dios-egraphs/Diospyros/c-tests/matrix-multiply.c diff --git a/src/dios-egraphs/Diospyros/c-tests/matrix-scalar-product.c b/src/dios-egraphs/Diospyros/c-tests/matrix-scalar-product.c new file mode 100644 index 00000000..3fa98354 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/matrix-scalar-product.c @@ -0,0 +1,65 @@ +#include +#include +#include +#include +#include + +#define 
MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 3 +#define A_COLS 3 +#define B_COLS 3 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b, + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = a_in[i][j] * b; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], float b, + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = a_in[i][j] * b; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + float c_out[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b, c_out); + no_opt_matrix_multiply(a_in, b, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/matrix-vector-product.c b/src/dios-egraphs/Diospyros/c-tests/matrix-vector-product.c new file mode 100644 index 00000000..1a257e74 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/matrix-vector-product.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 3 +#define A_COLS 3 +#define B_COLS 1 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float 
b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_COLS][B_COLS]; + for (int i = 0; i < A_COLS; i++) { + for (int j = 0; j < B_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mixed_new.c b/src/dios-egraphs/Diospyros/c-tests/mixed.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/mixed_new.c rename to src/dios-egraphs/Diospyros/c-tests/mixed.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mult_new.c 
b/src/dios-egraphs/Diospyros/c-tests/mult.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/mult_new.c rename to src/dios-egraphs/Diospyros/c-tests/mult.c diff --git a/src/dios-egraphs/Diospyros/c-tests/multi-loops.c b/src/dios-egraphs/Diospyros/c-tests/multi-loops.c new file mode 100644 index 00000000..e4790b9d --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/multi-loops.c @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] * b_in[i]; + } else { + d_out[i] = a_in[i] * c_in[i]; + } + } + for (int i = 0; i < ROWS; i++) { + if (i % 5 == 0) { + d_out[i] = a_in[i] - b_in[i]; + } else { + d_out[i] = a_in[i] + c_in[i]; + } + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i % 3 == 0) { + d_out[i] = a_in[i] * b_in[i]; + } else { + d_out[i] = a_in[i] * c_in[i]; + } + } + for (int i = 0; i < ROWS; i++) { + if (i % 5 == 0) { + d_out[i] = a_in[i] - b_in[i]; + } else { + d_out[i] = a_in[i] + c_in[i]; + } + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = 
(float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/multi-mat-mul.c b/src/dios-egraphs/Diospyros/c-tests/multi-mat-mul.c similarity index 90% rename from src/dios-egraphs/Diospyros/llvm-tests/multi-mat-mul.c rename to src/dios-egraphs/Diospyros/c-tests/multi-mat-mul.c index 973ef8f9..af5da2f1 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/multi-mat-mul.c +++ b/src/dios-egraphs/Diospyros/c-tests/multi-mat-mul.c @@ -1,3 +1,4 @@ +#include #include #define ROWS 3 #define COLS 3 @@ -28,8 +29,10 @@ int main(void) { float c_in[ROWS * COLS] = {9, 8, 7, 6, 5, 4, 3, 2, 1}; float d_out[ROWS * COLS] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; multimatrix_multiply(a_in, b_in, c_in, d_out); + float expected[ROWS * COLS] = {160, 200, 240, 100, 125, 150, 40, 50, 60}; for (int i = 0; i < ROWS * COLS; i++) { printf("output: %f\n", d_out[i]); + assert(expected[i] == d_out[i]); } // output: 160.000000 // output: 200.000000 diff --git a/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds_new.c b/src/dios-egraphs/Diospyros/c-tests/multiple_adds.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/multiple_adds_new.c rename to src/dios-egraphs/Diospyros/c-tests/multiple_adds.c diff --git a/src/dios-egraphs/Diospyros/c-tests/naive-norm-inline.c b/src/dios-egraphs/Diospyros/c-tests/naive-norm-inline.c new file mode 100644 index 00000000..66605c05 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/naive-norm-inline.c @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + 
+float naive_norm(float *x, int m) __attribute__((always_inline)); + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float B[SIZE - 1]) { + B[0] = naive_norm(A, SIZE); +} + +void no_opt_sample_test(float A[SIZE], float B[SIZE - 1]) { + B[0] = no_opt_naive_norm(A, SIZE); +} + +int main(void) { + float A[SIZE] = {1, 2, 3, 4, 5}; + float B[SIZE - 1] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, 2, 3, 4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE - 1; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/naive-norm.c b/src/dios-egraphs/Diospyros/c-tests/naive-norm.c new file mode 100644 index 00000000..1727c165 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/naive-norm.c @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +float naive_norm(float x[SIZE], int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float x[SIZE], int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +int main() { + float x[SIZE] = {1.0f}; + for (int i = 0; i < SIZE; i++) { + if (i % 2 == 0) { + x[i] = 1.0f; + } else { + x[i] = 0.0f; + } + } + float calculated = naive_norm(x, SIZE); + float expected = no_opt_naive_norm(x, SIZE); + printf("Calculated of Naive L2 Norm: %f\n", calculated); + printf("Expected of Naive L2 Norm: %f\n", expected); + assert(fabs(expected - calculated) < 
DELTA); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/nested-inline.c b/src/dios-egraphs/Diospyros/c-tests/nested-inline.c new file mode 100644 index 00000000..02650d06 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/nested-inline.c @@ -0,0 +1,105 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float nested_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float no_opt_nested_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = no_opt_nested_inline(A, B, n); + return prod - sum; +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = no_opt_test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +float nested_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = nested_inline(A, B, n); + return prod - sum; +} + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +int main() { + float A[SIZE] = {0.0f}; + float expectedA[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + A[i] = 1.0f; + 
expectedA[i] = 1.0f; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + B[i] = -1.0f; + expectedB[i] = -1.0f; + } + float C[SIZE] = {0.0f}; + float expectedC[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + C[i] = 0.0f; + expectedC[i] = 0.0f; + } + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("Calculated C Output: %f\n", C[i]); + printf("Expected C Output: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/nested-loops.c b/src/dios-egraphs/Diospyros/c-tests/nested-loops.c new file mode 100644 index 00000000..a58a4d7e --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/nested-loops.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + for (int j = 0; j < ROWS; j++) { + d_out[i] = a_in[j] * b_in[j] * c_in[i]; + } + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + for (int j = 0; j < ROWS; j++) { + d_out[i] = a_in[j] * b_in[j] * c_in[i]; + } + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + 
// prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/out_of_order_new.c b/src/dios-egraphs/Diospyros/c-tests/out_of_order.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/out_of_order_new.c rename to src/dios-egraphs/Diospyros/c-tests/out_of_order.c diff --git a/src/dios-egraphs/Diospyros/c-tests/overwrite.c b/src/dios-egraphs/Diospyros/c-tests/overwrite.c new file mode 100644 index 00000000..98236fbf --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/overwrite.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 3 +#define A_COLS 3 +#define B_COLS 3 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = 3.5f; + } + } + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = 3.5f; + } + } + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * 
b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/permuted.c b/src/dios-egraphs/Diospyros/c-tests/permuted.c new file mode 100644 index 00000000..ca9f879e --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/permuted.c @@ -0,0 +1,27 @@ +#include +#include +#define SIZE 4 + +void permuted(float a_in[restrict SIZE], float b_in[restrict SIZE], + float c_out[restrict SIZE]) { + c_out[1] = a_in[2] + b_in[1]; + c_out[0] = a_in[1] + b_in[0]; + c_out[3] = a_in[3] + b_in[2]; + c_out[2] = a_in[0] + b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 3, 4}; + float b_in[SIZE] = {5, 6, 7, 8}; + float c_out[SIZE]; + permuted(a_in, b_in, c_out); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + assert(c_out[0] == 7); + assert(c_out[1] == 9); + 
assert(c_out[2] == 9); + assert(c_out[3] == 11); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/point-product.c b/src/dios-egraphs/Diospyros/c-tests/point-product.c similarity index 89% rename from src/dios-egraphs/Diospyros/llvm-tests/point-product.c rename to src/dios-egraphs/Diospyros/c-tests/point-product.c index 48698dd1..1d1b41e5 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/point-product.c +++ b/src/dios-egraphs/Diospyros/c-tests/point-product.c @@ -1,3 +1,4 @@ +#include #include void cross_product(float lhs[3], float rhs[3], float result[3]) @@ -38,7 +39,9 @@ int main(void) { float p_in[4] = {0, 1, 2, 3}; float result_out[4] = {0, 0, 0, 0}; point_product(q_in, p_in, result_out); - for (int i = 0; i < 3; i++) { + float expected[4] = {0, 1, 2, 0}; + for (int i = 0; i < 4; i++) { printf("%f\n", result_out[i]); + assert(expected[i] == result_out[i]); } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/q-prod.c b/src/dios-egraphs/Diospyros/c-tests/q-prod.c similarity index 91% rename from src/dios-egraphs/Diospyros/llvm-tests/q-prod.c rename to src/dios-egraphs/Diospyros/c-tests/q-prod.c index c795c3ac..2ad30a6a 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/q-prod.c +++ b/src/dios-egraphs/Diospyros/c-tests/q-prod.c @@ -3,6 +3,7 @@ #include #include #include +#include #define SIZE 4 @@ -58,10 +59,14 @@ int main(void) { float r_q[SIZE] = {0, 0, 0, 0}; float r_t[SIZE] = {0, 0, 0, 0}; naive_quaternion_product(a_q, a_t, b_q, b_t, r_q, r_t); + float expectedq[SIZE] = {0, 0, 0, 0}; for (int i = 0; i < SIZE; i++) { printf("%f\n", r_q[i]); + assert(expectedq[i] == r_q[i]); } + float expectedt[SIZE] = {2, 4, 6, 0}; for (int i = 0; i < SIZE; i++) { printf("%f\n", r_t[i]); + assert(expectedt[i] == r_t[i]); } } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size-debug-template.c 
b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size-debug-template.c new file mode 100644 index 00000000..a5c7f2f4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size-debug-template.c @@ -0,0 +1,256 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], + float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * 
y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1, 2, 3, 4}; + float Q[SIZE * SIZE] = {0, 0, 0, 0}; + float R[SIZE * SIZE] = {0, 0, 0, 0}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0, 0, 0, 0}; + float expectedR[SIZE * SIZE] = {0, 0, 0, 0}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(Q[i * SIZE + j] == expectedQ[i * SIZE + j]); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(R[i * SIZE + j] == expectedR[i * SIZE + j]); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size.c new file mode 100644 index 00000000..199a5729 --- /dev/null +++ 
b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-fixed-size.c @@ -0,0 +1,267 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 4 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float *a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float *a) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float *a, float *b, float *c) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0.0f; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, 
A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(m, sizeof(float)); + float *e = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + float *u = (float *)calloc(m, sizeof(float)); + float *v = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(m * m, sizeof(float)); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(SIZE * SIZE, sizeof(float)); + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float *A, float *Q, float *R) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(m, sizeof(float)); + float *e = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(m, sizeof(float)); + float *v = (float *)calloc(m, sizeof(float)); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(m * m, sizeof(float)); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(SIZE * SIZE, sizeof(float)); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(SIZE * SIZE, sizeof(float)); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // res = q_t * Q + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // res = q_t * R + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float expectedQ[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + + naive_fixed_qr_decomp(A, Q, R); + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i * SIZE + j] - Q[i * SIZE + j]) < DELTA); /* compare the element printed above, not just index i */ + } + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i * SIZE + j] - R[i * SIZE + j]) < DELTA); /* compare the element printed above, not just index i */ + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-local-arrays.c 
b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-local-arrays.c new file mode 100644 index 00000000..430d8af1 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-local-arrays.c @@ -0,0 +1,311 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x 
= 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + float I[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float e[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + float u[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float v[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + float q_min[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float q_t[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float res[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float 
*)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + printf("%f\n", A[i]); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0.0f}; + float 
expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-modified-no-local-array.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-modified-no-local-array.c new file mode 100644 index 00000000..6f030330 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-modified-no-local-array.c @@ -0,0 +1,184 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], + float c[SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + 
+void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // No Calloc is used here. + float I[SIZE * SIZE] = {0}; + // float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + float q_t[SIZE * SIZE] = {1}; + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float res[SIZE * SIZE] = {0}; + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + float I[SIZE * SIZE] = {0}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + float q_t[SIZE * SIZE] = {1}; + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float res[SIZE * SIZE] = {0}; + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + } + naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1, 2, 3, 4}; + float Q[SIZE * SIZE] = {0, 0, 0, 0}; + float R[SIZE * SIZE] = {0, 0, 0, 0}; + 
naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0, 0, 0, 0}; + float expectedR[SIZE * SIZE] = {0, 0, 0, 0}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i * SIZE + j] - Q[i * SIZE + j]) < DELTA); /* compare the element printed above, not just index i */ + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i * SIZE + j] - R[i * SIZE + j]) < DELTA); /* compare the element printed above, not just index i */ + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-no-local-array.c new file mode 100644 index 00000000..44b534ee --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-no-local-array.c @@ -0,0 +1,304 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; 
+ } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], float u[SIZE], + float v[SIZE], float q_min[SIZE * SIZE], + float q_t[SIZE * SIZE], float res[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD 
COMMAND: float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float 
Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1, 2, 3, 4}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float I[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float u[SIZE] = {0.0f}; + float v[SIZE] = {0.0f}; + float q_min[SIZE * SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + float res[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, I, x, e, u, v, q_min, q_t, res); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-0.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-0.c new file mode 100644 index 00000000..f43090bb --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-0.c @@ -0,0 +1,81 @@ +#include 
+#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +// float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; + sample_test(A, x, e); + float expectedA[SIZE] = {0}; + float expectedx[SIZE] = {0}; + float expectede[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedx, expectede); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + printf("X Output: %f\n", x[i]); + printf("Expected X Output: %f\n", expectedx[i]); + printf("E Output: %f\n", e[i]); + printf("Expected E Output: %f\n", expectede[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedx[i] - x[i]) < DELTA); + assert(fabs(expectede[i] - e[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-1.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-1.c new file mode 
100644 index 00000000..8c131bfb --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-1.c @@ -0,0 +1,99 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +// float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + // float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + // float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; + sample_test(A, x, e); + float expectedA[SIZE] = {0}; + float expectedx[SIZE] = {0}; + float expectede[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedx, expectede); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + printf("X Output: %f\n", 
x[i]); + printf("Expected X Output: %f\n", expectedx[i]); + printf("E Output: %f\n", e[i]); + printf("Expected E Output: %f\n", expectede[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedx[i] - x[i]) < DELTA); + assert(fabs(expectede[i] - e[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-2.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-2.c new file mode 100644 index 00000000..367d75a4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-2.c @@ -0,0 +1,92 @@ +// Modification of test 1, with dynamically sized arrays. + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] 
= alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; + sample_test(A, x, e); + float expectedA[SIZE] = {0}; + float expectedX[SIZE] = {0}; + float expectedE[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedX, expectedE); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-3.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-3.c new file mode 100644 index 00000000..ebe988ae --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-3.c @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float x[SIZE], float e[SIZE], + float Q[SIZE * SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m) * naive_norm(e, m); + A[k] = alpha; + + // float q_t[SIZE * SIZE] = {alpha}; + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = alpha; + } + } + } +} + +void no_opt_sample_test(float A[SIZE], float x[SIZE], float e[SIZE], + float Q[SIZE * SIZE]) { + 
for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m) * + no_opt_naive_norm(e, m); + A[k] = alpha; + + // float q_t[SIZE * SIZE] = {alpha}; + + // for (int i = 0; i < SIZE * SIZE; i++) { + // Q[i] = q_t[i]; + // } + + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = alpha; + } + } + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; + float Q[SIZE * SIZE] = {0}; + sample_test(A, x, e, Q); + float expectedA[SIZE] = {0}; + float expectedX[SIZE] = {0}; + float expectedE[SIZE] = {0}; + float expectedQ[SIZE * SIZE] = {0}; + no_opt_sample_test(expectedA, expectedX, expectedE, expectedQ); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + } + + for (int i = 0; i < SIZE * SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-1-linear-array.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-1-linear-array.c new file mode 100644 index 00000000..97726430 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-1-linear-array.c @@ -0,0 +1,201 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 1 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + 
x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +int main(void) { + float A[SIZE] = {1.0f}; + float Q[SIZE] = {0.0f}; + float R[SIZE] = {1.0f}; + float i[SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE] = {0.0f}; + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + 
expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-1.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-1.c new file mode 100644 index 00000000..b0fc07ae --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-1.c @@ -0,0 +1,210 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 1 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { 
+ for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * 
SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1.0f}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float i[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + float expectedi[SIZE * SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git 
a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-2-2.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-2-2.c new file mode 100644 index 00000000..63ea53cd --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-2-2.c @@ -0,0 +1,217 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < 
SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], + float q_t[SIZE * SIZE]) { + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * 
no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + float A[SIZE * SIZE] = {1.0f, 2.0f, 3.0f, 4.0f}; + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {1.0f}; + float i[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {1.0f}; + float expectedi[SIZE * SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q_T Output: %f\n", q_t[i * SIZE + j]); + printf("Expected Q_T Output: %f\n", expectedq_t[i * SIZE + j]); + assert(fabs(expectedq_t[i] - q_t[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-2-linear-array.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-2-linear-array.c new file mode 100644 index 00000000..fc1db5ba --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-2-linear-array.c @@ -0,0 +1,201 @@ +// Here I remove the need for 
using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + 
for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE], + float I[SIZE], float x[SIZE], float e[SIZE], + float q_t[SIZE]) { + for (int i = 0; i < SIZE; i++) { + R[i] = A[i]; + } + + for (int i = 0; i < SIZE; i++) { + I[i] = 1.0f; + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + } + } +} + +int main(void) { + float A[SIZE] = {1.0f, 2.0f}; + float Q[SIZE] = {0.0f}; + float R[SIZE] = {1.0f}; + float i[SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t); + float expectedQ[SIZE] = {0.0f}; + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {0.0f}; + float expectede[SIZE] = {0.0f}; + 
float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx, + expectede, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-2.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-2.c new file mode 100644 index 00000000..4898553e --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-2.c @@ -0,0 +1,206 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. 
+// I also remove all references to memcpy as well
+// This is to isolate the error so that it is not because of an externally
+// linked program
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define SIZE 2
+#define DELTA 0.1f
+
+float sgn(float v) __attribute__((always_inline));
+float naive_norm(float *x, int m) __attribute__((always_inline));
+void naive_fixed_transpose(float *a) __attribute__((always_inline));
+void naive_fixed_matrix_multiply(float *a, float *b, float *c)
+    __attribute__((always_inline));
+
+float sgn(float v) { return (v > 0) - (v < 0); }  // sign of v: 1, -1 or 0
+
+float no_opt_sgn(float v) { return (v > 0) - (v < 0); }  // same body as sgn; "no_opt_" presumably marks the reference copy - confirm
+
+float naive_norm(float *x, int m) {  // sqrt of the sum of squares of x[0..m-1]
+    float sum = 0;
+    for (int i = 0; i < m; i++) {
+        sum += x[i] * x[i];
+    }
+    return sqrtf(sum);
+}
+
+float no_opt_naive_norm(float *x, int m) {  // same body as naive_norm
+    float sum = 0;
+    for (int i = 0; i < m; i++) {
+        sum += x[i] * x[i];
+    }
+    return sqrtf(sum);
+}
+
+// Naive with fixed size
+void naive_fixed_transpose(float a[SIZE]) {  // NOTE(review): declared a[SIZE] but indexed up to SIZE * SIZE - 1
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = i + 1; j < SIZE; j++) {  // j > i only, so each pair is swapped once
+            float tmp = a[i * SIZE + j];
+            a[i * SIZE + j] = a[j * SIZE + i];
+            a[j * SIZE + i] = tmp;
+        }
+    }
+}
+
+void no_opt_naive_fixed_transpose(float a[SIZE]) {  // same body as naive_fixed_transpose
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = i + 1; j < SIZE; j++) {
+            float tmp = a[i * SIZE + j];
+            a[i * SIZE + j] = a[j * SIZE + i];
+            a[j * SIZE + i] = tmp;
+        }
+    }
+}
+
+void naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE], float c[SIZE]) {  // c = a * b, row-major SIZE x SIZE indexing
+    for (int y = 0; y < SIZE; y++) {
+        for (int x = 0; x < SIZE; x++) {
+            c[SIZE * y + x] = 0;  // reset before accumulating the dot product
+            for (int k = 0; k < SIZE; k++) {
+                c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x];
+            }
+        }
+    }
+}
+
+void no_opt_naive_fixed_matrix_multiply(float a[SIZE], float b[SIZE],
+                                        float c[SIZE]) {  // same body as naive_fixed_matrix_multiply
+    for (int y = 0; y < SIZE; y++) {
+        for (int x = 0; x < SIZE; x++) {
+            c[SIZE * y + x] = 0;
+            for (int k = 0; k < SIZE; k++) {
+                c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x];
+            }
+        }
+    }
+}
+
+void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE],
+                           float I[SIZE * SIZE], float x[SIZE], float e[SIZE],
+                           float q_t[SIZE * SIZE]) {  // reduced QR repro: writes R, I, x, e, q_t and Q
+    for (int i = 0; i < SIZE * SIZE; i++) {
+        R[i] = A[i];
+    }
+
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = 0; j < SIZE; j++) {
+            I[i * SIZE + j] = (i == j);  // identity matrix
+        }
+    }
+
+    // Householder
+    for (int k = 0; k < SIZE - 1; k++) {
+        int m = SIZE - k;
+
+        for (int i = 0; i < m; i++) {
+            x[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            e[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            x[i] = 1.0f;
+            e[i] = 2.0f;
+        }
+
+        float alpha = -sgn(x[0]) * naive_norm(x, m);
+
+        for (int i = 0; i < SIZE * SIZE; i++) {
+            q_t[i] = alpha;
+        }
+        if (k == 0) {
+            for (int i = 0; i < SIZE * SIZE; i++) {
+                Q[i] = q_t[i];
+            }
+            naive_fixed_matrix_multiply(q_t, A, R);  // R = q_t * A
+        }
+    }
+    naive_fixed_transpose(Q);
+}
+
+void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE],
+                                  float I[SIZE * SIZE], float x[SIZE],
+                                  float e[SIZE], float q_t[SIZE * SIZE]) {  // same steps, calling the no_opt_ helpers
+    for (int i = 0; i < SIZE * SIZE; i++) {
+        R[i] = A[i];
+    }
+
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = 0; j < SIZE; j++) {
+            I[i * SIZE + j] = (i == j);
+        }
+    }
+
+    // Householder
+    for (int k = 0; k < SIZE - 1; k++) {
+        int m = SIZE - k;
+
+        for (int i = 0; i < m; i++) {
+            x[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            e[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            x[i] = 1.0f;
+            e[i] = 2.0f;
+        }
+
+        float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m);
+
+        for (int i = 0; i < SIZE * SIZE; i++) {
+            q_t[i] = alpha;
+        }
+        if (k == 0) {
+            for (int i = 0; i < SIZE * SIZE; i++) {
+                Q[i] = q_t[i];
+            }
+            no_opt_naive_fixed_matrix_multiply(q_t, A, R);  // R = q_t * A
+        }
+    }
+    no_opt_naive_fixed_transpose(Q);
+}
+
+int main(void) {  // runs both variants on the same input and compares Q and R
+    float A[SIZE * SIZE] = {1.0f, 2.0f, 3.0f, 4.0f};
+    float Q[SIZE * SIZE] = {0.0f};
+    float R[SIZE * SIZE] = {0.0f};
+    float i[SIZE * SIZE] = {0.0f};
+    float x[SIZE] = {0.0f};
+    float e[SIZE] = {0.0f};
+    float q_t[SIZE * SIZE] = {0.0f};
+    naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t);
+    float expectedQ[SIZE * SIZE] = {0.0f};
+    float expectedR[SIZE * SIZE] = {0.0f};
+    float expectedi[SIZE * SIZE] = {0.0f};
+    float expectedx[SIZE] = {0.0f};
+    float expectede[SIZE] = {0.0f};
+    float expectedq_t[SIZE * SIZE] = {0.0f};
+    no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx,
+                                 expectede, expectedq_t);
+
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = 0; j < SIZE; j++) {  // compare the (i, j) element that is printed
+            printf("Q Output: %f\n", Q[i * SIZE + j]);
+            printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]);
+            assert(fabs(expectedQ[i * SIZE + j] - Q[i * SIZE + j]) < DELTA);
+        }
+    }
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = 0; j < SIZE; j++) {  // compare the (i, j) element that is printed
+            printf("R Output: %f\n", R[i * SIZE + j]);
+            printf("Expected R Output: %f\n", expectedR[i * SIZE + j]);
+            assert(fabs(expectedR[i * SIZE + j] - R[i * SIZE + j]) < DELTA);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-3-linear-array.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-3-linear-array.c
new file mode 100644
index 00000000..4a9b7256
--- /dev/null
+++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-3-linear-array.c
@@ -0,0 +1,201 @@
+// Here I remove the need for using calloc and free by preallocating larger
+// arrays I want to eliminate any sources of error first.
+// I also remove all references to memcpy as well
+// This is to isolate the error so that it is not because of an externally
+// linked program
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define SIZE 3
+#define DELTA 0.1f
+
+float sgn(float v) __attribute__((always_inline));
+float naive_norm(float *x, int m) __attribute__((always_inline));
+void naive_fixed_transpose(float *a) __attribute__((always_inline));
+void naive_fixed_matrix_multiply(float *a, float *b, float *c)
+    __attribute__((always_inline));
+
+float sgn(float v) { return (v > 0) - (v < 0); }
+
+float no_opt_sgn(float v) { return (v > 0) - (v < 0); }
+
+float naive_norm(float *x, int m) {
+    float sum = 0;
+    for (int i = 0; i < m; i++) {
+        sum += x[i] * x[i];
+    }
+    return sqrtf(sum);
+}
+
+float no_opt_naive_norm(float *x, int m) {
+    float sum = 0;
+    for (int i = 0; i < m; i++) {
+        sum += x[i] * x[i];
+    }
+    return sqrtf(sum);
+}
+
+// Naive with fixed size
+void naive_fixed_transpose(float a[SIZE * SIZE]) {
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = i + 1; j < SIZE; j++) {
+            float tmp = a[i * SIZE + j];
+            a[i * SIZE + j] = a[j * SIZE + i];
+            a[j * SIZE + i] = tmp;
+        }
+    }
+}
+
+void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) {
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = i + 1; j < SIZE; j++) {
+            float tmp = a[i * SIZE + j];
+            a[i * SIZE + j] = a[j * SIZE + i];
+            a[j * SIZE + i] = tmp;
+        }
+    }
+}
+
+void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE],
+                                 float c[SIZE * SIZE]) {
+    for (int y = 0; y < SIZE; y++) {
+        for (int x = 0; x < SIZE; x++) {
+            c[SIZE * y + x] = 0;
+            for (int k = 0; k < SIZE; k++) {
+                c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x];
+            }
+        }
+    }
+}
+
+void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE],
+                                        float b[SIZE * SIZE],
+                                        float c[SIZE * SIZE]) {
+    for (int y = 0; y < SIZE; y++) {
+        for (int x = 0; x < SIZE; x++) {
+            c[SIZE * y + x] = 0;
+            for (int k = 0; k < SIZE; k++) {
+                c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x];
+            }
+        }
+    }
+}
+
+void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE],
+                           float I[SIZE], float x[SIZE], float e[SIZE],
+                           float q_t[SIZE]) {
+    for (int i = 0; i < SIZE; i++) {
+        R[i] = A[i];
+    }
+
+    for (int i = 0; i < SIZE; i++) {
+        I[i] = 1.0f;
+    }
+
+    // Householder
+    for (int k = 0; k < SIZE - 1; k++) {
+        int m = SIZE - k;
+
+        for (int i = 0; i < m; i++) {
+            x[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            e[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            x[i] = 1.0f;
+            e[i] = 2.0f;
+        }
+
+        float alpha = -sgn(x[0]) * naive_norm(x, m);
+
+        for (int i = 0; i < SIZE; i++) {
+            q_t[i] = alpha;
+        }
+        if (k == 0) {
+            for (int i = 0; i < SIZE; i++) {
+                Q[i] = q_t[i];
+            }
+        }
+    }
+}
+
+void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE],
+                                  float I[SIZE], float x[SIZE], float e[SIZE],
+                                  float q_t[SIZE]) {
+    for (int i = 0; i < SIZE; i++) {
+        R[i] = A[i];
+    }
+
+    for (int i = 0; i < SIZE; i++) {
+        I[i] = 1.0f;
+    }
+
+    // Householder
+    for (int k = 0; k < SIZE - 1; k++) {
+        int m = SIZE - k;
+
+        for (int i = 0; i < m; i++) {
+            x[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            e[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            x[i] = 1.0f;
+            e[i] = 2.0f;
+        }
+
+        float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m);
+
+        for (int i = 0; i < SIZE; i++) {
+            q_t[i] = alpha;
+        }
+        if (k == 0) {
+            for (int i = 0; i < SIZE; i++) {
+                Q[i] = q_t[i];
+            }
+        }
+    }
+}
+
+int main(void) {
+    float A[SIZE] = {1.0f, 2.0f, 3.0f};
+    float Q[SIZE] = {0.0f};
+    float R[SIZE] = {1.0f};
+    float i[SIZE] = {0.0f};
+    float x[SIZE] = {0.0f};
+    float e[SIZE] = {0.0f};
+    float q_t[SIZE] = {0.0f};
+    naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t);
+    float expectedQ[SIZE] = {0.0f};
+    float expectedR[SIZE] = {1.0f};
+    float expectedi[SIZE] = {0.0f};
+    float expectedx[SIZE] = {0.0f};
+    float expectede[SIZE] = {0.0f};
+    float expectedq_t[SIZE] = {0.0f};
+    no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx,
+                                 expectede, expectedq_t);
+
+    for (int i = 0; i < SIZE; i++) {
+        printf("Q Output: %f\n", Q[i]);
+        printf("Expected Q Output: %f\n", expectedQ[i]);
+        assert(fabs(Q[i] - expectedQ[i]) < DELTA);
+    }
+    for (int i = 0; i < SIZE; i++) {
+        printf("R Output: %f\n", R[i]);
+        printf("Expected R Output: %f\n", expectedR[i]);
+        assert(fabs(R[i] - expectedR[i]) < DELTA);
+    }
+    for (int i = 0; i < SIZE; i++) {
+        printf("Q_T Output: %f\n", q_t[i]);
+        printf("Expected Q_T Output: %f\n", expectedq_t[i]);
+        assert(fabs(q_t[i] - expectedq_t[i]) < DELTA);
+    }
+}
\ No newline at end of file
diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-3.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-3.c
new file mode 100644
index 00000000..cdb08413
--- /dev/null
+++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-3.c
@@ -0,0 +1,209 @@
+// Here I remove the need for using calloc and free by preallocating larger
+// arrays I want to eliminate any sources of error first.
+// I also remove all references to memcpy as well
+// This is to isolate the error so that it is not because of an externally
+// linked program
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define SIZE 3
+#define DELTA 0.1f
+
+float sgn(float v) __attribute__((always_inline));
+float naive_norm(float *x, int m) __attribute__((always_inline));
+void naive_fixed_transpose(float *a) __attribute__((always_inline));
+void naive_fixed_matrix_multiply(float *a, float *b, float *c)
+    __attribute__((always_inline));
+
+float sgn(float v) { return (v > 0) - (v < 0); }
+
+float no_opt_sgn(float v) { return (v > 0) - (v < 0); }
+
+float naive_norm(float *x, int m) {
+    float sum = 0;
+    for (int i = 0; i < m; i++) {
+        sum += x[i] * x[i];
+    }
+    return sqrtf(sum);
+}
+
+float no_opt_naive_norm(float *x, int m) {
+    float sum = 0;
+    for (int i = 0; i < m; i++) {
+        sum += x[i] * x[i];
+    }
+    return sqrtf(sum);
+}
+
+// Naive with fixed size
+void naive_fixed_transpose(float a[SIZE * SIZE]) {
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = i + 1; j < SIZE; j++) {
+            float tmp = a[i * SIZE + j];
+            a[i * SIZE + j] = a[j * SIZE + i];
+            a[j * SIZE + i] = tmp;
+        }
+    }
+}
+
+void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) {
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = i + 1; j < SIZE; j++) {
+            float tmp = a[i * SIZE + j];
+            a[i * SIZE + j] = a[j * SIZE + i];
+            a[j * SIZE + i] = tmp;
+        }
+    }
+}
+
+void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE],
+                                 float c[SIZE * SIZE]) {
+    for (int y = 0; y < SIZE; y++) {
+        for (int x = 0; x < SIZE; x++) {
+            c[SIZE * y + x] = 0;
+            for (int k = 0; k < SIZE; k++) {
+                c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x];
+            }
+        }
+    }
+}
+
+void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE],
+                                        float b[SIZE * SIZE],
+                                        float c[SIZE * SIZE]) {
+    for (int y = 0; y < SIZE; y++) {
+        for (int x = 0; x < SIZE; x++) {
+            c[SIZE * y + x] = 0;
+            for (int k = 0; k < SIZE; k++) {
+                c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x];
+            }
+        }
+    }
+}
+
+void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE],
+                           float I[SIZE * SIZE], float x[SIZE], float e[SIZE],
+                           float q_t[SIZE * SIZE]) {
+    for (int i = 0; i < SIZE * SIZE; i++) {
+        R[i] = A[i];
+    }
+
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = 0; j < SIZE; j++) {
+            I[i * SIZE + j] = (i == j);
+        }
+    }
+
+    // Householder
+    for (int k = 0; k < SIZE - 1; k++) {
+        int m = SIZE - k;
+
+        for (int i = 0; i < m; i++) {
+            x[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            e[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            x[i] = 1.0f;
+            e[i] = 2.0f;
+        }
+
+        float alpha = -sgn(x[0]) * naive_norm(x, m);
+
+        for (int i = 0; i < SIZE * SIZE; i++) {
+            q_t[i] = alpha;
+        }
+        if (k == 0) {
+            for (int i = 0; i < SIZE * SIZE; i++) {
+                Q[i] = q_t[i];
+            }
+            naive_fixed_matrix_multiply(q_t, A, R);  // R = q_t * A
+        }
+    }
+    naive_fixed_transpose(Q);
+}
+
+void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE],
+                                  float I[SIZE * SIZE], float x[SIZE],
+                                  float e[SIZE], float q_t[SIZE * SIZE]) {
+    for (int i = 0; i < SIZE * SIZE; i++) {
+        R[i] = A[i];
+    }
+
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = 0; j < SIZE; j++) {
+            I[i * SIZE + j] = (i == j);
+        }
+    }
+
+    // Householder
+    for (int k = 0; k < SIZE - 1; k++) {
+        int m = SIZE - k;
+
+        for (int i = 0; i < m; i++) {
+            x[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            e[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            x[i] = 1.0f;
+            e[i] = 2.0f;
+        }
+
+        float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m);
+
+        for (int i = 0; i < SIZE * SIZE; i++) {
+            q_t[i] = alpha;
+        }
+        if (k == 0) {
+            for (int i = 0; i < SIZE * SIZE; i++) {
+                Q[i] = q_t[i];
+            }
+            no_opt_naive_fixed_matrix_multiply(q_t, A, R);  // R = q_t * A
+        }
+    }
+    no_opt_naive_fixed_transpose(Q);
+}
+
+int main(void) {  // runs both variants on the same input and compares Q and R
+    float A[SIZE * SIZE] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
+                            6.0f, 7.0f, 8.0f, 9.0f};
+    float Q[SIZE * SIZE] = {0.0f};
+    float R[SIZE * SIZE] = {0.0f};
+    float i[SIZE * SIZE] = {0.0f};
+    float x[SIZE] = {0.0f};
+    float e[SIZE] = {0.0f};
+    float q_t[SIZE * SIZE] = {0.0f};
+    naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t);
+    float expectedQ[SIZE * SIZE] = {0.0f};
+    float expectedR[SIZE * SIZE] = {0.0f};
+    float expectedi[SIZE * SIZE] = {0.0f};
+    float expectedx[SIZE] = {0.0f};
+    float expectede[SIZE] = {0.0f};
+    float expectedq_t[SIZE * SIZE] = {0.0f};
+    no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx,
+                                 expectede, expectedq_t);
+
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = 0; j < SIZE; j++) {  // compare the (i, j) element that is printed
+            printf("Q Output: %f\n", Q[i * SIZE + j]);
+            printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]);
+            assert(fabs(expectedQ[i * SIZE + j] - Q[i * SIZE + j]) < DELTA);
+        }
+    }
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = 0; j < SIZE; j++) {  // compare the (i, j) element that is printed
+            printf("R Output: %f\n", R[i * SIZE + j]);
+            printf("Expected R Output: %f\n", expectedR[i * SIZE + j]);
+            assert(fabs(expectedR[i * SIZE + j] - R[i * SIZE + j]) < DELTA);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-4-linear-array.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-4-linear-array.c new file mode 100644 index 00000000..5dc845dc --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-4-linear-array.c @@ -0,0 +1,137 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any sources of error first. +// I also remove all references to memcpy as well +// This is to isolate the error so that it is not because of an externally +// linked program + +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 4 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return v; } + +float no_opt_sgn(float v) { return v; } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void naive_fixed_qr_decomp(float Q[SIZE], float x[SIZE], float q_t[SIZE]) { + // for (int i = 0; i < SIZE; i++) { + // R[i] = A[i]; + // } + + // for (int i = 0; i < SIZE; i++) { + // I[i] = 1.0f; + // } + + // Householder + // for (int k = 0; k < SIZE - 1; k++) { + // int k = 0; + // int m = SIZE - k; + + float alpha = -sgn(x[0]); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + // if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + // } + // } +} + +void no_opt_naive_fixed_qr_decomp(float Q[SIZE], float x[SIZE], + float q_t[SIZE]) { + // for (int i = 0; i < SIZE; i++) { + // R[i] = A[i]; + // } + + // for 
(int i = 0; i < SIZE; i++) { + // I[i] = 1.0f; + // } + + // Householder + // for (int k = 0; k < SIZE - 1; k++) { + // int k = 0; + // int m = SIZE - k; + + float alpha = -no_opt_sgn(x[0]); + + for (int i = 0; i < SIZE; i++) { + q_t[i] = alpha; + } + // if (k == 0) { + for (int i = 0; i < SIZE; i++) { + Q[i] = q_t[i]; + } + // } + // } +} + +int main(void) { + float A[SIZE] = {1.1f, 2.1f, 3.1f, 4.1f}; + float Q[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + Q[i] = 0.0f; + } + float R[SIZE] = {1.0f}; + float i[SIZE] = {0.0f}; + float x[SIZE] = {1.2f, 1.3f, 1.4f, 1.5f}; + float e[SIZE] = {0.0f}; + float q_t[SIZE] = {0.0f}; + naive_fixed_qr_decomp(Q, x, q_t); + float expectedQ[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + expectedQ[i] = 0.0f; + } + float expectedR[SIZE] = {1.0f}; + float expectedi[SIZE] = {0.0f}; + float expectedx[SIZE] = {1.2f, 1.3f, 1.4f, 1.5f}; + float expectede[SIZE] = {0.0f}; + float expectedq_t[SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(expectedQ, expectedx, expectedq_t); + + for (int i = 0; i < SIZE; i++) { + printf("Q Output: %f\n", Q[i]); + printf("Expected Q Output: %f\n", expectedQ[i]); + assert(fabs(Q[i] - expectedQ[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("R Output: %f\n", R[i]); + printf("Expected R Output: %f\n", expectedR[i]); + assert(fabs(R[i] - expectedR[i]) < DELTA); + } + for (int i = 0; i < SIZE; i++) { + printf("Q_T Output: %f\n", q_t[i]); + printf("Expected Q_T Output: %f\n", expectedq_t[i]); + assert(fabs(q_t[i] - expectedq_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-5-linear-array.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-5-linear-array.c new file mode 100644 index 00000000..de55e82a --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp-test-4-5-linear-array.c @@ -0,0 +1,201 @@ +// Here I remove the need for using calloc and free by preallocating larger +// arrays I want to eliminate any 
sources of error first.
+// I also remove all references to memcpy as well
+// This is to isolate the error so that it is not because of an externally
+// linked program
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define SIZE 5
+#define DELTA 0.1f
+
+float sgn(float v) __attribute__((always_inline));
+float naive_norm(float *x, int m) __attribute__((always_inline));
+void naive_fixed_transpose(float *a) __attribute__((always_inline));
+void naive_fixed_matrix_multiply(float *a, float *b, float *c)
+    __attribute__((always_inline));
+
+float sgn(float v) { return (v > 0) - (v < 0); }
+
+float no_opt_sgn(float v) { return (v > 0) - (v < 0); }
+
+float naive_norm(float *x, int m) {
+    float sum = 0;
+    for (int i = 0; i < m; i++) {
+        sum += x[i] * x[i];
+    }
+    return sqrtf(sum);
+}
+
+float no_opt_naive_norm(float *x, int m) {
+    float sum = 0;
+    for (int i = 0; i < m; i++) {
+        sum += x[i] * x[i];
+    }
+    return sqrtf(sum);
+}
+
+// Naive with fixed size
+void naive_fixed_transpose(float a[SIZE * SIZE]) {
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = i + 1; j < SIZE; j++) {
+            float tmp = a[i * SIZE + j];
+            a[i * SIZE + j] = a[j * SIZE + i];
+            a[j * SIZE + i] = tmp;
+        }
+    }
+}
+
+void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) {
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = i + 1; j < SIZE; j++) {
+            float tmp = a[i * SIZE + j];
+            a[i * SIZE + j] = a[j * SIZE + i];
+            a[j * SIZE + i] = tmp;
+        }
+    }
+}
+
+void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE],
+                                 float c[SIZE * SIZE]) {
+    for (int y = 0; y < SIZE; y++) {
+        for (int x = 0; x < SIZE; x++) {
+            c[SIZE * y + x] = 0;
+            for (int k = 0; k < SIZE; k++) {
+                c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x];
+            }
+        }
+    }
+}
+
+void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE],
+                                        float b[SIZE * SIZE],
+                                        float c[SIZE * SIZE]) {
+    for (int y = 0; y < SIZE; y++) {
+        for (int x = 0; x < SIZE; x++) {
+            c[SIZE * y + x] = 0;
+            for (int k = 0; k < SIZE; k++) {
+                c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x];
+            }
+        }
+    }
+}
+
+void naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE],
+                           float I[SIZE], float x[SIZE], float e[SIZE],
+                           float q_t[SIZE]) {
+    for (int i = 0; i < SIZE; i++) {
+        R[i] = A[i];
+    }
+
+    for (int i = 0; i < SIZE; i++) {
+        I[i] = 1.0f;
+    }
+
+    // Householder
+    for (int k = 0; k < SIZE - 1; k++) {
+        int m = SIZE - k;
+
+        for (int i = 0; i < m; i++) {
+            x[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            e[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            x[i] = 1.0f;
+            e[i] = 2.0f;
+        }
+
+        float alpha = -sgn(x[0]) * naive_norm(x, m);
+
+        for (int i = 0; i < SIZE; i++) {
+            q_t[i] = alpha;
+        }
+        if (k == 0) {
+            for (int i = 0; i < SIZE; i++) {
+                Q[i] = q_t[i];
+            }
+        }
+    }
+}
+
+void no_opt_naive_fixed_qr_decomp(float A[SIZE], float Q[SIZE], float R[SIZE],
+                                  float I[SIZE], float x[SIZE], float e[SIZE],
+                                  float q_t[SIZE]) {
+    for (int i = 0; i < SIZE; i++) {
+        R[i] = A[i];
+    }
+
+    for (int i = 0; i < SIZE; i++) {
+        I[i] = 1.0f;
+    }
+
+    // Householder
+    for (int k = 0; k < SIZE - 1; k++) {
+        int m = SIZE - k;
+
+        for (int i = 0; i < m; i++) {
+            x[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            e[i] = 0.0f;
+        }
+        for (int i = 0; i < m; i++) {
+            x[i] = 1.0f;
+            e[i] = 2.0f;
+        }
+
+        float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m);
+
+        for (int i = 0; i < SIZE; i++) {
+            q_t[i] = alpha;
+        }
+        if (k == 0) {
+            for (int i = 0; i < SIZE; i++) {
+                Q[i] = q_t[i];
+            }
+        }
+    }
+}
+
+int main(void) {
+    float A[SIZE] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
+    float Q[SIZE] = {0.0f};
+    float R[SIZE] = {1.0f};
+    float i[SIZE] = {0.0f};
+    float x[SIZE] = {0.0f};
+    float e[SIZE] = {0.0f};
+    float q_t[SIZE] = {0.0f};
+    naive_fixed_qr_decomp(A, Q, R, i, x, e, q_t);
+    float expectedQ[SIZE] = {0.0f};
+    float expectedR[SIZE] = {1.0f};
+    float expectedi[SIZE] = {0.0f};
+    float expectedx[SIZE] = {0.0f};
+    float expectede[SIZE] = {0.0f};
+    float expectedq_t[SIZE] = {0.0f};
+    no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR, expectedi, expectedx,
+                                 expectede, expectedq_t);
+
+    for (int i = 0; i < SIZE; i++) {
+        printf("Q Output: %f\n", Q[i]);
+        printf("Expected Q Output: %f\n", expectedQ[i]);
+        assert(fabs(Q[i] - expectedQ[i]) < DELTA);
+    }
+    for (int i = 0; i < SIZE; i++) {
+        printf("R Output: %f\n", R[i]);
+        printf("Expected R Output: %f\n", expectedR[i]);
+        assert(fabs(R[i] - expectedR[i]) < DELTA);
+    }
+    for (int i = 0; i < SIZE; i++) {
+        printf("Q_T Output: %f\n", q_t[i]);
+        printf("Expected Q_T Output: %f\n", expectedq_t[i]);
+        assert(fabs(q_t[i] - expectedq_t[i]) < DELTA);
+    }
+}
\ No newline at end of file
diff --git a/src/dios-egraphs/Diospyros/c-tests/qr-decomp.c b/src/dios-egraphs/Diospyros/c-tests/qr-decomp.c
new file mode 100644
index 00000000..16d181f0
--- /dev/null
+++ b/src/dios-egraphs/Diospyros/c-tests/qr-decomp.c
@@ -0,0 +1,272 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define SIZE 2
+#define MAX_FLOAT 100.00f
+#define DELTA 0.1f
+
+float sgn(float v) __attribute__((always_inline));
+float naive_norm(float *x, int m) __attribute__((always_inline));
+void naive_transpose(float *a, int n) __attribute__((always_inline));
+void naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1,
+                           int col2) __attribute__((always_inline));
+
+float sgn(float v) { return (v > 0) - (v < 0); }
+
+float no_opt_sgn(float v) { return (v > 0) - (v < 0); }
+
+// Naive implementation
+void naive_transpose(float *a, int n) {
+    for (int i = 0; i < n; i++) {
+        for (int j = i + 1; j < n; j++) {
+            float tmp = a[i * n + j];
+            a[i * n + j] = a[j * n + i];
+            a[j * n + i] = tmp;
+        }
+    }
+}
+
+void no_opt_naive_transpose(float *a, int n) {
+    for (int i = 0; i < n; i++) {
+        for (int j = i + 1; j < n; j++) {
+            float tmp = a[i * n + j];
+            a[i * n + j] = a[j * n + i];
+            a[j * n + i] = tmp;
+        }
+    }
+}
+
+float naive_norm(float *x, int m) {
+    float sum = 0;
+    for (int i = 0; i < m; i++) {
+        sum += x[i] * x[i];
+    }
+    return sqrtf(sum);
+}
+
+float no_opt_naive_norm(float *x, int m) {
+    float sum = 0;
+    for (int i = 0; i < m; i++) {
+        sum += x[i] * x[i];
+    }
+    return sqrtf(sum);
+}
+
+void naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1,
+                           int col2) {
+    for (int y = 0; y < row1; y++) {
+        for (int x = 0; x < col2; x++) {
+            c[col2 * y + x] = 0.0f;
+            for (int k = 0; k < col1; k++) {
+                c[col2 * y + x] += a[col1 * y + k] * b[col2 * k + x];
+            }
+        }
+    }
+}
+
+void no_opt_naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1,
+                                  int col2) {
+    for (int y = 0; y < row1; y++) {
+        for (int x = 0; x < col2; x++) {
+            c[col2 * y + x] = 0.0f;
+            for (int k = 0; k < col1; k++) {
+                c[col2 * y + x] += a[col1 * y + k] * b[col2 * k + x];
+            }
+        }
+    }
+}
+
+void naive_qr_decomp(float *A, float *Q, float *R, int n) {  // Householder QR of the n x n row-major A into Q and R
+    memcpy(R, A, sizeof(float) * n * n);
+
+    // Build identity matrix of size n * n
+    float *I = (float *)calloc(sizeof(float), n * n);
+    for (int i = 0; i < n; i++) {
+        for (int j = 0; j < n; j++) {
+            I[i * n + j] = (i == j);
+        }
+    }
+
+    // Householder
+    for (int k = 0; k < n - 1; k++) {
+        int m = n - k;
+
+        float *x = (float *)calloc(sizeof(float), m);
+        float *e = (float *)calloc(sizeof(float), m);
+        for (int i = 0; i < m; i++) {
+            int row = k + i;
+            x[i] = R[row * n + k];
+            e[i] = I[row * n + k];
+        }
+
+        float alpha = -sgn(x[0]) * naive_norm(x, m);
+
+        float *u = (float *)calloc(sizeof(float), m);
+        float *v = (float *)calloc(sizeof(float), m);
+        for (int i = 0; i < m; i++) {
+            u[i] = x[i] + alpha * e[i];
+        }
+        float norm_u = naive_norm(u, m);
+        for (int i = 0; i < m; i++) {
+            v[i] = u[i] / norm_u;
+        }
+
+        float *q_min = (float *)calloc(sizeof(float), m * m);
+        for (int i = 0; i < m; i++) {
+            for (int j = 0; j < m; j++) {
+                float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j];
+                q_min[i * m + j] = q_min_i;
+            }
+        }
+
+        float *q_t = (float *)calloc(sizeof(float), n * n);
+        for (int i = 0; i < n; i++) {
+            for (int j = 0; j < n; j++) {
+                float q_t_i;
+                if ((i < k) || (j < k)) {
+                    q_t_i = (i == j) ? 1.0f : 0.0f;
+                } else {
+                    q_t_i = q_min[(i - k) * m + (j - k)];
+                }
+                q_t[i * n + j] = q_t_i;
+            }
+        }
+
+        if (k == 0) {
+            memcpy(Q, q_t, sizeof(float) * n * n);  // Q = q_t
+            naive_matrix_multiply(q_t, A, R, n, n, n);  // R = q_t * A
+        } else {
+            float *res = (float *)calloc(sizeof(float), n * n);
+            naive_matrix_multiply(q_t, Q, res, n, n, n);  // res = q_t * Q
+            memcpy(Q, res, sizeof(float) * n * n);
+            naive_matrix_multiply(q_t, R, res, n, n, n);  // res = q_t * R
+            memcpy(R, res, sizeof(float) * n * n);
+            free(res);  // scratch buffer is allocated per iteration
+        }
+        free(x);
+        free(e);
+        free(u);
+        free(v);
+        free(q_min);
+        free(q_t);
+    }
+    free(I);  // identity matrix is no longer read after the loop
+    naive_transpose(Q, n);
+}
+
+void no_opt_naive_qr_decomp(float *A, float *Q, float *R, int n) {  // same steps, calling the no_opt_ helpers
+    memcpy(R, A, sizeof(float) * n * n);
+
+    // Build identity matrix of size n * n
+    float *I = (float *)calloc(sizeof(float), n * n);
+    for (int i = 0; i < n; i++) {
+        for (int j = 0; j < n; j++) {
+            I[i * n + j] = (i == j);
+        }
+    }
+
+    // Householder
+    for (int k = 0; k < n - 1; k++) {
+        int m = n - k;
+
+        float *x = (float *)calloc(sizeof(float), m);
+        float *e = (float *)calloc(sizeof(float), m);
+        for (int i = 0; i < m; i++) {
+            int row = k + i;
+            x[i] = R[row * n + k];
+            e[i] = I[row * n + k];
+        }
+
+        float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m);
+
+        float *u = (float *)calloc(sizeof(float), m);
+        float *v = (float *)calloc(sizeof(float), m);
+        for (int i = 0; i < m; i++) {
+            u[i] = x[i] + alpha * e[i];
+        }
+        float norm_u = no_opt_naive_norm(u, m);
+        for (int i = 0; i < m; i++) {
+            v[i] = u[i] / norm_u;
+        }
+
+        float *q_min = (float *)calloc(sizeof(float), m * m);
+        for (int i = 0; i < m; i++) {
+            for (int j = 0; j < m; j++) {
+                float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j];
+                q_min[i * m + j] = q_min_i;
+            }
+        }
+
+        float *q_t = (float *)calloc(sizeof(float), n * n);
+        for (int i = 0; i < n; i++) {
+            for (int j = 0; j < n; j++) {
+                float q_t_i;
+                if ((i < k) || (j < k)) {
+                    q_t_i = (i == j) ? 1.0f : 0.0f;
+                } else {
+                    q_t_i = q_min[(i - k) * m + (j - k)];
+                }
+                q_t[i * n + j] = q_t_i;
+            }
+        }
+
+        if (k == 0) {
+            memcpy(Q, q_t, sizeof(float) * n * n);  // Q = q_t
+            no_opt_naive_matrix_multiply(q_t, A, R, n, n, n);  // R = q_t * A
+        } else {
+            float *res = (float *)calloc(sizeof(float), n * n);
+            no_opt_naive_matrix_multiply(q_t, Q, res, n, n, n);  // res = q_t * Q
+            memcpy(Q, res, sizeof(float) * n * n);
+            no_opt_naive_matrix_multiply(q_t, R, res, n, n, n);  // res = q_t * R
+            memcpy(R, res, sizeof(float) * n * n);
+            free(res);  // scratch buffer is allocated per iteration
+        }
+        free(x);
+        free(e);
+        free(u);
+        free(v);
+        free(q_min);
+        free(q_t);
+    }
+    free(I);  // identity matrix is no longer read after the loop
+    no_opt_naive_transpose(Q, n);
+}
+
+int main(void) {  // runs both variants on the same random input and compares Q and R
+    // time_t t = time(NULL);
+    // srand((unsigned)time(&t));
+
+    float A[SIZE * SIZE] = {0.0f};
+    for (int i = 0; i < SIZE * SIZE; i++) {
+        A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT);
+        printf("%f\n", A[i]);
+    }
+
+    float Q[SIZE * SIZE] = {0.0f};
+    float R[SIZE * SIZE] = {0.0f};
+    naive_qr_decomp(A, Q, R, SIZE);
+    float expectedQ[SIZE * SIZE] = {0.0f};
+    float expectedR[SIZE * SIZE] = {0.0f};
+    no_opt_naive_qr_decomp(A, expectedQ, expectedR, SIZE);
+
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = 0; j < SIZE; j++) {  // compare the (i, j) element that is printed
+            printf("Q Output: %f\n", Q[i * SIZE + j]);
+            printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]);
+            assert(fabs(expectedQ[i * SIZE + j] - Q[i * SIZE + j]) < DELTA);
+        }
+    }
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = 0; j < SIZE; j++) {  // compare the (i, j) element that is printed
+            printf("R Output: %f\n", R[i * SIZE + j]);
+            printf("Expected R Output: %f\n", expectedR[i * SIZE + j]);
+            assert(fabs(expectedR[i * SIZE + j] - R[i * SIZE + j]) < DELTA);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/dios-egraphs/Diospyros/c-tests/random-1-by-1-matrix-multiply.c
b/src/dios-egraphs/Diospyros/c-tests/random-1-by-1-matrix-multiply.c new file mode 100644 index 00000000..10ede436 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-1-by-1-matrix-multiply.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 1 +#define A_COLS 1 +#define B_COLS 1 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", 
expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-2-by-2-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/random-2-by-2-matrix-multiply.c new file mode 100644 index 00000000..f1ef604a --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-2-by-2-matrix-multiply.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 2 +#define A_COLS 2 +#define B_COLS 2 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + 
no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-2d-2d-conv.c b/src/dios-egraphs/Diospyros/c-tests/random-2d-2d-conv.c new file mode 100644 index 00000000..75605001 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-2d-2d-conv.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define I_ROWS 2 +#define I_COLS 2 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[restrict I_ROWS][I_COLS], + float f_in[restrict F_ROWS][F_COLS], + float mat_out[restrict O_ROWS][O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = + mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + mat_out[outRow][outCol] += v; + } + } + } + } + } +} + +void no_opt_convolution(float mat_in[restrict I_ROWS][I_COLS], + float f_in[restrict F_ROWS][F_COLS], + float mat_out[restrict O_ROWS][O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS 
&& iCol >= 0 && + iCol < I_COLS) { + float v = + mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + mat_out[outRow][outCol] += v; + } + } + } + } + } +} + +int main(void) { + srand(1); // set seed + + float mat_in[I_ROWS][I_COLS]; + for (int i = 0; i < I_ROWS; i++) { + for (int j = 0; j < I_COLS; j++) { + mat_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float f_in[F_ROWS][F_COLS]; + for (int i = 0; i < F_ROWS; i++) { + for (int j = 0; j < F_COLS; j++) { + f_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float mat_out[O_ROWS][O_COLS]; + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + mat_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[O_ROWS][O_COLS]; + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + expected[i][j] = mat_out[i][j]; + } + } + + convolution(mat_in, f_in, mat_out); + no_opt_convolution(mat_in, f_in, expected); + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + printf("output: %f\n", mat_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(mat_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-2d-conv.c b/src/dios-egraphs/Diospyros/c-tests/random-2d-conv.c new file mode 100644 index 00000000..e706c392 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-2d-conv.c @@ -0,0 +1,92 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define I_ROWS 2 +#define I_COLS 2 +#define F_ROWS 2 +#define F_COLS 2 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for 
(int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +void no_opt_convolution(float mat_in[I_ROWS * I_COLS], + float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + srand(1); // set seed + + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = mat_out[i]; + } + + convolution(mat_in, f_in, mat_out); + no_opt_convolution(mat_in, f_in, expected); + for (int i = 0; i < O_ROWS * O_COLS; i++) { + printf("output: %f\n", mat_out[i]); + printf("expected: %f\n", expected[i]); + assert(mat_out[i] == expected[i]); + } + return 0; +} \ No newline at end of file diff --git 
a/src/dios-egraphs/Diospyros/c-tests/random-3-by-3-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/random-3-by-3-matrix-multiply.c new file mode 100644 index 00000000..c83c6491 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-3-by-3-matrix-multiply.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 3 +#define A_COLS 3 +#define B_COLS 3 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + 
printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-matrix-multiply-1.c b/src/dios-egraphs/Diospyros/c-tests/random-matrix-multiply-1.c new file mode 100644 index 00000000..d199fe28 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-matrix-multiply-1.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 4 +#define A_COLS 2 +#define B_COLS 4 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_COLS][B_COLS]; + for (int i = 0; i < A_COLS; i++) { + for (int j = 0; j < B_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } 
+ + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-matrix-multiply-2.c b/src/dios-egraphs/Diospyros/c-tests/random-matrix-multiply-2.c new file mode 100644 index 00000000..f2d33210 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-matrix-multiply-2.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 6 +#define A_COLS 1 +#define B_COLS 5 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_COLS][B_COLS]; + for (int i = 0; i < A_COLS; i++) { + for (int j = 0; j < B_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float 
expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-point-product.c b/src/dios-egraphs/Diospyros/c-tests/random-point-product.c new file mode 100644 index 00000000..8d386a23 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-point-product.c @@ -0,0 +1,107 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define SIZE 4 + +void cross_product(float lhs[3], float rhs[3], float result[3]) + __attribute__((always_inline)); + +void cross_product(float lhs[3], float rhs[3], float result[3]) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +void point_product(float q_in[4], float p_in[4], float result_out[4]) { + float qvec[3] = {q_in[0], q_in[1], q_in[2]}; + // qvec = {0, 1, 2} + + float uv[3]; + cross_product(qvec, p_in, uv); + // uv = {1 * 2 - 2 * 1, 2 * 0 - 0 * 2, 0 * 1 - 1 * 0} = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + // uv = {0, 0 , 0} + float qxuv[3]; + cross_product(qvec, uv, qxuv); + // qxuv = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + result_out[i] = p_in[i] + q_in[3] * uv[i] + qxuv[i]; + } +} + +// --- NO OPTS --- + +void no_opt_cross_product(float lhs[3], float rhs[3], float result[3]) + __attribute__((always_inline)); + +void no_opt_cross_product(float lhs[3], float rhs[3], float result[3]) { + result[0] = lhs[1] * 
rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +void no_opt_point_product(float q_in[4], float p_in[4], float result_out[4]) { + float qvec[3] = {q_in[0], q_in[1], q_in[2]}; + // qvec = {0, 1, 2} + + float uv[3]; + no_opt_cross_product(qvec, p_in, uv); + // uv = {1 * 2 - 2 * 1, 2 * 0 - 0 * 2, 0 * 1 - 1 * 0} = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + // uv = {0, 0 , 0} + float qxuv[3]; + no_opt_cross_product(qvec, uv, qxuv); + // qxuv = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + result_out[i] = p_in[i] + q_in[3] * uv[i] + qxuv[i]; + } +} + +int main(void) { + srand(100); // set seed + + float q_in[SIZE]; + for (int i = 0; i < SIZE; i++) { + q_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float p_in[SIZE]; + for (int i = 0; i < SIZE; i++) { + p_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float result_out[SIZE]; + for (int i = 0; i < SIZE; i++) { + result_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[SIZE]; + for (int i = 0; i < SIZE; i++) { + expected[i] = result_out[i]; + } + point_product(q_in, p_in, result_out); + no_opt_point_product(q_in, p_in, expected); + for (int i = 0; i < SIZE; i++) { + printf("%f\n", result_out[i]); + printf("%f\n", expected[i]); + assert(expected[i] == result_out[i]); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-q-prod.c b/src/dios-egraphs/Diospyros/c-tests/random-q-prod.c new file mode 100644 index 00000000..b5c2fc0e --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-q-prod.c @@ -0,0 +1,149 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define SIZE 4 + +__attribute__((always_inline)) void naive_cross_product(float *lhs, float *rhs, + float *result) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + 
result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +__attribute__((always_inline)) void naive_point_product(float *q, float *p, + float *result) { + float qvec[3] = {q[0], q[1], q[2]}; + float uv[3]; + naive_cross_product(qvec, p, uv); + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + float qxuv[3]; + naive_cross_product(qvec, uv, qxuv); + + for (int i = 0; i < 3; i++) { + result[i] = p[i] + q[3] * uv[i] + qxuv[i]; + } +} + +void naive_quaternion_product(float *a_q, float *a_t, float *b_q, float *b_t, + float *r_q, float *r_t) { + r_q[3] = + a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * b_q[2]; + r_q[0] = + a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - a_q[2] * b_q[1]; + r_q[1] = + a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * b_q[2]; + r_q[2] = + a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * b_q[0]; + + naive_point_product(a_q, b_t, r_t); + for (int i = 0; i < 3; i++) { + r_t[i] += a_t[i]; + } +} + +// --- NO OPTS --- + +__attribute__((always_inline)) void no_opt_naive_cross_product(float *lhs, + float *rhs, + float *result) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +__attribute__((always_inline)) void no_opt_naive_point_product(float *q, + float *p, + float *result) { + float qvec[3] = {q[0], q[1], q[2]}; + float uv[3]; + no_opt_naive_cross_product(qvec, p, uv); + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + float qxuv[3]; + no_opt_naive_cross_product(qvec, uv, qxuv); + + for (int i = 0; i < 3; i++) { + result[i] = p[i] + q[3] * uv[i] + qxuv[i]; + } +} + +void no_opt_naive_quaternion_product(float *a_q, float *a_t, float *b_q, + float *b_t, float *r_q, float *r_t) { + r_q[3] = + a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * b_q[2]; 
+ r_q[0] = + a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - a_q[2] * b_q[1]; + r_q[1] = + a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * b_q[2]; + r_q[2] = + a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * b_q[0]; + + no_opt_naive_point_product(a_q, b_t, r_t); + for (int i = 0; i < 3; i++) { + r_t[i] += a_t[i]; + } +} + +int main(void) { + srand(1); // set seed + + float a_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + a_q[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float a_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + a_t[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float b_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + b_q[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float b_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + b_t[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float r_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + r_q[i] = 0.0f; + } + float r_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + r_t[i] = 0.0f; + } + float expectedq[SIZE]; + for (int i = 0; i < SIZE; i++) { + expectedq[i] = 0.0f; + } + float expectedt[SIZE]; + for (int i = 0; i < SIZE; i++) { + expectedt[i] = 0.0f; + } + naive_quaternion_product(a_q, a_t, b_q, b_t, r_q, r_t); + no_opt_naive_quaternion_product(a_q, a_t, b_q, b_t, expectedq, expectedt); + for (int i = 0; i < SIZE; i++) { + printf("%f\n", r_q[i]); + assert(expectedq[i] == r_q[i]); + } + for (int i = 0; i < SIZE; i++) { + printf("%f\n", r_t[i]); + assert(expectedt[i] == r_t[i]); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v2.c b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v2.c new file mode 100644 index 00000000..ae105af4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v2.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 8 +#define COL_SIZE 7 +#define F_SIZE 4 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void 
stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 2; k1++) { + for (int k2 = 0; k2 < 2; k2++) { + temp += filter_in[k1 * 2 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +void no_opt_stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], + float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 2; k1++) { + for (int k2 = 0; k2 < 2; k2++) { + temp += filter_in[k1 * 2 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + srand(1); // set seed + + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = sol_out[i]; + } + stencil(orig_in, sol_out, filter_in); + no_opt_stencil(orig_in, expected, filter_in); + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("%f\n", sol_out[i]); + printf("%f\n", expected[i]); + assert(expected[i] == sol_out[i]); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v3.c b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v3.c new file mode 100644 index 00000000..e8f15af3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v3.c @@ 
-0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 5 +#define COL_SIZE 5 +#define F_SIZE 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 2; k1++) { + for (int k2 = 0; k2 < 2; k2++) { + temp += filter_in[k1 * 2 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +void no_opt_stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], + float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 2; k1++) { + for (int k2 = 0; k2 < 2; k2++) { + temp += filter_in[k1 * 2 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + srand(1); // set seed + + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = sol_out[i]; + } + stencil(orig_in, sol_out, filter_in); + no_opt_stencil(orig_in, expected, filter_in); + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("%f\n", sol_out[i]); + printf("%f\n", expected[i]); + assert(expected[i] == sol_out[i]); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v4.c 
b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v4.c new file mode 100644 index 00000000..b8d2bae8 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v4.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 2 +#define COL_SIZE 3 +#define F_SIZE 1 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 2; k1++) { + for (int k2 = 0; k2 < 2; k2++) { + temp += filter_in[k1 * 2 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +void no_opt_stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], + float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 2; k1++) { + for (int k2 = 0; k2 < 2; k2++) { + temp += filter_in[k1 * 2 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + srand(1); // set seed + + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = sol_out[i]; + } + stencil(orig_in, sol_out, filter_in); + no_opt_stencil(orig_in, expected, filter_in); + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("%f\n", sol_out[i]); + 
printf("%f\n", expected[i]); + assert(expected[i] == sol_out[i]); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v5.c b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v5.c new file mode 100644 index 00000000..335eb8e3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d-v5.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 1 +#define COL_SIZE 1 +#define F_SIZE 1 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +void no_opt_stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], + float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + srand(1); // set seed + + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 
sol_out[i]; + } + stencil(orig_in, sol_out, filter_in); + no_opt_stencil(orig_in, expected, filter_in); + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("%f\n", sol_out[i]); + printf("%f\n", expected[i]); + assert(expected[i] == sol_out[i]); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d.c b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d.c new file mode 100644 index 00000000..279dfca1 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/random-stencil-2d.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 5 +#define COL_SIZE 7 +#define F_SIZE 9 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +void no_opt_stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], + float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + srand(1); // set seed + + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + 
filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = sol_out[i]; + } + stencil(orig_in, sol_out, filter_in); + no_opt_stencil(orig_in, expected, filter_in); + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("%f\n", sol_out[i]); + printf("%f\n", expected[i]); + assert(expected[i] == sol_out[i]); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/rar-dependency.c b/src/dios-egraphs/Diospyros/c-tests/rar-dependency.c new file mode 100644 index 00000000..f43e02b3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/rar-dependency.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + d_out[0] = a_in[0] + a_in[0] * c_in[0]; + d_out[1] = d_out[0] - c_in[0]; + d_out[2] = a_in[0] + a_in[2] * c_in[2]; + d_out[3] = d_out[2] - c_in[3]; + d_out[4] = a_in[2] + b_in[2] * c_in[2]; +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + d_out[0] = a_in[0] + a_in[0] * c_in[0]; + d_out[1] = d_out[0] - c_in[0]; + d_out[2] = a_in[0] + a_in[2] * c_in[2]; + d_out[3] = d_out[2] - c_in[3]; + d_out[4] = a_in[2] + b_in[2] * c_in[2]; +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / 
(float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = d_out[i]; + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/raw-dependency.c b/src/dios-egraphs/Diospyros/c-tests/raw-dependency.c new file mode 100644 index 00000000..208582cf --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/raw-dependency.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + d_out[0] = a_in[0] + b_in[0] * c_in[0]; + d_out[1] = d_out[0] - c_in[0]; + d_out[2] = a_in[2] + b_in[2] * c_in[2]; + d_out[3] = d_out[3] - c_in[3]; + d_out[4] = a_in[2] + b_in[2] * c_in[2]; +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + d_out[0] = a_in[0] + b_in[0] * c_in[0]; + d_out[1] = d_out[0] - c_in[0]; + d_out[2] = a_in[2] + b_in[2] * c_in[2]; + d_out[3] = d_out[3] - c_in[3]; + d_out[4] = a_in[2] + b_in[2] * c_in[2]; +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for 
(int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = d_out[i]; + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/reduction-add.c b/src/dios-egraphs/Diospyros/c-tests/reduction-add.c new file mode 100644 index 00000000..a8d74233 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/reduction-add.c @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float result = 0.0f; + for (int i = 0; i < ROWS; i++) { + result += a_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result += b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result += c_in[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = result; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float result = 0.0f; + for (int i = 0; i < ROWS; i++) { + result += a_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result += b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result += c_in[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = result; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / 
MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/reduction-mul.c b/src/dios-egraphs/Diospyros/c-tests/reduction-mul.c new file mode 100644 index 00000000..933cd7ab --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/reduction-mul.c @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float result = 1.0f; + for (int i = 0; i < ROWS; i++) { + result *= a_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result *= b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result *= c_in[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = result; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float result = 1.0f; + for (int i = 0; i < ROWS; i++) { + result *= a_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result *= b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result *= c_in[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = result; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + 
float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/reduction-sub.c b/src/dios-egraphs/Diospyros/c-tests/reduction-sub.c new file mode 100644 index 00000000..2eac3e09 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/reduction-sub.c @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float result = 100.0f; + for (int i = 0; i < ROWS; i++) { + result -= a_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result -= b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result -= c_in[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = result; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + float result = 100.0f; + for (int i = 0; i < ROWS; i++) { + result -= a_in[i]; + } + for (int 
i = 0; i < ROWS; i++) { + result -= b_in[i]; + } + for (int i = 0; i < ROWS; i++) { + result -= c_in[i]; + } + for (int i = 0; i < ROWS; i++) { + d_out[i] = result; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/repeat-thrice.c b/src/dios-egraphs/Diospyros/c-tests/repeat-thrice.c new file mode 100644 index 00000000..298a4126 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/repeat-thrice.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define ROWS 12 +#define THIRD 4 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < THIRD; i++) { + d_out[i] = a_in[i] + b_in[i]; + } + for (int i = THIRD; i < 2 * THIRD; i++) { + d_out[i] = c_in[i] + b_in[i]; + } + for (int i = 2 * THIRD; i < 3 * THIRD; i++) { + d_out[i] = 
c_in[i] + b_in[i]; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < THIRD; i++) { + d_out[i] = a_in[i] + b_in[i]; + } + for (int i = THIRD; i < 2 * THIRD; i++) { + d_out[i] = c_in[i] + b_in[i]; + } + for (int i = 2 * THIRD; i < 3 * THIRD; i++) { + d_out[i] = c_in[i] + b_in[i]; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/repeat-twice.c b/src/dios-egraphs/Diospyros/c-tests/repeat-twice.c new file mode 100644 index 00000000..e60b3534 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/repeat-twice.c @@ -0,0 +1,73 @@ +#include +#include +#include +#include +#include + +#define ROWS 8 +#define HALF 4 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < 
HALF; i++) { + d_out[i] = a_in[i] + b_in[i]; + } + for (int i = HALF; i < ROWS; i++) { + d_out[i] = c_in[i] + b_in[i]; + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < HALF; i++) { + d_out[i] = a_in[i] + b_in[i]; + } + for (int i = HALF; i < ROWS; i++) { + d_out[i] = c_in[i] + b_in[i]; + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/return-new.c b/src/dios-egraphs/Diospyros/c-tests/return.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/return-new.c rename to src/dios-egraphs/Diospyros/c-tests/return.c diff --git a/src/dios-egraphs/Diospyros/c-tests/rows-greater-matrix-multiply.c b/src/dios-egraphs/Diospyros/c-tests/rows-greater-matrix-multiply.c new file mode 100644 index 00000000..422f0e10 --- /dev/null +++ 
b/src/dios-egraphs/Diospyros/c-tests/rows-greater-matrix-multiply.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +#define A_ROWS 5 +#define A_COLS 3 +#define B_COLS 2 + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +void no_opt_matrix_multiply(float a_in[A_ROWS][A_COLS], + float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + srand(1); // set seed + + float a_in[A_ROWS][A_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float b_in[A_COLS][B_COLS]; + for (int i = 0; i < A_COLS; i++) { + for (int j = 0; j < B_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + c_out[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + expected[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + + matrix_multiply(a_in, b_in, c_out); + no_opt_matrix_multiply(a_in, b_in, expected); + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + printf("output: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(c_out[i][j] == expected[i][j]); + } + } + return 0; +} \ No newline at end of file diff --git 
a/src/dios-egraphs/Diospyros/llvm-tests/scalar-new.c b/src/dios-egraphs/Diospyros/c-tests/scalar.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/scalar-new.c rename to src/dios-egraphs/Diospyros/c-tests/scalar.c diff --git a/src/dios-egraphs/Diospyros/c-tests/sgn-inline.c b/src/dios-egraphs/Diospyros/c-tests/sgn-inline.c new file mode 100644 index 00000000..e982b0eb --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/sgn-inline.c @@ -0,0 +1,42 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +void sample_test(float A[SIZE], float B[SIZE]) { + for (int i = 0; i < SIZE; i++) { + B[i] = sgn(A[i]); + } +} + +void no_opt_sample_test(float A[SIZE], float B[SIZE]) { + for (int i = 0; i < SIZE; i++) { + B[i] = no_opt_sgn(A[i]); + } +} + +int main(void) { + float A[SIZE] = {1, -2, 0, -4, 5}; + float B[SIZE] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, -2, 0, -4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/sgn-naive-norm-inline-1.c b/src/dios-egraphs/Diospyros/c-tests/sgn-naive-norm-inline-1.c new file mode 100644 index 00000000..c04592b6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/sgn-naive-norm-inline-1.c @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float naive_norm(float *x, int m) __attribute__((always_inline)); +float sgn(float v) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float 
no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE], float B[SIZE]) { + B[0] = naive_norm(A, SIZE); + for (int i = 1; i < SIZE; i++) { + B[i] = sgn(A[i]); + } +} + +void no_opt_sample_test(float A[SIZE], float B[SIZE]) { + B[0] = no_opt_naive_norm(A, SIZE); + for (int i = 1; i < SIZE; i++) { + B[i] = no_opt_sgn(A[i]); + } +} + +int main(void) { + float A[SIZE] = {1, -2, 0, -4, 5}; + float B[SIZE] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, -2, 0, -4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/sgn-naive-norm-inline-2.c b/src/dios-egraphs/Diospyros/c-tests/sgn-naive-norm-inline-2.c new file mode 100644 index 00000000..9613c3e4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/sgn-naive-norm-inline-2.c @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define DELTA 0.1f + +float naive_norm(float *x, int m) __attribute__((always_inline)); +float sgn(float v) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float 
A[SIZE], float B[SIZE]) { + float alpha = -sgn(A[0]) * naive_norm(A, SIZE); + B[0] = alpha; + for (int i = 1; i < SIZE; i++) { + B[i] = sgn(A[i]); + } +} + +void no_opt_sample_test(float A[SIZE], float B[SIZE]) { + float alpha = -no_opt_sgn(A[0]) * no_opt_naive_norm(A, SIZE); + B[0] = alpha; + for (int i = 1; i < SIZE; i++) { + B[i] = no_opt_sgn(A[i]); + } +} + +int main(void) { + float A[SIZE] = {1, -2, 0, -4, 5}; + float B[SIZE] = {0}; + sample_test(A, B); + float expectedA[SIZE] = {1, -2, 0, -4, 5}; + float expectedB[SIZE] = {0}; + no_opt_sample_test(expectedA, expectedB); + for (int i = 0; i < SIZE; i++) { + printf("B Output: %f\n", B[i]); + printf("Expected B Output: %f\n", expectedB[i]); + assert(fabs(expectedB[i] - B[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/skip.c b/src/dios-egraphs/Diospyros/c-tests/skip.c new file mode 100644 index 00000000..485518ea --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/skip.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +#define ROWS 8 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i != 5) { + d_out[i] = a_in[i] + b_in[i]; + } + } +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + for (int i = 0; i < ROWS; i++) { + if (i != 5) { + d_out[i] = a_in[i] + b_in[i]; + } + } +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float 
d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = d_out[i]; + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/sqrt.c b/src/dios-egraphs/Diospyros/c-tests/sqrt.c similarity index 72% rename from src/dios-egraphs/Diospyros/llvm-tests/sqrt.c rename to src/dios-egraphs/Diospyros/c-tests/sqrt.c index 6cbcd0c9..a753d824 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/sqrt.c +++ b/src/dios-egraphs/Diospyros/c-tests/sqrt.c @@ -1,3 +1,4 @@ +#include #include #include #define SIZE 8 @@ -14,9 +15,12 @@ int main(void) { float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; float c_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; vsqrt(a_in, b_out, c_out); + float delta = 0.00001f; + float expected[SIZE] = {3.000000f, 2.828427f, 2.645751f, 2.449490f, + 2.236068f, 2.000000f, 1.732051f, 1.414214f}; for (int i = 0; i < SIZE; i++) { - printf("%f\n", b_out[i]); printf("%f\n", c_out[i]); + assert(fabs(expected[i] - c_out[i]) < delta); } // 3.000000 // 2.828427 diff --git a/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.c b/src/dios-egraphs/Diospyros/c-tests/stencil-2d.c similarity index 66% rename from src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.c rename to src/dios-egraphs/Diospyros/c-tests/stencil-2d.c index c4fc9f97..38362c79 100644 --- a/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.c +++ b/src/dios-egraphs/Diospyros/c-tests/stencil-2d.c @@ -1,3 +1,4 @@ +#include #include #define ROW_SIZE 8 #define COL_SIZE 4 @@ -31,7 +32,47 @@ int main(void) { 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; float filter_in[F_SIZE] = {1, 1, 1, 1, 1, 1, 1, 1, 1}; stencil(orig_in, sol_out, filter_in); + float expected[ROW_SIZE * COL_SIZE] = { + 9, 9, 1, 1, 9, 9, 1, 1, 9, 9, 1, 1, 9, 9, 1, 1, + 9, 9, 1, 1, 9, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }; for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { printf("%f\n", sol_out[i]); + assert(expected[i] == sol_out[i]); } + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + // 9.000000 + // 9.000000 + // 1.000000 + // 1.000000 + + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 + // 1.000000 } \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/sub.c b/src/dios-egraphs/Diospyros/c-tests/sub.c new file mode 100644 index 00000000..3ef3621e --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/sub.c @@ -0,0 +1,26 @@ +#include +#include +#define SIZE 4 + +void diff(float a_in[SIZE], float b_in[SIZE], float c_out[SIZE]) { + c_out[0] = a_in[0] - b_in[0]; + c_out[1] = a_in[1] - b_in[1]; + c_out[2] = a_in[2] - b_in[2]; + c_out[3] = a_in[3] - b_in[3]; +} + +int main(int argc, char **argv) { + float a_in[SIZE] = {1, 2, 7, 8}; + float b_in[SIZE] = {5, 6, 3, 4}; + float c_out[SIZE]; + diff(a_in, b_in, c_out); + assert(c_out[0] == -4); + assert(c_out[1] == -4); + assert(c_out[2] == 4); + assert(c_out[3] == 4); + printf("first: %f\n", c_out[0]); + printf("second: %f\n", c_out[1]); + printf("third: %f\n", c_out[2]); + printf("fourth: %f\n", c_out[3]); + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/ternary-new.c b/src/dios-egraphs/Diospyros/c-tests/ternary.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/ternary-new.c rename to 
src/dios-egraphs/Diospyros/c-tests/ternary.c diff --git a/src/dios-egraphs/Diospyros/c-tests/test1-local-array.c b/src/dios-egraphs/Diospyros/c-tests/test1-local-array.c new file mode 100644 index 00000000..cb9ba117 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/test1-local-array.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1f + +// float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +void sample_test(float A[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + int m = SIZE - k; + + // float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + // float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + A[k] = alpha; + } +} + +void no_opt_sample_test(float A[SIZE]) { + for (int k = 0; k < SIZE - 1; k++) { + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + int m = SIZE - k; + + // float x[SIZE]; + for (int i = 0; i < m; i++) { + x[i] = 0.0f; + } + // float e[SIZE]; + for (int i = 0; i < m; i++) { + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + x[i] = 1.0f; + e[i] = 2.0f; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + A[k] = alpha; + } +} + +int main(void) { + float A[SIZE] = {0}; + float x[SIZE] = {0}; + float e[SIZE] = {0}; + sample_test(A); + float expectedA[SIZE] = {0}; + float expectedx[SIZE] = {0}; + float 
expectede[SIZE] = {0}; + no_opt_sample_test(expectedA); + for (int i = 0; i < SIZE; i++) { + printf("A Output: %f\n", A[i]); + printf("Expected A Output: %f\n", expectedA[i]); + printf("X Output: %f\n", x[i]); + printf("Expected X Output: %f\n", expectedx[i]); + printf("E Output: %f\n", e[i]); + printf("Expected E Output: %f\n", expectede[i]); + assert(fabs(expectedA[i] - A[i]) < DELTA); + assert(fabs(expectedx[i] - x[i]) < DELTA); + assert(fabs(expectede[i] - e[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/test2-local-array.c b/src/dios-egraphs/Diospyros/c-tests/test2-local-array.c new file mode 100644 index 00000000..d39765f3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/test2-local-array.c @@ -0,0 +1,276 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 3 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) 
{ + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + float I[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float e[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + float u[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float v[SIZE] = {[0 ... 
SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + float q_min[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float q_t[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + 
float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + printf("%f\n", A[i]); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/test3-local-array.c 
b/src/dios-egraphs/Diospyros/c-tests/test3-local-array.c new file mode 100644 index 00000000..7495df9d --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/test3-local-array.c @@ -0,0 +1,314 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 3 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < 
SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + float I[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + float x[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float e[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + float u[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + float v[SIZE] = {[0 ... SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + float q_min[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + float q_t[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + // else { + // // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE + // * + // // SIZE); + // float res[SIZE * SIZE] = {[0 ... SIZE * SIZE - 1] = 0.0f}; + // for (int i = 0; i < SIZE * SIZE; i++) { + // res[i] = 0.0f; + // } + // naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + // for (int i = 0; i < SIZE * SIZE; i++) { + // Q[i] = res[i]; + // } + // naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + // for (int i = 0; i < SIZE * SIZE; i++) { + // R[i] = res[i]; + // } + // } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE 
- 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } + // else { + // float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + // no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // memcpy(Q, res, sizeof(float) * SIZE * SIZE); + // no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // memcpy(R, res, sizeof(float) * SIZE * SIZE); + // } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + // time_t t = time(NULL); + // srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + printf("%f\n", A[i]); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + 
naive_fixed_qr_decomp(A, Q, R); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/transpose.c b/src/dios-egraphs/Diospyros/c-tests/transpose.c new file mode 100644 index 00000000..934d651f --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/transpose.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 10 +#define DELTA 0.1f + +void naive_transpose(float a[SIZE * SIZE], int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} + +void no_opt_naive_transpose(float a[SIZE * SIZE], int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} + +int main() { + float calculated[SIZE * SIZE] = {0}; + for (int i = 0; i < SIZE * SIZE; i++) { + if (i % 2 == 0) { + calculated[i] = 1.0f; + } else { + calculated[i] = 0.0f; + } + } + float expected[SIZE * SIZE] = {0}; + for (int i = 0; i < SIZE * SIZE; i++) { + if (i % 2 == 0) { + expected[i] = 1.0f; + } else { + expected[i] = 0.0f; + } + } + naive_transpose(calculated, SIZE); + no_opt_naive_transpose(expected, SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + printf("A Transpose Calculated: %f\n", calculated[i]); + printf("A Transpose 
Expected: %f\n", expected[i]); + assert(fabs(expected[i] - calculated[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/triangle.c b/src/dios-egraphs/Diospyros/c-tests/triangle.c new file mode 100644 index 00000000..344beb52 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/triangle.c @@ -0,0 +1,96 @@ +#include +#include +#include +#include +#include + +#define SIZE 2 +#define DELTA 0.1 + +// Triangle Access Pattern Test + +void lower_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < i; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + +void no_opt_lower_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < i; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + +void upper_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i; j < SIZE; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + +void no_opt_upper_triangle(float A[SIZE * SIZE], float B[SIZE * SIZE], + float C[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i; j < SIZE; j++) { + C[i + SIZE * j] = A[i + SIZE * j] + B[i + SIZE * j]; + } + } +} + +int main(void) { + float A1[SIZE * SIZE] = {0, 1, 2, 3}; + float B1[SIZE * SIZE] = {0, 1, 2, 3}; + float C1[SIZE * SIZE] = {0, 1, 2, 3}; + + float A1Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float B1Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float C1Expected[SIZE * SIZE] = {0, 1, 2, 3}; + + lower_triangle(A1, B1, C1); + no_opt_lower_triangle(A1Expected, B1Expected, C1Expected); + + for (int i = 0; i < SIZE * SIZE; i++) { + printf("A: %f\n", A1[i]); + printf("A Expected: %f\n", A1Expected[i]); + printf("B: %f\n", B1[i]); + printf("B Expected: %f\n", B1Expected[i]); + printf("C: 
%f\n", C1[i]); + printf("C Expected: %f\n", C1Expected[i]); + + assert(fabsf(A1[i] - A1Expected[i]) < DELTA); + assert(fabsf(B1[i] - B1Expected[i]) < DELTA); + assert(fabsf(C1[i] - C1Expected[i]) < DELTA); + } + + float A2[SIZE * SIZE] = {0, 1, 2, 3}; + float B2[SIZE * SIZE] = {0, 1, 2, 3}; + float C2[SIZE * SIZE] = {0, 1, 2, 3}; + + float A2Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float B2Expected[SIZE * SIZE] = {0, 1, 2, 3}; + float C2Expected[SIZE * SIZE] = {0, 1, 2, 3}; + + upper_triangle(A2, B2, C2); + no_opt_upper_triangle(A2Expected, B2Expected, C2Expected); + + for (int i = 0; i < SIZE * SIZE; i++) { + printf("A: %f\n", A2[i]); + printf("A Expected: %f\n", A2Expected[i]); + printf("B: %f\n", B2[i]); + printf("B Expected: %f\n", B2Expected[i]); + printf("C: %f\n", C2[i]); + printf("C Expected: %f\n", C2Expected[i]); + + assert(fabsf(A2[i] - A2Expected[i]) < DELTA); + assert(fabsf(B2[i] - B2Expected[i]) < DELTA); + assert(fabsf(C2[i] - C2Expected[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/turnt.toml b/src/dios-egraphs/Diospyros/c-tests/turnt.toml new file mode 100644 index 00000000..4c3e1bc9 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/turnt.toml @@ -0,0 +1 @@ +command = "bash ../test-runner.sh run-opt c-tests/{filename}" diff --git a/src/dios-egraphs/Diospyros/llvm-tests/var_new.c b/src/dios-egraphs/Diospyros/c-tests/var.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/var_new.c rename to src/dios-egraphs/Diospyros/c-tests/var.c diff --git a/src/dios-egraphs/Diospyros/c-tests/war-dependency.c b/src/dios-egraphs/Diospyros/c-tests/war-dependency.c new file mode 100644 index 00000000..e5401f12 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/war-dependency.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + 
float d_out[ROWS]) { + d_out[0] = a_in[0] + b_in[0] * c_in[0]; + d_out[0] = d_out[0] - c_in[0]; + d_out[2] = a_in[2] + b_in[2] * c_in[2]; + d_out[3] = d_out[3] - c_in[3]; + d_out[4] = a_in[2] + b_in[2] * c_in[2]; +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + d_out[0] = a_in[0] + b_in[0] * c_in[0]; + d_out[0] = d_out[0] - c_in[0]; + d_out[2] = a_in[2] + b_in[2] * c_in[2]; + d_out[3] = d_out[3] - c_in[3]; + d_out[4] = a_in[2] + b_in[2] * c_in[2]; +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = d_out[i]; + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/c-tests/waw-dependency.c b/src/dios-egraphs/Diospyros/c-tests/waw-dependency.c new file mode 100644 index 00000000..6cb8fe16 --- /dev/null +++ b/src/dios-egraphs/Diospyros/c-tests/waw-dependency.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +#define ROWS 5 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void 
branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + d_out[0] = a_in[0] + b_in[0] * c_in[0]; + d_out[0] = d_out[0] - c_in[0]; + d_out[0] = a_in[2] + b_in[2] * c_in[2]; + d_out[0] = d_out[3] - c_in[3]; + d_out[0] = a_in[2] + b_in[2] * c_in[2]; +} + +void no_opt_branching_loop(float a_in[ROWS], float b_in[ROWS], float c_in[ROWS], + float d_out[ROWS]) { + d_out[0] = a_in[0] + b_in[0] * c_in[0]; + d_out[0] = d_out[0] - c_in[0]; + d_out[0] = a_in[2] + b_in[2] * c_in[2]; + d_out[0] = d_out[3] - c_in[3]; + d_out[0] = a_in[2] + b_in[2] * c_in[2]; +} + +int main(void) { + srand(1); // set seed + // load in a_in + float a_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in c_in + float c_in[ROWS]; + for (int i = 0; i < ROWS; i++) { + c_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float d_out[ROWS]; + for (int i = 0; i < ROWS; i++) { + d_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[ROWS]; + for (int i = 0; i < ROWS; i++) { + expected[i] = d_out[i]; + } + + // calculate up c_out + branching_loop(a_in, b_in, c_in, d_out); + // calculate expected + no_opt_branching_loop(a_in, b_in, c_in, expected); + + // check expected == output + for (int i = 0; i < ROWS; i++) { + printf("calculated: %f\n", d_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - d_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/width5_new.c b/src/dios-egraphs/Diospyros/c-tests/width5.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/width5_new.c rename to src/dios-egraphs/Diospyros/c-tests/width5.c diff --git a/src/dios-egraphs/Diospyros/llvm-tests/width9_new.c 
b/src/dios-egraphs/Diospyros/c-tests/width9.c similarity index 100% rename from src/dios-egraphs/Diospyros/llvm-tests/width9_new.c rename to src/dios-egraphs/Diospyros/c-tests/width9.c diff --git a/src/dios-egraphs/Diospyros/data-plots/data/all-data/all-data.csv b/src/dios-egraphs/Diospyros/data-plots/data/all-data/all-data.csv new file mode 100644 index 00000000..f02f0817 --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/data/all-data/all-data.csv @@ -0,0 +1,46 @@ +Group,Benchmark,Baseline,SLP,Diospyros +qr-decomp-data,2-by-2-qr-decomp,519,1877,2025 +qr-decomp-data,3-by-3-qr-decomp,6218,6710,6932 +qr-decomp-data,4-by-4-qr-decomp,10648,11612,11410 +qr-decomp-data,5-by-5-qr-decomp,15842,18499,18311 +qr-decomp-data,6-by-6-qr-decomp,22060,25880,26048 +mat-mul-data,2-by-2-mat-mul,15,17,23 +mat-mul-data,3-by-3-mat-mul,46,73,68 +mat-mul-data,4-by-4-mat-mul,111,125,78 +mat-mul-data,5-by-5-mat-mul,219,194,246 +mat-mul-data,6-by-6-mat-mul,385,399,373 +mat-mul-data,7-by-7-mat-mul,643,283,672 +mat-mul-data,8-by-8-mat-mul,944,321,371 +mat-mul-data,9-by-9-mat-mul,1264,896,1287 +mat-mul-data,10-by-10-mat-mul,1747,1252,1381 +mat-mul-data,11-by-11-mat-mul,2383,1733,2341 +mat-mul-data,12-by-12-mat-mul,3032,1928,1626 +mat-mul-data,15-by-15-mat-mul,6224,3461,5812 +mat-mul-data,16-by-16-mat-mul,7734,3652,3580 +stencil-data,4-by-4-and-2-by-2-stencil,30,39,33 +stencil-data,5-by-5-and-2-by-2-stencil,64,79,75 +stencil-data,6-by-6-and-2-by-2-stencil,111,143,129 +stencil-data,8-by-8-and-2-by-2-stencil,257,310,227 +stencil-data,12-by-12-and-2-by-2-stencil,695,849,608 +stencil-data,16-by-16-and-2-by-2-stencil,1733,1655,1193 +stencil-data,4-by-4-and-3-by-3-stencil,65,80,77 +stencil-data,5-by-5-and-3-by-3-stencil,165,174,166 +stencil-data,6-by-6-and-3-by-3-stencil,263,313,251 +stencil-data,8-by-8-and-3-by-3-stencil,578,687,498 +stencil-data,12-by-12-and-3-by-3-stencil,1685,1908,1569 +stencil-data,16-by-16-and-3-by-3-stencil,3276,3711,3072 
+conv-data,3-by-3-and-2-by-2-conv,99.0,111.0,90.0 +conv-data,3-by-3-and-3-by-3-conv,340.0,243.0,174.0 +conv-data,3-by-3-and-4-by-4-conv,650.0,428.0,278.0 +conv-data,4-by-4-and-2-by-2-conv,257.0,194.0,139.0 +conv-data,4-by-4-and-4-by-4-conv,2652.0,774.0,505.0 +conv-data,5-by-5-and-2-by-2-conv,358.0,295.0,212.0 +conv-data,5-by-5-and-3-by-3-conv,885.0,666.0,432.0 +conv-data,5-by-5-and-4-by-4-conv,3726.0,1230.0,705.0 +conv-data,6-by-6-and-2-by-2-conv,509.0,422.0,277.0 +conv-data,6-by-6-and-3-by-3-conv,2546.0,973.0,603.0 +conv-data,6-by-6-and-4-by-4-conv,5261.0,1795.0,1105.0 +conv-data,8-by-8-and-2-by-2-conv,1554.0,754.0,464.0 +conv-data,8-by-8-and-3-by-3-conv,4185.0,1692.0,925.0 +conv-data,8-by-8-and-4-by-4-conv,9788.0,3259.0,1787.0 +q-prod-data,qprod,54.0,62.0,58.0 diff --git a/src/dios-egraphs/Diospyros/data-plots/data/individual-data/conv-data.csv b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/conv-data.csv new file mode 100644 index 00000000..325909f9 --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/conv-data.csv @@ -0,0 +1,15 @@ +Benchmark,Baseline,SLP,Diospyros +3-by-3-and-2-by-2-conv,99.0,111.0,90.0 +3-by-3-and-3-by-3-conv,340.0,243.0,174.0 +3-by-3-and-4-by-4-conv,650.0,428.0,278.0 +4-by-4-and-2-by-2-conv,257.0,194.0,139.0 +4-by-4-and-4-by-4-conv,2652.0,774.0,505.0 +5-by-5-and-2-by-2-conv,358.0,295.0,212.0 +5-by-5-and-3-by-3-conv,885.0,666.0,432.0 +5-by-5-and-4-by-4-conv,3726.0,1230.0,705.0 +6-by-6-and-2-by-2-conv,509.0,422.0,277.0 +6-by-6-and-3-by-3-conv,2546.0,973.0,603.0 +6-by-6-and-4-by-4-conv,5261.0,1795.0,1105.0 +8-by-8-and-2-by-2-conv,1554.0,754.0,464.0 +8-by-8-and-3-by-3-conv,4185.0,1692.0,925.0 +8-by-8-and-4-by-4-conv,9788.0,3259.0,1787.0 diff --git a/src/dios-egraphs/Diospyros/data-plots/data/individual-data/mat-mul-data.csv b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/mat-mul-data.csv new file mode 100644 index 00000000..4903f5a5 --- /dev/null +++ 
b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/mat-mul-data.csv @@ -0,0 +1,14 @@ +Benchmark,Baseline,SLP,Diospyros +2-by-2-mat-mul,15,17,23 +3-by-3-mat-mul,46,73,68 +4-by-4-mat-mul,111,125,78 +5-by-5-mat-mul,219,194,246 +6-by-6-mat-mul,385,399,373 +7-by-7-mat-mul,643,283,672 +8-by-8-mat-mul,944,321,371 +9-by-9-mat-mul,1264,896,1287 +10-by-10-mat-mul,1747,1252,1381 +11-by-11-mat-mul,2383,1733,2341 +12-by-12-mat-mul,3032,1928,1626 +15-by-15-mat-mul,6224,3461,5812 +16-by-16-mat-mul,7734,3652,3580 diff --git a/src/dios-egraphs/Diospyros/data-plots/data/individual-data/q-prod-data.csv b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/q-prod-data.csv new file mode 100644 index 00000000..e95cb4a4 --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/q-prod-data.csv @@ -0,0 +1,2 @@ +Benchmark,Baseline,SLP,Diospyros +qprod,54.0,62.0,58.0 diff --git a/src/dios-egraphs/Diospyros/data-plots/data/individual-data/qr-decomp-data.csv b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/qr-decomp-data.csv new file mode 100644 index 00000000..7e0d64a7 --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/qr-decomp-data.csv @@ -0,0 +1,6 @@ +Benchmark,Baseline,SLP,Diospyros +2-by-2-qr-decomp,519,1877,2025 +3-by-3-qr-decomp,6218,6710,6932 +4-by-4-qr-decomp,10648,11612,11410 +5-by-5-qr-decomp,15842,18499,18311 +6-by-6-qr-decomp,22060,25880,26048 diff --git a/src/dios-egraphs/Diospyros/data-plots/data/individual-data/stencil-data.csv b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/stencil-data.csv new file mode 100644 index 00000000..b75a34e7 --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/data/individual-data/stencil-data.csv @@ -0,0 +1,13 @@ +Benchmark,Baseline,SLP,Diospyros +4-by-4-and-2-by-2-stencil,30,39,33 +5-by-5-and-2-by-2-stencil,64,79,75 +6-by-6-and-2-by-2-stencil,111,143,129 +8-by-8-and-2-by-2-stencil,257,310,227 +12-by-12-and-2-by-2-stencil,695,849,608 
+16-by-16-and-2-by-2-stencil,1733,1655,1193 +4-by-4-and-3-by-3-stencil,65,80,77 +5-by-5-and-3-by-3-stencil,165,174,166 +6-by-6-and-3-by-3-stencil,263,313,251 +8-by-8-and-3-by-3-stencil,578,687,498 +12-by-12-and-3-by-3-stencil,1685,1908,1569 +16-by-16-and-3-by-3-stencil,3276,3711,3072 diff --git a/src/dios-egraphs/Diospyros/data-plots/data/original-data/all-data.csv b/src/dios-egraphs/Diospyros/data-plots/data/original-data/all-data.csv new file mode 100644 index 00000000..e7672edd --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/data/original-data/all-data.csv @@ -0,0 +1,27 @@ +Benchmark,Baseline,SLP,Diospyros +2-by-2-mat-mul,15,17,23 +3-by-3-mat-mul,46,73,68 +4-by-4-mat-mul,111,125,78 +5-by-5-mat-mul,219,194,246 +6-by-6-mat-mul,385,399,373 +7-by-7-mat-mul,643,283,672 +8-by-8-mat-mul,944,321,371 +9-by-9-mat-mul,1264,896,1287 +10-by-10-mat-mul,1747,1252,1381 +11-by-11-mat-mul,2383,1733,2341 +12-by-12-mat-mul,3032,1928,1626 +15-by-15-mat-mul,6224,3461,5812 +16-by-16-mat-mul,7734,3652,3580 +3-qr-decomp,6218,6710,6932 +4-qr-decomp,10648,11612,11410 +5-qr-decomp,15842,18499,18311 +6-qr-decomp,22060,25880,26048 +4-by-4-stencil2d,65,80,77 +5-by-5-stencil2d,165,174,166 +6-by-6-stencil2d,263,313,251 +8-by-8-stencil2d,578,687,498 +12-by-12-stencil2d,1685,1908,1569 +15-by-16-stencil2d,3100,3606,2883 +16-by-16-stencil2d,3276,3711,3072 +conv2d,866,657,424 +qprod,55,63,57 diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/all.png b/src/dios-egraphs/Diospyros/data-plots/plots/all.png new file mode 100644 index 00000000..3665e004 Binary files /dev/null and b/src/dios-egraphs/Diospyros/data-plots/plots/all.png differ diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/baseline-diospyros-peedup.png b/src/dios-egraphs/Diospyros/data-plots/plots/baseline-diospyros-peedup.png new file mode 100644 index 00000000..274651c9 Binary files /dev/null and b/src/dios-egraphs/Diospyros/data-plots/plots/baseline-diospyros-peedup.png differ diff --git 
a/src/dios-egraphs/Diospyros/data-plots/plots/baseline-slp-diospyros-speedup.png b/src/dios-egraphs/Diospyros/data-plots/plots/baseline-slp-diospyros-speedup.png new file mode 100644 index 00000000..b3abc154 Binary files /dev/null and b/src/dios-egraphs/Diospyros/data-plots/plots/baseline-slp-diospyros-speedup.png differ diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/conv.png b/src/dios-egraphs/Diospyros/data-plots/plots/conv.png new file mode 100644 index 00000000..d5fe2a3c Binary files /dev/null and b/src/dios-egraphs/Diospyros/data-plots/plots/conv.png differ diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/mat-mul.png b/src/dios-egraphs/Diospyros/data-plots/plots/mat-mul.png new file mode 100644 index 00000000..ce71362f Binary files /dev/null and b/src/dios-egraphs/Diospyros/data-plots/plots/mat-mul.png differ diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/q-prod.png b/src/dios-egraphs/Diospyros/data-plots/plots/q-prod.png new file mode 100644 index 00000000..a8ab5c69 Binary files /dev/null and b/src/dios-egraphs/Diospyros/data-plots/plots/q-prod.png differ diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/qr-decomp.png b/src/dios-egraphs/Diospyros/data-plots/plots/qr-decomp.png new file mode 100644 index 00000000..dc1b95da Binary files /dev/null and b/src/dios-egraphs/Diospyros/data-plots/plots/qr-decomp.png differ diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/slp-diospyros-speedup.png b/src/dios-egraphs/Diospyros/data-plots/plots/slp-diospyros-speedup.png new file mode 100644 index 00000000..c88d101b Binary files /dev/null and b/src/dios-egraphs/Diospyros/data-plots/plots/slp-diospyros-speedup.png differ diff --git a/src/dios-egraphs/Diospyros/data-plots/plots/speedup.png b/src/dios-egraphs/Diospyros/data-plots/plots/speedup.png new file mode 100644 index 00000000..274651c9 Binary files /dev/null and b/src/dios-egraphs/Diospyros/data-plots/plots/speedup.png differ diff --git 
a/src/dios-egraphs/Diospyros/data-plots/plots/stencil.png b/src/dios-egraphs/Diospyros/data-plots/plots/stencil.png new file mode 100644 index 00000000..987f7489 Binary files /dev/null and b/src/dios-egraphs/Diospyros/data-plots/plots/stencil.png differ diff --git a/src/dios-egraphs/Diospyros/data-plots/table/table.txt b/src/dios-egraphs/Diospyros/data-plots/table/table.txt new file mode 100644 index 00000000..001d5bbd --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/table/table.txt @@ -0,0 +1,53 @@ +\begin{longtable}{l|rrrr} +Benchmark name & Baseline avg (s) & SLP avg (s) & Diospyros avg (s)\\ +\hline +2×2 qr decomp & 5.1900e-05 & 1.8770e-04 & 2.0250e-04 \\ +3×3 qr decomp & 6.2180e-04 & 6.7100e-04 & 6.9320e-04 \\ +4×4 qr decomp & 1.0648e-03 & 1.1612e-03 & 1.1410e-03 \\ +5×5 qr decomp & 1.5842e-03 & 1.8499e-03 & 1.8311e-03 \\ +6×6 qr decomp & 2.2060e-03 & 2.5880e-03 & 2.6048e-03 \\ +\hline +2×2 mat mul & 1.5000e-06 & 1.7000e-06 & 2.3000e-06 \\ +3×3 mat mul & 4.6000e-06 & 7.3000e-06 & 6.8000e-06 \\ +4×4 mat mul & 1.1100e-05 & 1.2500e-05 & 7.8000e-06 \\ +5×5 mat mul & 2.1900e-05 & 1.9400e-05 & 2.4600e-05 \\ +6×6 mat mul & 3.8500e-05 & 3.9900e-05 & 3.7300e-05 \\ +7×7 mat mul & 6.4300e-05 & 2.8300e-05 & 6.7200e-05 \\ +8×8 mat mul & 9.4400e-05 & 3.2100e-05 & 3.7100e-05 \\ +9×9 mat mul & 1.2640e-04 & 8.9600e-05 & 1.2870e-04 \\ +10×10 mat mul & 1.7470e-04 & 1.2520e-04 & 1.3810e-04 \\ +11×11 mat mul & 2.3830e-04 & 1.7330e-04 & 2.3410e-04 \\ +12×12 mat mul & 3.0320e-04 & 1.9280e-04 & 1.6260e-04 \\ +15×15 mat mul & 6.2240e-04 & 3.4610e-04 & 5.8120e-04 \\ +16×16 mat mul & 7.7340e-04 & 3.6520e-04 & 3.5800e-04 \\ +\hline +4×4,2×2 stencil & 3.0000e-06 & 3.9000e-06 & 3.3000e-06 \\ +5×5,2×2 stencil & 6.4000e-06 & 7.9000e-06 & 7.5000e-06 \\ +6×6,2×2 stencil & 1.1100e-05 & 1.4300e-05 & 1.2900e-05 \\ +8×8,2×2 stencil & 2.5700e-05 & 3.1000e-05 & 2.2700e-05 \\ +12×12,2×2 stencil & 6.9500e-05 & 8.4900e-05 & 6.0800e-05 \\ +16×16,2×2 stencil & 1.7330e-04 & 1.6550e-04 & 
1.1930e-04 \\ +4×4,3×3 stencil & 6.5000e-06 & 8.0000e-06 & 7.7000e-06 \\ +5×5,3×3 stencil & 1.6500e-05 & 1.7400e-05 & 1.6600e-05 \\ +6×6,3×3 stencil & 2.6300e-05 & 3.1300e-05 & 2.5100e-05 \\ +8×8,3×3 stencil & 5.7800e-05 & 6.8700e-05 & 4.9800e-05 \\ +12×12,3×3 stencil & 1.6850e-04 & 1.9080e-04 & 1.5690e-04 \\ +16×16,3×3 stencil & 3.2760e-04 & 3.7110e-04 & 3.0720e-04 \\ +\hline +3×3,2×2 conv & 9.9000e-06 & 1.1100e-05 & 9.0000e-06 \\ +3×3,3×3 conv & 3.4000e-05 & 2.4300e-05 & 1.7400e-05 \\ +3×3,4×4 conv & 6.5000e-05 & 4.2800e-05 & 2.7800e-05 \\ +4×4,2×2 conv & 2.5700e-05 & 1.9400e-05 & 1.3900e-05 \\ +4×4,4×4 conv & 2.6520e-04 & 7.7400e-05 & 5.0500e-05 \\ +5×5,2×2 conv & 3.5800e-05 & 2.9500e-05 & 2.1200e-05 \\ +5×5,3×3 conv & 8.8500e-05 & 6.6600e-05 & 4.3200e-05 \\ +5×5,4×4 conv & 3.7260e-04 & 1.2300e-04 & 7.0500e-05 \\ +6×6,2×2 conv & 5.0900e-05 & 4.2200e-05 & 2.7700e-05 \\ +6×6,3×3 conv & 2.5460e-04 & 9.7300e-05 & 6.0300e-05 \\ +6×6,4×4 conv & 5.2610e-04 & 1.7950e-04 & 1.1050e-04 \\ +8×8,2×2 conv & 1.5540e-04 & 7.5400e-05 & 4.6400e-05 \\ +8×8,3×3 conv & 4.1850e-04 & 1.6920e-04 & 9.2500e-05 \\ +8×8,4×4 conv & 9.7880e-04 & 3.2590e-04 & 1.7870e-04 \\ +\hline +qprod & 5.4000e-06 & 6.2000e-06 & 5.8000e-06 \\ +\end{longtable} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/calc-stats.py b/src/dios-egraphs/Diospyros/data-plots/utils/calc-stats.py new file mode 100644 index 00000000..dc7c376b --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/utils/calc-stats.py @@ -0,0 +1,31 @@ +import sys +import csv + + +def avg(lst): + return sum(lst) / len(lst) + + +def calc(csv_path): + data = [] + with open(csv_path, "r+") as csvfile: + csvreader = csv.reader(csvfile) + for i, row in enumerate(csvreader): + if i == 0: + continue + data.append((row[1], row[2], row[3])) + + average_base = avg(list(map(lambda trip: float(trip[0]), data))) + average_slp = avg(list(map(lambda trip: float(trip[1]), data))) + average_dios = avg(list(map(lambda 
trip: float(trip[2]), data))) + + print(average_base, average_slp, average_dios) + print(average_base / average_dios, average_slp / average_dios) + + +def main(): + csv_path = sys.argv[1] + calc(csv_path) + + +main() diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/gen-latex-table-all.py b/src/dios-egraphs/Diospyros/data-plots/utils/gen-latex-table-all.py new file mode 100644 index 00000000..4e717cc9 --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/utils/gen-latex-table-all.py @@ -0,0 +1,61 @@ +import sys +import csv +from collections import OrderedDict + +TOTAL = 10**7 + +csv_file_path = sys.argv[1] +out_path = sys.argv[2] + +headers = [] +group_data = OrderedDict() + +with open(csv_file_path) as csvfile: + csvreader = csv.reader(csvfile) + + for i, row in enumerate(csvreader): + assert len(row) == 5 + if i == 0: + headers.append("Benchmark name") + for header in row[2:]: + headers.append(header + " avg (s)") + # headers.append("SLP speedup") + # headers.append("Diospyros speedup") + continue + + group, bench_name, base_time, slp_time, diospyros_time = row + if group not in group_data: + group_data[group] = OrderedDict() + group_data[group][str(bench_name)] = ["{:.4e}".format(float(base_time) / TOTAL, 2), + "{:.4e}".format( + float(slp_time) / TOTAL, 2), + "{:.4e}".format( + float(diospyros_time) / TOTAL, 2), ] + # "{:.2e}".format(float(base_time) / + # float(slp_time), 2), + # "{:.2e}".format(float(base_time) / float(diospyros_time), 2)] + + +# https://tex.stackexchange.com/questions/631583/how-would-one-print-table-outputs-in-python-into-latex + +textabular = f"l|{'r'*len(headers)}" +texheader = " & ".join(headers) + "\\\\" +texdata = "" +for group in group_data: + texdata += "\\hline\n" + for label in group_data[group]: + cleaned_label = label + cleaned_label = cleaned_label.replace(r'-by-', '×') + cleaned_label = cleaned_label.replace('-', ' ') + cleaned_label = cleaned_label.replace(' and ', ',') + texdata += f"{cleaned_label} & {' & 
'.join(map(str,group_data[group][label]))} \\\\\n" + +total_data = "\\begin{longtable}{"+textabular+"}" + "\n" + \ + texheader + "\n" + texdata + "\\end{longtable}" +print("\\begin{longtable}{"+textabular+"}") +print(texheader) +print(texdata, end="") +print("\\end{longtable}") + +with open(out_path, "w") as fp: + fp.write(total_data) diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/merge-all-data.py b/src/dios-egraphs/Diospyros/data-plots/utils/merge-all-data.py new file mode 100644 index 00000000..ddbab7bd --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/utils/merge-all-data.py @@ -0,0 +1,35 @@ +import subprocess +from collections import OrderedDict +import csv +import glob +import sys + +OPTIONS = ["baseline", "slp", "opt"] + + +def main(): + bench_directory = sys.argv[1] + csv_path = sys.argv[2] + + print(f"{bench_directory} is the target benchmark directory to merge. Will ignore 'all-data.csv'.") + matching_bench_files = glob.glob(f"{bench_directory}/*.csv") + + with open(csv_path, "w+") as csv_wfile: + csvwriter = csv.writer(csv_wfile) + csvwriter.writerow( + ["Group", "Benchmark", "Baseline", "SLP", "Diospyros"]) + for bench_name in matching_bench_files: + stripped_bench_name = bench_name[bench_name.rindex( + "/") + 1:bench_name.rindex(".csv")] + if stripped_bench_name != "all-data": + print(f"Handling {stripped_bench_name}.") + with open(bench_name, "r+") as csv_rfile: + csvreader = csv.reader(csv_rfile) + + for i, row in enumerate(csvreader): + if i == 0: + continue + csvwriter.writerow([stripped_bench_name] + row) + + +main() diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/plot-all.py b/src/dios-egraphs/Diospyros/data-plots/utils/plot-all.py new file mode 100644 index 00000000..ae01d72d --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/utils/plot-all.py @@ -0,0 +1,77 @@ +import matplotlib.pyplot as plt +import numpy as np +import csv +import sys +import glob + + +def plot(csv_file_path, out_path): + + names = [] + 
baseline = [] + slp = [] + diospyros = [] + + with open(csv_file_path) as csvfile: + csvreader = csv.reader(csvfile) + + for i, row in enumerate(csvreader): + if len(row) > 4: + row = row[1:] + if i == 0: + continue + + name = row[0] + name = name.replace(r'-by-', '×') + name = name.replace('-', ' ') + names.append(name) + baseline.append(1.0) + slp.append(float(row[1]) / float(row[2])) + diospyros.append(float(row[1]) / float(row[3])) + + # data to plot + n_groups = len(names) + + # create plot + fig, ax = plt.subplots() + index = np.arange(n_groups) + bar_width = 0.25 + opacity = 0.8 + + rects1 = plt.bar(index, baseline, bar_width, + alpha=opacity, + color='xkcd:baby blue', + label='Baseline') + + rects2 = plt.bar(index + bar_width, slp, bar_width, + alpha=opacity, + color='xkcd:deep sky blue', + label='SLP') + + rects2 = plt.bar(index + 2 * bar_width, diospyros, bar_width, + alpha=opacity, + color='xkcd:vibrant blue', + label='Diospyros') + + plt.xlabel('Benchmark') + plt.ylabel('Speedup') + plt.title('Speedup from Baseline for SLP and Diospyros Vectorization') + plt.xticks(index + 1.1 * bar_width, names) + plt.xticks(rotation=30, ha='right') + plt.legend() + + plt.tight_layout() + + plt.savefig(out_path) + + +def main(): + csv_file_dir = sys.argv[1] + plots_dir = sys.argv[2] + csv_files = glob.glob(f"{csv_file_dir}/*.csv") + for csv in csv_files: + short_file_name = csv[csv.rindex("/") + 1: csv.rindex("-data.csv")] + plot(csv, f"{plots_dir}/{short_file_name}.png") + + +main() diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/run-all-benchmarks.sh b/src/dios-egraphs/Diospyros/data-plots/utils/run-all-benchmarks.sh new file mode 100644 index 00000000..082792ea --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/utils/run-all-benchmarks.sh @@ -0,0 +1,5 @@ +benchtypes=( conv qr-decomp mat-mul q-prod stencil ) +for name in "${benchtypes[@]}" +do + python3 run-benchmarks.py ../../benchmarks/$name/ ../data/$name-data.csv +done \ No newline at end of file 
diff --git a/src/dios-egraphs/Diospyros/data-plots/utils/run-benchmarks.py b/src/dios-egraphs/Diospyros/data-plots/utils/run-benchmarks.py new file mode 100644 index 00000000..7c6ed1e8 --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/utils/run-benchmarks.py @@ -0,0 +1,54 @@ +import subprocess +from collections import OrderedDict +import csv +import glob +import sys + +OPTIONS = ["baseline", "slp", "opt"] + + +def main(): + # iterate over benchmark folder files, that end with .c + # for each of the files + # run each of {baseline, slp, opt} + # gather data and write them into dictionary + # write into a CSV with file name as the first item + bench_directory = sys.argv[1] + csv_path = sys.argv[2] + + print(f"{bench_directory} is the target benchmark directory.") + matching_bench_files = glob.glob(f"{bench_directory}*.c") + data_dict = OrderedDict() + + for bench_name in matching_bench_files: + stripped_bench_name = bench_name.replace('..', '')[1:] + print(f"{stripped_bench_name} is being run.") + results = [] + + for option in OPTIONS: + option_name = "run-" + option + subprocess.run( + ["make", "-C", "../..", f"{option_name}", f"test=./{stripped_bench_name}"]) + with open("../../data.txt", "r") as fp: + file_contents = fp.read() + data = float(file_contents.strip()) + results.append(data) + print("Deleting data.txt.") + subprocess.run(["rm", "../../data.txt"]) + + print(results) + further_stripped_name = stripped_bench_name[stripped_bench_name.rindex( + "/") + 1:stripped_bench_name.rindex(".c")] + data_dict[further_stripped_name] = results + + print(f"Writing to {csv_path}.") + with open(csv_path, "w+") as csvfile: + csvwriter = csv.writer(csvfile) + # write first row + csvwriter.writerow(["Benchmark", "Baseline", "SLP", "Diospyros"]) + for bench_name, bench_results in data_dict.items(): + csvwriter.writerow([bench_name] + bench_results) + print("Finished data collection.") + + +main() diff --git 
a/src/dios-egraphs/Diospyros/data-plots/utils/sort-csv-rows.py b/src/dios-egraphs/Diospyros/data-plots/utils/sort-csv-rows.py new file mode 100644 index 00000000..f7826040 --- /dev/null +++ b/src/dios-egraphs/Diospyros/data-plots/utils/sort-csv-rows.py @@ -0,0 +1,35 @@ +import subprocess +from collections import OrderedDict +import csv +import glob +import sys + + +def main(): + # iterate over benchmark folder files, that end with .c + # for each of the files + # run each of {baseline, slp, opt} + # gather data and write them into dictionary + # write into a CSV with file name as the first item + csv_path = sys.argv[1] + + data = [] + header = None + with open(csv_path, "r") as csvfile: + csvreader = csv.reader(csvfile) + # read first row + for i, row in enumerate(csvreader): + if i == 0: + header = row + continue + data.append(row) + assert header != None + data = sorted(data, key=lambda tup: tup[0]) + with open(csv_path, "w+") as csvfile: + csvwriter = csv.writer(csvfile) + csvwriter.writerow(header) + for row in data: + csvwriter.writerow(row) + + +main() diff --git a/src/dios-egraphs/Diospyros/diospyros.cpp b/src/dios-egraphs/Diospyros/diospyros.cpp index a59f0852..9fb422c7 100644 --- a/src/dios-egraphs/Diospyros/diospyros.cpp +++ b/src/dios-egraphs/Diospyros/diospyros.cpp @@ -1,64 +1,93 @@ #include #include +#include #include +#include #include #include #include #include #include +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" #include 
"llvm/IR/LegacyPassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/Pass.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Scalar/LoopUnrollPass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Vectorize/SLPVectorizer.h" using namespace llvm; -using namespace std; - -typedef struct IntLLVMPair { - uint32_t node_int; - LLVMValueRef arg; -} IntLLVMPair; - -typedef struct LLVMPair { - LLVMValueRef original_value; - LLVMValueRef new_value; -} LLVMPair; - -typedef struct VectorPointerSize { - LLVMPair const *llvm_pointer; - std::size_t llvm_pointer_size; -} VectorPointerSize; - -extern "C" VectorPointerSize optimize(LLVMModuleRef mod, LLVMContextRef context, - LLVMBuilderRef builder, - LLVMValueRef const *bb, std::size_t size, - LLVMPair const *past_instrs, - std::size_t past_size); - -const string ARRAY_NAME = "no-array-name"; -const string TEMP_NAME = "no-temp-name"; -const string SQRT64_FUNCTION_NAME = "llvm.sqrt.f64"; -const string SQRT32_FUNCTION_NAME = "llvm.sqrt.f32"; + +int main(int argc, char **argv) { + llvm::cl::ParseCommandLineOptions(argc, argv); +} + +llvm::cl::opt RunOpt("r", llvm::cl::desc("Enable Egg Optimization.")); +llvm::cl::alias RunOptAlias("opt", llvm::cl::desc("Alias for -r"), + llvm::cl::aliasopt(RunOpt)); + +llvm::cl::opt PrintOpt("z", llvm::cl::desc("Print Egg Optimization.")); +llvm::cl::alias PrintOptAlias("print", llvm::cl::desc("Alias for -z"), + llvm::cl::aliasopt(PrintOpt)); + +/// Struct representing load info, same as on Rust side +typedef struct load_info { + LLVMValueRef load; + int32_t base_id; + int32_t offset; +} load_info_t; + +/// Forward Declaration of Optimize function +extern "C" bool optimize( + LLVMModuleRef mod, LLVMContextRef context, LLVMBuilderRef builder, + LLVMValueRef const *chunk_instrs, std::size_t chunk_size, + 
LLVMValueRef const *restricted_instrs, std::size_t restricted_size, + load_info_t const *load_info, std::size_t load_info_size, bool run_egg, + bool print_opt); + +const std::string ARRAY_NAME = "no-array-name"; +const std::string TEMP_NAME = "no-temp-name"; +const std::string SQRT64_FUNCTION_NAME = "llvm.sqrt.f64"; +const std::string SQRT32_FUNCTION_NAME = "llvm.sqrt.f32"; +const std::string MEMSET_PREFIX = "memset"; +const std::string LLVM_MEMSET_PREFIX = "llvm.memset"; +const std::string MEMMOVE_PREFIX = "memmove"; +const std::string MEMCOPY_PREFIX = "memcopy"; +const std::string MAIN_FUNCTION_NAME = "main"; +const std::string NO_OPT_PREFIX = "no_opt_"; const int SQRT_OPERATOR = 3; const int BINARY_OPERATOR = 2; +const uint32_t VECTOR_WIDTH = 4; +const uint32_t FLOAT_SIZE_IN_BYTES = 4; + /** * Fresh counters for temps and array generation */ -int FRESH_INT_COUNTER = 0; -int FRESH_ARRAY_COUNTER = 0; -int FRESH_TEMP_COUNTER = 0; +static int FRESH_INT_COUNTER = 0; +static int FRESH_ARRAY_COUNTER = 0; +static int FRESH_TEMP_COUNTER = 0; /** * Generates a Fresh Index @@ -73,7 +102,7 @@ int gen_fresh_index() { */ const char *gen_fresh_array() { ++FRESH_ARRAY_COUNTER; - string array_str = ARRAY_NAME + to_string(FRESH_ARRAY_COUNTER); + std::string array_str = ARRAY_NAME + std::to_string(FRESH_ARRAY_COUNTER); char *cstr = new char[array_str.length() + 1]; std::strcpy(cstr, array_str.c_str()); return cstr; @@ -84,7 +113,7 @@ const char *gen_fresh_array() { */ const char *gen_fresh_temp() { ++FRESH_TEMP_COUNTER; - string temp_str = TEMP_NAME + to_string(FRESH_TEMP_COUNTER); + std::string temp_str = TEMP_NAME + std::to_string(FRESH_TEMP_COUNTER); char *cstr = new char[temp_str.length() + 1]; std::strcpy(cstr, temp_str.c_str()); return cstr; @@ -131,6 +160,13 @@ extern "C" int llvm_index(LLVMValueRef val, int index) { return gen_fresh_index(); } +/** + * Generates an LLVM Opaque Pointer Type wrapped as an LLVMType Ref + */ +extern "C" LLVMTypeRef 
generate_opaque_pointer(LLVMTypeRef element_type) { + return wrap(PointerType::getUnqual(unwrap(element_type))); +} + /** * True iff a value is an LLVM Unary Operation */ @@ -297,7 +333,7 @@ extern "C" bool isa_integertype(LLVMValueRef val) { } /** - * True iff a value is an LLVM IntPTr/LLVMValueRef ItPtr + * True iff a value is an LLVM IntPTr/LLVMValueRef IntPtr */ extern "C" bool isa_intptr(LLVMValueRef val) { auto unwrapped = unwrap(val); @@ -308,6 +344,18 @@ extern "C" bool isa_intptr(LLVMValueRef val) { return t->isPointerTy() && t->getContainedType(0)->isIntegerTy(); } +/** + * True iff a value is an LLVM FloatPtr/LLVMValueRef FloatPtr + */ +extern "C" bool isa_floatptr(LLVMValueRef val) { + auto unwrapped = unwrap(val); + if (unwrapped == NULL) { + return false; + } + Type *t = unwrapped->getType(); + return t->isPointerTy() && t->getContainedType(0)->isFloatTy(); +} + /** * True iff a value is an LLVM Float/LLVMValueRef Float */ @@ -406,8 +454,11 @@ extern "C" float get_constant_float(LLVMValueRef val) { Value *v = unwrap(val); if (auto *num = dyn_cast(v)) { return num->getValue().convertToFloat(); + } else if (auto *num = dyn_cast(v)) { + return num->getValue().bitsToFloat(); } - return -1; + errs() << "Not a Constant Float or Constant Int " << *unwrap(val) << "\n"; + throw "LLVM Value Must be a Constant Float or Constant Int"; } extern "C" LLVMValueRef build_constant_float(double n, LLVMContextRef context) { @@ -416,74 +467,855 @@ extern "C" LLVMValueRef build_constant_float(double n, LLVMContextRef context) { return wrap(ConstantFP::get(float_type, n)); } +bool is_memset_variety(CallInst *inst) { + Function *function = inst->getCalledFunction(); + if (function != NULL) { + StringRef name = function->getName(); + return (name.size() > MEMSET_PREFIX.size() && + name.substr(0, MEMSET_PREFIX.size()) == MEMSET_PREFIX) || + (name.size() > LLVM_MEMSET_PREFIX.size() && + name.substr(0, LLVM_MEMSET_PREFIX.size()) == + LLVM_MEMSET_PREFIX); + } + return false; +} + 
+bool is_memcopy_variety(CallInst *inst) { + Function *function = inst->getCalledFunction(); + if (function != NULL) { + StringRef name = function->getName(); + return name.size() > MEMCOPY_PREFIX.size() && + name.substr(0, MEMCOPY_PREFIX.size()) == MEMCOPY_PREFIX; + } + return false; +} + +bool is_memmove_variety(CallInst *inst) { + Function *function = inst->getCalledFunction(); + if (function != NULL) { + StringRef name = function->getName(); + return name.size() > MEMMOVE_PREFIX.size() && + name.substr(0, MEMMOVE_PREFIX.size()) == MEMMOVE_PREFIX; + } + return false; +} + +bool call_is_not_sqrt(CallInst *inst) { + Function *function = inst->getCalledFunction(); + if (function != NULL) { + return !(function->getName() == SQRT32_FUNCTION_NAME || + function->getName() == SQRT64_FUNCTION_NAME); + } + return true; // just assume it is not a sqrt. This means no optimization + // will be done +} + /** - * DFS backwards from current instruction to see if any past insdtruction - * matches match_instr. + * True iff an instruction is "vectorizable" + */ +bool can_vectorize(Value *value) { + // TODO: + Instruction *instr = dyn_cast(value); + assert(instr != NULL); + if (instr->getOpcode() == Instruction::FAdd) { + return true; + } else if (instr->getOpcode() == Instruction::FSub) { + return true; + } else if (instr->getOpcode() == Instruction::FDiv) { + return true; + } else if (instr->getOpcode() == Instruction::FMul) { + return true; + } else if (instr->getOpcode() == Instruction::FNeg) { + return true; + } else if (isa(instr)) { + return true; + } else if (isa(instr)) { + return true; + } else if (isa(instr)) { + return true; + } + // else if (isa_sqrt32(wrap(instr))) { + // return true; + // } + return false; +} + +/** + * True iff an instruction is a mem intrinsic. 
+ */ +bool isa_mem_intrinsic(Instruction *instr) { + if (isa(instr)) { + return true; + } else if (isa(instr)) { + return true; + } else if (isa(instr)) { + return true; + } else if (isa(instr)) { + // hopefully this covers all memory intrinsics + return true; + } + return false; +} + +/** + * True iff 2 addresses MIGHT alias. * - * Terminates when no more previous expressions, or reaches a - * cosntant/argument/alloca instruction in LLVM. + * LLVM has a edge case when comparing the same pointer, which is why there is a + * MustAlias check + */ +bool may_alias(Value *addr1, Value *addr2, AliasAnalysis *AA) { + // IDK why I have to check both, but + // something about comparing a address + // to itself causes this?!~, problem + // first found in LSMovement + return (!AA->isNoAlias(addr1, + LocationSize::precise( + addr1->getType()->getPrimitiveSizeInBits()), + addr2, + LocationSize::precise( + addr2->getType()->getPrimitiveSizeInBits())) || + AA->isMustAlias(addr1, addr2)); +} + +using chunk_t = std::vector; +using chunks_t = std::vector>; + +/** + * True iff is a special type of instruction for chunking * - * Searches for consecutive load/store instructions to same addresses, - * which LLVM generates at -01 optimization. */ -bool dfs_llvm_instrs(User *current_instr, User *match_instr) { - if (current_instr == NULL) { - return false; +bool isa_special_chunk_instr(Instruction *instr) { + return isa_mem_intrinsic(instr) || isa(instr) || + isa(instr) || isa(instr); +} + +/* +Build chunks of instructions + +A chunk is the longest contiguous section of instructions that ends in a +sequence of stores. + +A chunk does not need to contain a store instruction. 
+ +Assumes: LoadStoreMovement pass is run before the Diospyros pass +**/ +std::vector> build_chunks(BasicBlock *B, + AliasAnalysis *AA) { + std::vector> chunks = {}; + + bool has_seen_store = false; + bool stores_alias_in_chunk = false; + std::vector curr_chunk = {}; + + // Track Last Stores seen + std::vector last_stores = {}; + for (auto &I : *B) { + // the first two cases are meant to create chunks with non-handled + // instructions + if (has_seen_store && isa_special_chunk_instr(&I)) { + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + has_seen_store = false; + stores_alias_in_chunk = false; + curr_chunk = {}; + last_stores = {}; + curr_chunk.push_back(&I); + chunks.push_back(curr_chunk); + curr_chunk = {}; + } else if (!has_seen_store && isa_special_chunk_instr(&I)) { + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + has_seen_store = false; + stores_alias_in_chunk = false; + curr_chunk = {}; + last_stores = {}; + curr_chunk.push_back(&I); + chunks.push_back(curr_chunk); + curr_chunk = {}; + } else if (!has_seen_store && isa(I) && + !isa_special_chunk_instr(&I)) { + has_seen_store = true; + curr_chunk.push_back(&I); + last_stores.push_back(&I); + } else if (!has_seen_store && !isa(I) && + !isa_special_chunk_instr(&I)) { + curr_chunk.push_back(&I); + } else if (has_seen_store && !isa(I) && + !isa_special_chunk_instr(&I)) { + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + has_seen_store = false; + stores_alias_in_chunk = false; + curr_chunk = {}; + last_stores = {}; + curr_chunk.push_back(&I); + } else { // has seen store and is a store instruction + Value *curr_store_addr = I.getOperand(1); + for (auto other_store : last_stores) { + if (other_store != &I) { + Value *other_store_addr = other_store->getOperand(1); + if (may_alias(curr_store_addr, other_store_addr, AA)) { + stores_alias_in_chunk = true; + } + } + } + 
curr_chunk.push_back(&I); + last_stores.push_back(&I); + } } - if (current_instr == match_instr) { - return true; + if (curr_chunk.size() > 0 && !stores_alias_in_chunk) { + chunks.push_back(curr_chunk); + } + + // Filter to make sure no chunks are empty + chunks_t final_chunks = {}; + for (auto chunk : chunks) { + if (!chunk.empty()) { + final_chunks.push_back(chunk); + } + } + + return final_chunks; +} + +using ad_tree_t = std::vector; +using ad_trees_t = std::vector; + +/** + * Recurse LLVM starts at an LLVM instruction and finds + * all of its arguments, and recursively so on, until + * either a load / number / arg is reached + * + * Non handled instructions are bailed out of by returning a failure + * Instructions with a load or arg that leaks into another chunk + * also leads to a failure bailout. + * + * Returns a Tuple (Success/Failure , Instructions accumulated) + */ +std::pair recurse_llvm( + Value *value, std::set chunk_instrs, + std::set basic_block_instrs, bool not_for_mem_constraint) { + // Constants + if (isa(value)) { + // DO not add constant, if i recall, constants are not llvm + // instructions + return std::make_pair(true, std::vector{}); + } + if (Instruction *instr = dyn_cast(value)) { + if (not_for_mem_constraint) { + // No Longer in Chunk + if (chunk_instrs.count(instr) == 0) { + return std::make_pair(false, {}); + } + } else { + // No Longer in Basic Block + if (basic_block_instrs.count(instr) == 0) { + return std::make_pair(false, {}); + } + } + + // Base case instructions + if (isa(instr) || isa(instr)) { + // there should not be a load isntr when checking memory instrs + if (!not_for_mem_constraint && isa(instr)) { + return std::make_pair(false, std::vector{instr}); + } + return std::make_pair(true, std::vector{instr}); + } + + // allow for alloca in mem constraint checking + if (!not_for_mem_constraint && isa(instr)) { + return std::make_pair(true, std::vector{instr}); + } + + // Phi is trouble, stop at Phis - previously caused recursion 
to fill + // stack, and also change results. + if (isa(instr)) { + return std::make_pair(false, std::vector{instr}); + } + + // Recurse on Store Instructions + if (isa(instr) && + instr->getOperand(0)->getType()->isFloatTy()) { + auto [child_b, child_tree] = + recurse_llvm(instr->getOperand(0), chunk_instrs, + basic_block_instrs, not_for_mem_constraint); + if (child_b) { + child_tree.push_back(instr); + return std::make_pair(true, child_tree); + } + } + + // Recurse on supported unary operators OR Store Instructions + if (instr->getOpcode() == Instruction::FNeg) { + auto [child_b, child_tree] = + recurse_llvm(instr->getOperand(0), chunk_instrs, + basic_block_instrs, not_for_mem_constraint); + if (child_b) { + child_tree.push_back(instr); + return std::make_pair(true, child_tree); + } + } + + // Recurse on supported binary operators + if (instr->getOpcode() == Instruction::FAdd || + instr->getOpcode() == Instruction::FSub || + instr->getOpcode() == Instruction::FDiv || + instr->getOpcode() == Instruction::FMul) { + auto [left_b, left_tree] = + recurse_llvm(instr->getOperand(0), chunk_instrs, + basic_block_instrs, not_for_mem_constraint); + auto [right_b, right_tree] = + recurse_llvm(instr->getOperand(1), chunk_instrs, + basic_block_instrs, not_for_mem_constraint); + if (left_b && right_b) { + left_tree.insert(left_tree.end(), right_tree.begin(), + right_tree.end()); + left_tree.push_back(instr); + return std::make_pair(true, left_tree); + } + } + } + + if (not_for_mem_constraint) { + // Unhandled Instruction + return std::make_pair(false, std::vector{}); + } + + if (Instruction *value_as_instr = dyn_cast(value)) { + std::vector combined_instrs = {}; + bool combined_result = true; + for (auto &operand : value_as_instr->operands()) { + auto [child_result, child_tree] = + recurse_llvm(operand, chunk_instrs, basic_block_instrs, + not_for_mem_constraint); + combined_result = combined_result && child_result; + combined_instrs.insert(combined_instrs.end(), 
child_tree.begin(), + child_tree.end()); + } + return std::make_pair(combined_result, combined_instrs); } - if (isa(current_instr) || isa(current_instr) || - isa(current_instr)) { + return std::make_pair(true, std::vector{}); +} + +bool check_single_memory_address_constraint( + Value *memory_address_value, ad_trees_t prior_ad_trees, + std::set basic_block_instrs) { + auto [success, accumulated_instrs] = + recurse_llvm(memory_address_value, {}, basic_block_instrs, false); + // success only if all instructions are inside the same basic block + // success also only if instructions tree has no memory operaitons + // except for alloc/argument + if (!success) { return false; } - bool result = false; - // special case for loads: check if prev is store and continue - // in fact this test will VERY LIKELY lead to errors on some well-crafted - // test cases if LLVM decides to load and store values to the same locations - // multiple times, this could mess up the final result badly, if the some of - // the previous values need to be stored back before revectorizing after. 
- if (auto load_instr = dyn_cast(current_instr)) { - if (auto prev_node = load_instr->getPrevNode()) { - if (auto store_instr = dyn_cast(prev_node)) { - Value *load_pointer_operand = load_instr->getPointerOperand(); - Value *store_pointer_operand = store_instr->getPointerOperand(); - if (load_pointer_operand == store_pointer_operand) { - Value *value_operand = store_instr->getValueOperand(); - - auto user_cast = dyn_cast(value_operand); - result |= dfs_llvm_instrs(user_cast, match_instr); - user_cast = dyn_cast(store_pointer_operand); - result |= dfs_llvm_instrs(user_cast, match_instr); - return result; + bool contained_in_prior_ad_tree = false; + for (auto instr : accumulated_instrs) { + for (auto prior_ad_tree : prior_ad_trees) { + for (auto prior_instr : prior_ad_tree) { + if (instr == prior_instr) { + contained_in_prior_ad_tree = true; } } } - return false; } - // remainder of instructions, besides stores - for (auto i = 0; i < current_instr->getNumOperands(); i++) { - Value *operand = current_instr->getOperand(i); - auto user_cast = dyn_cast(operand); - if (user_cast == NULL) { - throw std::invalid_argument("Could not convert Value * to User *"); + return !contained_in_prior_ad_tree; +} + +/** + Check each memory address for each memory operation in each ad tree + satisfies the following constraints: + 1. address computation tree contains no memory operations except for + alloc / argument + 2. each address computation instruction is not contained in a prior ad + tree + 3. 
each address computation only exists within 1 single basic block +*/ +bool check_memory_constraints(ad_tree_t curr_ad_tree, ad_trees_t prior_ad_trees, + std::set basic_block_instrs) { + bool constraint_success = true; + for (auto instr : curr_ad_tree) { + if (StoreInst *store = dyn_cast(instr)) { + Value *store_pointer = store->getPointerOperand(); + if (!check_single_memory_address_constraint( + store_pointer, prior_ad_trees, basic_block_instrs)) { + constraint_success = false; + if (!constraint_success) { + } + break; + } + } else if (LoadInst *load = dyn_cast(instr)) { + Value *load_pointer = load->getPointerOperand(); + if (!check_single_memory_address_constraint( + load_pointer, prior_ad_trees, basic_block_instrs)) { + constraint_success = false; + break; + } + } + } + return constraint_success; +} + +/** + * An AD Tree is just a vector of instructions reachable from a unique store + * instruction + * + */ +ad_trees_t build_ad_trees(chunk_t chunk, + std::set basic_block_instrs) { + ad_trees_t ad_trees = {}; + std::set chunk_instrs = {}; + for (auto instr : chunk) { + chunk_instrs.insert(instr); + } + for (auto instr : chunk) { + if (isa(instr)) { + // ad_tree_t new_tree = {}; + auto [success_b, ad_tree] = + recurse_llvm(instr, chunk_instrs, {}, true); + if (success_b) { + assert(ad_tree.size() != 0); + } else { + continue; + } + + // Check each memory address for each memory operation in each + // ad tree + bool mem_constraint_result = + check_memory_constraints(ad_tree, ad_trees, basic_block_instrs); + + if (mem_constraint_result) { + ad_trees.push_back(ad_tree); + } } - result |= dfs_llvm_instrs(user_cast, match_instr); } + return ad_trees; +} + +/** + * Joins adtrees together into vecotrs of instructions + * + */ +std::vector join_trees( + std::vector> trees_to_join) { + std::vector final_vector = {}; + for (auto tree : trees_to_join) { + final_vector.insert(final_vector.end(), tree.begin(), tree.end()); + } + return final_vector; +} + +/** + * True iff 
there is some load in a joined section of adtrees that MIGHT + * alias a store in the same tree. + * + * Load-store aliasing causes problems in some situation where you have + * stores as functions of the same loads, but no vectoriszation occurs, so + * the code is rewritten linearly, and a memory dependency is introduced + * + * From a bug in FFT.c + */ +chunks_t remove_load_store_alias(chunks_t chunks, AliasAnalysis *AA) { + chunks_t final_chunks = {}; + + std::vector> load_addresses = {}; + std::vector> store_addresses = {}; + int chunk_idx = 0; + for (auto chunk : chunks) { + chunk_idx++; + + for (auto instr : chunk) { + if (isa(instr)) { + Value *load_address = + dyn_cast(instr)->getPointerOperand(); + load_addresses.push_back({load_address, chunk_idx}); + } else if (isa(instr)) { + Value *store_address = + dyn_cast(instr)->getPointerOperand(); + store_addresses.push_back({store_address, chunk_idx}); + } + } + bool can_add_to_final_chunks = true; + for (auto [load_address, chunk_idx_load] : load_addresses) { + for (auto [store_address, chunk_idx_store] : store_addresses) { + if ((chunk_idx_load != + chunk_idx_store) && // if thhe load and store come + // from the same chunk, they + // cannot alias in a problem from + // the vectorizaiton as the loads + // will still come before stores + may_alias(load_address, store_address, AA)) { + can_add_to_final_chunks = false; + } + } + } + if (can_add_to_final_chunks) { + final_chunks.push_back(chunk); + } + } + return final_chunks; +} + +/** + * return the index to in baseOfArrayVec that store is an offset from, or + * NULLOPT if not matching + */ +std::pair get_base_reference(Instruction *mem_instr, + std::vector base_of_array_vec, + ScalarEvolution *SE) { + for (int i = 0; i < base_of_array_vec.size(); i++) { + Value *base_array_ptr = base_of_array_vec[i]; + assert(base_array_ptr->getType()->isPointerTy()); + Value *mem_instr_ptr = NULL; + if (StoreInst *store_instr = dyn_cast(mem_instr)) { + mem_instr_ptr = 
store_instr->getPointerOperand(); + } else if (LoadInst *load_instr = dyn_cast(mem_instr)) { + mem_instr_ptr = load_instr->getPointerOperand(); + } + const SCEV *mem_instr_ptr_se = SE->getSCEV(mem_instr_ptr); + const SCEV *base_ptr_se = SE->getSCEV(base_array_ptr); + const SCEV *diff = SE->getMinusSCEV(mem_instr_ptr_se, base_ptr_se); + APInt min_val = SE->getSignedRangeMin(diff); + APInt max_val = SE->getSignedRangeMax(diff); + if (min_val == max_val) { + int val = (int)max_val.roundToDouble(); + return {i, val}; + } + } + return {-1, -1}; +} + +// Check Alignment +inline bool is_aligned(int diff_from_base) { return diff_from_base % 16 == 0; } + +/** + * Given a group of stores trees, greedily assign the store trees into + * new sets of store trees such that each set is all consecutive and aligned + * Returns a the sets of groups of stores trees with this property. + */ +std::vector group_trees( + ad_trees_t group_of_trees, std::map store_to_offset) { + std::vector trees_used = {}; + for (auto _ : group_of_trees) { + trees_used.push_back(false); + } + + std::vector result = {}; + uint32_t start_offset = 0; + while (std::any_of(trees_used.begin(), trees_used.end(), + [](bool b) { return !b; })) { + // get the smallest starting offset + uint32_t min_offset = UINT32_MAX; + for (int i = 0; i < group_of_trees.size(); i++) { + if (trees_used[i]) { + continue; + } + auto tree = group_of_trees[i]; + StoreInst *store = dyn_cast(tree.back()); + int offset = store_to_offset[store] / FLOAT_SIZE_IN_BYTES; + if (offset < min_offset) { + min_offset = offset; + } + } + min_offset = min_offset - (min_offset % VECTOR_WIDTH); + std::set required_offsets = {}; + for (int i = min_offset; i < min_offset + VECTOR_WIDTH; i++) { + required_offsets.emplace(i); + } + ad_trees_t current_group = {}; + for (int i = 0; i < VECTOR_WIDTH; i++) { + current_group.push_back({}); + } + std::set current_offsets = {}; + for (int i = 0; i < group_of_trees.size(); i++) { + if (trees_used[i]) { + continue; 
+ } + auto tree = group_of_trees[i]; + StoreInst *store = dyn_cast(tree.back()); + int offset = store_to_offset[store] / FLOAT_SIZE_IN_BYTES; + int rounded_offset = offset % 4; + if (required_offsets.count(offset) != 0 && + current_offsets.count(offset) == 0) { + current_offsets.emplace(offset); + trees_used[i] = true; + current_group[rounded_offset] = tree; + } + } + bool can_add_result = true; + for (auto curr_tree : current_group) { + if (curr_tree.empty()) { + can_add_result = false; + } + } + if (can_add_result) { + result.push_back(current_group); + } + } + + return result; +} + +/** + * Sort the stores in the ad_trees so that an aligned store + * is first, followed by consecutive stores + */ +ad_trees_t sort_ad_trees(ad_trees_t ad_trees, + std::vector base_of_array_vec, + ScalarEvolution *SE) { + // First, group ad_trees according to the base array they belong to. + // If a tree does not reference a base array, exclude that tree entirely + std::vector> groups_of_trees = {}; + for (int i = 0; i < base_of_array_vec.size(); i++) { + groups_of_trees.push_back({}); + } + + std::map store_to_base_map = {}; + for (ad_tree_t ad_tree : ad_trees) { + if (ad_tree.size() != 0) { + if (StoreInst *store = dyn_cast(ad_tree.back())) { + auto [base_ref, _] = + get_base_reference(store, base_of_array_vec, SE); + if (base_ref >= 0) { + groups_of_trees[base_ref].push_back(ad_tree); + store_to_base_map[store] = base_ref; + } + } + } + } + + auto store_sorter = [=](const ad_tree_t &a, const ad_tree_t &b) { + StoreInst *store_a = dyn_cast(a.back()); + StoreInst *store_b = dyn_cast(b.back()); + + // get the base references + Value *ref_a = base_of_array_vec[store_to_base_map.at(store_a)]; + Value *ref_b = base_of_array_vec[store_to_base_map.at(store_b)]; + + // get the difference from the store to its reference + Value *store_a_ptr = store_a->getPointerOperand(); + const SCEV *store_a_ptr_se = SE->getSCEV(store_a_ptr); + const SCEV *ref_a_ptr_se = SE->getSCEV(ref_a); + const SCEV 
*diff_a = SE->getMinusSCEV(store_a_ptr_se, ref_a_ptr_se); + APInt min_val_a = SE->getSignedRangeMin(diff_a); + APInt max_val_a = SE->getSignedRangeMax(diff_a); + assert(min_val_a == max_val_a); + int val_a = (int)max_val_a.roundToDouble(); + + Value *store_b_ptr = store_b->getPointerOperand(); + const SCEV *store_b_ptr_se = SE->getSCEV(store_b_ptr); + const SCEV *ref_b_ptr_se = SE->getSCEV(ref_b); + const SCEV *diff_b = SE->getMinusSCEV(store_b_ptr_se, ref_b_ptr_se); + APInt min_val_b = SE->getSignedRangeMin(diff_b); + APInt max_val_b = SE->getSignedRangeMax(diff_b); + assert(min_val_b == max_val_b); + int val_b = (int)max_val_b.roundToDouble(); + + return val_a < val_b; + }; + + // Sort each group of ad_trees by the stores in each group + for (int i = 0; i < groups_of_trees.size(); i++) { + // NO IDEA WHY THIS WORKS, BUT ITERATING OVER ELEMENTS I SORTS + // PROPERLY BUT ITERATING OVER USING COLON DOES NOT! + std::sort(groups_of_trees[i].begin(), groups_of_trees[i].end(), + store_sorter); + } + + // Build a map mapping stores to their respective offsets + std::map store_to_offset = {}; + for (auto group : groups_of_trees) { + // skip empty groups + if (group.empty()) { + continue; + } + for (auto tree : group) { + // Grab basic information about the tree + StoreInst *store = dyn_cast(tree.back()); + // Get base ref for the first store + Value *base_ref = base_of_array_vec[store_to_base_map.at(store)]; + // get the difference from the store to its reference + Value *store_ptr = store->getPointerOperand(); + const SCEV *store_ptr_se = SE->getSCEV(store_ptr); + const SCEV *ref_ptr_se = SE->getSCEV(base_ref); + const SCEV *diff = SE->getMinusSCEV(store_ptr_se, ref_ptr_se); + APInt min_val = SE->getSignedRangeMin(diff); + APInt max_val = SE->getSignedRangeMax(diff); + assert(min_val == max_val); + int offset = (int)max_val.roundToDouble(); + store_to_offset[store] = offset; + } + } + + // Grab only ad_trees that contain a 16 byte aligned reference at the + // beginning 
+ // Also the trees must be consecutive stores, e.g. the stores must + // differ by 4 bytes each time Finally, split the trees into smaller + // subtrees of size 4 + + // We do this by accumulating a running sequence of ad_trees that + // satisfy the prerequisite conditions above + + std::vector> pruned_groups_of_trees = {}; + for (auto group : groups_of_trees) { + // skip empty groups + if (group.empty()) { + continue; + } + + std::vector new_groups_of_trees = + group_trees(group, store_to_offset); + for (auto new_group : new_groups_of_trees) { + pruned_groups_of_trees.push_back(new_group); + } + } + + // Compress group of trees back into 1 ad_tree + ad_trees_t result = {}; + for (auto group_of_trees : pruned_groups_of_trees) { + chunk_t combined_chunk = join_trees(group_of_trees); + int num_stores = 0; + for (auto instr : combined_chunk) { + if (isa(instr)) { + num_stores++; + } + } + assert(num_stores == VECTOR_WIDTH); + result.push_back(combined_chunk); + } + return result; } /** - * Main method to call dfs llvm_value + * Converts chunks into vectors, representing joined AD Trees + * + */ +std::vector> chunks_into_joined_trees( + chunks_t chunks, AliasAnalysis *AA, std::vector base_of_array_vec, + ScalarEvolution *SE, std::set basic_block_instrs) { + std::vector> trees = {}; + for (auto chunk : chunks) { + ad_trees_t ad_trees = build_ad_trees(chunk, basic_block_instrs); + + // Join trees if the store instructions in the trees + // do not alias each other + std::vector> joinable_trees = {}; + std::vector>> tree_groups = {}; + for (auto tree : ad_trees) { + // check if stores alias in the trees + assert(tree.size() > 0); + Instruction *curr_store = tree.back(); + Value *curr_store_addr = curr_store->getOperand(1); + bool can_add_tree = true; + for (auto other_tree : joinable_trees) { + assert(other_tree.size() > 0); + Instruction *other_store = other_tree.back(); + Value *other_store_addr = other_store->getOperand(1); + if (may_alias(curr_store_addr, 
other_store_addr, AA)) { + can_add_tree = false; + break; + } + } + if (can_add_tree) { + joinable_trees.push_back(tree); + } else { + assert(joinable_trees.size() > 0); + tree_groups.push_back(joinable_trees); + joinable_trees = {tree}; + } + } + if (joinable_trees.size() > 0) { + tree_groups.push_back(joinable_trees); + } + + // Rearrange the joinable trees by changing their store ordering + // Then Merge Joinable trees into trees + for (auto tree_group : tree_groups) { + ad_trees_t new_ad_trees = + sort_ad_trees(tree_group, base_of_array_vec, SE); + for (auto chunk : new_ad_trees) { + trees.push_back(chunk); + } + } + } + // Do final removal of any sequences with store-load aliasing + return remove_load_store_alias(trees, AA); +} + +/* +Build AD Trees for each Chunk +**/ + +/// Map instr2ref over a vector +std::vector> instr2ref(chunks_t chunks) { + std::vector> mapped_instrs = {}; + for (auto chunk : chunks) { + std::vector mapped_chunk = {}; + for (auto instr : chunk) { + mapped_chunk.push_back(wrap(instr)); + } + mapped_instrs.push_back(mapped_chunk); + } + return mapped_instrs; +} + +/** + * Run Optimization Procedure on Vector representing concatenated ad trees + * + */ +bool run_optimization(std::vector chunk, Function &F, + std::vector load_info) { + assert(chunk.size() != 0); + // Place the builder at the last instruction in the entire chunk. 
+ Value *last_value = unwrap(chunk.back()); + Instruction *last_instr = dyn_cast(last_value); + assert(last_instr != NULL); + IRBuilder<> builder(last_instr); + + Module *mod = F.getParent(); + LLVMContext &context = F.getContext(); + std::vector restricted_instrs = {}; + + return optimize(wrap(mod), wrap(&context), wrap(&builder), chunk.data(), + chunk.size(), restricted_instrs.data(), + restricted_instrs.size(), load_info.data(), + load_info.size(), RunOpt, PrintOpt); +} + +/** + * Match each load with a pair of base id and offset + * + * NOTE: A load might be associated with more than 1 base, we choose the + * first. THIS COULD BE A BUG in the future! */ -extern "C" bool dfs_llvm_value_ref(LLVMValueRef current_instr, - LLVMValueRef match_instr) { - auto current_user = dyn_cast(unwrap(current_instr)); - auto match_user = dyn_cast(unwrap(match_instr)); - if (current_user == NULL || match_user == NULL) { - throw std::invalid_argument("Could not convert Value * to User *"); +std::vector match_loads(std::vector loads, + std::vector base_load_locations, + ScalarEvolution *SE) { + std::vector results = {}; + for (LoadInst *load : loads) { + bool continue_iteration = false; + for (Value *base_loc : base_load_locations) { + auto [load_base, load_offset] = + get_base_reference(load, base_load_locations, SE); + if (load_base >= 0) { + load_info_t new_load = {.load = wrap(load), + .base_id = load_base, + .offset = static_cast( + load_offset / FLOAT_SIZE_IN_BYTES)}; + results.push_back(new_load); + continue_iteration = true; + break; + } + } + if (continue_iteration) { + continue; + } } - return dfs_llvm_instrs(current_user, match_user); + return results; } /** @@ -496,103 +1328,99 @@ struct DiospyrosPass : public FunctionPass { static char ID; DiospyrosPass() : FunctionPass(ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } + virtual bool runOnFunction(Function &F) override { 
- // do not optimize on main function. - if (F.getName() == "main") { + // We need Alias Analysis still, because it is possible groups of + // stores can addresses that alias. + AliasAnalysis *AA = &getAnalysis().getAAResults(); + ScalarEvolution *SE = + &getAnalysis().getSE(); + TargetLibraryInfo *TLI = + &getAnalysis().getTLI(F); + + // do not optimize on main function or no_opt functions. + if (F.getName() == MAIN_FUNCTION_NAME || + (F.getName().size() > NO_OPT_PREFIX.size() && + F.getName().substr(0, NO_OPT_PREFIX.size()) == NO_OPT_PREFIX)) { return false; } - bool has_changes = false; - for (auto &B : F) { - // We skip over basic blocks without floating point types - bool has_float = false; - for (auto &I : B) { - if (I.getType()->isFloatTy()) { - has_float = true; + + // get all "Base" Arrays on which vectorization can occur. These are + // defined as argument inputs with a pointer type + std::vector base_of_array_vec = {}; + for (auto &a : F.args()) { + if (a.getType()->isPointerTy()) { + if (Value *arg_val = dyn_cast(&a)) { + base_of_array_vec.push_back(arg_val); } } - if (!has_float) { - continue; + } + + // Grab information on load base locations + std::vector base_load_locations = {}; + for (auto &a : F.args()) { + if (a.getType()->isPointerTy()) { + if (Value *arg_val = dyn_cast(&a)) { + base_load_locations.push_back(arg_val); + } } - // We also skip over all basic blocks without stores - bool has_store = false; + } + for (auto &B : F) { for (auto &I : B) { - if (auto *op = dyn_cast(&I)) { - has_store = true; + if (Value *V = dyn_cast(&I)) { + if (isMallocOrCallocLikeFn(V, TLI)) { + base_load_locations.push_back(V); + } } } - if (!has_store) { - continue; - } - - // Grab the terminator from the LLVM Basic Block - Instruction *terminator = B.getTerminator(); - Instruction *cloned_terminator = terminator->clone(); + } + std::map base_load_to_id = {}; + int count = 0; + for (auto instr : base_load_locations) { + base_load_to_id[instr] = count++; + } - 
std::vector> vectorization_accumulator; - std::vector inner_vector = {}; - std::set store_locations; - std::vector bb_instrs = {}; + // Grab information on loads + std::vector loads = {}; + for (auto &B : F) { for (auto &I : B) { - if (auto *op = dyn_cast(&I)) { - Value *store_loc = op->getOperand(1); - store_locations.insert(store_loc); - inner_vector.push_back(wrap(op)); - } else if (auto *op = dyn_cast(&I)) { - Value *load_loc = op->getOperand(0); - if (!inner_vector.empty()) { - vectorization_accumulator.push_back(inner_vector); - } - inner_vector = {}; - store_locations.clear(); - } - bb_instrs.push_back(dyn_cast(&I)); - } - vectorization_accumulator.push_back(inner_vector); - - // Acquire each of the instructions in the "run" that terminates at - // a store We will send these instructions to optimize. - - int vec_length = vectorization_accumulator.size(); - int counter = 0; - std::vector translated_exprs = {}; - for (auto &vec : vectorization_accumulator) { - ++counter; - if (not vec.empty()) { - has_changes = has_changes || true; - Value *last_store = unwrap(vec.back()); - IRBuilder<> builder(dyn_cast(last_store)); - Instruction *store_instr = - dyn_cast(last_store); - assert(isa(store_instr)); - builder.SetInsertPoint(store_instr); - builder.SetInsertPoint(&B); - Module *mod = F.getParent(); - LLVMContext &context = F.getContext(); - VectorPointerSize pair = optimize( - wrap(mod), wrap(&context), wrap(&builder), vec.data(), - vec.size(), translated_exprs.data(), - translated_exprs.size()); - int size = pair.llvm_pointer_size; - - LLVMPair const *expr_array = pair.llvm_pointer; - translated_exprs = {}; - for (int i = 0; i < size; i++) { - translated_exprs.push_back(expr_array[i]); + if (LoadInst *load_instr = dyn_cast(&I)) { + if (std::find(loads.begin(), loads.end(), load_instr) == + loads.end()) { + loads.push_back(load_instr); } } } - std::reverse(bb_instrs.begin(), bb_instrs.end()); - for (auto &I : bb_instrs) { - if (I->isTerminator()) { - 
I->eraseFromParent(); - } else if (isa(I)) { - I->eraseFromParent(); + } + std::vector load_info = + match_loads(loads, base_load_locations, SE); + + bool has_changes = true; + for (auto &B : F) { + // Grab instructions in basic block + std::set basic_block_instrs = {}; + for (auto &I : B) { + basic_block_instrs.insert(&I); + } + + auto chunks = build_chunks(&B, AA); + auto trees = chunks_into_joined_trees(chunks, AA, base_of_array_vec, + SE, basic_block_instrs); + auto treerefs = instr2ref(trees); + + for (auto tree_chunk : treerefs) { + if (tree_chunk.size() != 0) { + has_changes = run_optimization(tree_chunk, F, load_info); } } - BasicBlock::InstListType &final_instrs = B.getInstList(); - final_instrs.push_back(cloned_terminator); } - return true; + return has_changes; }; }; } // namespace @@ -605,5 +1433,13 @@ static void registerDiospyrosPass(const PassManagerBuilder &, legacy::PassManagerBase &PM) { PM.add(new DiospyrosPass()); } + +static RegisterPass X("diospyros", "Diospyros Pass", + false /* Only looks at CFG */, + true /* Analysis Pass */); + static RegisterStandardPasses RegisterMyPass( PassManagerBuilder::EP_EarlyAsPossible, registerDiospyrosPass); + +// TODO check that no gep have a load in the calculation chain or some +// memory address \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/diospyros.hpp b/src/dios-egraphs/Diospyros/diospyros.hpp deleted file mode 100644 index 17286add..00000000 --- a/src/dios-egraphs/Diospyros/diospyros.hpp +++ /dev/null @@ -1,17 +0,0 @@ -#include "llvm/IR/Argument.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/User.h" -#include "llvm/Pass.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" -#include "llvm/Transforms/Scalar/LoopUnrollPass.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" 
- -extern "C" void optimize(LLVMModuleRef mod, LLVMContextRef context, - LLVMBuilderRef builder, LLVMValueRef const *bb, - std::size_t size); \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/documentation.md b/src/dios-egraphs/Diospyros/documentation.md new file mode 100644 index 00000000..91bdda02 --- /dev/null +++ b/src/dios-egraphs/Diospyros/documentation.md @@ -0,0 +1,100 @@ +# Documentation + +This is the documentation for work on Diospyros for LLVM, up until the end of the Spring 2022 semester. Below, documentation and design decisions are split by file name. + +## Diospyros.cpp + +Diospyros.cpp is the starting point for the vectorization process. This pass is run on functions in the basic block, and only on functions that are not named `main` nor have the prefix `no_opt_` attached to their name. In addition, there are a multitude of `isa`-style functions in this file, which are used on the Rust side to check instruction type. These `isa` functions are used because the Rust LLVM-Core library `isa` functions do not return booleans, instead returning `LLVMValueRefs`, which one cannot branch on. + +The heart of Diospyros.cpp finds __runs__ of vectorizable instructions, which are then sent to the Diospyros rewriter. Vectorizable instructions are instructions of the `FAdd`, `FSub`, `FMul`, `FDiv` or `FNeg` instruction types. Runs of vectorizable instructions are consecutive vectorizable instructions that occur before a `StoreInst` is detected in the basic block, or before a `LoadInst` is detected in the basic block. The first condition, to be before a `StoreInst`, is because the store may use the result of the vectorized computation. The second condition, to be before a `LoadInst`, is because the `load` may alias with a `store`, causing a read-write conflict. After a run is found, it is sent via the `optimize` function to be optimized by the Rust-side of the pass. 
+ +## LoadStoreMovement.cpp + +Load Store Movement moves loads forward towards the beginning of a basic block, and stores backwards, towards the end of a basic block. Load store movement depends heavily on alias analysis. As a result, alias analysis is required to be run **before** the load store movement pass, as the load store movement pass repeatedly queries the alias analysis. Load store movement only occurs to functions that are not named `main` nor have the prefix `no_opt_` attached to their name. + +Generally, the algorithm works as follows: a load or a store is chosen; call this instruction `I`. Under certain conditions, `I` may be swapped with its neighbor. If conditions are correct, then the swap occurs. Swapping continues until no more swaps are possible. This occurs to all load and store instructions in the basic block. + +As a technical matter relating to LLVM, and as a note to myself for future implementation issues, replacing all uses in LLVM does not actually fix back pointers for PHI Nodes. Instead, `replaceAllUsesWith()` is the preferred approach, and does not cause crashes when the LLVM Pass is run. + +As a second note, insertion of the cloned instruction must occur before any changes to the cloned instruction are effected. + +As a third note, when doing alias analysis, one must make sure that instructions that are pointer-typed are being compared. To make sure of this, I use the `mayReadOrWriteMemory()` function as a guard. I then use the `isNoAlias()` function to help calculate aliasing. + +As a fourth note, several instructions are not handled as optimally as possible. In particular, there may be calls to intrinsics like `@llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(40) %2, i8 0, i64 40, i1 false)` or `@memset_pattern16(i8* nonnull %2, i8* bitcast([4 x float]* @.memset_pattern to i8*), i64 40) #6`. However, I do not actually check pointer aliasing with these instructions. 
Call Instructions are treated as black boxes, which are assumed to always cause aliasing issues. This means that any memory intrinsics are always conservatively assumed to alias with a load or a store instruction, and no swap will occur. I intend to fix this, by iterating over call arguments to check whether each argument is a pointer or not, and then check aliasing with the store or load instruction. This will be more accurate and fine-grained, and eliminate instances where a swap is not allowed to occur, when in fact the swap does not affect the semantics of the program. + +Finally, as a fifth note, one must insert cloned PHI Nodes **before** any prior PHI nodes in the basic block. Likewise, one must insert any cloned terminator instructions **after** any existing terminator instructions in the basic block (which always exist in LLVM). This means that the builder must shift locations constantly. As part of the implementation, I place the builder at the end of the basic block, move it before the first instruction when inserting cloned PHI Nodes, and then move it to the end of the basic block again, when inserting the remainder of the instructions, including terminator instructions. + +### Store Movement + +A store can be moved towards the end of a basic block, if the next instruction following the store exists, is not a terminator, is not a call instruction of any sort, and is not an instruction using a pointer `p'`, which may alias with pointer `p` in the store instruction. If all of the prior conditions are met, the store is swapped with the following instruction. The process continues iteratively, until no more swaps are possible. 
+ +### Load Movement + +A load can be moved towards the beginning of a basic block, if the instruction preceding the load exists, is not a PHI Node, is not a call instruction of any sort, does not define an LLVM register that is used by the Load as an argument, and is not an instruction using a pointer `p'`, which may alias with pointer `p` in the load instruction. If all of the prior conditions are met, the load is swapped with the preceding instruction. The process continues iteratively, until no more swaps are possible. + +Note the extra condition regarding the Load argument. This is critical, because the load may use a previously defined value, and we cannot move the load before the point where that value was defined. + +## lib.rs + +The Diospyros rewriting engine is applied in lib.rs. In particular, this file contains a translation from LLVM to `Egg` VecLang instructions, and a translation from `Egg` VecLang instructions back to LLVM instructions. One can specify whether to print the `Egg` rewriting output, and whether to run the `Egg` rewriter at all, via a series of flags, passed in from Diospyros.cpp. + +### New VecLang + +The new VecLang now has a register construct, representing a black box register computation. A register represents a computed LLVM Value. It may be used if an LLVM Register is used across multiple runs or basic blocks. The new argument construct is similar to a register construct, and represents an LLVM argument to a function. + +### LLVM To Egg + +Runs are translated from a sequence of LLVM instructions to a graph of Egg nodes. LLVM instructions are recursively translated backwards to Egg nodes. Starting in reverse in the sequence of LLVM instructions, any translatable instruction (Add, Sub, Mul, Div, Neg, Sqrt), is chosen, and translated backwards from. If an instruction has been translated already, it is not retranslated to an Egg Node. 
+ +Each LLVM instruction is translated to an appropriate Egg Node: + +- Restricted Instructions (instructions that are used in multiple runs/basic blocks): Translated to a Register Node +- Instruction not in the current run: Translated to a register node, because it must have existed already in the basic block +- Binary Operations: Translated to a binary operator node of the correct binary operator, and then each LLVM operand is recursively translated. +- Unary Operations: Similar to Binary Operations +- Sqrt: Translated to a sqrt node, and the operand is recursively translated +- Constants: Translated to a number node +- Arguments: Translated to argument nodes, which are conceptually similar to a register node because they act as black boxes. + +Finally, Egg Nodes are padded to be a multiple of the vector Lane Width, which is usually 4, and the binary operation nodes are added on. + +Useful metadata: + +- llvm2reg: This TreeMap maps an llvm instruction to a register. +- llvm2arg: This TreeMap maps an llvm argument to a register. +- start_instructions: This is a vector of instructions where llvm2egg translation began +- start_ids: This is the vector of ids corresponding to the start instructions. +- prior_translated_instructions: These are all instructions that had been translated already in the current pass. +- instructions_in_chunk: All instructions in chunk/run (they are synonyms) +- restricted_instructions: All instructions which are not to be translated and are to be represented as a register node. + +All metadata from this pass lies in a struct that is passed to all recursive calls in `llvm2egg`. + +### Egg to LLVM + +The graph of Egg nodes is translated back to LLVM instructions, and the LLVM instructions are built and inserted in place. + +Beginning at the last egg node, translation from Egg to LLVM occurs. After this, extracted values are pulled out to replace each of the original start instructions. 
We replace all uses with the extracted value, then delete the start instruction. + +**NOTE: We Assume Egg rewriter will maintain relative positions of elements in vector**. This means we assume the Egg rewriter does not change where instructions are supposed to be, because we need the extraction to have the instruction in the correct place. + +Egg2LLVM occurs recursively, considering the current Egg Node. + +- Number: LLVM Float Constant Created +- Arg: Argument is pulled from LLVM2Arg metadata +- Register: Register value is pulled from LLVM2Reg metadata +- LitVec: members of the litvec are translated recursively, then a vector is built +- VecAdd/VecSub... : Translate each argument recursively, then build the vec operation on both vectors +- VecConcat: Concatenate two vectors by translating arguments to vectors, then building a shuffle operation in LLVM +- VecNeg/VecSqrt... : Translate arguments appropriately, then build correct LLVM Intrinsic +- VecMac: Translate arguments appropriately, and then build correct LLVM intrinsic +- VecSgn/Sgn/Ite/Get/Or/And/Lt/Symbol/NoOptVec : No translation provided at the current time + +Metadata for this pass includes: + +- llvm2egg metadata: metadata from llvm2egg pass +- egg_nodes_vector: the vector of egg nodes +- prior_translated_nodes: TreeSet of any egg nodes that had already been translated +- builder +- context +- module \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.expect b/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.expect deleted file mode 100644 index ecbca2f2..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/2d-2d-conv.expect +++ /dev/null @@ -1,249 +0,0 @@ - %4 = alloca [2 x float]*, align 8 - %5 = alloca [2 x float]*, align 8 - %6 = alloca [3 x float]*, align 8 - %7 = alloca i32, align 4 - %8 = alloca i32, align 4 - %9 = alloca i32, align 4 - %10 = alloca i32, align 4 - %11 = alloca i32, align 4 - %12 = alloca i32, align 4 - %13 = alloca i32, align 4 - %14 = 
alloca i32, align 4 - %15 = alloca float, align 4 - store [2 x float]* %0, [2 x float]** %4, align 8 - store [2 x float]* %1, [2 x float]** %5, align 8 - store [3 x float]* %2, [3 x float]** %6, align 8 - store i32 0, i32* %7, align 4 - br label %16 - -16: ; preds = %117, %3 - %17 = load i32, i32* %7, align 4 - %18 = icmp slt i32 %17, 3 - br i1 %18, label %19, label %120 - -19: ; preds = %16 - store i32 0, i32* %8, align 4 - br label %20 - -20: ; preds = %113, %19 - %21 = load i32, i32* %8, align 4 - %22 = icmp slt i32 %21, 3 - br i1 %22, label %23, label %116 - -23: ; preds = %20 - store i32 0, i32* %9, align 4 - br label %24 - -24: ; preds = %109, %23 - %25 = load i32, i32* %9, align 4 - %26 = icmp slt i32 %25, 2 - br i1 %26, label %27, label %112 - -27: ; preds = %24 - store i32 0, i32* %10, align 4 - br label %28 - -28: ; preds = %105, %27 - %29 = load i32, i32* %10, align 4 - %30 = icmp slt i32 %29, 2 - br i1 %30, label %31, label %108 - -31: ; preds = %28 - %32 = load i32, i32* %9, align 4 - %33 = sub nsw i32 1, %32 - store i32 %33, i32* %11, align 4 - %34 = load i32, i32* %10, align 4 - %35 = sub nsw i32 1, %34 - store i32 %35, i32* %12, align 4 - %36 = load i32, i32* %7, align 4 - %37 = load i32, i32* %11, align 4 - %38 = sub nsw i32 %36, %37 - store i32 %38, i32* %13, align 4 - %39 = load i32, i32* %8, align 4 - %40 = load i32, i32* %12, align 4 - %41 = sub nsw i32 %39, %40 - store i32 %41, i32* %14, align 4 - %42 = load i32, i32* %13, align 4 - %43 = icmp sge i32 %42, 0 - br i1 %43, label %44, label %104 - -44: ; preds = %31 - %45 = load i32, i32* %13, align 4 - %46 = icmp slt i32 %45, 2 - br i1 %46, label %47, label %104 - -47: ; preds = %44 - %48 = load i32, i32* %14, align 4 - %49 = icmp sge i32 %48, 0 - br i1 %49, label %50, label %104 - -50: ; preds = %47 - %51 = load i32, i32* %14, align 4 - %52 = icmp slt i32 %51, 2 - br i1 %52, label %53, label %104 - -53: ; preds = %50 - %54 = load [2 x float]*, [2 x float]** %4, align 8 - %55 = load i32, i32* 
%13, align 4 - %56 = sext i32 %55 to i64 - %57 = getelementptr inbounds [2 x float], [2 x float]* %54, i64 %56 - %58 = load i32, i32* %14, align 4 - %59 = sext i32 %58 to i64 - %60 = getelementptr inbounds [2 x float], [2 x float]* %57, i64 0, i64 %59 - %61 = load float, float* %60, align 4 - %62 = load [2 x float]*, [2 x float]** %5, align 8 - %63 = load i32, i32* %11, align 4 - %64 = sext i32 %63 to i64 - %65 = getelementptr inbounds [2 x float], [2 x float]* %62, i64 %64 - %66 = load i32, i32* %12, align 4 - %67 = sext i32 %66 to i64 - %68 = getelementptr inbounds [2 x float], [2 x float]* %65, i64 0, i64 %67 - %69 = load float, float* %68, align 4 - %70 = fmul float %61, %69 - %71 = load float, float* %60, align 4 - %72 = insertelement <4 x float> zeroinitializer, float %71, i32 0 - %73 = insertelement <4 x float> %72, float 0.000000e+00, i32 1 - %74 = insertelement <4 x float> %73, float 0.000000e+00, i32 2 - %75 = insertelement <4 x float> %74, float 0.000000e+00, i32 3 - %76 = load float, float* %68, align 4 - %77 = insertelement <4 x float> zeroinitializer, float %76, i32 0 - %78 = insertelement <4 x float> %77, float 0.000000e+00, i32 1 - %79 = insertelement <4 x float> %78, float 0.000000e+00, i32 2 - %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 3 - %81 = fmul <4 x float> %75, %80 - %82 = extractelement <4 x float> %81, i32 0 - store float %82, float* %15, align 4 - %83 = load float, float* %15, align 4 - %84 = load [3 x float]*, [3 x float]** %6, align 8 - %85 = load i32, i32* %7, align 4 - %86 = sext i32 %85 to i64 - %87 = getelementptr inbounds [3 x float], [3 x float]* %84, i64 %86 - %88 = load i32, i32* %8, align 4 - %89 = sext i32 %88 to i64 - %90 = getelementptr inbounds [3 x float], [3 x float]* %87, i64 0, i64 %89 - %91 = load float, float* %90, align 4 - %92 = fadd float %91, %83 - %93 = load float, float* %90, align 4 - %94 = insertelement <4 x float> zeroinitializer, float %93, i32 0 - %95 = insertelement <4 x float> %94, float 
0.000000e+00, i32 1 - %96 = insertelement <4 x float> %95, float 0.000000e+00, i32 2 - %97 = insertelement <4 x float> %96, float 0.000000e+00, i32 3 - %98 = insertelement <4 x float> zeroinitializer, float %83, i32 0 - %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 1 - %100 = insertelement <4 x float> %99, float 0.000000e+00, i32 2 - %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 3 - %102 = fadd <4 x float> %97, %101 - %103 = extractelement <4 x float> %102, i32 0 - store float %103, float* %90, align 4 - br label %104 - -104: ; preds = %53, %50, %47, %44, %31 - br label %105 - -105: ; preds = %104 - %106 = load i32, i32* %10, align 4 - %107 = add nsw i32 %106, 1 - store i32 %107, i32* %10, align 4 - br label %28 - -108: ; preds = %28 - br label %109 - -109: ; preds = %108 - %110 = load i32, i32* %9, align 4 - %111 = add nsw i32 %110, 1 - store i32 %111, i32* %9, align 4 - br label %24 - -112: ; preds = %24 - br label %113 - -113: ; preds = %112 - %114 = load i32, i32* %8, align 4 - %115 = add nsw i32 %114, 1 - store i32 %115, i32* %8, align 4 - br label %20 - -116: ; preds = %20 - br label %117 - -117: ; preds = %116 - %118 = load i32, i32* %7, align 4 - %119 = add nsw i32 %118, 1 - store i32 %119, i32* %7, align 4 - br label %16 - -120: ; preds = %16 - ret void - %1 = alloca i32, align 4 - %2 = alloca [2 x [2 x float]], align 16 - %3 = alloca [2 x [2 x float]], align 16 - %4 = alloca [3 x [3 x float]], align 16 - %5 = alloca i32, align 4 - %6 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %7 = bitcast [2 x [2 x float]]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %7, i8* align 16 bitcast ([2 x [2 x float]]* @__const.main.mat_in to i8*), i64 16, i1 false) - %8 = bitcast [2 x [2 x float]]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %8, i8* align 16 bitcast ([2 x [2 x float]]* @__const.main.f_in to i8*), i64 16, i1 false) - %9 = bitcast [3 x [3 x float]]* %4 to i8* - call void 
@llvm.memset.p0i8.i64(i8* align 16 %9, i8 0, i64 36, i1 false) - %10 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %2, i64 0, i64 0 - %11 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %3, i64 0, i64 0 - %12 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %4, i64 0, i64 0 - call void @convolution([2 x float]* %10, [2 x float]* %11, [3 x float]* %12) - store i32 0, i32* %5, align 4 - br label %13 - -13: ; preds = %34, %0 - %14 = load i32, i32* %5, align 4 - %15 = icmp slt i32 %14, 3 - br i1 %15, label %16, label %37 - -16: ; preds = %13 - store i32 0, i32* %6, align 4 - br label %17 - -17: ; preds = %30, %16 - %18 = load i32, i32* %6, align 4 - %19 = icmp slt i32 %18, 3 - br i1 %19, label %20, label %33 - -20: ; preds = %17 - %21 = load i32, i32* %5, align 4 - %22 = sext i32 %21 to i64 - %23 = getelementptr inbounds [3 x [3 x float]], [3 x [3 x float]]* %4, i64 0, i64 %22 - %24 = load i32, i32* %6, align 4 - %25 = sext i32 %24 to i64 - %26 = getelementptr inbounds [3 x float], [3 x float]* %23, i64 0, i64 %25 - %27 = load float, float* %26, align 4 - %28 = fpext float %27 to double - %29 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %28) - br label %30 - -30: ; preds = %20 - %31 = load i32, i32* %6, align 4 - %32 = add nsw i32 %31, 1 - store i32 %32, i32* %6, align 4 - br label %17 - -33: ; preds = %17 - br label %34 - -34: ; preds = %33 - %35 = load i32, i32* %5, align 4 - %36 = add nsw i32 %35, 1 - store i32 %36, i32* %5, align 4 - br label %13 - -37: ; preds = %13 - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("0,-1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("0,-2,"), Get([3, 4]), Mul([2, 5]), Num(0), Num(0), Num(0), Vec([6, 7, 8, 9])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name1"), Symbol("0,-1,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Symbol("no-array-name2"), Symbol("0,-2,"), Get([7, 8]), Num(0), Num(0), Num(0), LitVec([9, 10, 11, 12]), VecMul([6, 13])] -RecExpr { nodes: [Symbol("no-array-name3"), Symbol("0,-3,"), Get([0, 1]), Symbol("no-temp-name1"), Add([2, 3]), Num(0), Num(0), Num(0), Vec([4, 5, 6, 7])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name3"), Symbol("0,-3,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Symbol("no-temp-name1"), Num(0), Num(0), Num(0), Vec([7, 8, 9, 10]), VecAdd([6, 11])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-conv.expect b/src/dios-egraphs/Diospyros/llvm-tests/2d-conv.expect deleted file mode 100644 index 93130f82..00000000 --- 
a/src/dios-egraphs/Diospyros/llvm-tests/2d-conv.expect +++ /dev/null @@ -1,227 +0,0 @@ - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - %7 = alloca i32, align 4 - %8 = alloca i32, align 4 - %9 = alloca i32, align 4 - %10 = alloca i32, align 4 - %11 = alloca i32, align 4 - %12 = alloca i32, align 4 - %13 = alloca i32, align 4 - %14 = alloca i32, align 4 - %15 = alloca float, align 4 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - store i32 0, i32* %7, align 4 - br label %16 - -16: ; preds = %117, %3 - %17 = load i32, i32* %7, align 4 - %18 = icmp slt i32 %17, 3 - br i1 %18, label %19, label %120 - -19: ; preds = %16 - store i32 0, i32* %8, align 4 - br label %20 - -20: ; preds = %113, %19 - %21 = load i32, i32* %8, align 4 - %22 = icmp slt i32 %21, 3 - br i1 %22, label %23, label %116 - -23: ; preds = %20 - store i32 0, i32* %9, align 4 - br label %24 - -24: ; preds = %109, %23 - %25 = load i32, i32* %9, align 4 - %26 = icmp slt i32 %25, 2 - br i1 %26, label %27, label %112 - -27: ; preds = %24 - store i32 0, i32* %10, align 4 - br label %28 - -28: ; preds = %105, %27 - %29 = load i32, i32* %10, align 4 - %30 = icmp slt i32 %29, 2 - br i1 %30, label %31, label %108 - -31: ; preds = %28 - %32 = load i32, i32* %9, align 4 - %33 = sub nsw i32 1, %32 - store i32 %33, i32* %11, align 4 - %34 = load i32, i32* %10, align 4 - %35 = sub nsw i32 1, %34 - store i32 %35, i32* %12, align 4 - %36 = load i32, i32* %7, align 4 - %37 = load i32, i32* %11, align 4 - %38 = sub nsw i32 %36, %37 - store i32 %38, i32* %13, align 4 - %39 = load i32, i32* %8, align 4 - %40 = load i32, i32* %12, align 4 - %41 = sub nsw i32 %39, %40 - store i32 %41, i32* %14, align 4 - %42 = load i32, i32* %13, align 4 - %43 = icmp sge i32 %42, 0 - br i1 %43, label %44, label %104 - -44: ; preds = %31 - %45 = load i32, i32* %13, align 4 - %46 = icmp slt i32 %45, 2 - br i1 %46, label %47, label 
%104 - -47: ; preds = %44 - %48 = load i32, i32* %14, align 4 - %49 = icmp sge i32 %48, 0 - br i1 %49, label %50, label %104 - -50: ; preds = %47 - %51 = load i32, i32* %14, align 4 - %52 = icmp slt i32 %51, 2 - br i1 %52, label %53, label %104 - -53: ; preds = %50 - %54 = load float*, float** %4, align 8 - %55 = load i32, i32* %13, align 4 - %56 = mul nsw i32 %55, 2 - %57 = load i32, i32* %14, align 4 - %58 = add nsw i32 %56, %57 - %59 = sext i32 %58 to i64 - %60 = getelementptr inbounds float, float* %54, i64 %59 - %61 = load float, float* %60, align 4 - %62 = load float*, float** %5, align 8 - %63 = load i32, i32* %11, align 4 - %64 = mul nsw i32 %63, 2 - %65 = load i32, i32* %12, align 4 - %66 = add nsw i32 %64, %65 - %67 = sext i32 %66 to i64 - %68 = getelementptr inbounds float, float* %62, i64 %67 - %69 = load float, float* %68, align 4 - %70 = fmul float %61, %69 - %71 = load float, float* %60, align 4 - %72 = insertelement <4 x float> zeroinitializer, float %71, i32 0 - %73 = insertelement <4 x float> %72, float 0.000000e+00, i32 1 - %74 = insertelement <4 x float> %73, float 0.000000e+00, i32 2 - %75 = insertelement <4 x float> %74, float 0.000000e+00, i32 3 - %76 = load float, float* %68, align 4 - %77 = insertelement <4 x float> zeroinitializer, float %76, i32 0 - %78 = insertelement <4 x float> %77, float 0.000000e+00, i32 1 - %79 = insertelement <4 x float> %78, float 0.000000e+00, i32 2 - %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 3 - %81 = fmul <4 x float> %75, %80 - %82 = extractelement <4 x float> %81, i32 0 - store float %82, float* %15, align 4 - %83 = load float, float* %15, align 4 - %84 = load float*, float** %6, align 8 - %85 = load i32, i32* %7, align 4 - %86 = mul nsw i32 %85, 3 - %87 = load i32, i32* %8, align 4 - %88 = add nsw i32 %86, %87 - %89 = sext i32 %88 to i64 - %90 = getelementptr inbounds float, float* %84, i64 %89 - %91 = load float, float* %90, align 4 - %92 = fadd float %91, %83 - %93 = load float, float* 
%90, align 4 - %94 = insertelement <4 x float> zeroinitializer, float %93, i32 0 - %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 1 - %96 = insertelement <4 x float> %95, float 0.000000e+00, i32 2 - %97 = insertelement <4 x float> %96, float 0.000000e+00, i32 3 - %98 = insertelement <4 x float> zeroinitializer, float %83, i32 0 - %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 1 - %100 = insertelement <4 x float> %99, float 0.000000e+00, i32 2 - %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 3 - %102 = fadd <4 x float> %97, %101 - %103 = extractelement <4 x float> %102, i32 0 - store float %103, float* %90, align 4 - br label %104 - -104: ; preds = %53, %50, %47, %44, %31 - br label %105 - -105: ; preds = %104 - %106 = load i32, i32* %10, align 4 - %107 = add nsw i32 %106, 1 - store i32 %107, i32* %10, align 4 - br label %28 - -108: ; preds = %28 - br label %109 - -109: ; preds = %108 - %110 = load i32, i32* %9, align 4 - %111 = add nsw i32 %110, 1 - store i32 %111, i32* %9, align 4 - br label %24 - -112: ; preds = %24 - br label %113 - -113: ; preds = %112 - %114 = load i32, i32* %8, align 4 - %115 = add nsw i32 %114, 1 - store i32 %115, i32* %8, align 4 - br label %20 - -116: ; preds = %20 - br label %117 - -117: ; preds = %116 - %118 = load i32, i32* %7, align 4 - %119 = add nsw i32 %118, 1 - store i32 %119, i32* %7, align 4 - br label %16 - -120: ; preds = %16 - ret void - %1 = alloca i32, align 4 - %2 = alloca [4 x float], align 16 - %3 = alloca [4 x float], align 16 - %4 = alloca [9 x float], align 16 - %5 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %6 = bitcast [4 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %6, i8* align 16 bitcast ([4 x float]* @__const.main.mat_in to i8*), i64 16, i1 false) - %7 = bitcast [4 x float]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %7, i8* align 16 bitcast ([4 x float]* @__const.main.f_in to i8*), i64 16, i1 false) - %8 = 
bitcast [9 x float]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %8, i8 0, i64 36, i1 false) - %9 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %10 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - %11 = getelementptr inbounds [9 x float], [9 x float]* %4, i64 0, i64 0 - call void @convolution(float* %9, float* %10, float* %11) - store i32 0, i32* %5, align 4 - br label %12 - -12: ; preds = %22, %0 - %13 = load i32, i32* %5, align 4 - %14 = icmp slt i32 %13, 9 - br i1 %14, label %15, label %25 - -15: ; preds = %12 - %16 = load i32, i32* %5, align 4 - %17 = sext i32 %16 to i64 - %18 = getelementptr inbounds [9 x float], [9 x float]* %4, i64 0, i64 %17 - %19 = load float, float* %18, align 4 - %20 = fpext float %19 to double - %21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), double %20) - br label %22 - -22: ; preds = %15 - %23 = load i32, i32* %5, align 4 - %24 = add nsw i32 %23, 1 - store i32 %24, i32* %5, align 4 - br label %12 - -25: ; preds = %12 - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("-2,"), Get([3, 4]), Mul([2, 5]), Num(0), Num(0), Num(0), Vec([6, 7, 8, 9])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Symbol("no-array-name2"), Symbol("-2,"), Get([7, 8]), Num(0), Num(0), Num(0), LitVec([9, 10, 11, 12]), VecMul([6, 13])] -RecExpr { nodes: [Symbol("no-array-name3"), Symbol("-3,"), Get([0, 1]), Symbol("no-temp-name1"), Add([2, 3]), Num(0), Num(0), Num(0), Vec([4, 5, 6, 7])] } -Dropping inapplicable rules: div-1 div-1-inv 
neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name3"), Symbol("-3,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Symbol("no-temp-name1"), Num(0), Num(0), Num(0), Vec([7, 8, 9, 10]), VecAdd([6, 11])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply.expect b/src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply.expect deleted file mode 100644 index 45c63123..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/2d-matrix-multiply.expect +++ /dev/null @@ -1,152 +0,0 @@ - %4 = alloca [2 x float]*, align 8 - %5 = alloca [2 x float]*, align 8 - %6 = alloca [2 x float]*, align 8 - %7 = alloca i32, align 4 - %8 = alloca i32, align 4 - %9 = alloca float, align 4 - %10 = alloca i32, align 4 - store [2 x float]* %0, [2 x float]** %4, align 8 - store [2 x float]* %1, [2 x float]** %5, align 8 - store [2 x float]* %2, [2 x float]** %6, align 8 - store i32 0, i32* %7, align 4 - br label %11 - -11: ; preds = %74, %3 - %12 = load i32, i32* %7, align 4 - %13 = icmp slt i32 %12, 2 - br i1 %13, label %14, label %77 - -14: ; preds = %11 - store i32 0, i32* %8, align 4 - br label %15 - -15: ; preds = %70, %14 - %16 = load i32, i32* %8, align 4 - %17 = icmp slt i32 %16, 2 - br i1 %17, label %18, label %73 - -18: ; preds = %15 - store float 0.000000e+00, float* %9, align 4 - store i32 0, i32* %10, align 4 - br label %19 - -19: ; preds = %58, %18 - %20 = load i32, i32* %10, align 4 - %21 = icmp slt i32 %20, 2 - br i1 %21, label %22, label %61 - -22: ; preds = %19 - %23 = load [2 x float]*, [2 x float]** %4, align 8 - %24 = load i32, i32* %7, align 4 - %25 = sext i32 %24 to i64 - %26 = getelementptr inbounds [2 x float], [2 x float]* %23, i64 %25 - %27 = load i32, i32* %10, align 4 - %28 = sext i32 %27 to i64 - %29 = getelementptr inbounds [2 x 
float], [2 x float]* %26, i64 0, i64 %28 - %30 = load float, float* %29, align 4 - %31 = load [2 x float]*, [2 x float]** %5, align 8 - %32 = load i32, i32* %10, align 4 - %33 = sext i32 %32 to i64 - %34 = getelementptr inbounds [2 x float], [2 x float]* %31, i64 %33 - %35 = load i32, i32* %8, align 4 - %36 = sext i32 %35 to i64 - %37 = getelementptr inbounds [2 x float], [2 x float]* %34, i64 0, i64 %36 - %38 = load float, float* %37, align 4 - %39 = fmul float %30, %38 - %40 = load float, float* %9, align 4 - %41 = fadd float %40, %39 - %42 = insertelement <4 x float> zeroinitializer, float %40, i32 0 - %43 = insertelement <4 x float> %42, float 0.000000e+00, i32 1 - %44 = insertelement <4 x float> %43, float 0.000000e+00, i32 2 - %45 = insertelement <4 x float> %44, float 0.000000e+00, i32 3 - %46 = load float, float* %29, align 4 - %47 = insertelement <4 x float> zeroinitializer, float %46, i32 0 - %48 = insertelement <4 x float> %47, float 0.000000e+00, i32 1 - %49 = insertelement <4 x float> %48, float 0.000000e+00, i32 2 - %50 = insertelement <4 x float> %49, float 0.000000e+00, i32 3 - %51 = load float, float* %37, align 4 - %52 = insertelement <4 x float> zeroinitializer, float %51, i32 0 - %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 1 - %54 = insertelement <4 x float> %53, float 0.000000e+00, i32 2 - %55 = insertelement <4 x float> %54, float 0.000000e+00, i32 3 - %56 = call <4 x float> @llvm.fma.f32(<4 x float> %50, <4 x float> %55, <4 x float> %45) - %57 = extractelement <4 x float> %56, i32 0 - store float %57, float* %9, align 4 - br label %58 - -58: ; preds = %22 - %59 = load i32, i32* %10, align 4 - %60 = add nsw i32 %59, 1 - store i32 %60, i32* %10, align 4 - br label %19 - -61: ; preds = %19 - %62 = load float, float* %9, align 4 - %63 = load [2 x float]*, [2 x float]** %6, align 8 - %64 = load i32, i32* %7, align 4 - %65 = sext i32 %64 to i64 - %66 = getelementptr inbounds [2 x float], [2 x float]* %63, i64 %65 - %67 = load i32, 
i32* %8, align 4 - %68 = sext i32 %67 to i64 - %69 = getelementptr inbounds [2 x float], [2 x float]* %66, i64 0, i64 %68 - store float %62, float* %69, align 4 - br label %70 - -70: ; preds = %61 - %71 = load i32, i32* %8, align 4 - %72 = add nsw i32 %71, 1 - store i32 %72, i32* %8, align 4 - br label %15 - -73: ; preds = %15 - br label %74 - -74: ; preds = %73 - %75 = load i32, i32* %7, align 4 - %76 = add nsw i32 %75, 1 - store i32 %76, i32* %7, align 4 - br label %11 - -77: ; preds = %11 - ret void - %1 = alloca i32, align 4 - %2 = alloca [2 x [2 x float]], align 16 - %3 = alloca [2 x [2 x float]], align 16 - %4 = alloca [2 x [2 x float]], align 16 - store i32 0, i32* %1, align 4 - %5 = bitcast [2 x [2 x float]]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %5, i8* align 16 bitcast ([2 x [2 x float]]* @__const.main.a_in to i8*), i64 16, i1 false) - %6 = bitcast [2 x [2 x float]]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %6, i8* align 16 bitcast ([2 x [2 x float]]* @__const.main.b_in to i8*), i64 16, i1 false) - %7 = bitcast [2 x [2 x float]]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %7, i8 0, i64 16, i1 false) - %8 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %2, i64 0, i64 0 - %9 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %3, i64 0, i64 0 - %10 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %4, i64 0, i64 0 - call void @matrix_multiply([2 x float]* %8, [2 x float]* %9, [2 x float]* %10) - %11 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %4, i64 0, i64 0 - %12 = getelementptr inbounds [2 x float], [2 x float]* %11, i64 0, i64 0 - %13 = load float, float* %12, align 16 - %14 = fpext float %13 to double - %15 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %14) - %16 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %4, i64 0, i64 0 - %17 = getelementptr inbounds [2 x float], [2 x float]* %16, i64 0, i64 1 - %18 = load float, float* %17, align 4 - %19 = fpext float %18 to double - %20 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %19) - %21 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %4, i64 0, i64 1 - %22 = getelementptr inbounds [2 x float], [2 x float]* %21, i64 0, i64 0 - %23 = load float, float* %22, align 8 - %24 = fpext float %23 to double - %25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %24) - %26 = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %4, i64 0, i64 1 - %27 = getelementptr inbounds [2 x float], [2 x float]* %26, i64 0, i64 1 - %28 = load float, float* %27, align 4 - %29 = fpext float %28 to double - %30 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %29) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("0,-1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("0,-2,"), Get([3, 4]), Mul([2, 5]), Symbol("no-temp-name1"), Add([7, 6]), Num(0), Num(0), Num(0), Vec([8, 9, 10, 11])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-temp-name1"), Num(0), Num(0), Num(0), Vec([0, 1, 2, 3]), Symbol("no-array-name1"), Symbol("0,-1,"), Get([5, 6]), Num(0), Num(0), Num(0), LitVec([7, 8, 9, 10]), Symbol("no-array-name2"), Symbol("0,-2,"), Get([12, 13]), Num(0), Num(0), Num(0), LitVec([14, 15, 16, 17]), VecMAC([4, 11, 18])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d.c b/src/dios-egraphs/Diospyros/llvm-tests/2d.c deleted file mode 100644 index a022be0f..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/2d.c +++ /dev/null @@ -1,20 +0,0 @@ -#include -#include - -float a_in[4][4] = { - {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}; -float b_in[] = {5, 6, 7, 8}; - -int main(int argc, char **argv) { - float c_out[4]; - c_out[0] = a_in[0][3] + b_in[0]; - c_out[1] = a_in[1][3] + b_in[1]; - c_out[2] = a_in[2][3] + b_in[2]; - c_out[3] = a_in[3][3] + a_in[0][1]; - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - // expected: 9, 14, 19, 18 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/2d.expect b/src/dios-egraphs/Diospyros/llvm-tests/2d.expect deleted file mode 100644 index e3ddf47d..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/2d.expect +++ /dev/null @@ -1,70 +0,0 @@ - %3 = alloca i32, align 4 - %4 = 
alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 0, i64 3), align 4 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %11 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 1, i64 3), align 4 - %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %13 = fadd float %11, %12 - %14 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %15 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 2, i64 3), align 4 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %19 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 3, i64 3), align 4 - %20 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 0, i64 1), align 4 - %21 = fadd float %19, %20 - %22 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %23 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 0, i64 3), align 4 - %24 = insertelement <4 x float> zeroinitializer, float %23, i32 0 - %25 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 1, i64 3), align 4 - %26 = insertelement <4 x float> %24, float %25, i32 1 - %27 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x 
float]]* @a_in, i64 0, i64 2, i64 3), align 4 - %28 = insertelement <4 x float> %26, float %27, i32 2 - %29 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 3, i64 3), align 4 - %30 = insertelement <4 x float> %28, float %29, i32 3 - %31 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 - %33 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %34 = insertelement <4 x float> %32, float %33, i32 1 - %35 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %36 = insertelement <4 x float> %34, float %35, i32 2 - %37 = load float, float* getelementptr inbounds ([4 x [4 x float]], [4 x [4 x float]]* @a_in, i64 0, i64 0, i64 1), align 4 - %38 = insertelement <4 x float> %36, float %37, i32 3 - %39 = fadd <4 x float> %30, %38 - %40 = extractelement <4 x float> %39, i32 0 - store float %40, float* %10, align 16 - %41 = extractelement <4 x float> %39, i32 1 - store float %41, float* %14, align 4 - %42 = extractelement <4 x float> %39, i32 2 - store float %42, float* %18, align 8 - %43 = extractelement <4 x float> %39, i32 3 - store float %43, float* %22, align 4 - %44 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %45 = load float, float* %44, align 16 - %46 = fpext float %45 to double - %47 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %46) - %48 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %49 = load float, float* %48, align 4 - %50 = fpext float %49 to double - %51 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %50) - %52 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %53 = load float, float* %52, align 8 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %54) - %56 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %57 = load float, float* %56, align 4 - %58 = fpext float %57 to double - %59 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %58) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,3,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("a_in"), Symbol("0,1,3,"), Get([7, 8]), Symbol("b_in"), Symbol("0,1,"), Get([10, 11]), Add([9, 12]), Symbol("a_in"), Symbol("0,2,3,"), Get([14, 15]), Symbol("b_in"), Symbol("0,2,"), Get([17, 18]), Add([16, 19]), Symbol("a_in"), Symbol("0,3,3,"), Get([21, 22]), Symbol("a_in"), Symbol("0,0,1,"), Get([24, 25]), Add([23, 26]), Vec([6, 13, 20, 27])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,3,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,3,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,3,"), Get([6, 7]), Symbol("a_in"), Symbol("0,3,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("a_in"), Symbol("0,0,1,"), Get([22, 23]), Vec([15, 18, 21, 24]), VecAdd([12, 25])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/add.c b/src/dios-egraphs/Diospyros/llvm-tests/add.c deleted file mode 100644 index 
54b1ef0b..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/add.c +++ /dev/null @@ -1,18 +0,0 @@ -#include - -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {5, 6, 7, 8}; - -int main(int argc, char **argv) { - float c_out[4]; - c_out[0] = a_in[0] + b_in[0]; - c_out[1] = a_in[1] + b_in[1]; - c_out[2] = a_in[2] + b_in[2]; - c_out[3] = a_in[3] + b_in[3]; - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - // expected: 6, 8, 10, 12 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/add.expect b/src/dios-egraphs/Diospyros/llvm-tests/add.expect deleted file mode 100644 index 9fd209fb..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/add.expect +++ /dev/null @@ -1,70 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %13 = fadd float %11, %12 - %14 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %15 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %19 = load float, 
float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %21 = fadd float %19, %20 - %22 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %23 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %24 = insertelement <4 x float> zeroinitializer, float %23, i32 0 - %25 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %26 = insertelement <4 x float> %24, float %25, i32 1 - %27 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %28 = insertelement <4 x float> %26, float %27, i32 2 - %29 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %30 = insertelement <4 x float> %28, float %29, i32 3 - %31 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 - %33 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %34 = insertelement <4 x float> %32, float %33, i32 1 - %35 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %36 = insertelement <4 x float> %34, float %35, i32 2 - %37 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %38 = insertelement <4 x float> %36, float %37, i32 3 - %39 = fadd <4 x float> %30, %38 - %40 = extractelement <4 x float> %39, i32 0 - store float %40, float* %10, align 16 - %41 = extractelement <4 x float> %39, i32 1 - store float %41, float* %14, align 4 - %42 = extractelement <4 x float> %39, i32 2 - store float %42, float* %18, align 8 - %43 = extractelement <4 x float> %39, i32 3 - store float %43, float* %22, align 4 
- %44 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %45 = load float, float* %44, align 16 - %46 = fpext float %45 to double - %47 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %46) - %48 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %49 = load float, float* %48, align 4 - %50 = fpext float %49 to double - %51 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %50) - %52 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %53 = load float, float* %52, align 8 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %54) - %56 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %57 = load float, float* %56, align 4 - %58 = fpext float %57 to double - %59 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %58) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("a_in"), Symbol("0,1,"), Get([7, 8]), Symbol("b_in"), Symbol("0,1,"), Get([10, 11]), Add([9, 12]), Symbol("a_in"), Symbol("0,2,"), Get([14, 15]), Symbol("b_in"), Symbol("0,2,"), Get([17, 18]), Add([16, 19]), Symbol("a_in"), Symbol("0,3,"), Get([21, 22]), Symbol("b_in"), Symbol("0,3,"), Get([24, 25]), Add([23, 26]), Vec([6, 13, 20, 27])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), 
Symbol("a_in"), Symbol("0,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,3,"), Get([22, 23]), LitVec([15, 18, 21, 24]), VecAdd([12, 25])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/add_mult.c b/src/dios-egraphs/Diospyros/llvm-tests/add_mult.c deleted file mode 100644 index e2c07b00..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/add_mult.c +++ /dev/null @@ -1,18 +0,0 @@ -#include - -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {2, 3, 4, 5}; - -int main(int argc, char **argv) { - float d_out[4]; - d_out[0] = a_in[0] + b_in[0]; - d_out[1] = a_in[1] * b_in[1]; - d_out[2] = a_in[2] + b_in[2]; - d_out[3] = a_in[3] * b_in[3]; - printf("first: %f\n", d_out[0]); - printf("second: %f\n", d_out[1]); - printf("third: %f\n", d_out[2]); - printf("fourth: %f\n", d_out[3]); - // expected:3, 6, 7, 20 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/add_mult.expect b/src/dios-egraphs/Diospyros/llvm-tests/add_mult.expect deleted file mode 100644 index 7ab7e34b..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/add_mult.expect +++ /dev/null @@ -1,73 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* 
@b_in, i64 0, i64 1), align 4 - %13 = fmul float %11, %12 - %14 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %15 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %19 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %21 = fmul float %19, %20 - %22 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %23 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %24 = insertelement <4 x float> zeroinitializer, float %23, i32 0 - %25 = insertelement <4 x float> %24, float 0.000000e+00, i32 1 - %26 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %27 = insertelement <4 x float> %25, float %26, i32 2 - %28 = insertelement <4 x float> %27, float 0.000000e+00, i32 3 - %29 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %30 = insertelement <4 x float> , float %29, i32 1 - %31 = insertelement <4 x float> %30, float 1.000000e+00, i32 2 - %32 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %33 = insertelement <4 x float> %31, float %32, i32 3 - %34 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %35 = insertelement <4 x float> zeroinitializer, float %34, i32 0 - %36 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %37 = insertelement <4 x float> %35, float %36, i32 1 - %38 = load float, float* getelementptr 
inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %39 = insertelement <4 x float> %37, float %38, i32 2 - %40 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %41 = insertelement <4 x float> %39, float %40, i32 3 - %42 = call <4 x float> @llvm.fma.f32(<4 x float> %33, <4 x float> %41, <4 x float> %28) - %43 = extractelement <4 x float> %42, i32 0 - store float %43, float* %10, align 16 - %44 = extractelement <4 x float> %42, i32 1 - store float %44, float* %14, align 4 - %45 = extractelement <4 x float> %42, i32 2 - store float %45, float* %18, align 8 - %46 = extractelement <4 x float> %42, i32 3 - store float %46, float* %22, align 4 - %47 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %48 = load float, float* %47, align 16 - %49 = fpext float %48 to double - %50 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %49) - %51 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %52 = load float, float* %51, align 4 - %53 = fpext float %52 to double - %54 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %53) - %55 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %56 = load float, float* %55, align 8 - %57 = fpext float %56 to double - %58 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %57) - %59 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %60 = load float, float* %59, align 4 - %61 = fpext float %60 to double - %62 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %61) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("a_in"), Symbol("0,1,"), Get([7, 8]), Symbol("b_in"), Symbol("0,1,"), Get([10, 11]), Mul([9, 12]), Symbol("a_in"), Symbol("0,2,"), Get([14, 15]), Symbol("b_in"), Symbol("0,2,"), Get([17, 18]), Add([16, 19]), Symbol("a_in"), Symbol("0,3,"), Get([21, 22]), Symbol("b_in"), Symbol("0,3,"), Get([24, 25]), Mul([23, 26]), Vec([6, 13, 20, 27])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Num(0), Symbol("a_in"), Symbol("0,2,"), Get([4, 5]), Num(0), LitVec([2, 3, 6, 7]), Num(1), Symbol("a_in"), Symbol("0,1,"), Get([10, 11]), Num(1), Symbol("a_in"), Symbol("0,3,"), Get([14, 15]), Vec([9, 12, 13, 16]), Symbol("b_in"), Symbol("0,0,"), Get([18, 19]), Symbol("b_in"), Symbol("0,1,"), Get([21, 22]), Symbol("b_in"), Symbol("0,2,"), Get([24, 25]), Symbol("b_in"), Symbol("0,3,"), Get([27, 28]), LitVec([20, 23, 26, 29]), VecMAC([8, 17, 30])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/cube.c b/src/dios-egraphs/Diospyros/llvm-tests/cube.c deleted file mode 100644 index 25847462..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/cube.c +++ /dev/null @@ -1,25 +0,0 @@ -#include -#include -#define SIZE 8 - -void cube(float a_in[SIZE], float b_out[SIZE]) { - for (int i = 0; i < SIZE; i++) { - b_out[i] = powf(a_in[i], 3); - } -} -int main(void) { - float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; - float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; - cube(a_in, b_out); - for (int i = 0; i < SIZE; i++) { - printf("%f\n", b_out[i]); - } - // 729.000000 - // 512.000000 - 
// 343.000000 - // 216.000000 - // 125.000000 - // 64.000000 - // 27.000000 - // 8.000000 -} diff --git a/src/dios-egraphs/Diospyros/llvm-tests/fft-FAIL.c b/src/dios-egraphs/Diospyros/llvm-tests/fft-FAIL.c deleted file mode 100644 index 90c1babd..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/fft-FAIL.c +++ /dev/null @@ -1,78 +0,0 @@ -#include -#define SIZE 8 - -void fft(float real_in[SIZE], float img_in[SIZE], float real_twid_in[SIZE / 2], - float img_twid_in[SIZE / 2], float real_out[SIZE], - float img_out[SIZE]) { - int even = 0; - int odd = 0; - int log = 0; - int rootindex = 0; - int span = SIZE >> 1; - float temp = 0; - - for (int i = 0; i < SIZE; i++) { - real_out[i] = real_in[i]; - img_out[i] = img_in[i]; - } - - while (span != 0) { - odd = span; - while (odd < SIZE) { - odd = odd | span; - even = odd ^ span; - - temp = real_out[even] + real_out[odd]; - real_out[odd] = real_out[even] - real_out[odd]; - real_out[even] = temp; - - temp = img_out[even] + img_out[odd]; - img_out[odd] = img_out[even] - img_out[odd]; - img_out[even] = temp; - - rootindex = (even << log) & (SIZE - 1); - if (rootindex > 0) { - temp = real_twid_in[rootindex] * real_out[odd] - - img_twid_in[rootindex] * img_out[odd]; - img_out[odd] = real_twid_in[rootindex] * img_out[odd] + - img_twid_in[rootindex] * real_out[odd]; - real_out[odd] = temp; - } - odd += 1; - } - span >>= 1; - log += 1; - } -} - -int main(void) { - float real_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; - float img_in[SIZE] = {0, 1, 2, 3, 4, 5, 6, 7}; - float real_twid_in[SIZE / 2] = {4, 3, 2, 1}; - float img_twid_in[SIZE / 2] = {8, 7, 6, 5}; - float real_out[SIZE] = {1, 1, 1, 1, 1, 1, 1, 1}; - float img_out[SIZE] = {2, 3, 4, 5, 6, 7, 8, 9}; - fft(real_in, img_in, real_twid_in, img_twid_in, real_out, img_out); - for (int i = 0; i < SIZE; i++) { - printf("%f\n", real_out[i]); - } - for (int i = 0; i < SIZE; i++) { - printf("%f\n", img_out[i]); - } - // 36.000000 - // -4.000000 - // 12.000000 - // -20.000000 - // 44.000000 
- // -20.000000 - // 76.000000 - // -116.000000 - // 28.000000 - // -4.000000 - // -36.000000 - // 28.000000 - // -100.000000 - // 28.000000 - // -4.000000 - // 60.000000 -} diff --git a/src/dios-egraphs/Diospyros/llvm-tests/fft.expect b/src/dios-egraphs/Diospyros/llvm-tests/fft.expect deleted file mode 100644 index 916abfc9..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/fft.expect +++ /dev/null @@ -1,388 +0,0 @@ - %7 = alloca float*, align 8 - %8 = alloca float*, align 8 - %9 = alloca float*, align 8 - %10 = alloca float*, align 8 - %11 = alloca float*, align 8 - %12 = alloca float*, align 8 - %13 = alloca i32, align 4 - %14 = alloca i32, align 4 - %15 = alloca i32, align 4 - %16 = alloca i32, align 4 - %17 = alloca i32, align 4 - %18 = alloca float, align 4 - %19 = alloca i32, align 4 - store float* %0, float** %7, align 8 - store float* %1, float** %8, align 8 - store float* %2, float** %9, align 8 - store float* %3, float** %10, align 8 - store float* %4, float** %11, align 8 - store float* %5, float** %12, align 8 - store i32 0, i32* %13, align 4 - store i32 0, i32* %14, align 4 - store i32 0, i32* %15, align 4 - store i32 0, i32* %16, align 4 - store i32 4, i32* %17, align 4 - store float 0.000000e+00, float* %18, align 4 - store i32 0, i32* %19, align 4 - br label %20 - -20: ; preds = %42, %6 - %21 = load i32, i32* %19, align 4 - %22 = icmp slt i32 %21, 8 - br i1 %22, label %23, label %45 - -23: ; preds = %20 - %24 = load float*, float** %7, align 8 - %25 = load i32, i32* %19, align 4 - %26 = sext i32 %25 to i64 - %27 = getelementptr inbounds float, float* %24, i64 %26 - %28 = load float, float* %27, align 4 - %29 = load float*, float** %11, align 8 - %30 = load i32, i32* %19, align 4 - %31 = sext i32 %30 to i64 - %32 = getelementptr inbounds float, float* %29, i64 %31 - store float %28, float* %32, align 4 - %33 = load float*, float** %8, align 8 - %34 = load i32, i32* %19, align 4 - %35 = sext i32 %34 to i64 - %36 = getelementptr inbounds float, 
float* %33, i64 %35 - %37 = load float, float* %36, align 4 - %38 = load float*, float** %12, align 8 - %39 = load i32, i32* %19, align 4 - %40 = sext i32 %39 to i64 - %41 = getelementptr inbounds float, float* %38, i64 %40 - store float %37, float* %41, align 4 - br label %42 - -42: ; preds = %23 - %43 = load i32, i32* %19, align 4 - %44 = add nsw i32 %43, 1 - store i32 %44, i32* %19, align 4 - br label %20 - -45: ; preds = %20 - br label %46 - -46: ; preds = %240, %45 - %47 = load i32, i32* %17, align 4 - %48 = icmp ne i32 %47, 0 - br i1 %48, label %49, label %245 - -49: ; preds = %46 - %50 = load i32, i32* %17, align 4 - store i32 %50, i32* %14, align 4 - br label %51 - -51: ; preds = %237, %49 - %52 = load i32, i32* %14, align 4 - %53 = icmp slt i32 %52, 8 - br i1 %53, label %54, label %240 - -54: ; preds = %51 - %55 = load i32, i32* %14, align 4 - %56 = load i32, i32* %17, align 4 - %57 = or i32 %55, %56 - store i32 %57, i32* %14, align 4 - %58 = load i32, i32* %14, align 4 - %59 = load i32, i32* %17, align 4 - %60 = xor i32 %58, %59 - store i32 %60, i32* %13, align 4 - %61 = load float*, float** %11, align 8 - %62 = load i32, i32* %13, align 4 - %63 = sext i32 %62 to i64 - %64 = getelementptr inbounds float, float* %61, i64 %63 - %65 = load float, float* %64, align 4 - %66 = load float*, float** %11, align 8 - %67 = load i32, i32* %14, align 4 - %68 = sext i32 %67 to i64 - %69 = getelementptr inbounds float, float* %66, i64 %68 - %70 = load float, float* %69, align 4 - %71 = fadd float %65, %70 - %72 = load float*, float** %11, align 8 - %73 = load i32, i32* %13, align 4 - %74 = sext i32 %73 to i64 - %75 = getelementptr inbounds float, float* %72, i64 %74 - %76 = load float, float* %75, align 4 - %77 = load float*, float** %11, align 8 - %78 = load i32, i32* %14, align 4 - %79 = sext i32 %78 to i64 - %80 = getelementptr inbounds float, float* %77, i64 %79 - %81 = load float, float* %80, align 4 - %82 = fsub float %76, %81 - %83 = load float*, float** %11, 
align 8 - %84 = load i32, i32* %14, align 4 - %85 = sext i32 %84 to i64 - %86 = getelementptr inbounds float, float* %83, i64 %85 - %87 = load float, float* %64, align 4 - %88 = load float, float* %69, align 4 - %89 = fadd float %87, %88 - %90 = insertelement <4 x float> zeroinitializer, float %89, i32 0 - %91 = load float, float* %75, align 4 - %92 = load float, float* %80, align 4 - %93 = fsub float %91, %92 - %94 = insertelement <4 x float> %90, float %93, i32 1 - %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 2 - %96 = insertelement <4 x float> %95, float 0.000000e+00, i32 3 - %97 = extractelement <4 x float> %96, i32 0 - store float %97, float* %18, align 4 - %98 = extractelement <4 x float> %96, i32 1 - store float %98, float* %86, align 4 - %99 = load float, float* %18, align 4 - %100 = load float*, float** %11, align 8 - %101 = load i32, i32* %13, align 4 - %102 = sext i32 %101 to i64 - %103 = getelementptr inbounds float, float* %100, i64 %102 - store float %99, float* %103, align 4 - %104 = load float*, float** %12, align 8 - %105 = load i32, i32* %13, align 4 - %106 = sext i32 %105 to i64 - %107 = getelementptr inbounds float, float* %104, i64 %106 - %108 = load float, float* %107, align 4 - %109 = load float*, float** %12, align 8 - %110 = load i32, i32* %14, align 4 - %111 = sext i32 %110 to i64 - %112 = getelementptr inbounds float, float* %109, i64 %111 - %113 = load float, float* %112, align 4 - %114 = fadd float %108, %113 - %115 = load float*, float** %12, align 8 - %116 = load i32, i32* %13, align 4 - %117 = sext i32 %116 to i64 - %118 = getelementptr inbounds float, float* %115, i64 %117 - %119 = load float, float* %118, align 4 - %120 = load float*, float** %12, align 8 - %121 = load i32, i32* %14, align 4 - %122 = sext i32 %121 to i64 - %123 = getelementptr inbounds float, float* %120, i64 %122 - %124 = load float, float* %123, align 4 - %125 = fsub float %119, %124 - %126 = load float*, float** %12, align 8 - %127 = load i32, 
i32* %14, align 4 - %128 = sext i32 %127 to i64 - %129 = getelementptr inbounds float, float* %126, i64 %128 - %130 = load float, float* %107, align 4 - %131 = load float, float* %112, align 4 - %132 = fadd float %130, %131 - %133 = insertelement <4 x float> zeroinitializer, float %132, i32 0 - %134 = load float, float* %118, align 4 - %135 = load float, float* %123, align 4 - %136 = fsub float %134, %135 - %137 = insertelement <4 x float> %133, float %136, i32 1 - %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 2 - %139 = insertelement <4 x float> %138, float 0.000000e+00, i32 3 - %140 = extractelement <4 x float> %139, i32 0 - store float %140, float* %18, align 4 - %141 = extractelement <4 x float> %139, i32 1 - store float %141, float* %129, align 4 - %142 = load float, float* %18, align 4 - %143 = load float*, float** %12, align 8 - %144 = load i32, i32* %13, align 4 - %145 = sext i32 %144 to i64 - %146 = getelementptr inbounds float, float* %143, i64 %145 - store float %142, float* %146, align 4 - %147 = load i32, i32* %13, align 4 - %148 = load i32, i32* %15, align 4 - %149 = shl i32 %147, %148 - %150 = and i32 %149, 7 - store i32 %150, i32* %16, align 4 - %151 = load i32, i32* %16, align 4 - %152 = icmp sgt i32 %151, 0 - br i1 %152, label %153, label %237 - -153: ; preds = %54 - %154 = load float*, float** %9, align 8 - %155 = load i32, i32* %16, align 4 - %156 = sext i32 %155 to i64 - %157 = getelementptr inbounds float, float* %154, i64 %156 - %158 = load float, float* %157, align 4 - %159 = load float*, float** %11, align 8 - %160 = load i32, i32* %14, align 4 - %161 = sext i32 %160 to i64 - %162 = getelementptr inbounds float, float* %159, i64 %161 - %163 = load float, float* %162, align 4 - %164 = fmul float %158, %163 - %165 = load float*, float** %10, align 8 - %166 = load i32, i32* %16, align 4 - %167 = sext i32 %166 to i64 - %168 = getelementptr inbounds float, float* %165, i64 %167 - %169 = load float, float* %168, align 4 - %170 = 
load float*, float** %12, align 8 - %171 = load i32, i32* %14, align 4 - %172 = sext i32 %171 to i64 - %173 = getelementptr inbounds float, float* %170, i64 %172 - %174 = load float, float* %173, align 4 - %175 = fmul float %169, %174 - %176 = fsub float %164, %175 - %177 = load float*, float** %9, align 8 - %178 = load i32, i32* %16, align 4 - %179 = sext i32 %178 to i64 - %180 = getelementptr inbounds float, float* %177, i64 %179 - %181 = load float, float* %180, align 4 - %182 = load float*, float** %12, align 8 - %183 = load i32, i32* %14, align 4 - %184 = sext i32 %183 to i64 - %185 = getelementptr inbounds float, float* %182, i64 %184 - %186 = load float, float* %185, align 4 - %187 = fmul float %181, %186 - %188 = load float*, float** %10, align 8 - %189 = load i32, i32* %16, align 4 - %190 = sext i32 %189 to i64 - %191 = getelementptr inbounds float, float* %188, i64 %190 - %192 = load float, float* %191, align 4 - %193 = load float*, float** %11, align 8 - %194 = load i32, i32* %14, align 4 - %195 = sext i32 %194 to i64 - %196 = getelementptr inbounds float, float* %193, i64 %195 - %197 = load float, float* %196, align 4 - %198 = fmul float %192, %197 - %199 = fadd float %187, %198 - %200 = load float*, float** %12, align 8 - %201 = load i32, i32* %14, align 4 - %202 = sext i32 %201 to i64 - %203 = getelementptr inbounds float, float* %200, i64 %202 - %204 = load float, float* %180, align 4 - %205 = insertelement <4 x float> , float %204, i32 1 - %206 = insertelement <4 x float> %205, float 1.000000e+00, i32 2 - %207 = insertelement <4 x float> %206, float 1.000000e+00, i32 3 - %208 = load float, float* %185, align 4 - %209 = insertelement <4 x float> zeroinitializer, float %208, i32 1 - %210 = insertelement <4 x float> %209, float 0.000000e+00, i32 2 - %211 = insertelement <4 x float> %210, float 0.000000e+00, i32 3 - %212 = fmul <4 x float> %207, %211 - %213 = load float, float* %191, align 4 - %214 = insertelement <4 x float> , float %213, i32 1 - %215 
= insertelement <4 x float> %214, float 1.000000e+00, i32 2 - %216 = insertelement <4 x float> %215, float 1.000000e+00, i32 3 - %217 = load float, float* %157, align 4 - %218 = load float, float* %162, align 4 - %219 = fmul float %217, %218 - %220 = load float, float* %168, align 4 - %221 = load float, float* %173, align 4 - %222 = fmul float %220, %221 - %223 = fsub float %219, %222 - %224 = insertelement <4 x float> zeroinitializer, float %223, i32 0 - %225 = load float, float* %196, align 4 - %226 = insertelement <4 x float> %224, float %225, i32 1 - %227 = insertelement <4 x float> %226, float 0.000000e+00, i32 2 - %228 = insertelement <4 x float> %227, float 0.000000e+00, i32 3 - %229 = call <4 x float> @llvm.fma.f32(<4 x float> %216, <4 x float> %228, <4 x float> %212) - %230 = extractelement <4 x float> %229, i32 0 - store float %230, float* %18, align 4 - %231 = extractelement <4 x float> %229, i32 1 - store float %231, float* %203, align 4 - %232 = load float, float* %18, align 4 - %233 = load float*, float** %11, align 8 - %234 = load i32, i32* %14, align 4 - %235 = sext i32 %234 to i64 - %236 = getelementptr inbounds float, float* %233, i64 %235 - store float %232, float* %236, align 4 - br label %237 - -237: ; preds = %153, %54 - %238 = load i32, i32* %14, align 4 - %239 = add nsw i32 %238, 1 - store i32 %239, i32* %14, align 4 - br label %51 - -240: ; preds = %51 - %241 = load i32, i32* %17, align 4 - %242 = ashr i32 %241, 1 - store i32 %242, i32* %17, align 4 - %243 = load i32, i32* %15, align 4 - %244 = add nsw i32 %243, 1 - store i32 %244, i32* %15, align 4 - br label %46 - -245: ; preds = %46 - ret void - %1 = alloca i32, align 4 - %2 = alloca [8 x float], align 16 - %3 = alloca [8 x float], align 16 - %4 = alloca [4 x float], align 16 - %5 = alloca [4 x float], align 16 - %6 = alloca [8 x float], align 16 - %7 = alloca [8 x float], align 16 - %8 = alloca i32, align 4 - %9 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %10 = bitcast [8 x 
float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %10, i8* align 16 bitcast ([8 x float]* @__const.main.real_in to i8*), i64 32, i1 false) - %11 = bitcast [8 x float]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %11, i8* align 16 bitcast ([8 x float]* @__const.main.img_in to i8*), i64 32, i1 false) - %12 = bitcast [4 x float]* %4 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %12, i8* align 16 bitcast ([4 x float]* @__const.main.real_twid_in to i8*), i64 16, i1 false) - %13 = bitcast [4 x float]* %5 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %13, i8* align 16 bitcast ([4 x float]* @__const.main.img_twid_in to i8*), i64 16, i1 false) - %14 = bitcast [8 x float]* %6 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %14, i8* align 16 bitcast ([8 x float]* @__const.main.real_out to i8*), i64 32, i1 false) - %15 = bitcast [8 x float]* %7 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %15, i8* align 16 bitcast ([8 x float]* @__const.main.img_out to i8*), i64 32, i1 false) - %16 = getelementptr inbounds [8 x float], [8 x float]* %2, i64 0, i64 0 - %17 = getelementptr inbounds [8 x float], [8 x float]* %3, i64 0, i64 0 - %18 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - %19 = getelementptr inbounds [4 x float], [4 x float]* %5, i64 0, i64 0 - %20 = getelementptr inbounds [8 x float], [8 x float]* %6, i64 0, i64 0 - %21 = getelementptr inbounds [8 x float], [8 x float]* %7, i64 0, i64 0 - call void @fft(float* %16, float* %17, float* %18, float* %19, float* %20, float* %21) - store i32 0, i32* %8, align 4 - br label %22 - -22: ; preds = %32, %0 - %23 = load i32, i32* %8, align 4 - %24 = icmp slt i32 %23, 8 - br i1 %24, label %25, label %35 - -25: ; preds = %22 - %26 = load i32, i32* %8, align 4 - %27 = sext i32 %26 to i64 - %28 = getelementptr inbounds [8 x float], [8 x float]* %6, i64 0, i64 %27 - %29 = load float, float* %28, align 4 - %30 = fpext float %29 to 
double - %31 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %30) - br label %32 - -32: ; preds = %25 - %33 = load i32, i32* %8, align 4 - %34 = add nsw i32 %33, 1 - store i32 %34, i32* %8, align 4 - br label %22 - -35: ; preds = %22 - store i32 0, i32* %9, align 4 - br label %36 - -36: ; preds = %46, %35 - %37 = load i32, i32* %9, align 4 - %38 = icmp slt i32 %37, 8 - br i1 %38, label %39, label %49 - -39: ; preds = %36 - %40 = load i32, i32* %9, align 4 - %41 = sext i32 %40 to i64 - %42 = getelementptr inbounds [8 x float], [8 x float]* %7, i64 0, i64 %41 - %43 = load float, float* %42, align 4 - %44 = fpext float %43 to double - %45 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %44) - br label %46 - -46: ; preds = %39 - %47 = load i32, i32* %9, align 4 - %48 = add nsw i32 %47, 1 - store i32 %48, i32* %9, align 4 - br label %36 - -49: ; preds = %36 - %50 = load i32, i32* %1, align 4 - ret i32 %50 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("-2,"), Get([3, 4]), Add([2, 5]), Symbol("no-array-name3"), Symbol("-3,"), Get([7, 8]), Symbol("no-array-name4"), Symbol("-4,"), Get([10, 11]), Minus([9, 12]), Num(0), Num(0), Vec([6, 13, 14, 15])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("-2,"), Get([3, 4]), Add([2, 5]), Symbol("no-array-name3"), Symbol("-3,"), Get([7, 8]), Symbol("no-array-name4"), Symbol("-4,"), Get([10, 11]), Minus([9, 12]), Num(0), Num(0), Vec([6, 13, 14, 15])] -RecExpr { nodes: [Symbol("no-array-name5"), Symbol("-5,"), Get([0, 1]), 
Symbol("no-array-name6"), Symbol("-6,"), Get([3, 4]), Add([2, 5]), Symbol("no-array-name7"), Symbol("-7,"), Get([7, 8]), Symbol("no-array-name8"), Symbol("-8,"), Get([10, 11]), Minus([9, 12]), Num(0), Num(0), Vec([6, 13, 14, 15])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name5"), Symbol("-5,"), Get([0, 1]), Symbol("no-array-name6"), Symbol("-6,"), Get([3, 4]), Add([2, 5]), Symbol("no-array-name7"), Symbol("-7,"), Get([7, 8]), Symbol("no-array-name8"), Symbol("-8,"), Get([10, 11]), Minus([9, 12]), Num(0), Num(0), Vec([6, 13, 14, 15])] -RecExpr { nodes: [Symbol("no-array-name9"), Symbol("-9,"), Get([0, 1]), Symbol("no-array-name10"), Symbol("-10,"), Get([3, 4]), Mul([2, 5]), Symbol("no-array-name11"), Symbol("-11,"), Get([7, 8]), Symbol("no-array-name12"), Symbol("-12,"), Get([10, 11]), Mul([9, 12]), Minus([6, 13]), Symbol("no-array-name13"), Symbol("-13,"), Get([15, 16]), Symbol("no-array-name14"), Symbol("-14,"), Get([18, 19]), Mul([17, 20]), Symbol("no-array-name15"), Symbol("-15,"), Get([22, 23]), Symbol("no-array-name16"), Symbol("-16,"), Get([25, 26]), Mul([24, 27]), Add([21, 28]), Num(0), Num(0), Vec([14, 29, 30, 31])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 6 iterations, reason: Some(Saturated) -[Num(1), Symbol("no-array-name13"), Symbol("-13,"), Get([1, 2]), Num(1), Num(1), Vec([0, 3, 4, 5]), Num(0), Symbol("no-array-name14"), Symbol("-14,"), Get([8, 9]), Num(0), Num(0), LitVec([7, 10, 11, 12]), VecMul([6, 13]), Num(1), Symbol("no-array-name15"), Symbol("-15,"), Get([16, 17]), Num(1), Num(1), Vec([15, 
18, 19, 20]), Symbol("no-array-name9"), Symbol("-9,"), Get([22, 23]), Symbol("no-array-name10"), Symbol("-10,"), Get([25, 26]), Mul([24, 27]), Symbol("no-array-name11"), Symbol("-11,"), Get([29, 30]), Symbol("no-array-name12"), Symbol("-12,"), Get([32, 33]), Mul([31, 34]), Minus([28, 35]), Symbol("no-array-name16"), Symbol("-16,"), Get([37, 38]), Num(0), Num(0), Vec([36, 39, 40, 41]), VecMAC([14, 21, 42])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/five_binops.c b/src/dios-egraphs/Diospyros/llvm-tests/five_binops.c deleted file mode 100644 index 0d03f65f..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/five_binops.c +++ /dev/null @@ -1,22 +0,0 @@ -#include - -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {5, 6, 7, 8}; -float c_in[] = {1, 2, 3, 4}; -float d_in[] = {5, 6, 7, 8}; -float e_in[] = {1, 2, 3, 4}; - -int main(int argc, char **argv) { - // return argc + 5; - float c_out[4]; - c_out[0] = a_in[0] + b_in[0] + c_in[0] + d_in[0] + e_in[0]; - c_out[1] = a_in[1] + b_in[1] + c_in[1] + d_in[1] + e_in[1]; - c_out[2] = a_in[2] + b_in[2] + c_in[2] + d_in[2] + e_in[2]; - c_out[3] = a_in[3] + b_in[3] + c_in[3] + d_in[3] + e_in[3]; - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - // expected: 13, 18, 23, 28 - return 0; -} diff --git a/src/dios-egraphs/Diospyros/llvm-tests/five_binops.expect b/src/dios-egraphs/Diospyros/llvm-tests/five_binops.expect deleted file mode 100644 index b76605ed..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/five_binops.expect +++ /dev/null @@ -1,121 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([4 x 
float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 0), align 16 - %11 = fadd float %9, %10 - %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @d_in, i64 0, i64 0), align 16 - %13 = fadd float %11, %12 - %14 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 0), align 16 - %15 = fadd float %13, %14 - %16 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %17 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %18 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %19 = fadd float %17, %18 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 1), align 4 - %21 = fadd float %19, %20 - %22 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @d_in, i64 0, i64 1), align 4 - %23 = fadd float %21, %22 - %24 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 1), align 4 - %25 = fadd float %23, %24 - %26 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %27 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %28 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %29 = fadd float %27, %28 - %30 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 2), align 8 - %31 = fadd float %29, %30 - %32 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @d_in, i64 0, i64 2), align 8 - %33 = fadd float %31, %32 - %34 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 2), align 8 - %35 = fadd float %33, %34 - %36 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 
- %37 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %38 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %39 = fadd float %37, %38 - %40 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 3), align 4 - %41 = fadd float %39, %40 - %42 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @d_in, i64 0, i64 3), align 4 - %43 = fadd float %41, %42 - %44 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 3), align 4 - %45 = fadd float %43, %44 - %46 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %47 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %48 = insertelement <4 x float> zeroinitializer, float %47, i32 0 - %49 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %50 = insertelement <4 x float> %48, float %49, i32 1 - %51 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %52 = insertelement <4 x float> %50, float %51, i32 2 - %53 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %54 = insertelement <4 x float> %52, float %53, i32 3 - %55 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %56 = insertelement <4 x float> zeroinitializer, float %55, i32 0 - %57 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %58 = insertelement <4 x float> %56, float %57, i32 1 - %59 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %60 = insertelement <4 x float> %58, float %59, i32 2 - %61 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %62 
= insertelement <4 x float> %60, float %61, i32 3 - %63 = fadd <4 x float> %54, %62 - %64 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 0), align 4 - %65 = insertelement <4 x float> zeroinitializer, float %64, i32 0 - %66 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 1), align 4 - %67 = insertelement <4 x float> %65, float %66, i32 1 - %68 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 2), align 4 - %69 = insertelement <4 x float> %67, float %68, i32 2 - %70 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 3), align 4 - %71 = insertelement <4 x float> %69, float %70, i32 3 - %72 = fadd <4 x float> %63, %71 - %73 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @d_in, i64 0, i64 0), align 4 - %74 = insertelement <4 x float> zeroinitializer, float %73, i32 0 - %75 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @d_in, i64 0, i64 1), align 4 - %76 = insertelement <4 x float> %74, float %75, i32 1 - %77 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @d_in, i64 0, i64 2), align 4 - %78 = insertelement <4 x float> %76, float %77, i32 2 - %79 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @d_in, i64 0, i64 3), align 4 - %80 = insertelement <4 x float> %78, float %79, i32 3 - %81 = fadd <4 x float> %72, %80 - %82 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 0), align 4 - %83 = insertelement <4 x float> zeroinitializer, float %82, i32 0 - %84 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 1), align 4 - %85 = insertelement <4 x float> %83, float %84, i32 1 - %86 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 2), align 4 - %87 = insertelement <4 x float> %85, float %86, i32 2 - %88 = load 
float, float* getelementptr inbounds ([4 x float], [4 x float]* @e_in, i64 0, i64 3), align 4 - %89 = insertelement <4 x float> %87, float %88, i32 3 - %90 = fadd <4 x float> %81, %89 - %91 = extractelement <4 x float> %90, i32 0 - store float %91, float* %16, align 16 - %92 = extractelement <4 x float> %90, i32 1 - store float %92, float* %26, align 4 - %93 = extractelement <4 x float> %90, i32 2 - store float %93, float* %36, align 8 - %94 = extractelement <4 x float> %90, i32 3 - store float %94, float* %46, align 4 - %95 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %96 = load float, float* %95, align 16 - %97 = fpext float %96 to double - %98 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %97) - %99 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %100 = load float, float* %99, align 4 - %101 = fpext float %100 to double - %102 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %101) - %103 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %104 = load float, float* %103, align 8 - %105 = fpext float %104 to double - %106 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %105) - %107 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %108 = load float, float* %107, align 4 - %109 = fpext float %108 to double - %110 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %109) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("c_in"), Symbol("0,0,"), Get([7, 8]), Add([6, 9]), Symbol("d_in"), Symbol("0,0,"), Get([11, 12]), Add([10, 13]), Symbol("e_in"), Symbol("0,0,"), Get([15, 16]), Add([14, 17]), Symbol("a_in"), Symbol("0,1,"), Get([19, 20]), Symbol("b_in"), Symbol("0,1,"), Get([22, 23]), Add([21, 24]), Symbol("c_in"), Symbol("0,1,"), Get([26, 27]), Add([25, 28]), Symbol("d_in"), Symbol("0,1,"), Get([30, 31]), Add([29, 32]), Symbol("e_in"), Symbol("0,1,"), Get([34, 35]), Add([33, 36]), Symbol("a_in"), Symbol("0,2,"), Get([38, 39]), Symbol("b_in"), Symbol("0,2,"), Get([41, 42]), Add([40, 43]), Symbol("c_in"), Symbol("0,2,"), Get([45, 46]), Add([44, 47]), Symbol("d_in"), Symbol("0,2,"), Get([49, 50]), Add([48, 51]), Symbol("e_in"), Symbol("0,2,"), Get([53, 54]), Add([52, 55]), Symbol("a_in"), Symbol("0,3,"), Get([57, 58]), Symbol("b_in"), Symbol("0,3,"), Get([60, 61]), Add([59, 62]), Symbol("c_in"), Symbol("0,3,"), Get([64, 65]), Add([63, 66]), Symbol("d_in"), Symbol("0,3,"), Get([68, 69]), Add([67, 70]), Symbol("e_in"), Symbol("0,3,"), Get([72, 73]), Add([71, 74]), Vec([18, 37, 56, 75])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 7 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,3,"), 
Get([22, 23]), LitVec([15, 18, 21, 24]), VecAdd([12, 25]), Symbol("c_in"), Symbol("0,0,"), Get([27, 28]), Symbol("c_in"), Symbol("0,1,"), Get([30, 31]), Symbol("c_in"), Symbol("0,2,"), Get([33, 34]), Symbol("c_in"), Symbol("0,3,"), Get([36, 37]), LitVec([29, 32, 35, 38]), VecAdd([26, 39]), Symbol("d_in"), Symbol("0,0,"), Get([41, 42]), Symbol("d_in"), Symbol("0,1,"), Get([44, 45]), Symbol("d_in"), Symbol("0,2,"), Get([47, 48]), Symbol("d_in"), Symbol("0,3,"), Get([50, 51]), LitVec([43, 46, 49, 52]), VecAdd([40, 53]), Symbol("e_in"), Symbol("0,0,"), Get([55, 56]), Symbol("e_in"), Symbol("0,1,"), Get([58, 59]), Symbol("e_in"), Symbol("0,2,"), Get([61, 62]), Symbol("e_in"), Symbol("0,3,"), Get([64, 65]), LitVec([57, 60, 63, 66]), VecAdd([54, 67])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/if-else.c b/src/dios-egraphs/Diospyros/llvm-tests/if-else.c deleted file mode 100644 index 7e5858fd..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/if-else.c +++ /dev/null @@ -1,23 +0,0 @@ -#include -#define SIZE 8 - -void if_else(float a_in[SIZE], float b_out[SIZE]) { - for (int i = 0; i < SIZE; i++) { - if (i < SIZE / 2) { - b_out[i] = a_in[i]; - } else { - b_out[i] = a_in[i] + 1; - } - } -} - -int main(int argc, char **argv) { - float a_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; - float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; - if_else(a_in, b_out); - for (int i = 0; i < SIZE; i++) { - printf("%f\n", b_out[i]); - } - // expected: 1, 2, 3, 4, 6, 7, 8, 9 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mac.c b/src/dios-egraphs/Diospyros/llvm-tests/mac.c deleted file mode 100644 index 787b5ad0..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mac.c +++ /dev/null @@ -1,19 +0,0 @@ -#include - -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {2, 3, 4, 5}; -float c_in[] = {3, 4, 5, 6}; - -int main(int argc, char **argv) { - float d_out[4]; - d_out[0] = a_in[0] + (b_in[0] * c_in[0]); - d_out[1] = a_in[1] + (b_in[1] * c_in[1]); - 
d_out[2] = a_in[2] + (b_in[2] * c_in[2]); - d_out[3] = a_in[3] + (b_in[3] * c_in[3]); - printf("first: %f\n", d_out[0]); - printf("second: %f\n", d_out[1]); - printf("third: %f\n", d_out[2]); - printf("fourth: %f\n", d_out[3]); - // expected: 7, 14, 23, 34 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mac.expect b/src/dios-egraphs/Diospyros/llvm-tests/mac.expect deleted file mode 100644 index e0e798a8..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mac.expect +++ /dev/null @@ -1,86 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 0), align 16 - %10 = fmul float %8, %9 - %11 = fadd float %7, %10 - %12 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %13 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %14 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %15 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 1), align 4 - %16 = fmul float %14, %15 - %17 = fadd float %13, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %19 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %21 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 2), align 8 - %22 = fmul 
float %20, %21 - %23 = fadd float %19, %22 - %24 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %25 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %26 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %27 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 3), align 4 - %28 = fmul float %26, %27 - %29 = fadd float %25, %28 - %30 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %31 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 - %33 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %34 = insertelement <4 x float> %32, float %33, i32 1 - %35 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %36 = insertelement <4 x float> %34, float %35, i32 2 - %37 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %38 = insertelement <4 x float> %36, float %37, i32 3 - %39 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %40 = insertelement <4 x float> zeroinitializer, float %39, i32 0 - %41 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %42 = insertelement <4 x float> %40, float %41, i32 1 - %43 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %44 = insertelement <4 x float> %42, float %43, i32 2 - %45 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %46 = insertelement <4 x float> %44, float %45, i32 3 - %47 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 0), 
align 4 - %48 = insertelement <4 x float> zeroinitializer, float %47, i32 0 - %49 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 1), align 4 - %50 = insertelement <4 x float> %48, float %49, i32 1 - %51 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 2), align 4 - %52 = insertelement <4 x float> %50, float %51, i32 2 - %53 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 3), align 4 - %54 = insertelement <4 x float> %52, float %53, i32 3 - %55 = call <4 x float> @llvm.fma.f32(<4 x float> %46, <4 x float> %54, <4 x float> %38) - %56 = extractelement <4 x float> %55, i32 0 - store float %56, float* %12, align 16 - %57 = extractelement <4 x float> %55, i32 1 - store float %57, float* %18, align 4 - %58 = extractelement <4 x float> %55, i32 2 - store float %58, float* %24, align 8 - %59 = extractelement <4 x float> %55, i32 3 - store float %59, float* %30, align 4 - %60 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %61 = load float, float* %60, align 16 - %62 = fpext float %61 to double - %63 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %62) - %64 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %65 = load float, float* %64, align 4 - %66 = fpext float %65 to double - %67 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %66) - %68 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %69 = load float, float* %68, align 8 - %70 = fpext float %69 to double - %71 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %70) - %72 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %73 = load float, float* %72, align 4 - %74 = fpext float %73 to double - %75 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %74) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("b_in"), Symbol("0,0,"), Get([0, 1]), Symbol("c_in"), Symbol("0,0,"), Get([3, 4]), Mul([2, 5]), Symbol("a_in"), Symbol("0,0,"), Get([7, 8]), Add([9, 6]), Symbol("b_in"), Symbol("0,1,"), Get([11, 12]), Symbol("c_in"), Symbol("0,1,"), Get([14, 15]), Mul([13, 16]), Symbol("a_in"), Symbol("0,1,"), Get([18, 19]), Add([20, 17]), Symbol("b_in"), Symbol("0,2,"), Get([22, 23]), Symbol("c_in"), Symbol("0,2,"), Get([25, 26]), Mul([24, 27]), Symbol("a_in"), Symbol("0,2,"), Get([29, 30]), Add([31, 28]), Symbol("b_in"), Symbol("0,3,"), Get([33, 34]), Symbol("c_in"), Symbol("0,3,"), Get([36, 37]), Mul([35, 38]), Symbol("a_in"), Symbol("0,3,"), Get([40, 41]), Add([42, 39]), Vec([10, 21, 32, 43])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 7 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,3,"), Get([22, 23]), LitVec([15, 18, 21, 24]), Symbol("c_in"), Symbol("0,0,"), Get([26, 27]), Symbol("c_in"), Symbol("0,1,"), Get([29, 30]), Symbol("c_in"), Symbol("0,2,"), Get([32, 33]), Symbol("c_in"), Symbol("0,3,"), Get([35, 36]), LitVec([28, 31, 34, 37]), VecMAC([12, 25, 38])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mat_mul.c b/src/dios-egraphs/Diospyros/llvm-tests/mat_mul.c deleted file mode 100644 index 344fce3d..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mat_mul.c +++ 
/dev/null @@ -1,17 +0,0 @@ -#include - -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {5, 6, 7, 8}; - -int main(int argc, char **argv) { - float c_out[4] = {a_in[0] * b_in[0] + a_in[1] * b_in[2], - a_in[0] * b_in[1] + a_in[1] * b_in[3], - a_in[2] * b_in[0] + a_in[3] * b_in[2], - a_in[2] * b_in[1] + a_in[3] * b_in[3]}; - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - // expected: 19, 22, 43, 50 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mat_mul.expect b/src/dios-egraphs/Diospyros/llvm-tests/mat_mul.expect deleted file mode 100644 index 35745a9e..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mat_mul.expect +++ /dev/null @@ -1,103 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %9 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %10 = fmul float %8, %9 - %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %13 = fmul float %11, %12 - %14 = fadd float %10, %13 - %15 = getelementptr inbounds float, float* %7, i64 1 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %17 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %18 = fmul float %16, %17 - %19 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 
4 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %21 = fmul float %19, %20 - %22 = fadd float %18, %21 - %23 = getelementptr inbounds float, float* %15, i64 1 - %24 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %25 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %26 = fmul float %24, %25 - %27 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %28 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %29 = fmul float %27, %28 - %30 = fadd float %26, %29 - %31 = getelementptr inbounds float, float* %23, i64 1 - %32 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %33 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %34 = fmul float %32, %33 - %35 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %36 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %37 = fmul float %35, %36 - %38 = fadd float %34, %37 - %39 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %40 = insertelement <4 x float> zeroinitializer, float %39, i32 0 - %41 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %42 = insertelement <4 x float> %40, float %41, i32 1 - %43 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %44 = insertelement <4 x float> %42, float %43, i32 2 - %45 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %46 = insertelement <4 x float> %44, float %45, i32 3 - %47 = load float, float* 
getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %48 = insertelement <4 x float> zeroinitializer, float %47, i32 0 - %49 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %50 = insertelement <4 x float> %48, float %49, i32 1 - %51 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %52 = insertelement <4 x float> %50, float %51, i32 2 - %53 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %54 = insertelement <4 x float> %52, float %53, i32 3 - %55 = fmul <4 x float> %46, %54 - %56 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %57 = insertelement <4 x float> zeroinitializer, float %56, i32 0 - %58 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %59 = insertelement <4 x float> %57, float %58, i32 1 - %60 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %61 = insertelement <4 x float> %59, float %60, i32 2 - %62 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %63 = insertelement <4 x float> %61, float %62, i32 3 - %64 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %65 = insertelement <4 x float> zeroinitializer, float %64, i32 0 - %66 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %67 = insertelement <4 x float> %65, float %66, i32 1 - %68 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %69 = insertelement <4 x float> %67, float %68, i32 2 - %70 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %71 = insertelement <4 x float> %69, float %70, i32 3 
- %72 = call <4 x float> @llvm.fma.f32(<4 x float> %63, <4 x float> %71, <4 x float> %55) - %73 = extractelement <4 x float> %72, i32 0 - store float %73, float* %7, align 4 - %74 = extractelement <4 x float> %72, i32 1 - store float %74, float* %15, align 4 - %75 = extractelement <4 x float> %72, i32 2 - store float %75, float* %23, align 4 - %76 = extractelement <4 x float> %72, i32 3 - store float %76, float* %31, align 4 - %77 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %78 = load float, float* %77, align 16 - %79 = fpext float %78 to double - %80 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %79) - %81 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %82 = load float, float* %81, align 4 - %83 = fpext float %82 to double - %84 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %83) - %85 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %86 = load float, float* %85, align 8 - %87 = fpext float %86 to double - %88 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %87) - %89 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %90 = load float, float* %89, align 4 - %91 = fpext float %90 to double - %92 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %91) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Mul([2, 5]), Symbol("a_in"), Symbol("0,1,"), Get([7, 8]), Symbol("b_in"), Symbol("0,2,"), Get([10, 11]), Mul([9, 12]), Add([6, 13]), Symbol("a_in"), Symbol("0,0,"), Get([15, 16]), Symbol("b_in"), Symbol("0,1,"), Get([18, 19]), Mul([17, 20]), Symbol("a_in"), Symbol("0,1,"), Get([22, 23]), Symbol("b_in"), Symbol("0,3,"), Get([25, 26]), Mul([24, 27]), Add([21, 28]), Symbol("a_in"), Symbol("0,2,"), Get([30, 31]), Symbol("b_in"), Symbol("0,0,"), Get([33, 34]), Mul([32, 35]), Symbol("a_in"), Symbol("0,3,"), Get([37, 38]), Symbol("b_in"), Symbol("0,2,"), Get([40, 41]), Mul([39, 42]), Add([36, 43]), Symbol("a_in"), Symbol("0,2,"), Get([45, 46]), Symbol("b_in"), Symbol("0,1,"), Get([48, 49]), Mul([47, 50]), Symbol("a_in"), Symbol("0,3,"), Get([52, 53]), Symbol("b_in"), Symbol("0,3,"), Get([55, 56]), Mul([54, 57]), Add([51, 58]), Vec([14, 29, 44, 59])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 8 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,0,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,2,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,0,"), Get([19, 20]), Symbol("b_in"), Symbol("0,1,"), Get([22, 23]), LitVec([15, 18, 21, 24]), VecMul([12, 25]), Symbol("a_in"), Symbol("0,1,"), Get([27, 28]), Symbol("a_in"), Symbol("0,1,"), Get([30, 31]), Symbol("a_in"), Symbol("0,3,"), Get([33, 34]), Symbol("a_in"), Symbol("0,3,"), Get([36, 37]), 
LitVec([29, 32, 35, 38]), Symbol("b_in"), Symbol("0,2,"), Get([40, 41]), Symbol("b_in"), Symbol("0,3,"), Get([43, 44]), Symbol("b_in"), Symbol("0,2,"), Get([46, 47]), Symbol("b_in"), Symbol("0,3,"), Get([49, 50]), LitVec([42, 45, 48, 51]), VecMAC([26, 39, 52])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.c b/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.c deleted file mode 100644 index 4165ae39..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.c +++ /dev/null @@ -1,31 +0,0 @@ -#include - -#define A_ROWS 2 -#define A_COLS 2 -#define B_COLS 2 - -void matrix_multiply(float a_in[A_ROWS * A_COLS], float b_in[A_COLS * B_COLS], - float c_out[A_ROWS * B_COLS]) { - for (int y = 0; y < A_ROWS; y++) { - for (int x = 0; x < B_COLS; x++) { - c_out[B_COLS * y + x] = 0; - for (int k = 0; k < A_COLS; k++) { - c_out[B_COLS * y + x] += - a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; - } - } - } -} - -int main(void) { - float a_in[A_ROWS * A_COLS] = {1, 2, 3, 4}; - float b_in[A_COLS * B_COLS] = {1, 2, 3, 4}; - float c_out[A_ROWS * B_COLS] = {0, 0, 0, 0}; - matrix_multiply(a_in, b_in, c_out); - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - // expected (7, 10, 15, 22) - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.expect b/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.expect deleted file mode 100644 index 8c0a7cf0..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/matrix-multiply.expect +++ /dev/null @@ -1,153 +0,0 @@ - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - %7 = alloca i32, align 4 - %8 = alloca i32, align 4 - %9 = alloca i32, align 4 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - store i32 0, i32* %7, align 4 - br label %10 - -10: ; preds = %80, 
%3 - %11 = load i32, i32* %7, align 4 - %12 = icmp slt i32 %11, 2 - br i1 %12, label %13, label %83 - -13: ; preds = %10 - store i32 0, i32* %8, align 4 - br label %14 - -14: ; preds = %76, %13 - %15 = load i32, i32* %8, align 4 - %16 = icmp slt i32 %15, 2 - br i1 %16, label %17, label %79 - -17: ; preds = %14 - %18 = load float*, float** %6, align 8 - %19 = load i32, i32* %7, align 4 - %20 = mul nsw i32 2, %19 - %21 = load i32, i32* %8, align 4 - %22 = add nsw i32 %20, %21 - %23 = sext i32 %22 to i64 - %24 = getelementptr inbounds float, float* %18, i64 %23 - store float 0.000000e+00, float* %24, align 4 - store i32 0, i32* %9, align 4 - br label %25 - -25: ; preds = %72, %17 - %26 = load i32, i32* %9, align 4 - %27 = icmp slt i32 %26, 2 - br i1 %27, label %28, label %75 - -28: ; preds = %25 - %29 = load float*, float** %4, align 8 - %30 = load i32, i32* %7, align 4 - %31 = mul nsw i32 2, %30 - %32 = load i32, i32* %9, align 4 - %33 = add nsw i32 %31, %32 - %34 = sext i32 %33 to i64 - %35 = getelementptr inbounds float, float* %29, i64 %34 - %36 = load float, float* %35, align 4 - %37 = load float*, float** %5, align 8 - %38 = load i32, i32* %9, align 4 - %39 = mul nsw i32 2, %38 - %40 = load i32, i32* %8, align 4 - %41 = add nsw i32 %39, %40 - %42 = sext i32 %41 to i64 - %43 = getelementptr inbounds float, float* %37, i64 %42 - %44 = load float, float* %43, align 4 - %45 = fmul float %36, %44 - %46 = load float*, float** %6, align 8 - %47 = load i32, i32* %7, align 4 - %48 = mul nsw i32 2, %47 - %49 = load i32, i32* %8, align 4 - %50 = add nsw i32 %48, %49 - %51 = sext i32 %50 to i64 - %52 = getelementptr inbounds float, float* %46, i64 %51 - %53 = load float, float* %52, align 4 - %54 = fadd float %53, %45 - %55 = load float, float* %52, align 4 - %56 = insertelement <4 x float> zeroinitializer, float %55, i32 0 - %57 = insertelement <4 x float> %56, float 0.000000e+00, i32 1 - %58 = insertelement <4 x float> %57, float 0.000000e+00, i32 2 - %59 = insertelement 
<4 x float> %58, float 0.000000e+00, i32 3 - %60 = load float, float* %35, align 4 - %61 = insertelement <4 x float> zeroinitializer, float %60, i32 0 - %62 = insertelement <4 x float> %61, float 0.000000e+00, i32 1 - %63 = insertelement <4 x float> %62, float 0.000000e+00, i32 2 - %64 = insertelement <4 x float> %63, float 0.000000e+00, i32 3 - %65 = load float, float* %43, align 4 - %66 = insertelement <4 x float> zeroinitializer, float %65, i32 0 - %67 = insertelement <4 x float> %66, float 0.000000e+00, i32 1 - %68 = insertelement <4 x float> %67, float 0.000000e+00, i32 2 - %69 = insertelement <4 x float> %68, float 0.000000e+00, i32 3 - %70 = call <4 x float> @llvm.fma.f32(<4 x float> %64, <4 x float> %69, <4 x float> %59) - %71 = extractelement <4 x float> %70, i32 0 - store float %71, float* %52, align 4 - br label %72 - -72: ; preds = %28 - %73 = load i32, i32* %9, align 4 - %74 = add nsw i32 %73, 1 - store i32 %74, i32* %9, align 4 - br label %25 - -75: ; preds = %25 - br label %76 - -76: ; preds = %75 - %77 = load i32, i32* %8, align 4 - %78 = add nsw i32 %77, 1 - store i32 %78, i32* %8, align 4 - br label %14 - -79: ; preds = %14 - br label %80 - -80: ; preds = %79 - %81 = load i32, i32* %7, align 4 - %82 = add nsw i32 %81, 1 - store i32 %82, i32* %7, align 4 - br label %10 - -83: ; preds = %10 - ret void - %1 = alloca i32, align 4 - %2 = alloca [4 x float], align 16 - %3 = alloca [4 x float], align 16 - %4 = alloca [4 x float], align 16 - store i32 0, i32* %1, align 4 - %5 = bitcast [4 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %5, i8* align 16 bitcast ([4 x float]* @__const.main.a_in to i8*), i64 16, i1 false) - %6 = bitcast [4 x float]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %6, i8* align 16 bitcast ([4 x float]* @__const.main.b_in to i8*), i64 16, i1 false) - %7 = bitcast [4 x float]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %7, i8 0, i64 16, i1 false) - %8 = getelementptr inbounds 
[4 x float], [4 x float]* %2, i64 0, i64 0 - %9 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - %10 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - call void @matrix_multiply(float* %8, float* %9, float* %10) - %11 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - %12 = load float, float* %11, align 16 - %13 = fpext float %12 to double - %14 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %13) - %15 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 1 - %16 = load float, float* %15, align 4 - %17 = fpext float %16 to double - %18 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %17) - %19 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 2 - %20 = load float, float* %19, align 8 - %21 = fpext float %20 to double - %22 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %21) - %23 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 3 - %24 = load float, float* %23, align 4 - %25 = fpext float %24 to double - %26 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %25) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("-2,"), Get([3, 4]), Mul([2, 5]), Symbol("no-array-name3"), Symbol("-3,"), Get([7, 8]), Add([9, 6]), Num(0), Num(0), Num(0), Vec([10, 11, 12, 13])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name3"), Symbol("-3,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Symbol("no-array-name1"), Symbol("-1,"), Get([7, 8]), Num(0), Num(0), Num(0), LitVec([9, 10, 11, 12]), Symbol("no-array-name2"), Symbol("-2,"), Get([14, 15]), Num(0), Num(0), Num(0), LitVec([16, 17, 18, 19]), VecMAC([6, 13, 20])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mixed.c b/src/dios-egraphs/Diospyros/llvm-tests/mixed.c deleted file mode 100644 index 3cf28606..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mixed.c +++ /dev/null @@ -1,18 +0,0 @@ -#include - -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {5, 6, 7, 8}; - -int main(int argc, char **argv) { - float c_out[4]; - c_out[0] = a_in[0] + 3; - c_out[1] = 5 + b_in[1]; - c_out[2] = 3 + b_in[2]; - c_out[3] = a_in[3] + b_in[3]; - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - // expected: 4, 11, 10, 12 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mixed.expect b/src/dios-egraphs/Diospyros/llvm-tests/mixed.expect deleted file mode 100644 index b743cb56..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mixed.expect +++ /dev/null @@ -1,63 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = 
alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = fadd float %7, 3.000000e+00 - %9 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %11 = fadd float 5.000000e+00, %10 - %12 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %13 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %14 = fadd float 3.000000e+00, %13 - %15 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %17 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %18 = fadd float %16, %17 - %19 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %21 = insertelement <4 x float> zeroinitializer, float %20, i32 0 - %22 = insertelement <4 x float> %21, float 5.000000e+00, i32 1 - %23 = insertelement <4 x float> %22, float 3.000000e+00, i32 2 - %24 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %25 = insertelement <4 x float> %23, float %24, i32 3 - %26 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %27 = insertelement <4 x float> , float %26, i32 1 - %28 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %29 = insertelement <4 x float> %27, float %28, i32 2 - %30 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, 
i64 0, i64 3), align 4 - %31 = insertelement <4 x float> %29, float %30, i32 3 - %32 = fadd <4 x float> %25, %31 - %33 = extractelement <4 x float> %32, i32 0 - store float %33, float* %9, align 16 - %34 = extractelement <4 x float> %32, i32 1 - store float %34, float* %12, align 4 - %35 = extractelement <4 x float> %32, i32 2 - store float %35, float* %15, align 8 - %36 = extractelement <4 x float> %32, i32 3 - store float %36, float* %19, align 4 - %37 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %38 = load float, float* %37, align 16 - %39 = fpext float %38 to double - %40 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %39) - %41 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %42 = load float, float* %41, align 4 - %43 = fpext float %42 to double - %44 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %43) - %45 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %46 = load float, float* %45, align 8 - %47 = fpext float %46 to double - %48 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %47) - %49 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %50 = load float, float* %49, align 4 - %51 = fpext float %50 to double - %52 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %51) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Num(3), Add([2, 3]), Num(5), Symbol("b_in"), Symbol("0,1,"), Get([6, 7]), Add([5, 8]), Num(3), Symbol("b_in"), Symbol("0,2,"), Get([11, 12]), Add([10, 13]), Symbol("a_in"), Symbol("0,3,"), Get([15, 16]), Symbol("b_in"), Symbol("0,3,"), Get([18, 19]), Add([17, 20]), Vec([4, 9, 14, 21])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Num(5), Num(3), Symbol("a_in"), Symbol("0,3,"), Get([5, 6]), Vec([2, 3, 4, 7]), Num(3), Symbol("b_in"), Symbol("0,1,"), Get([10, 11]), Symbol("b_in"), Symbol("0,2,"), Get([13, 14]), Symbol("b_in"), Symbol("0,3,"), Get([16, 17]), Vec([9, 12, 15, 18]), VecAdd([8, 19])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mult.c b/src/dios-egraphs/Diospyros/llvm-tests/mult.c deleted file mode 100644 index aebe69f7..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mult.c +++ /dev/null @@ -1,18 +0,0 @@ -#include - -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {5, 6, 7, 8}; - -int main(int argc, char **argv) { - float c_out[4]; - c_out[0] = a_in[0] * b_in[0]; - c_out[1] = a_in[1] * b_in[1]; - c_out[2] = a_in[2] * b_in[2]; - c_out[3] = a_in[3] * b_in[3]; - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - // expected: 5, 12, 21, 32 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/mult.expect b/src/dios-egraphs/Diospyros/llvm-tests/mult.expect deleted file mode 100644 index ed6d2873..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/mult.expect +++ 
/dev/null @@ -1,70 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fmul float %7, %8 - %10 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %13 = fmul float %11, %12 - %14 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %15 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %17 = fmul float %15, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %19 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %21 = fmul float %19, %20 - %22 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %23 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %24 = insertelement <4 x float> zeroinitializer, float %23, i32 0 - %25 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %26 = insertelement <4 x float> %24, float %25, i32 1 - %27 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %28 = insertelement <4 x float> %26, float 
%27, i32 2 - %29 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %30 = insertelement <4 x float> %28, float %29, i32 3 - %31 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 - %33 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %34 = insertelement <4 x float> %32, float %33, i32 1 - %35 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %36 = insertelement <4 x float> %34, float %35, i32 2 - %37 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %38 = insertelement <4 x float> %36, float %37, i32 3 - %39 = fmul <4 x float> %30, %38 - %40 = extractelement <4 x float> %39, i32 0 - store float %40, float* %10, align 16 - %41 = extractelement <4 x float> %39, i32 1 - store float %41, float* %14, align 4 - %42 = extractelement <4 x float> %39, i32 2 - store float %42, float* %18, align 8 - %43 = extractelement <4 x float> %39, i32 3 - store float %43, float* %22, align 4 - %44 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %45 = load float, float* %44, align 16 - %46 = fpext float %45 to double - %47 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %46) - %48 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %49 = load float, float* %48, align 4 - %50 = fpext float %49 to double - %51 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %50) - %52 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %53 = load float, float* %52, align 8 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %54) - %56 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %57 = load float, float* %56, align 4 - %58 = fpext float %57 to double - %59 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %58) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Mul([2, 5]), Symbol("a_in"), Symbol("0,1,"), Get([7, 8]), Symbol("b_in"), Symbol("0,1,"), Get([10, 11]), Mul([9, 12]), Symbol("a_in"), Symbol("0,2,"), Get([14, 15]), Symbol("b_in"), Symbol("0,2,"), Get([17, 18]), Mul([16, 19]), Symbol("a_in"), Symbol("0,3,"), Get([21, 22]), Symbol("b_in"), Symbol("0,3,"), Get([24, 25]), Mul([23, 26]), Vec([6, 13, 20, 27])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,3,"), Get([22, 23]), LitVec([15, 18, 21, 24]), VecMul([12, 25])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.c b/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.c deleted file mode 100644 index c474e2f2..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.c +++ /dev/null @@ -1,19 +0,0 @@ -#include - -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {5, 6, 7, 8}; -float c_in[] = {9, 10, 11, 12}; - -int main(int argc, char **argv) { - float 
d_out[4]; - d_out[0] = a_in[0] + b_in[0] + c_in[0]; - d_out[1] = a_in[1] + b_in[1] + c_in[1]; - d_out[2] = a_in[2] + b_in[2] + c_in[2]; - d_out[3] = a_in[3] + b_in[3] + c_in[3]; - printf("first: %f\n", d_out[0]); - printf("second: %f\n", d_out[1]); - printf("third: %f\n", d_out[2]); - printf("fourth: %f\n", d_out[3]); - // expected: 15, 18, 21, 24 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.expect b/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.expect deleted file mode 100644 index 622025c3..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/multiple_adds.expect +++ /dev/null @@ -1,87 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 0), align 16 - %11 = fadd float %9, %10 - %12 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %13 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %14 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %15 = fadd float %13, %14 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 1), align 4 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %19 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), 
align 8 - %21 = fadd float %19, %20 - %22 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 2), align 8 - %23 = fadd float %21, %22 - %24 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %25 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %26 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %27 = fadd float %25, %26 - %28 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 3), align 4 - %29 = fadd float %27, %28 - %30 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %31 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 - %33 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %34 = insertelement <4 x float> %32, float %33, i32 1 - %35 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %36 = insertelement <4 x float> %34, float %35, i32 2 - %37 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %38 = insertelement <4 x float> %36, float %37, i32 3 - %39 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %40 = insertelement <4 x float> zeroinitializer, float %39, i32 0 - %41 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %42 = insertelement <4 x float> %40, float %41, i32 1 - %43 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %44 = insertelement <4 x float> %42, float %43, i32 2 - %45 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %46 = insertelement <4 x 
float> %44, float %45, i32 3 - %47 = fadd <4 x float> %38, %46 - %48 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 0), align 4 - %49 = insertelement <4 x float> zeroinitializer, float %48, i32 0 - %50 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 1), align 4 - %51 = insertelement <4 x float> %49, float %50, i32 1 - %52 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 2), align 4 - %53 = insertelement <4 x float> %51, float %52, i32 2 - %54 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @c_in, i64 0, i64 3), align 4 - %55 = insertelement <4 x float> %53, float %54, i32 3 - %56 = fadd <4 x float> %47, %55 - %57 = extractelement <4 x float> %56, i32 0 - store float %57, float* %12, align 16 - %58 = extractelement <4 x float> %56, i32 1 - store float %58, float* %18, align 4 - %59 = extractelement <4 x float> %56, i32 2 - store float %59, float* %24, align 8 - %60 = extractelement <4 x float> %56, i32 3 - store float %60, float* %30, align 4 - %61 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %62 = load float, float* %61, align 16 - %63 = fpext float %62 to double - %64 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %63) - %65 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %66 = load float, float* %65, align 4 - %67 = fpext float %66 to double - %68 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %67) - %69 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %70 = load float, float* %69, align 8 - %71 = fpext float %70 to double - %72 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %71) - %73 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %74 = load float, float* %73, align 4 - %75 = fpext float %74 to double - %76 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %75) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("c_in"), Symbol("0,0,"), Get([7, 8]), Add([6, 9]), Symbol("a_in"), Symbol("0,1,"), Get([11, 12]), Symbol("b_in"), Symbol("0,1,"), Get([14, 15]), Add([13, 16]), Symbol("c_in"), Symbol("0,1,"), Get([18, 19]), Add([17, 20]), Symbol("a_in"), Symbol("0,2,"), Get([22, 23]), Symbol("b_in"), Symbol("0,2,"), Get([25, 26]), Add([24, 27]), Symbol("c_in"), Symbol("0,2,"), Get([29, 30]), Add([28, 31]), Symbol("a_in"), Symbol("0,3,"), Get([33, 34]), Symbol("b_in"), Symbol("0,3,"), Get([36, 37]), Add([35, 38]), Symbol("c_in"), Symbol("0,3,"), Get([40, 41]), Add([39, 42]), Vec([10, 21, 32, 43])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 6 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,3,"), Get([22, 23]), LitVec([15, 18, 21, 24]), VecAdd([12, 25]), Symbol("c_in"), Symbol("0,0,"), Get([27, 28]), Symbol("c_in"), Symbol("0,1,"), Get([30, 31]), Symbol("c_in"), Symbol("0,2,"), Get([33, 34]), Symbol("c_in"), Symbol("0,3,"), 
Get([36, 37]), LitVec([29, 32, 35, 38]), VecAdd([26, 39])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/out_of_order.c b/src/dios-egraphs/Diospyros/llvm-tests/out_of_order.c deleted file mode 100644 index 5469e3df..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/out_of_order.c +++ /dev/null @@ -1,18 +0,0 @@ -#include - -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {5, 6, 7, 8}; - -int main(int argc, char **argv) { - float c_out[4]; - c_out[0] = a_in[0] + b_in[0]; - c_out[3] = a_in[3] + b_in[3]; - c_out[2] = a_in[2] + b_in[2]; - c_out[1] = a_in[1] + b_in[1]; - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - // 6, 8, 10, 12 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/out_of_order.expect b/src/dios-egraphs/Diospyros/llvm-tests/out_of_order.expect deleted file mode 100644 index 58450e4a..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/out_of_order.expect +++ /dev/null @@ -1,70 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %13 = fadd float %11, %12 - %14 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %15 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), 
align 8 - %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 8 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %19 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %20 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %21 = fadd float %19, %20 - %22 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %23 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %24 = insertelement <4 x float> zeroinitializer, float %23, i32 0 - %25 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 3), align 4 - %26 = insertelement <4 x float> %24, float %25, i32 1 - %27 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %28 = insertelement <4 x float> %26, float %27, i32 2 - %29 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 1), align 4 - %30 = insertelement <4 x float> %28, float %29, i32 3 - %31 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %32 = insertelement <4 x float> zeroinitializer, float %31, i32 0 - %33 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 3), align 4 - %34 = insertelement <4 x float> %32, float %33, i32 1 - %35 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 2), align 4 - %36 = insertelement <4 x float> %34, float %35, i32 2 - %37 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %38 = insertelement <4 x float> %36, float %37, i32 3 - %39 = fadd <4 x float> %30, %38 - %40 = extractelement <4 x float> %39, i32 0 - store float %40, float* %10, align 16 - %41 = 
extractelement <4 x float> %39, i32 1 - store float %41, float* %14, align 4 - %42 = extractelement <4 x float> %39, i32 2 - store float %42, float* %18, align 8 - %43 = extractelement <4 x float> %39, i32 3 - store float %43, float* %22, align 4 - %44 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %45 = load float, float* %44, align 16 - %46 = fpext float %45 to double - %47 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %46) - %48 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %49 = load float, float* %48, align 4 - %50 = fpext float %49 to double - %51 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %50) - %52 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %53 = load float, float* %52, align 8 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %54) - %56 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %57 = load float, float* %56, align 4 - %58 = fpext float %57 to double - %59 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %58) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("a_in"), Symbol("0,3,"), Get([7, 8]), Symbol("b_in"), Symbol("0,3,"), Get([10, 11]), Add([9, 12]), Symbol("a_in"), Symbol("0,2,"), Get([14, 15]), Symbol("b_in"), Symbol("0,2,"), Get([17, 18]), Add([16, 19]), Symbol("a_in"), Symbol("0,1,"), Get([21, 22]), Symbol("b_in"), Symbol("0,1,"), Get([24, 25]), Add([23, 26]), Vec([6, 13, 20, 27])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,3,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,1,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,3,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,1,"), Get([22, 23]), LitVec([15, 18, 21, 24]), VecAdd([12, 25])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/point-product.expect b/src/dios-egraphs/Diospyros/llvm-tests/point-product.expect deleted file mode 100644 index ef741a40..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/point-product.expect +++ /dev/null @@ -1,284 +0,0 @@ - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - %7 = load float*, float** %4, align 8 - %8 = getelementptr inbounds float, float* %7, i64 1 - %9 = load float, float* %8, align 4 - %10 = load float*, float** %5, align 8 - %11 = getelementptr inbounds 
float, float* %10, i64 2 - %12 = load float, float* %11, align 4 - %13 = fmul float %9, %12 - %14 = load float*, float** %4, align 8 - %15 = getelementptr inbounds float, float* %14, i64 2 - %16 = load float, float* %15, align 4 - %17 = load float*, float** %5, align 8 - %18 = getelementptr inbounds float, float* %17, i64 1 - %19 = load float, float* %18, align 4 - %20 = fmul float %16, %19 - %21 = fsub float %13, %20 - %22 = load float*, float** %6, align 8 - %23 = getelementptr inbounds float, float* %22, i64 0 - %24 = load float*, float** %4, align 8 - %25 = getelementptr inbounds float, float* %24, i64 2 - %26 = load float, float* %25, align 4 - %27 = load float*, float** %5, align 8 - %28 = getelementptr inbounds float, float* %27, i64 0 - %29 = load float, float* %28, align 4 - %30 = fmul float %26, %29 - %31 = load float*, float** %4, align 8 - %32 = getelementptr inbounds float, float* %31, i64 0 - %33 = load float, float* %32, align 4 - %34 = load float*, float** %5, align 8 - %35 = getelementptr inbounds float, float* %34, i64 2 - %36 = load float, float* %35, align 4 - %37 = fmul float %33, %36 - %38 = fsub float %30, %37 - %39 = load float*, float** %6, align 8 - %40 = getelementptr inbounds float, float* %39, i64 1 - %41 = load float*, float** %4, align 8 - %42 = getelementptr inbounds float, float* %41, i64 0 - %43 = load float, float* %42, align 4 - %44 = load float*, float** %5, align 8 - %45 = getelementptr inbounds float, float* %44, i64 1 - %46 = load float, float* %45, align 4 - %47 = fmul float %43, %46 - %48 = load float*, float** %4, align 8 - %49 = getelementptr inbounds float, float* %48, i64 1 - %50 = load float, float* %49, align 4 - %51 = load float*, float** %5, align 8 - %52 = getelementptr inbounds float, float* %51, i64 0 - %53 = load float, float* %52, align 4 - %54 = fmul float %50, %53 - %55 = fsub float %47, %54 - %56 = load float*, float** %6, align 8 - %57 = getelementptr inbounds float, float* %56, i64 2 - %58 = load float, 
float* %8, align 4 - %59 = insertelement <4 x float> zeroinitializer, float %58, i32 0 - %60 = load float, float* %25, align 4 - %61 = insertelement <4 x float> %59, float %60, i32 1 - %62 = load float, float* %42, align 4 - %63 = insertelement <4 x float> %61, float %62, i32 2 - %64 = insertelement <4 x float> %63, float 1.000000e+00, i32 3 - %65 = load float, float* %11, align 4 - %66 = insertelement <4 x float> zeroinitializer, float %65, i32 0 - %67 = load float, float* %28, align 4 - %68 = insertelement <4 x float> %66, float %67, i32 1 - %69 = load float, float* %45, align 4 - %70 = insertelement <4 x float> %68, float %69, i32 2 - %71 = insertelement <4 x float> %70, float 0.000000e+00, i32 3 - %72 = fmul <4 x float> %64, %71 - %73 = load float, float* %15, align 4 - %74 = insertelement <4 x float> zeroinitializer, float %73, i32 0 - %75 = load float, float* %32, align 4 - %76 = insertelement <4 x float> %74, float %75, i32 1 - %77 = load float, float* %49, align 4 - %78 = insertelement <4 x float> %76, float %77, i32 2 - %79 = insertelement <4 x float> %78, float 1.000000e+00, i32 3 - %80 = load float, float* %18, align 4 - %81 = insertelement <4 x float> zeroinitializer, float %80, i32 0 - %82 = load float, float* %35, align 4 - %83 = insertelement <4 x float> %81, float %82, i32 1 - %84 = load float, float* %52, align 4 - %85 = insertelement <4 x float> %83, float %84, i32 2 - %86 = insertelement <4 x float> %85, float 0.000000e+00, i32 3 - %87 = fmul <4 x float> %79, %86 - %88 = fsub <4 x float> %72, %87 - %89 = extractelement <4 x float> %88, i32 0 - store float %89, float* %23, align 4 - %90 = extractelement <4 x float> %88, i32 1 - store float %90, float* %40, align 4 - %91 = extractelement <4 x float> %88, i32 2 - store float %91, float* %57, align 4 - ret void - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - %7 = alloca [3 x float], align 4 - %8 = alloca [3 x float], align 4 - %9 = alloca i32, align 4 - %10 
= alloca [3 x float], align 4 - %11 = alloca i32, align 4 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - %12 = getelementptr inbounds [3 x float], [3 x float]* %7, i64 0, i64 0 - %13 = load float*, float** %4, align 8 - %14 = getelementptr inbounds float, float* %13, i64 0 - %15 = load float, float* %14, align 4 - store float %15, float* %12, align 4 - %16 = getelementptr inbounds float, float* %12, i64 1 - %17 = load float*, float** %4, align 8 - %18 = getelementptr inbounds float, float* %17, i64 1 - %19 = load float, float* %18, align 4 - store float %19, float* %16, align 4 - %20 = getelementptr inbounds float, float* %16, i64 1 - %21 = load float*, float** %4, align 8 - %22 = getelementptr inbounds float, float* %21, i64 2 - %23 = load float, float* %22, align 4 - store float %23, float* %20, align 4 - %24 = getelementptr inbounds [3 x float], [3 x float]* %7, i64 0, i64 0 - %25 = load float*, float** %5, align 8 - %26 = getelementptr inbounds [3 x float], [3 x float]* %8, i64 0, i64 0 - call void @cross_product(float* %24, float* %25, float* %26) - store i32 0, i32* %9, align 4 - br label %27 - -27: ; preds = %46, %3 - %28 = load i32, i32* %9, align 4 - %29 = icmp slt i32 %28, 3 - br i1 %29, label %30, label %49 - -30: ; preds = %27 - %31 = load i32, i32* %9, align 4 - %32 = sext i32 %31 to i64 - %33 = getelementptr inbounds [3 x float], [3 x float]* %8, i64 0, i64 %32 - %34 = load float, float* %33, align 4 - %35 = fmul float %34, 2.000000e+00 - %36 = load i32, i32* %9, align 4 - %37 = sext i32 %36 to i64 - %38 = getelementptr inbounds [3 x float], [3 x float]* %8, i64 0, i64 %37 - %39 = load float, float* %33, align 4 - %40 = insertelement <4 x float> zeroinitializer, float %39, i32 0 - %41 = insertelement <4 x float> %40, float 0.000000e+00, i32 1 - %42 = insertelement <4 x float> %41, float 0.000000e+00, i32 2 - %43 = insertelement <4 x float> %42, float 0.000000e+00, i32 3 - %44 = 
fmul <4 x float> %43, - %45 = extractelement <4 x float> %44, i32 0 - store float %45, float* %38, align 4 - br label %46 - -46: ; preds = %30 - %47 = load i32, i32* %9, align 4 - %48 = add nsw i32 %47, 1 - store i32 %48, i32* %9, align 4 - br label %27 - -49: ; preds = %27 - %50 = getelementptr inbounds [3 x float], [3 x float]* %7, i64 0, i64 0 - %51 = getelementptr inbounds [3 x float], [3 x float]* %8, i64 0, i64 0 - %52 = getelementptr inbounds [3 x float], [3 x float]* %10, i64 0, i64 0 - call void @cross_product(float* %50, float* %51, float* %52) - store i32 0, i32* %11, align 4 - br label %53 - -53: ; preds = %103, %49 - %54 = load i32, i32* %11, align 4 - %55 = icmp slt i32 %54, 3 - br i1 %55, label %56, label %106 - -56: ; preds = %53 - %57 = load float*, float** %5, align 8 - %58 = load i32, i32* %11, align 4 - %59 = sext i32 %58 to i64 - %60 = getelementptr inbounds float, float* %57, i64 %59 - %61 = load float, float* %60, align 4 - %62 = load float*, float** %4, align 8 - %63 = getelementptr inbounds float, float* %62, i64 3 - %64 = load float, float* %63, align 4 - %65 = load i32, i32* %11, align 4 - %66 = sext i32 %65 to i64 - %67 = getelementptr inbounds [3 x float], [3 x float]* %8, i64 0, i64 %66 - %68 = load float, float* %67, align 4 - %69 = fmul float %64, %68 - %70 = fadd float %61, %69 - %71 = load i32, i32* %11, align 4 - %72 = sext i32 %71 to i64 - %73 = getelementptr inbounds [3 x float], [3 x float]* %10, i64 0, i64 %72 - %74 = load float, float* %73, align 4 - %75 = fadd float %70, %74 - %76 = load float*, float** %6, align 8 - %77 = load i32, i32* %11, align 4 - %78 = sext i32 %77 to i64 - %79 = getelementptr inbounds float, float* %76, i64 %78 - %80 = load float, float* %60, align 4 - %81 = insertelement <4 x float> zeroinitializer, float %80, i32 0 - %82 = insertelement <4 x float> %81, float 0.000000e+00, i32 1 - %83 = insertelement <4 x float> %82, float 0.000000e+00, i32 2 - %84 = insertelement <4 x float> %83, float 
0.000000e+00, i32 3 - %85 = load float, float* %63, align 4 - %86 = insertelement <4 x float> zeroinitializer, float %85, i32 0 - %87 = insertelement <4 x float> %86, float 1.000000e+00, i32 1 - %88 = insertelement <4 x float> %87, float 1.000000e+00, i32 2 - %89 = insertelement <4 x float> %88, float 1.000000e+00, i32 3 - %90 = load float, float* %67, align 4 - %91 = insertelement <4 x float> zeroinitializer, float %90, i32 0 - %92 = insertelement <4 x float> %91, float 0.000000e+00, i32 1 - %93 = insertelement <4 x float> %92, float 0.000000e+00, i32 2 - %94 = insertelement <4 x float> %93, float 0.000000e+00, i32 3 - %95 = call <4 x float> @llvm.fma.f32(<4 x float> %89, <4 x float> %94, <4 x float> %84) - %96 = load float, float* %73, align 4 - %97 = insertelement <4 x float> zeroinitializer, float %96, i32 0 - %98 = insertelement <4 x float> %97, float 0.000000e+00, i32 1 - %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 2 - %100 = insertelement <4 x float> %99, float 0.000000e+00, i32 3 - %101 = fadd <4 x float> %95, %100 - %102 = extractelement <4 x float> %101, i32 0 - store float %102, float* %79, align 4 - br label %103 - -103: ; preds = %56 - %104 = load i32, i32* %11, align 4 - %105 = add nsw i32 %104, 1 - store i32 %105, i32* %11, align 4 - br label %53 - -106: ; preds = %53 - ret void - %1 = alloca i32, align 4 - %2 = alloca [4 x float], align 16 - %3 = alloca [4 x float], align 16 - %4 = alloca [4 x float], align 16 - %5 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %6 = bitcast [4 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %6, i8* align 16 bitcast ([4 x float]* @__const.main.q_in to i8*), i64 16, i1 false) - %7 = bitcast [4 x float]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %7, i8* align 16 bitcast ([4 x float]* @__const.main.p_in to i8*), i64 16, i1 false) - %8 = bitcast [4 x float]* %4 to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %8, i8 0, i64 16, i1 false) - %9 = 
getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 - %10 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0 - %11 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 0 - call void @point_product(float* %9, float* %10, float* %11) - store i32 0, i32* %5, align 4 - br label %12 - -12: ; preds = %22, %0 - %13 = load i32, i32* %5, align 4 - %14 = icmp slt i32 %13, 3 - br i1 %14, label %15, label %25 - -15: ; preds = %12 - %16 = load i32, i32* %5, align 4 - %17 = sext i32 %16 to i64 - %18 = getelementptr inbounds [4 x float], [4 x float]* %4, i64 0, i64 %17 - %19 = load float, float* %18, align 4 - %20 = fpext float %19 to double - %21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %20) - br label %22 - -22: ; preds = %15 - %23 = load i32, i32* %5, align 4 - %24 = add nsw i32 %23, 1 - store i32 %24, i32* %5, align 4 - br label %12 - -25: ; preds = %12 - %26 = load i32, i32* %1, align 4 - ret i32 %26 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("2,"), Get([3, 4]), Mul([2, 5]), Symbol("no-array-name3"), Symbol("2,"), Get([7, 8]), Symbol("no-array-name4"), Symbol("1,"), Get([10, 11]), Mul([9, 12]), Minus([6, 13]), Symbol("no-array-name5"), Symbol("2,"), Get([15, 16]), Symbol("no-array-name6"), Symbol("0,"), Get([18, 19]), Mul([17, 20]), Symbol("no-array-name7"), Symbol("0,"), Get([22, 23]), Symbol("no-array-name8"), Symbol("2,"), Get([25, 26]), Mul([24, 27]), Minus([21, 28]), Symbol("no-array-name9"), Symbol("0,"), Get([30, 31]), Symbol("no-array-name10"), Symbol("1,"), Get([33, 34]), Mul([32, 35]), Symbol("no-array-name11"), Symbol("1,"), Get([37, 38]), Symbol("no-array-name12"), Symbol("0,"), Get([40, 41]), Mul([39, 42]), Minus([36, 43]), Num(0), Vec([14, 29, 44, 45])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus 
neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name1"), Symbol("1,"), Get([0, 1]), Symbol("no-array-name5"), Symbol("2,"), Get([3, 4]), Symbol("no-array-name9"), Symbol("0,"), Get([6, 7]), Num(1), Vec([2, 5, 8, 9]), Symbol("no-array-name2"), Symbol("2,"), Get([11, 12]), Symbol("no-array-name6"), Symbol("0,"), Get([14, 15]), Symbol("no-array-name10"), Symbol("1,"), Get([17, 18]), Num(0), Vec([13, 16, 19, 20]), VecMul([10, 21]), Symbol("no-array-name3"), Symbol("2,"), Get([23, 24]), Symbol("no-array-name7"), Symbol("0,"), Get([26, 27]), Symbol("no-array-name11"), Symbol("1,"), Get([29, 30]), Num(1), Vec([25, 28, 31, 32]), Symbol("no-array-name4"), Symbol("1,"), Get([34, 35]), Symbol("no-array-name8"), Symbol("2,"), Get([37, 38]), Symbol("no-array-name12"), Symbol("0,"), Get([40, 41]), Num(0), Vec([36, 39, 42, 43]), VecMul([33, 44]), VecMinus([22, 45])] -RecExpr { nodes: [Symbol("no-array-name13"), Symbol("0,-1,"), Get([0, 1]), Num(2), Mul([2, 3]), Num(0), Num(0), Num(0), Vec([4, 5, 6, 7])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name13"), Symbol("0,-1,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Num(2), Num(0), Num(0), Num(0), Vec([7, 8, 9, 10]), VecMul([6, 11])] -RecExpr { nodes: [Symbol("no-array-name14"), Symbol("3,"), Get([0, 1]), Symbol("no-array-name15"), Symbol("0,-2,"), Get([3, 4]), Mul([2, 5]), Symbol("no-array-name16"), Symbol("-3,"), Get([7, 8]), Add([9, 6]), Symbol("no-array-name17"), Symbol("0,-4,"), Get([11, 12]), Add([10, 13]), Num(0), Num(0), Num(0), Vec([14, 15, 16, 17])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn 
neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name16"), Symbol("-3,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Symbol("no-array-name14"), Symbol("3,"), Get([7, 8]), Num(1), Num(1), Num(1), Vec([9, 10, 11, 12]), Symbol("no-array-name15"), Symbol("0,-2,"), Get([14, 15]), Num(0), Num(0), Num(0), LitVec([16, 17, 18, 19]), VecMAC([6, 13, 20]), Symbol("no-array-name17"), Symbol("0,-4,"), Get([22, 23]), Num(0), Num(0), Num(0), LitVec([24, 25, 26, 27]), VecAdd([21, 28])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-FAIL.c b/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-FAIL.c deleted file mode 100644 index c19c7117..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/qr-decomp-FAIL.c +++ /dev/null @@ -1,130 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define SIZE 4 - -float sgn(float v) __attribute__((always_inline)); -float naive_norm(float *x, int m) __attribute__((always_inline)); -void naive_transpose(float *a, int n) __attribute__((always_inline)); -void naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1, - int col2) __attribute__((always_inline)); - -float sgn(float v) { return (v > 0) - (v < 0); } - -// Naive implementation -void naive_transpose(float *a, int n) { - for (int i = 0; i < n; i++) { - for (int j = i + 1; j < n; j++) { - float tmp = a[i * n + j]; - a[i * n + j] = a[j * n + i]; - a[j * n + i] = tmp; - } - } -} - -float naive_norm(float *x, int m) { - float sum = 0; - for (int i = 0; i < m; i++) { - sum += pow(x[i], 2); - } - return sqrt(sum); -} - -void naive_matrix_multiply(float *a, float *b, float *c, int row1, int col1, - int col2) { - for (int y = 0; y < row1; y++) { - for (int x = 0; x < col2; x++) { - c[col2 * y + x] = 0; - for (int k = 0; k < col1; k++) { - c[col2 * y + x] += a[col1 * y + 
k] * b[col2 * k + x]; - } - } - } -} - -void naive_qr_decomp(float *A, float *Q, float *R, int n) { - memcpy(R, A, sizeof(float) * n * n); - - // Build identity matrix of size n * n - float *I = (float *)calloc(sizeof(float), n * n); - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - I[i * n + j] = (i == j); - } - } - - // Householder - for (int k = 0; k < n - 1; k++) { - int m = n - k; - - float *x = (float *)calloc(sizeof(float), m); - float *e = (float *)calloc(sizeof(float), m); - for (int i = 0; i < m; i++) { - int row = k + i; - x[i] = R[row * n + k]; - e[i] = I[row * n + k]; - } - - float alpha = -sgn(x[0]) * naive_norm(x, m); - - float *u = (float *)calloc(sizeof(float), m); - float *v = (float *)calloc(sizeof(float), m); - for (int i = 0; i < m; i++) { - u[i] = x[i] + alpha * e[i]; - } - float norm_u = naive_norm(u, m); - for (int i = 0; i < m; i++) { - v[i] = u[i] / norm_u; - } - - float *q_min = (float *)calloc(sizeof(float), m * m); - for (int i = 0; i < m; i++) { - for (int j = 0; j < m; j++) { - float q_min_i = ((i == j) ? 1.0 : 0.0) - 2 * v[i] * v[j]; - q_min[i * m + j] = q_min_i; - } - } - - float *q_t = (float *)calloc(sizeof(float), n * n); - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - float q_t_i; - if ((i < k) || (j < k)) { - q_t_i = (i == j) ? 
1.0 : 0.0; - } else { - q_t_i = q_min[(i - k) * m + (j - k)]; - } - q_t[i * n + j] = q_t_i; - } - } - - if (k == 0) { - memcpy(Q, q_t, sizeof(float) * n * n); // Q = q_t - naive_matrix_multiply(q_t, A, R, n, n, n); // R = q_t * A - } else { - float *res = (float *)calloc(sizeof(float), n * n); - naive_matrix_multiply(q_t, Q, res, n, n, n); // R = q_t * A - memcpy(Q, res, sizeof(float) * n * n); - naive_matrix_multiply(q_t, R, res, n, n, n); // R = q_t * A - memcpy(R, res, sizeof(float) * n * n); - } - free(x); - free(e); - free(u); - free(v); - free(q_min); - free(q_t); - } - naive_transpose(Q, n); -} - -int main(void) { - float A[SIZE * SIZE] = {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4}; - float Q[SIZE * SIZE] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - float R[SIZE * SIZE] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - naive_qr_decomp(A, Q, R, SIZE); -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/return.c b/src/dios-egraphs/Diospyros/llvm-tests/return.c deleted file mode 100644 index b9c73033..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/return.c +++ /dev/null @@ -1,28 +0,0 @@ -#include -#define SIZE 8 - -void return_test(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { - for (int i = 0; i < SIZE; i++) { - if (i == SIZE / 2) return; - b_out[i] = a_in[i] * scalar_in; - } - b_out[SIZE / 2] = a_in[SIZE / 2] * scalar_in; // shouldn't run -} - -int main(void) { - float a_in[SIZE] = {9, 8, 7, 6, 5, 4, 3, 2}; - float scalar_in = 10; - float b_out[SIZE] = {0, 0, 0, 0, 0, 0, 0, 0}; - return_test(a_in, scalar_in, b_out); - for (int i = 0; i < SIZE; i++) { - printf("%f\n", b_out[i]); - } - // 90.000000 - // 80.000000 - // 70.000000 - // 60.000000 - // 0.000000 - // 0.000000 - // 0.000000 - // 0.000000 -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/scalar.c b/src/dios-egraphs/Diospyros/llvm-tests/scalar.c deleted file mode 100644 index 773b9233..00000000 --- 
a/src/dios-egraphs/Diospyros/llvm-tests/scalar.c +++ /dev/null @@ -1,18 +0,0 @@ -#include -#define SIZE 8 - -void matrix_multiply(float a_in[SIZE], float scalar_in, float b_out[SIZE]) { - for (int i = 0; i < SIZE; i++) { - b_out[i] = a_in[i] * scalar_in; - } -} - -int main(void) { - float a_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; - float scalar_in = 10; - float b_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; - matrix_multiply(a_in, scalar_in, b_in); - for (int i = 0; i < SIZE; i++) { - printf("%f\n", b_in[i]); - } -} diff --git a/src/dios-egraphs/Diospyros/llvm-tests/scalar.expect b/src/dios-egraphs/Diospyros/llvm-tests/scalar.expect deleted file mode 100644 index 5dc6cfb5..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/scalar.expect +++ /dev/null @@ -1,95 +0,0 @@ - %4 = alloca float*, align 8 - %5 = alloca float, align 4 - %6 = alloca float*, align 8 - %7 = alloca i32, align 4 - store float* %0, float** %4, align 8 - store float %1, float* %5, align 4 - store float* %2, float** %6, align 8 - store i32 0, i32* %7, align 4 - br label %8 - -8: ; preds = %34, %3 - %9 = load i32, i32* %7, align 4 - %10 = icmp slt i32 %9, 8 - br i1 %10, label %11, label %37 - -11: ; preds = %8 - %12 = load float*, float** %4, align 8 - %13 = load i32, i32* %7, align 4 - %14 = sext i32 %13 to i64 - %15 = getelementptr inbounds float, float* %12, i64 %14 - %16 = load float, float* %15, align 4 - %17 = load float, float* %5, align 4 - %18 = fmul float %16, %17 - %19 = load float*, float** %6, align 8 - %20 = load i32, i32* %7, align 4 - %21 = sext i32 %20 to i64 - %22 = getelementptr inbounds float, float* %19, i64 %21 - %23 = load float, float* %15, align 4 - %24 = insertelement <4 x float> zeroinitializer, float %23, i32 0 - %25 = insertelement <4 x float> %24, float 0.000000e+00, i32 1 - %26 = insertelement <4 x float> %25, float 0.000000e+00, i32 2 - %27 = insertelement <4 x float> %26, float 0.000000e+00, i32 3 - %28 = insertelement <4 x float> zeroinitializer, float %17, i32 0 - %29 = 
insertelement <4 x float> %28, float 0.000000e+00, i32 1 - %30 = insertelement <4 x float> %29, float 0.000000e+00, i32 2 - %31 = insertelement <4 x float> %30, float 0.000000e+00, i32 3 - %32 = fmul <4 x float> %27, %31 - %33 = extractelement <4 x float> %32, i32 0 - store float %33, float* %22, align 4 - br label %34 - -34: ; preds = %11 - %35 = load i32, i32* %7, align 4 - %36 = add nsw i32 %35, 1 - store i32 %36, i32* %7, align 4 - br label %8 - -37: ; preds = %8 - ret void - %1 = alloca i32, align 4 - %2 = alloca [8 x float], align 16 - %3 = alloca float, align 4 - %4 = alloca [8 x float], align 16 - %5 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %6 = bitcast [8 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %6, i8* align 16 bitcast ([8 x float]* @__const.main.a_in to i8*), i64 32, i1 false) - store float 1.000000e+01, float* %3, align 4 - %7 = bitcast [8 x float]* %4 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %7, i8* align 16 bitcast ([8 x float]* @__const.main.b_in to i8*), i64 32, i1 false) - %8 = getelementptr inbounds [8 x float], [8 x float]* %2, i64 0, i64 0 - %9 = load float, float* %3, align 4 - %10 = getelementptr inbounds [8 x float], [8 x float]* %4, i64 0, i64 0 - call void @matrix_multiply(float* %8, float %9, float* %10) - store i32 0, i32* %5, align 4 - br label %11 - -11: ; preds = %21, %0 - %12 = load i32, i32* %5, align 4 - %13 = icmp slt i32 %12, 8 - br i1 %13, label %14, label %24 - -14: ; preds = %11 - %15 = load i32, i32* %5, align 4 - %16 = sext i32 %15 to i64 - %17 = getelementptr inbounds [8 x float], [8 x float]* %4, i64 0, i64 %16 - %18 = load float, float* %17, align 4 - %19 = fpext float %18 to double - %20 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %19) - br label %21 - -21: ; preds = %14 - %22 = load i32, i32* %5, align 4 - %23 = add nsw i32 %22, 1 - store i32 %23, i32* %5, align 4 - br label %11 - -24: ; preds = %11 - %25 = load i32, i32* %1, align 4 - ret i32 %25 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Symbol("no-temp-name1"), Mul([2, 3]), Num(0), Num(0), Num(0), Vec([4, 5, 6, 7])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Num(0), Num(0), Num(0), LitVec([2, 3, 4, 5]), Symbol("no-temp-name1"), Num(0), Num(0), Num(0), Vec([7, 8, 9, 10]), VecMul([6, 11])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.expect b/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.expect deleted file mode 100644 index 019bdfc8..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/stencil-2d.expect +++ /dev/null @@ -1,181 +0,0 @@ - %4 = alloca float*, align 8 - %5 = alloca float*, align 8 - %6 = alloca float*, align 8 - %7 = alloca i32, align 4 - %8 = alloca i32, align 4 - %9 = alloca float, align 4 - %10 = alloca i32, align 4 - %11 = alloca i32, align 4 - store float* %0, float** %4, align 8 - store float* %1, float** %5, align 8 - store float* %2, float** %6, align 8 - store i32 0, i32* %7, align 4 - br label %12 - -12: ; preds = %87, %3 - %13 = load i32, i32* %7, align 4 - %14 = icmp slt i32 %13, 6 - br i1 %14, label %15, label %90 - -15: ; preds = %12 - store i32 0, i32* %8, align 4 - br label %16 - -16: ; preds = %83, %15 - %17 = load i32, i32* %8, align 4 - %18 = icmp slt i32 %17, 2 - br i1 %18, label %19, label %86 - -19: ; preds = %16 - store float 0.000000e+00, float* %9, align 4 - store 
i32 0, i32* %10, align 4 - br label %20 - -20: ; preds = %71, %19 - %21 = load i32, i32* %10, align 4 - %22 = icmp slt i32 %21, 3 - br i1 %22, label %23, label %74 - -23: ; preds = %20 - store i32 0, i32* %11, align 4 - br label %24 - -24: ; preds = %67, %23 - %25 = load i32, i32* %11, align 4 - %26 = icmp slt i32 %25, 3 - br i1 %26, label %27, label %70 - -27: ; preds = %24 - %28 = load float*, float** %6, align 8 - %29 = load i32, i32* %10, align 4 - %30 = mul nsw i32 %29, 3 - %31 = load i32, i32* %11, align 4 - %32 = add nsw i32 %30, %31 - %33 = sext i32 %32 to i64 - %34 = getelementptr inbounds float, float* %28, i64 %33 - %35 = load float, float* %34, align 4 - %36 = load float*, float** %4, align 8 - %37 = load i32, i32* %7, align 4 - %38 = load i32, i32* %10, align 4 - %39 = add nsw i32 %37, %38 - %40 = mul nsw i32 %39, 4 - %41 = load i32, i32* %8, align 4 - %42 = add nsw i32 %40, %41 - %43 = load i32, i32* %11, align 4 - %44 = add nsw i32 %42, %43 - %45 = sext i32 %44 to i64 - %46 = getelementptr inbounds float, float* %36, i64 %45 - %47 = load float, float* %46, align 4 - %48 = fmul float %35, %47 - %49 = load float, float* %9, align 4 - %50 = fadd float %49, %48 - %51 = insertelement <4 x float> zeroinitializer, float %49, i32 0 - %52 = insertelement <4 x float> %51, float 0.000000e+00, i32 1 - %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 2 - %54 = insertelement <4 x float> %53, float 0.000000e+00, i32 3 - %55 = load float, float* %34, align 4 - %56 = insertelement <4 x float> zeroinitializer, float %55, i32 0 - %57 = insertelement <4 x float> %56, float 0.000000e+00, i32 1 - %58 = insertelement <4 x float> %57, float 0.000000e+00, i32 2 - %59 = insertelement <4 x float> %58, float 0.000000e+00, i32 3 - %60 = load float, float* %46, align 4 - %61 = insertelement <4 x float> zeroinitializer, float %60, i32 0 - %62 = insertelement <4 x float> %61, float 0.000000e+00, i32 1 - %63 = insertelement <4 x float> %62, float 0.000000e+00, i32 2 - 
%64 = insertelement <4 x float> %63, float 0.000000e+00, i32 3 - %65 = call <4 x float> @llvm.fma.f32(<4 x float> %59, <4 x float> %64, <4 x float> %54) - %66 = extractelement <4 x float> %65, i32 0 - store float %66, float* %9, align 4 - br label %67 - -67: ; preds = %27 - %68 = load i32, i32* %11, align 4 - %69 = add nsw i32 %68, 1 - store i32 %69, i32* %11, align 4 - br label %24 - -70: ; preds = %24 - br label %71 - -71: ; preds = %70 - %72 = load i32, i32* %10, align 4 - %73 = add nsw i32 %72, 1 - store i32 %73, i32* %10, align 4 - br label %20 - -74: ; preds = %20 - %75 = load float, float* %9, align 4 - %76 = load float*, float** %5, align 8 - %77 = load i32, i32* %7, align 4 - %78 = mul nsw i32 %77, 4 - %79 = load i32, i32* %8, align 4 - %80 = add nsw i32 %78, %79 - %81 = sext i32 %80 to i64 - %82 = getelementptr inbounds float, float* %76, i64 %81 - store float %75, float* %82, align 4 - br label %83 - -83: ; preds = %74 - %84 = load i32, i32* %8, align 4 - %85 = add nsw i32 %84, 1 - store i32 %85, i32* %8, align 4 - br label %16 - -86: ; preds = %16 - br label %87 - -87: ; preds = %86 - %88 = load i32, i32* %7, align 4 - %89 = add nsw i32 %88, 1 - store i32 %89, i32* %7, align 4 - br label %12 - -90: ; preds = %12 - ret void - %1 = alloca i32, align 4 - %2 = alloca [32 x float], align 16 - %3 = alloca [32 x float], align 16 - %4 = alloca [9 x float], align 16 - %5 = alloca i32, align 4 - store i32 0, i32* %1, align 4 - %6 = bitcast [32 x float]* %2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %6, i8* align 16 bitcast ([32 x float]* @__const.main.orig_in to i8*), i64 128, i1 false) - %7 = bitcast [32 x float]* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %7, i8* align 16 bitcast ([32 x float]* @__const.main.sol_out to i8*), i64 128, i1 false) - %8 = bitcast [9 x float]* %4 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %8, i8* align 16 bitcast ([9 x float]* @__const.main.filter_in to i8*), i64 36, i1 false) - %9 = 
getelementptr inbounds [32 x float], [32 x float]* %2, i64 0, i64 0 - %10 = getelementptr inbounds [32 x float], [32 x float]* %3, i64 0, i64 0 - %11 = getelementptr inbounds [9 x float], [9 x float]* %4, i64 0, i64 0 - call void @stencil(float* %9, float* %10, float* %11) - store i32 0, i32* %5, align 4 - br label %12 - -12: ; preds = %22, %0 - %13 = load i32, i32* %5, align 4 - %14 = icmp slt i32 %13, 32 - br i1 %14, label %15, label %25 - -15: ; preds = %12 - %16 = load i32, i32* %5, align 4 - %17 = sext i32 %16 to i64 - %18 = getelementptr inbounds [32 x float], [32 x float]* %3, i64 0, i64 %17 - %19 = load float, float* %18, align 4 - %20 = fpext float %19 to double - %21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %20) - br label %22 - -22: ; preds = %15 - %23 = load i32, i32* %5, align 4 - %24 = add nsw i32 %23, 1 - store i32 %24, i32* %5, align 4 - br label %12 - -25: ; preds = %12 - %26 = load i32, i32* %1, align 4 - ret i32 %26 ----STDERR--- -RecExpr { nodes: [Symbol("no-array-name1"), Symbol("-1,"), Get([0, 1]), Symbol("no-array-name2"), Symbol("-2,"), Get([3, 4]), Mul([2, 5]), Symbol("no-temp-name1"), Add([7, 6]), Num(0), Num(0), Num(0), Vec([8, 9, 10, 11])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("no-temp-name1"), Num(0), Num(0), Num(0), Vec([0, 1, 2, 3]), Symbol("no-array-name1"), Symbol("-1,"), Get([5, 6]), Num(0), Num(0), Num(0), LitVec([7, 8, 9, 10]), Symbol("no-array-name2"), Symbol("-2,"), Get([12, 13]), Num(0), Num(0), Num(0), LitVec([14, 15, 16, 17]), VecMAC([4, 11, 18])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/ternary.c b/src/dios-egraphs/Diospyros/llvm-tests/ternary.c deleted file mode 100644 index bc667fe2..00000000 --- 
a/src/dios-egraphs/Diospyros/llvm-tests/ternary.c +++ /dev/null @@ -1,26 +0,0 @@ -#include -#define SIZE 8 - -void tern(float a_in[SIZE], float b_out[SIZE]) { - for (int i = 0; i < SIZE; i++) { - b_out[i] = (i < SIZE / 2) ? a_in[i] : 0; - } -} - -int main(int argc, char **argv) { - float a_in[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8}; - float b_out[SIZE] = {5, 6, 7, 8, 1, 2, 3, 4}; - tern(a_in, b_out); - for (int i = 0; i < SIZE; i++) { - printf("%f\n", b_out[i]); - } - // 1.000000 - // 2.000000 - // 3.000000 - // 4.000000 - // 0.000000 - // 0.000000 - // 0.000000 - // 0.000000 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/var.c b/src/dios-egraphs/Diospyros/llvm-tests/var.c deleted file mode 100644 index de55a2e9..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/var.c +++ /dev/null @@ -1,20 +0,0 @@ -#include - -float a_in[] = {1, 2, 3, 4}; -float b_in[] = {5, 6, 7, 8}; -float t1 = 10; -float t2 = 20; - -int main(int argc, char **argv) { - float c_out[4]; - c_out[0] = a_in[0] + b_in[0]; - c_out[1] = t1 + b_in[1]; - c_out[2] = a_in[2] + t2; - c_out[3] = t2 + t1; - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - // expected: 6, 16, 23, 30 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/var.expect b/src/dios-egraphs/Diospyros/llvm-tests/var.expect deleted file mode 100644 index 9d1ce417..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/var.expect +++ /dev/null @@ -1,66 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [4 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, 
i64 0), align 16 - %9 = fadd float %7, %8 - %10 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %11 = load float, float* @t1, align 4 - %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %13 = fadd float %11, %12 - %14 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %15 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 8 - %16 = load float, float* @t2, align 4 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %19 = load float, float* @t2, align 4 - %20 = load float, float* @t1, align 4 - %21 = fadd float %19, %20 - %22 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %23 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 0), align 4 - %24 = insertelement <4 x float> zeroinitializer, float %23, i32 0 - %25 = insertelement <4 x float> %24, float %20, i32 1 - %26 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @a_in, i64 0, i64 2), align 4 - %27 = insertelement <4 x float> %25, float %26, i32 2 - %28 = insertelement <4 x float> %27, float %19, i32 3 - %29 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 0), align 4 - %30 = insertelement <4 x float> zeroinitializer, float %29, i32 0 - %31 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @b_in, i64 0, i64 1), align 4 - %32 = insertelement <4 x float> %30, float %31, i32 1 - %33 = insertelement <4 x float> %32, float %19, i32 2 - %34 = insertelement <4 x float> %33, float %20, i32 3 - %35 = fadd <4 x float> %28, %34 - %36 = extractelement <4 x float> %35, i32 0 - store float %36, float* %10, align 16 - %37 = extractelement <4 x float> %35, i32 1 - store float %37, float* %14, align 4 - %38 = extractelement <4 x float> %35, i32 2 - store float %38, float* %18, align 8 - %39 = 
extractelement <4 x float> %35, i32 3 - store float %39, float* %22, align 4 - %40 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 0 - %41 = load float, float* %40, align 16 - %42 = fpext float %41 to double - %43 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %42) - %44 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 1 - %45 = load float, float* %44, align 4 - %46 = fpext float %45 to double - %47 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %46) - %48 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 2 - %49 = load float, float* %48, align 8 - %50 = fpext float %49 to double - %51 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %50) - %52 = getelementptr inbounds [4 x float], [4 x float]* %6, i64 0, i64 3 - %53 = load float, float* %52, align 4 - %54 = fpext float %53 to double - %55 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %54) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("t1"), Symbol("b_in"), Symbol("0,1,"), Get([8, 9]), Add([7, 10]), Symbol("a_in"), Symbol("0,2,"), Get([12, 13]), Symbol("t2"), Add([14, 15]), Symbol("t2"), Symbol("t1"), Add([17, 18]), Vec([6, 11, 16, 19])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("t1"), Symbol("a_in"), Symbol("0,2,"), Get([4, 5]), Symbol("t2"), Vec([2, 3, 6, 7]), Symbol("b_in"), Symbol("0,0,"), Get([9, 10]), Symbol("b_in"), Symbol("0,1,"), Get([12, 13]), Symbol("t2"), Symbol("t1"), Vec([11, 14, 15, 16]), VecAdd([8, 17])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/width5.c b/src/dios-egraphs/Diospyros/llvm-tests/width5.c deleted file mode 100644 index 06560b25..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/width5.c +++ /dev/null @@ -1,20 +0,0 @@ -#include - -float a_in[] = {1, 2, 3, 4, 5}; -float b_in[] = {6, 7, 8, 9, 10}; - -int main(int argc, char **argv) { - float c_out[5]; - c_out[0] = a_in[0] + b_in[0]; - c_out[1] = a_in[1] + b_in[1]; - c_out[2] = a_in[2] + b_in[2]; - c_out[3] = a_in[3] + b_in[3]; - c_out[4] = a_in[4] + b_in[4]; - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - printf("fifth: %f\n", c_out[4]); - // expected: 7, 9, 11, 13, 15 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/width5.expect b/src/dios-egraphs/Diospyros/llvm-tests/width5.expect deleted file mode 100644 index fc2dfab9..00000000 --- 
a/src/dios-egraphs/Diospyros/llvm-tests/width5.expect +++ /dev/null @@ -1,92 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [5 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 0 - %11 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 1), align 4 - %12 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 1), align 4 - %13 = fadd float %11, %12 - %14 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 1 - %15 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 2), align 8 - %16 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 2), align 8 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 2 - %19 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 3), align 4 - %20 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 3), align 4 - %21 = fadd float %19, %20 - %22 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 3 - %23 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 4), align 16 - %24 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 4), align 16 - %25 = fadd float %23, %24 - %26 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 4 - %27 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 0), align 4 - %28 = 
insertelement <4 x float> zeroinitializer, float %27, i32 0 - %29 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 1), align 4 - %30 = insertelement <4 x float> %28, float %29, i32 1 - %31 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 2), align 4 - %32 = insertelement <4 x float> %30, float %31, i32 2 - %33 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 3), align 4 - %34 = insertelement <4 x float> %32, float %33, i32 3 - %35 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 0), align 4 - %36 = insertelement <4 x float> zeroinitializer, float %35, i32 0 - %37 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 1), align 4 - %38 = insertelement <4 x float> %36, float %37, i32 1 - %39 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 2), align 4 - %40 = insertelement <4 x float> %38, float %39, i32 2 - %41 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 3), align 4 - %42 = insertelement <4 x float> %40, float %41, i32 3 - %43 = fadd <4 x float> %34, %42 - %44 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @a_in, i64 0, i64 4), align 4 - %45 = insertelement <4 x float> zeroinitializer, float %44, i32 0 - %46 = insertelement <4 x float> %45, float 0.000000e+00, i32 1 - %47 = insertelement <4 x float> %46, float 0.000000e+00, i32 2 - %48 = insertelement <4 x float> %47, float 0.000000e+00, i32 3 - %49 = load float, float* getelementptr inbounds ([5 x float], [5 x float]* @b_in, i64 0, i64 4), align 4 - %50 = insertelement <4 x float> zeroinitializer, float %49, i32 0 - %51 = insertelement <4 x float> %50, float 0.000000e+00, i32 1 - %52 = insertelement <4 x float> %51, float 0.000000e+00, i32 2 - %53 = insertelement <4 x float> %52, float 0.000000e+00, i32 3 - %54 = fadd 
<4 x float> %48, %53 - %55 = shufflevector <4 x float> %43, <4 x float> %54, <8 x i32> - %56 = extractelement <8 x float> %55, i32 0 - store float %56, float* %10, align 16 - %57 = extractelement <8 x float> %55, i32 1 - store float %57, float* %14, align 4 - %58 = extractelement <8 x float> %55, i32 2 - store float %58, float* %18, align 8 - %59 = extractelement <8 x float> %55, i32 3 - store float %59, float* %22, align 4 - %60 = extractelement <8 x float> %55, i32 4 - store float %60, float* %26, align 16 - %61 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 0 - %62 = load float, float* %61, align 16 - %63 = fpext float %62 to double - %64 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %63) - %65 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 1 - %66 = load float, float* %65, align 4 - %67 = fpext float %66 to double - %68 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %67) - %69 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 2 - %70 = load float, float* %69, align 8 - %71 = fpext float %70 to double - %72 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %71) - %73 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 3 - %74 = load float, float* %73, align 4 - %75 = fpext float %74 to double - %76 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %75) - %77 = getelementptr inbounds [5 x float], [5 x float]* %6, i64 0, i64 4 - %78 = load float, float* %77, align 16 - %79 = fpext float %78 to double - %80 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.4, i64 0, i64 0), double %79) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("a_in"), Symbol("0,1,"), Get([7, 8]), Symbol("b_in"), Symbol("0,1,"), Get([10, 11]), Add([9, 12]), Symbol("a_in"), Symbol("0,2,"), Get([14, 15]), Symbol("b_in"), Symbol("0,2,"), Get([17, 18]), Add([16, 19]), Symbol("a_in"), Symbol("0,3,"), Get([21, 22]), Symbol("b_in"), Symbol("0,3,"), Get([24, 25]), Add([23, 26]), Symbol("a_in"), Symbol("0,4,"), Get([28, 29]), Symbol("b_in"), Symbol("0,4,"), Get([31, 32]), Add([30, 33]), Vec([6, 13, 20, 27]), Num(0), Num(0), Num(0), Vec([34, 36, 37, 38]), Concat([35, 39])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,3,"), Get([22, 23]), LitVec([15, 18, 21, 24]), VecAdd([12, 25]), Symbol("a_in"), Symbol("0,4,"), Get([27, 28]), Num(0), Num(0), Num(0), LitVec([29, 30, 31, 32]), Symbol("b_in"), Symbol("0,4,"), Get([34, 35]), Num(0), Num(0), Num(0), LitVec([36, 37, 38, 39]), VecAdd([33, 40]), Concat([26, 41])] diff --git a/src/dios-egraphs/Diospyros/llvm-tests/width9.c b/src/dios-egraphs/Diospyros/llvm-tests/width9.c deleted file mode 100644 index 536ff7c3..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/width9.c +++ /dev/null @@ -1,28 +0,0 @@ -#include - -float a_in[] = {1, 2, 3, 4, 5, 
6, 7, 8, 9}; -float b_in[] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - -int main(int argc, char **argv) { - float c_out[9]; - c_out[0] = a_in[0] + b_in[0]; - c_out[1] = a_in[1] + b_in[1]; - c_out[2] = a_in[2] + b_in[2]; - c_out[3] = a_in[3] + b_in[3]; - c_out[4] = a_in[4] + b_in[4]; - c_out[5] = a_in[5] + b_in[5]; - c_out[6] = a_in[6] + b_in[6]; - c_out[7] = a_in[7] + b_in[7]; - c_out[8] = a_in[8] + b_in[8]; - printf("first: %f\n", c_out[0]); - printf("second: %f\n", c_out[1]); - printf("third: %f\n", c_out[2]); - printf("fourth: %f\n", c_out[3]); - printf("fifth: %f\n", c_out[4]); - printf("sixth: %f\n", c_out[5]); - printf("seventh: %f\n", c_out[6]); - printf("eight: %f\n", c_out[7]); - printf("ninth: %f\n", c_out[8]); - // expected: 2, 4, 6, 8, 10, 12, 14, 16, 18 - return 0; -} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/llvm-tests/width9.expect b/src/dios-egraphs/Diospyros/llvm-tests/width9.expect deleted file mode 100644 index 887b8fd1..00000000 --- a/src/dios-egraphs/Diospyros/llvm-tests/width9.expect +++ /dev/null @@ -1,150 +0,0 @@ - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, align 8 - %6 = alloca [9 x float], align 16 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - %7 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 0), align 16 - %8 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 0), align 16 - %9 = fadd float %7, %8 - %10 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 0 - %11 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 1), align 4 - %12 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 1), align 4 - %13 = fadd float %11, %12 - %14 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 1 - %15 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, 
i64 0, i64 2), align 8 - %16 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 2), align 8 - %17 = fadd float %15, %16 - %18 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 2 - %19 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 3), align 4 - %20 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 3), align 4 - %21 = fadd float %19, %20 - %22 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 3 - %23 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 4), align 16 - %24 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 4), align 16 - %25 = fadd float %23, %24 - %26 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 4 - %27 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 5), align 4 - %28 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 5), align 4 - %29 = fadd float %27, %28 - %30 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 5 - %31 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 6), align 8 - %32 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 6), align 8 - %33 = fadd float %31, %32 - %34 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 6 - %35 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 7), align 4 - %36 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 7), align 4 - %37 = fadd float %35, %36 - %38 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 7 - %39 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 8), align 16 - %40 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* 
@b_in, i64 0, i64 8), align 16 - %41 = fadd float %39, %40 - %42 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 8 - %43 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 0), align 4 - %44 = insertelement <4 x float> zeroinitializer, float %43, i32 0 - %45 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 1), align 4 - %46 = insertelement <4 x float> %44, float %45, i32 1 - %47 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 2), align 4 - %48 = insertelement <4 x float> %46, float %47, i32 2 - %49 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 3), align 4 - %50 = insertelement <4 x float> %48, float %49, i32 3 - %51 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 0), align 4 - %52 = insertelement <4 x float> zeroinitializer, float %51, i32 0 - %53 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 1), align 4 - %54 = insertelement <4 x float> %52, float %53, i32 1 - %55 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 2), align 4 - %56 = insertelement <4 x float> %54, float %55, i32 2 - %57 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 3), align 4 - %58 = insertelement <4 x float> %56, float %57, i32 3 - %59 = fadd <4 x float> %50, %58 - %60 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 4), align 4 - %61 = insertelement <4 x float> zeroinitializer, float %60, i32 0 - %62 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 5), align 4 - %63 = insertelement <4 x float> %61, float %62, i32 1 - %64 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 6), align 4 - %65 = insertelement <4 x float> %63, float %64, i32 2 - 
%66 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 7), align 4 - %67 = insertelement <4 x float> %65, float %66, i32 3 - %68 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 4), align 4 - %69 = insertelement <4 x float> zeroinitializer, float %68, i32 0 - %70 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 5), align 4 - %71 = insertelement <4 x float> %69, float %70, i32 1 - %72 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 6), align 4 - %73 = insertelement <4 x float> %71, float %72, i32 2 - %74 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 7), align 4 - %75 = insertelement <4 x float> %73, float %74, i32 3 - %76 = fadd <4 x float> %67, %75 - %77 = shufflevector <4 x float> %59, <4 x float> %76, <8 x i32> - %78 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @a_in, i64 0, i64 8), align 4 - %79 = insertelement <4 x float> zeroinitializer, float %78, i32 0 - %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 1 - %81 = insertelement <4 x float> %80, float 0.000000e+00, i32 2 - %82 = insertelement <4 x float> %81, float 0.000000e+00, i32 3 - %83 = load float, float* getelementptr inbounds ([9 x float], [9 x float]* @b_in, i64 0, i64 8), align 4 - %84 = insertelement <4 x float> zeroinitializer, float %83, i32 0 - %85 = insertelement <4 x float> %84, float 0.000000e+00, i32 1 - %86 = insertelement <4 x float> %85, float 0.000000e+00, i32 2 - %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 3 - %88 = fadd <4 x float> %82, %87 - %89 = shufflevector <8 x float> %77, <4 x float> %88, <12 x i32> - %90 = extractelement <12 x float> %89, i32 0 - store float %90, float* %10, align 16 - %91 = extractelement <12 x float> %89, i32 1 - store float %91, float* %14, align 4 - %92 = extractelement <12 x float> %89, i32 2 - store float 
%92, float* %18, align 8 - %93 = extractelement <12 x float> %89, i32 3 - store float %93, float* %22, align 4 - %94 = extractelement <12 x float> %89, i32 4 - store float %94, float* %26, align 16 - %95 = extractelement <12 x float> %89, i32 5 - store float %95, float* %30, align 4 - %96 = extractelement <12 x float> %89, i32 6 - store float %96, float* %34, align 8 - %97 = extractelement <12 x float> %89, i32 7 - store float %97, float* %38, align 4 - %98 = extractelement <12 x float> %89, i32 8 - store float %98, float* %42, align 16 - %99 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 0 - %100 = load float, float* %99, align 16 - %101 = fpext float %100 to double - %102 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), double %101) - %103 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 1 - %104 = load float, float* %103, align 4 - %105 = fpext float %104 to double - %106 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0), double %105) - %107 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 2 - %108 = load float, float* %107, align 8 - %109 = fpext float %108 to double - %110 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), double %109) - %111 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 3 - %112 = load float, float* %111, align 4 - %113 = fpext float %112 to double - %114 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0), double %113) - %115 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 4 - %116 = load float, float* %115, align 16 - %117 = fpext float %116 to double - %118 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.4, i64 0, i64 0), double %117) - %119 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 5 - %120 = load float, float* %119, align 4 - %121 = fpext float %120 to double - %122 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.5, i64 0, i64 0), double %121) - %123 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 6 - %124 = load float, float* %123, align 8 - %125 = fpext float %124 to double - %126 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.6, i64 0, i64 0), double %125) - %127 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 7 - %128 = load float, float* %127, align 4 - %129 = fpext float %128 to double - %130 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.7, i64 0, i64 0), double %129) - %131 = getelementptr inbounds [9 x float], [9 x float]* %6, i64 0, i64 8 - %132 = load float, float* %131, align 16 - %133 = fpext float %132 to double - %134 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.8, i64 0, i64 0), double %133) - ret i32 0 ----STDERR--- -RecExpr { nodes: [Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("b_in"), Symbol("0,0,"), Get([3, 4]), Add([2, 5]), Symbol("a_in"), Symbol("0,1,"), Get([7, 8]), Symbol("b_in"), Symbol("0,1,"), Get([10, 11]), Add([9, 12]), Symbol("a_in"), Symbol("0,2,"), Get([14, 15]), Symbol("b_in"), Symbol("0,2,"), Get([17, 18]), Add([16, 19]), Symbol("a_in"), Symbol("0,3,"), Get([21, 22]), Symbol("b_in"), Symbol("0,3,"), Get([24, 25]), Add([23, 26]), Symbol("a_in"), Symbol("0,4,"), Get([28, 29]), Symbol("b_in"), Symbol("0,4,"), Get([31, 32]), Add([30, 33]), Symbol("a_in"), Symbol("0,5,"), Get([35, 36]), Symbol("b_in"), Symbol("0,5,"), Get([38, 39]), Add([37, 40]), Symbol("a_in"), Symbol("0,6,"), Get([42, 43]), Symbol("b_in"), Symbol("0,6,"), Get([45, 46]), Add([44, 47]), Symbol("a_in"), Symbol("0,7,"), Get([49, 50]), Symbol("b_in"), Symbol("0,7,"), Get([52, 53]), Add([51, 54]), Symbol("a_in"), Symbol("0,8,"), Get([56, 57]), Symbol("b_in"), Symbol("0,8,"), Get([59, 60]), Add([58, 61]), Vec([6, 13, 20, 27]), Vec([34, 41, 48, 55]), Num(0), Num(0), Num(0), Vec([62, 65, 66, 67]), Concat([63, 64]), Concat([69, 68])] } -Dropping inapplicable rules: div-1 div-1-inv neg-neg neg-neg-rev neg-sgn neg-sgn-rev neg-zero-inv neg-zero-inv-rev neg-minus neg-minus-rev neg-minus-zero neg-minus-zero-rev sqrt-1-inv sqrt-1-inv-rev neg_unop sqrt_unop /_binop -Stopped after 5 iterations, reason: Some(Saturated) -[Symbol("a_in"), Symbol("0,0,"), Get([0, 1]), Symbol("a_in"), Symbol("0,1,"), Get([3, 4]), Symbol("a_in"), Symbol("0,2,"), Get([6, 7]), Symbol("a_in"), Symbol("0,3,"), Get([9, 10]), LitVec([2, 5, 8, 11]), Symbol("b_in"), Symbol("0,0,"), Get([13, 14]), Symbol("b_in"), Symbol("0,1,"), Get([16, 17]), Symbol("b_in"), Symbol("0,2,"), Get([19, 20]), Symbol("b_in"), Symbol("0,3,"), Get([22, 23]), LitVec([15, 18, 21, 24]), VecAdd([12, 25]), Symbol("a_in"), Symbol("0,4,"), 
Get([27, 28]), Symbol("a_in"), Symbol("0,5,"), Get([30, 31]), Symbol("a_in"), Symbol("0,6,"), Get([33, 34]), Symbol("a_in"), Symbol("0,7,"), Get([36, 37]), LitVec([29, 32, 35, 38]), Symbol("b_in"), Symbol("0,4,"), Get([40, 41]), Symbol("b_in"), Symbol("0,5,"), Get([43, 44]), Symbol("b_in"), Symbol("0,6,"), Get([46, 47]), Symbol("b_in"), Symbol("0,7,"), Get([49, 50]), LitVec([42, 45, 48, 51]), VecAdd([39, 52]), Concat([26, 53]), Symbol("a_in"), Symbol("0,8,"), Get([55, 56]), Num(0), Num(0), Num(0), LitVec([57, 58, 59, 60]), Symbol("b_in"), Symbol("0,8,"), Get([62, 63]), Num(0), Num(0), Num(0), LitVec([64, 65, 66, 67]), VecAdd([61, 68]), Concat([54, 69])] diff --git a/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-inlining.c b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-inlining.c new file mode 100644 index 00000000..9857755c --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-inlining.c @@ -0,0 +1,112 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 10 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float nested_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float no_opt_nested_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = no_opt_nested_inline(A, B, n); + return prod - sum; +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = no_opt_test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +float nested_inline(float A[SIZE], 
float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = nested_inline(A, B, n); + return prod - sum; +} + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +int main() { + time_t t = time(NULL); + srand((unsigned)time(&t)); + + float A[SIZE] = {0.0f}; + float expectedA[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + float a = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + A[i] = a; + expectedA[i] = a; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + float b = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + B[i] = b; + expectedB[i] = b; + } + float C[SIZE] = {0.0f}; + float expectedC[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + C[i] = 0.0f; + expectedC[i] = 0.0f; + } + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("Calculated C Output: %f\n", C[i]); + printf("Expected C Output: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-naive-norm.c b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-naive-norm.c new file mode 100644 index 00000000..83e5ac9f --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-naive-norm.c @@ -0,0 +1,41 @@ +#include +#include +#include +#include +#include + +#define SIZE 10 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float naive_norm(float x[SIZE], int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * 
x[i]; + } + return sqrtf(sum); +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float x_in[SIZE]; + for (int i = 0; i < SIZE; i++) { + x_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // calculate up c_out + float calculated = naive_norm(x_in, SIZE); + // calculate expected + float sum = 0; + for (int i = 0; i < SIZE; i++) { + sum += x_in[i] * x_in[i]; + } + float expected = sqrtf(sum); + // check expected == output + printf("calculated: %f\n", calculated); + printf("expected: %f\n", expected); + assert(fabs(expected - calculated) < DELTA); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-qr-decomp-no-local-array.c new file mode 100644 index 00000000..784995ca --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-qr-decomp-no-local-array.c @@ -0,0 +1,313 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 10 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + 
float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], float u[SIZE], + float v[SIZE], float q_min[SIZE * SIZE], + float q_t[SIZE * SIZE], float res[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * 
naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + 
naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float I[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float u[SIZE] = {0.0f}; + float v[SIZE] = {0.0f}; + float q_min[SIZE * SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + float res[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, I, x, e, u, v, q_min, q_t, res); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-transpose.c 
b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-transpose.c new file mode 100644 index 00000000..a162ae1c --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/10-by-10-random-transpose.c @@ -0,0 +1,51 @@ +#include +#include +#include +#include +#include + +#define SIZE 10 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void naive_transpose(float a[SIZE * SIZE], int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float x_calculated[SIZE * SIZE]; + for (int i = 0; i < SIZE * SIZE; i++) { + x_calculated[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float x_expected[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + x_expected[i] = x_calculated[i]; + } + // calculate up c_out + naive_transpose(x_calculated, SIZE); + // calculate expected + int n = SIZE; + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = x_expected[i * n + j]; + x_expected[i * n + j] = x_expected[j * n + i]; + x_expected[j * n + i] = tmp; + } + } + // check expected == output + for (int i = 0; i < SIZE * SIZE; i++) { + printf("calculated: %f\n", x_calculated[i]); + printf("expected: %f\n", x_expected[i]); + assert(fabs(x_expected[i] - x_calculated[i]) < DELTA); + } + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/100-by-100-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/randomized-tests/100-by-100-random-matrix-multiply.c new file mode 100644 index 00000000..bdacc4fe --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/100-by-100-random-matrix-multiply.c @@ -0,0 +1,76 @@ +#include +#include +#include +#include +#include + +#define A_ROWS 100 +#define A_COLS 100 +#define B_COLS 100 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void 
matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + // load in b_in + float b_in[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + // set up c_out + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = 0.0f; + } + } + // prep expected + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = 0.0f; + } + } + // calculate up c_out + matrix_multiply(a_in, b_in, c_out); + // calculate expected + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + expected[i][j] = sum; + } + } + // check expected == output + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + assert(fabs(expected[i][j] - c_out[i][j]) < DELTA); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/12-by-12-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/randomized-tests/12-by-12-random-matrix-multiply.c new file mode 100644 index 00000000..2963ed91 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/12-by-12-random-matrix-multiply.c @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include + +#define A_ROWS 12 +#define A_COLS 12 
+#define B_COLS 12 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS], + float c_out[A_ROWS][B_COLS]) { + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + c_out[i][j] = sum; + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + // load in b_in + float b_in[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + } + // set up c_out + float c_out[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + c_out[i][j] = 0.0f; + } + } + // prep expected + float expected[A_ROWS][B_COLS]; + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + expected[i][j] = 0.0f; + } + } + // calculate up c_out + matrix_multiply(a_in, b_in, c_out); + // calculate expected + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < B_COLS; j++) { + float sum = 0.0; + for (int k = 0; k < A_COLS; k++) { + sum += a_in[i][k] * b_in[k][j]; + } + expected[i][j] = sum; + } + } + // check expected == output + for (int i = 0; i < A_ROWS; i++) { + for (int j = 0; j < A_COLS; j++) { + printf("calculated: %f\n", c_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(fabs(expected[i][j] - c_out[i][j]) < DELTA); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/15-by-15-random-qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/randomized-tests/15-by-15-random-qr-decomp-no-local-array.c new file mode 100644 index 00000000..52e6701c --- /dev/null +++ 
b/src/dios-egraphs/Diospyros/randomized-tests/15-by-15-random-qr-decomp-no-local-array.c @@ -0,0 +1,313 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 15 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] 
+= a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], float u[SIZE], + float v[SIZE], float q_min[SIZE * SIZE], + float q_t[SIZE * SIZE], float res[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 
1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + 
x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float I[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float u[SIZE] = {0.0f}; + float v[SIZE] = {0.0f}; + float q_min[SIZE * SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + float res[SIZE * SIZE] = 
{0.0f}; + naive_fixed_qr_decomp(A, Q, R, I, x, e, u, v, q_min, q_t, res); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/1d-12-by-12-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/randomized-tests/1d-12-by-12-random-matrix-multiply.c new file mode 100644 index 00000000..aae33dda --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/1d-12-by-12-random-matrix-multiply.c @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include + +#define A_ROWS 12 +#define A_COLS 12 +#define B_COLS 12 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void matrix_multiply(float a_in[A_ROWS * A_COLS], float b_in[A_COLS * B_COLS], + float c_out[A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int 
i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // calculate up c_out + matrix_multiply(a_in, b_in, c_out); + // calculate expected + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + expected[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + expected[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } + // check expected == output + for (int i = 0; i < A_ROWS * B_COLS; i++) { + printf("calculated: %f\n", c_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - c_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/1d-25-by-25-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/randomized-tests/1d-25-by-25-random-matrix-multiply.c new file mode 100644 index 00000000..cd678956 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/1d-25-by-25-random-matrix-multiply.c @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include + +#define A_ROWS 25 +#define A_COLS 25 +#define B_COLS 25 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void matrix_multiply(float a_in[A_ROWS * A_COLS], float b_in[A_COLS * B_COLS], + float c_out[A_ROWS * B_COLS]) { + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + c_out[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + c_out[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float a_in[A_ROWS * A_COLS]; + for (int i = 0; i < A_ROWS * A_COLS; i++) { + a_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // load in b_in + float b_in[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; 
i++) { + b_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // set up c_out + float c_out[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + c_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // prep expected + float expected[A_ROWS * B_COLS]; + for (int i = 0; i < A_ROWS * B_COLS; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // calculate up c_out + matrix_multiply(a_in, b_in, c_out); + // calculate expected + for (int y = 0; y < A_ROWS; y++) { + for (int x = 0; x < B_COLS; x++) { + expected[B_COLS * y + x] = 0; + for (int k = 0; k < A_COLS; k++) { + expected[B_COLS * y + x] += + a_in[A_COLS * y + k] * b_in[B_COLS * k + x]; + } + } + } + // check expected == output + for (int i = 0; i < A_ROWS * B_COLS; i++) { + printf("calculated: %f\n", c_out[i]); + printf("expected: %f\n", expected[i]); + assert(fabs(expected[i] - c_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/2-by-2-random-qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/randomized-tests/2-by-2-random-qr-decomp-no-local-array.c new file mode 100644 index 00000000..b5558092 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/2-by-2-random-qr-decomp-no-local-array.c @@ -0,0 +1,313 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 2 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return 
sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], float u[SIZE], + float v[SIZE], float q_min[SIZE * SIZE], + float q_t[SIZE * SIZE], float res[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x 
= (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < 
m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = q_t + no_opt_naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + float *res = (float *)calloc(sizeof(float), SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + memcpy(Q, res, sizeof(float) * SIZE * SIZE); + no_opt_naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + memcpy(R, res, sizeof(float) * SIZE * SIZE); + } + free(x); + free(e); + free(u); + free(v); + free(q_min); + free(q_t); + } + no_opt_naive_fixed_transpose(Q); +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + + float A[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + + float Q[SIZE * SIZE] = {0.0f}; + float R[SIZE * SIZE] = {0.0f}; + float I[SIZE * SIZE] = {0.0f}; + float x[SIZE] = {0.0f}; + float e[SIZE] = {0.0f}; + float u[SIZE] = {0.0f}; + float v[SIZE] = {0.0f}; + float q_min[SIZE * SIZE] = {0.0f}; + float q_t[SIZE * SIZE] = {0.0f}; + float res[SIZE * SIZE] = {0.0f}; + naive_fixed_qr_decomp(A, Q, R, I, x, e, u, v, q_min, q_t, res); + float expectedQ[SIZE * SIZE] = {0.0f}; + float expectedR[SIZE * SIZE] = {0.0f}; + no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR); + + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("Q Output: %f\n", Q[i * SIZE + j]); + printf("Expected Q 
Output: %f\n", expectedQ[i * SIZE + j]); + assert(fabs(expectedQ[i] - Q[i]) < DELTA); + } + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + printf("R Output: %f\n", R[i * SIZE + j]); + printf("Expected R Output: %f\n", expectedR[i * SIZE + j]); + assert(fabs(expectedR[i] - R[i]) < DELTA); + } + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-inlining.c b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-inlining.c new file mode 100644 index 00000000..b9b81702 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-inlining.c @@ -0,0 +1,112 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 20 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float test_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float nested_inline(float A[SIZE], float B[SIZE], int n) + __attribute__((always_inline)); + +float no_opt_nested_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float no_opt_test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = no_opt_nested_inline(A, B, n); + return prod - sum; +} + +void no_opt_test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = no_opt_test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +float nested_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = -1 * A[i]; + } + float prod = 0.0f; + for (int i = 0; i < n; i++) { + prod *= B[i]; + } + return prod; +} + +float test_inline(float A[SIZE], float B[SIZE], int n) { + for (int i = 0; i < n; i++) { + B[i] = 2 * A[i]; + } + float sum = 0.0f; + 
for (int i = 0; i < n; i++) { + sum += B[i]; + } + float prod = nested_inline(A, B, n); + return prod - sum; +} + +void test(float A[SIZE], float B[SIZE], float C[SIZE]) { + float result = test_inline(A, B, SIZE); + for (int i = 0; i < SIZE; i++) { + C[i] = result; + } +} + +int main() { + time_t t = time(NULL); + srand((unsigned)time(&t)); + + float A[SIZE] = {0.0f}; + float expectedA[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + float a = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + A[i] = a; + expectedA[i] = a; + } + float B[SIZE] = {0.0f}; + float expectedB[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + float b = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + B[i] = b; + expectedB[i] = b; + } + float C[SIZE] = {0.0f}; + float expectedC[SIZE] = {0.0f}; + for (int i = 0; i < SIZE; i++) { + C[i] = 0.0f; + expectedC[i] = 0.0f; + } + test(A, B, C); + no_opt_test(expectedA, expectedB, expectedC); + for (int i = 0; i < SIZE; i++) { + printf("Calculated C Output: %f\n", C[i]); + printf("Expected C Output: %f\n", expectedC[i]); + assert(fabs(expectedC[i] - C[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-naive-norm.c b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-naive-norm.c new file mode 100644 index 00000000..449460d2 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-naive-norm.c @@ -0,0 +1,41 @@ +#include +#include +#include +#include +#include + +#define SIZE 20 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float naive_norm(float x[SIZE], int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float x_in[SIZE]; + for (int i = 0; i < SIZE; i++) { + x_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + // calculate up c_out + float calculated = naive_norm(x_in, 
SIZE); + // calculate expected + float sum = 0; + for (int i = 0; i < SIZE; i++) { + sum += x_in[i] * x_in[i]; + } + float expected = sqrtf(sum); + // check expected == output + printf("calculated: %f\n", calculated); + printf("expected: %f\n", expected); + assert(fabs(expected - calculated) < DELTA); + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-qr-decomp-no-local-array.c new file mode 100644 index 00000000..f8751161 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-qr-decomp-no-local-array.c @@ -0,0 +1,313 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 20 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + 
a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], float u[SIZE], + float v[SIZE], float q_min[SIZE * SIZE], + float q_t[SIZE * SIZE], float res[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = 
naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i 
= 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f;
+                } else {
+                    q_t_i = q_min[(i - k) * m + (j - k)];
+                }
+                q_t[i * SIZE + j] = q_t_i;
+            }
+        }
+
+        if (k == 0) {
+            memcpy(Q, q_t, sizeof(float) * SIZE * SIZE);  // Q = q_t
+            no_opt_naive_fixed_matrix_multiply(q_t, A, R);  // R = q_t * A
+        } else {
+            float *res = (float *)calloc(sizeof(float), SIZE * SIZE);
+            no_opt_naive_fixed_matrix_multiply(q_t, Q, res);  // R = q_t * A
+            memcpy(Q, res, sizeof(float) * SIZE * SIZE);
+            no_opt_naive_fixed_matrix_multiply(q_t, R, res);  // R = q_t * A
+            memcpy(R, res, sizeof(float) * SIZE * SIZE);
+        }
+        free(x);
+        free(e);
+        free(u);
+        free(v);
+        free(q_min);
+        free(q_t);
+    }
+    no_opt_naive_fixed_transpose(Q);
+}
+
+int main(void) {
+    time_t t = time(NULL);
+    srand((unsigned)time(&t));
+
+    float A[SIZE * SIZE] = {0.0f};
+    for (int i = 0; i < SIZE * SIZE; i++) {
+        A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT);
+    }
+    // Outputs (Q, R) plus the caller-provided buffers the kernel takes.
+    float Q[SIZE * SIZE] = {0.0f};
+    float R[SIZE * SIZE] = {0.0f};
+    float I[SIZE * SIZE] = {0.0f};
+    float x[SIZE] = {0.0f};
+    float e[SIZE] = {0.0f};
+    float u[SIZE] = {0.0f};
+    float v[SIZE] = {0.0f};
+    float q_min[SIZE * SIZE] = {0.0f};
+    float q_t[SIZE * SIZE] = {0.0f};
+    float res[SIZE * SIZE] = {0.0f};
+    naive_fixed_qr_decomp(A, Q, R, I, x, e, u, v, q_min, q_t, res);
+    float expectedQ[SIZE * SIZE] = {0.0f};
+    float expectedR[SIZE * SIZE] = {0.0f};
+    no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR);
+    // Check every (i, j) element, not only the first SIZE entries.
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = 0; j < SIZE; j++) {
+            printf("Q Output: %f\n", Q[i * SIZE + j]);
+            printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]);
+            assert(fabs(expectedQ[i * SIZE + j] - Q[i * SIZE + j]) < DELTA);
+        }
+    }
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = 0; j < SIZE; j++) {
+            printf("R Output: %f\n", R[i * SIZE + j]);
+            printf("Expected R Output: %f\n", expectedR[i * SIZE + j]);
+            assert(fabs(expectedR[i * SIZE + j] - R[i * SIZE + j]) < DELTA);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-transpose.c
b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-transpose.c new file mode 100644 index 00000000..de27bd49 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/20-by-20-random-transpose.c @@ -0,0 +1,51 @@ +#include +#include +#include +#include +#include + +#define SIZE 20 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void naive_transpose(float a[SIZE * SIZE], int n) { + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = a[i * n + j]; + a[i * n + j] = a[j * n + i]; + a[j * n + i] = tmp; + } + } +} +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + // load in a_in + float x_calculated[SIZE * SIZE]; + for (int i = 0; i < SIZE * SIZE; i++) { + x_calculated[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float x_expected[SIZE * SIZE] = {0.0f}; + for (int i = 0; i < SIZE * SIZE; i++) { + x_expected[i] = x_calculated[i]; + } + // calculate up c_out + naive_transpose(x_calculated, SIZE); + // calculate expected + int n = SIZE; + for (int i = 0; i < n; i++) { + for (int j = i + 1; j < n; j++) { + float tmp = x_expected[i * n + j]; + x_expected[i * n + j] = x_expected[j * n + i]; + x_expected[j * n + i] = tmp; + } + } + // check expected == output + for (int i = 0; i < SIZE * SIZE; i++) { + printf("calculated: %f\n", x_calculated[i]); + printf("expected: %f\n", x_expected[i]); + assert(fabs(x_expected[i] - x_calculated[i]) < DELTA); + } + + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/25-by-25-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/randomized-tests/25-by-25-random-matrix-multiply.c new file mode 100644 index 00000000..e50590ee --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/25-by-25-random-matrix-multiply.c @@ -0,0 +1,76 @@ +#include +#include +#include +#include +#include + +#define A_ROWS 25 +#define A_COLS 25 +#define B_COLS 25 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void 
matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS],
+                     float c_out[A_ROWS][B_COLS]) {
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            float sum = 0.0;
+            for (int k = 0; k < A_COLS; k++) {
+                sum += a_in[i][k] * b_in[k][j];
+            }
+            c_out[i][j] = sum;
+        }
+    }
+}
+
+int main(void) {
+    time_t t = time(NULL);
+    srand((unsigned)time(&t));
+    // a_in is A_ROWS x A_COLS, matching matrix_multiply's first parameter
+    float a_in[A_ROWS][A_COLS];
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < A_COLS; j++) {
+            a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT);
+        }
+    }
+    // b_in is A_COLS x B_COLS, matching matrix_multiply's second parameter
+    float b_in[A_COLS][B_COLS];
+    for (int i = 0; i < A_COLS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT);
+        }
+    }
+    // c_out (A_ROWS x B_COLS) receives matrix_multiply's result
+    float c_out[A_ROWS][B_COLS];
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            c_out[i][j] = 0.0f;
+        }
+    }
+    // expected (A_ROWS x B_COLS) receives the inline reference product
+    float expected[A_ROWS][B_COLS];
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            expected[i][j] = 0.0f;
+        }
+    }
+    // compute c_out with matrix_multiply
+    matrix_multiply(a_in, b_in, c_out);
+    // compute the reference product inline with the same triple loop
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            float sum = 0.0;
+            for (int k = 0; k < A_COLS; k++) {
+                sum += a_in[i][k] * b_in[k][j];
+            }
+            expected[i][j] = sum;
+        }
+    }
+    // every element of c_out must be within DELTA of the reference
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            assert(fabs(expected[i][j] - c_out[i][j]) < DELTA);
+        }
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/src/dios-egraphs/Diospyros/randomized-tests/2d-2d-conv-random-10.c b/src/dios-egraphs/Diospyros/randomized-tests/2d-2d-conv-random-10.c
new file mode 100644
index 00000000..69eeec29
--- /dev/null
+++ b/src/dios-egraphs/Diospyros/randomized-tests/2d-2d-conv-random-10.c
@@ -0,0 +1,95 @@
+#include
+#include
+#include
+#include
+#include
+
+#define I_ROWS 10
+#define I_COLS 10
+#define F_ROWS 5
+#define F_COLS 5
+#define O_ROWS ((I_ROWS + F_ROWS) - 1)
+#define O_COLS ((I_COLS + F_COLS) - 1)
+#define MAX_FLOAT 100.00f
+#define DELTA 0.1f
+
+void convolution(float mat_in[I_ROWS][I_COLS], float f_in[F_ROWS][F_COLS],
+                 float mat_out[O_ROWS][O_COLS]) {
+    for (int outRow = 0; outRow < O_ROWS; outRow++) {
+        for (int outCol = 0; outCol < O_COLS; outCol++) {
+            for (int fRow = 0; fRow < F_ROWS; fRow++) {
+                for (int fCol = 0; fCol < F_COLS; fCol++) {
+                    int fRowTrans = F_ROWS - 1 - fRow;
+                    int fColTrans = F_COLS - 1 - fCol;
+                    int iRow = outRow - fRowTrans;
+                    int iCol = outCol - fColTrans;
+                    // skip filter taps that fall outside the input
+                    if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 &&
+                        iCol < I_COLS) {
+                        float v =
+                            mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans];
+                        mat_out[outRow][outCol] += v;
+                    }
+                }
+            }
+        }
+    }
+}
+
+int main(void) {
+    time_t t = time(NULL);
+    srand((unsigned)time(&t));
+    float mat_in[I_ROWS][I_COLS];
+    for (int i = 0; i < I_ROWS; i++) {
+        for (int j = 0; j < I_COLS; j++) {
+            mat_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT);
+        }
+    }
+    float f_in[F_ROWS][F_COLS];
+    for (int i = 0; i < F_ROWS; i++) {
+        for (int j = 0; j < F_COLS; j++) {
+            f_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT);
+        }
+    }
+    float mat_out[O_ROWS][O_COLS];
+    for (int i = 0; i < O_ROWS; i++) {
+        for (int j = 0; j < O_COLS; j++) {
+            mat_out[i][j] = 0;
+        }
+    }
+    float expected[O_ROWS][O_COLS];
+    for (int i = 0; i < O_ROWS; i++) {
+        for (int j = 0; j < O_COLS; j++) {
+            expected[i][j] = 0;
+        }
+    }
+    convolution(mat_in, f_in, mat_out);
+    // compute the reference result inline with the same loop nest
+    for (int outRow = 0; outRow < O_ROWS; outRow++) {
+        for (int outCol = 0; outCol < O_COLS; outCol++) {
+            for (int fRow = 0; fRow < F_ROWS; fRow++) {
+                for (int fCol = 0; fCol < F_COLS; fCol++) {
+                    int fRowTrans = F_ROWS - 1 - fRow;
+                    int fColTrans = F_COLS - 1 - fCol;
+                    int iRow = outRow - fRowTrans;
+                    int iCol = outCol - fColTrans;
+
+                    if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 &&
+                        iCol < I_COLS) {
+                        float v =
+                            mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans];
+
expected[outRow][outCol] += v;
+                    }
+                }
+            }
+        }
+    }
+    for (int i = 0; i < O_ROWS; i++) {
+        for (int j = 0; j < O_COLS; j++) {
+            printf("calculated: %f\n", mat_out[i][j]);
+            printf("expected: %f\n", expected[i][j]);
+            assert(fabs(expected[i][j] - mat_out[i][j]) < DELTA);
+        }
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/src/dios-egraphs/Diospyros/randomized-tests/2d-2d-conv-random-5.c b/src/dios-egraphs/Diospyros/randomized-tests/2d-2d-conv-random-5.c
new file mode 100644
index 00000000..89ed97f0
--- /dev/null
+++ b/src/dios-egraphs/Diospyros/randomized-tests/2d-2d-conv-random-5.c
@@ -0,0 +1,95 @@
+#include
+#include
+#include
+#include
+#include
+
+#define I_ROWS 5
+#define I_COLS 5
+#define F_ROWS 3
+#define F_COLS 3
+#define O_ROWS ((I_ROWS + F_ROWS) - 1)
+#define O_COLS ((I_COLS + F_COLS) - 1)
+#define MAX_FLOAT 100.00f
+#define DELTA 0.1f
+
+void convolution(float mat_in[I_ROWS][I_COLS], float f_in[F_ROWS][F_COLS],
+                 float mat_out[O_ROWS][O_COLS]) {
+    for (int outRow = 0; outRow < O_ROWS; outRow++) {
+        for (int outCol = 0; outCol < O_COLS; outCol++) {
+            for (int fRow = 0; fRow < F_ROWS; fRow++) {
+                for (int fCol = 0; fCol < F_COLS; fCol++) {
+                    int fRowTrans = F_ROWS - 1 - fRow;
+                    int fColTrans = F_COLS - 1 - fCol;
+                    int iRow = outRow - fRowTrans;
+                    int iCol = outCol - fColTrans;
+                    // skip filter taps that fall outside the input
+                    if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 &&
+                        iCol < I_COLS) {
+                        float v =
+                            mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans];
+                        mat_out[outRow][outCol] += v;
+                    }
+                }
+            }
+        }
+    }
+}
+
+int main(void) {
+    time_t t = time(NULL);
+    srand((unsigned)time(&t));
+    float mat_in[I_ROWS][I_COLS];
+    for (int i = 0; i < I_ROWS; i++) {
+        for (int j = 0; j < I_COLS; j++) {
+            mat_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT);
+        }
+    }
+    float f_in[F_ROWS][F_COLS];
+    for (int i = 0; i < F_ROWS; i++) {
+        for (int j = 0; j < F_COLS; j++) {
+            f_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT);
+        }
+    }
+    float mat_out[O_ROWS][O_COLS];
+    for (int i = 0; i < O_ROWS; i++)
{ + for (int j = 0; j < O_COLS; j++) { + mat_out[i][j] = 0; + } + } + float expected[O_ROWS][O_COLS]; + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + expected[i][j] = 0; + } + } + convolution(mat_in, f_in, mat_out); + // calculate expected + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = + mat_in[iRow][iCol] * f_in[fRowTrans][fColTrans]; + expected[outRow][outCol] += v; + } + } + } + } + } + for (int i = 0; i < O_ROWS; i++) { + for (int j = 0; j < O_COLS; j++) { + printf("calculated: %f\n", mat_out[i][j]); + printf("expected: %f\n", expected[i][j]); + assert(fabs(expected[i][j] - mat_out[i][j]) < DELTA); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/2d-conv-random-10.c b/src/dios-egraphs/Diospyros/randomized-tests/2d-conv-random-10.c new file mode 100644 index 00000000..c3f5c3a6 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/2d-conv-random-10.c @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include + +#define I_ROWS 10 +#define I_COLS 10 +#define F_ROWS 5 +#define F_COLS 5 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow 
= outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = 0; + } + convolution(mat_in, f_in, mat_out); + // calculate expected + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + expected[outRow * O_COLS + outCol] += v; + } + } + } + } + } + for (int i = 0; i < O_ROWS * O_COLS; i++) { + printf("calculated: %f\n", mat_out[i]); + printf("expected: %f\n", expected[i]); + printf("difference: %f\n", expected[i] - mat_out[i]); + } + for (int i = 0; i < O_ROWS * O_COLS; i++) { + assert(fabs(expected[i] - mat_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/2d-conv-random-5.c b/src/dios-egraphs/Diospyros/randomized-tests/2d-conv-random-5.c new file mode 100644 index 00000000..a47953a0 --- /dev/null +++ 
b/src/dios-egraphs/Diospyros/randomized-tests/2d-conv-random-5.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include + +#define I_ROWS 5 +#define I_COLS 5 +#define F_ROWS 3 +#define F_COLS 3 +#define O_ROWS ((I_ROWS + F_ROWS) - 1) +#define O_COLS ((I_COLS + F_COLS) - 1) +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void convolution(float mat_in[I_ROWS * I_COLS], float f_in[F_ROWS * F_COLS], + float mat_out[O_ROWS * O_COLS]) { + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS && iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + mat_out[outRow * O_COLS + outCol] += v; + } + } + } + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float mat_in[I_ROWS * I_COLS]; + for (int i = 0; i < I_ROWS * I_COLS; i++) { + mat_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float f_in[F_ROWS * F_COLS]; + for (int i = 0; i < F_ROWS * F_COLS; i++) { + f_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float mat_out[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + mat_out[i] = 0; + } + float expected[O_ROWS * O_COLS]; + for (int i = 0; i < O_ROWS * O_COLS; i++) { + expected[i] = 0; + } + convolution(mat_in, f_in, mat_out); + // calculate expected + for (int outRow = 0; outRow < O_ROWS; outRow++) { + for (int outCol = 0; outCol < O_COLS; outCol++) { + for (int fRow = 0; fRow < F_ROWS; fRow++) { + for (int fCol = 0; fCol < F_COLS; fCol++) { + int fRowTrans = F_ROWS - 1 - fRow; + int fColTrans = F_COLS - 1 - fCol; + int iRow = outRow - fRowTrans; + int iCol = outCol - fColTrans; + + if (iRow >= 0 && iRow < I_ROWS 
&& iCol >= 0 && + iCol < I_COLS) { + float v = mat_in[iRow * I_COLS + iCol] * + f_in[fRowTrans * F_COLS + fColTrans]; + expected[outRow * O_COLS + outCol] += v; + } + } + } + } + } + for (int i = 0; i < O_ROWS * O_COLS; i++) { + printf("--------------------------\n"); + printf("calculated: %f\n", mat_out[i]); + printf("expected: %f\n", expected[i]); + printf("difference: %f\n", expected[i] - mat_out[i]); + } + for (int i = 0; i < O_ROWS * O_COLS; i++) { + assert(fabs(expected[i] - mat_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/5-by-5-random-qr-decomp-no-local-array.c b/src/dios-egraphs/Diospyros/randomized-tests/5-by-5-random-qr-decomp-no-local-array.c new file mode 100644 index 00000000..b874e4c8 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/5-by-5-random-qr-decomp-no-local-array.c @@ -0,0 +1,313 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 5 +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +float sgn(float v) __attribute__((always_inline)); +float naive_norm(float *x, int m) __attribute__((always_inline)); +void naive_fixed_transpose(float *a) __attribute__((always_inline)); +void naive_fixed_matrix_multiply(float *a, float *b, float *c) + __attribute__((always_inline)); + +float sgn(float v) { return (v > 0) - (v < 0); } + +float no_opt_sgn(float v) { return (v > 0) - (v < 0); } + +float naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +float no_opt_naive_norm(float *x, int m) { + float sum = 0; + for (int i = 0; i < m; i++) { + sum += x[i] * x[i]; + } + return sqrtf(sum); +} + +// Naive with fixed size +void naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + 
+void no_opt_naive_fixed_transpose(float a[SIZE * SIZE]) { + for (int i = 0; i < SIZE; i++) { + for (int j = i + 1; j < SIZE; j++) { + float tmp = a[i * SIZE + j]; + a[i * SIZE + j] = a[j * SIZE + i]; + a[j * SIZE + i] = tmp; + } + } +} + +void naive_fixed_matrix_multiply(float a[SIZE * SIZE], float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void no_opt_naive_fixed_matrix_multiply(float a[SIZE * SIZE], + float b[SIZE * SIZE], + float c[SIZE * SIZE]) { + for (int y = 0; y < SIZE; y++) { + for (int x = 0; x < SIZE; x++) { + c[SIZE * y + x] = 0; + for (int k = 0; k < SIZE; k++) { + c[SIZE * y + x] += a[SIZE * y + k] * b[SIZE * k + x]; + } + } + } +} + +void naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * SIZE], + float R[SIZE * SIZE], float I[SIZE * SIZE], + float x[SIZE], float e[SIZE], float u[SIZE], + float v[SIZE], float q_min[SIZE * SIZE], + float q_t[SIZE * SIZE], float res[SIZE * SIZE]) { + // OLD COMMAND: memcpy(R, A, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = A[i]; + } + + // Build identity matrix of size SIZE * SIZE + // OLD COMMAND: : float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + // OLD COMMAND: float *x = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + x[i] = 0.0f; + e[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -sgn(x[0]) * naive_norm(x, m); + + // OLD COMMAND: float *u = (float *)calloc(sizeof(float), m); + // OLD COMMAND: float 
*v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < SIZE; i++) { + u[i] = 0.0f; + v[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + // OLD COMMAND: float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < SIZE * SIZE; i++) { + q_min[i] = 0.0f; + } + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + // OLD COMMAND: float *q_t = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + q_t[i] = 0.0f; + } + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 1.0f : 0.0f; + } else { + q_t_i = q_min[(i - k) * m + (j - k)]; + } + q_t[i * SIZE + j] = q_t_i; + } + } + + if (k == 0) { + // OLD COMMAND: memcpy(Q, q_t, sizeof(float) * SIZE * SIZE); // Q = + // q_t + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = q_t[i]; + } + naive_fixed_matrix_multiply(q_t, A, R); // R = q_t * A + } else { + // OLD COMMAND: float *res = (float *)calloc(sizeof(float), SIZE * + // SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + res[i] = 0.0f; + } + naive_fixed_matrix_multiply(q_t, Q, res); // R = q_t * A + // OLD COMMAND: memcpy(Q, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + Q[i] = res[i]; + } + naive_fixed_matrix_multiply(q_t, R, res); // R = q_t * A + // OLD COMMAND: memcpy(R, res, sizeof(float) * SIZE * SIZE); + for (int i = 0; i < SIZE * SIZE; i++) { + R[i] = res[i]; + } + } + // OLD COMMAND: free(x); + // OLD COMMAND: free(e); + // OLD COMMAND: free(u); + // OLD COMMAND: free(v); + // OLD COMMAND: free(q_min); + // OLD COMMAND: free(q_t); + } + naive_fixed_transpose(Q); +} + +void no_opt_naive_fixed_qr_decomp(float A[SIZE * SIZE], float Q[SIZE * 
SIZE], + float R[SIZE * SIZE]) { + memcpy(R, A, sizeof(float) * SIZE * SIZE); + + // Build identity matrix of size SIZE * SIZE + float *I = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + I[i * SIZE + j] = (i == j); + } + } + + // Householder + for (int k = 0; k < SIZE - 1; k++) { + int m = SIZE - k; + + float *x = (float *)calloc(sizeof(float), m); + float *e = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + int row = k + i; + x[i] = R[row * SIZE + k]; + e[i] = I[row * SIZE + k]; + } + + float alpha = -no_opt_sgn(x[0]) * no_opt_naive_norm(x, m); + + float *u = (float *)calloc(sizeof(float), m); + float *v = (float *)calloc(sizeof(float), m); + for (int i = 0; i < m; i++) { + u[i] = x[i] + alpha * e[i]; + } + float norm_u = no_opt_naive_norm(u, m); + for (int i = 0; i < m; i++) { + v[i] = u[i] / (norm_u + 0.00001f); + } + + float *q_min = (float *)calloc(sizeof(float), m * m); + for (int i = 0; i < m; i++) { + for (int j = 0; j < m; j++) { + float q_min_i = ((i == j) ? 1.0f : 0.0f) - 2 * v[i] * v[j]; + q_min[i * m + j] = q_min_i; + } + } + + float *q_t = (float *)calloc(sizeof(float), SIZE * SIZE); + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float q_t_i; + if ((i < k) || (j < k)) { + q_t_i = (i == j) ? 
1.0f : 0.0f;
+                } else {
+                    q_t_i = q_min[(i - k) * m + (j - k)];
+                }
+                q_t[i * SIZE + j] = q_t_i;
+            }
+        }
+
+        if (k == 0) {
+            memcpy(Q, q_t, sizeof(float) * SIZE * SIZE);  // Q = q_t
+            no_opt_naive_fixed_matrix_multiply(q_t, A, R);  // R = q_t * A
+        } else {
+            float *res = (float *)calloc(sizeof(float), SIZE * SIZE);
+            no_opt_naive_fixed_matrix_multiply(q_t, Q, res);  // R = q_t * A
+            memcpy(Q, res, sizeof(float) * SIZE * SIZE);
+            no_opt_naive_fixed_matrix_multiply(q_t, R, res);  // R = q_t * A
+            memcpy(R, res, sizeof(float) * SIZE * SIZE);
+        }
+        free(x);
+        free(e);
+        free(u);
+        free(v);
+        free(q_min);
+        free(q_t);
+    }
+    no_opt_naive_fixed_transpose(Q);
+}
+
+int main(void) {
+    time_t t = time(NULL);
+    srand((unsigned)time(&t));
+
+    float A[SIZE * SIZE] = {0.0f};
+    for (int i = 0; i < SIZE * SIZE; i++) {
+        A[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT);
+    }
+    // Outputs (Q, R) plus the caller-provided buffers the kernel takes.
+    float Q[SIZE * SIZE] = {0.0f};
+    float R[SIZE * SIZE] = {0.0f};
+    float I[SIZE * SIZE] = {0.0f};
+    float x[SIZE] = {0.0f};
+    float e[SIZE] = {0.0f};
+    float u[SIZE] = {0.0f};
+    float v[SIZE] = {0.0f};
+    float q_min[SIZE * SIZE] = {0.0f};
+    float q_t[SIZE * SIZE] = {0.0f};
+    float res[SIZE * SIZE] = {0.0f};
+    naive_fixed_qr_decomp(A, Q, R, I, x, e, u, v, q_min, q_t, res);
+    float expectedQ[SIZE * SIZE] = {0.0f};
+    float expectedR[SIZE * SIZE] = {0.0f};
+    no_opt_naive_fixed_qr_decomp(A, expectedQ, expectedR);
+    // Check every (i, j) element, not only the first SIZE entries.
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = 0; j < SIZE; j++) {
+            printf("Q Output: %f\n", Q[i * SIZE + j]);
+            printf("Expected Q Output: %f\n", expectedQ[i * SIZE + j]);
+            assert(fabs(expectedQ[i * SIZE + j] - Q[i * SIZE + j]) < DELTA);
+        }
+    }
+    for (int i = 0; i < SIZE; i++) {
+        for (int j = 0; j < SIZE; j++) {
+            printf("R Output: %f\n", R[i * SIZE + j]);
+            printf("Expected R Output: %f\n", expectedR[i * SIZE + j]);
+            assert(fabs(expectedR[i * SIZE + j] - R[i * SIZE + j]) < DELTA);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/dios-egraphs/Diospyros/randomized-tests/50-by-50-random-matrix-multiply.c
b/src/dios-egraphs/Diospyros/randomized-tests/50-by-50-random-matrix-multiply.c
new file mode 100644
index 00000000..036c7a79
--- /dev/null
+++ b/src/dios-egraphs/Diospyros/randomized-tests/50-by-50-random-matrix-multiply.c
@@ -0,0 +1,76 @@
+#include
+#include
+#include
+#include
+#include
+
+#define A_ROWS 50
+#define A_COLS 50
+#define B_COLS 50
+#define MAX_FLOAT 100.00f
+#define DELTA 0.1f
+
+void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS],
+                     float c_out[A_ROWS][B_COLS]) {
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            float sum = 0.0;
+            for (int k = 0; k < A_COLS; k++) {
+                sum += a_in[i][k] * b_in[k][j];
+            }
+            c_out[i][j] = sum;
+        }
+    }
+}
+
+int main(void) {
+    time_t t = time(NULL);
+    srand((unsigned)time(&t));
+    // a_in is A_ROWS x A_COLS, matching matrix_multiply's first parameter
+    float a_in[A_ROWS][A_COLS];
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < A_COLS; j++) {
+            a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT);
+        }
+    }
+    // b_in is A_COLS x B_COLS, matching matrix_multiply's second parameter
+    float b_in[A_COLS][B_COLS];
+    for (int i = 0; i < A_COLS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT);
+        }
+    }
+    // c_out (A_ROWS x B_COLS) receives matrix_multiply's result
+    float c_out[A_ROWS][B_COLS];
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            c_out[i][j] = 0.0f;
+        }
+    }
+    // expected (A_ROWS x B_COLS) receives the inline reference product
+    float expected[A_ROWS][B_COLS];
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            expected[i][j] = 0.0f;
+        }
+    }
+    // compute c_out with matrix_multiply
+    matrix_multiply(a_in, b_in, c_out);
+    // compute the reference product inline with the same triple loop
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            float sum = 0.0;
+            for (int k = 0; k < A_COLS; k++) {
+                sum += a_in[i][k] * b_in[k][j];
+            }
+            expected[i][j] = sum;
+        }
+    }
+    // every element of c_out must be within DELTA of the reference
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            assert(fabs(expected[i][j] - c_out[i][j]) < DELTA);
+        }
+    }
+    return 0;
+}
\ No newline at end of file
diff --git
a/src/dios-egraphs/Diospyros/randomized-tests/75-by-75-random-matrix-multiply.c b/src/dios-egraphs/Diospyros/randomized-tests/75-by-75-random-matrix-multiply.c
new file mode 100644
index 00000000..afefac24
--- /dev/null
+++ b/src/dios-egraphs/Diospyros/randomized-tests/75-by-75-random-matrix-multiply.c
@@ -0,0 +1,76 @@
+#include
+#include
+#include
+#include
+#include
+
+#define A_ROWS 75
+#define A_COLS 75
+#define B_COLS 75
+#define MAX_FLOAT 100.00f
+#define DELTA 0.1f
+
+void matrix_multiply(float a_in[A_ROWS][A_COLS], float b_in[A_COLS][B_COLS],
+                     float c_out[A_ROWS][B_COLS]) {
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            float sum = 0.0;
+            for (int k = 0; k < A_COLS; k++) {
+                sum += a_in[i][k] * b_in[k][j];
+            }
+            c_out[i][j] = sum;
+        }
+    }
+}
+
+int main(void) {
+    time_t t = time(NULL);
+    srand((unsigned)time(&t));
+    // a_in is A_ROWS x A_COLS, matching matrix_multiply's first parameter
+    float a_in[A_ROWS][A_COLS];
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < A_COLS; j++) {
+            a_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT);
+        }
+    }
+    // b_in is A_COLS x B_COLS, matching matrix_multiply's second parameter
+    float b_in[A_COLS][B_COLS];
+    for (int i = 0; i < A_COLS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            b_in[i][j] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT);
+        }
+    }
+    // c_out (A_ROWS x B_COLS) receives matrix_multiply's result
+    float c_out[A_ROWS][B_COLS];
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            c_out[i][j] = 0.0f;
+        }
+    }
+    // expected (A_ROWS x B_COLS) receives the inline reference product
+    float expected[A_ROWS][B_COLS];
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            expected[i][j] = 0.0f;
+        }
+    }
+    // compute c_out with matrix_multiply
+    matrix_multiply(a_in, b_in, c_out);
+    // compute the reference product inline with the same triple loop
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            float sum = 0.0;
+            for (int k = 0; k < A_COLS; k++) {
+                sum += a_in[i][k] * b_in[k][j];
+            }
+            expected[i][j] = sum;
+        }
+    }
+    // every element of c_out must be within DELTA of the reference
+    for (int i = 0; i < A_ROWS; i++) {
+        for (int j = 0; j < B_COLS; j++) {
+            assert(fabs(expected[i][j] - c_out[i][j]) <
DELTA); + } + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/README.md b/src/dios-egraphs/Diospyros/randomized-tests/README.md new file mode 100644 index 00000000..740c814b --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/README.md @@ -0,0 +1 @@ +Randomized Tests contains tests of large programs with random float array inputs. The reference result is taken to be the output computed without the Diospyros optimization, and the output of the Diospyros pass is compared against that reference. \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/point-product-random.c b/src/dios-egraphs/Diospyros/randomized-tests/point-product-random.c new file mode 100644 index 00000000..a41838e3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/point-product-random.c @@ -0,0 +1,90 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 4 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void cross_product(float lhs[3], float rhs[3], float result[3]) + __attribute__((always_inline)); + +void cross_product(float lhs[3], float rhs[3], float result[3]) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +void point_product(float q_in[4], float p_in[4], float result_out[4]) { + float qvec[3] = {q_in[0], q_in[1], q_in[2]}; + // qvec = {0, 1, 2} + + float uv[3]; + cross_product(qvec, p_in, uv); + // uv = {1 * 2 - 2 * 1, 2 * 0 - 0 * 2, 0 * 1 - 1 * 0} = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + // uv = {0, 0 , 0} + float qxuv[3]; + cross_product(qvec, uv, qxuv); + // qxuv = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + result_out[i] = p_in[i] + q_in[3] * uv[i] + qxuv[i]; + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float q_in[SIZE]; + for (int i = 0; i < SIZE; i++) { + 
q_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float p_in[SIZE]; + for (int i = 0; i < SIZE; i++) { + p_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float result_out[SIZE]; + for (int i = 0; i < SIZE; i++) { + result_out[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[SIZE]; + for (int i = 0; i < SIZE; i++) { + expected[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + point_product(q_in, p_in, result_out); + float qvec[3] = {q_in[0], q_in[1], q_in[2]}; + // qvec = {0, 1, 2} + + float uv[3]; + cross_product(qvec, p_in, uv); + // uv = {1 * 2 - 2 * 1, 2 * 0 - 0 * 2, 0 * 1 - 1 * 0} = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + // uv = {0, 0 , 0} + float qxuv[3]; + cross_product(qvec, uv, qxuv); + // qxuv = {0, 0, 0} + + for (int i = 0; i < 3; i++) { + expected[i] = p_in[i] + q_in[3] * uv[i] + qxuv[i]; + } + for (int i = 0; i < 3; i++) { + printf("Calculated: %f\n", result_out[i]); + printf("Expected: %f\n", expected[i]); + assert(fabs(expected[i] - result_out[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/q-prod-random.c b/src/dios-egraphs/Diospyros/randomized-tests/q-prod-random.c new file mode 100644 index 00000000..403efae3 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/q-prod-random.c @@ -0,0 +1,117 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SIZE 4 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +__attribute__((always_inline)) void naive_cross_product(float *lhs, float *rhs, + float *result) { + result[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1]; + result[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2]; + result[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0]; +} + +/* + Computes the point product +*/ +__attribute__((always_inline)) void naive_point_product(float *q, float *p, + float *result) { + float qvec[3] = {q[0], q[1], q[2]}; + float uv[3]; + naive_cross_product(qvec, p, 
uv); + + for (int i = 0; i < 3; i++) { + uv[i] = uv[i] * 2; + } + float qxuv[3]; + naive_cross_product(qvec, uv, qxuv); + + for (int i = 0; i < 3; i++) { + result[i] = p[i] + q[3] * uv[i] + qxuv[i]; + } +} + +void naive_quaternion_product(float *a_q, float *a_t, float *b_q, float *b_t, + float *r_q, float *r_t) { + r_q[3] = + a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * b_q[2]; + r_q[0] = + a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - a_q[2] * b_q[1]; + r_q[1] = + a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * b_q[2]; + r_q[2] = + a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * b_q[0]; + + naive_point_product(a_q, b_t, r_t); + for (int i = 0; i < 3; i++) { + r_t[i] += a_t[i]; + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float a_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + a_q[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float a_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + a_t[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float b_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + b_q[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float b_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + b_t[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float r_q[SIZE]; + for (int i = 0; i < SIZE; i++) { + r_q[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float r_t[SIZE]; + for (int i = 0; i < SIZE; i++) { + r_t[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expectedq[SIZE]; + for (int i = 0; i < SIZE; i++) { + expectedq[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expectedt[SIZE]; + for (int i = 0; i < SIZE; i++) { + expectedt[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + naive_quaternion_product(a_q, a_t, b_q, b_t, r_q, r_t); + expectedq[3] = + a_q[3] * b_q[3] - a_q[0] * b_q[0] - a_q[1] * b_q[1] - a_q[2] * b_q[2]; + expectedq[0] = + a_q[3] * b_q[0] + a_q[0] * b_q[3] + a_q[1] * b_q[2] - 
a_q[2] * b_q[1]; + expectedq[1] = + a_q[3] * b_q[1] + a_q[1] * b_q[3] + a_q[2] * b_q[0] - a_q[0] * b_q[2]; + expectedq[2] = + a_q[3] * b_q[2] + a_q[2] * b_q[3] + a_q[0] * b_q[1] - a_q[1] * b_q[0]; + + naive_point_product(a_q, b_t, expectedt); + for (int i = 0; i < 3; i++) { + expectedt[i] += a_t[i]; + } + for (int i = 0; i < SIZE; i++) { + printf("Calculated q: %f\n", r_q[i]); + printf("Expected q: %f\n", expectedq[i]); + assert(fabs(expectedq[i] - r_q[i]) < DELTA); + } + for (int i = 0; i < 3; i++) { + printf("Calculated t: %f\n", r_t[i]); + printf("Expected t: %f\n", expectedt[i]); + assert(fabs(expectedt[i] - r_t[i]) < DELTA); + } +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/stencil-2d-random-15-16.c b/src/dios-egraphs/Diospyros/randomized-tests/stencil-2d-random-15-16.c new file mode 100644 index 00000000..8432ebd7 --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/stencil-2d-random-15-16.c @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 15 +#define COL_SIZE 16 +#define F_SIZE 9 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() 
/ (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + stencil(orig_in, sol_out, filter_in); + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + expected[(r * COL_SIZE) + c] = temp; + } + } + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("calculated: %f\n", sol_out[i]); + printf("expected: %f\n", expected[i]); + printf("difference: %f\n", expected[i] - sol_out[i]); + } + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + assert(fabs(expected[i] - sol_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/stencil-2d-random-22-21.c b/src/dios-egraphs/Diospyros/randomized-tests/stencil-2d-random-22-21.c new file mode 100644 index 00000000..019fd41b --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/stencil-2d-random-22-21.c @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 22 +#define COL_SIZE 21 +#define F_SIZE 11 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for 
(int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + stencil(orig_in, sol_out, filter_in); + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + expected[(r * COL_SIZE) + c] = temp; + } + } + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("calculated: %f\n", sol_out[i]); + printf("expected: %f\n", expected[i]); + printf("difference: %f\n", expected[i] - sol_out[i]); + } + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + assert(fabs(expected[i] - sol_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/stencil-2d-random-50-50.c b/src/dios-egraphs/Diospyros/randomized-tests/stencil-2d-random-50-50.c new file mode 100644 index 00000000..c91b842e --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/stencil-2d-random-50-50.c @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include + +#define ROW_SIZE 50 +#define COL_SIZE 50 +#define F_SIZE 25 + +#define MAX_FLOAT 100.00f +#define DELTA 0.1f + +void stencil(float orig_in[ROW_SIZE * COL_SIZE], + float sol_out[ROW_SIZE * COL_SIZE], float filter_in[F_SIZE]) { + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + sol_out[(r * COL_SIZE) + c] = temp; + } + } +} + +int main(void) { + time_t t = time(NULL); + srand((unsigned)time(&t)); + float orig_in[ROW_SIZE * 
COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + orig_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float sol_out[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + sol_out[i] = 1; + } + float filter_in[F_SIZE]; + for (int i = 0; i < F_SIZE; i++) { + filter_in[i] = (float)rand() / (float)(RAND_MAX / MAX_FLOAT); + } + float expected[ROW_SIZE * COL_SIZE]; + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + expected[i] = 1; + } + stencil(orig_in, sol_out, filter_in); + for (int r = 0; r < ROW_SIZE - 2; r++) { + for (int c = 0; c < COL_SIZE - 2; c++) { + float temp = 0; + for (int k1 = 0; k1 < 3; k1++) { + for (int k2 = 0; k2 < 3; k2++) { + temp += filter_in[k1 * 3 + k2] * + orig_in[(r + k1) * COL_SIZE + c + k2]; + } + } + expected[(r * COL_SIZE) + c] = temp; + } + } + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + printf("calculated: %f\n", sol_out[i]); + printf("expected: %f\n", expected[i]); + printf("difference: %f\n", expected[i] - sol_out[i]); + } + for (int i = 0; i < ROW_SIZE * COL_SIZE; i++) { + assert(fabs(expected[i] - sol_out[i]) < DELTA); + } + return 0; +} \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/randomized-tests/turnt.toml b/src/dios-egraphs/Diospyros/randomized-tests/turnt.toml new file mode 100644 index 00000000..8f5b227e --- /dev/null +++ b/src/dios-egraphs/Diospyros/randomized-tests/turnt.toml @@ -0,0 +1 @@ +command = "bash ../test-runner.sh randomized-tests/{filename}" diff --git a/src/dios-egraphs/Diospyros/run_all.sh b/src/dios-egraphs/Diospyros/run_all.sh deleted file mode 100644 index e9992c70..00000000 --- a/src/dios-egraphs/Diospyros/run_all.sh +++ /dev/null @@ -1,5 +0,0 @@ -for file in llvm-tests/*.c -do - make run test="$file" - ./a.out -done \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/runt.sh b/src/dios-egraphs/Diospyros/runt.sh deleted file mode 100644 index c797722d..00000000 --- a/src/dios-egraphs/Diospyros/runt.sh +++ /dev/null @@ 
-1,15 +0,0 @@ -#! /bin/sh -args=("$@") -FILE=target/debug/libllvmlib.so - -if ! [ -f $FILE ]; then - FILE=target/debug/libllvmlib.dylib -fi - -if [[ "$OSTYPE" == "darwin"* ]]; then - CLANG=/usr/local/opt/llvm/bin/clang -else - CLANG=clang -fi - -$CLANG -Xclang -load -Xclang $FILE -emit-llvm -S -o - ${args[0]} | awk '/define/{flag=1; next} /}/{flag=0} flag' \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/runt.toml b/src/dios-egraphs/Diospyros/runt.toml deleted file mode 100644 index 19995b09..00000000 --- a/src/dios-egraphs/Diospyros/runt.toml +++ /dev/null @@ -1,6 +0,0 @@ -ver = "0.3.1" - -[[tests]] -name = "llvm-diospyros tests" -paths = [ "llvm-tests/*.c" ] -cmd = "bash runt.sh {}" \ No newline at end of file diff --git a/src/dios-egraphs/Diospyros/src/lib.rs b/src/dios-egraphs/Diospyros/src/lib.rs index a83ff639..31c7e53e 100644 --- a/src/dios-egraphs/Diospyros/src/lib.rs +++ b/src/dios-egraphs/Diospyros/src/lib.rs @@ -1,2184 +1,1982 @@ extern crate llvm_sys as llvm; -use dioslib::{config, rules, veclang::VecLang}; +use dioslib::{ + config::{self, vector_width}, + rules, + veclang::VecLang, +}; use egg::*; use libc::size_t; use llvm::{core::*, prelude::*, LLVMOpcode::*, LLVMRealPredicate}; use std::{ - cmp, - collections::{BTreeMap, BTreeSet}, - ffi::CStr, - mem, - os::raw::c_char, - slice::from_raw_parts, + cmp, + collections::{BTreeMap, BTreeSet, HashMap}, + os::raw::c_char, + slice::from_raw_parts, convert::TryInto, }; extern "C" { - fn llvm_index(val: LLVMValueRef, index: i32) -> i32; - fn llvm_name(val: LLVMValueRef) -> *const c_char; - fn isa_unop(val: LLVMValueRef) -> bool; - fn isa_bop(val: LLVMValueRef) -> bool; - fn isa_constant(val: LLVMValueRef) -> bool; - fn isa_constfp(val: LLVMValueRef) -> bool; - fn isa_gep(val: LLVMValueRef) -> bool; - fn isa_load(val: LLVMValueRef) -> bool; - fn isa_store(val: LLVMValueRef) -> bool; - fn isa_argument(val: LLVMValueRef) -> bool; - fn isa_call(val: LLVMValueRef) -> bool; - fn isa_fptrunc(val: 
LLVMValueRef) -> bool; - fn isa_fpext(val: LLVMValueRef) -> bool; - fn isa_alloca(val: LLVMValueRef) -> bool; - fn isa_phi(val: LLVMValueRef) -> bool; - fn _isa_sextint(val: LLVMValueRef) -> bool; - fn isa_sitofp(val: LLVMValueRef) -> bool; - fn isa_constaggregatezero(val: LLVMValueRef) -> bool; - fn _isa_constaggregate(val: LLVMValueRef) -> bool; - fn isa_integertype(val: LLVMValueRef) -> bool; - fn isa_intptr(val: LLVMValueRef) -> bool; - fn isa_floattype(val: LLVMValueRef) -> bool; - fn isa_bitcast(val: LLVMValueRef) -> bool; - fn isa_sqrt32(val: LLVMValueRef) -> bool; - fn isa_sqrt64(val: LLVMValueRef) -> bool; - fn get_constant_float(val: LLVMValueRef) -> f32; - fn _dfs_llvm_value_ref(val: LLVMValueRef, match_val: LLVMValueRef) -> bool; - fn build_constant_float(n: f64, context: LLVMContextRef) -> LLVMValueRef; -} - -// Note: We use BTreeMaps to enforce ordering in the map -// Without ordering, tests become flaky and start failing a lot more often -// We do not use HashMaps for this reason as ordering is not enforced. -// GEPMap : Maps the array name and array offset as symbols to the GEP -// LLVM Value Ref that LLVM Generated -type GEPMap = BTreeMap<(Symbol, Symbol), LLVMValueRef>; -// VarMap : Maps a symbol to a llvm value ref representing a variable -// type VarMap = BTreeMap; -// // BopMap : Maps a binary oeprator llvm value ref to an ID, indicating a -// // binary operator has been seen. Binary Operators be ordered in the order -// // they were generated in LLVM, which is earliest to latest in code. -// type BopMap = BTreeMap; -// // ValueVec : A vector of LLVM Value Refs for which we must do extract element -// // for after vectorization. 
-// type ValueVec = Vec; - -// const SQRT_OPERATOR: i32 = 3; -// const BINARY_OPERATOR: i32 = 2; -// static mut SYMBOL_IDX: i32 = 0; -static mut ARG_IDX: i32 = 0; -static mut CALL_IDX: i32 = 0; -static mut NODE_IDX: u32 = 0; - -// unsafe fn gen_symbol_name() -> String { -// SYMBOL_IDX += 1; -// let string = "SYMBOL".to_string(); -// let result = format!("{}{}", string, SYMBOL_IDX.to_string()); -// result -// } - -unsafe fn gen_node_idx() -> u32 { - NODE_IDX += 1; - return NODE_IDX; -} - -unsafe fn gen_arg_name() -> String { - ARG_IDX += 1; - let string = "ARGUMENT".to_string(); - let result = format!("{}{}", string, ARG_IDX.to_string()); - result -} - -unsafe fn gen_call_name() -> String { - CALL_IDX += 1; - let string = "CALL".to_string(); - let result = format!("{}{}", string, CALL_IDX.to_string()); - result + fn _llvm_index(val: LLVMValueRef, index: i32) -> i32; + fn _llvm_name(val: LLVMValueRef) -> *const c_char; + fn _isa_unop(val: LLVMValueRef) -> bool; + fn _isa_bop(val: LLVMValueRef) -> bool; + fn isa_constant(val: LLVMValueRef) -> bool; + fn isa_constfp(val: LLVMValueRef) -> bool; + fn _isa_gep(val: LLVMValueRef) -> bool; + fn isa_load(val: LLVMValueRef) -> bool; + fn isa_store(val: LLVMValueRef) -> bool; + fn isa_argument(val: LLVMValueRef) -> bool; + fn _isa_call(val: LLVMValueRef) -> bool; + fn _isa_fptrunc(val: LLVMValueRef) -> bool; + fn _isa_fpext(val: LLVMValueRef) -> bool; + fn _isa_alloca(val: LLVMValueRef) -> bool; + fn _isa_phi(val: LLVMValueRef) -> bool; + fn _isa_sextint(val: LLVMValueRef) -> bool; + fn _isa_sitofp(val: LLVMValueRef) -> bool; + fn isa_constaggregatezero(val: LLVMValueRef) -> bool; + fn _isa_constaggregate(val: LLVMValueRef) -> bool; + fn isa_integertype(val: LLVMValueRef) -> bool; + fn _isa_intptr(val: LLVMValueRef) -> bool; + fn isa_floatptr(val: LLVMValueRef) -> bool; + fn _isa_floattype(val: LLVMValueRef) -> bool; + fn _isa_bitcast(val: LLVMValueRef) -> bool; + fn isa_sqrt32(val: LLVMValueRef) -> bool; + fn _isa_sqrt64(val: 
LLVMValueRef) -> bool; + fn get_constant_float(val: LLVMValueRef) -> f32; + fn build_constant_float(n: f64, context: LLVMContextRef) -> LLVMValueRef; + fn generate_opaque_pointer(element_type: LLVMTypeRef) -> LLVMTypeRef; +} + +static mut ARG_IDX: u32 = 0; +static mut REG_IDX: u32 = 0; +static mut GET_IDX: u32 = 0; + +unsafe fn gen_arg_idx() -> u32 { + ARG_IDX += 1; + return ARG_IDX; +} + +unsafe fn gen_reg_idx() -> u32 { + REG_IDX += 1; + return REG_IDX; +} + +unsafe fn gen_get_idx() -> u32 { + GET_IDX += 1; + return GET_IDX; +} + +// Map from function name to LLVM function. +// Have to use Vec because BTreeMap is unstable in constant contexts in Rust 1.58. New versions +// of Rust break LLVM 11.0 so I cannot upgrade. +static mut FUNC_NAME2LLVM_FUNC: Vec<(&str, LLVMValueRef)> = Vec::new(); + +static FMA_NAME: &str = "llvm.fma.v4f32"; +static _SCATTER: &str = "llvm.masked.scatter.v4f32.v4p0f32"; +static GATHER: &str = "llvm.masked.gather.v4f32.v4p0f32"; + +unsafe fn get_func_llvm_value(name: &str) -> Option { + for (func_name, value) in FUNC_NAME2LLVM_FUNC.clone() { + if func_name == name { + return Some(value); + } + } + return None; +} + +static mut VECTORIZATION_MAP : Vec<(Vec, RecExpr, HashMap)> = Vec::new(); + +unsafe fn get_vectorization(vec: Vec) -> Option<(RecExpr, HashMap)> { + for (stringvec, vectorization, map) in VECTORIZATION_MAP.clone() { + if stringvec == vec { + return Some((vectorization, map)); + } + } + return None; +} + +// Reference Comparison: https://www.reddit.com/r/rust/comments/2r3wjk/is_there_way_to_compare_objects_by_address_in_rust/ +// Compares whether addresses of LLVMValueRefs are the same.
+// Not the contents of the Value Refs +fn cmp_val_ref_address(a1: &llvm::LLVMValue, a2: &llvm::LLVMValue) -> bool { + a1 as *const _ == a2 as *const _ +} + +fn _cmp_typ(a1: &LLVMTypeRef, a2: &LLVMTypeRef) -> bool { + a1 as *const _ == a2 as *const _ } /// Converts LLVMValueRef binop to equivalent VecLang Binop node unsafe fn choose_binop(bop: &LLVMValueRef, ids: [Id; 2]) -> VecLang { - match LLVMGetInstructionOpcode(*bop) { - LLVMFAdd => VecLang::Add(ids), - LLVMFMul => VecLang::Mul(ids), - LLVMFSub => VecLang::Minus(ids), - LLVMFDiv => VecLang::Div(ids), - _ => panic!("Choose_Binop: Opcode Match Error"), - } + match LLVMGetInstructionOpcode(*bop) { + LLVMFAdd => VecLang::Add(ids), + LLVMFMul => VecLang::Mul(ids), + LLVMFSub => VecLang::Minus(ids), + LLVMFDiv => VecLang::Div(ids), + _ => panic!("Choose_Binop: Opcode Match Error"), + } } -// /// Convert the sqrt into a unique symbol, which maps to the sqet argument LLVMValueRef -// /// And then Make sqrt point to that unique symbol. -// /// On the other side, the symbol gets retranslted to the LLVMValueRef argument that came in -// /// and then the sqrt takes the square root of it. -// unsafe fn to_expr_sqrt( -// sqrt_ref: &LLVMValueRef, -// var_map: &mut VarMap, -// enode_vec: &mut Vec, -// ) -> () { -// let symbol = Symbol::from(gen_symbol_name()); -// enode_vec.push(VecLang::Symbol(symbol)); -// let symbol_idx = enode_vec.len() - 1; -// var_map.insert(symbol, *sqrt_ref); -// enode_vec.push(VecLang::Sqrt([Id::from(symbol_idx)])); -// } - -// /// Converts LLVMValueRef constant to a VecLang Num. -// unsafe fn to_expr_constant( -// operand: &LLVMValueRef, -// vec: &mut Vec, -// ids: &mut [egg::Id; 2], -// id_index: usize, -// ) -> () { -// let value = get_constant_float(*operand); -// vec.push(VecLang::Num(value as i32)); -// ids[id_index] = Id::from(vec.len() - 1); -// } - -// /// Converts LLVMValueRef GEP to a VecLang Symbol with variable name. 
-// unsafe fn to_expr_var( -// var_operand: &LLVMValueRef, -// enode_vec: &mut Vec, -// ids: &mut [egg::Id; 2], -// id_index: usize, -// var_map: &mut VarMap, -// ) -> () { -// let var_name = CStr::from_ptr(llvm_name(*var_operand)).to_str().unwrap(); -// let symbol = Symbol::from(var_name); -// enode_vec.push(VecLang::Symbol(symbol)); -// ids[id_index] = Id::from(enode_vec.len() - 1); -// (*var_map).insert(symbol, *var_operand); -// } - -// /// Converts LLVMValueRef GEP to a VecLang Get and VecLang Symbol for array -// /// and VecLang Symbol for offset. -// unsafe fn to_expr_gep( -// gep_operand: &LLVMValueRef, -// ids: &mut [egg::Id; 2], -// id_index: usize, -// enode_vec: &mut Vec, -// gep_map: &mut GEPMap, -// ) -> () { -// let array_var_name = CStr::from_ptr(llvm_name(*gep_operand)).to_str().unwrap(); -// enode_vec.push(VecLang::Symbol(Symbol::from(array_var_name))); -// let array_var_idx = enode_vec.len() - 1; -// // --- get offsets for multidimensional arrays ---- -// let num_gep_operands = LLVMGetNumOperands(*gep_operand); -// let mut indices = Vec::new(); -// for operand_idx in 1..num_gep_operands { -// let array_offset = llvm_index(*gep_operand, operand_idx); -// indices.push(array_offset); -// } -// let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); -// let offsets_symbol = Symbol::from(&offsets_string); -// enode_vec.push(VecLang::Symbol(offsets_symbol)); -// let array_offset_idx = enode_vec.len() - 1; - -// enode_vec.push(VecLang::Get([ -// Id::from(array_var_idx), -// Id::from(array_offset_idx), -// ])); - -// ids[id_index] = Id::from(enode_vec.len() - 1); - -// let array_name_symbol = Symbol::from(array_var_name); -// gep_map.insert((array_name_symbol, offsets_symbol), *gep_operand); -// } - -// /// Makes binary operators as "used", which means that no extract is needed -// /// for these binary operators. 
-// /// -// /// For example: -// /// x = (3 + z) + (2 + y) -// /// will record 3 + z, 2 + y as used in the final addition (3 + z) + (2 + y)/ -// /// Only 1 extraction is needed (to assign to x's location). Not 3. -// unsafe fn mark_used_bops( -// operand: &LLVMValueRef, -// ids: &mut [egg::Id; 2], -// id_index: usize, -// bop_map: &mut BopMap, -// used_bop_ids: &mut Vec, -// ) -> bool { -// let mut changed = false; -// for (&prev_used_bop, &mut prev_used_id) in bop_map { -// if dfs_llvm_value_ref(*operand, prev_used_bop) { -// ids[id_index] = prev_used_id; -// used_bop_ids.push(prev_used_id); -// changed |= true; -// } -// } -// return changed; -// } - -// /// Converts LLVMValueRef operand to corresponding VecLang node -// unsafe fn to_expr_operand( -// operand: &LLVMValueRef, -// bop_map: &mut BopMap, -// ids: &mut [egg::Id; 2], -// id_index: usize, -// used_bop_ids: &mut Vec, -// enode_vec: &mut Vec, -// gep_map: &mut GEPMap, -// var_map: &mut VarMap, -// ) -> () { -// let removed_bops = mark_used_bops(operand, ids, id_index, bop_map, used_bop_ids); -// if removed_bops { -// return (); -// } -// if bop_map.contains_key(&operand) { -// let used_id = *bop_map.get(&operand).expect("Expected key in map"); -// ids[id_index] = used_id; -// used_bop_ids.push(used_id); -// } else if isa_bop(*operand) { -// } else if isa_constant(*operand) { -// to_expr_constant(&operand, enode_vec, ids, id_index); -// } else if isa_load(*operand) { -// let inner_operand = LLVMGetOperand(*operand, 0); -// if isa_gep(inner_operand) { -// to_expr_gep(&inner_operand, ids, id_index, enode_vec, gep_map); -// } else { -// // assume load of some temporary/global variable -// to_expr_var(operand, enode_vec, ids, id_index, var_map); -// } -// } else { -// panic!("Cannot handle LLVM IR Operand.") -// } -// } - -// /// Pads a vector to be always the Vector Lane Width. 
-// fn pad_vector(binop_vec: &Vec, enode_vec: &mut Vec) -> () { -// let width = config::vector_width(); -// let mut length = binop_vec.len(); -// let mut vec_indices = Vec::new(); -// let mut idx = 0; -// while length > width { -// let mut width_vec = Vec::new(); -// for _ in 0..width { -// width_vec.push(binop_vec[idx]); -// idx += 1; -// length -= 1; -// } -// enode_vec.push(VecLang::Vec(width_vec.into_boxed_slice())); -// vec_indices.push(enode_vec.len() - 1); -// } -// // wrap up extras at end -// let diff = width - length; -// let mut extras = Vec::new(); -// for _ in 0..diff { -// enode_vec.push(VecLang::Num(0)); -// extras.push(enode_vec.len() - 1); -// } -// let mut final_vec = Vec::new(); -// let original_length = binop_vec.len(); -// for i in idx..original_length { -// final_vec.push(binop_vec[i]); -// } -// for id in extras.iter() { -// final_vec.push(Id::from(*id)); -// } -// enode_vec.push(VecLang::Vec(final_vec.into_boxed_slice())); -// vec_indices.push(enode_vec.len() - 1); -// // create concats -// let mut num_concats = vec_indices.len() - 1; -// let mut idx = 0; -// let mut prev_id = Id::from(vec_indices[idx]); -// idx += 1; -// while num_concats > 0 { -// let concat = VecLang::Concat([prev_id, Id::from(vec_indices[idx])]); -// enode_vec.push(concat); -// prev_id = Id::from(enode_vec.len() - 1); -// idx += 1; -// num_concats -= 1; -// } -// } - -// /// Converts LLVMValueRef to a corresponding VecLang expression, as well as a GEPMap, -// /// which maps each LLVM gep expression to a symbol representing the array name -// /// and a symbol representing the array offset, a var map, which maps a symbol to the -// /// LLVMValueRef representing the variable, and a ValueVec, which reprsents -// /// the values we generate extract instructions on. 
-// pub fn to_expr( -// bb_vec: &[LLVMValueRef], -// operand_types: &[i32], -// ) -> (RecExpr, GEPMap, VarMap, ValueVec) { -// let (mut enode_vec, mut bops_vec, mut ops_to_replace, mut used_bop_ids) = -// (Vec::new(), Vec::new(), Vec::new(), Vec::new()); -// let (mut gep_map, mut var_map, mut bop_map) = (BTreeMap::new(), BTreeMap::new(), BTreeMap::new()); -// let mut ids = [Id::from(0); 2]; -// for (i, bop) in bb_vec.iter().enumerate() { -// unsafe { -// if operand_types[i] == BINARY_OPERATOR { -// // to_expr on left and then right operands -// to_expr_operand( -// &LLVMGetOperand(*bop, 0), -// &mut bop_map, -// &mut ids, -// 0, -// &mut used_bop_ids, -// &mut enode_vec, -// &mut gep_map, -// &mut var_map, -// ); -// to_expr_operand( -// &LLVMGetOperand(*bop, 1), -// &mut bop_map, -// &mut ids, -// 1, -// &mut used_bop_ids, -// &mut enode_vec, -// &mut gep_map, -// &mut var_map, -// ); -// // lhs bop rhs -// enode_vec.push(choose_binop(bop, ids)); -// } else if operand_types[i] == SQRT_OPERATOR { -// // currently fails to generate correct code or optimize. 
-// // to_expr_sqrt(bop, &mut var_map, &mut enode_vec); -// } -// } -// // add in the binary/unary operator to the bops_vec list -// let id = Id::from(enode_vec.len() - 1); -// bops_vec.push(id); -// ops_to_replace.push((*bop, id)); -// bop_map.insert(*bop, id); -// // remove binops that are used as part of another binop -// for used_id in used_bop_ids.iter() { -// if bops_vec.contains(used_id) { -// let index = bops_vec -// .iter() -// .position(|&_id| _id == *used_id) -// .expect("Require used_id in vector"); -// bops_vec.remove(index); -// } -// } -// } -// // decompose bops_vec into width number of binops -// pad_vector(&bops_vec, &mut enode_vec); - -// // remove binary ops that were used, and thus not the ones we want to replace directly -// let mut final_ops_to_replace = Vec::new(); -// for (bop, id) in ops_to_replace.iter() { -// if !used_bop_ids.contains(id) { -// final_ops_to_replace.push(*bop); -// } -// } - -// return ( -// RecExpr::from(enode_vec), -// gep_map, -// var_map, -// final_ops_to_replace, -// ); -// } - /// Translates VecLang binop expression node to the corresponding LLVMValueRef unsafe fn translate_binop( - enode: &VecLang, - left: LLVMValueRef, - right: LLVMValueRef, - builder: LLVMBuilderRef, - name: *const c_char, + enode: &VecLang, + left: LLVMValueRef, + right: LLVMValueRef, + builder: LLVMBuilderRef, + name: *const c_char, ) -> LLVMValueRef { - match enode { - VecLang::VecAdd(_) | VecLang::Add(_) => LLVMBuildFAdd(builder, left, right, name), - VecLang::VecMul(_) | VecLang::Mul(_) => LLVMBuildFMul(builder, left, right, name), - VecLang::VecMinus(_) | VecLang::Minus(_) => LLVMBuildFSub(builder, left, right, name), - VecLang::VecDiv(_) | VecLang::Div(_) => LLVMBuildFDiv(builder, left, right, name), - // use binary bitwise operators for or / and - VecLang::Or(_) => LLVMBuildOr(builder, left, right, name), - VecLang::And(_) => LLVMBuildAnd(builder, left, right, name), - VecLang::Lt(_) => LLVMBuildFCmp(builder, 
LLVMRealPredicate::LLVMRealOLT, left, right, name), - _ => panic!("Not a vector or scalar binop."), - } + match enode { + VecLang::VecAdd(_) | VecLang::Add(_) => LLVMBuildFAdd(builder, left, right, name), + VecLang::VecMul(_) | VecLang::Mul(_) => LLVMBuildFMul(builder, left, right, name), + VecLang::VecMinus(_) | VecLang::Minus(_) => LLVMBuildFSub(builder, left, right, name), + VecLang::VecDiv(_) | VecLang::Div(_) => LLVMBuildFDiv(builder, left, right, name), + // use binary bitwise operators for or / and + VecLang::Or(_) => LLVMBuildOr(builder, left, right, name), + VecLang::And(_) => LLVMBuildAnd(builder, left, right, name), + VecLang::Lt(_) => LLVMBuildFCmp(builder, LLVMRealPredicate::LLVMRealOLT, left, right, name), + _ => panic!("Not a vector or scalar binop."), + } } /// Translates VecLang unop expression node to the corresponding LLVMValueRef unsafe fn translate_unop( - enode: &VecLang, - n: LLVMValueRef, - builder: LLVMBuilderRef, - context: LLVMContextRef, - module: LLVMModuleRef, - name: *const c_char, + enode: &VecLang, + n: LLVMValueRef, + builder: LLVMBuilderRef, + context: LLVMContextRef, + module: LLVMModuleRef, + name: *const c_char, ) -> LLVMValueRef { - match enode { - VecLang::Sgn(_) => { - let one = LLVMConstReal(LLVMFloatTypeInContext(context), 1 as f64); - let param_types = [ - LLVMFloatTypeInContext(context), - LLVMFloatTypeInContext(context), - ] - .as_mut_ptr(); - let fn_type = LLVMFunctionType(LLVMFloatTypeInContext(context), param_types, 2, 0 as i32); - let func = LLVMAddFunction(module, b"llvm.copysign.f32\0".as_ptr() as *const _, fn_type); - let args = [one, n].as_mut_ptr(); - LLVMBuildCall(builder, func, args, 2, name) - } - VecLang::Sqrt(_) => { - let param_types = [LLVMFloatTypeInContext(context)].as_mut_ptr(); - let fn_type = LLVMFunctionType(LLVMFloatTypeInContext(context), param_types, 1, 0 as i32); - let func = LLVMAddFunction(module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); - let args = [n].as_mut_ptr(); - 
LLVMBuildCall(builder, func, args, 1, name) - } - VecLang::Neg(_) => LLVMBuildFNeg(builder, n, name), - _ => panic!("Not a scalar unop."), - } + match enode { + VecLang::Sgn(_) => { + let one = LLVMConstReal(LLVMFloatTypeInContext(context), 1 as f64); + let param_types = [ + LLVMFloatTypeInContext(context), + LLVMFloatTypeInContext(context), + ] + .as_mut_ptr(); + let fn_type = + LLVMFunctionType(LLVMFloatTypeInContext(context), param_types, 2, 0 as i32); + let func = + LLVMAddFunction(module, b"llvm.copysign.f32\0".as_ptr() as *const _, fn_type); + let args = [one, n].as_mut_ptr(); + LLVMBuildCall(builder, func, args, 2, name) + } + VecLang::Sqrt(_) => { + let param_types = [LLVMFloatTypeInContext(context)].as_mut_ptr(); + let fn_type = + LLVMFunctionType(LLVMFloatTypeInContext(context), param_types, 1, 0 as i32); + let func = LLVMAddFunction(module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); + let args = [n].as_mut_ptr(); + LLVMBuildCall(builder, func, args, 1, name) + } + VecLang::Neg(_) => LLVMBuildFNeg(builder, n, name), + _ => panic!("Not a scalar unop."), + } } -/// translate_get converts a VecLang Get Node to the corresponding LLVM Ir array name and -/// LLVM IR offset, as symbols. -unsafe fn translate_get(get: &VecLang, enode_vec: &[VecLang]) -> (Symbol, Symbol) { - match get { - VecLang::Get([sym, i]) => match (&enode_vec[usize::from(*sym)], &enode_vec[usize::from(*i)]) { - (VecLang::Symbol(name), VecLang::Symbol(offset)) => { - return (*name, *offset); - } - _ => panic!("Match Error: Expects Pair of Symbol, Symbol."), - }, - _ => panic!("Match Error in Translate Get: Expects Get Enode."), - } +/// Calculate the cost of an Egg expression, used to decide whether an optimization +/// should go through or not. NOTE: currently a placeholder that takes no expression +/// and always returns 0. +pub fn calculate_cost() -> u32 { + return 0; } -// /// translate converts a VecLang expression to the corresponding LLVMValueRef.
-// unsafe fn translate( -// enode: &VecLang, -// vec: &[VecLang], -// gep_map: &GEPMap, -// var_map: &VarMap, -// builder: LLVMBuilderRef, -// module: LLVMModuleRef, -// ) -> LLVMValueRef { -// match enode { -// VecLang::Symbol(s) => *var_map.get(s).expect("Var map lookup error"), -// VecLang::Num(n) => LLVMConstReal(LLVMFloatTypeInContext(context), *n as f64), -// VecLang::Get(..) => { -// let (array_name, array_offsets) = translate_get(enode, vec); -// let gep_value = gep_map -// .get(&(array_name, array_offsets)) -// .expect("Symbol map lookup error"); -// LLVMBuildLoad(builder, *gep_value, b"\0".as_ptr() as *const _) -// } -// VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { -// let idvec = boxed_ids.to_vec(); -// let idvec_len = idvec.len(); -// let mut zeros = Vec::new(); -// for _ in 0..idvec_len { -// zeros.push(LLVMConstReal(LLVMFloatTypeInContext(context), 0 as f64)); -// } -// let zeros_ptr = zeros.as_mut_ptr(); -// let mut vector = LLVMConstVector(zeros_ptr, idvec.len() as u32); -// for (idx, &eggid) in idvec.iter().enumerate() { -// let elt = &vec[usize::from(eggid)]; -// let elt_val = translate(elt, vec, gep_map, var_map, builder, module); -// vector = LLVMBuildInsertElement( -// builder, -// vector, -// elt_val, -// LLVMConstInt(LLVMIntTypeInContext(context, 32), idx as u64, 0), -// b"\0".as_ptr() as *const _, -// ); -// } -// vector -// } -// VecLang::VecAdd([l, r]) -// | VecLang::VecMinus([l, r]) -// | VecLang::VecMul([l, r]) -// | VecLang::VecDiv([l, r]) -// | VecLang::Add([l, r]) -// | VecLang::Minus([l, r]) -// | VecLang::Mul([l, r]) -// | VecLang::Div([l, r]) -// | VecLang::Or([l, r]) -// | VecLang::And([l, r]) -// | VecLang::Lt([l, r]) => { -// let left = translate( -// &vec[usize::from(*l)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// let right = translate( -// &vec[usize::from(*r)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// translate_binop(enode, 
left, right, builder, b"\0".as_ptr() as *const _) -// } -// VecLang::Concat([v1, v2]) => { -// let trans_v1 = translate( -// &vec[usize::from(*v1)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// let trans_v2 = translate( -// &vec[usize::from(*v2)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// // manually concatenate 2 vectors by using a LLVM shuffle operation. -// let v1_type = LLVMTypeOf(trans_v1); -// let v1_size = LLVMGetVectorSize(v1_type); -// let v2_type = LLVMTypeOf(trans_v2); -// let v2_size = LLVMGetVectorSize(v2_type); -// let size = v1_size + v2_size; -// let mut indices = Vec::new(); -// for i in 0..size { -// indices.push(LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0)); -// } -// let mask = indices.as_mut_ptr(); -// let mask_vector = LLVMConstVector(mask, size); -// LLVMBuildShuffleVector( -// builder, -// trans_v1, -// trans_v2, -// mask_vector, -// b"\0".as_ptr() as *const _, -// ) -// } -// VecLang::VecMAC([acc, v1, v2]) => { -// let trans_acc = translate( -// &vec[usize::from(*acc)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// let trans_v1 = translate( -// &vec[usize::from(*v1)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// let trans_v2 = translate( -// &vec[usize::from(*v2)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// let vec_type = LLVMTypeOf(trans_acc); - -// let param_types = [vec_type, vec_type, vec_type].as_mut_ptr(); -// let fn_type = LLVMFunctionType(vec_type, param_types, 3, 0 as i32); -// let func = LLVMAddFunction(module, b"llvm.fma.f32\0".as_ptr() as *const _, fn_type); -// let args = [trans_v1, trans_v2, trans_acc].as_mut_ptr(); -// LLVMBuildCall(builder, func, args, 3, b"\0".as_ptr() as *const _) -// } -// // TODO: VecNeg, VecSqrt, VecSgn all have not been tested, need test cases. 
-// // TODO: LLVM actually supports many more vector intrinsics, including -// // vector sine/cosine instructions for floats. -// VecLang::VecNeg([v]) => { -// let neg_vector = translate( -// &vec[usize::from(*v)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// LLVMBuildFNeg(builder, neg_vector, b"\0".as_ptr() as *const _) -// } -// VecLang::VecSqrt([v]) => { -// let sqrt_vec = translate( -// &vec[usize::from(*v)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// let vec_type = LLVMTypeOf(sqrt_vec); -// let param_types = [vec_type].as_mut_ptr(); -// let fn_type = LLVMFunctionType(vec_type, param_types, 1, 0 as i32); -// let func = LLVMAddFunction(module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); -// let args = [sqrt_vec].as_mut_ptr(); -// LLVMBuildCall(builder, func, args, 1, b"\0".as_ptr() as *const _) -// } -// // compliant with c++ LibMath copysign function, which differs with sgn at x = 0. -// VecLang::VecSgn([v]) => { -// let sgn_vec = translate( -// &vec[usize::from(*v)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// let vec_type = LLVMTypeOf(sgn_vec); -// let vec_size = LLVMGetVectorSize(vec_type); -// let mut ones = Vec::new(); -// for _ in 0..vec_size { -// ones.push(LLVMConstReal(LLVMFloatTypeInContext(context), 1 as f64)); -// } -// let ones_ptr = ones.as_mut_ptr(); -// let ones_vector = LLVMConstVector(ones_ptr, vec_size); -// let param_types = [vec_type, vec_type].as_mut_ptr(); -// let fn_type = LLVMFunctionType(vec_type, param_types, 2, 0 as i32); -// let func = LLVMAddFunction(module, b"llvm.copysign.f32\0".as_ptr() as *const _, fn_type); -// let args = [ones_vector, sgn_vec].as_mut_ptr(); -// LLVMBuildCall(builder, func, args, 2, b"\0".as_ptr() as *const _) -// } -// VecLang::Sgn([n]) | VecLang::Sqrt([n]) | VecLang::Neg([n]) => { -// let number = translate( -// &vec[usize::from(*n)], -// vec, -// gep_map, -// var_map, -// builder, -// module, -// ); -// 
translate_unop(enode, number, builder, module, b"\0".as_ptr() as *const _) -// } -// VecLang::Ite(..) => panic!("Ite is not handled."), -// } -// } - -// /// Convert a Veclang `expr` to LLVM IR code in place, using an LLVM builder. -// unsafe fn to_llvm( -// module: LLVMModuleRef, -// expr: RecExpr, -// gep_map: &GEPMap, -// var_map: &VarMap, -// ops_to_replace: &ValueVec, -// builder: LLVMBuilderRef, -// ) -> () { -// let vec = expr.as_ref(); -// let last = vec -// .last() -// .expect("No match for last element of vector of Egg Terms."); - -// // create vectorized instructions. -// let vector = translate(last, vec, gep_map, var_map, builder, module); - -// // for each binary operation that has been vectorized AND requires replacement -// // we extract the correct index from the vector and -// // determine the store to that binary op, copy it and move it after the extraction -// for (i, op) in ops_to_replace.iter().enumerate() { -// let index = LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0); -// let extracted_value = -// LLVMBuildExtractElement(builder, vector, index, b"\0".as_ptr() as *const _); -// // figure out where the next store is located, after the binary operation to replace. -// let mut store_instr = *op; -// // assumes there is a store next: could segfault or loop forever if not. -// // WARNING: In particular, could infinitely loop under -02/-03 optimizations. 
-// while !isa_store(store_instr) { -// store_instr = LLVMGetNextInstruction(store_instr); -// } -// let cloned_store = LLVMInstructionClone(store_instr); -// LLVMSetOperand(cloned_store, 0, extracted_value); -// LLVMInsertIntoBuilder(builder, cloned_store); -// // erase stores -> this was affecting a load and then a store to the same -// // location in matrix multiply -// LLVMInstructionEraseFromParent(store_instr); -// } -// } +/// Struct representing load info, same as on C++ side +#[repr(C)] +#[derive(Debug, Clone)] +pub struct load_info_t { + pub load: LLVMValueRef, + pub base_id: i32, + pub offset: i32, +} -/// Main function to optimize: Takes in a basic block of instructions, -/// optimizes it, and then translates it to LLVM IR code, in place. +/// Value Numbering Metadata +#[derive(Debug, Clone)] +struct ValueNumberingState<'a> { + egg_nodes_vector: &'a [VecLang], +} -#[repr(C)] -pub struct IntLLVMPair { - node_int: u32, - arg: LLVMValueRef, +/// Value Numbering Mutable Metadata +#[derive(Debug, Clone)] +struct ValueNumberingMutableState { + node2value: HashMap, + currvalue: u32, } -#[repr(C)] -pub struct LLVMPair { - original_value: LLVMValueRef, - new_value: LLVMValueRef, + +unsafe fn value_number_args(op_name: String, args: &[Id], immd: &ValueNumberingState, md: &mut ValueNumberingMutableState) -> Vec { + let mut vec0 = vec![]; + for arg in args.into_iter() { + let node = &immd.egg_nodes_vector[usize::from(*arg)].clone(); + let vec = value_number_store_tree(node, immd, md); + vec0.extend(vec); + } + let mut final_vec1 = vec![op_name, "(".to_string()]; + let final_vec2 = vec![")".to_string()]; + final_vec1.extend(vec0); + final_vec1.extend(final_vec2); + return final_vec1; } -#[repr(C)] -pub struct VectorPointerSize { - llvm_pointer: *const LLVMPair, - llvm_pointer_size: size_t, +unsafe fn value_number_boxed_args(op_name: String, args: &Box<[Id]>, immd: &ValueNumberingState, md: &mut ValueNumberingMutableState) -> Vec { + let mut vec0 = vec![]; + for arg 
in args.into_iter() { + let vec = value_number_store_tree(&immd.egg_nodes_vector[usize::from(*arg)], immd, md); + vec0.extend(vec); + } + let mut final_vec1 = vec![op_name, "(".to_string()]; + let final_vec2 = vec![")".to_string()]; + final_vec1.extend(vec0); + final_vec1.extend(final_vec2); + return final_vec1; +} + +unsafe fn op_to_vec_string(op_name:String, val:i32) -> Vec { + let str0 = op_name; + let str1 = "("; + let str2 = val.to_string(); + let str3 = ")"; + return vec![format!("{}{}{}{}", str0, str1, str2, str3).to_string()]; +} + +unsafe fn value_number_check_node(op_name: String, key: VecLang, _immd: &ValueNumberingState, md: &mut ValueNumberingMutableState) -> Vec { + if md.node2value.contains_key(&key) { + let val = *md.node2value.get(&key).expect("Key should exist"); + return op_to_vec_string(op_name, val.try_into().unwrap()); + } else { + let val = md.currvalue; + md.node2value.insert(key, md.currvalue); + md.currvalue += 1; + return op_to_vec_string(op_name, val.try_into().unwrap()); + } +} + +unsafe fn get_array_offset_or_base(egg_node: &VecLang) -> i32 { + match egg_node { + VecLang::Num(n) => *n, + _ => panic!("Array Offset or Base must be in a Num construct"), + } +} + +unsafe fn load_to_tree(arg1: &Id, _arg2: &Id, arg3: &Id, immd: &ValueNumberingState, md: &mut ValueNumberingMutableState) -> Vec { + let mut load_vec = vec![]; + let node1 = &immd.egg_nodes_vector[usize::from(*arg1)].clone(); + let vec1 = value_number_store_tree(node1, immd, md); + let node3 = &immd.egg_nodes_vector[usize::from(*arg3)].clone(); + let array_offset = get_array_offset_or_base(node3); + load_vec.extend(vec1.clone()); + load_vec.extend(vec1); // ignore base, repeat vec1 + let final_vec2 = vec![(array_offset % vector_width() as i32).to_string()]; + let final_vec3 = vec![")".to_string()]; + load_vec.extend(final_vec2); + load_vec.extend(final_vec3); + return load_vec; +} + +unsafe fn value_number_store_tree(egg_node: &VecLang, immd: &ValueNumberingState, md: &mut 
ValueNumberingMutableState) -> Vec { + match egg_node { + VecLang::Symbol(..) => { + panic!("Symbol was found. Value Numbering does not handle symbol nodes.") + } + VecLang::Get(..) => { + panic!("Get was found. Value Numbering does not handle get nodes.") + } + VecLang::Gep(i) => return value_number_check_node(String::from("Gep"), VecLang::Gep(*i), immd, md), + VecLang::Load([a1, a2, a3]) => return load_to_tree(a1, a2, a3, immd, md), // value_number_args(String::from("Load"), &[*a1, *a1, *a1], immd, md), + VecLang::Store(args) => return value_number_args(String::from("Store"), args, immd, md), + VecLang::Set(..) => { + panic!("Set was found. Value Numbering does not handle set nodes.") + } + VecLang::Ite(..) => panic!("Ite was found. Value Numbering does not handle ite nodes."), + VecLang::Or(..) => panic!("Or was found. Value Numbering does not handle or nodes."), + VecLang::And(..) => panic!("And was found. Value Numbering does not handle and nodes."), + VecLang::Lt(..) => panic!("Lt was found. Value Numbering does not handle lt nodes."), + VecLang::Sgn(..) => panic!("Sgn was found. Value Numbering does not handle sgn nodes. TODO: In the future, tis node will be handled alongside sqrt and neg scalar nodes."), + VecLang::VecSgn(..) => panic!("VecSgn was found. Value Numbering does not handle vecsgn nodes. 
TODO: In the future, this node will be handled alongside VecSqrt and VecNeg vector nodes."), + VecLang::Arg(a) => return value_number_check_node(String::from("Arg"), VecLang::Arg(*a), immd, md), + VecLang::Reg(r) => return value_number_check_node(String::from("Reg"), VecLang::Reg(*r), immd, md), + VecLang::Num(n) => return op_to_vec_string(String::from("Num"), *n), + VecLang::LitVec(boxed_ids) => return value_number_boxed_args(String::from("LitVec"), boxed_ids, immd, md), + VecLang::Vec(boxed_ids) => return value_number_boxed_args(String::from("Vec"), boxed_ids, immd, md), + VecLang::List(boxed_ids) => return value_number_boxed_args(String::from("List"), boxed_ids, immd, md), + VecLang::DataVec(boxed_ids) => return value_number_boxed_args(String::from("DataVec"), boxed_ids, immd, md), + VecLang::NoOptVec(boxed_ids) => return value_number_boxed_args(String::from("NoOptVec"), boxed_ids, immd, md), + VecLang::VecAdd(args) => return value_number_args(String::from("VecAdd"), args, immd, md), + VecLang::VecMinus(args) => return value_number_args(String::from("VecMinus"), args, immd, md), + VecLang::VecMul(args) => return value_number_args(String::from("VecMul"), args, immd, md), + VecLang::VecDiv(args) => return value_number_args(String::from("VecDiv"), args, immd, md), + VecLang::Add(args) => return value_number_args(String::from("Add"), args, immd, md), + VecLang::Minus(args) => return value_number_args(String::from("Minus"), args, immd, md), + VecLang::Mul(args) => return value_number_args(String::from("Mul"), args, immd, md), + VecLang::Div(args) => return value_number_args(String::from("Div"), args,immd, md), + VecLang::Concat(args) => return value_number_args(String::from("Concat"), args,immd, md), + VecLang::VecMAC(args) => return value_number_args(String::from("VecMac"), args, immd, md), + + + // TODO: VecNeg, VecSqrt, VecSgn all have not been tested, need test cases. 
+ // TODO: LLVM actually supports many more vector intrinsics, including + // vector sine/cosine instructions for floats. + VecLang::VecNeg(args) => return value_number_args(String::from("VecNeg"), args, immd, md), + VecLang::VecSqrt(args) => return value_number_args(String::from("VecSqrt"), args, immd, md), + // VecSgn compliant with c++ LibMath copysign function, which differs with sgn at x = 0. + VecLang::VecSgn(args) => return value_number_args(String::from("VecSgn"), args, immd, md), + VecLang::Sgn(args) => return value_number_args(String::from("Sgn"), args, immd, md), + VecLang::Sqrt(args) => return value_number_args(String::from("Sqrt"), args, immd, md), + VecLang::Neg(args) => return value_number_args(String::from("Neg"), args, immd, md), + VecLang::VecLoad(args) => return value_number_args(String::from("VecLoad"), args, immd, md), + VecLang::VecStore(args) => return value_number_args(String::from("VecStore"), args,immd, md), + VecLang::AlignedConsecVecLoad(args) => return value_number_args(String::from("AlignedConsecVecLoad"), args, immd, md), + VecLang::Shuffle(args) => return value_number_args(String::from("Shuffle"), args, immd, md), + VecLang::Join(args) => return value_number_args(String::from("Join"), args, immd, md), + VecLang::VecTwo(boxed_ids) => return value_number_boxed_args(String::from("VecTwo"), boxed_ids, immd, md), + VecLang::AlignedConsecVecLoad2(args) => return value_number_args(String::from("AlignedConsecVecLoad2"), args, immd, md), + } } +/// Main function to optimize: Takes in a basic block of instructions, +/// optimizes it, and then translates it to LLVM IR code, in place. 
+ #[no_mangle] pub fn optimize( - module: LLVMModuleRef, - context: LLVMContextRef, - builder: LLVMBuilderRef, - bb: *const LLVMValueRef, - size: size_t, - past_instrs: *const LLVMPair, - past_size: size_t, -) -> VectorPointerSize { - unsafe { - // llvm to egg - let llvm_instrs = from_raw_parts(bb, size); - let past_llvm_instrs = from_raw_parts(past_instrs, past_size); - let mut llvm_arg_pairs = Vec::new(); - for instr_pair in past_llvm_instrs { - let new_instr_pair = LLVMPair { - original_value: instr_pair.original_value, - new_value: instr_pair.new_value, - }; - llvm_arg_pairs.push(new_instr_pair); - } - let mut node_to_arg = Vec::new(); - let (expr, gep_map, store_map, symbol_map) = - llvm_to_egg(llvm_instrs, &mut llvm_arg_pairs, &mut node_to_arg); - - // optimization pass - eprintln!("{}", expr.pretty(10)); - let (_, best) = rules::run(&expr, 180, true, false); - eprintln!("{}", best.pretty(10)); - - // egg to llvm - egg_to_llvm( - best, - &gep_map, - &store_map, - &symbol_map, - &mut llvm_arg_pairs, // does this work properly?, IDK? 
Need to return mut value - &node_to_arg, - module, - context, - builder, - ); + module: LLVMModuleRef, + context: LLVMContextRef, + builder: LLVMBuilderRef, + chunk_instrs: *const LLVMValueRef, + chunk_size: size_t, + restricted_instrs: *const LLVMValueRef, + restricted_size: size_t, + load_info: *const load_info_t, + load_info_size: size_t, + run_egg: bool, + print_opt: bool, +) -> bool { + unsafe { + // preprocessing of instructions + let chunk_llvm_instrs = from_raw_parts(chunk_instrs, chunk_size); + let restricted_llvm_instrs = from_raw_parts(restricted_instrs, restricted_size); + let load_info = from_raw_parts(load_info, load_info_size); + + // llvm to egg + let (egg_expr, mut llvm2egg_metadata) = llvm_to_egg_main( + chunk_llvm_instrs, + restricted_llvm_instrs, + run_egg, + load_info, + ); - let mut final_llvm_arg_pairs = Vec::new(); - for pair in llvm_arg_pairs { - final_llvm_arg_pairs.push(pair); - } + // Bail if no egg Nodes to optimize + if egg_expr.as_ref().is_empty() { + eprintln!("No Egg Nodes in Optimization Vector"); + return false; + } - // https://stackoverflow.com/questions/39224904/how-to-expose-a-rust-vect-to-ffi - let mut llvm_arg_pairs_boxed_slice: Box<[LLVMPair]> = final_llvm_arg_pairs.into_boxed_slice(); - let llvm_arg_pairs_array: *mut LLVMPair = llvm_arg_pairs_boxed_slice.as_mut_ptr(); - let llvm_arg_pairs_array_len: usize = llvm_arg_pairs_boxed_slice.len(); - mem::forget(llvm_arg_pairs_boxed_slice); + let root_of_tree = egg_expr.as_ref().last().expect("There should be a root egg node"); + let immd = &ValueNumberingState { egg_nodes_vector: egg_expr.as_ref() }; + let md = &mut ValueNumberingMutableState { node2value: HashMap::new(), currvalue: 0 }; + let value_numbered_tree = value_number_store_tree(root_of_tree, immd, md); + let veclang2val = &md.node2value; + let mut val2veclang = HashMap::new(); + for (key, val) in veclang2val.iter() { + val2veclang.insert(val, key); + } + if print_opt { + eprintln!("This is the value tree"); + 
eprintln!("{:?}", value_numbered_tree); + } + if let Some((vectorization, old_veclang2val)) = get_vectorization(value_numbered_tree.clone()) { + let mut old_val2veclang = HashMap::new(); + for (old_k, old_v) in old_veclang2val.iter() { + old_val2veclang.insert(old_v, old_k); + } + let mut oldveclang2newveclang = HashMap::new(); + for (old_val, old_veclang) in old_val2veclang.iter() { + if val2veclang.contains_key(old_val) { + oldveclang2newveclang.insert((**old_veclang).clone(), (**val2veclang.get(old_val).expect("Key must exist")).clone()); + } + } + llvm2egg_metadata.template_enode2actual_enode = oldveclang2newveclang; - // TODO: FIX THIS - return VectorPointerSize { - llvm_pointer: llvm_arg_pairs_array, - llvm_pointer_size: llvm_arg_pairs_array_len, - }; - } + if print_opt { + eprintln!("Current tree matches old tree: Using old vectorization. "); + } + // egg to llvm + egg_to_llvm_main( + vectorization, + &llvm2egg_metadata, + module, + context, + builder, + run_egg, + ); + + return true; + } + + // optimization pass + if print_opt { + eprintln!("{}", egg_expr.pretty(10)); + } + let mut best_egg_expr = egg_expr.clone(); + if run_egg { + let pair = rules::run(&egg_expr, 180, true, !run_egg); + best_egg_expr = pair.1; + } + if print_opt { + eprintln!("{}", best_egg_expr.pretty(10)); + } + + VECTORIZATION_MAP.push((value_numbered_tree, best_egg_expr.clone(), veclang2val.clone())); + // build identity map + let mut oldveclang2newveclang = HashMap::new(); + for (key, _) in veclang2val.iter() { + oldveclang2newveclang.insert((*key).clone(), (*key).clone()); + } + llvm2egg_metadata.template_enode2actual_enode = oldveclang2newveclang; + + // egg to llvm + egg_to_llvm_main( + best_egg_expr, + &llvm2egg_metadata, + module, + context, + builder, + run_egg, + ); + + return true; + } } // ------------ NEW CONVERSION FROM LLVM IR TO EGG EXPRESSIONS ------- -type StoreMap = BTreeMap; -// type gep_map = BTreeMap; -type IdMap = BTreeSet; -type SymbolMap = BTreeMap; - enum 
LLVMOpType { - Argument, - Constant, - Store, - Load, - Gep, - Unop, - Bop, - Call, - FPTrunc, - SIToFP, - Bitcast, - Sqrt32, - Sqrt64, - FPExt, -} - -// unsafe fn is_pow2(n: u32) -> bool { -// let mut pow = 1; -// while pow < n { -// pow *= 2; -// } -// return pow == n; -// } + Argument, + Constant, + FNeg, + FAdd, + FSub, + FMul, + FDiv, + Sqrt32, + // TODO: SGN signum + UnhandledLLVMOpCode, + Load, + Store, +} unsafe fn get_pow2(n: u32) -> u32 { - let mut pow = 1; - while pow < n { - pow *= 2; - } - return pow; + let mut pow = 1; + while pow < n { + pow *= 2; + } + return pow; +} + +fn is_pow2(n: u32) -> bool { + if n == 1 { + return true; + } else if n % 2 == 1 { + return false; + } + return is_pow2(n / 2); } /// New Pad Vector should round the number of elements up to a power of 2, and then recursive -/// divide each into the lane width. Assumes lane width is also a power of 2 in size +/// divide each into the lane width. Assumes lane width is also a power of 2 in size. +/// Raises assertion error if width is not a power of 2 +/// If the vector has less than the width, we do not pad, and just append that vector to enodevect unsafe fn balanced_pad_vector<'a>( - binop_vec: &mut Vec, - enode_vec: &'a mut Vec, + binop_vec: &mut Vec, + enode_vec: &'a mut Vec, ) -> &'a mut Vec { - let width = config::vector_width(); - let length = binop_vec.len(); - let closest_pow2 = get_pow2(cmp::max(length, width) as u32); - let diff = closest_pow2 - (length as u32); - for _ in 0..diff { - let zero = VecLang::Num(0); - enode_vec.push(zero); - let zero_idx = enode_vec.len() - 1; - binop_vec.push(Id::from(zero_idx)); - } - return build_concat(width, binop_vec, enode_vec); + let width = config::vector_width(); + assert!(is_pow2(width as u32)); + let length = binop_vec.len(); + assert!( + length > 0, + "There must be 1 or more operators to vectorize." 
+ ); + // Check vector less than width, and then return + if length < width { + enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); + return enode_vec; + } + let closest_pow2 = get_pow2(cmp::max(length, width) as u32); + let diff = closest_pow2 - (length as u32); + for _ in 0..diff { + let zero = VecLang::Num(0); + enode_vec.push(zero); + let zero_idx = enode_vec.len() - 1; + binop_vec.push(Id::from(zero_idx)); + } + return build_concat(width, binop_vec, enode_vec); } +/// Recursively concatenate vectors together unsafe fn build_concat<'a>( - lane_width: usize, - binop_vec: &mut Vec, - enode_vec: &'a mut Vec, + lane_width: usize, + binop_vec: &mut Vec, + enode_vec: &'a mut Vec, ) -> &'a mut Vec { - if binop_vec.len() == lane_width { - enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); - return enode_vec; - } - let num_binops = binop_vec.len(); - let halfway = num_binops / 2; - let (mut left, mut right) = (Vec::new(), Vec::new()); - for (i, b) in binop_vec.iter().enumerate() { - if i < halfway { - left.push(*b); - } else { - right.push(*b); + if binop_vec.len() == lane_width { + enode_vec.push(VecLang::Vec(binop_vec.clone().into_boxed_slice())); + return enode_vec; } - } - assert_eq!(left.len(), right.len()); - assert_eq!(left.len() + right.len(), num_binops); - assert_eq!(left.len() % lane_width, 0); - assert_eq!(right.len() % lane_width, 0); - let enode_vec1 = build_concat(lane_width, &mut left, enode_vec); - let idx1 = enode_vec1.len() - 1; - let enode_vec2 = build_concat(lane_width, &mut right, enode_vec1); - let idx2 = enode_vec2.len() - 1; - enode_vec2.push(VecLang::Concat([Id::from(idx1), Id::from(idx2)])); - return enode_vec2; + let num_binops = binop_vec.len(); + let halfway = num_binops / 2; + let (mut left, mut right) = (Vec::new(), Vec::new()); + for (i, b) in binop_vec.iter().enumerate() { + if i < halfway { + left.push(*b); + } else { + right.push(*b); + } + } + assert_eq!(left.len(), right.len()); + 
assert_eq!(left.len() + right.len(), num_binops); + assert_eq!(left.len() % lane_width, 0); + assert_eq!(right.len() % lane_width, 0); + let enode_vec1 = build_concat(lane_width, &mut left, enode_vec); + let idx1 = enode_vec1.len() - 1; + let enode_vec2 = build_concat(lane_width, &mut right, enode_vec1); + let idx2 = enode_vec2.len() - 1; + enode_vec2.push(VecLang::Concat([Id::from(idx1), Id::from(idx2)])); + return enode_vec2; } unsafe fn _llvm_print(inst: LLVMValueRef) -> () { - LLVMDumpValue(inst); - println!(); + LLVMDumpValue(inst); + println!(); } unsafe fn _llvm_recursive_print(inst: LLVMValueRef) -> () { - if isa_argument(inst) { - return LLVMDumpValue(inst); - } else if isa_constant(inst) { - return LLVMDumpValue(inst); - } - let num_ops = LLVMGetNumOperands(inst); - for i in 0..num_ops { - let operand = LLVMGetOperand(inst, i as u32); - _llvm_recursive_print(operand); - print!(" "); - } - println!(); - LLVMDumpValue(inst); - println!(); - return; -} - -unsafe fn llvm_recursive_add(builder: LLVMBuilderRef, inst: LLVMValueRef) -> LLVMValueRef { - if isa_argument(inst) { - return inst; - } else if isa_constant(inst) { - return inst; - } else if isa_phi(inst) { - return inst; - } else if isa_alloca(inst) { - let cloned_inst = LLVMInstructionClone(inst); - LLVMInsertIntoBuilder(builder, cloned_inst); - return cloned_inst; - } else if isa_call(inst) { - let cloned_inst = LLVMInstructionClone(inst); - LLVMInsertIntoBuilder(builder, cloned_inst); - return cloned_inst; - } - let cloned_inst = LLVMInstructionClone(inst); - let num_ops = LLVMGetNumOperands(inst); - for i in 0..num_ops { - let operand = LLVMGetOperand(inst, i as u32); - let new_inst = llvm_recursive_add(builder, operand); - LLVMSetOperand(cloned_inst, i as u32, new_inst); - } - LLVMInsertIntoBuilder(builder, cloned_inst); - return cloned_inst; -} - -unsafe fn match_llvm_op(expr: &LLVMValueRef) -> LLVMOpType { - if isa_bop(*expr) { - return LLVMOpType::Bop; - } else if isa_unop(*expr) { - return 
LLVMOpType::Unop; - } else if isa_constant(*expr) { - return LLVMOpType::Constant; - } else if isa_gep(*expr) { - return LLVMOpType::Gep; - } else if isa_load(*expr) { - return LLVMOpType::Load; - } else if isa_store(*expr) { - return LLVMOpType::Store; - } else if isa_argument(*expr) { - return LLVMOpType::Argument; - } else if isa_call(*expr) { - return LLVMOpType::Call; - } else if isa_fptrunc(*expr) { - return LLVMOpType::FPTrunc; - } else if isa_sitofp(*expr) { - return LLVMOpType::SIToFP; - } else if isa_bitcast(*expr) { - return LLVMOpType::Bitcast; - } else if isa_sqrt32(*expr) { - return LLVMOpType::Sqrt32; - } else if isa_sqrt64(*expr) { - return LLVMOpType::Sqrt64; - } else if isa_fpext(*expr) { - return LLVMOpType::FPExt; - } else { - LLVMDumpValue(*expr); + if isa_argument(inst) { + return LLVMDumpValue(inst); + } else if isa_constant(inst) { + return LLVMDumpValue(inst); + } + let num_ops = LLVMGetNumOperands(inst); + for i in 0..num_ops { + let operand = LLVMGetOperand(inst, i as u32); + _llvm_recursive_print(operand); + print!(" "); + } println!(); - panic!("ref_to_egg: Unmatched case for LLVMValueRef {:?}", *expr); - } + LLVMDumpValue(inst); + println!(); + return; +} + +unsafe fn isa_fadd(llvm_instr: LLVMValueRef) -> bool { + match LLVMGetInstructionOpcode(llvm_instr) { + LLVMFAdd => true, + _ => false, + } +} + +unsafe fn isa_fsub(llvm_instr: LLVMValueRef) -> bool { + match LLVMGetInstructionOpcode(llvm_instr) { + LLVMFSub => true, + _ => false, + } +} + +unsafe fn isa_fmul(llvm_instr: LLVMValueRef) -> bool { + match LLVMGetInstructionOpcode(llvm_instr) { + LLVMFMul => true, + _ => false, + } +} + +unsafe fn isa_fdiv(llvm_instr: LLVMValueRef) -> bool { + match LLVMGetInstructionOpcode(llvm_instr) { + LLVMFDiv => true, + _ => false, + } +} + +unsafe fn isa_fneg(llvm_instr: LLVMValueRef) -> bool { + match LLVMGetInstructionOpcode(llvm_instr) { + LLVMFNeg => true, + _ => false, + } +} + +unsafe fn isa_supported_binop(llvm_instr: LLVMValueRef) -> 
bool { + return isa_fadd(llvm_instr) + || isa_fmul(llvm_instr) + || isa_fdiv(llvm_instr) + || isa_fsub(llvm_instr); +} + +unsafe fn isa_supported_unop(llvm_instr: LLVMValueRef) -> bool { + return isa_fneg(llvm_instr); +} + +unsafe fn match_llvm_op(llvm_instr: &LLVMValueRef) -> LLVMOpType { + if isa_argument(*llvm_instr) { + return LLVMOpType::Argument; + } else if isa_fadd(*llvm_instr) { + return LLVMOpType::FAdd; + } else if isa_fsub(*llvm_instr) { + return LLVMOpType::FSub; + } else if isa_fmul(*llvm_instr) { + return LLVMOpType::FMul; + } else if isa_fdiv(*llvm_instr) { + return LLVMOpType::FDiv; + } else if isa_fneg(*llvm_instr) { + return LLVMOpType::FNeg; + } else if isa_constant(*llvm_instr) { + return LLVMOpType::Constant; + } else if isa_sqrt32(*llvm_instr) { + return LLVMOpType::Sqrt32; + } else if isa_load(*llvm_instr) { + return LLVMOpType::Load; + } else if isa_store(*llvm_instr) { + return LLVMOpType::Store; + } else { + return LLVMOpType::UnhandledLLVMOpCode; + } } unsafe fn choose_unop(unop: &LLVMValueRef, id: Id) -> VecLang { - match LLVMGetInstructionOpcode(*unop) { - LLVMFNeg => VecLang::Neg([id]), - _ => panic!("Choose_Unop: Opcode Match Error"), - } + match LLVMGetInstructionOpcode(*unop) { + LLVMFNeg => VecLang::Neg([id]), + _ => panic!("Choose_Unop: Opcode Match Error"), + } +} + +/// LLVM2EggState Contains Egg to LLVM Translation Metadata +#[derive(Debug, Clone)] +struct LLVM2EggState { + llvm2reg: BTreeMap, + llvm2arg: BTreeMap, + get2gep: BTreeMap, + instructions_in_chunk: BTreeSet, + restricted_instructions: BTreeSet, + prior_translated_instructions: BTreeSet, + start_instructions: Vec, + start_ids: Vec, + load_info: BTreeMap, + template_enode2actual_enode: HashMap, } +/// Translates LLVM Arg to an Egg Argument Node unsafe fn arg_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - _gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &Vec, - 
_node_to_arg: &mut Vec, -) -> (Vec, i32) { - let sym_name = gen_arg_name(); - let symbol = VecLang::Symbol(Symbol::from(sym_name)); - symbol_map.insert(symbol.clone(), expr); - enode_vec.push(symbol); - return (enode_vec, next_idx + 1); + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, +) -> (Vec, u32) { + assert!(isa_argument(llvm_instr)); + let argument_idx = gen_arg_idx(); + let argument_node = VecLang::Arg(argument_idx); + egg_nodes.push(argument_node.clone()); + assert!(!translation_metadata.llvm2arg.contains_key(&llvm_instr)); + translation_metadata + .llvm2arg + .insert(llvm_instr, argument_node); + return (egg_nodes, next_node_idx + 1); } +/// Translates Supported Binop Instruction to an Egg Bunary Operator Node +/// +/// Supported Binary Operators are: FAdd, FSub, FMul, FDiv unsafe fn bop_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - let left = LLVMGetOperand(expr, 0); - let right = LLVMGetOperand(expr, 1); - let (v1, next_idx1) = ref_to_egg( - left, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - let (mut v2, next_idx2) = ref_to_egg( - right, - v1, - next_idx1, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - // let mut concat = [&v1[..], &v2[..]].concat(); // https://users.rust-lang.org/t/how-to-concatenate-two-vectors/8324/3 - let ids = [ - Id::from((next_idx1 - 1) as usize), - Id::from((next_idx2 - 1) as usize), - ]; - v2.push(choose_binop(&expr, ids)); - (v2, next_idx2 + 1) + llvm_instr: LLVMValueRef, + egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, +) -> (Vec, u32) { + assert!(isa_supported_binop(llvm_instr)); + let left = LLVMGetOperand(llvm_instr, 0); + let 
right = LLVMGetOperand(llvm_instr, 1); + let (left_egg_nodes, left_next_idx) = + llvm_to_egg(left, egg_nodes, next_node_idx, translation_metadata); + let (mut right_egg_nodes, right_next_idx) = + llvm_to_egg(right, left_egg_nodes, left_next_idx, translation_metadata); + let ids = [ + Id::from((left_next_idx - 1) as usize), + Id::from((right_next_idx - 1) as usize), + ]; + right_egg_nodes.push(choose_binop(&llvm_instr, ids)); + (right_egg_nodes, right_next_idx + 1) } +/// Translates Supported Unop Instruction to an Egg Unary Operator Node +/// +/// Supported Unary Operators are: FNeg unsafe fn unop_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - let sub_expr = LLVMGetOperand(expr, 0); - let (mut v, next_idx1) = ref_to_egg( - sub_expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - let id = Id::from((next_idx1 - 1) as usize); - v.push(choose_unop(&expr, id)); - (v, next_idx1 + 1) -} - -unsafe fn gep_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &Vec, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - assert!(isa_argument(expr) || isa_gep(expr) || isa_load(expr)); - // let mut enode_vec = Vec::new(); - let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); - enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); - - let num_gep_operands = LLVMGetNumOperands(expr); - let mut indices = Vec::new(); - for operand_idx in 1..num_gep_operands { - let array_offset = llvm_index(expr, operand_idx); - indices.push(array_offset); - } - let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); - let offsets_symbol = 
Symbol::from(&offsets_string); - enode_vec.push(VecLang::Symbol(offsets_symbol)); - - let get_node = VecLang::Get([ - Id::from((next_idx) as usize), - Id::from((next_idx + 1) as usize), - ]); - (*gep_map).insert( - (Symbol::from(array_name), Symbol::from(&offsets_string)), - expr, - ); - enode_vec.push(get_node); - - return (enode_vec, next_idx + 3); -} - -unsafe fn address_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &Vec, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); - enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); - - let num_gep_operands = LLVMGetNumOperands(expr); - let mut indices = Vec::new(); - for operand_idx in 1..num_gep_operands { - let array_offset = llvm_index(expr, operand_idx); - indices.push(array_offset); - } - let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); - let offsets_symbol = Symbol::from(&offsets_string); - enode_vec.push(VecLang::Symbol(offsets_symbol)); - - let get_node = VecLang::Get([ - Id::from((next_idx) as usize), - Id::from((next_idx + 1) as usize), - ]); - (*gep_map).insert( - (Symbol::from(array_name), Symbol::from(&offsets_string)), - expr, - ); - enode_vec.push(get_node); - - return (enode_vec, next_idx + 3); -} - -unsafe fn sitofp_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &Vec, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); - enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); - - let num_gep_operands = LLVMGetNumOperands(expr); - let mut indices = Vec::new(); - for operand_idx in 1..num_gep_operands { - let array_offset = 
llvm_index(expr, operand_idx); - indices.push(array_offset); - } - let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); - let offsets_symbol = Symbol::from(&offsets_string); - enode_vec.push(VecLang::Symbol(offsets_symbol)); + llvm_instr: LLVMValueRef, + egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, +) -> (Vec, u32) { + assert!(isa_supported_unop(llvm_instr)); + let neg_expr = LLVMGetOperand(llvm_instr, 0); + let (mut new_egg_nodes, new_next_idx) = + llvm_to_egg(neg_expr, egg_nodes, next_node_idx, translation_metadata); + let id = Id::from((new_next_idx - 1) as usize); + new_egg_nodes.push(choose_unop(&llvm_instr, id)); + (new_egg_nodes, new_next_idx + 1) +} - let get_node = VecLang::Get([ - Id::from((next_idx) as usize), - Id::from((next_idx + 1) as usize), - ]); - (*gep_map).insert( - (Symbol::from(array_name), Symbol::from(&offsets_string)), - expr, - ); - enode_vec.push(get_node); +/// Translates Const Instruction to an Egg Number Node +unsafe fn const_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: u32, + _translation_metadata: &mut LLVM2EggState, +) -> (Vec, u32) { + assert!(isa_constant(llvm_instr)); + let value = get_constant_float(llvm_instr); + egg_nodes.push(VecLang::Num(value as i32)); + (egg_nodes, next_node_idx + 1) +} - return (enode_vec, next_idx + 3); +/// Translates Sqrt 32 Instruction to an Egg Square Root Node +unsafe fn sqrt32_to_egg( + llvm_instr: LLVMValueRef, + egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, +) -> (Vec, u32) { + assert!(isa_sqrt32(llvm_instr)); + let sqrt_operand = LLVMGetOperand(llvm_instr, 0); + let (mut new_enode_vec, new_next_node_idx) = + llvm_to_egg(sqrt_operand, egg_nodes, next_node_idx, translation_metadata); + let sqrt_node = VecLang::Sqrt([Id::from((new_next_node_idx - 1) as usize)]); + new_enode_vec.push(sqrt_node); + (new_enode_vec, new_next_node_idx + 1) } +/// Translates a Load to 
an Egg Get Node +/// +/// The translation of a load is a Get Node, which can then possibly be vectorized +/// Adds the gep address of the load to the translation metadata so that it can +/// be referenced when translating from Egg to LLVM +/// +/// Fails if the llvm instruction under translation is not a load unsafe fn load_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - let addr = LLVMGetOperand(expr, 0); - if isa_argument(addr) { - return load_arg_to_egg( - addr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - } else if isa_gep(addr) { - return gep_to_egg( - expr, // we pass the entire instruction and not just the address - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - } else { - return address_to_egg( - addr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - } + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, +) -> (Vec, u32) { + assert!(isa_load(llvm_instr)); + let gep_id = gen_get_idx(); + let gep_node = VecLang::Gep(gep_id); + egg_nodes.push(gep_node.clone()); + let llvm_gep_instr = LLVMGetOperand(llvm_instr, 0); + // assert!(isa_gep(llvm_gep_instr) || isa_argument(llvm_gep_instr)); + translation_metadata.get2gep.insert(gep_id, llvm_gep_instr); + + let result = translation_metadata.load_info.get(&llvm_instr); + let (base_id, offset) = match result { + None => (-1, -1), + Some(n) => *n, + }; + let base_node = VecLang::Num(base_id); + egg_nodes.push(base_node.clone()); + let offset_node = VecLang::Num(offset); + egg_nodes.push(offset_node.clone()); + let load_node = VecLang::Load([ + Id::from(next_node_idx as usize), + 
Id::from((next_node_idx + 1) as usize), + Id::from((next_node_idx + 2) as usize), + ]); + egg_nodes.push(load_node.clone()); + assert!(!translation_metadata.llvm2reg.contains_key(&llvm_instr)); + translation_metadata.llvm2reg.insert(llvm_instr, load_node); + (egg_nodes, next_node_idx + 4) } unsafe fn store_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - let data = LLVMGetOperand(expr, 0); - let addr = LLVMGetOperand(expr, 1); // expected to be a gep operator or addr in LLVM - let (vec, next_idx1) = ref_to_egg( - data, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - (*store_map).insert(next_idx1 - 1, addr); - (*id_map).insert(Id::from((next_idx1 - 1) as usize)); - return (vec, next_idx1); + llvm_instr: LLVMValueRef, + egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, +) -> (Vec, u32) { + assert!(isa_store(llvm_instr)); + let llvm_val_instr = LLVMGetOperand(llvm_instr, 0); + let llvm_gep_instr = LLVMGetOperand(llvm_instr, 1); + let (mut new_egg_nodes, new_next_idx) = llvm_to_egg( + llvm_val_instr, + egg_nodes, + next_node_idx, + translation_metadata, + ); + + let gep_id = gen_get_idx(); + let gep_node = VecLang::Gep(gep_id); + new_egg_nodes.push(gep_node.clone()); + translation_metadata.get2gep.insert(gep_id, llvm_gep_instr); + + let store_node = VecLang::Store([ + Id::from((new_next_idx - 1) as usize), // reference to the recursive translation + Id::from(new_next_idx as usize), // reference to a GEP node + ]); + new_egg_nodes.push(store_node.clone()); + assert!(!translation_metadata.llvm2reg.contains_key(&llvm_instr)); + translation_metadata.llvm2reg.insert(llvm_instr, store_node); + + (new_egg_nodes, new_next_idx + 2) // Add 2 because we built a gep, then also a store node } -unsafe 
fn const_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - _gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &Vec, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - let value = get_constant_float(expr); - enode_vec.push(VecLang::Num(value as i32)); - (enode_vec, next_idx + 1) -} - -unsafe fn load_arg_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &Vec, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - assert!(isa_argument(expr) || isa_gep(expr)); - let array_name = CStr::from_ptr(llvm_name(expr)).to_str().unwrap(); - enode_vec.push(VecLang::Symbol(Symbol::from(array_name))); - - let num_gep_operands = LLVMGetNumOperands(expr); - let mut indices = Vec::new(); - for operand_idx in 1..num_gep_operands { - let array_offset = llvm_index(expr, operand_idx); - indices.push(array_offset); - } - let offsets_string: String = indices.into_iter().map(|i| i.to_string() + ",").collect(); - let offsets_symbol = Symbol::from(&offsets_string); - enode_vec.push(VecLang::Symbol(offsets_symbol)); - - let get_node = VecLang::Get([ - Id::from((next_idx) as usize), - Id::from((next_idx + 1) as usize), - ]); - (*gep_map).insert( - (Symbol::from(array_name), Symbol::from(&offsets_string)), - expr, - ); - enode_vec.push(get_node); - - return (enode_vec, next_idx + 3); -} - -unsafe fn load_call_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - if isa_sqrt32(expr) { - return sqrt32_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, +/// Translates an Unhandled OpCode to an Egg Register. 
+/// +/// This represents a blackbox computation that we bail on translating +/// Assumes that the OpCode is actually a computation. If not, translation fails. +unsafe fn unhandled_opcode_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, +) -> (Vec, u32) { + let register_idx = gen_reg_idx(); + let register_node = VecLang::Reg(register_idx); + egg_nodes.push(register_node.clone()); + assert!(!translation_metadata.llvm2reg.contains_key(&llvm_instr)); + translation_metadata + .llvm2reg + .insert(llvm_instr, register_node); + (egg_nodes, next_node_idx + 1) +} + +/// Recursively Translate LLVM Instruction to Egg Nodes. +/// +/// TODO: Take care of chunk boundaries: translation should never overreach a chunk +/// TODO: May need to keep track of llvm instructions across chunks +unsafe fn llvm_to_egg( + llvm_instr: LLVMValueRef, + mut egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, +) -> (Vec, u32) { + // Mark instruction as translated, as it will be after it goes through the code below + if !translation_metadata + .prior_translated_instructions + .contains(&llvm_instr) + { + translation_metadata + .prior_translated_instructions + .insert(llvm_instr); + } + // If, on a different pass, the instruction was translated already, then + // just used the egg node representing the translation + if translation_metadata.llvm2reg.contains_key(&llvm_instr) { + let translated_egg_node = translation_metadata + .llvm2reg + .get(&llvm_instr) + .expect("Key must exist"); + egg_nodes.push(translated_egg_node.clone()); + return (egg_nodes, next_node_idx + 1); + } + // If the current llvm instruction is a "restricted" instruction, do not translate, but make it a register + if translation_metadata + .restricted_instructions + .contains(&llvm_instr) + { + return unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + } + // If the current llvm instruction is 
not in the current chunk, we must return a register + // The current llvm instruction must not be a arguments, because arguments will be outside every chunk + if !translation_metadata + .instructions_in_chunk + .contains(&llvm_instr) + && !isa_argument(llvm_instr) + { + return unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + } + // Recurse Backwards on the current instruction, translating its children, + // based on the opcode of the parent. + return match match_llvm_op(&llvm_instr) { + LLVMOpType::FAdd | LLVMOpType::FSub | LLVMOpType::FMul | LLVMOpType::FDiv => { + bop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::FNeg => unop_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata), + LLVMOpType::Constant => { + const_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::Argument => { + arg_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::Sqrt32 => { + sqrt32_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::Load => load_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata), + LLVMOpType::Store => { + store_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + LLVMOpType::UnhandledLLVMOpCode => { + unhandled_opcode_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata) + } + }; +} + +unsafe fn start_translating_llvm_to_egg( + llvm_instr: LLVMValueRef, + egg_nodes: Vec, + next_node_idx: u32, + translation_metadata: &mut LLVM2EggState, +) -> (Vec, u32) { + translation_metadata.start_instructions.push(llvm_instr); + let pair_result = llvm_to_egg(llvm_instr, egg_nodes, next_node_idx, translation_metadata); + translation_metadata + .start_ids + .push(Id::from((pair_result.1 - 1) as usize)); + pair_result +} + +unsafe fn can_start_translation_instr(llvm_instr: LLVMValueRef) -> bool { + return match match_llvm_op(&llvm_instr) { + 
LLVMOpType::Store => true, + LLVMOpType::FAdd + | LLVMOpType::FMul + | LLVMOpType::FDiv + | LLVMOpType::FSub + | LLVMOpType::FNeg + | LLVMOpType::Constant + | LLVMOpType::Sqrt32 + | LLVMOpType::Load + | LLVMOpType::Argument + | LLVMOpType::UnhandledLLVMOpCode => false, + }; +} + +unsafe fn llvm_to_egg_main( + llvm_instrs_in_chunk: &[LLVMValueRef], + restricted_instrs: &[LLVMValueRef], + vectorize: bool, + load_info: &[load_info_t], + // TODO: feed this in as an argument llvm_instr2egg_node: BTreeMap, +) -> (RecExpr, LLVM2EggState) { + let mut egg_nodes: Vec = Vec::new(); + + // Map from (translated / opaque) llvm instructions to register egg graph nodes + let llvm_instr2reg_node: BTreeMap = BTreeMap::new(); + // Map from (translated) llvm instructions to argument egg graph nodes + let llvm_instr2arg_node: BTreeMap = BTreeMap::new(); + + // Map from (translated) Egg get ID to an original LLVM get node + let getid2gep: BTreeMap = BTreeMap::new(); + + // Ordered Vector of Starting LLVM instructions where translation began + let start_instructions: Vec = Vec::new(); + + // Ordered Set of Instructions in Chunk + let mut instructions_in_chunk: BTreeSet = BTreeSet::new(); + for llvm_instr in llvm_instrs_in_chunk.iter() { + instructions_in_chunk.insert(*llvm_instr); + } + + // Ordered Set of Ids + let start_ids: Vec = Vec::new(); + + // Ordered Set of Instructions NOT TO BE Translated, except as registers + let mut restricted_instrs_set: BTreeSet = BTreeSet::new(); + for llvm_instr in restricted_instrs.iter() { + restricted_instrs_set.insert(*llvm_instr); + } + + // Load Info map + let mut load_info_map: BTreeMap = BTreeMap::new(); + for triple in load_info.iter() { + load_info_map.insert(triple.load, (triple.base_id, triple.offset)); + } + + // Invariant: every restricted instruction is in the chunk, using a pointer check + for restr_instr in restricted_instrs.iter() { + let mut found_match = false; + for instr in instructions_in_chunk.iter() { + if 
cmp_val_ref_address(&**restr_instr, &**instr) { + found_match = true; + break; + } + } + if found_match { + continue; + } + } + // Invariant: chunk instructions are not empty in size + assert!(!instructions_in_chunk.is_empty()); + + let prior_translated_instructions: BTreeSet = BTreeSet::new(); + + let template_enode2actual_enode = HashMap::new(); + + // State Variable To Hold Maps During Translation + let mut translation_metadata = LLVM2EggState { + llvm2reg: llvm_instr2reg_node, + llvm2arg: llvm_instr2arg_node, + get2gep: getid2gep, + instructions_in_chunk: instructions_in_chunk, + restricted_instructions: restricted_instrs_set, + prior_translated_instructions: prior_translated_instructions, + start_instructions: start_instructions, + start_ids: start_ids, + load_info: load_info_map, + template_enode2actual_enode: template_enode2actual_enode, + }; + + // Index of next node to translate + let mut next_node_idx: u32 = 0; + + // for each final instruction, iterate backwards from that final instruction and translate to egg + for llvm_instr in llvm_instrs_in_chunk.iter() { + // only start translation back if it is a "translatable instruction" and it was not translated already + if can_start_translation_instr(*llvm_instr) // TODO: Need to DFS back from this instruction and make sure invariants for translation hold, e.g. no bitcasts somewhere down the translation tree. 
+ && !translation_metadata + .prior_translated_instructions + .contains(&llvm_instr) + { + let (new_egg_nodes, new_next_node_idx) = start_translating_llvm_to_egg( + *llvm_instr, + egg_nodes, + next_node_idx, + &mut translation_metadata, + ); + egg_nodes = new_egg_nodes; + next_node_idx = new_next_node_idx; + } + } + + // For testing purposes: Handle no vectorization + if !vectorize { + let mut outer_vec_ids = Vec::new(); + for id in translation_metadata.start_ids.iter() { + outer_vec_ids.push(*id); + } + egg_nodes.push(VecLang::NoOptVec(outer_vec_ids.clone().into_boxed_slice())); + let rec_expr = RecExpr::from(egg_nodes); + return (rec_expr, translation_metadata); + } + + // Generate a padded vector + let mut outer_vec_ids = Vec::new(); + for id in translation_metadata.start_ids.iter() { + outer_vec_ids.push(*id); + } + balanced_pad_vector(&mut outer_vec_ids, &mut egg_nodes); + + let rec_expr = RecExpr::from(egg_nodes); + + return (rec_expr, translation_metadata); +} + +/// Egg2LLVMState represent the state needed to translate from Egg to LLVM +struct Egg2LLVMState<'a> { + llvm2egg_metadata: LLVM2EggState, + egg_nodes_vector: &'a [VecLang], + prior_translated_nodes: BTreeSet, + builder: LLVMBuilderRef, + context: LLVMContextRef, + module: LLVMModuleRef, +} + +/// Translates a Gep node to an ID that the node holds. 
This ID is matche dto +/// a gep instruction in the get2gep map +/// +/// Used in conjunction with Load to LLVM and VecLoad to LLVM +unsafe fn gep_to_llvm(original_egg_node: &VecLang, md: &mut Egg2LLVMState) -> LLVMValueRef { + let egg_node = md.llvm2egg_metadata.template_enode2actual_enode.get(original_egg_node).expect("Reg to LLVM expects egg node in template2actual map."); + let new_gep_id = match *egg_node { + VecLang::Gep(gep_id) => gep_id, + _ => { + println!("{:?}", *egg_node); + panic!("Non Gep nodes cannot be translated in gep_to_llvm.") + } + }; + let gep_instr = *md + .llvm2egg_metadata + .get2gep + .get(&new_gep_id) + .expect("Value of gep1 id should exist in get2gep"); + assert!(isa_floatptr(gep_instr)); + gep_instr +} + +/// Translates a Load Egg Node back to an LLVM Load INstruction +/// +/// Assumes that every load is implicitly from a Float * / Single Level Float Pointer +unsafe fn load_to_llvm(gep_id: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { + let gep_instr = egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep_id)], md); + let new_load_instr = LLVMBuildLoad(md.builder, gep_instr, b"\0".as_ptr() as *const _); + return new_load_instr; +} + +unsafe fn store_to_llvm(val_id: &Id, gep_id: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { + let llvm_val_instr = egg_to_llvm(&md.egg_nodes_vector[usize::from(*val_id)], md); + let gep_instr = egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep_id)], md); + let new_store_instr = LLVMBuildStore(md.builder, llvm_val_instr, gep_instr); + return new_store_instr; +} + +unsafe fn aligned_consec_loadvec_to_llvm( + gep1_id: &Id, + load_vector_width: u32, + md: &mut Egg2LLVMState, +) -> LLVMValueRef { + // New code to handle an aligned and consecutive vector load + let gep1_llvm_instr = egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); + let address_space = LLVMGetPointerAddressSpace(LLVMTypeOf(gep1_llvm_instr)); + let bitcase_scalar_to_vector_type = LLVMBuildBitCast( + md.builder, + gep1_llvm_instr, + 
LLVMPointerType( + LLVMVectorType(LLVMFloatTypeInContext(md.context), load_vector_width), + address_space, + ), + b"scalar-to-vector-type-bit-cast\0".as_ptr() as *const _, ); - } - let call_sym_name = gen_call_name(); - let call_sym = VecLang::Symbol(Symbol::from(call_sym_name)); - symbol_map.insert(call_sym.clone(), expr); - enode_vec.push(call_sym); - return (enode_vec, next_idx + 1); -} - -unsafe fn fpext_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - assert!(isa_fpext(expr)); - let operand = LLVMGetOperand(expr, 0); - return ref_to_egg( - operand, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); + let load = LLVMBuildLoad( + md.builder, + bitcase_scalar_to_vector_type, + b"vector-load\0".as_ptr() as *const _, + ); + return load; } -unsafe fn sqrt32_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - assert!(isa_sqrt32(expr)); - let operand = LLVMGetOperand(expr, 0); - let (mut new_enode_vec, next_idx1) = ref_to_egg( - operand, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - let sqrt_node = VecLang::Sqrt([Id::from((next_idx1 - 1) as usize)]); - new_enode_vec.push(sqrt_node); - return (new_enode_vec, next_idx1 + 1); -} - -unsafe fn sqrt64_to_egg( - expr: LLVMValueRef, - _enode_vec: Vec, - _next_idx: i32, - _gep_map: &mut GEPMap, - _store_map: &mut StoreMap, - _id_map: &mut IdMap, - _symbol_map: &mut SymbolMap, - _llvm_arg_pairs: &Vec, - _node_to_arg: &mut Vec, -) -> (Vec, i32) { - assert!(isa_sqrt64(expr)); - panic!("Currently, we do not handle calls to sqrt.f64 
without fpext and fptrunc before and after!. This is the only 'context sensitive' instance in the dispatch matching. ") -} - -unsafe fn fptrunc_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - assert!(isa_fptrunc(expr)); - let operand = LLVMGetOperand(expr, 0); - if isa_sqrt64(operand) { - return sqrt64_to_egg( - operand, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, +unsafe fn loadvec_to_llvm( + gep1_id: &Id, + gep2_id: &Id, + gep3_id: &Id, + gep4_id: &Id, + base_ids_vec: &Id, + offsets_id_vec: &Id, + md: &mut Egg2LLVMState, +) -> LLVMValueRef { + // Set Opaque Pointer ness + let mut base_data = get_shuf_vec_data(base_ids_vec, md); + let mut offsets_data = get_shuf_vec_data(offsets_id_vec, md); + + base_data.dedup(); + offsets_data.dedup(); + + let gep1_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); + let gep2_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep2_id)], md); + let gep3_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep3_id)], md); + let gep4_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep4_id)], md); + + // special case: when all the bases and offsets are the same, do a shuffle, whcih can represent a splat. 
Splats are fast operations + // Build up shuf mask + if base_data.len() == 1 && offsets_data.len() == 1 { + let mut mask = Vec::new(); + for _ in 0..4 { + mask.push(LLVMConstInt( + LLVMInt32TypeInContext(md.context), + 0 as u64, + 0 as i32, + )); + } + + let single_load = LLVMBuildLoad( + md.builder, + *gep1_llvm_instr, + b"splat-load\0".as_ptr() as *const _, + ); + + let mut zeros = Vec::new(); + for _ in 0..4 { + zeros.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 0 as f64)); + } + let zeros_ptr = zeros.as_mut_ptr(); + let mut zero_vector = LLVMConstVector(zeros_ptr, 4); + for (idx, llvm_instr) in vec![single_load].iter().enumerate() { + // Construct the Vector + zero_vector = LLVMBuildInsertElement( + md.builder, + zero_vector, + *llvm_instr, + LLVMConstInt(LLVMIntTypeInContext(md.context, 32), idx as u64, 0), + b"\0".as_ptr() as *const _, + ); + } + + let shuf_mask = LLVMConstVector(mask.as_mut_ptr(), vector_width() as u32); + let shuffle_vec = LLVMBuildShuffleVector( + md.builder, + zero_vector, + zero_vector, + shuf_mask, + b"\0".as_ptr() as *const _, + ); + return shuffle_vec; + } + + let vector_width = 4; + let floatptr_type = LLVMTypeOf(*gep1_llvm_instr); + let vec4ptr_type = LLVMVectorType(floatptr_type, vector_width); + let vec4f_type = LLVMVectorType(LLVMFloatTypeInContext(md.context), vector_width); + let vec4b_type = LLVMVectorType(LLVMInt1TypeInContext(md.context), vector_width); + let int_type = LLVMIntTypeInContext(md.context, 32); + + // Parameter Types are:: vector of pointers, offset int, mask vector booleans and pass through vector + // Pasthru is poison according to LLVM + let param_types = [vec4ptr_type, int_type, vec4b_type, vec4f_type].as_mut_ptr(); + // Output type is a 4 length vector + let fn_type = LLVMFunctionType(vec4f_type, param_types, 4, 0 as i32); + // Build the Vector Load Intrinsic + let func_name = &GATHER; + let llvm_masked_gather_func = get_func_llvm_value(&func_name); + + let func = match llvm_masked_gather_func { + 
Some(value) => value, + None => { + let new_func = LLVMAddFunction( + md.module, + b"llvm.masked.gather.v4f32.v4p0f32\0".as_ptr() as *const _, + fn_type, + ); + FUNC_NAME2LLVM_FUNC.push((&func_name, new_func)); + new_func + } + }; + + // Build Arguments + + let mut zeros = Vec::new(); + for _ in 0..4 { + zeros.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 0 as f64)); + } + let zeros_ptr = zeros.as_mut_ptr(); + let zero_vector = LLVMConstVector(zeros_ptr, 4); + + let pointer_to_int_value = LLVMBuildPtrToInt( + md.builder, + LLVMConstInt(LLVMIntTypeInContext(md.context, 32), 0 as u64, 0), + LLVMIntTypeInContext(md.context, 32), + b"pointer-to-int\0".as_ptr() as *const _, ); - } - return ref_to_egg( - operand, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - // panic!("TODO: Currently, only square roots for f64 are supported after fptrunc. "); -} - -unsafe fn bitcast_to_egg( - expr: LLVMValueRef, - enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - assert!(isa_bitcast(expr)); - let operand = LLVMGetOperand(expr, 0); - let result = ref_to_egg( - operand, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - return result; -} - -unsafe fn ref_to_egg( - expr: LLVMValueRef, - mut enode_vec: Vec, - next_idx: i32, - gep_map: &mut GEPMap, - store_map: &mut StoreMap, - id_map: &mut IdMap, - symbol_map: &mut SymbolMap, - llvm_arg_pairs: &Vec, - node_to_arg: &mut Vec, -) -> (Vec, i32) { - for pair in llvm_arg_pairs { - if pair.original_value == expr { - // Here we create a new numbered variable node - let var_idx = gen_node_idx(); - let var_idx_str = var_idx.to_string(); - let special_var_node = VecLang::Symbol(Symbol::from(var_idx_str)); - enode_vec.push(special_var_node); - let 
node_to_arg_pair = IntLLVMPair { - arg: expr, - node_int: var_idx, - }; - node_to_arg.push(node_to_arg_pair); - return (enode_vec, next_idx + 1); + let pointer_to_float_value = LLVMBuildBitCast( + md.builder, + pointer_to_int_value, + generate_opaque_pointer(LLVMFloatTypeInContext(md.context)), + b"pointer-to-float-bit-cast\0".as_ptr() as *const _, + ); + let mut pointer_to_floats = Vec::new(); + for _ in 0..4 { + pointer_to_floats.push(pointer_to_float_value); } - } - let (vec, next_idx) = match match_llvm_op(&expr) { - LLVMOpType::Bop => bop_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Unop => unop_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Constant => const_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Gep => gep_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Load => load_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Store => store_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Argument => arg_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Call => load_call_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::FPTrunc => fptrunc_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::FPExt => fpext_to_egg( - expr, - enode_vec, - next_idx, - 
gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::SIToFP => sitofp_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Bitcast => bitcast_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Sqrt32 => sqrt32_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - LLVMOpType::Sqrt64 => sqrt64_to_egg( - expr, - enode_vec, - next_idx, - gep_map, - store_map, - id_map, - symbol_map, - llvm_arg_pairs, - node_to_arg, - ), - }; - return (vec, next_idx); -} - -unsafe fn llvm_to_egg<'a>( - bb_vec: &[LLVMValueRef], - llvm_arg_pairs: &Vec, - node_to_arg: &mut Vec, -) -> (RecExpr, GEPMap, StoreMap, SymbolMap) { - let mut enode_vec = Vec::new(); - let (mut gep_map, mut store_map, mut id_map, mut symbol_map) = ( - BTreeMap::new(), - BTreeMap::new(), - BTreeSet::new(), - BTreeMap::new(), - ); - let mut next_idx = 0; - for bop in bb_vec.iter() { - if isa_store(*bop) { - let (new_enode_vec, next_idx1) = ref_to_egg( - *bop, - enode_vec, - next_idx, - &mut gep_map, - &mut store_map, - &mut id_map, - &mut symbol_map, - llvm_arg_pairs, - node_to_arg, - ); - next_idx = next_idx1; - enode_vec = new_enode_vec; + let pointer_to_floats_ptr = pointer_to_floats.as_mut_ptr(); + let mut pointer_vector = LLVMConstVector(pointer_to_floats_ptr, 4); + + let llvm_ptrs = vec![ + *gep1_llvm_instr, + *gep2_llvm_instr, + *gep3_llvm_instr, + *gep4_llvm_instr, + ]; + for idx in 0..4 { + // Grow the Vector + pointer_vector = LLVMBuildInsertElement( + md.builder, + pointer_vector, + *llvm_ptrs.get(idx).expect("Index must be in vector"), + LLVMConstInt(LLVMIntTypeInContext(md.context, 32), idx as u64, 0), + b"\0".as_ptr() as *const _, + ); } - } - let mut final_vec = Vec::new(); - for id in id_map.iter() { - 
final_vec.push(*id); - } - balanced_pad_vector(&mut final_vec, &mut enode_vec); - - let rec_expr = RecExpr::from(enode_vec); - (rec_expr, gep_map, store_map, symbol_map) -} - -unsafe fn translate_egg( - enode: &VecLang, - vec: &[VecLang], - gep_map: &GEPMap, - store_map: &StoreMap, - symbol_map: &SymbolMap, - llvm_arg_pairs: &mut Vec, - node_to_arg_pair: &Vec, - builder: LLVMBuilderRef, - context: LLVMContextRef, - module: LLVMModuleRef, + + let offset = LLVMConstInt(LLVMIntTypeInContext(md.context, 32), 0 as u64, 0); + + let mut mask_values = vec![ + LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), + LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), + LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), + LLVMConstInt(LLVMIntTypeInContext(md.context, 1), 1 as u64, 0), + ]; + let ptr_to_mask_values = mask_values.as_mut_ptr(); + let mask_vector = LLVMConstVector(ptr_to_mask_values, 4); + + let args = [pointer_vector, offset, mask_vector, zero_vector].as_mut_ptr(); + LLVMBuildCall(md.builder, func, args, 4, b"\0".as_ptr() as *const _) +} + +unsafe fn storevec_to_llvm( + val_vec_id: &Id, + gep1_id: &Id, + _gep2_id: &Id, + _gep3_id: &Id, + _gep4_id: &Id, + md: &mut Egg2LLVMState, ) -> LLVMValueRef { - let instr = match enode { - VecLang::Symbol(symbol) => { - match symbol_map.get(enode) { - Some(llvm_instr) => *llvm_instr, - None => { - let mut matched = false; - let mut ret_value = LLVMBuildAdd( - builder, - LLVMConstReal(LLVMFloatTypeInContext(context), 0 as f64), - LLVMConstReal(LLVMFloatTypeInContext(context), 0 as f64), - b"nop\0".as_ptr() as *const _, - ); - for node_arg_pair in node_to_arg_pair { - let llvm_node = node_arg_pair.arg; - let node_index = node_arg_pair.node_int; - let string_node_index = node_index.to_string(); - if string_node_index.parse::().unwrap() == *symbol { - for llvm_pair in &mut *llvm_arg_pairs { - let original_llvm = llvm_pair.original_value; - let new_llvm = llvm_pair.new_value; - if original_llvm 
== llvm_node { - matched = true; - ret_value = new_llvm; - break; - } - } + // Recursively translate val_vec_id to an LLVM Vector Instr + let llvm_val_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*val_vec_id)], md); + + let gep1_llvm_instr = &egg_to_llvm(&md.egg_nodes_vector[usize::from(*gep1_id)], md); + + // New code to handle a vector store + // Currently, this is the only type of store that can be generated because stores are not split. + let address_space = LLVMGetPointerAddressSpace(LLVMTypeOf(*gep1_llvm_instr)); + let bitcase_scalar_to_vector_type = LLVMBuildBitCast( + md.builder, + *gep1_llvm_instr, + LLVMPointerType( + LLVMVectorType(LLVMFloatTypeInContext(md.context), 4), + address_space, + ), + b"scalar-to-vector-type-bit-cast\0".as_ptr() as *const _, + ); + let store = LLVMBuildStore(md.builder, llvm_val_vec, bitcase_scalar_to_vector_type); + return store; +} + +unsafe fn arg_to_llvm(original_egg_node: &VecLang, md: &mut Egg2LLVMState) -> LLVMValueRef { + // TODO: Make More Efficient with BTREEMAP? + let llvm2arg = &md.llvm2egg_metadata.llvm2arg; + let egg_node = md.llvm2egg_metadata.template_enode2actual_enode.get(original_egg_node).expect("Arg to LLVM expects egg node in template2actual map."); + for (llvm_instr, arg_node) in llvm2arg.iter() { + // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. + if arg_node == egg_node { + assert!(isa_argument(*llvm_instr)); + return *llvm_instr; + } + } + panic!( + "Expected a successful lookup in llvm2arg, but cannot find Argument Egg Node: {:?}.", + egg_node + ); +} + +unsafe fn reg_to_llvm( + original_egg_node: &VecLang, + md: &mut Egg2LLVMState, +) -> LLVMValueRef { + // TODO: Make More Efficient with BTREEMAP? 
+ let llvm2reg = &md.llvm2egg_metadata.llvm2reg; + let egg_node = md.llvm2egg_metadata.template_enode2actual_enode.get(original_egg_node).expect("Reg to LLVM expects egg node in template2actual map."); + for (llvm_instr, reg_node) in llvm2reg.iter() { + // We can do a struct comparison rather than point comparison as arg node contents are indexed by a unique u32. + if reg_node == egg_node { + assert!(!isa_argument(*llvm_instr)); + // do not clone an instruction translated earlier in the same chunk + if md + .prior_translated_nodes + .contains(&*llvm_instr) + { + return *llvm_instr; } - if matched { - break; + // do not clone an instruction translated in a prior basic block / prior chunk + if !md + .llvm2egg_metadata + .instructions_in_chunk + .contains(&*llvm_instr) + { + return *llvm_instr; } - } - if matched { - ret_value - } else { - panic!("No Match in Node Arg Pair List.") - } + let new_instr = LLVMInstructionClone(*llvm_instr); + LLVMInsertIntoBuilder(md.builder, new_instr); + md + .prior_translated_nodes + .insert(new_instr); + return new_instr; } - } - // *symbol_map - // .get(enode) - // .expect("Symbol Should Exist in Symbol Map.") } - VecLang::Num(n) => LLVMConstReal(LLVMFloatTypeInContext(context), *n as f64), - // VecLang::Num(n) => build_constant_float(*n as f64, context), - VecLang::Get(..) 
=> { - let (array_name, array_offsets) = translate_get(enode, vec); - let gep_value = gep_map - .get(&(array_name, array_offsets)) - .expect("Symbol map lookup error: Cannot Find GEP"); - let load_value = if isa_load(*gep_value) { - let addr = LLVMGetOperand(*gep_value, 0); - let cloned_gep = LLVMInstructionClone(addr); - let new_gep = llvm_recursive_add(builder, cloned_gep); - let new_load = LLVMBuildLoad(builder, new_gep, b"\0".as_ptr() as *const _); - let llvm_pair = LLVMPair { - original_value: *gep_value, - new_value: new_load, - }; - llvm_arg_pairs.push(llvm_pair); - new_load - } else if isa_gep(*gep_value) { - let cloned_gep = LLVMInstructionClone(*gep_value); - let new_gep = llvm_recursive_add(builder, cloned_gep); - LLVMBuildLoad(builder, new_gep, b"\0".as_ptr() as *const _) - } else if isa_bitcast(*gep_value) { - // TODO: DO NOT REGERATE CALLS. THESE SHOULD BE CACHED!!. e.g. a CALLOC - let cloned_bitcast = LLVMInstructionClone(*gep_value); - let new_bitcast = llvm_recursive_add(builder, cloned_bitcast); - LLVMBuildLoad(builder, new_bitcast, b"\0".as_ptr() as *const _) - } else if isa_sitofp(*gep_value) { - let cloned_sitofp = LLVMInstructionClone(*gep_value); - let new_sitofp = llvm_recursive_add(builder, cloned_sitofp); - new_sitofp - } else { - LLVMBuildLoad(builder, *gep_value, b"\0".as_ptr() as *const _) - }; - load_value + panic!( + "Expected a successful lookup in llvm2reg, but cannot find Register Egg Node: {:?}.", + egg_node + ); +} + +unsafe fn num_to_llvm(n: &i32, md: &mut Egg2LLVMState) -> LLVMValueRef { + LLVMConstReal(LLVMFloatTypeInContext(md.context), *n as f64) +} + +unsafe fn vec_to_llvm(boxed_ids: &Box<[Id]>, md: &mut Egg2LLVMState) -> LLVMValueRef { + // Convert the Boxed Ids to a Vector, and generate a vector of zeros + // Invariant: idvec must not be empty + let idvec = boxed_ids.to_vec(); + let idvec_len = idvec.len(); + assert!( + !idvec.is_empty(), + "Id Vec Cannot be empty when converting Vector to an LLVM Vector" + ); + let mut 
zeros = Vec::new(); + for _ in 0..idvec_len { + zeros.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 0 as f64)); } - VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { - let idvec = boxed_ids.to_vec(); - let idvec_len = idvec.len(); - let mut zeros = Vec::new(); - for _ in 0..idvec_len { - zeros.push(LLVMConstReal(LLVMFloatTypeInContext(context), 0 as f64)); - } - let zeros_ptr = zeros.as_mut_ptr(); - let mut vector = LLVMConstVector(zeros_ptr, idvec.len() as u32); - for (idx, &eggid) in idvec.iter().enumerate() { - let elt = &vec[usize::from(eggid)]; - let mut elt_val = translate_egg( - elt, - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - // check if the elt is an int + + // Convert the Vector of Zeros to a Mut PTr to construct an LLVM Zero Vector + // Invariant: zeros must not be empty + assert!( + !zeros.is_empty(), + "Zeros Vector Cannot be empty when converting Vector to an LLVM Vector" + ); + let zeros_ptr = zeros.as_mut_ptr(); + let mut vector = LLVMConstVector(zeros_ptr, idvec.len() as u32); + for (idx, &eggid) in idvec.iter().enumerate() { + let elt = &md.egg_nodes_vector[usize::from(eggid)]; + let mut elt_val = egg_to_llvm(elt, md); + // TODO: Can We Eliminate this BitCast in the future?? + // With the new formulation, will we ever have an integer type? 
+ // Check if the elt is an int if isa_integertype(elt_val) { - elt_val = LLVMBuildSIToFP( - builder, - elt_val, - LLVMFloatTypeInContext(context), - b"\0".as_ptr() as *const _, - ); + elt_val = LLVMBuildBitCast( + md.builder, + elt_val, + LLVMFloatTypeInContext(md.context), + b"\0".as_ptr() as *const _, + ); } + + // Construct the Vector vector = LLVMBuildInsertElement( - builder, - vector, - elt_val, - LLVMConstInt(LLVMIntTypeInContext(context, 32), idx as u64, 0), - b"\0".as_ptr() as *const _, + md.builder, + vector, + elt_val, + LLVMConstInt(LLVMIntTypeInContext(md.context, 32), idx as u64, 0), + b"\0".as_ptr() as *const _, ); - } - vector } - VecLang::VecAdd([l, r]) - | VecLang::VecMinus([l, r]) - | VecLang::VecMul([l, r]) - | VecLang::VecDiv([l, r]) - | VecLang::Add([l, r]) - | VecLang::Minus([l, r]) - | VecLang::Mul([l, r]) - | VecLang::Div([l, r]) - | VecLang::Or([l, r]) - | VecLang::And([l, r]) - | VecLang::Lt([l, r]) => { - let left = translate_egg( - &vec[usize::from(*l)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let right = translate_egg( - &vec[usize::from(*r)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let left = if LLVMTypeOf(left) == LLVMIntTypeInContext(context, 32) { + vector +} + +unsafe fn nooptvector_to_llvm(boxed_ids: &Box<[Id]>, md: &mut Egg2LLVMState) -> LLVMValueRef { + // Convert the Boxed Ids to a Vector, and generate a vector of zeros + // Invariant: idvec must not be empty + let idvec = boxed_ids.to_vec(); + let mut elt_val = LLVMConstInt(LLVMIntTypeInContext(md.context, 32), 0 as u64, 0); + for eggid in idvec { + let elt = &md.egg_nodes_vector[usize::from(eggid)]; + elt_val = egg_to_llvm(elt, md); + } + elt_val +} + +// TODO: Segregate Vec and Scalar Binops? 
+unsafe fn binop_to_llvm( + binop_node: &VecLang, + left_id: &Id, + right_id: &Id, + md: &mut Egg2LLVMState, +) -> LLVMValueRef { + let left = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_id)], md); + let right = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_id)], md); + + // TODO: Can We Remove these Casts? + let left = if LLVMTypeOf(left) == LLVMIntTypeInContext(md.context, 32) { LLVMBuildBitCast( - builder, - left, - LLVMFloatTypeInContext(context), - b"\0".as_ptr() as *const _, + md.builder, + left, + LLVMFloatTypeInContext(md.context), + b"\0".as_ptr() as *const _, ) - } else { + } else { left - }; - let right = if LLVMTypeOf(right) == LLVMIntTypeInContext(context, 32) { + }; + + // TODO: Can We Remove these Casts? + let right = if LLVMTypeOf(right) == LLVMIntTypeInContext(md.context, 32) { LLVMBuildBitCast( - builder, - right, - LLVMFloatTypeInContext(context), - b"\0".as_ptr() as *const _, + md.builder, + right, + LLVMFloatTypeInContext(md.context), + b"\0".as_ptr() as *const _, ) - } else { + } else { right - }; - if isa_constfp(left) + }; + + // TODO: Can we eliminate these cases? 
+ if isa_constfp(left) && !isa_constaggregatezero(left) && isa_constfp(right) && !isa_constaggregatezero(right) - { + { let mut loses_info = 1; let nright = LLVMConstRealGetDouble(right, &mut loses_info); - let new_right = build_constant_float(nright, context); + let new_right = build_constant_float(nright, md.context); let nleft = LLVMConstRealGetDouble(left, &mut loses_info); - let new_left = build_constant_float(nleft, context); + let new_left = build_constant_float(nleft, md.context); translate_binop( - enode, - new_left, - new_right, - builder, - b"\0".as_ptr() as *const _, + binop_node, + new_left, + new_right, + md.builder, + b"\0".as_ptr() as *const _, ) - } else if isa_constfp(right) && !isa_constaggregatezero(right) { + } else if isa_constfp(right) && !isa_constaggregatezero(right) { let mut loses_info = 1; let n = LLVMConstRealGetDouble(right, &mut loses_info); - let new_right = build_constant_float(n, context); - translate_binop(enode, left, new_right, builder, b"\0".as_ptr() as *const _) - } else if isa_constfp(left) && !isa_constaggregatezero(left) { + let new_right = build_constant_float(n, md.context); + translate_binop( + binop_node, + left, + new_right, + md.builder, + b"\0".as_ptr() as *const _, + ) + } else if isa_constfp(left) && !isa_constaggregatezero(left) { let mut loses_info = 1; let n = LLVMConstRealGetDouble(left, &mut loses_info); - let new_left = build_constant_float(n, context); - translate_binop(enode, new_left, right, builder, b"\0".as_ptr() as *const _) - } else { - translate_binop(enode, left, right, builder, b"\0".as_ptr() as *const _) - } + let new_left = build_constant_float(n, md.context); + translate_binop( + binop_node, + new_left, + right, + md.builder, + b"\0".as_ptr() as *const _, + ) + } else { + translate_binop( + binop_node, + left, + right, + md.builder, + b"\0".as_ptr() as *const _, + ) } - VecLang::Concat([v1, v2]) => { - let trans_v1 = translate_egg( - &vec[usize::from(*v1)], - vec, - gep_map, - store_map, - 
symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let mut trans_v2 = translate_egg( - &vec[usize::from(*v2)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - // it turns out all vectors need to be length power of 2 - // if the 2 vectors are not the same size, double the length of the smaller vector by padding with 0's in it - // manually concatenate 2 vectors by using a LLVM shuffle operation. - let v1_type = LLVMTypeOf(trans_v1); - let v1_size = LLVMGetVectorSize(v1_type); - let v2_type = LLVMTypeOf(trans_v2); - let v2_size = LLVMGetVectorSize(v2_type); - - // HACKY FIX FOR NOW - // assume both v1 and v2 are pow of 2 size - // assume v2 size smaller or equal to v1 size - // assume v2 is 1/2 size of v1 - if v1_size != v2_size { - // replicate v2 size - let mut zeros = Vec::new(); - for _ in 0..v2_size { - zeros.push(LLVMConstReal(LLVMFloatTypeInContext(context), 0 as f64)); - } - let zeros_ptr = zeros.as_mut_ptr(); - let zeros_vector = LLVMConstVector(zeros_ptr, v2_size); - let size = 2 * v2_size; - let mut indices = Vec::new(); - for i in 0..size { - indices.push(LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0)); +} + +unsafe fn concat_to_llvm( + left_vector: &Id, + right_vector: &Id, + md: &mut Egg2LLVMState, +) -> LLVMValueRef { + let _ = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_vector)], md); + let trans_v2 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_vector)], md); + return trans_v2; +} + +unsafe fn mac_to_llvm( + accumulator_vector: &Id, + left_prod_vector: &Id, + right_prod_vector: &Id, + md: &mut Egg2LLVMState, +) -> LLVMValueRef { + let trans_acc = egg_to_llvm(&md.egg_nodes_vector[usize::from(*accumulator_vector)], md); + let trans_v1 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_prod_vector)], md); + let trans_v2 = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_prod_vector)], md); + let vec_type = 
LLVMTypeOf(trans_acc); + let param_types = [vec_type, vec_type, vec_type].as_mut_ptr(); + let fn_type = LLVMFunctionType(vec_type, param_types, 3, 0 as i32); + + let func_name = &FMA_NAME; + let llvm_fma_func = get_func_llvm_value(&func_name); + + let func = match llvm_fma_func { + Some(value) => value, + None => { + let new_func = + LLVMAddFunction(md.module, b"llvm.fma.v4f32\0".as_ptr() as *const _, fn_type); + FUNC_NAME2LLVM_FUNC.push((&func_name, new_func)); + new_func } - let mask = indices.as_mut_ptr(); - let mask_vector = LLVMConstVector(mask, size); - trans_v2 = LLVMBuildShuffleVector( - builder, - trans_v2, - zeros_vector, - mask_vector, - b"\0".as_ptr() as *const _, + }; + let args = [trans_v1, trans_v2, trans_acc].as_mut_ptr(); + LLVMBuildCall(md.builder, func, args, 3, b"\0".as_ptr() as *const _) +} + +unsafe fn scalar_unop_to_llvm(n: &Id, unop_node: &VecLang, md: &mut Egg2LLVMState) -> LLVMValueRef { + let mut number = egg_to_llvm(&md.egg_nodes_vector[usize::from(*n)], md); + if isa_integertype(number) { + number = LLVMBuildBitCast( + md.builder, + number, + LLVMFloatTypeInContext(md.context), + b"\0".as_ptr() as *const _, ); - } - let size = v1_size + v2_size; - let mut indices = Vec::new(); - for i in 0..size { - indices.push(LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0)); - } - let mask = indices.as_mut_ptr(); - let mask_vector = LLVMConstVector(mask, size); - LLVMBuildShuffleVector( - builder, - trans_v1, - trans_v2, - mask_vector, + } + translate_unop( + unop_node, + number, + md.builder, + md.context, + md.module, b"\0".as_ptr() as *const _, - ) - } - VecLang::VecMAC([acc, v1, v2]) => { - let trans_acc = translate_egg( - &vec[usize::from(*acc)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let trans_v1 = translate_egg( - &vec[usize::from(*v1)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - 
module, - ); - let trans_v2 = translate_egg( - &vec[usize::from(*v2)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let vec_type = LLVMTypeOf(trans_acc); - let param_types = [vec_type, vec_type, vec_type].as_mut_ptr(); - let fn_type = LLVMFunctionType(vec_type, param_types, 3, 0 as i32); - let func = LLVMAddFunction(module, b"llvm.fma.f32\0".as_ptr() as *const _, fn_type); - let args = [trans_v1, trans_v2, trans_acc].as_mut_ptr(); - LLVMBuildCall(builder, func, args, 3, b"\0".as_ptr() as *const _) + ) +} + +unsafe fn vecneg_to_llvm(vec: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { + let neg_vector = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); + LLVMBuildFNeg(md.builder, neg_vector, b"\0".as_ptr() as *const _) +} + +unsafe fn vecsqrt_to_llvm(vec: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { + let sqrt_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); + let vec_type = LLVMTypeOf(sqrt_vec); + let param_types = [vec_type].as_mut_ptr(); + let fn_type = LLVMFunctionType(vec_type, param_types, 1, 0 as i32); + let func = LLVMAddFunction(md.module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); + let args = [sqrt_vec].as_mut_ptr(); + LLVMBuildCall(md.builder, func, args, 1, b"\0".as_ptr() as *const _) +} + +unsafe fn vecsgn_to_llvm(vec: &Id, md: &mut Egg2LLVMState) -> LLVMValueRef { + let sgn_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*vec)], md); + let vec_type = LLVMTypeOf(sgn_vec); + let vec_size = LLVMGetVectorSize(vec_type); + let mut ones = Vec::new(); + for _ in 0..vec_size { + ones.push(LLVMConstReal(LLVMFloatTypeInContext(md.context), 1 as f64)); + } + let ones_ptr = ones.as_mut_ptr(); + let ones_vector = LLVMConstVector(ones_ptr, vec_size); + let param_types = [vec_type, vec_type].as_mut_ptr(); + let fn_type = LLVMFunctionType(vec_type, param_types, 2, 0 as i32); + let func = LLVMAddFunction( + md.module, + b"llvm.copysign.f32\0".as_ptr() as 
*const _, + fn_type, + ); + let args = [ones_vector, sgn_vec].as_mut_ptr(); + LLVMBuildCall(md.builder, func, args, 2, b"\0".as_ptr() as *const _) +} + +unsafe fn get_shuf_vec_data(shuf_vec_id: &Id, md: &mut Egg2LLVMState) -> Vec { + let mut results_vec = Vec::new(); + let match_expr = &md.egg_nodes_vector[usize::from(*shuf_vec_id)]; + match match_expr { + VecLang::DataVec(boxed_ids) => { + let idvec = boxed_ids.to_vec(); + for eggid in idvec { + match &md.egg_nodes_vector[usize::from(eggid)] { + VecLang::Num(n) => results_vec.push(*n), + _ => panic!("Each element of a shuf vec needs to be a num"), + } + } + } + _ => { + println!("{:?}", match_expr); + panic!("Shuf Vec Id should point to a vector of numbers") + } + } + return results_vec; +} + +/** + * Shuffle Node to an LLVM Shuffle Op + */ +unsafe fn shuffle_to_llvm( + data_vec_id: &Id, + shuf_vec_id: &Id, + md: &mut Egg2LLVMState, +) -> LLVMValueRef { + let data_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*data_vec_id)], md); + let shuf_data = get_shuf_vec_data(shuf_vec_id, md); + + // Build up shuf mask + let mut mask = Vec::new(); + for val in shuf_data { + mask.push(LLVMConstInt( + LLVMInt32TypeInContext(md.context), + val as u64, + 0 as i32, + )); + } + + let shuf_mask = LLVMConstVector(mask.as_mut_ptr(), vector_width() as u32); + LLVMBuildShuffleVector( + md.builder, + data_vec, + data_vec, + shuf_mask, + b"\0".as_ptr() as *const _, + ) +} + +unsafe fn join_to_llvm( + left_vec_id: &Id, + right_vec_id: &Id, + md: &mut Egg2LLVMState, +) -> LLVMValueRef { + let left_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*left_vec_id)], md); + let right_vec = egg_to_llvm(&md.egg_nodes_vector[usize::from(*right_vec_id)], md); + + // Build up shuf mask + let mut mask = Vec::new(); + for i in 0..4 { + mask.push(LLVMConstInt( + LLVMInt32TypeInContext(md.context), + i as u64, + 0 as i32, + )); + } + let shuf_mask = LLVMConstVector(mask.as_mut_ptr(), vector_width() as u32); + + LLVMBuildShuffleVector( + 
md.builder, + left_vec, + right_vec, + shuf_mask, + b"\0".as_ptr() as *const _, + ) +} + +/** + * Vector representing No Optimization: Egg will not have modified the vector at all. + */ +unsafe fn nooptvec_to_llvm(boxed_ids: &Box<[Id]>, md: &mut Egg2LLVMState) -> () { + // Convert the Boxed Ids to a Vector, and generate a vector of zeros + // Invariant: idvec must not be empty + let idvec = boxed_ids.to_vec(); + assert!( + !idvec.is_empty(), + "Id Vec Cannot be empty when converting Vector to an LLVM Vector" + ); + for (i, &eggid) in idvec.iter().enumerate() { + let egg_node = &md.egg_nodes_vector[usize::from(eggid)]; + let new_instr = egg_to_llvm(egg_node, md); + let old_instr = md + .llvm2egg_metadata + .start_instructions + .get(i) + .expect("Index Must Exist In Start Instructions"); + LLVMReplaceAllUsesWith(*old_instr, new_instr); + LLVMInstructionEraseFromParent(*old_instr); + } +} + +/// Egg To LLVM Dispatches translation of VecLanf Egg Nodes to LLVMValueRegs +/// +/// Side Effect: Builds and Insert LLVM instructions +unsafe fn egg_to_llvm( + egg_node: &VecLang, + translation_metadata: &mut Egg2LLVMState, +) -> LLVMValueRef { + match egg_node { + VecLang::Symbol(..) => { + panic!("Symbol was found. Egg to LLVM Translation does not handle symbol nodes.") + } + VecLang::Get(..) => { + panic!("Get was found. Egg to LLVM Translation does not handle get nodes.") + } + VecLang::Gep(..) => { + gep_to_llvm(egg_node, translation_metadata) } + VecLang::Load([gep_id, _, _]) => { + load_to_llvm(gep_id, translation_metadata) + } + VecLang::Store([val_id, gep_id]) => { + store_to_llvm(val_id, gep_id, translation_metadata) + } + VecLang::Set(..) => { + panic!("Set was found. Egg to LLVM Translation does not handle set nodes.") + } + VecLang::Ite(..) => panic!("Ite was found. Egg to LLVM Translation does not handle ite nodes."), + VecLang::Or(..) => panic!("Or was found. Egg to LLVM Translation does not handle or nodes."), + VecLang::And(..) => panic!("And was found. 
Egg to LLVM Translation does not handle and nodes."), + VecLang::Lt(..) => panic!("Lt was found. Egg to LLVM Translation does not handle lt nodes."), + VecLang::Sgn(..) => panic!("Sgn was found. Egg to LLVM Translation does not handle sgn nodes. TODO: In the future, tis node will be handled alongside sqrt and neg scalar nodes."), + VecLang::VecSgn(..) => panic!("VecSgn was found. Egg to LLVM Translation does not handle vecsgn nodes. TODO: In the future, this node will be handled alongside VecSqrt and VecNeg vector nodes."), + VecLang::Arg(_) => arg_to_llvm(egg_node, translation_metadata), + VecLang::Reg(_) => reg_to_llvm(egg_node, translation_metadata), + VecLang::Num(n) => num_to_llvm(n, translation_metadata), + VecLang::LitVec(boxed_ids) | VecLang::Vec(boxed_ids) | VecLang::List(boxed_ids) => { + vec_to_llvm(&*boxed_ids, translation_metadata) + } + VecLang::DataVec(_) => panic!("Cannot translate a datavec"), + VecLang::NoOptVec(boxed_ids) => nooptvector_to_llvm(boxed_ids, translation_metadata), + VecLang::VecAdd([l, r]) + | VecLang::VecMinus([l, r]) + | VecLang::VecMul([l, r]) + | VecLang::VecDiv([l, r]) + | VecLang::Add([l, r]) + | VecLang::Minus([l, r]) + | VecLang::Mul([l, r]) + | VecLang::Div([l, r]) => binop_to_llvm(egg_node, l, r, translation_metadata), + VecLang::Concat([v1, v2]) => concat_to_llvm(v1, v2, translation_metadata), + VecLang::VecMAC([acc, v1, v2]) => mac_to_llvm(acc, v1, v2, translation_metadata), + + // TODO: VecNeg, VecSqrt, VecSgn all have not been tested, need test cases. // TODO: LLVM actually supports many more vector intrinsics, including // vector sine/cosine instructions for floats. 
- VecLang::VecNeg([v]) => { - let neg_vector = translate_egg( - &vec[usize::from(*v)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - LLVMBuildFNeg(builder, neg_vector, b"\0".as_ptr() as *const _) - } - VecLang::VecSqrt([v]) => { - let sqrt_vec = translate_egg( - &vec[usize::from(*v)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let vec_type = LLVMTypeOf(sqrt_vec); - let param_types = [vec_type].as_mut_ptr(); - let fn_type = LLVMFunctionType(vec_type, param_types, 1, 0 as i32); - let func = LLVMAddFunction(module, b"llvm.sqrt.f32\0".as_ptr() as *const _, fn_type); - let args = [sqrt_vec].as_mut_ptr(); - LLVMBuildCall(builder, func, args, 1, b"\0".as_ptr() as *const _) - } - // compliant with c++ LibMath copysign function, which differs with sgn at x = 0. - VecLang::VecSgn([v]) => { - let sgn_vec = translate_egg( - &vec[usize::from(*v)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - let vec_type = LLVMTypeOf(sgn_vec); - let vec_size = LLVMGetVectorSize(vec_type); - let mut ones = Vec::new(); - for _ in 0..vec_size { - ones.push(LLVMConstReal(LLVMFloatTypeInContext(context), 1 as f64)); - } - let ones_ptr = ones.as_mut_ptr(); - let ones_vector = LLVMConstVector(ones_ptr, vec_size); - let param_types = [vec_type, vec_type].as_mut_ptr(); - let fn_type = LLVMFunctionType(vec_type, param_types, 2, 0 as i32); - let func = LLVMAddFunction(module, b"llvm.copysign.f32\0".as_ptr() as *const _, fn_type); - let args = [ones_vector, sgn_vec].as_mut_ptr(); - LLVMBuildCall(builder, func, args, 2, b"\0".as_ptr() as *const _) - } - VecLang::Sgn([n]) | VecLang::Sqrt([n]) | VecLang::Neg([n]) => { - let mut number = translate_egg( - &vec[usize::from(*n)], - vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - 
module, - ); - if isa_integertype(number) { - number = LLVMBuildSIToFP( - builder, - number, - LLVMFloatTypeInContext(context), - b"\0".as_ptr() as *const _, - ) - } - translate_unop( - enode, - number, - builder, - context, - module, - b"\0".as_ptr() as *const _, - ) + VecLang::VecNeg([v]) => vecneg_to_llvm(v, translation_metadata), + VecLang::VecSqrt([v]) => vecsqrt_to_llvm(v, translation_metadata), + // VecSgn compliant with c++ LibMath copysign function, which differs with sgn at x = 0. + VecLang::VecSgn([v]) => vecsgn_to_llvm(v, translation_metadata), + VecLang::Sgn([n]) | VecLang::Sqrt([n]) | VecLang::Neg([n]) => scalar_unop_to_llvm(n, egg_node, translation_metadata), + VecLang::VecLoad([gep1_id, gep2_id, gep3_id, gep4_id, base_ids_vec, offsets_id_vec]) => loadvec_to_llvm(gep1_id, gep2_id, gep3_id, gep4_id, base_ids_vec, offsets_id_vec, translation_metadata), + VecLang::VecStore([val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id]) => storevec_to_llvm(val_vec_id, gep1_id, gep2_id, gep3_id, gep4_id, translation_metadata), + VecLang::AlignedConsecVecLoad([gep_id]) => aligned_consec_loadvec_to_llvm(gep_id, 4, translation_metadata), + VecLang::Shuffle([data_vec_id, shuf_vec_id]) => shuffle_to_llvm(data_vec_id, shuf_vec_id, translation_metadata), + VecLang::Join([left, right]) => join_to_llvm(left, right, translation_metadata), + VecLang::VecTwo(boxed_ids) => vec_to_llvm(&*boxed_ids, translation_metadata), + VecLang::AlignedConsecVecLoad2([gep_id]) => aligned_consec_loadvec_to_llvm(gep_id, 2, translation_metadata), + } +} + +// Function types for constructor anonymous functions +type VecLangBoxedConstructor = fn(bool, Box<[Id]>) -> VecLang; + +// Canonicalize the Concat Constructor +unsafe fn canonicalize_concat( + can_change_vector: bool, + vector_elements: Box<[Id]>, + old_egg_nodes: &[VecLang], +) -> Vec { + let mut whole_vector = Vec::new(); + for vec_elt in vector_elements.iter() { + // note the first argument is to can_change_vector, which is unlike any other 
constructor + let mut trans_val = + canonicalize_egg(can_change_vector, &old_egg_nodes[usize::from(*vec_elt)], old_egg_nodes); + whole_vector.append(&mut trans_val); } - VecLang::Ite(..) => panic!("Ite is not handled."), - }; - return instr; + // note specialization to concat constructor + whole_vector.push(VecLang::Concat(box2arr(vector_elements))); + whole_vector } -unsafe fn egg_to_llvm( - expr: RecExpr, - gep_map: &GEPMap, - store_map: &StoreMap, - symbol_map: &SymbolMap, - llvm_arg_pairs: &mut Vec, - node_to_arg_pair: &Vec, - module: LLVMModuleRef, - context: LLVMContextRef, - builder: LLVMBuilderRef, -) -> () { - // in fact this will look rather similar to translation from egg to llvm - // the major differece is how we reconstruct loads and stores - // whenever we encounter a get instruction, we retranslate as a gep and then a load - // whenever we encounter an operand that is within the store map, we immediately build a store too. - // This should maintain the translation - - // Note: You must include all instructions in the basic block, up to the final store - // The builder mount location must be immediately at the beginning of the basic block to start writing instrucitons - - // Walk the RecExpr and translate it in place to LLVM - let enode_vec = expr.as_ref(); - let last_enode = enode_vec - .last() - .expect("No match for last element of vector of Egg Terms."); - let vector = translate_egg( - last_enode, - enode_vec, - gep_map, - store_map, - symbol_map, - llvm_arg_pairs, - node_to_arg_pair, - builder, - context, - module, - ); - - // Add in the Stores - for (i, (_, addr)) in store_map.iter().enumerate() { - let index = LLVMConstInt(LLVMIntTypeInContext(context, 32), i as u64, 0); - let mut extracted_value = - LLVMBuildExtractElement(builder, vector, index, b"\0".as_ptr() as *const _); - // check if the extracted type is an float and the address is a int ptr - if isa_floattype(extracted_value) && isa_intptr(*addr) { - extracted_value = LLVMBuildFPToSI( - 
builder, - extracted_value, - LLVMIntTypeInContext(context, 32), - b"\0".as_ptr() as *const _, - ); +unsafe fn canoncalize_ntuple( + can_change_vector: bool, + constructor: VecLangBoxedConstructor, + vector_elements: Box<[Id]>, + old_egg_nodes: &[VecLang], +) -> Vec { + let mut whole_vector = Vec::new(); + for vec_elt in vector_elements.iter() { + let mut trans_val = + canonicalize_egg(false, &old_egg_nodes[usize::from(*vec_elt)], old_egg_nodes); + whole_vector.append(&mut trans_val); } - if isa_argument(*addr) { - LLVMBuildStore(builder, extracted_value, *addr); - } else { - let cloned_addr = LLVMInstructionClone(*addr); - let new_addr = llvm_recursive_add(builder, cloned_addr); - // LLVMReplaceAllUsesWith(*addr, new_addr); - LLVMBuildStore(builder, extracted_value, new_addr); + whole_vector.push(constructor(can_change_vector, vector_elements)); + whole_vector +} + +// Solution to convert by: +// https://stackoverflow.com/questions/29570607/is-there-a-good-way-to-convert-a-vect-to-an-array +// https://stackoverflow.com/questions/35751927/how-to-convert-a-boxed-array-into-a-vec-in-rust +unsafe fn box2arr(b: Box<[T]>) -> [T; N] { + let v = b.into_vec(); + v.try_into() + .unwrap_or_else(|v: Vec| panic!("Expected a Vec of length {} but it was {}", N, v.len())) +} + +/// Modify the Egg expression so that the first instance of a Vector operation is replaced by a NoOpVector expression node +/// The reason is that in this version of Diospyros, stores and vecstores explictly mark where a store is to be done. +/// The outermost vectors encountered will not store anything. Replacing them with NoOps will allow translation to occur properly. +unsafe fn canonicalize_egg( + can_change_vector: bool, + curr_egg_node: &VecLang, + old_egg_nodes: &[VecLang], +) -> Vec { + match curr_egg_node { + VecLang::NoOptVec(..) => panic!("No Opt Vector was found. Egg canonicalization does not handle No Opt Vector nodes at this location."), + VecLang::Symbol(..) => { + panic!("Symbol was found. 
Egg canonicalization does not handle symbol nodes.") + } + VecLang::Get(..) => { + panic!("Get was found. Egg canonicalization does not handle get nodes.") + } + VecLang::Gep(g) => vec![VecLang::Gep(*g)], + VecLang::Load(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Load(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Store(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Store(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Set(..) => { + panic!("Set was found. Egg canonicalization does not handle set nodes.") } + VecLang::Ite(..) => panic!("Ite was found. Egg canonicalization does not handle ite nodes."), + VecLang::Or(..) => panic!("Or was found. Egg canonicalization does not handle or nodes."), + VecLang::And(..) => panic!("And was found. Egg canonicalization does not handle and nodes."), + VecLang::Lt(..) => panic!("Lt was found. Egg canonicalizationdoes not handle lt nodes."), + VecLang::Sgn(..) => panic!("Sgn was found. Egg canonicalization does not handle sgn nodes. TODO: In the future, tis node will be handled alongside sqrt and neg scalar nodes."), + VecLang::VecSgn(..) => panic!("VecSgn was found. Egg canonicalization does not handle vecsgn nodes. TODO: In the future, this node will be handled alongside VecSqrt and VecNeg vector nodes."), + VecLang::Arg(a) => vec![VecLang::Arg(*a)], + VecLang::Reg(r) => vec![VecLang::Reg(*r)], + VecLang::Num(n) => vec![VecLang::Num(*n)], + VecLang::List(_) => panic!("List was found. 
Egg canonicalization does not handle list nodes."), + VecLang::LitVec(args) => canoncalize_ntuple(can_change_vector, |can_change_vector, b| -> VecLang {if can_change_vector {VecLang::NoOptVec(b)} else {VecLang::LitVec(b)}}, args.clone(), old_egg_nodes), + VecLang::Vec(args) => canoncalize_ntuple(can_change_vector, |can_change_vector, b| -> VecLang {if can_change_vector {VecLang::NoOptVec(b)} else {VecLang::Vec(b)}}, args.clone(), old_egg_nodes), + VecLang::DataVec(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::DataVec(b)}, args.clone(), old_egg_nodes), + VecLang::VecAdd(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecAdd(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::VecMinus(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecMinus(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::VecMul(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecMul(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::VecDiv(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecDiv(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Add(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Add(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Minus(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Minus(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Mul(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Mul(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Div(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Div(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Concat(args) => canonicalize_concat(can_change_vector, Box::new(*args), old_egg_nodes), + VecLang::VecMAC(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecMAC(box2arr(b))}, Box::new(*args), old_egg_nodes), + + + 
// TODO: VecNeg, VecSqrt, VecSgn all have not been tested, need test cases. + // TODO: LLVM actually supports many more vector intrinsics, including + // vector sine/cosine instructions for floats. + VecLang::VecNeg(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecNeg(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::VecSqrt(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecSqrt(box2arr(b))}, Box::new(*args), old_egg_nodes), + // VecSgn compliant with c++ LibMath copysign function, which differs with sgn at x = 0. + VecLang::VecSgn(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecSgn(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Sgn(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Sgn(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Sqrt(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Sqrt(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Neg(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Neg(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::VecLoad(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecLoad(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::VecStore(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::VecStore(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::AlignedConsecVecLoad(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::AlignedConsecVecLoad(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Shuffle(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Shuffle(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::Join(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::Join(box2arr(b))}, Box::new(*args), old_egg_nodes), + VecLang::VecTwo(args) => canoncalize_ntuple(can_change_vector, |_, b| -> 
VecLang {VecLang::VecTwo(b)}, args.clone(), old_egg_nodes), + VecLang::AlignedConsecVecLoad2(args) => canoncalize_ntuple(can_change_vector, |_, b| -> VecLang {VecLang::AlignedConsecVecLoad2(box2arr(b))}, Box::new(*args), old_egg_nodes), } } + +unsafe fn is_nooptvec(egg_expr: &VecLang) -> bool { + match egg_expr { + VecLang::NoOptVec(..) => true, + _ => false, + } +} + +unsafe fn get_noopt_eggnodes(egg_expr: &VecLang) -> &Box<[Id]> { + match egg_expr { + VecLang::NoOptVec(boxed_ids) => boxed_ids, + _ => panic!("Not a NoOptVec!"), + } +} + +// TODO: Add non-vectorized version as well! +unsafe fn egg_to_llvm_main( + expr: RecExpr, + llvm2egg_metadata: &LLVM2EggState, + module: LLVMModuleRef, + context: LLVMContextRef, + builder: LLVMBuilderRef, + vectorize: bool, +) -> () { + // Walk the RecExpr of Egg Nodes and translate it in place to LLVM + let egg_nodes = expr.as_ref(); + let last_egg_node = egg_nodes + .last() + .expect("No match for last element of vector of Egg Terms."); + + // Nodes converted to llvm already, not to be retranslated + let prior_translated_nodes: BTreeSet = BTreeSet::new(); + + // Regular translation from vectorization + + assert!(!is_nooptvec(last_egg_node)); + + let canonicalized_egg_nodes = canonicalize_egg(true, last_egg_node, egg_nodes); + let canonicalized_last_node = canonicalized_egg_nodes + .last() + .expect("No match for last element of vector of Canonicalized Egg Terms."); + + let mut translation_metadata = Egg2LLVMState { + egg_nodes_vector: &canonicalized_egg_nodes, + llvm2egg_metadata: llvm2egg_metadata.clone(), + prior_translated_nodes: prior_translated_nodes, + builder: builder, + context: context, + module: module, + }; + + // If vectorize was not true, we are finished, because nooptvectorize_to_llvm will generate the required code. 
+ if !vectorize { + assert!(is_nooptvec(last_egg_node)); + return nooptvec_to_llvm(get_noopt_eggnodes(last_egg_node), &mut translation_metadata); + } + + // let llvm_vector = + egg_to_llvm(canonicalized_last_node, &mut translation_metadata); + + // remove starting stores + let num_extractions = llvm2egg_metadata.start_instructions.len(); + for i in (0..num_extractions).rev() { + let old_instr = llvm2egg_metadata + .start_instructions + .get(i) + .expect("Index should be in vector."); + LLVMInstructionEraseFromParent(*old_instr); + } +} diff --git a/src/dios-egraphs/Diospyros/test-runner.sh b/src/dios-egraphs/Diospyros/test-runner.sh new file mode 100644 index 00000000..6897dcaa --- /dev/null +++ b/src/dios-egraphs/Diospyros/test-runner.sh @@ -0,0 +1,16 @@ +cd .. 1> /dev/null +case $1 in + run-opt) + make run-opt test=$2 + ;; + run-opt-stdout) + make run-opt-stdout test=$2 + ;; + no-opt-stdout) + make no-opt-stdout test=$2 + ;; + *) + echo "match failure" + ;; +esac +cd - 1> /dev/null \ No newline at end of file diff --git a/src/dios-egraphs/src/alignconsecsearcher.rs b/src/dios-egraphs/src/alignconsecsearcher.rs new file mode 100644 index 00000000..e3188450 --- /dev/null +++ b/src/dios-egraphs/src/alignconsecsearcher.rs @@ -0,0 +1,157 @@ +use crate::veclang::VecLang; +use egg::*; + +/// Search for permutations of sequences of Loads and Stores that are Aligned and Consecutive +/// +/// This module creates an Applier, which attempts to find successful permutations of loads ands stores to be aligned and consecutive + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PermuteLoad { + pub a0: Var, + pub a1: Var, + pub a2: Var, + pub a3: Var, + pub b0: Var, + pub b1: Var, + pub b2: Var, + pub b3: Var, + pub o0: Var, + pub o1: Var, + pub o2: Var, + pub o3: Var, +} + +impl> Applier for PermuteLoad { + /// We are going to look for permutations of the four offsets that could + /// allow for consecutive and aligned loading to occur with a shuffle operation + fn 
apply_one(&self, egraph: &mut EGraph, matched_id: Id, subst: &Subst) -> Vec { + let mut first_base = -10; + for e in egraph[subst[self.b0]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + first_base = n; + } + } + assert!(first_base != -10); + let mut second_base = -10; + for e in egraph[subst[self.b1]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + second_base = n; + } + } + assert!(second_base != -10); + let mut third_base = -10; + for e in egraph[subst[self.b2]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + third_base = n; + } + } + assert!(third_base != -10); + let mut fourth_base = -10; + for e in egraph[subst[self.b3]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + fourth_base = n; + } + } + assert!(fourth_base != -10); + + if !(first_base == second_base + && first_base == third_base + && first_base == fourth_base + && first_base >= 0) + { + return vec![]; + } + + let mut first_offset = -10; + for e in egraph[subst[self.o0]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + first_offset = n; + } + } + assert!(first_offset != -10); + let mut second_offset = -10; + for e in egraph[subst[self.o1]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + second_offset = n; + } + } + assert!(second_offset != -10); + let mut third_offset = -10; + for e in egraph[subst[self.o2]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + third_offset = n; + } + } + assert!(third_offset != -10); + let mut fourth_offset = -10; + for e in egraph[subst[self.o3]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + fourth_offset = n; + } + } + assert!(fourth_offset != -10); + + let off0_id: Id = subst[self.o0]; + let off1_id: Id = subst[self.o1]; + let off2_id: Id = subst[self.o2]; + let off3_id: Id = subst[self.o3]; + let base0_id: Id = subst[self.a0]; + let base1_id: Id = subst[self.a1]; + let base2_id: Id = subst[self.a2]; + let base3_id: Id = 
subst[self.a3]; + + // deduplicate + let mut undedup_offsets = vec![first_offset, second_offset, third_offset, fourth_offset]; + undedup_offsets.dedup(); + if undedup_offsets.len() < 4 { + return vec![]; + } + + let mut offsets: Vec<(i32, Id, Id)> = Vec::new(); + offsets.push((first_offset, off0_id, base0_id)); + offsets.push((second_offset, off1_id, base1_id)); + offsets.push((third_offset, off2_id, base2_id)); + offsets.push((fourth_offset, off3_id, base3_id)); + offsets.sort_by(|o1, o2| o1.0.partial_cmp(&o2.0).unwrap()); + + if offsets[0].0 % 4 != 0 { + return vec![]; + } + + if !(offsets[0].0 + 1 == offsets[1].0 + && offsets[0].0 + 2 == offsets[2].0 + && offsets[0].0 + 3 == offsets[3].0) + { + return vec![]; + } + + let mut shuffle_vec: Vec = Vec::new(); + let offset_ids_vec: Vec = vec![off0_id, off1_id, off2_id, off3_id]; + for off_id in offset_ids_vec { + for (i, (_, other_off_id, _)) in offsets.iter().enumerate() { + if off_id == *other_off_id { + shuffle_vec.push(i as u32); + } + } + } + // the identity permutation does not count, as it gets handled elsewhere + if shuffle_vec == vec![0, 1, 2, 3] { + return vec![]; + } + + let mut shuffle_ids_vec: Vec = Vec::new(); + for elt in shuffle_vec { + let new_shuf_id = egraph.add(VecLang::Num(elt as i32)); + shuffle_ids_vec.push(new_shuf_id); + } + let (_, _, first_base_id) = offsets[0]; + let aligned_consec_load_vec = egraph.add(VecLang::AlignedConsecVecLoad([first_base_id])); + let shuffle_shuf_arg = egraph.add(VecLang::DataVec(shuffle_ids_vec.into_boxed_slice())); + let shuffle_vec_op = egraph.add(VecLang::Shuffle([ + aligned_consec_load_vec, + shuffle_shuf_arg, + ])); + + vec![shuffle_vec_op] + } +} diff --git a/src/dios-egraphs/src/cost.rs b/src/dios-egraphs/src/cost.rs index f9f036d0..bb4f08af 100644 --- a/src/dios-egraphs/src/cost.rs +++ b/src/dios-egraphs/src/cost.rs @@ -14,23 +14,55 @@ impl CostFunction for VecCostFn<'_> { where C: FnMut(Id) -> Self::Cost, { + // const NO_OPTIMIZATION: f64 = 0.0; + // const 
NO_COST: f64 = 0.0; + // const ALIGNED_CONSEC_VECTORIZED_MEMORY_ACCESS: f64 = -1.0; + // const VECTORIZED_MEMORY_ACCESS: f64 = 0.0001; + // const LITERAL: f64 = 0.001; + // const STRUCTURE: f64 = 0.1; + // const VEC_OP: f64 = 1.; + // const OP: f64 = 1.; + // const BIG: f64 = 100.0; + + // New cost model + const NO_OPTIMIZATION: f64 = 0.0; + const NO_COST: f64 = 0.0; + const ALIGNED_CONSEC_VECTORIZED_MEMORY_ACCESS: f64 = -1.0; + const VECTORIZED_MEMORY_ACCESS: f64 = 0.0001; const LITERAL: f64 = 0.001; const STRUCTURE: f64 = 0.1; const VEC_OP: f64 = 1.; const OP: f64 = 1.; const BIG: f64 = 100.0; let op_cost = match enode { + // No Optimization case for testing purposes + VecLang::NoOptVec(..) => NO_OPTIMIZATION, + + // Vectorized Memory Accesses are cheaper than individual memory loads and stores + // Note: This assumes that masked-gathers or masked-scattters to vectors or memory + // are implemented on the target, and are cheap, according to the LLVM cost model + VecLang::AlignedConsecVecLoad2(..) => ALIGNED_CONSEC_VECTORIZED_MEMORY_ACCESS * 0.5, + VecLang::AlignedConsecVecLoad(..) => ALIGNED_CONSEC_VECTORIZED_MEMORY_ACCESS, + VecLang::VecLoad(..) => VECTORIZED_MEMORY_ACCESS, + VecLang::VecStore(..) => ALIGNED_CONSEC_VECTORIZED_MEMORY_ACCESS, + // You get literals for extremely cheap VecLang::Num(..) => LITERAL, + VecLang::Reg(..) => LITERAL, + VecLang::Arg(..) => LITERAL, VecLang::Symbol(..) => LITERAL, VecLang::Get(..) => LITERAL, + VecLang::Set(..) => LITERAL, + VecLang::Load(..) => LITERAL, + VecLang::Store(..) => LITERAL, + VecLang::Gep(..) => LITERAL, // And list structures for quite cheap VecLang::List(..) => STRUCTURE, VecLang::Concat(..) => STRUCTURE, // Vectors are cheap if they have literal values - VecLang::Vec(vals) => { + VecLang::Vec(vals) | VecLang::VecTwo(vals) => { // For now, workaround to determine if children are num, symbol, // or get let non_literals = vals.iter().any(|&x| costs(x) > 3. 
* LITERAL); @@ -40,6 +72,7 @@ impl CostFunction for VecCostFn<'_> { STRUCTURE } } + VecLang::DataVec(..) => NO_COST, VecLang::LitVec(..) => LITERAL, // But scalar and vector ops cost something @@ -60,6 +93,8 @@ impl CostFunction for VecCostFn<'_> { VecLang::VecNeg(..) => VEC_OP, VecLang::VecSqrt(..) => VEC_OP, VecLang::VecSgn(..) => VEC_OP, + VecLang::Shuffle(..) => VEC_OP, + VecLang::Join(..) => VEC_OP, _ => VEC_OP, }; enode.fold(op_cost, |sum, id| sum + costs(id)) diff --git a/src/dios-egraphs/src/lib.rs b/src/dios-egraphs/src/lib.rs index 95b589d0..211263fb 100644 --- a/src/dios-egraphs/src/lib.rs +++ b/src/dios-egraphs/src/lib.rs @@ -1,7 +1,9 @@ +pub mod alignconsecsearcher; pub mod binopsearcher; pub mod config; pub mod cost; pub mod macsearcher; +pub mod permutestore; pub mod rewriteconcats; pub mod rules; pub mod searchutils; diff --git a/src/dios-egraphs/src/main.rs b/src/dios-egraphs/src/main.rs index b2d58b76..82bd089e 100644 --- a/src/dios-egraphs/src/main.rs +++ b/src/dios-egraphs/src/main.rs @@ -3,167 +3,233 @@ use clap::{App, Arg}; use dioslib::*; fn main() { - let matches = App::new("Diospyros Rewriter") - .arg( - Arg::with_name("INPUT") - .help("Sets the input file") - .required(true) - .index(1), - ) - .arg( - Arg::with_name("no-ac") - .long("no-ac") - .help("Disable associativity and commutativity rules"), - ) - .arg( - Arg::with_name("no-vec") - .long("no-vec") - .help("Disable vector rules"), - ) - .get_matches(); + let matches = App::new("Diospyros Rewriter") + .arg( + Arg::with_name("INPUT") + .help("Sets the input file") + .required(true) + .index(1), + ) + .arg( + Arg::with_name("no-ac") + .long("no-ac") + .help("Disable associativity and commutativity rules"), + ) + .arg( + Arg::with_name("no-vec") + .long("no-vec") + .help("Disable vector rules"), + ) + .get_matches(); - use std::{env, fs}; + use std::{env, fs}; - // Get a path string to parse a program. 
- let path = matches.value_of("INPUT").unwrap(); - let timeout = env::var("TIMEOUT") - .ok() - .and_then(|t| t.parse::().ok()) - .unwrap_or(180); - let prog_str = fs::read_to_string(path).expect("Failed to read the input file."); + // Get a path string to parse a program. + let path = matches.value_of("INPUT").unwrap(); + let timeout = env::var("TIMEOUT") + .ok() + .and_then(|t| t.parse::().ok()) + .unwrap_or(180); + let prog_str = fs::read_to_string(path).expect("Failed to read the input file."); - // AST conversion: boxed Rosette terms to Egg syntax - let converted: String = stringconversion::convert_string(&prog_str) - .expect("Failed to convert the input file to egg AST."); + // AST conversion: boxed Rosette terms to Egg syntax + let converted: String = stringconversion::convert_string(&prog_str) + .expect("Failed to convert the input file to egg AST."); - // Rewrite a list of expressions to a concatenation of vectors - let concats = rewriteconcats::list_to_concats(&converted); - let prog = concats.unwrap().parse().unwrap(); + // Rewrite a list of expressions to a concatenation of vectors + let concats = rewriteconcats::list_to_concats(&converted); + let prog = concats.unwrap().parse().unwrap(); - // Rules to disable flags - let no_ac = matches.is_present("no-ac"); - let no_vec = matches.is_present("no-vec"); + // Rules to disable flags + let no_ac = matches.is_present("no-ac"); + let no_vec = matches.is_present("no-vec"); - // Run rewriter - eprintln!( - "Running egg with timeout {:?}s, width: {:?}", - timeout, - config::vector_width() - ); - let (cost, best) = rules::run(&prog, timeout, no_ac, no_vec); + // Run rewriter + eprintln!( + "Running egg with timeout {:?}s, width: {:?}", + timeout, + config::vector_width() + ); + let (cost, best) = rules::run(&prog, timeout, no_ac, no_vec); - println!("{}", best.pretty(80)); /* Pretty print with width 80 */ - eprintln!("\nCost: {}", cost); + println!("{}", best.pretty(80)); /* Pretty print with width 80 */ + 
eprintln!("\nCost: {}", cost); } #[cfg(test)] mod tests { - use super::{rules::*, veclang::VecLang}; - use assert_approx_eq::assert_approx_eq; - use egg::*; + use super::{rules::*, veclang::VecLang}; + use assert_approx_eq::assert_approx_eq; + use egg::*; - fn run_egpraph_with_start(prog: &str, exp_best: &str, exp_best_cost: f64) { - // AST conversion: boxed Rosette terms to Egg syntax - let converted: String = super::stringconversion::convert_string(&prog.to_string()) - .expect("Failed to convert the input file to egg AST."); + fn run_egpraph_with_start(prog: &str, exp_best: &str, exp_best_cost: f64) { + // AST conversion: boxed Rosette terms to Egg syntax + let converted: String = super::stringconversion::convert_string(&prog.to_string()) + .expect("Failed to convert the input file to egg AST."); - // Rewrite a list of expressions to a concatenation of vectors - let concats = super::rewriteconcats::list_to_concats(&converted); - let start = concats.unwrap().parse().unwrap(); + // Rewrite a list of expressions to a concatenation of vectors + let concats = super::rewriteconcats::list_to_concats(&converted); + let start = concats.unwrap().parse().unwrap(); - // Run with AC off - let (best_cost, best) = run(&start, 60, true, false); + // Run with AC off + let (best_cost, best) = run(&start, 60, true, false); - println!( - "original:\n{}\nbest:\n{}\nbest cost {}", - start.pretty(80), - best.pretty(80), - best_cost, - ); - if best != exp_best.parse().unwrap() { - println!( - "Expected best not equal:{}", - exp_best.parse::>().unwrap().pretty(80) - ); - } - assert_approx_eq!(best_cost, exp_best_cost, 0.000001); + println!( + "original:\n{}\nbest:\n{}\nbest cost {}", + start.pretty(80), + best.pretty(80), + best_cost, + ); + if best != exp_best.parse().unwrap() { + println!( + "Expected best not equal:{}", + exp_best.parse::>().unwrap().pretty(80) + ); } + assert_approx_eq!(best_cost, exp_best_cost, 0.000001); + } + + #[test] + fn simple_register_parse() { + let start = 
"(Vec (+ a b) (+ c d) 0 0)"; + let exp_best = "(VecAdd (Vec a c 0 0) (Vec b d 0 0))"; + let exp_best_cost = 1.208; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn direct_recexpr() { - let expr = RecExpr::from( - [VecLang::Symbol(Symbol::from("a_in")), - VecLang::Symbol(Symbol::from("b_in")), + #[test] + fn simple_register() { + let expr = RecExpr::from( + [ + VecLang::Symbol(Symbol::from("x1")), + VecLang::Num(1), + VecLang::Symbol(Symbol::from("x2")), + VecLang::Num(2), + VecLang::Add([Id::from(1), Id::from(3)]), + VecLang::Symbol(Symbol::from("x3")), + VecLang::Reg(1), + VecLang::Symbol(Symbol::from("x4")), + VecLang::Num(4), + VecLang::Add([Id::from(6), Id::from(8)]), + VecLang::Symbol(Symbol::from("x5")), + VecLang::Num(5), + VecLang::Symbol(Symbol::from("x6")), + VecLang::Reg(2), + VecLang::Add([Id::from(11), Id::from(13)]), + VecLang::Symbol(Symbol::from("x7")), + VecLang::Reg(3), + VecLang::Symbol(Symbol::from("x8")), + VecLang::Reg(4), + VecLang::Add([Id::from(16), Id::from(18)]), + VecLang::Vec(Box::new([ + Id::from(4), + Id::from(9), + Id::from(14), + Id::from(19), + ])), + ] + .to_vec(), + ); + let (_, new_expr) = super::rules::run(&expr, 180, false, false); + println!("Test1!"); + println!("{:?}", expr); + println!("{:?}", new_expr); + } + + #[test] + fn direct_recexpr() { + let expr = RecExpr::from( + [ + VecLang::Symbol(Symbol::from("a_in")), + VecLang::Symbol(Symbol::from("b_in")), VecLang::Num(0), VecLang::Num(1), VecLang::Num(2), VecLang::Num(3), - VecLang::Get([Id::from(0), Id::from(2)]), - VecLang::Get([Id::from(1), Id::from(2)]), - VecLang::Add([Id::from(6), Id::from(7)]), + VecLang::Get([Id::from(0), Id::from(2)]), + VecLang::Get([Id::from(1), Id::from(2)]), + VecLang::Add([Id::from(6), Id::from(7)]), VecLang::Get([Id::from(0), Id::from(3)]), - VecLang::Get([Id::from(1), Id::from(3)]), + VecLang::Get([Id::from(1), Id::from(3)]), VecLang::Add([Id::from(9), Id::from(10)]), VecLang::Get([Id::from(0), Id::from(4)]), - 
VecLang::Get([Id::from(1), Id::from(4)]), - VecLang::Add([Id::from(12), Id::from(13)]), + VecLang::Get([Id::from(1), Id::from(4)]), + VecLang::Add([Id::from(12), Id::from(13)]), VecLang::Get([Id::from(0), Id::from(5)]), - VecLang::Get([Id::from(1), Id::from(5)]), - VecLang::Add([Id::from(15), Id::from(16)]), - VecLang::Vec(Box::new([Id::from(8), Id::from(11), Id::from(14), Id::from(17)]))].to_vec() - ); - let (cost, _) = super::rules::run(&expr, 180, false, false); - assert_approx_eq!(cost, 1.026, 0.000001); - } + VecLang::Get([Id::from(1), Id::from(5)]), + VecLang::Add([Id::from(15), Id::from(16)]), + VecLang::Vec(Box::new([ + Id::from(8), + Id::from(11), + Id::from(14), + Id::from(17), + ])), + ] + .to_vec(), + ); + let (cost, _) = super::rules::run(&expr, 180, false, false); + assert_approx_eq!(cost, 1.026, 0.000001); + } - #[test] - fn direct_recexpr_2() { - let expr = RecExpr::from( - [ - VecLang::Symbol(Symbol::from("scalar_in")), - VecLang::Num(0), - VecLang::Get([Id::from(0), Id::from(1)]), - VecLang::Vec(Box::new([Id::from(2), Id::from(1), Id::from(1), Id::from(1)])), - VecLang::Symbol(Symbol::from("a_in")), - VecLang::Num(4), - VecLang::Num(5), - VecLang::Num(6), - VecLang::Num(7), - VecLang::Get([Id::from(4), Id::from(5)]), - VecLang::Get([Id::from(4), Id::from(6)]), - VecLang::Get([Id::from(4), Id::from(7)]), - VecLang::Get([Id::from(4), Id::from(8)]), - VecLang::Mul([Id::from(9), Id::from(2)]), - VecLang::Mul([Id::from(10), Id::from(2)]), - VecLang::Mul([Id::from(11), Id::from(2)]), - VecLang::Mul([Id::from(12), Id::from(2)]), - VecLang::Vec(Box::new([Id::from(13), Id::from(14), Id::from(15), Id::from(16)])), - VecLang::Concat([Id::from(3), Id::from(17)]) - ].to_vec() - ); + #[test] + fn direct_recexpr_2() { + let expr = RecExpr::from( + [ + VecLang::Symbol(Symbol::from("scalar_in")), + VecLang::Num(0), + VecLang::Get([Id::from(0), Id::from(1)]), + VecLang::Vec(Box::new([ + Id::from(2), + Id::from(1), + Id::from(1), + Id::from(1), + ])), + 
VecLang::Symbol(Symbol::from("a_in")), + VecLang::Num(4), + VecLang::Num(5), + VecLang::Num(6), + VecLang::Num(7), + VecLang::Get([Id::from(4), Id::from(5)]), + VecLang::Get([Id::from(4), Id::from(6)]), + VecLang::Get([Id::from(4), Id::from(7)]), + VecLang::Get([Id::from(4), Id::from(8)]), + VecLang::Mul([Id::from(9), Id::from(2)]), + VecLang::Mul([Id::from(10), Id::from(2)]), + VecLang::Mul([Id::from(11), Id::from(2)]), + VecLang::Mul([Id::from(12), Id::from(2)]), + VecLang::Vec(Box::new([ + Id::from(13), + Id::from(14), + Id::from(15), + Id::from(16), + ])), + VecLang::Concat([Id::from(3), Id::from(17)]), + ] + .to_vec(), + ); - let (cost, _) = super::rules::run(&expr, 180, false, false); - assert_approx_eq!(cost, 1.133, 0.000001); - } + let (cost, _) = super::rules::run(&expr, 180, false, false); + assert_approx_eq!(cost, 1.133, 0.000001); + } - #[test] - fn simple_vector_add() { - let start = "(Vec (+ a b) (+ c d) 0 0)"; - let exp_best = "(VecAdd (Vec a c 0 0) (Vec b d 0 0))"; - let exp_best_cost = 1.208; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + #[test] + fn simple_vector_add() { + let start = "(Vec (+ a b) (+ c d) 0 0)"; + let exp_best = "(VecAdd (Vec a c 0 0) (Vec b d 0 0))"; + let exp_best_cost = 1.208; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_pairwise_mac() { - let start = " + #[test] + fn vector_pairwise_mac() { + let start = " (Vec (+ (* a b) (+ (* c d) (* e f))) (+ (* aa bb) (+ (* cc dd) (* ee ff))) 0 0)"; - let exp_best = " + let exp_best = " (VecMAC (VecMAC (VecMul (Vec c aa 0 0) (Vec d bb 0 0)) @@ -171,13 +237,13 @@ mod tests { (Vec f ff 0 0)) (Vec a cc 0 0) (Vec b dd 0 0))"; - let exp_best_cost = 3.624; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 3.624; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn qr_decomp_snippet() { - let start = " + #[test] + fn qr_decomp_snippet() { + let start = " (Vec (* (neg (sgn (Get A 
0))) @@ -198,7 +264,7 @@ mod tests { (* (Get A 0) (Get A 0)) (* (Get A 2) (Get A 2))))) (Get A 2))"; - let _best_with_ac = " + let _best_with_ac = " (VecMul (VecNeg (Vec @@ -213,21 +279,21 @@ mod tests { (Vec (Get A 0) (Get A 0) (Get A 0) 1)) (Vec (Get A 2) (Get A 2) (Get A 2) 1) (Vec (Get A 2) (Get A 2) (Get A 2) 1))))"; - let exp_best_cost = 121.048; + let exp_best_cost = 121.048; - // No rewrites found with AC off - run_egpraph_with_start(start, start, exp_best_cost); - } + // No rewrites found with AC off + run_egpraph_with_start(start, start, exp_best_cost); + } - #[test] - fn vector_variadic_add_mac() { - let start = " + #[test] + fn vector_variadic_add_mac() { + let start = " (Vec (+ (* a b) (* c d) (* e f)) (+ (* aa bb) (* cc dd) (* ee ff)) 0 0)"; - let exp_best = " + let exp_best = " (VecMAC (VecMAC (VecMul (Vec e ee 0 0) (Vec f ff 0 0)) @@ -235,81 +301,81 @@ mod tests { (Vec d dd 0 0)) (Vec a aa 0 0) (Vec b bb 0 0))"; - let exp_best_cost = 3.624; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 3.624; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_mac() { - let start = " + #[test] + fn vector_mac() { + let start = " (Vec (+ ?a0 (* ?b0 ?c0)) (+ ?a1 (* ?b1 ?c1)) (+ ?a2 (* ?b2 ?c2)) (+ ?a3 (* ?b3 ?c3)))"; - let exp_best = " + let exp_best = " (VecMAC (Vec ?a0 ?a1 ?a2 ?a3) (Vec ?b0 ?b1 ?b2 ?b3) (Vec ?c0 ?c1 ?c2 ?c3))"; - let exp_best_cost = 1.312; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 1.312; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_mac_just_mul_or_zero() { - let start = " + #[test] + fn vector_mac_just_mul_or_zero() { + let start = " (Vec (+ ?a0 (* ?b0 ?c0)) (* ?b1 ?c1) 0 (+ ?a3 (* ?b3 ?c3)))"; - let exp_best = " + let exp_best = " (VecMAC (Vec ?a0 0 0 ?a3) (Vec ?b0 ?b1 0 ?b3) (Vec ?c0 ?c1 0 ?c3))"; - let exp_best_cost = 1.312; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let 
exp_best_cost = 1.312; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_matrix_multiply_2x2_2x2() { - let start = " + #[test] + fn vector_matrix_multiply_2x2_2x2() { + let start = " (Vec (+ (* v0 v4) (* v1 v6)) (+ (* v0 v5) (* v1 v7)) (+ (* v2 v4) (* v3 v6)) (+ (* v2 v5) (* v3 v7)))"; - let exp_best = " + let exp_best = " (VecMAC (VecMul (Vec v1 v1 v6 v7) (Vec v6 v7 v3 v3)) (Vec v4 v5 v2 v2) (Vec v0 v0 v4 v5))"; - let exp_best_cost = 2.416; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 2.416; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_matrix_multiply_2x2_2x2_explicit_get() { - let start = " + #[test] + fn vector_matrix_multiply_2x2_2x2_explicit_get() { + let start = " (Vec (+ (* (Get a 0) (Get b 0)) (* (Get a 1) (Get b 2))) (+ (* (Get a 0) (Get b 1)) (* (Get a 1) (Get b 3))) (+ (* (Get a 2) (Get b 0)) (* (Get a 3) (Get b 2))) (+ (* (Get a 2) (Get b 1)) (* (Get a 3) (Get b 3))))"; - let exp_best = " + let exp_best = " (VecMAC (VecMul (LitVec (Get a 0) (Get a 0) (Get a 2) (Get a 2)) (LitVec (Get b 0) (Get b 1) (Get b 0) (Get b 1))) (LitVec (Get a 1) (Get a 1) (Get a 3) (Get a 3)) (LitVec (Get b 2) (Get b 3) (Get b 2) (Get b 3)))"; - let exp_best_cost = 2.052; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 2.052; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_matrix_multiply_2x3_3x3() { - let start = " + #[test] + fn vector_matrix_multiply_2x3_3x3() { + let start = " (List (+ (* (Get A 0) (Get B 0)) @@ -335,7 +401,7 @@ mod tests { (* (Get A 3) (Get B 2)) (* (Get A 4) (Get B 5)) (* (Get A 5) (Get B 8))))"; - let exp_best = " + let exp_best = " (Concat (VecMAC (VecMAC @@ -353,13 +419,13 @@ mod tests { (LitVec (Get B 7) (Get B 8) 0 0)) (LitVec (Get A 3) (Get A 3) 0 0) (LitVec (Get B 1) (Get B 2) 0 0)))"; - let exp_best_cost = 6.43; - run_egpraph_with_start(start, exp_best, 
exp_best_cost); - } + let exp_best_cost = 6.43; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_2d_conv_2x2_2x2() { - let start = " + #[test] + fn vector_2d_conv_2x2_2x2() { + let start = " (List (* v0 v4) (+ (* v0 v5) (* v1 v4)) @@ -370,7 +436,7 @@ mod tests { (* v2 v6) (+ (* v2 v7) (* v3 v6)) (* v3 v7))"; - let exp_best = " + let exp_best = " (Concat (VecMAC (VecMul (Vec 1 v0 1 v0) (Vec 0 v5 0 v6)) @@ -387,13 +453,13 @@ mod tests { (Vec v0 v3 v2 v3) (Vec v7 v5 v6 v6)) (VecMul (Vec v3 0 0 0) (Vec v7 0 0 0))))"; - let exp_best_cost = 9.656; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 9.656; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } - #[test] - fn vector_2d_conv_3x3_3x3() { - let start = "(Concat + #[test] + fn vector_2d_conv_3x3_3x3() { + let start = "(Concat (Vec (* v0 v9) (+ (* v0 v10) (* v1 v9)) @@ -462,7 +528,7 @@ mod tests { (+ (* v7 v17) (* v8 v16))) (List (* v8 v17))))))))"; - let exp_best = " + let exp_best = " (Vec (VecMAC (VecMAC @@ -544,7 +610,7 @@ mod tests { (List (* v8 v17))))))) 0 0)"; - let exp_best_cost = 156.468; - run_egpraph_with_start(start, exp_best, exp_best_cost); - } + let exp_best_cost = 156.468; + run_egpraph_with_start(start, exp_best, exp_best_cost); + } } diff --git a/src/dios-egraphs/src/permutestore.rs b/src/dios-egraphs/src/permutestore.rs new file mode 100644 index 00000000..0d6d5bad --- /dev/null +++ b/src/dios-egraphs/src/permutestore.rs @@ -0,0 +1,55 @@ +use crate::veclang::VecLang; +use egg::*; +use itertools::Itertools; + +/// Search for permutations of sequences of Loads and Stores that are Aligned and Consecutive +/// +/// This module creates an Applier, which attempts to find successful permutations of loads ands stores to be aligned and consecutive + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PermuteStore { + pub a0: Var, + pub a1: Var, + pub a2: Var, + pub a3: Var, + pub b0: Var, + pub b1: Var, + pub b2: Var, + pub b3: 
Var, +} + +impl> Applier for PermuteStore { + /// We generate all permutations of the vecstore + fn apply_one(&self, egraph: &mut EGraph, matched_id: Id, subst: &Subst) -> Vec { + let a0_id: Id = subst[self.a0]; + let a1_id: Id = subst[self.a1]; + let a2_id: Id = subst[self.a2]; + let a3_id: Id = subst[self.a3]; + let base0_id: Id = subst[self.a0]; + let base1_id: Id = subst[self.a1]; + let base2_id: Id = subst[self.a2]; + let base3_id: Id = subst[self.a3]; + + let original_list = vec![ + (a0_id, base0_id), + (a1_id, base1_id), + (a2_id, base2_id), + (a3_id, base3_id), + ]; + let perms = original_list.iter().permutations(4); + let mut new_vec_stores = vec![]; + for perm in perms { + let gep_vec_id = egraph.add(VecLang::Vec( + vec![perm[0].0, perm[1].0, perm[2].0, perm[3].0].into_boxed_slice(), + )); + let vec_store_node = + VecLang::VecStore([gep_vec_id, perm[0].1, perm[1].1, perm[2].1, perm[3].1]); + let vec_store_id = egraph.add(vec_store_node); + + // add in the shuffle + new_vec_stores.push(vec_store_id); + } + + new_vec_stores + } +} diff --git a/src/dios-egraphs/src/rules.rs b/src/dios-egraphs/src/rules.rs index a037f0f9..cc0d9356 100644 --- a/src/dios-egraphs/src/rules.rs +++ b/src/dios-egraphs/src/rules.rs @@ -3,10 +3,12 @@ use egg::{rewrite as rw, *}; use itertools::Itertools; use crate::{ + alignconsecsearcher::*, binopsearcher::build_binop_or_zero_rule, config::*, cost::VecCostFn, macsearcher::build_mac_rule, + permutestore::*, searchutils::*, veclang::{EGraph, VecLang}, }; @@ -67,7 +69,7 @@ pub fn run( .with_iter_limit(10_000) .run(&rules); - // print reason to STDERR. 
+ // print reason to STDERR eprintln!( "Stopped after {} iterations, reason: {:?}", runner.iterations.len(), @@ -126,6 +128,158 @@ pub fn build_litvec_rule() -> Rewrite { if is_all_same_memory_or_zero(&mem_vars)) } +fn memory_is_aligned_and_consec2( + var1: &'static str, + var2: &'static str, + var3: &'static str, + var4: &'static str, +) -> impl Fn(&mut EGraph, Id, &Subst) -> bool { + let var1: Var = var1.parse().unwrap(); + let var2: Var = var2.parse().unwrap(); + let var3: Var = var3.parse().unwrap(); + let var4: Var = var4.parse().unwrap(); + move |egraph, _, subst| unsafe { + let mut first_base = -10; + for e in egraph[subst[var1]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + first_base = n; + } + } + assert!(first_base != -10); + let mut second_base = -10; + for e in egraph[subst[var2]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + second_base = n; + } + } + assert!(second_base != -10); + + if !(first_base == second_base) { + return false; + } + + let mut first_offset = -10; + for e in egraph[subst[var3]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + first_offset = n; + } + } + assert!(first_offset != -10); + let mut second_offset = -10; + for e in egraph[subst[var4]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + second_offset = n; + } + } + assert!(second_offset != -10); + + if !(first_offset + 1 == second_offset) { + return false; + } + if !(first_offset % 4 == 0) { + return false; + } + + return true; + } +} + +// This returns a function that implements Condition +fn memory_is_aligned_and_consec( + var1: &'static str, + var2: &'static str, + var3: &'static str, + var4: &'static str, + var5: &'static str, + var6: &'static str, + var7: &'static str, + var8: &'static str, +) -> impl Fn(&mut EGraph, Id, &Subst) -> bool { + let var1: Var = var1.parse().unwrap(); + let var2: Var = var2.parse().unwrap(); + let var3: Var = var3.parse().unwrap(); + let var4: Var = var4.parse().unwrap(); + 
let var5: Var = var5.parse().unwrap(); + let var6: Var = var6.parse().unwrap(); + let var7: Var = var7.parse().unwrap(); + let var8: Var = var8.parse().unwrap(); + move |egraph, _, subst| unsafe { + let mut first_base = -10; + for e in egraph[subst[var1]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + first_base = n; + } + } + assert!(first_base != -10); + let mut second_base = -10; + for e in egraph[subst[var2]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + second_base = n; + } + } + assert!(second_base != -10); + let mut third_base = -10; + for e in egraph[subst[var3]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + third_base = n; + } + } + assert!(third_base != -10); + let mut fourth_base = -10; + for e in egraph[subst[var4]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + fourth_base = n; + } + } + assert!(fourth_base != -10); + + if !(first_base == second_base && first_base == third_base && first_base == fourth_base) { + return false; + } + + let mut first_offset = -10; + for e in egraph[subst[var5]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + first_offset = n; + } + } + assert!(first_offset != -10); + let mut second_offset = -10; + for e in egraph[subst[var6]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + second_offset = n; + } + } + assert!(second_offset != -10); + let mut third_offset = -10; + for e in egraph[subst[var7]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + third_offset = n; + } + } + assert!(third_offset != -10); + let mut fourth_offset = -10; + for e in egraph[subst[var8]].nodes.as_slice().into_iter() { + if let VecLang::Num(n) = *e { + fourth_offset = n; + } + } + assert!(fourth_offset != -10); + + if !(first_offset + 1 == second_offset + && first_offset + 2 == third_offset + && first_offset + 3 == fourth_offset) + { + return false; + } + if !(first_offset % 4 == 0) { + return false; + } + + return true; + } +} + 
pub fn rules(no_ac: bool, no_vec: bool) -> Vec> { let mut rules: Vec> = vec![ rw!("add-0"; "(+ 0 ?a)" => "?a"), @@ -158,10 +312,30 @@ pub fn rules(no_ac: bool, no_vec: bool) -> Vec> { // Vector rules if !no_vec { rules.extend(vec![ + // Aligned Consec Load rule + rw!("vec-load-aligned-consec"; "(Vec (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1) (Load ?a2 ?b2 ?o2) (Load ?a3 ?b3 ?o3))" => "(AlignedConsecVecLoad ?a0)" if memory_is_aligned_and_consec("?b0", "?b1", "?b2", "?b3", "?o0", "?o1", "?o2", "?o3")), + // Load load fusion rule + rw!("vec-load-loads"; "(Vec (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1) (Load ?a2 ?b2 ?o2) (Load ?a3 ?b3 ?o3))" => "(VecLoad ?a0 ?a1 ?a2 ?a3 (DataVec ?b0 ?b1 ?b2 ?b3) (DataVec ?o0 ?o1 ?o2 ?o3))"), + // Set store fusion rule + rw!("vec-store-sets"; "(Vec (Store ?a0 ?b0) (Store ?a1 ?b1) (Store ?a2 ?b2) (Store ?a3 ?b3))" => "(VecStore (Vec ?a0 ?a1 ?a2 ?a3) ?b0 ?b1 ?b2 ?b3)"), + + // rw!("vec-store-permutations"; "(VecStore (Vec ?a0 ?a1 ?a2 ?a3) ?b0 ?b1 ?b2 ?b3)" => { PermuteStore { + // a0: "?a0".parse().unwrap(), + // a1: "?a1".parse().unwrap(), + // a2: "?a2".parse().unwrap(), + // a3: "?a3".parse().unwrap(), + // b0: "?b0".parse().unwrap(), + // b1: "?b1".parse().unwrap(), + // b2: "?b2".parse().unwrap(), + // b3: "?b3".parse().unwrap(), + // }}), + + // also should have split for into vecstore2 as well + // Special MAC fusion rule - rw!("vec-mac-add-mul"; - "(VecAdd ?v0 (VecMul ?v1 ?v2))" - => "(VecMAC ?v0 ?v1 ?v2)"), + // rw!("vec-mac-add-mul"; + // "(VecAdd ?v0 (VecMul ?v1 ?v2))" + // => "(VecMAC ?v0 ?v1 ?v2)"), // Custom searchers build_unop_rule("neg", "VecNeg"), build_unop_rule("sqrt", "VecSqrt"), @@ -170,7 +344,10 @@ pub fn rules(no_ac: bool, no_vec: bool) -> Vec> { build_binop_or_zero_rule("+", "VecAdd"), build_binop_or_zero_rule("*", "VecMul"), build_binop_or_zero_rule("-", "VecMinus"), - build_mac_rule(), + // build_mac_rule(), + + // rw!("intros-join"; "(Vec ?a ?b ?c ?d)" => "(Join (VecTwo ?a ?b) (VecTwo ?c ?d))"), + 
rw!("intros-aligned-vec-load2"; "(VecTwo (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1))" => "(AlignedConsecVecLoad2 ?a0)" if memory_is_aligned_and_consec2("?b0", "?b1", "?o0", "?o1")), ]); } else { eprintln!("Skipping vector rules") @@ -185,5 +362,50 @@ pub fn rules(no_ac: bool, no_vec: bool) -> Vec> { rw!("assoc-mul"; "(* (* ?a ?b) ?c)" => "(* ?a (* ?b ?c))"), ]); } + + // // Context Rules + // rules.extend(vec![ + // // rw!("commute-add-context"; "(Vec (+ ?a0 ?b0) (- ?a1 ?b1) (+ ?a2 ?b2) (- ?a3 ?b3))" => "(Shuffle (Vec (+ ?a0 ?b0) (+ ?a2 ?b2) (- ?a1 ?b1) (- ?a3 ?b3)) (DataVec 0 2 1 3))"), + // // rw!("commute-add-context"; "(Vec (- ?a0 ?b0) (+ ?a1 ?b1) (- ?a2 ?b2) (+ ?a3 ?b3))" => "(Shuffle (Vec (- ?a0 ?b0) (- ?a2 ?b2) (+ ?a1 ?b1) (+ ?a3 ?b3)) (DataVec 0 2 1 3))"), + + // rw!("commute-add-context"; "(Vec (+ ?a0 ?b0) (* ?a1 ?b1) (+ ?a2 ?b2) (* ?a3 ?b3))" => "(Shuffle (Vec (+ ?a0 ?b0) (+ ?a2 ?b2) (* ?a1 ?b1) (* ?a3 ?b3)) (DataVec 0 2 1 3))"), + // rw!("commute-add-context"; "(Vec (* ?a0 ?b0) (+ ?a1 ?b1) (* ?a2 ?b2) (+ ?a3 ?b3))" => "(Shuffle (Vec (* ?a0 ?b0) (* ?a2 ?b2) (+ ?a1 ?b1) (+ ?a3 ?b3)) (DataVec 0 2 1 3))"), + // ]); + + // Data Movement Rules + // shuffle rules + rules.extend(vec![ + // rw!("vec2-permutation"; "(VecTwo ?a ?b)" => "(VecTwo ?b ?a)"), + + // The below commented out rules are completely wrong and should never occur or be used. 
+ // rw!("shuffle-op1"; "(Vec (VecAdd ?a ?b) (VecMinus ?c ?d) (VecAdd ?e ?f) (VecMinus ?g ?h))" => "(Shuffle (Vec (VecAdd ?a ?b) (VecAdd ?e ?f) (VecMinus ?c ?d) (VecMinus ?g ?h)) (DataVec 0 2 1 3))"), + + + // rw!("shuffle-op-A1M1A2M2-A1A2M1M2"; "(Vec (VecAdd ?a ?b) (VecMul ?c ?d) (VecAdd ?e ?f) (VecMul ?g ?h))" => "(Shuffle (Vec (VecAdd ?a ?b) (VecAdd ?e ?f) (VecMul ?c ?d) (VecMul ?g ?h)) (DataVec 0 2 1 3))"), + // rw!("shuffle-op-A1M1A2M2-A2A1M1M2"; "(Vec (VecAdd ?a ?b) (VecMul ?c ?d) (VecAdd ?e ?f) (VecMul ?g ?h))" => "(Shuffle (Vec (VecAdd ?e ?f) (VecAdd ?a ?b) (VecMul ?c ?d) (VecMul ?g ?h)) (DataVec 2 0 1 3))"), + // rw!("shuffle-op-A1M1A2M2-A1A2M2M1"; "(Vec (VecAdd ?a ?b) (VecMul ?c ?d) (VecAdd ?e ?f) (VecMul ?g ?h))" => "(Shuffle (Vec (VecAdd ?a ?b) (VecAdd ?e ?f) (VecMul ?g ?h) (VecMul ?c ?d)) (DataVec 0 2 3 1))"), + // rw!("shuffle-op-A1M1A2M2-A2A1M2M1"; "(Vec (VecAdd ?a ?b) (VecMul ?c ?d) (VecAdd ?e ?f) (VecMul ?g ?h))" => "(Shuffle (Vec (VecAdd ?e ?f) (VecAdd ?a ?b) (VecMul ?g ?h) (VecMul ?c ?d)) (DataVec 2 0 3 1))"), + // rw!("shuffle-op4"; "(Vec (VecAdd ?a ?b) (VecMul ?c ?d) (VecAdd ?e ?f) (VecMul ?g ?h))" => "(Shuffle (Vec (VecAdd ?e ?f) (VecAdd ?a ?b) (VecMul ?c ?d) (VecMul ?g ?h)) (DataVec 2 0 1 3))"), + // rw!("shuffle-op5"; "(Vec (VecMul ?a ?b) (VecAdd ?c ?d) (VecMul ?e ?f) (VecAdd ?g ?h))" => "(Shuffle (Vec (VecMul ?a ?b) (VecMul ?e ?f) (VecAdd ?c ?d) (VecAdd ?g ?h)) (DataVec 0 2 1 3))"), + + + // rw!("shuffle-load-vec"; "(Vec (Load ?a0 ?b0 ?o0) (Load ?a1 ?b1 ?o1) (Load ?a2 ?b2 ?o2) (Load ?a3 ?b3 ?o3))" => { PermuteLoad { + // a0: "?a0".parse().unwrap(), + // a1: "?a1".parse().unwrap(), + // a2: "?a2".parse().unwrap(), + // a3: "?a3".parse().unwrap(), + // b0: "?b0".parse().unwrap(), + // b1: "?b1".parse().unwrap(), + // b2: "?b2".parse().unwrap(), + // b3: "?b3".parse().unwrap(), + // o0: "?o0".parse().unwrap(), + // o1: "?o1".parse().unwrap(), + // o2: "?o2".parse().unwrap(), + // o3: "?o3".parse().unwrap(), + // }}), + ]); + + // split vec 
rules + rules } diff --git a/src/dios-egraphs/src/veclang-original.rs b/src/dios-egraphs/src/veclang-original.rs new file mode 100644 index 00000000..ee056f17 --- /dev/null +++ b/src/dios-egraphs/src/veclang-original.rs @@ -0,0 +1,59 @@ +use egg::*; + +define_language! { + pub enum VecLang { + Num(i32), + + // Id is a key to identify EClasses within an EGraph, represents + // children nodes + "+" = Add([Id; 2]), + "*" = Mul([Id; 2]), + "-" = Minus([Id; 2]), + "/" = Div([Id; 2]), + + "or" = Or([Id; 2]), + "&&" = And([Id; 2]), + "ite" = Ite([Id; 3]), + "<" = Lt([Id; 2]), + + "sgn" = Sgn([Id; 1]), + "sqrt" = Sqrt([Id; 1]), + "neg" = Neg([Id; 1]), + + // Lists have a variable number of elements + "List" = List(Box<[Id]>), + + // Vectors have width elements + "Vec" = Vec(Box<[Id]>), + + // Vector with all literals + "LitVec" = LitVec(Box<[Id]>), + + "Get" = Get([Id; 2]), + + // Used for partitioning and recombining lists + "Concat" = Concat([Id; 2]), + + // Vector operations that take 2 vectors of inputs + "VecAdd" = VecAdd([Id; 2]), + "VecMinus" = VecMinus([Id; 2]), + "VecMul" = VecMul([Id; 2]), + "VecDiv" = VecDiv([Id; 2]), + // "VecMulSgn" = VecMulSgn([Id; 2]), + + // Vector operations that take 1 vector of inputs + "VecNeg" = VecNeg([Id; 1]), + "VecSqrt" = VecSqrt([Id; 1]), + "VecSgn" = VecSgn([Id; 1]), + + // MAC takes 3 lists: acc, v1, v2 + "VecMAC" = VecMAC([Id; 3]), + + // language items are parsed in order, and we want symbol to + // be a fallback, so we put it last. + // `Symbol` is an egg-provided interned string type + Symbol(egg::Symbol), + } +} + +pub type EGraph = egg::EGraph; diff --git a/src/dios-egraphs/src/veclang.rs b/src/dios-egraphs/src/veclang.rs index ee056f17..9bbfe116 100644 --- a/src/dios-egraphs/src/veclang.rs +++ b/src/dios-egraphs/src/veclang.rs @@ -4,6 +4,23 @@ define_language! 
{ pub enum VecLang { Num(i32), + // Register points to other computation, denoted by a number + Reg(u32), + + // Argument points to a argument, denoted by a number + Arg(u32), + + Gep(u32), + + // Load is a read of memory + // The FIRST subelement is the ID of the parent of this load + // The SECOND subelement is a ID of the base of the array where the load occurs + // The THIRD subelement is the offset from the base of the array. Offsets are in number of floats away from base. + "Load" = Load([Id; 3]), + + // Store is a write to memory + "Store" = Store([Id; 2]), + // Id is a key to identify EClasses within an EGraph, represents // children nodes "+" = Add([Id; 2]), @@ -24,13 +41,21 @@ define_language! { "List" = List(Box<[Id]>), // Vectors have width elements + "VecTwo" = VecTwo(Box<[Id]>), "Vec" = Vec(Box<[Id]>), + "DataVec" = DataVec(Box<[Id]>), + + // Vectors have width elements, not to be optimized (for testing purposes) + "NoOptVec" = NoOptVec(Box<[Id]>), // Vector with all literals "LitVec" = LitVec(Box<[Id]>), "Get" = Get([Id; 2]), + // Set is a modification of memory + "Set" = Set([Id; 3]), + // Used for partitioning and recombining lists "Concat" = Concat([Id; 2]), @@ -49,6 +74,20 @@ define_language! { // MAC takes 3 lists: acc, v1, v2 "VecMAC" = VecMAC([Id; 3]), + "VecLoad" = VecLoad([Id; 6]), + + "AlignedConsecVecLoad2" = AlignedConsecVecLoad2([Id; 1]), + "AlignedConsecVecLoad" = AlignedConsecVecLoad([Id; 1]), + + "VecStore" = VecStore([Id; 5]), + + "Shuffle" = Shuffle([Id; 2]), + + "Join" = Join([Id; 2]), + + // Info specific to register + // RegInfo(egg::Symbol), + // language items are parsed in order, and we want symbol to // be a fallback, so we put it last. 
// `Symbol` is an egg-provided interned string type diff --git a/web-demo/frontend/custom-js/yarn.lock b/web-demo/frontend/custom-js/yarn.lock index 05139550..054e2950 100644 --- a/web-demo/frontend/custom-js/yarn.lock +++ b/web-demo/frontend/custom-js/yarn.lock @@ -748,9 +748,9 @@ path-is-absolute@^1.0.0, path-is-absolute@^1.0.1: integrity sha1-F0uSaHNVNP+8es5r9TpanhtcX18= path-parse@^1.0.6: - version "1.0.6" - resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c" - integrity sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw== + version "1.0.7" + resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" + integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== path-platform@~0.11.15: version "0.11.15"