From b9c0e371b10be3ab001d1df18019d2628c953079 Mon Sep 17 00:00:00 2001
From: ergo720 <45463469+ergo720@users.noreply.github.com>
Date: Sun, 1 Sep 2024 23:40:20 +0200
Subject: [PATCH] Reworked FILD, FST(P), FIST(P)

---
 lib86cpu/core/emitter/x64/jit.cpp | 509 ++++++++++++++++--------------
 lib86cpu/core/emitter/x64/jit.h   |  11 +-
 lib86cpu/core/fpu.cpp             |  33 +-
 lib86cpu/core/fpu.h               |   6 +-
 lib86cpu/core/internal.h          |  18 +-
 5 files changed, 315 insertions(+), 262 deletions(-)

diff --git a/lib86cpu/core/emitter/x64/jit.cpp b/lib86cpu/core/emitter/x64/jit.cpp
index 7e41f81..e7cd073 100644
--- a/lib86cpu/core/emitter/x64/jit.cpp
+++ b/lib86cpu/core/emitter/x64/jit.cpp
@@ -512,12 +512,7 @@ static_assert((LOCAL_VARS_off(0) & 15) == 0); // must be 16 byte aligned so that
 	TEST(EAX, EAX)
 #define FPU_CLEAR_C1() AND(MEMD16(RCX, CPU_CTX_FSTATUS), ~FPU_FLG_C1)
 #define FPU_PUSH() DEC(MEMD16(RCX, FPU_DATA_FTOP)); AND(MEMD16(RCX, FPU_DATA_FTOP), 7)
-#define FPU_LOAD_STX(x) MOVZX(EDX, MEMD16(RCX, FPU_DATA_FTOP)); \
-	ADD(EDX, (x)); \
-	AND(EDX, 7); \
-	MOV(EAX, sizeof(uint80_t)); \
-	MUL(DX); \
-	FLD(MEMSD80(RCX, RAX, 0, CPU_CTX_R0))
+#define FPU_POP() INC(MEMD16(RCX, FPU_DATA_FTOP)); AND(MEMD16(RCX, FPU_DATA_FTOP), 7)


 lc86_jit::lc86_jit(cpu_t *cpu)
@@ -2371,6 +2366,7 @@ void lc86_jit::gen_fpu_exp_post_check(uint32_t exception, T &&unmasked)
 	OR(MEMD16(RCX, CPU_CTX_FSTATUS), AX); // update exception and condition code flags of guest fstatus
 	TEST(DX, exception); // test if exceptions of interest are unmasked
 	BR_EQ(masked);
+	OR(MEMD16(RCX, CPU_CTX_FSTATUS), FPU_FLG_ES);
 	unmasked();
 	m_a.bind(masked);
 }
@@ -2413,10 +2409,24 @@ lc86_jit::gen_update_fpu_ptr(decoded_instr *instr, x86::Gp mem_addr64)
 }

 void
-lc86_jit::gen_fpu_exp(uint32_t exception, stack_fault_func func)
+lc86_jit::gen_fpu_stack_fault(uint32_t exception)
 {
 	MOV(EDX, exception);
-	CALL_F(func);
+	CALL_F(&fpu_stack_fault);
+}
+
+void
+lc86_jit::gen_fpu_stack_overflow()
+{
+	CALL_F(&fpu_stack_overflow);
+}
+
+void
+lc86_jit::gen_fpu_stack_underflow(uint32_t st_num, uint32_t should_pop)
+{
+	MOV(EDX, st_num);
+	MOV(R8D, should_pop);
+	CALL_F(&fpu_stack_underflow);
 }

 void
@@ -2434,27 +2444,15 @@ lc86_jit::gen_check_fpu_unmasked_exp()
 	m_a.bind(no_exp);
 }

-template<typename T>
-void lc86_jit::gen_fpu_stack_prologue(fpu_instr_t fpu_instr, T &&action_when_no_fault)
+void
+lc86_jit::gen_fpu_load_stx(uint32_t st_num)
 {
-	/*
-	action_when_no_fault = code to run when no stack fault is detected. Typically, loads a value to host st0
-	stack4 = ftop after push or before pop
-	r8w = fstatus after stack fault check (original if no fault or with updated ie/sf/c1 flags if it faulted)
-	*/
-
-	Label stack_fault = m_a.newLabel(), ok = m_a.newLabel();
-	MOV(MEMD64(RSP, LOCAL_VARS_off(0)), 0);
-	gen_fpu_stack_fault_check(fpu_instr);
-	gen_set_host_fpu_ctx();
-	MOV(MEMD32(RSP, LOCAL_VARS_off(4)), EAX); // save ftop to stack4
-	TEST(MEMD64(RSP, LOCAL_VARS_off(0)), 0); // if not zero, then stack fault
-	BR_NE(stack_fault);
-	action_when_no_fault();
-	BR_UNCOND(ok);
-	m_a.bind(stack_fault);
-	FLD(MEMD32(RSP, LOCAL_VARS_off(0))); // load indefinite value in host st0
-	m_a.bind(ok);
+	MOVZX(EDX, MEMD16(RCX, FPU_DATA_FTOP));
+	ADD(EDX, st_num);
+	AND(EDX, 7);
+	MOV(EAX, sizeof(uint80_t));
+	MUL(DX);
+	FLD(MEMSD80(RCX, RAX, 0, CPU_CTX_R0));
 }

 template
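As a rough C-level picture of the stack arithmetic that the FPU_PUSH/FPU_POP macros and gen_fpu_load_stx emit (a minimal sketch, not lib86cpu code; the uint80_t stand-in and its layout are assumptions):

	#include <cstdint>

	// Stand-in for the repo's 80-bit register slot (assumed layout: 64-bit
	// significand plus 16-bit sign/exponent, mirroring fr[].low/.high).
	struct uint80_t { uint64_t low; uint16_t high; };

	// st(i) resolves to physical slot (ftop + i) & 7; the emitted code then
	// scales that index by sizeof(uint80_t) to address the fr[] register file.
	struct fpu_model {
		uint16_t ftop = 0;  // FPU_DATA_FTOP
		uint80_t fr[8] {};  // register file starting at CPU_CTX_R0

		uint80_t &st(unsigned i) { return fr[(ftop + i) & 7]; }
		void push() { ftop = (ftop - 1) & 7; } // FPU_PUSH(): DEC + AND 7
		void pop() { ftop = (ftop + 1) & 7; }  // FPU_POP(): INC + AND 7
	};

st(i) is a rotating window over the 8 physical slots, which is why every access masks the index with 7.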
@@ -3606,6 +3604,245 @@ void lc86_jit::float_load_constant(decoded_instr *instr)
 #endif
 }

+template<unsigned idx>
+void lc86_jit::float_store(decoded_instr *instr)
+{
+	// idx 0 -> fst(p), 1 -> fist(p)
+
+	if (m_cpu->cpu_ctx.hflags & (HFLG_CR0_EM | HFLG_CR0_TS)) {
+		RAISEin0_t(EXP_NM);
+	}
+	else {
+		uint32_t should_pop = (instr->i.mnemonic == ZYDIS_MNEMONIC_FSTP) ||
+			(instr->i.mnemonic == ZYDIS_MNEMONIC_FISTP);
+		Label ok = m_a.newLabel(), end_instr = m_a.newLabel();
+
+		gen_check_fpu_unmasked_exp();
+		const auto stack_fault_check = [&]() {
+			gen_update_fpu_ptr(instr);
+			FPU_CLEAR_C1();
+			FPU_IS_TAG_EMPTY(0); // check for stack underflow of src st0
+			BR_EQ(ok);
+		};
+
+		get_rm(instr,
+			[&](const op_info rm)
+			{
+				assert(idx == 0);
+
+				stack_fault_check();
+				gen_fpu_stack_underflow(0, should_pop);
+				BR_UNCOND(end_instr);
+				m_a.bind(ok);
+				gen_set_host_fpu_ctx();
+				gen_fpu_load_stx(0); // load src st0
+				MOVZX(EDX, MEMD16(RCX, FPU_DATA_FTOP));
+				ADD(EDX, instr->i.raw.modrm.rm); // dst stx is ftop-relative, as in gen_fpu_load_stx
+				AND(EDX, 7);
+				MOV(EAX, sizeof(uint80_t));
+				MUL(DX);
+				FSTP(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)); // store src st0 to dst stx
+				RESTORE_FPU_CTX();
+			},
+			[&](const op_info rm)
+			{
+				Label masked = m_a.newLabel(), do_store = m_a.newLabel();
+				uint64_t qnan_low;
+				uint16_t size, qnan_high = 0;
+				switch (instr->o[OPNUM_SINGLE].size)
+				{
+				case 16:
+					assert(idx == 1);
+					size = SIZE16;
+					qnan_low = FPU_QNAN_INT16;
+					break;
+
+				case 32:
+					size = SIZE32;
+					qnan_low = idx == 1 ? FPU_QNAN_INT32 : FPU_QNAN_FLOAT32;
+					break;
+
+				case 64:
+					size = SIZE64;
+					qnan_low = idx == 1 ? FPU_QNAN_INT64 : FPU_QNAN_FLOAT64;
+					break;
+
+				case 80:
+					assert(idx == 0);
+					size = SIZE80;
+					qnan_low = FPU_QNAN_FLOAT80_LOW;
+					qnan_high = FPU_QNAN_FLOAT80_HIGH;
+					break;
+
+				default:
+					LIB86CPU_ABORT();
+				}
+				auto r8_host_reg = SIZED_REG(x64::r8, size);
+				MOV(EBX, EDX); // save mem addr for gen_update_fpu_ptr and ST_MEMs
+				stack_fault_check();
+				gen_fpu_stack_fault(FPU_STACK_UNDERFLOW);
+				TEST(MEMD16(RCX, CPU_CTX_FCTRL), FPU_EXP_INVALID);
+				BR_NE(masked);
+				BR_UNCOND(end_instr);
+				m_a.bind(masked); // if masked, store the integer or float indefinite selected above
+				if (size != SIZE80) {
+					MOV(r8_host_reg, qnan_low);
+				}
+				else {
+					MOV(MEMD64(RSP, LOCAL_VARS_off(0)), qnan_low);
+					MOV(MEMD64(RSP, LOCAL_VARS_off(1)), qnan_high);
+					LEA(R8, MEMD64(RSP, LOCAL_VARS_off(0)));
+				}
+				BR_UNCOND(do_store);
+				m_a.bind(ok);
+				gen_set_host_fpu_ctx();
+				gen_fpu_load_stx(0); // load src st0
+				if constexpr (idx == 0) {
+					FSTP(MEMD(RSP, LOCAL_VARS_off(0), size));
+				}
+				else if constexpr (idx == 1) {
+					FISTP(MEMD(RSP, LOCAL_VARS_off(0), size));
+				}
+				else {
+					LIB86CPU_ABORT();
+				}
+				gen_fpu_exp_post_check(FPU_EXP_ALL, [this, end_instr]() {
+					RESTORE_FPU_CTX();
+					BR_UNCOND(end_instr);
+					});
+				if (size != SIZE80) {
+					MOV(r8_host_reg, MEMD(RSP, LOCAL_VARS_off(0), size));
+				}
+				else {
+					LEA(R8, MEMD64(RSP, LOCAL_VARS_off(0)));
+				}
+				RESTORE_FPU_CTX();
+				m_a.bind(do_store);
+				MOV(EDX, EBX);
+				ST_MEMs(r8_host_reg, size); // store src st0 to dst mem
+			});
+
+		if (should_pop) {
+			FPU_POP();
+			XOR(EDX, EDX);
+			CALL_F(&fpu_update_tag); // update src st0 tag
+		}
+		m_a.bind(end_instr);
+	}
+}
+
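The masked responses above fall back to "indefinite" encodings. A standalone sanity check of the bit patterns involved (illustrative only, not part of the patch):

	#include <cassert>
	#include <cmath>
	#include <cstdint>
	#include <cstring>

	int main()
	{
		// FPU_QNAN_FLOAT32/FPU_QNAN_FLOAT64: negative quiet NaNs (the x87
		// real indefinite narrowed to 32/64 bits).
		uint32_t bits32 = 0xFFC00000;
		float f;
		std::memcpy(&f, &bits32, sizeof(f));
		assert(std::isnan(f) && std::signbit(f));

		uint64_t bits64 = 0xFFF8000000000000ULL;
		double d;
		std::memcpy(&d, &bits64, sizeof(d));
		assert(std::isnan(d) && std::signbit(d));

		// FPU_QNAN_INT16: the 16-bit integer indefinite is the sign bit alone.
		assert((1UL << 15) == 0x8000u);
		return 0;
	}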
+template<unsigned idx>
+void lc86_jit::float_load(decoded_instr *instr)
+{
+	// idx 0 -> fld, 1 -> fild
+
+	if (m_cpu->cpu_ctx.hflags & (HFLG_CR0_EM | HFLG_CR0_TS)) {
+		RAISEin0_t(EXP_NM);
+	}
+	else {
+		gen_check_fpu_unmasked_exp();
+		Label end_instr = m_a.newLabel();
+		const auto stack_fault_check = [&]() {
+			Label ok = m_a.newLabel();
+			FPU_IS_TAG_EMPTY(-1); // check for stack overflow of dst st0
+			BR_NE(ok);
+			gen_fpu_stack_overflow();
+			BR_UNCOND(end_instr);
+			m_a.bind(ok);
+		};
+
+		get_rm(instr,
+			[this, instr, end_instr, &stack_fault_check](const op_info rm)
+			{
+				assert(idx == 0);
+
+				Label ok = m_a.newLabel(), masked = m_a.newLabel(), do_push = m_a.newLabel();
+				unsigned stx = instr->i.raw.modrm.rm;
+				gen_update_fpu_ptr(instr);
+				FPU_CLEAR_C1();
+				stack_fault_check();
+				FPU_IS_TAG_EMPTY(stx); // check for stack underflow of src stx
+				BR_EQ(ok);
+				gen_fpu_stack_fault(FPU_STACK_UNDERFLOW);
+				TEST(MEMD16(RCX, CPU_CTX_FCTRL), FPU_FLG_IE);
+				BR_NE(masked);
+				BR_UNCOND(end_instr);
+				m_a.bind(masked); // if masked, load a qnan
+				gen_set_host_fpu_ctx();
+				MOV(MEMD64(RSP, LOCAL_VARS_off(0)), FPU_QNAN_FLOAT80_LOW);
+				MOV(MEMD64(RSP, LOCAL_VARS_off(1)), FPU_QNAN_FLOAT80_HIGH);
+				FLD(MEMD80(RSP, LOCAL_VARS_off(0))); // load qnan
+				BR_UNCOND(do_push);
+				m_a.bind(ok);
+				gen_set_host_fpu_ctx();
+				gen_fpu_load_stx(instr->i.raw.modrm.rm); // load src stx
+				m_a.bind(do_push);
+				FPU_PUSH();
+				MOV(EAX, sizeof(uint80_t));
+				MUL(MEMD16(RCX, FPU_DATA_FTOP));
+				FSTP(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)); // store src stx or qnan to dst st0
+			},
+			[this, instr, end_instr, &stack_fault_check](const op_info rm)
+			{
+				uint8_t size;
+				switch (instr->o[OPNUM_SINGLE].size)
+				{
+				case 16:
+					assert(idx == 1);
+					size = SIZE16;
+					break;
+
+				case 32:
+					size = SIZE32;
+					break;
+
+				case 64:
+					size = SIZE64;
+					break;
+
+				case 80:
+					assert(idx == 0);
+					size = SIZE80;
+					break;
+
+				default:
+					LIB86CPU_ABORT();
+				}
+				auto rax_host_reg = SIZED_REG(x64::rax, size);
+				MOV(EBX, EDX); // save mem addr for gen_update_fpu_ptr
+				LD_MEMs(size); // load src mem
+				if (size != SIZE80) {
+					MOV(MEMD(RSP, LOCAL_VARS_off(0), size), rax_host_reg);
+				}
+				gen_update_fpu_ptr(instr);
+				FPU_CLEAR_C1();
+				stack_fault_check();
+				gen_set_host_fpu_ctx();
+				if constexpr (idx == 0) {
+					FLD(MEMD(RSP, LOCAL_VARS_off(0), size));
+					gen_fpu_exp_post_check(FPU_EXP_INVALID, [this, instr, end_instr, size]() {
+						FSTP(MEMD(RSP, LOCAL_VARS_off(0), size)); // do a dummy pop to restore host fpu stack
+						RESTORE_FPU_CTX();
+						BR_UNCOND(end_instr);
+						});
+				}
+				else if constexpr (idx == 1) {
+					FILD(MEMD(RSP, LOCAL_VARS_off(0), size));
+				}
+				else {
+					LIB86CPU_ABORT();
+				}
+				FPU_PUSH();
+				MOV(EAX, sizeof(uint80_t));
+				MUL(MEMD16(RCX, FPU_DATA_FTOP));
+				FSTP(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)); // store src mem to dst st0
+			});
+
+		RESTORE_FPU_CTX();
+		XOR(EDX, EDX);
+		CALL_F(&fpu_update_tag); // update dst st0 tag
+		m_a.bind(end_instr);
+	}
+}
+
 void
 lc86_jit::aaa(decoded_instr *instr)
 {
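Building on the fpu_model sketch above, the order of operations float_load emits reduces to the following (assumed simplified tag states; the helper is hypothetical, not lib86cpu code):

	// Illustrative tag states (not the repo's FPU_TAG_* encodings).
	enum tag_t { TAG_VALID, TAG_SPECIAL, TAG_EMPTY };

	// Mirrors the emitted fld/fild sequence: overflow check on the slot the
	// push will claim, then push, store and tag update of the new st0.
	bool try_load(fpu_model &fpu, tag_t tags[8], uint80_t src)
	{
		unsigned dst = (fpu.ftop - 1) & 7;  // FPU_IS_TAG_EMPTY(-1)
		if (tags[dst] != TAG_EMPTY) {
			return false;                   // stack overflow -> gen_fpu_stack_overflow()
		}
		fpu.push();                         // FPU_PUSH()
		fpu.fr[fpu.ftop] = src;             // FSTP to dst st0
		tags[fpu.ftop] = TAG_VALID;         // fpu_update_tag(st_num = 0), simplified
		return true;
	}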
@@ -5285,171 +5522,19 @@ lc86_jit::enter(decoded_instr *instr)
 void
 lc86_jit::fild(decoded_instr *instr)
 {
-	LIB86CPU_ABORT();
-#if 0
-	if (m_cpu->cpu_ctx.hflags & (HFLG_CR0_EM | HFLG_CR0_TS)) {
-		RAISEin0_t(EXP_NM);
-	}
-	else {
-		get_rm(instr,
-			[](const op_info rm)
-			{
-				assert(0);
-			},
-			[this, instr](const op_info rm)
-			{
-				uint8_t size_mode = instr->i.opcode == 0xDB ? SIZE32 : (instr->i.raw.modrm.reg == 5 ? SIZE64 : SIZE16);
-				fpu_instr_t fpu_instr = instr->i.opcode == 0xDB ? fpu_instr_t::integer32 : (instr->i.raw.modrm.reg == 5 ? fpu_instr_t::integer64 : fpu_instr_t::integer16);
-				LD_MEMs(size_mode);
-				MOV(MEMD32(RSP, LOCAL_VARS_off(0)), EAX);
-				MOV(R9D, fpu_instr);
-				LEA(R8, MEMD64(RSP, LOCAL_VARS_off(0)));
-				LEA(RDX, MEMD64(RSP, LOCAL_VARS_off(1)));
-				CALL_F((&fpu_stack_check));
-				MOV(EBX, EAX);
-				MOV(EDX, EAX);
-				MOV(EAX, sizeof(uint80_t));
-				MUL(DX);
-				EMMS();
-				FILD(MEMD(RSP, LOCAL_VARS_off(0), size_mode));
-				FSTP(MEMSD80(RCX, RAX, 0, CPU_CTX_R0));
-				MOV(AX, MEMD16(RSP, LOCAL_VARS_off(1)));
-				ST_R16(CPU_CTX_FSTATUS, AX);
-				ST_R16(FPU_DATA_FTOP, BX);
-				MOV(EDX, EBX);
-				CALL_F(&fpu_update_tag);
-			});
-	}
-#endif
+	float_load<1>(instr);
 }

 void
 lc86_jit::fistp(decoded_instr *instr)
 {
-	LIB86CPU_ABORT();
-#if 0
-	if (m_cpu->cpu_ctx.hflags & (HFLG_CR0_EM | HFLG_CR0_TS)) {
-		RAISEin0_t(EXP_NM);
-	}
-	else {
-		bool should_pop = instr->i.mnemonic == ZYDIS_MNEMONIC_FISTP;
-		fpu_instr_t fpu_instr = instr->i.opcode == 0xDB ? fpu_instr_t::integer32 : (instr->i.raw.modrm.reg == 7 ? fpu_instr_t::integer64 : fpu_instr_t::integer16);
-		if (should_pop) {
-			gen_fpu_stack_prologue(fpu_instr, [this]() {
-				MOV(EAX, sizeof(uint80_t));
-				MUL(MEMD16(RSP, LOCAL_VARS_off(4)));
-				FLD(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)); // load guest st0 to host st0
-				});
-		}
-		else {
-			XOR(R8D, R8D); // clear r8w so that gen_fpu_exp_post_check still works
-		}
-
-		get_rm(instr,
-			[](const op_info rm)
-			{
-				assert(0);
-			},
-			[this, instr](const op_info rm)
-			{
-				uint8_t size = instr->i.opcode == 0xDB ? SIZE32 : (instr->i.raw.modrm.reg == 7 ? SIZE64 : SIZE16);
-				auto r8_host_reg = SIZED_REG(x64::r8, size);
-				FISTP(MEMD(RSP, LOCAL_VARS_off(0), size));
-				gen_fpu_exp_post_check();
-				MOV(r8_host_reg, MEMD(RSP, LOCAL_VARS_off(0), size));
-				ST_MEMs(r8_host_reg, size);
-				gen_update_fpu_ptr(instr);
-			});
-
-		RESTORE_FPU_CTX();
-		if (should_pop) {
-			MOV(EDX, MEMD32(RSP, LOCAL_VARS_off(4)));
-			LEA(EBX, MEMD32(EDX, 1));
-			AND(EBX, 7);
-			ST_R16(FPU_DATA_FTOP, BX);
-			CALL_F(&fpu_update_tag);
-		}
-	}
-#endif
+	float_store<1>(instr);
 }

 void
 lc86_jit::fld(decoded_instr *instr)
 {
-	if (m_cpu->cpu_ctx.hflags & (HFLG_CR0_EM | HFLG_CR0_TS)) {
-		RAISEin0_t(EXP_NM);
-	}
-	else {
-		gen_check_fpu_unmasked_exp();
-		Label end_instr = m_a.newLabel();
-		const auto stack_fault_check = [&]() {
-			Label ok = m_a.newLabel();
-			FPU_IS_TAG_EMPTY(-1); // check for stack overflow of dst st0
-			BR_NE(ok);
-			gen_fpu_exp(FPU_STACK_OVERFLOW, &fpu_stack_overflow);
-			BR_UNCOND(end_instr);
-			m_a.bind(ok);
-		};
-
-		get_rm(instr,
-			[this, instr, end_instr, &stack_fault_check](const op_info rm)
-			{
-				Label ok = m_a.newLabel(), masked = m_a.newLabel(), do_push = m_a.newLabel();
-				unsigned stx = instr->i.raw.modrm.rm;
-				gen_update_fpu_ptr(instr);
-				FPU_CLEAR_C1();
-				stack_fault_check();
-				FPU_IS_TAG_EMPTY(stx); // check for stack underflow of src stx
-				BR_EQ(ok);
-				gen_fpu_exp(FPU_STACK_UNDERFLOW, &fpu_stack_fault);
-				TEST(MEMD16(RCX, CPU_CTX_FCTRL), FPU_FLG_IE);
-				BR_NE(masked);
-				BR_UNCOND(end_instr);
-				m_a.bind(masked); // if masked, load a qnan
-				gen_set_host_fpu_ctx();
-				MOV(MEMD64(RSP, LOCAL_VARS_off(0)), FPU_QNAN_FLOAT_INDEFINITE64);
-				MOV(MEMD64(RSP, LOCAL_VARS_off(1)), FPU_QNAN_FLOAT_INDEFINITE16);
-				FLD(MEMD80(RSP, LOCAL_VARS_off(0))); // load qnan
-				BR_UNCOND(do_push);
-				m_a.bind(ok);
-				gen_set_host_fpu_ctx();
-				FPU_LOAD_STX(instr->i.raw.modrm.rm); // load src stx
-				m_a.bind(do_push);
-				FPU_PUSH();
-				MOV(EAX, sizeof(uint80_t));
-				MUL(MEMD16(RCX, FPU_DATA_FTOP));
-				FSTP(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)); // store src stx or qnan to dst st0
-			},
-			[this, instr, end_instr, &stack_fault_check](const op_info rm)
-			{
-				uint8_t size = instr->i.opcode == 0xD9 ? SIZE32 : (instr->i.opcode == 0xDD ? SIZE64 : SIZE80);
-				auto rax_host_reg = SIZED_REG(x64::rax, size);
-				MOV(EBX, EDX); // save mem addr for gen_fpu_exp
-				LD_MEMs(size); // load src mem
-				if (size != SIZE80) {
-					MOV(MEMD(RSP, LOCAL_VARS_off(0), size), rax_host_reg);
-				}
-				gen_update_fpu_ptr(instr);
-				FPU_CLEAR_C1();
-				stack_fault_check();
-				gen_set_host_fpu_ctx();
-				FLD(MEMD(RSP, LOCAL_VARS_off(0), size));
-				gen_fpu_exp_post_check(FPU_EXP_INVALID, [this, instr, end_instr, size]() {
-					FSTP(MEMD(RSP, LOCAL_VARS_off(0), size)); // do a dummy pop to restore host fpu stack
-					OR(MEMD16(RCX, CPU_CTX_FSTATUS), FPU_FLG_ES);
-					BR_UNCOND(end_instr);
-					});
-				FPU_PUSH();
-				MOV(EAX, sizeof(uint80_t));
-				MUL(MEMD16(RCX, FPU_DATA_FTOP));
-				FSTP(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)); // store src mem to dst st0
-			});
-
-		RESTORE_FPU_CTX();
-		XOR(EDX, EDX);
-		CALL_F(&fpu_update_tag); // update dst st0 tag
-		m_a.bind(end_instr);
-	}
+	float_load<0>(instr);
 }

 void
@@ -5606,63 +5691,7 @@ lc86_jit::fnstsw(decoded_instr *instr)
 void
 lc86_jit::fstp(decoded_instr *instr)
 {
-	LIB86CPU_ABORT();
-#if 0
-	if (m_cpu->cpu_ctx.hflags & (HFLG_CR0_EM | HFLG_CR0_TS)) {
-		RAISEin0_t(EXP_NM);
-	}
-	else {
-		bool should_pop = instr->i.mnemonic == ZYDIS_MNEMONIC_FSTP;
-		if (should_pop) {
-			gen_fpu_stack_prologue(fpu_instr_t::float_, [this]() {
-				MOV(EAX, sizeof(uint80_t));
-				MUL(MEMD16(RSP, LOCAL_VARS_off(4)));
-				FLD(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)); // load src st0
-				});
-		}
-		else {
-			MOV(EAX, sizeof(uint80_t));
-			MUL(MEMD16(RCX, FPU_DATA_FTOP));
-			FLD(MEMSD80(RCX, RAX, 0, CPU_CTX_R0)); // load src st0
-			XOR(R8D, R8D); // clear r8w so that gen_fpu_exp_post_check still works
-		}
-
-		get_rm(instr,
-			[this, instr](const op_info rm)
-			{
-				MOV(EDX, instr->i.raw.modrm.rm);
-				MOV(EAX, sizeof(uint80_t));
-				MUL(DX);
-				FSTP(MEMSD80(RCX, RAX, 0, CPU_CTX_R0));
-				gen_fpu_exp_post_check();
-				gen_update_fpu_ptr(instr);
-			},
-			[this, instr](const op_info rm)
-			{
-				uint8_t size = instr->i.opcode == 0xD9 ? SIZE32 : (instr->i.opcode == 0xDD ? SIZE64 : SIZE80);
-				auto r8_host_reg = SIZED_REG(x64::r8, size);
-				FSTP(MEMD(RSP, LOCAL_VARS_off(0), size));
-				gen_fpu_exp_post_check();
-				if (size != SIZE80) {
-					MOV(r8_host_reg, MEMD(RSP, LOCAL_VARS_off(0), size));
-				}
-				else {
-					LEA(R8, MEMD64(RSP, LOCAL_VARS_off(0)));
-				}
-				ST_MEMs(r8_host_reg, size);
-				gen_update_fpu_ptr(instr);
-			});
-
-		RESTORE_FPU_CTX();
-		if (should_pop) {
-			MOV(EDX, MEMD32(RSP, LOCAL_VARS_off(4)));
-			LEA(EBX, MEMD32(EDX, 1));
-			AND(EBX, 7);
-			ST_R16(FPU_DATA_FTOP, BX);
-			CALL_F(&fpu_update_tag);
-		}
-	}
-#endif
+	float_store<0>(instr);
 }

 void
diff --git a/lib86cpu/core/emitter/x64/jit.h b/lib86cpu/core/emitter/x64/jit.h
index 8cbe2c8..7593954 100644
--- a/lib86cpu/core/emitter/x64/jit.h
+++ b/lib86cpu/core/emitter/x64/jit.h
@@ -284,10 +284,15 @@ class lc86_jit : public Target {
 	void int_(decoded_instr *instr);
 	template<unsigned idx>
 	void float_load_constant(decoded_instr *instr);
-	template<typename T>
-	void gen_fpu_stack_prologue(fpu_instr_t fpu_instr, T &&action_when_no_fault);
-	void gen_fpu_exp(uint32_t exception, stack_fault_func func);
+	template<unsigned idx>
+	void float_store(decoded_instr *instr);
+	template<unsigned idx>
+	void float_load(decoded_instr *instr);
+	void gen_fpu_stack_fault(uint32_t exception);
+	void gen_fpu_stack_overflow();
+	void gen_fpu_stack_underflow(uint32_t st_num, uint32_t should_pop);
 	void gen_check_fpu_unmasked_exp();
+	void gen_fpu_load_stx(uint32_t st_num);

 	cpu_t *m_cpu;
 	CodeHolder m_code;
diff --git a/lib86cpu/core/fpu.cpp b/lib86cpu/core/fpu.cpp
index a590228..921fe27 100644
--- a/lib86cpu/core/fpu.cpp
+++ b/lib86cpu/core/fpu.cpp
@@ -26,6 +26,13 @@ fpu_push(cpu_ctx_t *cpu_ctx)
 	cpu_ctx->fpu_data.ftop = (cpu_ctx->fpu_data.ftop - 1) & 7;
 }

+static void
+fpu_pop(cpu_ctx_t *cpu_ctx)
+{
+	cpu_ctx->regs.ftags[cpu_ctx->fpu_data.ftop] = FPU_TAG_EMPTY;
+	cpu_ctx->fpu_data.ftop = (cpu_ctx->fpu_data.ftop + 1) & 7;
+}
+
 template
 void fpu_update_tag(cpu_ctx_t *cpu_ctx, uint32_t st_num)
 {
@@ -88,23 +95,35 @@ fpu_stack_fault(cpu_ctx_t *cpu_ctx, uint32_t exception)
 }

 void
-fpu_stack_overflow(cpu_ctx_t *cpu_ctx, uint32_t exception)
+fpu_stack_overflow(cpu_ctx_t *cpu_ctx)
 {
 	if (cpu_ctx->regs.fctrl & FPU_EXP_INVALID) {
 		// masked stack fault response
 		fpu_push(cpu_ctx);
-		cpu_ctx->regs.fr[cpu_ctx->fpu_data.ftop].low = FPU_QNAN_FLOAT_INDEFINITE64;
-		cpu_ctx->regs.fr[cpu_ctx->fpu_data.ftop].high = FPU_QNAN_FLOAT_INDEFINITE16;
-		fpu_update_tag(cpu_ctx, 0);
+		uint32_t idx = cpu_ctx->fpu_data.ftop;
+		cpu_ctx->regs.fr[idx].low = FPU_QNAN_FLOAT80_LOW;
+		cpu_ctx->regs.fr[idx].high = FPU_QNAN_FLOAT80_HIGH;
+		cpu_ctx->regs.ftags[idx] = FPU_TAG_SPECIAL;
 	}

-	fpu_stack_fault(cpu_ctx, exception);
+	fpu_stack_fault(cpu_ctx, FPU_STACK_OVERFLOW);
 }

 void
-fpu_stack_underflow(cpu_ctx_t *cpu_ctx, uint32_t exception)
+fpu_stack_underflow(cpu_ctx_t *cpu_ctx, uint32_t st_num, uint32_t should_pop)
 {
-	// TODO
+	if (cpu_ctx->regs.fctrl & FPU_EXP_INVALID) {
+		// masked stack fault response
+		uint32_t idx = (st_num + cpu_ctx->fpu_data.ftop) & 7;
+		cpu_ctx->regs.fr[idx].low = FPU_QNAN_FLOAT80_LOW;
+		cpu_ctx->regs.fr[idx].high = FPU_QNAN_FLOAT80_HIGH;
+		cpu_ctx->regs.ftags[idx] = FPU_TAG_SPECIAL;
+		if (should_pop) {
+			fpu_pop(cpu_ctx);
+		}
+	}
+
+	fpu_stack_fault(cpu_ctx, FPU_STACK_UNDERFLOW);
 }

 template JIT_API void fpu_update_tag(cpu_ctx_t *cpu_ctx, uint32_t idx);
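Reusing the fpu_model and tag_t sketches from the jit.cpp notes above, the masked underflow response implemented here amounts to the following (illustrative and simplified, not lib86cpu code):

	void masked_underflow(fpu_model &fpu, tag_t tags[8], unsigned st_num, bool should_pop)
	{
		unsigned idx = (st_num + fpu.ftop) & 7;
		fpu.fr[idx] = { 0xC000000000000000ULL, 0xFFFF }; // FPU_QNAN_FLOAT80_LOW/HIGH
		tags[idx] = TAG_SPECIAL;
		if (should_pop) {               // fpu_pop(): free the slot, then advance ftop
			tags[fpu.ftop] = TAG_EMPTY;
			fpu.pop();
		}
		// fpu_stack_fault() then records IE and SF in the guest fstatus.
	}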
diff --git a/lib86cpu/core/fpu.h b/lib86cpu/core/fpu.h
index 9ebb35d..7947279 100644
--- a/lib86cpu/core/fpu.h
+++ b/lib86cpu/core/fpu.h
@@ -16,14 +16,12 @@ enum class fpu_instr_t : uint32_t {
 	bcd,
 };

-using stack_fault_func = void(* JIT_API)(cpu_ctx_t *, uint32_t);
-
 void fpu_init(cpu_t *cpu);
 template
 JIT_API void fpu_update_tag(cpu_ctx_t *cpu_ctx, uint32_t st_num);
 JIT_API uint32_t fpu_is_tag_empty(cpu_ctx_t *cpu_ctx, uint32_t st_num);
-JIT_API void fpu_stack_overflow(cpu_ctx_t *cpu_ctx, uint32_t exception);
-JIT_API void fpu_stack_underflow(cpu_ctx_t *cpu_ctx, uint32_t exception);
+JIT_API void fpu_stack_overflow(cpu_ctx_t *cpu_ctx);
+JIT_API void fpu_stack_underflow(cpu_ctx_t *cpu_ctx, uint32_t st_num, uint32_t should_pop);
 JIT_API void fpu_stack_fault(cpu_ctx_t *cpu_ctx, uint32_t exception);
 JIT_API void fpu_update_ptr(cpu_ctx_t *cpu_ctx, uint64_t instr_info);
diff --git a/lib86cpu/core/internal.h b/lib86cpu/core/internal.h
index cbab99e..e7cef61 100644
--- a/lib86cpu/core/internal.h
+++ b/lib86cpu/core/internal.h
@@ -423,14 +423,16 @@ CR0_TS_MASK | CR0_EM_MASK | CR0_MP_MASK | CR0_PE_MASK)
 #define FPU_FLG_RC (3 << 10)

 // fpu indefinite values
-#define FPU_INTEGER_INDEFINITE8 (1 << 7)
-#define FPU_INTEGER_INDEFINITE16 (1 << 15)
-#define FPU_INTEGER_INDEFINITE32 (1UL << 31)
-#define FPU_INTEGER_INDEFINITE64 (1ULL << 63)
-#define FPU_QNAN_FLOAT_INDEFINITE64 0xC000000000000000 // mantissa part
-#define FPU_QNAN_FLOAT_INDEFINITE16 0xFFFF // exponent and sign parts
-#define FPU_BCD_INDEFINITE64 0xC000000000000000 // mantissa part
-#define FPU_BCD_INDEFINITE16 0xFFFF // exponent and sign parts
+#define FPU_QNAN_INT8 (1UL << 7)
+#define FPU_QNAN_INT16 (1UL << 15)
+#define FPU_QNAN_INT32 (1UL << 31)
+#define FPU_QNAN_INT64 (1ULL << 63)
+#define FPU_QNAN_FLOAT32 0xFFC00000
+#define FPU_QNAN_FLOAT64 0xFFF8000000000000
+#define FPU_QNAN_FLOAT80_LOW 0xC000000000000000 // mantissa part
+#define FPU_QNAN_FLOAT80_HIGH 0xFFFF // exponent and sign parts
+#define FPU_QNAN_BCD64 0xC000000000000000 // mantissa part
+#define FPU_QNAN_BCD16 0xFFFF // exponent and sign parts

 // fpu precision macros
 #define FPU_SINGLE_PRECISION 0
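A quick check (illustrative, not part of the patch) that the renamed FPU_QNAN_INT* values are the two's-complement minima, i.e. the x87 integer indefinite that a masked-invalid fist/fistp writes to memory:

	#include <cstdint>

	static_assert((1UL << 15) == uint16_t(INT16_MIN), "FPU_QNAN_INT16");
	static_assert((1UL << 31) == uint32_t(INT32_MIN), "FPU_QNAN_INT32");
	static_assert((1ULL << 63) == uint64_t(INT64_MIN), "FPU_QNAN_INT64");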