Skip to content

Commit

Permalink
Reworked FLDCW, FNCLEX, FNINIT, FNSTCW, FNSTSW, FWAIT
Browse files Browse the repository at this point in the history
  • Loading branch information
ergo720 committed Sep 2, 2024
1 parent 201ba73 commit 9bbb621
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 122 deletions.
69 changes: 36 additions & 33 deletions lib86cpu/core/emitter/x64/jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ static_assert((LOCAL_VARS_off(0) & 15) == 0); // must be 16 byte aligned so that
// Diff hunk (-510,7 +510,7): exactly one macro line is replaced here. The FPU_CLEAR_C1 definition that uses
// FPU_FLG_C1 is the removed line; the one that uses FPU_SW_C1 is the added line.
//
// FPU_IS_TAG_EMPTY(num): emits MOV EDX, num, a call to fpu_is_tag_empty, and then TEST EAX, EAX, so the code that
// follows the macro can branch on the value the helper left in EAX.
#define FPU_IS_TAG_EMPTY(num) MOV(EDX, (num)); \
CALL_F(&fpu_is_tag_empty); \
TEST(EAX, EAX)
// FPU_CLEAR_C1(): emits an AND that clears the C1 bit of the 16-bit CPU_CTX_FSTATUS operand addressed through RCX.
#define FPU_CLEAR_C1() AND(MEMD16(RCX, CPU_CTX_FSTATUS), ~FPU_FLG_C1)
#define FPU_CLEAR_C1() AND(MEMD16(RCX, CPU_CTX_FSTATUS), ~FPU_SW_C1)
// FPU_PUSH() / FPU_POP(): decrement / increment the 16-bit FPU_DATA_FTOP operand and then mask it with 7,
// which keeps the value in the range 0..7 (wrapping in both directions).
#define FPU_PUSH() DEC(MEMD16(RCX, FPU_DATA_FTOP)); AND(MEMD16(RCX, FPU_DATA_FTOP), 7)
#define FPU_POP() INC(MEMD16(RCX, FPU_DATA_FTOP)); AND(MEMD16(RCX, FPU_DATA_FTOP), 7)

Expand Down Expand Up @@ -2361,12 +2361,12 @@ void lc86_jit::gen_fpu_exp_post_check(uint32_t exception, T &&unmasked)
MOV(DX, MEMD16(RCX, CPU_CTX_FCTRL));
NOT(DX);
AND(DX, AX);
AND(AX, (FPU_EXP_ALL | FPU_FLG_CC_ALL));
AND(MEMD16(RCX, CPU_CTX_FSTATUS), ~(FPU_EXP_ALL | FPU_FLG_CC_ALL));
AND(AX, (FPU_EXP_ALL | FPU_SW_CC_ALL));
AND(MEMD16(RCX, CPU_CTX_FSTATUS), ~(FPU_EXP_ALL | FPU_SW_CC_ALL));
OR(MEMD16(RCX, CPU_CTX_FSTATUS), AX); // update exception and condition code flags of guest fstatus
TEST(DX, exception); // test if exceptions of interest are unmasked
BR_EQ(masked);
OR(MEMD16(RCX, CPU_CTX_FSTATUS), FPU_FLG_ES);
OR(MEMD16(RCX, CPU_CTX_FSTATUS), FPU_SW_ES);
unmasked();
m_a.bind(masked);
}
Expand Down Expand Up @@ -2430,7 +2430,7 @@ void
lc86_jit::gen_check_fpu_unmasked_exp()
{
Label no_exp = m_a.newLabel();
TEST(MEMD16(RCX, CPU_CTX_FSTATUS), FPU_FLG_ES);
TEST(MEMD16(RCX, CPU_CTX_FSTATUS), FPU_SW_ES);
BR_EQ(no_exp);
if (m_cpu->cpu_ctx.regs.cr0 & CR0_NE_MASK) {
RAISEin0_f(EXP_MF);
Expand Down Expand Up @@ -3759,7 +3759,7 @@ void lc86_jit::float_load(decoded_instr *instr)
FPU_IS_TAG_EMPTY(stx); // check for stack underflow for src stx
BR_EQ(ok);
gen_fpu_stack_fault(FPU_STACK_UNDERFLOW);
TEST(MEMD16(RCX, CPU_CTX_FCTRL), FPU_FLG_IE);
TEST(MEMD16(RCX, CPU_CTX_FCTRL), FPU_EXP_INVALID);
BR_NE(masked);
BR_UNCOND(end_instr);
m_a.bind(masked); // if masked, load a qnan
Expand Down Expand Up @@ -5547,21 +5547,34 @@ lc86_jit::fldcw(decoded_instr *instr)
RAISEin0_t(EXP_NM);
}
else {
gen_check_fpu_unmasked_exp();

get_rm<OPNUM_SINGLE>(instr,
[](const op_info rm)
{
assert(0);
},
[this](const op_info rm)
{
Label ok = m_a.newLabel(), end_instr = m_a.newLabel();
LD_MEMs(SIZE16);
MOV(DX, AX);
AND(AX, FPU_EXP_ALL);
AND(DX, FPU_FLG_PC | FPU_FLG_RC);
AND(AX, ~(FPU_CW_EXP | FPU_CW_PC | FPU_CW_RC | FPU_CW_INF));
OR(AX, 0x40);
OR(DX, FPU_EXP_ALL);
MOV(DX, AX);
OR(DX, FPU_CW_EXP);
ST_R16(CPU_CTX_FCTRL, AX);
ST_R16(FPU_DATA_FRP, DX);
// Test for pending unmasked exceptions
MOV(DX, MEMD16(RCX, CPU_CTX_FSTATUS));
NOT(AX);
AND(DX, AX);
TEST(DX, FPU_EXP_ALL);
BR_EQ(ok);
OR(MEMD16(RCX, CPU_CTX_FSTATUS), FPU_SW_ES);
BR_UNCOND(end_instr);
m_a.bind(ok);
AND(MEMD16(RCX, CPU_CTX_FSTATUS), ~FPU_SW_ES);
m_a.bind(end_instr);
});
}
}
Expand Down Expand Up @@ -5609,9 +5622,7 @@ lc86_jit::fnclex(decoded_instr *instr)
RAISEin0_t(EXP_NM);
}
else {
LD_R16(AX, CPU_CTX_FSTATUS);
AND(AX, ~(FPU_FLG_SF | FPU_FLG_ES | FPU_FLG_BSY | FPU_EXP_ALL));
ST_R16(CPU_CTX_FSTATUS, AX);
AND(MEMD16(RCX, CPU_CTX_FSTATUS), ~(FPU_SW_SF | FPU_SW_ES | FPU_SW_BSY | FPU_EXP_ALL));
}
}

Expand All @@ -5624,8 +5635,14 @@ lc86_jit::fninit(decoded_instr *instr)
else {
ST_R16(CPU_CTX_FCTRL, 0x37F);
ST_R16(CPU_CTX_FSTATUS, 0);
MOV(MEMD32(RCX, CPU_CTX_FTAGS0), 0x03030303); // FPU_TAG_EMPTY for all ftags
MOV(MEMD32(RCX, CPU_CTX_FTAGS4), 0x03030303);
MOV(MEMD8(RCX, CPU_CTX_FTAGS0), FPU_TAG_EMPTY);
MOV(MEMD8(RCX, CPU_CTX_FTAGS1), FPU_TAG_EMPTY);
MOV(MEMD8(RCX, CPU_CTX_FTAGS2), FPU_TAG_EMPTY);
MOV(MEMD8(RCX, CPU_CTX_FTAGS3), FPU_TAG_EMPTY);
MOV(MEMD8(RCX, CPU_CTX_FTAGS4), FPU_TAG_EMPTY);
MOV(MEMD8(RCX, CPU_CTX_FTAGS5), FPU_TAG_EMPTY);
MOV(MEMD8(RCX, CPU_CTX_FTAGS6), FPU_TAG_EMPTY);
MOV(MEMD8(RCX, CPU_CTX_FTAGS7), FPU_TAG_EMPTY);
ST_R16(CPU_CTX_FCS, 0);
ST_R32(CPU_CTX_FIP, 0);
ST_R16(CPU_CTX_FDS, 0);
Expand All @@ -5643,20 +5660,15 @@ lc86_jit::fnstcw(decoded_instr *instr)
RAISEin0_t(EXP_NM);
}
else {
LD_R16(R8W, CPU_CTX_FCTRL);
LD_R16(AX, FPU_DATA_FRP);
AND(R8W, FPU_EXP_ALL);
AND(AX, FPU_FLG_PC | FPU_FLG_RC);
OR(R8W, AX);
OR(R8W, 0x40);
get_rm<OPNUM_SINGLE>(instr,
[](const op_info rm)
{
assert(0);
},
[this](const op_info rm)
{
ST_MEMs(R8W, SIZE16);
LD_R16(AX, CPU_CTX_FCTRL);
ST_MEMs(AX, SIZE16);
});
}
}
Expand All @@ -5670,7 +5682,7 @@ lc86_jit::fnstsw(decoded_instr *instr)
else {
LD_R16(R8W, CPU_CTX_FSTATUS);
LD_R16(AX, FPU_DATA_FTOP);
AND(R8W, ~(3 << 11));
AND(R8W, ~FPU_SW_TOP);
SHL(AX, 11);
OR(R8W, AX);
get_rm<OPNUM_SINGLE>(instr,
Expand Down Expand Up @@ -5698,16 +5710,7 @@ lc86_jit::fwait(decoded_instr *instr)
RAISEin0_t(EXP_NM);
}
else {
Label no_exp = m_a.newLabel();
LD_R16(AX, CPU_CTX_FSTATUS);
TEST(AX, FPU_FLG_ES);
BR_EQ(no_exp);
static const char *abort_msg = "Unmasked fpu exceptions are not supported";
MOV(RCX, abort_msg);
MOV(RAX, &cpu_runtime_abort);
CALL(RAX); // won't return
INT3();
m_a.bind(no_exp);
gen_check_fpu_unmasked_exp();
}
}

Expand Down
10 changes: 5 additions & 5 deletions lib86cpu/core/fpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,16 +80,16 @@ fpu_stack_fault(cpu_ctx_t *cpu_ctx, uint32_t exception)
{
assert(exception & FPU_EXP_INVALID);

exception &= (FPU_EXP_ALL | FPU_FLG_SF | FPU_FLG_C1);
exception &= (FPU_EXP_ALL | FPU_SW_SF | FPU_SW_C1);
uint32_t unmasked = (exception & ~cpu_ctx->regs.fctrl) & FPU_EXP_ALL;
if (unmasked) {
cpu_ctx->regs.fstatus |= FPU_FLG_ES;
cpu_ctx->regs.fstatus |= FPU_SW_ES;
}

cpu_ctx->regs.fstatus |= exception;
if (exception & FPU_FLG_SF) {
if (!(exception & FPU_FLG_C1)) {
cpu_ctx->regs.fstatus &= ~FPU_FLG_C1;
if (exception & FPU_SW_SF) {
if (!(exception & FPU_SW_C1)) {
cpu_ctx->regs.fstatus &= ~FPU_SW_C1;
}
}
}
Expand Down
88 changes: 27 additions & 61 deletions lib86cpu/core/fpu_instructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,42 +11,27 @@ void
fxsave_helper(cpu_ctx_t *cpu_ctx, addr_t addr)
{
// Stores the FPU state (and, when HFLG_CR4_OSFXSR is set in hflags, also MXCSR and the XMM registers) through
// mem_write_helper, starting at addr.
//
// Diff hunk (-11,42 +11,27), shown without +/- markers: the statements that advance addr with "addr += N" (and
// the writes that rely on the advanced addr) are the removed version; the statements that use a fixed
// "addr + offset" are the added version. Lines that appear only once are unchanged context.
//
// Offsets used by the added version: fctrl +0, fstatus +2, abridged tag byte +4, fop +6, fip +8, fcs +12,
// fdp +16, fds +20, mxcsr +24, MXCSR_MASK +28, fr[i] at +32 + 16 * i, xmm[i] at +160 + 16 * i.
mem_write_helper<uint16_t>(cpu_ctx, addr, cpu_ctx->regs.fctrl, 0);
addr += 2;
mem_write_helper<uint16_t>(cpu_ctx, addr, read_fstatus(cpu_ctx->cpu), 0);
addr += 2;
mem_write_helper<uint16_t>(cpu_ctx, addr + 2, read_fstatus(cpu_ctx->cpu), 0);
// Build the abridged tag byte: bit i is 1 when ftags[i] is anything other than FPU_TAG_EMPTY, 0 when it is empty.
uint8_t ftag_abridged = 0;
for (unsigned i = 0; i < 8; ++i) {
ftag_abridged |= (((cpu_ctx->regs.ftags[i] == FPU_TAG_EMPTY) ? 0 : 1) << i);
}
// Removed version: sequential writes of tag byte, fop, fip, fcs, fdp and fds, bumping addr after each one.
mem_write_helper<uint8_t>(cpu_ctx, addr, ftag_abridged, 0);
addr += 2;
mem_write_helper<uint16_t>(cpu_ctx, addr, cpu_ctx->regs.fop, 0);
addr += 2;
mem_write_helper<uint32_t>(cpu_ctx, addr, cpu_ctx->regs.fip, 0);
addr += 4;
mem_write_helper<uint16_t>(cpu_ctx, addr, cpu_ctx->regs.fcs, 0);
addr += 4;
mem_write_helper<uint32_t>(cpu_ctx, addr, cpu_ctx->regs.fdp, 0);
addr += 4;
mem_write_helper<uint16_t>(cpu_ctx, addr, cpu_ctx->regs.fds, 0);
addr += 4;
// Added version: the same six fields written at fixed offsets from the unmodified addr.
mem_write_helper<uint8_t>(cpu_ctx, addr + 4, ftag_abridged, 0);
mem_write_helper<uint16_t>(cpu_ctx, addr + 6, cpu_ctx->regs.fop, 0);
mem_write_helper<uint32_t>(cpu_ctx, addr + 8, cpu_ctx->regs.fip, 0);
mem_write_helper<uint16_t>(cpu_ctx, addr + 12, cpu_ctx->regs.fcs, 0);
mem_write_helper<uint32_t>(cpu_ctx, addr + 16, cpu_ctx->regs.fdp, 0);
mem_write_helper<uint16_t>(cpu_ctx, addr + 20, cpu_ctx->regs.fds, 0);
// MXCSR area, written only when HFLG_CR4_OSFXSR is set. The removed version stored 0 in the dword after mxcsr
// and used an else branch solely to skip 8 bytes; the added version stores MXCSR_MASK there and needs no else.
if (cpu_ctx->hflags & HFLG_CR4_OSFXSR) {
mem_write_helper<uint32_t>(cpu_ctx, addr, cpu_ctx->regs.mxcsr, 0);
addr += 4;
mem_write_helper<uint32_t>(cpu_ctx, addr, 0, 0);
addr += 4;
}
else {
addr += 8;
mem_write_helper<uint32_t>(cpu_ctx, addr + 24, cpu_ctx->regs.mxcsr, 0);
mem_write_helper<uint32_t>(cpu_ctx, addr + 28, MXCSR_MASK, 0);
}
// The eight fr[] registers are written as uint80_t values placed 16 bytes apart.
for (unsigned i = 0; i < 8; ++i) {
mem_write_helper<uint80_t>(cpu_ctx, addr, cpu_ctx->regs.fr[i], 0);
addr += 16;
mem_write_helper<uint80_t>(cpu_ctx, addr + 32 + 16 * i, cpu_ctx->regs.fr[i], 0);
}
// The eight xmm[] registers are written as uint128_t values, again only when HFLG_CR4_OSFXSR is set.
if (cpu_ctx->hflags & HFLG_CR4_OSFXSR) {
for (unsigned i = 0; i < 8; ++i) {
mem_write_helper<uint128_t>(cpu_ctx, addr, cpu_ctx->regs.xmm[i], 0);
addr += 16;
mem_write_helper<uint128_t>(cpu_ctx, addr + 160 + 16 * i, cpu_ctx->regs.xmm[i], 0);
}
}
}
Expand All @@ -72,53 +57,34 @@ fxrstor_helper(cpu_ctx_t *cpu_ctx, addr_t addr)
cpu_ctx->regs.mxcsr = temp;
}

cpu_ctx->regs.fctrl = mem_read_helper<uint16_t>(cpu_ctx, addr, 0) | 0x40;
cpu_ctx->fpu_data.frp = cpu_ctx->regs.fctrl | FPU_EXP_ALL | 0x40;
addr += 2;
write_fstatus(cpu_ctx->cpu, mem_read_helper<uint16_t>(cpu_ctx, addr, 0));
addr += 2;
uint8_t ftag_abridged = mem_read_helper<uint8_t>(cpu_ctx, addr, 0);
addr += 2;
cpu_ctx->regs.fop = mem_read_helper<uint16_t>(cpu_ctx, addr, 0);
addr += 2;
cpu_ctx->regs.fip = mem_read_helper<uint32_t>(cpu_ctx, addr, 0);
addr += 4;
cpu_ctx->regs.fcs = mem_read_helper<uint16_t>(cpu_ctx, addr, 0);
addr += 4;
cpu_ctx->regs.fdp = mem_read_helper<uint32_t>(cpu_ctx, addr, 0);
addr += 4;
cpu_ctx->regs.fds = mem_read_helper<uint16_t>(cpu_ctx, addr, 0);
addr += (4 + 8);
cpu_ctx->regs.fctrl = (mem_read_helper<uint16_t>(cpu_ctx, addr, 0) | 0x40);
cpu_ctx->fpu_data.frp = cpu_ctx->regs.fctrl | FPU_EXP_ALL;
write_fstatus(cpu_ctx->cpu, mem_read_helper<uint16_t>(cpu_ctx, addr + 2, 0));
uint8_t ftag_abridged = mem_read_helper<uint8_t>(cpu_ctx, addr + 4, 0);
cpu_ctx->regs.fop = (mem_read_helper<uint16_t>(cpu_ctx, addr + 6, 0) & 0x7FF);
cpu_ctx->regs.fip = mem_read_helper<uint32_t>(cpu_ctx, addr + 8, 0);
cpu_ctx->regs.fcs = mem_read_helper<uint16_t>(cpu_ctx, addr + 12, 0);
cpu_ctx->regs.fdp = mem_read_helper<uint32_t>(cpu_ctx, addr + 16, 0);
cpu_ctx->regs.fds = mem_read_helper<uint16_t>(cpu_ctx, addr + 20, 0);
for (unsigned i = 0; i < 8; ++i) {
cpu_ctx->regs.fr[i] = mem_read_helper<uint80_t>(cpu_ctx, addr, 0);
addr += 16;
cpu_ctx->regs.fr[i] = mem_read_helper<uint80_t>(cpu_ctx, addr + 32 + 16 * i, 0);
}
if (cpu_ctx->hflags & HFLG_CR4_OSFXSR) {
for (unsigned i = 0; i < 8; ++i) {
cpu_ctx->regs.xmm[i] = mem_read_helper<uint128_t>(cpu_ctx, addr, 0);
addr += 16;
cpu_ctx->regs.xmm[i] = mem_read_helper<uint128_t>(cpu_ctx, addr + 160 + 16 * i, 0);
}
}
uint16_t temp_ftop = cpu_ctx->fpu_data.ftop;
cpu_ctx->fpu_data.ftop = 0; // set ftop to zero so that we can use fpu_update_tag below
for (unsigned i = 0; i < 8; ++i) {
if (!(ftag_abridged & (1 << i))) { // empty
cpu_ctx->regs.ftags[i] = FPU_TAG_EMPTY;
fpu_update_tag<false>(cpu_ctx, i);
}
else {
uint16_t exp = cpu_ctx->regs.fr[i].high & 0x7FFF;
uint64_t mant = cpu_ctx->regs.fr[i].low;
if (exp == 0 && mant == 0) { // zero
cpu_ctx->regs.ftags[i] = FPU_TAG_ZERO;
}
else if ((exp == 0) || // denormal
(exp == 0x7FFF) || // NaN or infinity
((mant & (1ULL << 63)) == 0)) { // unnormal
cpu_ctx->regs.ftags[i] = FPU_TAG_SPECIAL;
}
else { // normal
cpu_ctx->regs.ftags[i] = FPU_TAG_VALID;
}
fpu_update_tag<true>(cpu_ctx, i);
}
}
cpu_ctx->fpu_data.ftop = temp_ftop;

return 0;
}
44 changes: 23 additions & 21 deletions lib86cpu/core/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -398,29 +398,31 @@ CR0_TS_MASK | CR0_EM_MASK | CR0_MP_MASK | CR0_PE_MASK)
#define FPU_EXP_ALL (FPU_EXP_INVALID | FPU_EXP_DENORMAL | FPU_EXP_DIVBYZERO | FPU_EXP_OVERFLOW | FPU_EXP_UNDERFLOW | FPU_EXP_PRECISION)

// fpu fstatus flags
// Diff hunk (-398,29 +398,31), shown without +/- markers: the FPU_FLG_* definitions are the removed names and the
// FPU_SW_* / FPU_CW_* definitions are the added names. The bit values of the renamed status flags are unchanged.
// Removed names (fstatus bits):
#define FPU_FLG_IE FPU_EXP_INVALID
#define FPU_FLG_DE FPU_EXP_DENORMAL
#define FPU_FLG_ZE FPU_EXP_DIVBYZERO
#define FPU_FLG_OE FPU_EXP_OVERFLOW
#define FPU_FLG_UE FPU_EXP_UNDERFLOW
#define FPU_FLG_PE FPU_EXP_PRECISION
#define FPU_FLG_SF (1 << 6)
#define FPU_FLG_ES (1 << 7)
#define FPU_FLG_C0 (1 << 8)
#define FPU_FLG_C1 (1 << 9)
#define FPU_FLG_C2 (1 << 10)
#define FPU_FLG_TOP (7 << 11)
#define FPU_FLG_C3 (1 << 14)
#define FPU_FLG_BSY (1 << 15)
#define FPU_FLG_CC_ALL (FPU_FLG_C0 | FPU_FLG_C1 | FPU_FLG_C2 | FPU_FLG_C3)
// Added names (fstatus bits). The six exception flags alias the FPU_EXP_* masks; SF is bit 6, ES bit 7,
// C0/C1/C2 bits 8/9/10, TOP the 3-bit field at bits 11..13, C3 bit 14 and BSY bit 15.
#define FPU_SW_IE FPU_EXP_INVALID
#define FPU_SW_DE FPU_EXP_DENORMAL
#define FPU_SW_ZE FPU_EXP_DIVBYZERO
#define FPU_SW_OE FPU_EXP_OVERFLOW
#define FPU_SW_UE FPU_EXP_UNDERFLOW
#define FPU_SW_PE FPU_EXP_PRECISION
#define FPU_SW_SF (1 << 6)
#define FPU_SW_ES (1 << 7)
#define FPU_SW_C0 (1 << 8)
#define FPU_SW_C1 (1 << 9)
#define FPU_SW_C2 (1 << 10)
#define FPU_SW_TOP (7 << 11)
#define FPU_SW_C3 (1 << 14)
#define FPU_SW_BSY (1 << 15)
// Mask covering all four condition-code bits (C0..C3).
#define FPU_SW_CC_ALL (FPU_SW_C0 | FPU_SW_C1 | FPU_SW_C2 | FPU_SW_C3)

// fpu stack fault flags
// Both kinds of stack fault set the invalid-operation flag together with SF; overflow additionally sets C1.
// The first pair below is the removed spelling (FPU_FLG_*), the second pair the added spelling (FPU_SW_*).
#define FPU_STACK_OVERFLOW (FPU_EXP_INVALID | FPU_FLG_SF | FPU_FLG_C1)
#define FPU_STACK_UNDERFLOW (FPU_EXP_INVALID | FPU_FLG_SF)

// fpu cctrl flags
#define FPU_FLG_PC (3 << 8)
#define FPU_FLG_RC (3 << 10)
#define FPU_STACK_OVERFLOW (FPU_EXP_INVALID | FPU_SW_SF | FPU_SW_C1)
#define FPU_STACK_UNDERFLOW (FPU_EXP_INVALID | FPU_SW_SF)

// fpu ctrl flags
// Added control-word masks: FPU_CW_EXP is the same mask as FPU_EXP_ALL, FPU_CW_PC is the 2-bit field at bits 8..9
// and FPU_CW_RC the 2-bit field at bits 10..11 (same values as the removed FPU_FLG_PC / FPU_FLG_RC), and
// FPU_CW_INF is bit 12, which has no counterpart among the removed names.
// NOTE(review): PC / RC / INF presumably stand for the x87 precision, rounding and infinity control fields - confirm.
#define FPU_CW_EXP FPU_EXP_ALL
#define FPU_CW_PC (3 << 8)
#define FPU_CW_RC (3 << 10)
#define FPU_CW_INF (1 << 12)

// fpu indefinite values
#define FPU_QNAN_INT8 (1UL << 7)
Expand Down
4 changes: 2 additions & 2 deletions lib86cpu/interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1337,7 +1337,7 @@ write_ftags(cpu_t *cpu, uint16_t value)
// Returns the value of regs.fstatus with its TOP field (bits 11..13) replaced by the current fpu_data.ftop.
// Diff hunk (-1337,7 +1337,7), shown without +/- markers: the declaration using FPU_FLG_TOP is the removed line and
// the one using FPU_SW_TOP is the added line; only the macro name differs between them.
uint16_t
read_fstatus(cpu_t *cpu)
{
uint16_t fstatus = (cpu->cpu_ctx.regs.fstatus & ~FPU_FLG_TOP);
uint16_t fstatus = (cpu->cpu_ctx.regs.fstatus & ~FPU_SW_TOP);
// ftop is kept separately in fpu_data, so it is shifted into the TOP position here.
fstatus |= (cpu->cpu_ctx.fpu_data.ftop << 11);
return fstatus;
}
Expand All @@ -1351,6 +1351,6 @@ read_fstatus(cpu_t *cpu)
// Splits a status-word value into its two storage locations: the TOP field (bits 11..13) is extracted into
// fpu_data.ftop, and the complete value, TOP bits included, is stored in regs.fstatus.
// Diff hunk (-1351,6 +1351,6), shown without +/- markers: the assignment using FPU_FLG_TOP is the removed line and
// the one using FPU_SW_TOP is the added line; only the macro name differs between them.
void
write_fstatus(cpu_t *cpu, uint16_t value)
{
cpu->cpu_ctx.fpu_data.ftop = (value & FPU_FLG_TOP) >> 11;
cpu->cpu_ctx.fpu_data.ftop = (value & FPU_SW_TOP) >> 11;
cpu->cpu_ctx.regs.fstatus = value;
}

0 comments on commit 9bbb621

Please sign in to comment.