From 3337b846204c3d18fde4e28ad1558f5e73532ccc Mon Sep 17 00:00:00 2001 From: Lioncash <mathew1800@gmail.com> Date: Fri, 2 Jan 2015 18:21:45 -0500 Subject: [PATCH] dyncom: Implement SMLAD/SMUAD/SMLSD/SMUSD --- .../arm/dyncom/arm_dyncom_interpreter.cpp | 117 +++++++++++------- src/core/arm/interpreter/armemu.cpp | 6 +- src/core/arm/interpreter/armsupp.cpp | 8 +- src/core/arm/skyeye_common/armdefs.h | 2 + src/core/arm/skyeye_common/armemu.h | 1 - 5 files changed, 84 insertions(+), 50 deletions(-) diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index 7ba82503d..c5e885bcd 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -930,6 +930,8 @@ typedef struct _smlad_inst { unsigned int Rd; unsigned int Ra; unsigned int Rn; + unsigned int op1; + unsigned int op2; } smlad_inst; typedef struct _smla_inst { @@ -2313,25 +2315,40 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(smla)(unsigned int inst, int index) return inst_base; } -ARM_INST_PTR INTERPRETER_TRANSLATE(smlad)(unsigned int inst, int index){ - arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(smlad_inst)); - smlad_inst *inst_cream = (smlad_inst *)inst_base->component; - inst_base->cond = BITS(inst, 28, 31); - inst_base->idx = index; - inst_base->br = NON_BRANCH; +ARM_INST_PTR INTERPRETER_TRANSLATE(smlad)(unsigned int inst, int index) +{ + arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(smlad_inst)); + smlad_inst* const inst_cream = (smlad_inst*)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; inst_base->load_r15 = 0; - inst_cream->m = BIT(inst, 4); - inst_cream->Rn = BITS(inst, 0, 3); - inst_cream->Rm = BITS(inst, 8, 11); - inst_cream->Rd = BITS(inst, 16, 19); - inst_cream->Ra = BITS(inst, 12, 15); + inst_cream->m = BIT(inst, 5); + inst_cream->Rn = BITS(inst, 0, 3); + inst_cream->Rm = BITS(inst, 8, 11); + inst_cream->Rd = BITS(inst, 16, 19); + inst_cream->Ra = BITS(inst, 12, 15); + inst_cream->op1 = BITS(inst, 20, 22); + inst_cream->op2 = BITS(inst, 5, 7); - if (CHECK_RM ) - inst_base->load_r15 = 1; return inst_base; } +ARM_INST_PTR INTERPRETER_TRANSLATE(smuad)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(smlad)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(smusd)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(smlad)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(smlsd)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(smlad)(inst, index); +} + ARM_INST_PTR INTERPRETER_TRANSLATE(smlal)(unsigned int inst, int index) { arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(umlal_inst)); @@ -2355,12 +2372,10 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(smlal)(unsigned int inst, int index) ARM_INST_PTR INTERPRETER_TRANSLATE(smlalxy)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SMLALXY"); } ARM_INST_PTR INTERPRETER_TRANSLATE(smlald)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SMLALD"); } ARM_INST_PTR INTERPRETER_TRANSLATE(smlaw)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SMLAW"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(smlsd)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SMLSD"); } ARM_INST_PTR INTERPRETER_TRANSLATE(smlsld)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SMLSLD"); } ARM_INST_PTR INTERPRETER_TRANSLATE(smmla)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SMMLA"); } ARM_INST_PTR INTERPRETER_TRANSLATE(smmls)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SMMLS"); } ARM_INST_PTR INTERPRETER_TRANSLATE(smmul)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SMMUL"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(smuad)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SMUAD"); } ARM_INST_PTR INTERPRETER_TRANSLATE(smul)(unsigned int inst, int index) { arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(smul_inst)); @@ -2423,7 +2438,6 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(smulw)(unsigned int inst, int index) inst_base->load_r15 = 1; return inst_base; } -ARM_INST_PTR INTERPRETER_TRANSLATE(smusd)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SMUSD"); } ARM_INST_PTR INTERPRETER_TRANSLATE(srs)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SRS"); } ARM_INST_PTR INTERPRETER_TRANSLATE(ssat)(unsigned int inst, int index) { @@ -5382,44 +5396,59 @@ unsigned InterpreterMainLoop(ARMul_State* state) { FETCH_INST; GOTO_NEXT_INST; } + SMLAD_INST: + SMLSD_INST: + SMUAD_INST: + SMUSD_INST: { - if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { - smlad_inst *inst_cream = (smlad_inst *)inst_base->component; - long long int rm = cpu->Reg[inst_cream->Rm]; - long long int rn = cpu->Reg[inst_cream->Rn]; - long long int ra = cpu->Reg[inst_cream->Ra]; + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + smlad_inst* const inst_cream = (smlad_inst*)inst_base->component; + const u8 op2 = inst_cream->op2; - // See SMUAD - if(inst_cream->Ra == 15) - CITRA_IGNORE_EXIT(-1); - int operand2 = (inst_cream->m)? ROTATE_RIGHT_32(rm, 16):rm; - int half_rn, half_operand2; + u32 rm_val = cpu->Reg[inst_cream->Rm]; + const u32 rn_val = cpu->Reg[inst_cream->Rn]; - half_rn = rn & 0xFFFF; - half_rn = (half_rn & 0x8000)? (0xFFFF0000|half_rn) : half_rn; + if (inst_cream->m) + rm_val = (((rm_val & 0xFFFF) << 16) | (rm_val >> 16)); - half_operand2 = operand2 & 0xFFFF; - half_operand2 = (half_operand2 & 0x8000)? (0xFFFF0000|half_operand2) : half_operand2; + const s16 rm_lo = (rm_val & 0xFFFF); + const s16 rm_hi = ((rm_val >> 16) & 0xFFFF); + const s16 rn_lo = (rn_val & 0xFFFF); + const s16 rn_hi = ((rn_val >> 16) & 0xFFFF); - long long int product1 = half_rn * half_operand2; + const u32 product1 = (rn_lo * rm_lo); + const u32 product2 = (rn_hi * rm_hi); - half_rn = (rn & 0xFFFF0000) >> 16; - half_rn = (half_rn & 0x8000)? (0xFFFF0000|half_rn) : half_rn; + // SMUAD and SMLAD + if (BIT(op2, 1) == 0) { + RD = (product1 + product2); - half_operand2 = (operand2 & 0xFFFF0000) >> 16; - half_operand2 = (half_operand2 & 0x8000)? (0xFFFF0000|half_operand2) : half_operand2; + if (inst_cream->Ra != 15) { + RD += cpu->Reg[inst_cream->Ra]; - long long int product2 = half_rn * half_operand2; + if (ARMul_AddOverflowQ(product1 + product2, cpu->Reg[inst_cream->Ra])) + cpu->Cpsr |= (1 << 27); + } - long long int signed_ra = (ra & 0x80000000)? (0xFFFFFFFF00000000LL) | ra : ra; - long long int result = product1 + product2 + signed_ra; - cpu->Reg[inst_cream->Rd] = result & 0xFFFFFFFF; + if (ARMul_AddOverflowQ(product1, product2)) + cpu->Cpsr |= (1 << 27); + } + // SMUSD and SMLSD + else { + RD = (product1 - product2); - // TODO: FIXME should check Signed overflow + if (inst_cream->Ra != 15) { + RD += cpu->Reg[inst_cream->Ra]; + + if (ARMul_AddOverflowQ(product1 - product2, cpu->Reg[inst_cream->Ra])) + cpu->Cpsr |= (1 << 27); + } + } } + cpu->Reg[15] += GET_INST_SIZE(cpu); - INC_PC(sizeof(umlal_inst)); + INC_PC(sizeof(smlad_inst)); FETCH_INST; GOTO_NEXT_INST; } @@ -5452,15 +5481,15 @@ unsigned InterpreterMainLoop(ARMul_State* state) { FETCH_INST; GOTO_NEXT_INST; } + SMLALXY_INST: SMLALD_INST: SMLAW_INST: - SMLSD_INST: SMLSLD_INST: SMMLA_INST: SMMLS_INST: SMMUL_INST: - SMUAD_INST: + SMUL_INST: { if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { @@ -5528,8 +5557,8 @@ unsigned InterpreterMainLoop(ARMul_State* state) { GOTO_NEXT_INST; } - SMUSD_INST: SRS_INST: + SSAT_INST: { if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { diff --git a/src/core/arm/interpreter/armemu.cpp b/src/core/arm/interpreter/armemu.cpp index 43b1ba40e..40e4837d8 100644 --- a/src/core/arm/interpreter/armemu.cpp +++ b/src/core/arm/interpreter/armemu.cpp @@ -6470,10 +6470,12 @@ L_stm_s_takeabort: if (BITS(12, 15) != 15) { state->Reg[rd_idx] += state->Reg[ra_idx]; - ARMul_AddOverflowQ(state, product1 + product2, state->Reg[ra_idx]); + if (ARMul_AddOverflowQ(product1 + product2, state->Reg[ra_idx])) + SETQ; } - ARMul_AddOverflowQ(state, product1, product2); + if (ARMul_AddOverflowQ(product1, product2)) + SETQ; } // SMUSD and SMLSD else { diff --git a/src/core/arm/interpreter/armsupp.cpp b/src/core/arm/interpreter/armsupp.cpp index 426b67831..eec34143e 100644 --- a/src/core/arm/interpreter/armsupp.cpp +++ b/src/core/arm/interpreter/armsupp.cpp @@ -453,12 +453,14 @@ ARMul_AddOverflow (ARMul_State * state, ARMword a, ARMword b, ARMword result) ASSIGNV (AddOverflow (a, b, result)); } -/* Assigns the Q flag if the given result is considered an overflow from the addition of a and b */ -void ARMul_AddOverflowQ(ARMul_State* state, ARMword a, ARMword b) +// Returns true if the Q flag should be set as a result of overflow. +bool ARMul_AddOverflowQ(ARMword a, ARMword b) { u32 result = a + b; if (((result ^ a) & (u32)0x80000000) && ((a ^ b) & (u32)0x80000000) == 0) - SETQ; + return true; + + return false; } /* Assigns the C flag after an subtraction of a and b to give result. */ diff --git a/src/core/arm/skyeye_common/armdefs.h b/src/core/arm/skyeye_common/armdefs.h index 8611d7392..c2c78cd5a 100644 --- a/src/core/arm/skyeye_common/armdefs.h +++ b/src/core/arm/skyeye_common/armdefs.h @@ -790,6 +790,8 @@ extern void ARMul_FixSPSR(ARMul_State*, ARMword, ARMword); extern void ARMul_ConsolePrint(ARMul_State*, const char*, ...); extern void ARMul_SelectProcessor(ARMul_State*, unsigned); +extern bool ARMul_AddOverflowQ(ARMword, ARMword); + extern u8 ARMul_SignedSaturatedAdd8(u8, u8); extern u8 ARMul_SignedSaturatedSub8(u8, u8); extern u16 ARMul_SignedSaturatedAdd16(u16, u16); diff --git a/src/core/arm/skyeye_common/armemu.h b/src/core/arm/skyeye_common/armemu.h index 3ea14b5a3..e1b286f0f 100644 --- a/src/core/arm/skyeye_common/armemu.h +++ b/src/core/arm/skyeye_common/armemu.h @@ -602,7 +602,6 @@ extern ARMword ARMul_SwitchMode (ARMul_State *, ARMword, ARMword); extern void ARMul_MSRCpsr (ARMul_State *, ARMword, ARMword); extern void ARMul_SubOverflow (ARMul_State *, ARMword, ARMword, ARMword); extern void ARMul_AddOverflow (ARMul_State *, ARMword, ARMword, ARMword); -extern void ARMul_AddOverflowQ(ARMul_State*, ARMword, ARMword); extern void ARMul_SubCarry (ARMul_State *, ARMword, ARMword, ARMword); extern void ARMul_AddCarry (ARMul_State *, ARMword, ARMword, ARMword); extern tdstate ARMul_ThumbDecode (ARMul_State *, ARMword, ARMword, ARMword *);