diff --git a/include/crt.c b/include/crt.c index cc10ab3..0613c56 100644 --- a/include/crt.c +++ b/include/crt.c @@ -119,6 +119,34 @@ W1: asl accu rts } +__asm mul16by8 +{ + lda #0 + sta tmp + 2 + sta tmp + 3 + + lda tmp + lsr + bcc L2 +L1: + tax + clc + lda tmp + 2 + adc accu + sta tmp + 2 + lda tmp + 3 + adc accu + 1 + sta tmp + 3 + txa +L2: + asl accu + 0 + rol accu + 1 + lsr + bcs L1 + bne L2 + rts +} + __asm divs16 { bit accu + 1 @@ -159,6 +187,7 @@ L2: jsr divmod } #pragma runtime(mul16, mul16); +#pragma runtime(mul16by8, mul16by8); #pragma runtime(divu16, divmod); #pragma runtime(modu16, divmod); #pragma runtime(divs16, divs16); @@ -1827,11 +1856,14 @@ __asm faddsub sta accu + 2 beq fas_aligned W1: - lsr accu + 2 + lda accu + 2 +L1: + lsr ror accu + 1 ror accu inx - bne W1 + bne L1 + sta accu + 2 lda tmp + 5 sta tmp + 4 jmp fas_aligned @@ -1840,12 +1872,13 @@ fas_align2nd: // check if second operand is below rounding cpx #24 bcs fas_done - -L2: lsr tmp + 2 + lda tmp + 2 +L2: lsr ror tmp + 1 ror tmp dex bne L2 + sta tmp + 2 fas_aligned: lda accu + 3 @@ -1960,8 +1993,8 @@ __asm fmul8 bcc L2 L1: tax clc - lda tmp + 6 - adc accu + tya + adc tmp + 6 sta tmp + 6 lda tmp + 7 adc accu + 1 @@ -2011,6 +2044,7 @@ W2: sta tmp + 7 sta tmp + 8 + ldy accu lda tmp jsr fmul8 lda tmp + 1 @@ -2055,7 +2089,9 @@ W3: and #$7f __asm inp_binop_mul_f32 { jsr freg.split_exp + sty tmpy jsr fmul + ldy tmpy jmp startup.exec } diff --git a/oscar64/ByteCodeGenerator.cpp b/oscar64/ByteCodeGenerator.cpp index 61eab58..0efad35 100644 --- a/oscar64/ByteCodeGenerator.cpp +++ b/oscar64/ByteCodeGenerator.cpp @@ -528,6 +528,16 @@ void ByteCodeBasicBlock::FloatConstToAccu(double val) mIns.Push(bins); } +void ByteCodeBasicBlock::FloatConstToWork(double val) +{ + union { float f; int v; } cc; + cc.f = val; + ByteCodeInstruction bins(BC_CONST_32); + bins.mRegister = BC_REG_WORK; + bins.mValue = cc.v; + mIns.Push(bins); +} + void ByteCodeBasicBlock::IntConstToAddr(__int64 val) { @@ -623,64 +633,124 @@ 
void ByteCodeBasicBlock::StoreDirectValue(InterCodeProcedure* proc, const InterI { if (ins.mSTemp[1] < 0) { - if (ins.mMemory == IM_GLOBAL) + if (ins.mSTemp[0] < 0) { - ByteCodeInstruction bins(BC_STORE_ABS_32); - bins.mRelocate = true; - bins.mVIndex = ins.mVarIndex; - bins.mValue = ins.mSIntConst[1]; - bins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]]; - mIns.Push(bins); - } - else if (ins.mMemory == IM_ABSOLUTE) - { - ByteCodeInstruction bins(BC_STORE_ABS_32); - bins.mValue = ins.mSIntConst[1]; - bins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]]; - mIns.Push(bins); - } - else if (ins.mMemory == IM_LOCAL || ins.mMemory == IM_PARAM) - { - int index = ins.mSIntConst[1]; - if (ins.mMemory == IM_LOCAL) - index += proc->mLocalVars[ins.mVarIndex].mOffset; - else - index += ins.mVarIndex + proc->mLocalSize + 2; + FloatConstToAccu(ins.mSFloatConst[0]); - if (index <= 252) + if (ins.mMemory == IM_GLOBAL) { - ByteCodeInstruction bins(BC_STORE_LOCAL_32); - bins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]]; - bins.mRegisterFinal = ins.mSFinal[0]; - bins.mValue = index; + ByteCodeInstruction bins(BC_STORE_ABS_32); + bins.mRelocate = true; + bins.mVIndex = ins.mVarIndex; + bins.mValue = ins.mSIntConst[1]; + bins.mRegister = BC_REG_ACCU; mIns.Push(bins); } - else + else if (ins.mMemory == IM_ABSOLUTE) { - ByteCodeInstruction lins(BC_LEA_LOCAL); - lins.mRegister = BC_REG_ADDR; - lins.mValue = index; - mIns.Push(lins); - ByteCodeInstruction bins(BC_STORE_ADDR_32); - bins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]]; + ByteCodeInstruction bins(BC_STORE_ABS_32); + bins.mValue = ins.mSIntConst[1]; + bins.mRegister = BC_REG_ACCU; + mIns.Push(bins); + } + else if (ins.mMemory == IM_LOCAL || ins.mMemory == IM_PARAM) + { + int index = ins.mSIntConst[1]; + if (ins.mMemory == IM_LOCAL) + index += proc->mLocalVars[ins.mVarIndex].mOffset; + else + index += ins.mVarIndex + proc->mLocalSize + 2; + + if (index <= 252) + { + 
ByteCodeInstruction bins(BC_STORE_LOCAL_32); + bins.mRegister = BC_REG_ACCU; + bins.mRegisterFinal = ins.mSFinal[0]; + bins.mValue = index; + mIns.Push(bins); + } + else + { + ByteCodeInstruction lins(BC_LEA_LOCAL); + lins.mRegister = BC_REG_ADDR; + lins.mValue = index; + mIns.Push(lins); + ByteCodeInstruction bins(BC_STORE_ADDR_32); + bins.mRegister = BC_REG_ACCU; + bins.mRegisterFinal = ins.mSFinal[0]; + mIns.Push(bins); + } + } + else if (ins.mMemory == IM_FRAME) + { + ByteCodeInstruction bins(BC_STORE_FRAME_32); + bins.mRegister = BC_REG_ACCU; bins.mRegisterFinal = ins.mSFinal[0]; + bins.mValue = ins.mVarIndex + ins.mSIntConst[1] + 2; mIns.Push(bins); } } - else if (ins.mMemory == IM_FRAME) + else { - ByteCodeInstruction bins(BC_STORE_FRAME_32); - bins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]]; - bins.mRegisterFinal = ins.mSFinal[0]; - bins.mValue = ins.mVarIndex + ins.mSIntConst[1] + 2; - mIns.Push(bins); + if (ins.mMemory == IM_GLOBAL) + { + ByteCodeInstruction bins(BC_STORE_ABS_32); + bins.mRelocate = true; + bins.mVIndex = ins.mVarIndex; + bins.mValue = ins.mSIntConst[1]; + bins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]]; + mIns.Push(bins); + } + else if (ins.mMemory == IM_ABSOLUTE) + { + ByteCodeInstruction bins(BC_STORE_ABS_32); + bins.mValue = ins.mSIntConst[1]; + bins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]]; + mIns.Push(bins); + } + else if (ins.mMemory == IM_LOCAL || ins.mMemory == IM_PARAM) + { + int index = ins.mSIntConst[1]; + if (ins.mMemory == IM_LOCAL) + index += proc->mLocalVars[ins.mVarIndex].mOffset; + else + index += ins.mVarIndex + proc->mLocalSize + 2; + + if (index <= 252) + { + ByteCodeInstruction bins(BC_STORE_LOCAL_32); + bins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]]; + bins.mRegisterFinal = ins.mSFinal[0]; + bins.mValue = index; + mIns.Push(bins); + } + else + { + ByteCodeInstruction lins(BC_LEA_LOCAL); + lins.mRegister = BC_REG_ADDR; + lins.mValue = index; + 
mIns.Push(lins); + ByteCodeInstruction bins(BC_STORE_ADDR_32); + bins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]]; + bins.mRegisterFinal = ins.mSFinal[0]; + mIns.Push(bins); + } + } + else if (ins.mMemory == IM_FRAME) + { + ByteCodeInstruction bins(BC_STORE_FRAME_32); + bins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]]; + bins.mRegisterFinal = ins.mSFinal[0]; + bins.mValue = ins.mVarIndex + ins.mSIntConst[1] + 2; + mIns.Push(bins); + } } } else { if (ins.mSTemp[0] < 0) { - IntConstToAccu(ins.mSIntConst[0]); + FloatConstToAccu(ins.mSFloatConst[0]); if (ins.mMemory == IM_INDIRECT) { @@ -1553,12 +1623,30 @@ ByteCode ByteCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const if (ins.mSType[0] == IT_FLOAT) { - ByteCodeInstruction lins(BC_LOAD_REG_32); - lins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]]; - lins.mRegisterFinal = ins.mSFinal[0]; - mIns.Push(lins); + if (ins.mSTemp[0] < 0) + { + FloatConstToAccu(ins.mSFloatConst[0]); + } + else + { + ByteCodeInstruction lins(BC_LOAD_REG_32); + lins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]]; + lins.mRegisterFinal = ins.mSFinal[0]; + mIns.Push(lins); + } + ByteCodeInstruction cins(BC_BINOP_CMP_F32); - cins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[1]]; + + if (ins.mSTemp[1] < 0) + { + FloatConstToWork(ins.mSFloatConst[1]); + cins.mRegister = BC_REG_WORK; + } + else + { + cins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[1]]; + } + cins.mRegisterFinal = ins.mSFinal[1]; mIns.Push(cins); } @@ -1796,16 +1884,30 @@ void ByteCodeBasicBlock::BinaryOperator(InterCodeProcedure* proc, const InterIns { ByteCode bc = ByteCodeBinRegOperator(ins); - ByteCodeInstruction lins(BC_LOAD_REG_32); - lins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[1]]; - lins.mRegisterFinal = ins.mSFinal[1]; - mIns.Push(lins); + if (ins.mSTemp[1] < 0) + { + FloatConstToAccu(ins.mSFloatConst[1]); + } + else + { + ByteCodeInstruction lins(BC_LOAD_REG_32); + 
lins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[1]]; + lins.mRegisterFinal = ins.mSFinal[1]; + mIns.Push(lins); + } ByteCodeInstruction bins(bc); - if (ins.mSTemp[1] == ins.mSTemp[0]) + + if (ins.mSTemp[0] < 0) + { + FloatConstToWork(ins.mSFloatConst[0]); + bins.mRegister = BC_REG_WORK; + } + else if (ins.mSTemp[1] == ins.mSTemp[0]) bins.mRegister = BC_REG_ACCU; else bins.mRegister = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]]; + bins.mRegisterFinal = ins.mSFinal[0]; mIns.Push(bins); diff --git a/oscar64/ByteCodeGenerator.h b/oscar64/ByteCodeGenerator.h index 51997d0..66e5eea 100644 --- a/oscar64/ByteCodeGenerator.h +++ b/oscar64/ByteCodeGenerator.h @@ -210,6 +210,7 @@ public: void IntConstToAccu(__int64 val); void IntConstToAddr(__int64 val); void FloatConstToAccu(double val); + void FloatConstToWork(double val); void CopyValue(InterCodeProcedure* proc, const InterInstruction& ins); void LoadConstant(InterCodeProcedure* proc, const InterInstruction& ins); void StoreDirectValue(InterCodeProcedure* proc, const InterInstruction & ins); diff --git a/oscar64/Emulator.cpp b/oscar64/Emulator.cpp index f6b8a91..c1cce7e 100644 --- a/oscar64/Emulator.cpp +++ b/oscar64/Emulator.cpp @@ -65,7 +65,7 @@ void Emulator::DumpCycles(void) } printf("Total Cycles %d\n", totalCycles); - return; +// return; for (int i = 0; i < numTops; i++) { @@ -380,7 +380,7 @@ bool Emulator::EmulateInstruction(AsmInsType type, AsmInsMode mode, int addr, in t = (mMemory[addr] >> 1) | ((mRegP & STATUS_CARRY) << 7); mMemory[addr] = t & 255; UpdateStatusCarry(t & 255, c != 0); - cycles++; + cycles += 2; } break; case ASMIT_RTI: diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 34ce1a2..690a38e 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -1633,6 +1633,8 @@ void InterCodeBasicBlock::CheckValueUsage(InterInstruction& ins, const GrowingIn switch (ins.mSType[0]) { case IT_FLOAT: + ins.mSFloatConst[0] = tvalue[ins.mSTemp[0]]->mFloatValue; + ins.mSTemp[0] = -1; 
break; case IT_POINTER: break; @@ -1717,32 +1719,35 @@ void InterCodeBasicBlock::CheckValueUsage(InterInstruction& ins, const GrowingIn if (ins.mSTemp[0] >= 0 && tvalue[ins.mSTemp[0]] && tvalue[ins.mSTemp[0]]->mCode == IC_CONSTANT) { ins.mCode = IC_CONSTANT; - ins.mIntValue = ConstantFolding(ins.mOperator, tvalue[ins.mSTemp[1]]->mFloatValue, tvalue[ins.mSTemp[0]]->mFloatValue); + ins.mFloatValue = ConstantFolding(ins.mOperator, tvalue[ins.mSTemp[1]]->mFloatValue, tvalue[ins.mSTemp[0]]->mFloatValue); ins.mSTemp[0] = -1; ins.mSTemp[1] = -1; } else { - if (ins.mOperator == IA_ADD && tvalue[ins.mSTemp[1]]->mFloatValue == 0) + ins.mSFloatConst[1] = tvalue[ins.mSTemp[1]]->mFloatValue; + ins.mSTemp[1] = -1; + + if (ins.mOperator == IA_ADD && ins.mSFloatConst[1] == 0) { ins.mCode = IC_LOAD_TEMPORARY; assert(ins.mSTemp[0] >= 0); } else if (ins.mOperator == IA_MUL) { - if (tvalue[ins.mSTemp[1]]->mFloatValue == 1.0) + if (ins.mSFloatConst[1] == 1.0) { ins.mCode = IC_LOAD_TEMPORARY; assert(ins.mSTemp[0] >= 0); } - else if (tvalue[ins.mSTemp[1]]->mFloatValue == 0.0) + else if (ins.mSFloatConst[1] == 0.0) { ins.mCode = IC_CONSTANT; ins.mFloatValue = 0.0; ins.mSTemp[0] = -1; ins.mSTemp[1] = -1; } - else if (tvalue[ins.mSTemp[1]]->mFloatValue == 2.0) + else if (ins.mSFloatConst[1] == 2.0) { ins.mOperator = IA_ADD; ins.mSTemp[1] = ins.mSTemp[0]; @@ -1753,7 +1758,10 @@ void InterCodeBasicBlock::CheckValueUsage(InterInstruction& ins, const GrowingIn } else if (ins.mSTemp[0] >= 0 && tvalue[ins.mSTemp[0]] && tvalue[ins.mSTemp[0]]->mCode == IC_CONSTANT) { - if (ins.mOperator == IA_ADD && tvalue[ins.mSTemp[0]]->mFloatValue == 0) + ins.mSFloatConst[0] = tvalue[ins.mSTemp[0]]->mFloatValue; + ins.mSTemp[0] = -1; + + if (ins.mOperator == IA_ADD && ins.mSFloatConst[0] == 0) { ins.mCode = IC_LOAD_TEMPORARY; ins.mSTemp[0] = ins.mSTemp[1]; @@ -1762,21 +1770,21 @@ void InterCodeBasicBlock::CheckValueUsage(InterInstruction& ins, const GrowingIn } else if (ins.mOperator == IA_MUL) { - if 
(tvalue[ins.mSTemp[0]]->mFloatValue == 1.0) + if (ins.mSFloatConst[0] == 1.0) { ins.mCode = IC_LOAD_TEMPORARY; ins.mSTemp[0] = ins.mSTemp[1]; ins.mSTemp[1] = -1; assert(ins.mSTemp[0] >= 0); } - else if (tvalue[ins.mSTemp[0]]->mFloatValue == 0.0) + else if (ins.mSFloatConst[0] == 0.0) { ins.mCode = IC_CONSTANT; ins.mFloatValue = 0.0; ins.mSTemp[0] = -1; ins.mSTemp[1] = -1; } - else if (tvalue[ins.mSTemp[0]]->mFloatValue == 2.0) + else if (ins.mSFloatConst[0] == 2.0) { ins.mOperator = IA_ADD; ins.mSTemp[0] = ins.mSTemp[1]; @@ -1845,42 +1853,41 @@ void InterCodeBasicBlock::CheckValueUsage(InterInstruction& ins, const GrowingIn } else if (ins.mSTemp[0] >= 0 && tvalue[ins.mSTemp[0]] && tvalue[ins.mSTemp[0]]->mCode == IC_CONSTANT) { - ins.mSIntConst[0] = tvalue[ins.mSTemp[0]]->mIntValue; - ins.mSTemp[0] = -1; -#if 1 - if (ins.mOperator == IA_ADD && ins.mSIntConst[0] == 0) - { - ins.mCode = IC_LOAD_TEMPORARY; - ins.mSTemp[0] = ins.mSTemp[1]; - ins.mSTemp[1] = -1; - assert(ins.mSTemp[0] >= 0); - } - else if (ins.mOperator == IA_MUL) - { - if (ins.mSIntConst[0] == 1) + ins.mSIntConst[0] = tvalue[ins.mSTemp[0]]->mIntValue; + ins.mSTemp[0] = -1; + + if (ins.mOperator == IA_ADD && ins.mSIntConst[0] == 0) { ins.mCode = IC_LOAD_TEMPORARY; ins.mSTemp[0] = ins.mSTemp[1]; ins.mSTemp[1] = -1; assert(ins.mSTemp[0] >= 0); } - else if (ins.mSIntConst[0] == 2) + else if (ins.mOperator == IA_MUL) { - ins.mOperator = IA_SHL; - ins.mSIntConst[0] = 1; + if (ins.mSIntConst[0] == 1) + { + ins.mCode = IC_LOAD_TEMPORARY; + ins.mSTemp[0] = ins.mSTemp[1]; + ins.mSTemp[1] = -1; + assert(ins.mSTemp[0] >= 0); + } + else if (ins.mSIntConst[0] == 2) + { + ins.mOperator = IA_SHL; + ins.mSIntConst[0] = 1; + } + else if (ins.mSIntConst[0] == 4) + { + ins.mOperator = IA_SHL; + ins.mSIntConst[0] = 2; + } + else if (ins.mSIntConst[0] == 8) + { + ins.mOperator = IA_SHL; + ins.mSIntConst[0] = 3; + } } - else if (ins.mSIntConst[0] == 4) - { - ins.mOperator = IA_SHL; - ins.mSIntConst[0] = 2; - } - else if 
(ins.mSIntConst[0] == 8) - { - ins.mOperator = IA_SHL; - ins.mSIntConst[0] = 3; - } - } -#endif } if (ins.mSTemp[1] < 0 && ins.mSTemp[0] >= 0 && tvalue[ins.mSTemp[0]] && tvalue[ins.mSTemp[0]]->mCode == IC_BINARY_OPERATOR) @@ -1948,6 +1955,28 @@ void InterCodeBasicBlock::CheckValueUsage(InterInstruction& ins, const GrowingIn switch (ins.mSType[1]) { case IT_FLOAT: + if (ins.mSTemp[1] >= 0 && tvalue[ins.mSTemp[1]] && tvalue[ins.mSTemp[1]]->mCode == IC_CONSTANT && + ins.mSTemp[0] >= 0 && tvalue[ins.mSTemp[0]] && tvalue[ins.mSTemp[0]]->mCode == IC_CONSTANT) + { + ins.mCode = IC_CONSTANT; + ins.mIntValue = ConstantRelationalFolding(ins.mOperator, tvalue[ins.mSTemp[1]]->mFloatValue, tvalue[ins.mSTemp[0]]->mFloatValue); + ins.mTType = IT_SIGNED; + ins.mSTemp[0] = -1; + ins.mSTemp[1] = -1; + } + else + { + if (ins.mSTemp[1] >= 0 && tvalue[ins.mSTemp[1]] && tvalue[ins.mSTemp[1]]->mCode == IC_CONSTANT) + { + ins.mSFloatConst[1] = tvalue[ins.mSTemp[1]]->mFloatValue; + ins.mSTemp[1] = -1; + } + else if (ins.mSTemp[0] >= 0 && tvalue[ins.mSTemp[0]] && tvalue[ins.mSTemp[0]]->mCode == IC_CONSTANT) + { + ins.mSFloatConst[0] = tvalue[ins.mSTemp[0]]->mFloatValue; + ins.mSTemp[0] = -1; + } + } break; case IT_POINTER: if (ins.mOperator == IA_CMPEQ || ins.mOperator == IA_CMPNE) @@ -2634,6 +2663,62 @@ bool InterCodeBasicBlock::IsLeafProcedure(void) return true; } +static bool CanBypassLoad(const InterInstruction& lins, const InterInstruction& bins) +{ + // Check ambiguity + if (bins.mCode == IC_STORE || bins.mCode == IC_COPY) + return false; + + // Side effects + if (bins.mCode == IC_CALL || bins.mCode == IC_JSR) + return false; + + // True data dependency + if (lins.mTTemp == bins.mSTemp[0] || lins.mTTemp == bins.mSTemp[1] || lins.mTTemp == bins.mSTemp[2]) + return false; + + // False data dependency + if (lins.mSTemp[0] >= 0 && lins.mSTemp[0] == bins.mTTemp) + return false; + + return true; +} + +static bool CanBypassStore(const InterInstruction& sins, const InterInstruction& bins) +{ 
+ if (bins.mCode == IC_COPY || bins.mCode == IC_PUSH_FRAME) + return false; + + // Check ambiguity + if (bins.mCode == IC_STORE || bins.mCode == IC_LOAD) + { + if (sins.mMemory == IM_LOCAL) + { + if (bins.mMemory == IM_PARAM || bins.mMemory == IM_GLOBAL) + ; + else if (bins.mMemory == IM_LOCAL) + { + if (bins.mVarIndex == sins.mVarIndex) + return false; + } + else + return false; + } + else + return false; + } + + // Side effects + if (bins.mCode == IC_CALL || bins.mCode == IC_JSR) + return false; + + // True data dependency + if (bins.mTTemp >= 0 && (bins.mTTemp == sins.mSTemp[0] || bins.mTTemp == sins.mSTemp[1])) + return false; + + return true; +} + void InterCodeBasicBlock::PeepholeOptimization(void) { int i; @@ -2642,6 +2727,49 @@ void InterCodeBasicBlock::PeepholeOptimization(void) { mVisited = true; + // shorten lifespan + + int i = mInstructions.Size() - 2; + + while (i >= 0) + { + // move loads down + if (mInstructions[i].mCode == IC_LOAD) + { + InterInstruction ins(mInstructions[i]); + int j = i; + while (j + 2 < mInstructions.Size() && CanBypassLoad(ins, mInstructions[j + 1])) + { + mInstructions[j] = mInstructions[j + 1]; + j++; + } + if (i != j) + mInstructions[j] = ins; + } + + i--; + } + + i = 0; + while (i < mInstructions.Size()) + { + // move stores up + if (mInstructions[i].mCode == IC_STORE) + { + InterInstruction ins(mInstructions[i]); + int j = i; + while (j > 0 && CanBypassStore(ins, mInstructions[j - 1])) + { + mInstructions[j] = mInstructions[j - 1]; + j--; + } + if (i != j) + mInstructions[j] = ins; + } + + i++; + } + bool changed; do { @@ -2681,6 +2809,29 @@ void InterCodeBasicBlock::PeepholeOptimization(void) mInstructions[i + 2].mSFinal[1] = false; changed = true; } + else if ( + mInstructions[i + 0].mCode == IC_BINARY_OPERATOR && mInstructions[i + 0].mOperator == IA_SAR && mInstructions[i + 0].mSTemp[0] < 0 && + mInstructions[i + 1].mCode == IC_BINARY_OPERATOR && mInstructions[i + 1].mOperator == IA_MUL && mInstructions[i + 1].mSTemp[0] 
< 0 && + mInstructions[i + 1].mSTemp[1] == mInstructions[i + 0].mTTemp && mInstructions[i + 1].mSFinal[1] && + (mInstructions[i + 1].mSIntConst[0] & (1LL << mInstructions[i + 0].mSIntConst[0])) == 0) + { + int shift = mInstructions[i + 0].mSIntConst[0]; + mInstructions[i + 1].mSIntConst[0] >>= shift; + mInstructions[i + 0].mOperator = IA_AND; + mInstructions[i + 0].mSIntConst[0] = ~((1LL << shift) - 1); + } + else if ( + mInstructions[i + 0].mCode == IC_BINARY_OPERATOR && mInstructions[i + 0].mOperator == IA_SAR && mInstructions[i + 0].mSTemp[0] < 0 && + mInstructions[i + 1].mCode == IC_BINARY_OPERATOR && mInstructions[i + 1].mOperator == IA_MUL && mInstructions[i + 1].mSTemp[1] < 0 && + mInstructions[i + 1].mSTemp[0] == mInstructions[i + 0].mTTemp && mInstructions[i + 1].mSFinal[0] && + (mInstructions[i + 1].mSIntConst[1] & (1LL << mInstructions[i + 0].mSIntConst[0])) == 0) + { + int shift = mInstructions[i + 0].mSIntConst[0]; + mInstructions[i + 1].mSIntConst[1] >>= shift; + mInstructions[i + 0].mOperator = IA_AND; + mInstructions[i + 0].mSIntConst[0] = ~((1LL << shift) - 1); + } + // Postincrement artifact if (mInstructions[i + 0].mCode == IC_LOAD_TEMPORARY && mInstructions[i + 1].mCode == IC_BINARY_OPERATOR && diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 0292294..96f7b64 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -264,6 +264,7 @@ public: int mTTemp, mSTemp[3]; bool mSFinal[3]; __int64 mSIntConst[3]; + double mSFloatConst[3]; InterMemory mMemory; InterOperator mOperator; int mOperandSize; diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index b9c8657..4c7b9b9 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -214,12 +214,15 @@ void NativeCodeBasicBlock::LoadConstant(InterCodeProcedure* proc, const InterIns else index += ins.mVarIndex + proc->mLocalSize + 2; - mIns.Push(NativeCodeInstruction(ASMIT_CLC, ASMIM_IMPLIED)); + if (index != 0) + 
mIns.Push(NativeCodeInstruction(ASMIT_CLC, ASMIM_IMPLIED)); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, mNoFrame ? BC_REG_STACK : BC_REG_LOCALS)); - mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_IMMEDIATE, index & 0xff)); + if (index != 0) + mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_IMMEDIATE, index & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mTTemp])); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, (mNoFrame ? BC_REG_STACK : BC_REG_LOCALS) + 1)); - mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_IMMEDIATE, (index >> 8) & 0xff)); + if (index != 0) + mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_IMMEDIATE, (index >> 8) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mTTemp] + 1)); } else if (ins.mMemory == IM_PROCEDURE) @@ -244,26 +247,29 @@ void NativeCodeBasicBlock::StoreValue(InterCodeProcedure* proc, const InterInstr { if (ins.mSTemp[0] < 0) { + union { float f; unsigned int v; } cc; + cc.f = ins.mSFloatConst[0]; + if (ins.mMemory == IM_GLOBAL) { - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins.mSIntConst[0] & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, cc.v & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1], ins.mVarIndex)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[0] >> 8) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 8) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1] + 1, ins.mVarIndex)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[0] >> 16) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 16) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1] + 2, ins.mVarIndex)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, 
ASMIM_IMMEDIATE, (ins.mSIntConst[0] >> 24) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 24) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1] + 3, ins.mVarIndex)); } else if (ins.mMemory == IM_ABSOLUTE) { - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins.mSIntConst[0] & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, cc.v & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1])); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[0] >> 8) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 8) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1] + 1)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[0] >> 16) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 16) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1] + 2)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[0] >> 24) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 24) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1] + 3)); } else if (ins.mMemory == IM_LOCAL || ins.mMemory == IM_PARAM) @@ -275,16 +281,16 @@ void NativeCodeBasicBlock::StoreValue(InterCodeProcedure* proc, const InterInstr index += ins.mVarIndex + proc->mLocalSize + 2; mIns.Push(NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, index)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins.mSIntConst[0] & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, cc.v & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_INDIRECT_Y, mNoFrame ? 
BC_REG_STACK : BC_REG_LOCALS)); mIns.Push(NativeCodeInstruction(ASMIT_INY, ASMIM_IMPLIED)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[0] >> 8) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 8) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_INDIRECT_Y, mNoFrame ? BC_REG_STACK : BC_REG_LOCALS)); mIns.Push(NativeCodeInstruction(ASMIT_INY, ASMIM_IMPLIED)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[0] >> 16) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 16) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_INDIRECT_Y, mNoFrame ? BC_REG_STACK : BC_REG_LOCALS)); mIns.Push(NativeCodeInstruction(ASMIT_INY, ASMIM_IMPLIED)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[0] >> 24) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 24) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_INDIRECT_Y, mNoFrame ? 
BC_REG_STACK : BC_REG_LOCALS)); } else if (ins.mMemory == IM_FRAME) @@ -293,26 +299,35 @@ void NativeCodeBasicBlock::StoreValue(InterCodeProcedure* proc, const InterInstr } else { + int sreg = BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]]; + + if (ins.mSFinal[0] && CheckPredAccuStore(sreg)) + { + // cull previous store from accu to temp using direct forwarding from accu + mIns.SetSize(mIns.Size() - 8); + sreg = BC_REG_ACCU; + } + if (ins.mMemory == IM_GLOBAL) { - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]])); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, sreg)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1], ins.mVarIndex)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]] + 1)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, sreg + 1)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1] + 1, ins.mVarIndex)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]] + 2)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, sreg + 2)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1] + 2, ins.mVarIndex)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]] + 3)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, sreg + 3)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1] + 3, ins.mVarIndex)); } else if (ins.mMemory == IM_ABSOLUTE) { - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]])); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, sreg)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1])); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + 
proc->mTempOffset[ins.mSTemp[0]] + 1)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, sreg + 1)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1] + 1)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]] + 2)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, sreg + 2)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1] + 2)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]] + 3)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, sreg + 3)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE, ins.mSIntConst[1] + 3)); } else if (ins.mMemory == IM_LOCAL || ins.mMemory == IM_PARAM) @@ -324,16 +339,16 @@ void NativeCodeBasicBlock::StoreValue(InterCodeProcedure* proc, const InterInstr index += ins.mVarIndex + proc->mLocalSize + 2; mIns.Push(NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, index)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]])); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, sreg)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_INDIRECT_Y, mNoFrame ? BC_REG_STACK : BC_REG_LOCALS)); mIns.Push(NativeCodeInstruction(ASMIT_INY, ASMIM_IMPLIED)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]] + 1)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, sreg + 1)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_INDIRECT_Y, mNoFrame ? BC_REG_STACK : BC_REG_LOCALS)); mIns.Push(NativeCodeInstruction(ASMIT_INY, ASMIM_IMPLIED)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]] + 2)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, sreg + 2)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_INDIRECT_Y, mNoFrame ? 
BC_REG_STACK : BC_REG_LOCALS)); mIns.Push(NativeCodeInstruction(ASMIT_INY, ASMIM_IMPLIED)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]] + 3)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, sreg + 3)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_INDIRECT_Y, mNoFrame ? BC_REG_STACK : BC_REG_LOCALS)); } else if (ins.mMemory == IM_FRAME) @@ -995,6 +1010,26 @@ void NativeCodeBasicBlock::LoadValue(InterCodeProcedure* proc, const InterInstru LoadValueToReg(proc, ins, BC_REG_TMP + proc->mTempOffset[ins.mTTemp], nullptr, nullptr); } +bool NativeCodeBasicBlock::CheckPredAccuStore(int reg) +{ + if (mIns.Size() < 8) + return false; + + int p = mIns.Size() - 8; + + for (int i = 0; i < 4; i++) + { + if (mIns[p + 0].mType != ASMIT_LDA || mIns[p + 0].mMode != ASMIM_ZERO_PAGE || mIns[p + 0].mAddress != BC_REG_ACCU + i) + return false; + if (mIns[p + 1].mType != ASMIT_STA || mIns[p + 1].mMode != ASMIM_ZERO_PAGE || mIns[p + 1].mAddress != reg + i) + return false; + + p += 2; + } + + return true; +} + void NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* proc, const InterInstruction& ins, const InterInstruction * sins1, const InterInstruction * sins0) { int treg = BC_REG_TMP + proc->mTempOffset[ins.mTTemp]; @@ -1003,15 +1038,27 @@ void NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* proc, const InterI { if (ins.mSTemp[1] < 0) { - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins.mSIntConst[1] & 0xff)); + union { float f; unsigned int v; } cc; + cc.f = ins.mSFloatConst[1]; + + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, cc.v & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[1] >> 8) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 8) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, 
BC_REG_ACCU + 1)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[1] >> 16) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 16) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 2)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[1] >> 24) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 24) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 3)); } + else if (sins1) + { + LoadValueToReg(proc, *sins1, BC_REG_ACCU, nullptr, nullptr); + } + else if (ins.mSFinal[1] && CheckPredAccuStore(BC_REG_TMP + proc->mTempOffset[ins.mSTemp[1]])) + { + // cull previous store from accu to temp using direct forwarding + mIns.SetSize(mIns.Size() - 8); + } else { mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[1]] + 0)); @@ -1026,15 +1073,22 @@ void NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* proc, const InterI if (ins.mSTemp[0] < 0) { - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins.mSIntConst[0] & 0xff)); + union { float f; unsigned int v; } cc; + cc.f = ins.mSFloatConst[0]; + + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, cc.v & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[0] >> 8) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 8) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 1)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[0] >> 16) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 16) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 2)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 
(ins.mSIntConst[0] >> 24) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 24) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 3)); } + else if (sins0) + { + LoadValueToReg(proc, *sins0, BC_REG_WORK, nullptr, nullptr); + } else { mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]] + 0)); @@ -1265,61 +1319,101 @@ void NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* proc, const InterI case IA_DIVU: case IA_MODU: { - if (sins1) - LoadValueToReg(proc, *sins1, BC_REG_ACCU, nullptr, nullptr); - else if (ins.mSTemp[1] < 0) - { - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins.mSIntConst[1] & 0xff)); - mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[1] >> 8) & 0xff)); - mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); - } - else - { - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[1]])); - mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[1]] + 1)); - mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); - } + int reg = BC_REG_ACCU; - if (sins0) - LoadValueToReg(proc, *sins0, BC_REG_WORK, nullptr, nullptr); - else if (ins.mSTemp[0] < 0) + if (ins.mOperator == IA_MUL && ins.mSTemp[1] < 0 && (ins.mSIntConst[1] & ~0xff) == 0) { + if (sins0) + LoadValueToReg(proc, *sins0, BC_REG_ACCU, nullptr, nullptr); + else + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]])); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + 
proc->mTempOffset[ins.mSTemp[0]] + 1)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); + } + + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins.mSIntConst[1] & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0)); + + mIns.Push(NativeCodeInstruction("mul16by8")); + reg = BC_REG_WORK + 2; + } + else if (ins.mOperator == IA_MUL && ins.mSTemp[0] < 0 && (ins.mSIntConst[0] & ~0xff) == 0) + { + if (sins1) + LoadValueToReg(proc, *sins1, BC_REG_ACCU, nullptr, nullptr); + else + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[1]])); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[1]] + 1)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); + } + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins.mSIntConst[0] & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[0] >> 8) & 0xff)); - mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 1)); + + mIns.Push(NativeCodeInstruction("mul16by8")); + reg = BC_REG_WORK + 2; } else { - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]])); - mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]] + 1)); - mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 1)); - } + if (sins1) + LoadValueToReg(proc, *sins1, BC_REG_ACCU, nullptr, nullptr); + else if (ins.mSTemp[1] < 0) + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins.mSIntConst[1] & 0xff)); + 
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[1] >> 8) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); + } + else + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[1]])); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[1]] + 1)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); + } - int reg = BC_REG_ACCU; - switch (ins.mOperator) - { - case IA_MUL: - mIns.Push(NativeCodeInstruction("mul16")); - reg = BC_REG_WORK + 2; - break; - case IA_DIVS: - mIns.Push(NativeCodeInstruction("divs16")); - break; - case IA_MODS: - mIns.Push(NativeCodeInstruction("mods16")); - reg = BC_REG_WORK + 2; - break; - case IA_DIVU: - mIns.Push(NativeCodeInstruction("divu16")); - break; - case IA_MODU: - mIns.Push(NativeCodeInstruction("modu16")); - reg = BC_REG_WORK + 2; - break; + if (sins0) + LoadValueToReg(proc, *sins0, BC_REG_WORK, nullptr, nullptr); + else if (ins.mSTemp[0] < 0) + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins.mSIntConst[0] & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[0] >> 8) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 1)); + } + else + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]])); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]] + 1)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, 
BC_REG_WORK + 1)); + } + + switch (ins.mOperator) + { + case IA_MUL: + mIns.Push(NativeCodeInstruction("mul16")); + reg = BC_REG_WORK + 2; + break; + case IA_DIVS: + mIns.Push(NativeCodeInstruction("divs16")); + break; + case IA_MODS: + mIns.Push(NativeCodeInstruction("mods16")); + reg = BC_REG_WORK + 2; + break; + case IA_DIVU: + mIns.Push(NativeCodeInstruction("divu16")); + break; + case IA_MODU: + mIns.Push(NativeCodeInstruction("modu16")); + reg = BC_REG_WORK + 2; + break; + } } mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, reg + 0)); @@ -1629,15 +1723,23 @@ void NativeCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const In if (ins.mSTemp[li] < 0) { - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins.mSIntConst[li] & 0xff)); + union { float f; unsigned int v; } cc; + cc.f = ins.mSFloatConst[li]; + + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, cc.v & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[li] >> 8) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 8) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[li] >> 16) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 16) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 2)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[li] >> 24) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 24) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 3)); } + else if (ins.mSFinal[li] && CheckPredAccuStore(BC_REG_TMP + proc->mTempOffset[ins.mSTemp[li]])) + { + // cull previous store from accu to temp using direct forwarding + mIns.SetSize(mIns.Size() - 
8); + } else { mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[li]] + 0)); @@ -1652,13 +1754,16 @@ void NativeCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const In if (ins.mSTemp[ri] < 0) { - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins.mSIntConst[ri] & 0xff)); + union { float f; unsigned int v; } cc; + cc.f = ins.mSFloatConst[ri]; + + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, cc.v & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[ri] >> 8) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 8) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 1)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[ri] >> 16) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 16) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 2)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[ri] >> 24) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (cc.v >> 24) & 0xff)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 3)); } else @@ -1747,7 +1852,7 @@ void NativeCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const In mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[ri]] + 1)); } - this->Close(eblock, nblock, ASMIT_BEQ); + this->Close(nblock, eblock, ASMIT_BNE); if (ins.mSTemp[li] < 0) eblock->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins.mSIntConst[li] & 0xff)); @@ -1789,34 +1894,120 @@ void NativeCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const In void NativeCodeBasicBlock::LoadEffectiveAddress(InterCodeProcedure* proc, const 
InterInstruction& ins) { - mIns.Push(NativeCodeInstruction(ASMIT_CLC, ASMIM_IMPLIED)); + if (ins.mSTemp[0] >= 0 || ins.mSIntConst[0] != 0) + mIns.Push(NativeCodeInstruction(ASMIT_CLC, ASMIM_IMPLIED)); + if (ins.mSTemp[1] < 0) mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins.mSIntConst[1] & 0xff)); else mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[1]])); + if (ins.mSTemp[0] < 0) - mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_IMMEDIATE, ins.mSIntConst[0] & 0xff)); + { + if (ins.mSIntConst[0]) + mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_IMMEDIATE, ins.mSIntConst[0] & 0xff)); + } else mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]])); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mTTemp])); + if (ins.mSTemp[1] < 0) mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins.mSIntConst[1] >> 8) & 0xff)); else mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[1]] + 1)); + if (ins.mSTemp[0] < 0) - mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_IMMEDIATE, (ins.mSIntConst[0] >> 8) & 0xff)); + { + if (ins.mSIntConst[0]) + mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_IMMEDIATE, (ins.mSIntConst[0] >> 8) & 0xff)); + } else mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mSTemp[0]] + 1)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins.mTTemp] + 1)); } +void NativeCodeBasicBlock::PeepHoleOptimizer(void) +{ + bool progress = false; + do { + progress = false; + + int i = 0; + int j = 0; + while (i < mIns.Size()) + { + if (mIns[i].mType == ASMIT_NOP) + ; + else + { + if (i != j) + mIns[j] = mIns[i]; + j++; + } + i++; + } + mIns.SetSize(j); + + for (int i = 0; i < mIns.Size(); i++) + { + if (mIns[i].mType == ASMIT_AND && mIns[i].mMode == 
ASMIM_IMMEDIATE && mIns[i].mAddress == 0) + { + mIns[i].mType = ASMIT_LDA; + progress = true; + } + else if (mIns[i].mType == ASMIT_AND && mIns[i].mMode == ASMIM_IMMEDIATE && mIns[i].mAddress == 0xff) + { + mIns[i].mType = ASMIT_NOP; + progress = true; + } + else if (mIns[i].mType == ASMIT_ORA && mIns[i].mMode == ASMIM_IMMEDIATE && mIns[i].mAddress == 0xff) + { + mIns[i].mType = ASMIT_LDA; + progress = true; + } + else if (mIns[i].mType == ASMIT_ORA && mIns[i].mMode == ASMIM_IMMEDIATE && mIns[i].mAddress == 0x00) + { + mIns[i].mType = ASMIT_NOP; + progress = true; + } + + if (i + 1 < mIns.Size()) + { + if (mIns[i].mType == ASMIT_LDA && mIns[i + 1].mType == ASMIT_LDA) + { + mIns[i].mType = ASMIT_NOP; + progress = true; + } + else if (mIns[i].mType == ASMIT_LDA && mIns[i + 1].mType == ASMIT_STA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == mIns[i + 1].mAddress) + { + mIns[i + 1].mType = ASMIT_NOP; + progress = true; + } + } + + if (i + 2 < mIns.Size()) + { + if (mIns[i].mType == ASMIT_LDA && mIns[i + 2].mType == ASMIT_LDA && (mIns[i + 1].mType == ASMIT_CLC || mIns[i + 1].mType == ASMIT_SEC)) + { + mIns[i].mType = ASMIT_NOP; + progress = true; + } + } + } + + } while (progress); + +} + void NativeCodeBasicBlock::Assemble(void) { if (!mAssembled) { mAssembled = true; - //PeepHoleOptimizer(); + PeepHoleOptimizer(); for (int i = 0; i < mIns.Size(); i++) mIns[i].Assemble(this); @@ -2240,7 +2431,7 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode i++; } else if (i + 1 < iblock->mInstructions.Size() && - ins.mOperandSize == 2 && + ins.mOperandSize >= 2 && iblock->mInstructions[i + 1].mCode == IC_BINARY_OPERATOR && iblock->mInstructions[i + 1].mSTemp[0] == ins.mTTemp && iblock->mInstructions[i + 1].mSFinal[0]) { @@ -2248,7 +2439,7 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode i++; } else if (i + 1 < iblock->mInstructions.Size() && - ins.mOperandSize == 
2 && + ins.mOperandSize >= 2 && iblock->mInstructions[i + 1].mCode == IC_BINARY_OPERATOR && iblock->mInstructions[i + 1].mSTemp[1] == ins.mTTemp && iblock->mInstructions[i + 1].mSFinal[1]) { @@ -2256,7 +2447,7 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode i++; } else if (i + 2 < iblock->mInstructions.Size() && - ins.mOperandSize == 2 && + ins.mOperandSize >= 2 && iblock->mInstructions[i + 1].mCode == IC_LOAD && iblock->mInstructions[i + 1].mOperandSize == 2 && iblock->mInstructions[i + 2].mCode == IC_BINARY_OPERATOR && iblock->mInstructions[i + 2].mSTemp[0] == iblock->mInstructions[i + 1].mTTemp && iblock->mInstructions[i + 2].mSFinal[0] && @@ -2266,7 +2457,7 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode i += 2; } else if (i + 2 < iblock->mInstructions.Size() && - ins.mOperandSize == 2 && + ins.mOperandSize >= 2 && iblock->mInstructions[i + 1].mCode == IC_LOAD && iblock->mInstructions[i + 1].mOperandSize == 2 && iblock->mInstructions[i + 2].mCode == IC_BINARY_OPERATOR && iblock->mInstructions[i + 2].mSTemp[1] == iblock->mInstructions[i + 1].mTTemp && iblock->mInstructions[i + 2].mSFinal[1] && diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 90a0e5c..e2631a7 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -50,6 +50,8 @@ public: void Assemble(void); void Close(NativeCodeBasicBlock* trueJump, NativeCodeBasicBlock* falseJump, AsmInsType branch); + void PeepHoleOptimizer(void); + void PutByte(uint8 code); void PutWord(uint16 code); @@ -64,6 +66,8 @@ public: void RelationalOperator(InterCodeProcedure* proc, const InterInstruction& ins, NativeCodeBasicBlock* trueJump, NativeCodeBasicBlock * falseJump); void LoadEffectiveAddress(InterCodeProcedure* proc, const InterInstruction& ins); void NumericConversion(InterCodeProcedure* proc, const InterInstruction& ins); + + bool CheckPredAccuStore(int reg); }; class NativeCodeProcedure