diff --git a/include/c64/rasterirq.c b/include/c64/rasterirq.c index 87ee3b0..11c44bb 100644 --- a/include/c64/rasterirq.c +++ b/include/c64/rasterirq.c @@ -89,6 +89,85 @@ e1: } +__asm irq2 +{ + pha + txa + pha + tya + pha + + lda #$35 + sta $01 + + asl $d019 + + ldx nextIRQ +l1: + lda rasterIRQNext, x + cmp #$ff + beq e1 + + ldy rasterIRQIndex, x + tax + lda rasterIRQLow, y + sta ji + 1 + lda rasterIRQHigh, y + sta ji + 2 + +ji: + jsr $0000 + + inc nextIRQ + ldx nextIRQ + + lda rasterIRQNext, x + cmp #$ff + beq e2 + // carry is cleared at this point + + tay + dey + sbc #2 + cmp $d012 + bcc l1 + + sty $d012 + +ex: + + lda PLAShadow + sta $01 + + pla + tay + pla + tax + pla + rti + +e2: + + ldx npos + stx tpos + inc rirq_count + + bit $d011 + bmi e1 + + sta $d012 + jmp ex + +e1: + ldx #0 + stx nextIRQ + ldy rasterIRQNext + dey + sty $d012 + jmp ex + +} + __asm irq1 { lda $d019 @@ -284,7 +363,7 @@ void rirq_clear(byte n) rasterIRQRows[n] = 255; } -void rirq_init(bool kernalIRQ) +void rirq_init_kernal(void) { for(byte i=0; imSrc[0].mTemp < 0) + vr.LimitMax(ins->mSrc[0].mIntConst - 1); + else if (ins->mSrc[0].mRange.mMaxState == IntegerValueRange::S_BOUND) + vr.LimitMax(ins->mSrc[0].mRange.mMaxValue - 1); + break; +#endif default: vr.mMaxState = vr.mMinState = IntegerValueRange::S_UNBOUND; } @@ -7200,6 +7208,20 @@ bool InterCodeBasicBlock::SimplifyPointerOffsets(void) return true; } +static bool IsValidSignedIntRange(InterType t, int64 value) +{ + switch (t) + { + case IT_INT8: + return value >= -128 && value <= 127; + case IT_INT16: + return value >= -32768 && value <= 32767; + case IT_INT32: + return true; + default: + return false; + } +} bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArray& tvalue, int& spareTemps) { @@ -7353,6 +7375,40 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra } } break; + case IC_RELATIONAL_OPERATOR: + if (ins->mOperator == IA_CMPLS || ins->mOperator == IA_CMPLES || ins->mOperator == IA_CMPGS || ins->mOperator == IA_CMPGES) + { + if (ins->mSrc[0].mTemp < 0 && ins->mSrc[1].mTemp >= 0 && ltvalue[ins->mSrc[1].mTemp]) + { + InterInstruction* pins = ltvalue[ins->mSrc[1].mTemp]; + + if (pins->mCode == IC_BINARY_OPERATOR && pins->mOperator == IA_ADD) + { + if (pins->mSrc[0].mTemp < 0) + { + if (IsValidSignedIntRange(ins->mSrc[0].mType, ins->mSrc[0].mIntConst - pins->mSrc[0].mIntConst)) + { + ins->mSrc[1].Forward(pins->mSrc[1]); + pins->mSrc[1].mFinal = false; + ins->mSrc[0].mIntConst -= pins->mSrc[0].mIntConst; + changed = true; + } + } + else if (pins->mSrc[1].mTemp < 0) + { + if (IsValidSignedIntRange(ins->mSrc[0].mType, ins->mSrc[0].mIntConst - pins->mSrc[1].mIntConst)) + { + ins->mSrc[1].Forward(pins->mSrc[0]); + pins->mSrc[0].mFinal = false; + ins->mSrc[0].mIntConst -= pins->mSrc[1].mIntConst; + changed = true; + } + } + } + } + } + break; + case IC_LEA: if (ins->mSrc[1].mMemory == IM_INDIRECT && ins->mSrc[1].mTemp >= 0 && tvalue[ins->mSrc[1].mTemp]) { @@ -8271,6 +8327,16 @@ bool InterCodeBasicBlock::LoadStoreForwarding(const GrowingInstructionPtrArray& j++; } mLoadStoreInstructions.SetSize(k); + + if (nins) + { + // Check self destruction of source operaand + int l = 0; + while (l < nins->mNumOperands && t != nins->mSrc[l].mTemp) + l++; + if (l != nins->mNumOperands) + nins = nullptr; + } } if (nins) @@ -10822,6 +10888,15 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa else if (cins->mCode == IC_BINARY_OPERATOR && cins->mOperator == IA_ADD && cins->mSrc[1].mTemp == st && cins->mSrc[0].mTemp < 0) toffset += cins->mSrc[0].mIntConst; else + break; + } + else + { + int k = 0; + while (k < cins->mNumOperands && cins->mSrc[k].mTemp != dt) + k++; + + if (k != cins->mNumOperands) break; } @@ -10973,6 +11048,8 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati mVisited = true; CheckFinalLocal(); + if (mTrueJump) mTrueJump->CheckFinalLocal(); + if (mFalseJump) mFalseJump->CheckFinalLocal(); // Remove none instructions @@ -11751,6 +11828,29 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati #endif } + if (i + 3 < mInstructions.Size()) + { + if ( + mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_ADD && mInstructions[i + 0]->mSrc[0].mTemp < 0 && + mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 1]->mOperator == IA_MUL && mInstructions[i + 1]->mSrc[0].mTemp < 0 && + mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal && + mInstructions[i + 2]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 2]->mOperator == IA_ADD && + mInstructions[i + 2]->mSrc[1].mTemp == mInstructions[i + 1]->mDst.mTemp && mInstructions[i + 2]->mSrc[1].mFinal && + mInstructions[i + 3]->mCode == IC_LEA && mInstructions[i + 3]->mSrc[1].mTemp < 0 && + mInstructions[i + 3]->mSrc[0].mTemp == mInstructions[i + 2]->mDst.mTemp && mInstructions[i + 3]->mSrc[0].mFinal) + { + int d = mInstructions[i + 0]->mSrc[0].mIntConst * mInstructions[i + 1]->mSrc[0].mIntConst; + mInstructions[i + 3]->mSrc[1].mIntConst += d; + mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[1]; + mInstructions[i + 1]->mDst.mRange.mMinValue -= d; mInstructions[i + 1]->mDst.mRange.mMaxValue -= d; + mInstructions[i + 2]->mSrc[1].mRange.mMinValue -= d; mInstructions[i + 2]->mSrc[1].mRange.mMaxValue -= d; + mInstructions[i + 2]->mDst.mRange.mMinValue -= d; mInstructions[i + 2]->mDst.mRange.mMaxValue -= d; + mInstructions[i + 3]->mSrc[0].mRange.mMinValue -= d; mInstructions[i + 3]->mSrc[0].mRange.mMaxValue -= d; + mInstructions[i + 0]->mCode = IC_NONE; mInstructions[i + 0]->mNumOperands = 0; + changed = true; + } + + } #if 1 @@ -11836,6 +11936,53 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati } while (changed); + // Check case of cmp signed immediate + if (mFalseJump && mInstructions.Size() > 3) + { + int nins = mInstructions.Size(); + if (mInstructions[nins - 1]->mCode == IC_BRANCH && + mInstructions[nins - 2]->mCode == IC_RELATIONAL_OPERATOR && mInstructions[nins - 2]->mDst.mTemp == mInstructions[nins - 1]->mSrc[0].mTemp && + mInstructions[nins - 2]->mOperator == IA_CMPLS && mInstructions[nins - 2]->mSrc[0].mTemp < 0) + { + int j = nins - 2; + while (j >= 0 && mInstructions[j]->mDst.mTemp != mInstructions[nins - 2]->mSrc[1].mTemp) + j--; + if (j >= 0 && mInstructions[j]->mCode == IC_LOAD_TEMPORARY) + { + int si = mInstructions[j]->mSrc[0].mTemp, di = mInstructions[j]->mDst.mTemp, ioffset = 0; + + InterInstruction* ains = nullptr; + + int k = j + 1; + while (k < nins - 2) + { + InterInstruction* ins = mInstructions[k]; + if (ins->mDst.mTemp == si) + { + if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && ins->mSrc[0].mTemp < 0 && ins->mSrc[1].mTemp == si) + { + ioffset += ins->mSrc[0].mIntConst; + ains = ins; + } + else + break; + } + + k++; + } + + if (k == nins - 2) + { + if (ains) + { + mInstructions[nins - 2]->mSrc[1] = ains->mDst; + mInstructions[nins - 2]->mSrc[0].mIntConst += ioffset; + } + } + } + } + } + CheckFinalLocal(); if (mTrueJump) mTrueJump->PeepholeOptimization(staticVars); @@ -12363,6 +12510,9 @@ void InterCodeProcedure::PeepholeOptimization(void) TempForwarding(); RemoveUnusedInstructions(); + Disassemble("Precheck Final"); + CheckFinal(); + ResetVisited(); mEntryBlock->PeepholeOptimization(mModule->mGlobalVars); } @@ -12751,6 +12901,8 @@ void InterCodeProcedure::EliminateAliasValues() void InterCodeProcedure::LoadStoreForwarding(InterMemory paramMemory) { + DisassembleDebug("Load/Store forwardingY"); + bool changed; do { GrowingInstructionPtrArray gipa(nullptr); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index a7e5714..d64a777 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -880,6 +880,80 @@ bool NativeCodeInstruction::SwapXYReg(void) } +static void UpdateCollisionSet(NumberSet& liveTemps, NumberSet* collisionSets, int temp) +{ + int i; + + if (temp >= 0 && !liveTemps[temp]) + { + for (i = 0; i < liveTemps.Size(); i++) + { + if (liveTemps[i]) + { + collisionSets[i] += temp; + collisionSets[temp] += i; + } + } + + liveTemps += temp; + } +} + +void NativeCodeInstruction::BuildCollisionTable(NumberSet& liveTemps, NumberSet* collisionSets) +{ + if (mMode == ASMIM_ZERO_PAGE) + { + if (ChangesAddress()) + liveTemps -= mAddress; + if (UsesAddress()) + UpdateCollisionSet(liveTemps, collisionSets, mAddress); + } + if (mMode == ASMIM_INDIRECT_Y) + { + UpdateCollisionSet(liveTemps, collisionSets, mAddress); + UpdateCollisionSet(liveTemps, collisionSets, mAddress + 1); + } + if (mType == ASMIT_JSR) + { + for(int i= BC_REG_ACCU; i< BC_REG_ACCU + 4; i++) + UpdateCollisionSet(liveTemps, collisionSets, i); + for (int i = BC_REG_WORK; i < BC_REG_WORK + 4; i++) + UpdateCollisionSet(liveTemps, collisionSets, i); + + if (mFlags & NCIF_RUNTIME) + { + + if (mFlags & NCIF_USE_ZP_32_X) + { + for (int i = mParam; i < mParam + 4; i++) + UpdateCollisionSet(liveTemps, collisionSets, i); + } + + if (mFlags & NCIF_FEXEC) + { + for (int i = BC_REG_FPARAMS; i < BC_REG_FPARAMS_END; i++) + UpdateCollisionSet(liveTemps, collisionSets, i); + } + } + else + { + for (int i = BC_REG_FPARAMS; i < BC_REG_FPARAMS_END; i++) + UpdateCollisionSet(liveTemps, collisionSets, i); + + if (mLinkerObject && mLinkerObject->mProc) + { + for (int i = BC_REG_TMP; i < BC_REG_TMP + mLinkerObject->mProc->mCallerSavedTemps; i++) + UpdateCollisionSet(liveTemps, collisionSets, i); + } + else + { + for (int i = BC_REG_TMP; i < BC_REG_TMP_SAVED; i++) + UpdateCollisionSet(liveTemps, collisionSets, i); + } + } + } +} + bool NativeCodeInstruction::ReplaceXRegWithYReg(void) { @@ -10714,6 +10788,39 @@ bool NativeCodeBasicBlock::RemoveUnusedResultInstructions(void) return changed; } +void NativeCodeBasicBlock::BuildCollisionTable(NumberSet* collisionSets) +{ + if (!mVisited) + { + mVisited = true; + + NumberSet requiredTemps(mExitRequiredRegs); + int i, j; + + for (i = 0; i < mExitRequiredRegs.Size(); i++) + { + if (mExitRequiredRegs[i]) + { + for (j = 0; j < mExitRequiredRegs.Size(); j++) + { + if (mExitRequiredRegs[j]) + { + collisionSets[i] += j; + } + } + } + } + + for (i = mIns.Size() - 1; i >= 0; i--) + { + mIns[i].BuildCollisionTable(requiredTemps, collisionSets); + } + + if (mTrueJump) mTrueJump->BuildCollisionTable(collisionSets); + if (mFalseJump) mFalseJump->BuildCollisionTable(collisionSets); + } +} + void NativeCodeBasicBlock::BuildDominatorTree(NativeCodeBasicBlock* from) { if (from == this) @@ -11370,6 +11477,39 @@ bool NativeCodeBasicBlock::ReduceLocalYPressure(void) CheckLive(); +#if 1 + if (mLoopHead && mFalseJump && !mEntryRequiredRegs[CPU_REG_X] && !mExitRequiredRegs[CPU_REG_X] && mEntryBlocks.Size() == 2 && (mFalseJump == this || mTrueJump == this)) + { + NativeCodeBasicBlock* pblock, * nblock; + if (mTrueJump == this) + nblock = mFalseJump; + else + nblock = mTrueJump; + if (mEntryBlocks[0] == this) + pblock = mEntryBlocks[1]; + else + pblock = mEntryBlocks[0]; + + if (!pblock->mFalseJump && !nblock->mEntryRequiredRegs[CPU_REG_Y]) + { + int pz = pblock->mIns.Size(); + + if (mEntryRequiredRegs[CPU_REG_Y] && pz > 0 && pblock->mIns[pz - 1].mType == ASMIT_LDY && pblock->mIns[pz - 1].mMode == ASMIM_IMMEDIATE) + { + if (CanReplaceYRegWithXReg(0, mIns.Size())) + { + mEntryRequiredRegs += CPU_REG_X; mEntryRequiredRegs -= CPU_REG_Y; + mExitRequiredRegs += CPU_REG_X; mExitRequiredRegs -= CPU_REG_Y; + pblock->mExitRequiredRegs += CPU_REG_X; pblock->mExitRequiredRegs -= CPU_REG_Y; + + ReplaceYRegWithXReg(0, mIns.Size()); + pblock->mIns[pz - 1].mType = ASMIT_LDX; + changed = true; + } + } + } + } +#endif int start = 0; while (start < mIns.Size()) @@ -14207,6 +14347,32 @@ bool NativeCodeBasicBlock::HasTailSTX(int& addr, int& index) const return false; } +bool NativeCodeBasicBlock::HasTailSTY(int& addr, int& index) const +{ + int i = mIns.Size(); + while (i > 0) + { + i--; + if (mIns[i].ChangesYReg()) + return false; + if (mIns[i].mType == ASMIT_STY && mIns[i].mMode == ASMIM_ZERO_PAGE) + { + index = i; + addr = mIns[i].mAddress; + + i++; + while (i < mIns.Size()) + { + if (mIns[i].ReferencesZeroPage(addr)) + return false; + i++; + } + return true; + } + } + return false; +} + void NativeCodeBasicBlock::AddEntryBlock(NativeCodeBasicBlock* block) { @@ -14397,7 +14563,7 @@ bool NativeCodeBasicBlock::PropagateSinglePath(void) #endif - if (mTrueJump && mFalseJump) + if (mTrueJump && mFalseJump && mExitRequiredRegs.Size()) { uint32 live = 0; if (mExitRequiredRegs[CPU_REG_X]) @@ -14871,6 +15037,27 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool } } + if (eb->HasTailSTY(addr, index)) + { + i = 1; + while (i < mEntryBlocks.Size() && mEntryBlocks[i]->HasTailSTY(taddr, tindex) && taddr == addr) + i++; + if (i == mEntryBlocks.Size()) + { + mIns.Insert(0, eb->mIns[index]); + mIns[0].mLive |= LIVE_CPU_REG_A | LIVE_CPU_REG_X | LIVE_CPU_REG_Y; + for (int i = 0; i < mEntryBlocks.Size(); i++) + { + NativeCodeBasicBlock* b = mEntryBlocks[i]; + b->HasTailSTY(taddr, tindex); + for (int j = tindex + 1; j < b->mIns.Size(); j++) + b->mIns[j].mLive |= LIVE_CPU_REG_Y; + b->mIns.Remove(tindex); + } + changed = true; + } + } + break; } } @@ -15509,6 +15696,227 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool return changed; } +void NativeCodeBasicBlock::DoCrossBlockAShortcut(int addr) +{ + mExitRequiredRegs += CPU_REG_A; + int i = mIns.Size(); + while (i > 0) + { + i--; + mIns[i].mLive |= LIVE_CPU_REG_A; + if (mIns[i].mType == ASMIT_STA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == addr) + return; + } +} + +bool NativeCodeBasicBlock::CanCrossBlockAShortcut(int addr) +{ + int i = mIns.Size(); + while (i > 0) + { + i--; + if (mIns[i].ChangesAccu()) + return false; + if (mIns[i].mType == ASMIT_STA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == addr) + return true; + + if (mIns[i].ReferencesZeroPage(addr)) + return false; + } + + return false; +} + + +void NativeCodeBasicBlock::DoCrossBlockXShortcut(int addr) +{ + mExitRequiredRegs += CPU_REG_X; + int i = mIns.Size(); + while (i > 0) + { + i--; + mIns[i].mLive |= LIVE_CPU_REG_X; + if (mIns[i].mType == ASMIT_STX && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == addr) + return; + if (mIns[i].mType == ASMIT_STA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == addr) + { + mIns.Insert(i, NativeCodeInstruction(ASMIT_TAX)); + return; + } + } +} + +bool NativeCodeBasicBlock::CanCrossBlockXShortcut(int addr) +{ + int i = mIns.Size(); + while (i > 0) + { + i--; + if (mIns[i].ChangesXReg()) + return false; + if (mIns[i].mType == ASMIT_STX && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == addr) + return true; + if (mIns[i].mType == ASMIT_STA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == addr && !(mIns[i].mLive & LIVE_CPU_REG_Z)) + return true; + + if (mIns[i].ReferencesZeroPage(addr)) + return false; + } + + return false; +} + +void NativeCodeBasicBlock::DoCrossBlockYShortcut(int addr) +{ + mExitRequiredRegs += CPU_REG_Y; + int i = mIns.Size(); + while (i > 0) + { + i--; + mIns[i].mLive |= LIVE_CPU_REG_Y; + if (mIns[i].mType == ASMIT_STY && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == addr) + return; + if (mIns[i].mType == ASMIT_STA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == addr) + { + mIns.Insert(i, NativeCodeInstruction(ASMIT_TAY)); + return; + } + } +} + +bool NativeCodeBasicBlock::CanCrossBlockYShortcut(int addr) +{ + int i = mIns.Size(); + while (i > 0) + { + i--; + if (mIns[i].ChangesYReg()) + return false; + if (mIns[i].mType == ASMIT_STY && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == addr) + return true; + if (mIns[i].mType == ASMIT_STA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == addr && !(mIns[i].mLive & LIVE_CPU_REG_Z)) + return true; + + if (mIns[i].ReferencesZeroPage(addr)) + return false; + } + + return false; +} + +bool NativeCodeBasicBlock::CrossBlockXYShortcut(void) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + if (!mLoopHead && mEntryBlocks.Size() > 1) + { + CheckLive(); + + bool xvalid = !mEntryRequiredRegs[CPU_REG_X]; + bool yvalid = !mEntryRequiredRegs[CPU_REG_Y]; + bool avalid = !mEntryRequiredRegs[CPU_REG_A]; + + int i = 0; + while (i < mIns.Size() && (xvalid || yvalid || avalid)) + { + if (xvalid && mIns[i].ChangesAccu()) + { + if (mIns[i].mType == ASMIT_LDA && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_CPU_REG_Z)) + { + int k = i; + while (k > 0 && !mIns[k - 1].ReferencesZeroPage(mIns[i].mAddress)) + k--; + if (k == 0) + { + k = 0; + while (k < mEntryBlocks.Size() && mEntryBlocks[k]->CanCrossBlockAShortcut(mIns[i].mAddress)) + k++; + if (k == mEntryBlocks.Size()) + { + for (int k = 0; k < mEntryBlocks.Size(); k++) + mEntryBlocks[k]->DoCrossBlockAShortcut(mIns[i].mAddress); + changed = true; + mIns[i].mType = ASMIT_NOP; mIns[i].mMode = ASMIM_IMPLIED; + mEntryRequiredRegs += CPU_REG_A; + for (int k = 0; k < i; k++) + mIns[k].mLive |= LIVE_CPU_REG_A; + } + } + } + xvalid = false; + } + if (xvalid && mIns[i].ChangesXReg()) + { + if (mIns[i].mType == ASMIT_LDX && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_CPU_REG_Z)) + { + int k = i; + while (k > 0 && !mIns[k - 1].ReferencesZeroPage(mIns[i].mAddress)) + k--; + if (k == 0) + { + k = 0; + while (k < mEntryBlocks.Size() && mEntryBlocks[k]->CanCrossBlockXShortcut(mIns[i].mAddress)) + k++; + if (k == mEntryBlocks.Size()) + { + for (int k = 0; k < mEntryBlocks.Size(); k++) + mEntryBlocks[k]->DoCrossBlockXShortcut(mIns[i].mAddress); + changed = true; + mIns[i].mType = ASMIT_NOP; mIns[i].mMode = ASMIM_IMPLIED; + mEntryRequiredRegs += CPU_REG_X; + for (int k = 0; k < i; k++) + mIns[k].mLive |= LIVE_CPU_REG_X; + } + } + } + xvalid = false; + } + if (yvalid && mIns[i].ChangesYReg()) + { + if (mIns[i].mType == ASMIT_LDY && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_CPU_REG_Z)) + { + int k = i; + while (k > 0 && !mIns[k - 1].ReferencesZeroPage(mIns[i].mAddress)) + k--; + if (k == 0) + { + k = 0; + while (k < mEntryBlocks.Size() && mEntryBlocks[k]->CanCrossBlockYShortcut(mIns[i].mAddress)) + k++; + if (k == mEntryBlocks.Size()) + { + for (int k = 0; k < mEntryBlocks.Size(); k++) + mEntryBlocks[k]->DoCrossBlockYShortcut(mIns[i].mAddress); + changed = true; + mIns[i].mType = ASMIT_NOP; mIns[i].mMode = ASMIM_IMPLIED; + mEntryRequiredRegs += CPU_REG_Y; + for (int k = 0; k < i; k++) + mIns[k].mLive |= LIVE_CPU_REG_Y; + } + } + } + yvalid = false; + } + + i++; + } + + CheckLive(); + } + + if (mTrueJump && mTrueJump->CrossBlockXYShortcut()) + changed = true; + if (mFalseJump && mFalseJump->CrossBlockXYShortcut()) + changed = true; + } + +return changed; +} + bool NativeCodeBasicBlock::FindPageStartAddress(int at, int reg, int& addr) { int j = at - 2; @@ -19589,8 +19997,6 @@ bool NativeCodeBasicBlock::GlobalValueForwarding(void) mIns.Insert(i + 1, NativeCodeInstruction(carryop)); } - CheckLive(); - if (this->mTrueJump && this->mTrueJump->GlobalValueForwarding()) changed = true; if (this->mFalseJump && this->mFalseJump->GlobalValueForwarding()) @@ -19960,6 +20366,14 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc lblock->mTrueJump = lblock; lblock->mFalseJump = eblock; + lblock->mEntryRequiredRegs = mEntryRequiredRegs; + lblock->mExitRequiredRegs = mExitRequiredRegs; + + eblock->mEntryRequiredRegs = mExitRequiredRegs; + eblock->mExitRequiredRegs = mExitRequiredRegs; + + mExitRequiredRegs = mEntryRequiredRegs; + for (int i = 0; i < mIns.Size(); i++) lblock->mIns.Push(mIns[i]); @@ -20679,6 +21093,30 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc } } + if (mEntryRequiredRegs.Size() && !mEntryRequiredRegs[CPU_REG_A]) + { + for (int i = 0; i + 1 < mIns.Size(); i++) + { + if (mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_ZERO_PAGE && + mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE) + { + int j = 0; + while (j < mIns.Size() && + (j == i + 0 || !mIns[j].ChangesZeroPage(mIns[i + 0].mAddress)) && + (j == i + 1 || !mIns[j].ChangesZeroPage(mIns[i + 1].mAddress))) + j++; + if (j == mIns.Size()) + { + if (!prevBlock) + return OptimizeSimpleLoopInvariant(proc); + prevBlock->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, mIns[i + 0].mAddress)); + prevBlock->mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, mIns[i + 1].mAddress)); + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + } + } + } + } + CheckLive(); return changed; @@ -23658,7 +24096,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass // // move ldx/y down - for (int i = 0; i + 2 < mIns.Size(); i++) + for (int i = 0; i + 1 < mIns.Size(); i++) { #if 1 if (mIns[i].mType == ASMIT_LDY) @@ -27014,6 +27452,22 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass progress = true; } #endif +#if 1 + else if ( + mIns[i + 0].mType == ASMIT_LDA && + mIns[i + 1].mType == ASMIT_ASL && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && + mIns[i + 2].mType == ASMIT_ROL && mIns[i + 2].mMode == ASMIM_IMPLIED && HasAsmInstructionMode(ASMIT_ROL, mIns[i + 0].mMode) && + mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].SameEffectiveAddress(mIns[i + 0]) && !(mIns[i + 3].mLive & LIVE_CPU_REG_A)) + { + + mIns[i + 2].CopyMode(mIns[i + 0]); + + mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; + mIns[i + 3].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED; + + progress = true; + } +#endif else if ( mIns[i + 0].ChangesAccuAndFlag() && mIns[i + 1].mType == ASMIT_STA && @@ -28929,6 +29383,15 @@ void NativeCodeProcedure::CompressTemporaries(void) int tpos = BC_REG_TMP + mInterProc->mFreeCallerSavedTemps; int spos = BC_REG_TMP_SAVED; + NumberSet * collisionSet = new NumberSet[NUM_REGS]; + + for (int i = 0; i < NUM_REGS; i++) + collisionSet[i].Reset(NUM_REGS); + + ResetVisited(); + mEntryBlock->BuildCollisionTable(collisionSet); + + // for (int tsize = 4; tsize > 0; tsize >>= 1) { for (int i = 0; i < mInterProc->mTempOffset.Size(); i++) @@ -28951,19 +29414,52 @@ void NativeCodeProcedure::CompressTemporaries(void) if (usize) { int pos = spos; - if (tpos + usize <= BC_REG_TMP + mInterProc->mCallerSavedTemps) + +#if 1 + if (mInterProc->mLeafProcedure) { - pos = tpos; - tpos += usize; + int k = 0; + while (k < usize && !collisionSet[k + BC_REG_ACCU][k + reg]) + k++; + if (k == usize) + { + pos = BC_REG_ACCU; + for (int i = 0; i < 256; i++) + { + for (int j = 0; j < usize; j++) + { + if (collisionSet[j + reg][i]) + { + collisionSet[j + BC_REG_ACCU] += i; + collisionSet[i] += j + BC_REG_ACCU; + } + } + } + } + } +#endif + + if (pos == spos) + { + if (tpos + usize <= BC_REG_TMP + mInterProc->mCallerSavedTemps) + { + pos = tpos; + tpos += usize; + } + else + spos += usize; + mInterProc->mTempOffset[i] = pos - BC_REG_TMP; + mInterProc->mTempSizes[i] = usize; } else - spos += usize; + { + mInterProc->mTempOffset[i] = 0; + mInterProc->mTempSizes[i] = 0; + } for (int j = 0; j < usize; j++) remap[reg + j] = pos + j; - mInterProc->mTempOffset[i] = pos - BC_REG_TMP; - mInterProc->mTempSizes[i] = usize; } else { @@ -28975,6 +29471,8 @@ void NativeCodeProcedure::CompressTemporaries(void) } } + delete[] collisionSet; + mInterProc->mCallerSavedTemps = tpos - BC_REG_TMP; ResetVisited(); @@ -29777,6 +30275,13 @@ void NativeCodeProcedure::Optimize(void) if (!changed && mEntryBlock->ShortcutZeroPageCopyUp(this)) changed = true; #endif + +#if 1 + ResetVisited(); + if (!changed && mEntryBlock->CrossBlockXYShortcut()) + changed = true; +#endif + } #endif diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 3d7dffc..efc41de 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -147,6 +147,9 @@ public: bool CanSwapXYReg(void); bool SwapXYReg(void); + + void BuildCollisionTable(NumberSet& liveTemps, NumberSet* collisionSets); + }; class NativeCodeBasicBlock @@ -270,6 +273,8 @@ public: bool BuildGlobalRequiredRegSet(NumberSet& fromRequiredTemps); bool RemoveUnusedResultInstructions(void); + void BuildCollisionTable(NumberSet* collisionSets); + bool IsSame(const NativeCodeBasicBlock* block) const; bool FindSameBlocks(NativeCodeProcedure* nproc); bool MergeSameBlocks(NativeCodeProcedure* nproc); @@ -365,11 +370,23 @@ public: bool SameTail(const NativeCodeInstruction& ins) const; bool HasTailSTA(int& addr, int& index) const; bool HasTailSTX(int& addr, int& index) const; + bool HasTailSTY(int& addr, int& index) const; bool PropagateSinglePath(void); bool CanChangeTailZPStoreToX(int addr, const NativeCodeBasicBlock * nblock, const NativeCodeBasicBlock* fblock = nullptr) const; void ChangeTailZPStoreToX(int addr); + bool CanCrossBlockAShortcut(int addr); + void DoCrossBlockAShortcut(int addr); + + bool CanCrossBlockXShortcut(int addr); + void DoCrossBlockXShortcut(int addr); + + bool CanCrossBlockYShortcut(int addr); + void DoCrossBlockYShortcut(int addr); + + bool CrossBlockXYShortcut(void); + bool Check16BitSum(int at, NativeRegisterSum16Info& info); bool Propagate16BitSum(void);