diff --git a/include/fixmath.c b/include/fixmath.c index 87fbfc3..ed7db28 100644 --- a/include/fixmath.c +++ b/include/fixmath.c @@ -628,6 +628,178 @@ unsigned long lmul16f16(unsigned long x, unsigned long y) return ll; } +#if 1 +long lmul16f16s(long x, long y) +{ + __asm + { + lda #0 + // fractional + sta __tmp + 0 + sta __tmp + 1 + + // result + sta __accu + 0 + sta __accu + 1 + sta __accu + 2 + sta __accu + 3 + + lda x + 0 + ora x + 1 + ora y + 0 + ora y + 1 + bne w0b + + l2: + + lsr x + 2 + bcc ws1 + + clc + lda y + 2 + adc __accu + 2 + sta __accu + 2 + lda y + 3 + adc __accu + 3 + sta __accu + 3 + ws1: + + lsr x + 3 + bcc ws2 + + clc + lda y + 2 + adc __accu + 3 + sta __accu + 3 + ws2: + + asl y + 2 + rol y + 3 + + lda x + 2 + ora x + 3 + bne l2 + rts + + w0b: + + lda y + 3 + and #$80 + beq w0 + lda #$ff + w0: + // overflow + sta __tmp + 2 + sta __tmp + 3 + + lda x + 3 + bpl w0a + + sec + lda #0 + sbc y + 0 + sta __accu + 2 + lda #0 + sbc y + 1 + sta __accu + 3 + w0a: + + ldx #8 + + l1: + lsr x + 0 + bcc w1 + + clc + lda y + 0 + adc __tmp + 0 + sta __tmp + 0 + lda y + 1 + adc __tmp + 1 + sta __tmp + 1 + lda y + 2 + adc __accu + 0 + sta __accu + 0 + lda y + 3 + adc __accu + 1 + sta __accu + 1 + lda __tmp + 2 + adc __accu + 2 + sta __accu + 2 + lda __tmp + 3 + adc __accu + 3 + sta __accu + 3 + w1: + + lsr x + 1 + bcc w2 + + clc + lda y + 0 + adc __tmp + 1 + sta __tmp + 1 + lda y + 1 + adc __accu + 0 + sta __accu + 0 + lda y + 2 + adc __accu + 1 + sta __accu + 1 + lda y + 3 + adc __accu + 2 + sta __accu + 2 + lda __tmp + 2 + adc __accu + 3 + sta __accu + 3 + w2: + + lsr x + 2 + bcc w3 + + clc + lda y + 0 + adc __accu + 0 + sta __accu + 0 + lda y + 1 + adc __accu + 1 + sta __accu + 1 + lda y + 2 + adc __accu + 2 + sta __accu + 2 + lda y + 3 + adc __accu + 3 + sta __accu + 3 + w3: + + lsr x + 3 + bcc w4 + + clc + lda y + 0 + adc __accu + 1 + sta __accu + 1 + lda y + 1 + adc __accu + 2 + sta __accu + 2 + lda y + 2 + adc __accu + 3 + sta __accu + 3 + w4: + + asl y + 0 + rol y + 1 + rol y + 2 + rol y + 3 + rol __tmp + 2 + rol __tmp + 3 + + dex + beq w5 + jmp l1 + w5: + } +} + +#else __native long lmul16f16s(long x, long y) { unsigned lox = x; @@ -656,6 +828,7 @@ __native long lmul16f16s(long x, long y) return r; } +#endif __native unsigned long ldiv16f16(unsigned long x, unsigned long y) { diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index a609bc4..8637221 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -4847,6 +4847,14 @@ InterCodeBasicBlock::~InterCodeBasicBlock(void) { } +InterCodeBasicBlock* InterCodeBasicBlock::Clone(void) +{ + InterCodeBasicBlock* nblock = new InterCodeBasicBlock(mProc); + for (int i = 0; i < mInstructions.Size(); i++) + nblock->mInstructions.Push(mInstructions[i]->Clone()); + return nblock; +} + void InterCodeBasicBlock::Append(InterInstruction * code) { @@ -14007,6 +14015,143 @@ void InterCodeBasicBlock::PropagateMemoryAliasingInfo(const GrowingInstructionPt } } +static bool IsTempModifiedInBlocks(const ExpandingArray& body, int temp) +{ + for (int j = 0; j < body.Size(); j++) + if (body[j]->IsTempModified(temp)) + return true; + return false; +} + +static bool IsInsSrcModifiedInBlocks(const ExpandingArray& body, const InterInstruction * ins) +{ + for (int i = 0; i < ins->mNumOperands; i++) + { + if (ins->mSrc[i].mTemp >= 0 && IsTempModifiedInBlocks(body, ins->mSrc[i].mTemp)) + return true; + } + return false; +} + +bool InterCodeBasicBlock::MoveConditionOutOfLoop(void) +{ + if (!mVisited) + { + mVisited = true; + + if (mLoopHead) + { + ExpandingArray body, path; + body.Push(this); + bool innerLoop = true; + + for (int i = 0; i < mEntryBlocks.Size(); i++) + { + if (mEntryBlocks[i] != mLoopPrefix) + { + if (!mEntryBlocks[i]->CollectLoopBody(this, body)) + innerLoop = false; + } + } + + if (innerLoop) + { + // Find all conditions based on invariants + for (int i = 0; i < body.Size(); i++) + { + InterCodeBasicBlock* block = body[i]; + int nins = block->mInstructions.Size(); + if (block->mFalseJump && block->mInstructions[nins-1]->mCode == IC_BRANCH && body.Contains(block->mFalseJump) && body.Contains(block->mTrueJump)) + { + int ncins = 0; + if (!IsInsSrcModifiedInBlocks(body, block->mInstructions[nins - 1])) + ncins = 1; + else if (nins > 1 && block->mInstructions[nins - 2]->mCode == IC_RELATIONAL_OPERATOR && + block->mInstructions[nins - 1]->mSrc[0].mTemp == block->mInstructions[nins - 2]->mDst.mTemp && block->mInstructions[nins - 1]->mSrc[0].mFinal && + !IsInsSrcModifiedInBlocks(body, block->mInstructions[nins - 2])) + ncins = 2; + + if (ncins > 0) + { + // The condition is not modified on the path + // Now check the number of instructions in the conditional section + + int ninside = 0, noutside = 0; + for (int i = 0; i < body.Size(); i++) + { + bool tdom = block->mTrueJump->IsDirectDominatorBlock(body[i]); + bool fdom = block->mFalseJump->IsDirectDominatorBlock(body[i]); + if (tdom != fdom) + ninside += body[i]->mInstructions.Size(); + else + noutside += body[i]->mInstructions.Size(); + } + + // Less than four instructions outside of condition, or twice as many + // inside as outside is the trigger + if (noutside - ncins < 4 || ninside > 2 * (noutside - ncins)) + { + // Now clone the loop into a true and a false branch + + GrowingArray copies(nullptr); + for (int i = 0; i < body.Size(); i++) + { + InterCodeBasicBlock* nblock = body[i]->Clone(); + copies[body[i]->mIndex] = nblock; + } + + for (int i = 0; i < body.Size(); i++) + { + InterCodeBasicBlock* rblock = body[i]; + InterCodeBasicBlock* nblock = copies[rblock->mIndex]; + if (rblock->mTrueJump) + { + InterCodeBasicBlock* tblock = copies[rblock->mTrueJump->mIndex]; + if (tblock) + nblock->mTrueJump = tblock; + else + nblock->mTrueJump = rblock->mTrueJump; + } + if (rblock->mFalseJump) + { + InterCodeBasicBlock* tblock = copies[rblock->mFalseJump->mIndex]; + if (tblock) + nblock->mFalseJump = tblock; + else + nblock->mFalseJump = rblock->mFalseJump; + } + } + mLoopPrefix->mInstructions.Pop(); + for (int i = 0; i < ncins; i++) + mLoopPrefix->mInstructions.Push(block->mInstructions[nins - ncins + i]->Clone()); + + block->mInstructions[nins - 1]->mSrc[0].mTemp = -1; + block->mInstructions[nins - 1]->mSrc[0].mIntConst = 1; + + mLoopPrefix->mFalseJump = copies[mLoopPrefix->mTrueJump->mIndex]; + + InterCodeBasicBlock* nblock = copies[block->mIndex]; + nblock->mInstructions[nins - 1]->mSrc[0].mTemp = -1; + nblock->mInstructions[nins - 1]->mSrc[0].mIntConst = 0; + + return true; + } + } + } + } + } + } + + if (mTrueJump && mTrueJump->MoveConditionOutOfLoop()) + return true; + if (mFalseJump && mFalseJump->MoveConditionOutOfLoop()) + return true; + } + + return false; +} + + void InterCodeBasicBlock::PushMoveOutOfLoop(void) { @@ -17977,6 +18122,38 @@ void InterCodeProcedure::CheckUsedDefinedTemps(void) #endif } +void InterCodeProcedure::MoveConditionsOutOfLoop(void) +{ + BuildTraces(false); + BuildLoopPrefix(); + ResetEntryBlocks(); + ResetVisited(); + mEntryBlock->CollectEntryBlocks(nullptr); + + Disassemble("PreMoveConditionOutOfLoop"); + + ResetVisited(); + while (mEntryBlock->MoveConditionOutOfLoop()) + { + Disassemble("MoveConditionOutOfLoop"); + + BuildDataFlowSets(); + TempForwarding(); + RemoveUnusedInstructions(); + + BuildTraces(false); + BuildLoopPrefix(); + ResetEntryBlocks(); + ResetVisited(); + mEntryBlock->CollectEntryBlocks(nullptr); + + Disassemble("PostMoveConditionOutOfLoop"); + + ResetVisited(); + } +} + + void InterCodeProcedure::PropagateMemoryAliasingInfo(void) { GrowingInstructionPtrArray tvalue(nullptr); @@ -18607,7 +18784,7 @@ void InterCodeProcedure::Close(void) { GrowingTypeArray tstack(IT_NONE); - CheckFunc = !strcmp(mIdent->mString, "parse_expression"); + CheckFunc = !strcmp(mIdent->mString, "main"); CheckCase = false; mEntryBlock = mBlocks[0]; @@ -19258,6 +19435,11 @@ void InterCodeProcedure::Close(void) RemoveUnusedLocalStoreInstructions(); + if (mCompilerOptions & COPT_OPTIMIZE_BASIC) + { + MoveConditionsOutOfLoop(); + } + #if 1 ResetVisited(); diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 0d56e25..b95c2c2 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -398,6 +398,8 @@ public: InterCodeBasicBlock(InterCodeProcedure * proc); ~InterCodeBasicBlock(void); + InterCodeBasicBlock* Clone(void); + void Append(InterInstruction * code); const InterInstruction* FindByDst(int dst) const; void Close(InterCodeBasicBlock* trueJump, InterCodeBasicBlock* falseJump); @@ -576,6 +578,7 @@ public: void CollectLoopPath(const ExpandingArray& body, ExpandingArray& path); void InnerLoopOptimization(const NumberSet& aliasedParams); void PushMoveOutOfLoop(void); + bool MoveConditionOutOfLoop(void); void PropagateMemoryAliasingInfo(const GrowingInstructionPtrArray& tvalue); void RemoveUnusedMallocs(void); @@ -716,6 +719,7 @@ protected: void CheckUsedDefinedTemps(void); void WarnUsedUndefinedVariables(void); void PropagateMemoryAliasingInfo(void); + void MoveConditionsOutOfLoop(void); void PeepholeOptimization(void); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 9a349d4..3c06645 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -164,6 +164,25 @@ void NativeRegisterDataSet::ResetAbsolute(LinkerObject* linkerObject, int addr) } } +void NativeRegisterDataSet::ResetAbsoluteXY(LinkerObject* linkerObject, int addr) +{ + for (int i = 0; i < NUM_REGS; i++) + { + if (mRegs[i].mMode == NRDM_ABSOLUTE) + { + if (mRegs[i].mLinkerObject == linkerObject && mRegs[i].mValue < addr + 256 && mRegs[i].mValue >= addr) + mRegs[i].Reset(); + } + else if (mRegs[i].mMode == NRDM_ABSOLUTE_X || mRegs[i].mMode == NRDM_ABSOLUTE_Y) + { + if (mRegs[i].mLinkerObject == linkerObject && mRegs[i].mValue < addr + 256 && mRegs[i].mValue + 256 > addr) + mRegs[i].Reset(); + } + else if (mRegs[i].mMode == NRDM_INDIRECT_Y) + mRegs[i].Reset(); + } +} + void NativeRegisterDataSet::ResetX(void) { for (int i = 0; i < NUM_REGS; i++) @@ -4047,7 +4066,7 @@ bool NativeCodeInstruction::ValueForwarding(NativeRegisterDataSet& data, AsmInsT } if (ChangesAddress()) - data.ResetIndirect(mAddress); + data.ResetAbsoluteXY(mLinkerObject, mAddress); } else if (mMode == ASMIM_ABSOLUTE_Y) { @@ -4111,7 +4130,7 @@ bool NativeCodeInstruction::ValueForwarding(NativeRegisterDataSet& data, AsmInsT } if (ChangesAddress()) - data.ResetIndirect(mAddress); + data.ResetAbsoluteXY(mLinkerObject, mAddress); } else if (mMode == ASMIM_ABSOLUTE) { @@ -15404,6 +15423,57 @@ bool NativeCodeBasicBlock::PropagateCommonSubExpression(void) return changed; } +bool NativeCodeBasicBlock::LocalSwapXY(void) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + int pre = -1; + + if (mEntryRequiredRegs[CPU_REG_X] || mEntryRequiredRegs[CPU_REG_Y]) + pre = -2; + + for (int i = 0; i < mIns.Size(); i++) + { + uint32 live = mIns[i].mLive & (LIVE_CPU_REG_X | LIVE_CPU_REG_Y); + if (live) + { + if (pre == -1) + pre = i; + } + else if (!live) + { + if (pre >= 0) + { + int k = pre; + while (k <= i && mIns[k].CanSwapXYReg()) + k++; + if (k > i) + { + for (int k = pre; k <= i; k++) + { + if (mIns[k].SwapXYReg()) + changed = true; + } + } + } + pre = -1; + } + } + + if (mTrueJump && mTrueJump->LocalSwapXY()) + changed = true; + if (mFalseJump && mFalseJump->LocalSwapXY()) + changed = true; + } + + return changed; +} + + bool NativeCodeBasicBlock::GlobalSwapXY(void) { bool changed = false; @@ -15498,6 +15568,66 @@ bool NativeCodeBasicBlock::GlobalSwapXY(void) return changed; } +bool NativeCodeBasicBlock::UntangleXYUsage(void) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + int xins = -1, yins = -1; + for (int i = 0; i + 1 < mIns.Size(); i++) + { + NativeCodeInstruction& ins(mIns[i]); + NativeCodeInstruction& nins(mIns[i + 1]); + + if (ins.mType == ASMIT_LDX) + { + if (yins >= 0 && mIns[yins].SameEffectiveAddress(ins) && nins.mMode == ASMIM_ABSOLUTE_X && HasAsmInstructionMode(nins.mType, ASMIM_ABSOLUTE_Y)) + { + mIns.Insert(i, NativeCodeInstruction(nins.mIns, nins.mType, ASMIM_ABSOLUTE_Y, nins.mAddress, nins.mLinkerObject, nins.mFlags)); + mIns[i + 1].mLive |= mIns[i].mLive; + mIns.Remove(i + 2); + for (int j = yins; j < i; j++) + mIns[j].mLive |= LIVE_CPU_REG_Y; + changed = true; + } + else + xins = i; + } + else if (ins.mType == ASMIT_LDY) + { + if (xins >= 0 && mIns[xins].SameEffectiveAddress(ins) && nins.mMode == ASMIM_ABSOLUTE_Y && HasAsmInstructionMode(nins.mType, ASMIM_ABSOLUTE_X)) + { + mIns.Insert(i, NativeCodeInstruction(nins.mIns, nins.mType, ASMIM_ABSOLUTE_X, nins.mAddress, nins.mLinkerObject, nins.mFlags)); + mIns[i + 1].mLive |= mIns[i].mLive; + mIns.Remove(i + 2); + for (int j = xins; j < i; j++) + mIns[j].mLive |= LIVE_CPU_REG_X; + changed = true; + } + else + yins = i; + } + else + { + if (xins >= 0 && (mIns[xins].MayBeChangedOnAddress(ins) || ins.ChangesXReg())) + xins = -1; + if (yins >= 0 && (mIns[yins].MayBeChangedOnAddress(ins) || ins.ChangesYReg())) + yins = -1; + } + } + + if (mTrueJump && mTrueJump->UntangleXYUsage()) + changed = true; + if (mFalseJump && mFalseJump->UntangleXYUsage()) + changed = true; + } + + return changed; +} + bool NativeCodeBasicBlock::OptimizeXYPairUsage(void) { bool changed = false; @@ -16514,6 +16644,7 @@ bool NativeCodeBasicBlock::EliminateDeadLoops(void) mTrueJump = eblock; mBranch = ASMIT_JMP; mEntryBlocks.RemoveAll(this); + mFalseJump = nullptr; mNumEntries--; changed = true; } @@ -16526,6 +16657,7 @@ bool NativeCodeBasicBlock::EliminateDeadLoops(void) mTrueJump = eblock; mBranch = ASMIT_JMP; mEntryBlocks.RemoveAll(this); + mFalseJump = nullptr; mNumEntries--; changed = true; } @@ -26649,6 +26781,68 @@ bool NativeCodeBasicBlock::MoveIndirectLoadStoreDown(int at) return false; } +bool NativeCodeBasicBlock::MoveLDXUp(int at) +{ + NativeCodeInstruction& lins(mIns[at]); + + int i = at; + while (i > 0) + { + i--; + NativeCodeInstruction& ins(mIns[i]); + if (ins.mType == ASMIT_STA && ins.SameEffectiveAddress(lins)) + { + if (ins.mLive & LIVE_CPU_REG_Z) + return false; + + mIns.Insert(i + 1, NativeCodeInstruction(lins.mIns, ASMIT_TAX)); + mIns.Remove(at + 1); + + while (i < at) + { + mIns[i].mLive |= LIVE_CPU_REG_X; + i++; + } + return true; + } + if (ins.ReferencesXReg() || lins.MayBeChangedOnAddress(ins)) + return false; + } + + return false; +} + +bool NativeCodeBasicBlock::MoveLDYUp(int at) +{ + NativeCodeInstruction& lins(mIns[at]); + + int i = at; + while (i > 0) + { + i--; + NativeCodeInstruction& ins(mIns[i]); + if (ins.mType == ASMIT_STA && ins.SameEffectiveAddress(lins)) + { + if (ins.mLive & LIVE_CPU_REG_Z) + return false; + + mIns.Insert(i + 1, NativeCodeInstruction(lins.mIns, ASMIT_TAY)); + mIns.Remove(at + 1); + + while (i < at) + { + mIns[i].mLive |= LIVE_CPU_REG_Y; + i++; + } + return true; + } + if (ins.ReferencesYReg() || lins.MayBeChangedOnAddress(ins)) + return false; + } + + return false; +} + bool NativeCodeBasicBlock::MoveIndirectLoadStoreDownY(int at) { int j = at + 3; @@ -35270,6 +35464,17 @@ void NativeCodeBasicBlock::BlockSizeReduction(NativeCodeProcedure* proc, int xen j += 2; i += 4; } + else if (i + 4 < mIns.Size() && + mIns[i + 0].mType == ASMIT_SEC && + mIns[i + 1].mType == ASMIT_LDA && + mIns[i + 2].mType == ASMIT_SBC && + mIns[i + 3].mType == ASMIT_SEC && + mIns[i + 4].mType == ASMIT_SBC && mIns[i + 4].mMode == ASMIM_IMMEDIATE && mIns[i + 4].mAddress == 0x01) + { + mIns[j + 0] = NativeCodeInstruction(mIns[i + 0].mIns, ASMIT_CLC); + j += 3; + i += 5; + } #endif else if (i + 1 < mIns.Size() && mIns[i + 0].ChangesZFlag() && mIns[i + 1].mType == ASMIT_LDA && mIns[i + 0].SameEffectiveAddress(mIns[i + 1]) && !(mIns[i + 1].mLive & LIVE_CPU_REG_A)) @@ -35929,6 +36134,27 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass #endif + // Move LDX/LDY up to STA of same address +#if 1 + if (pass >= 9) + { + for (int i = 2; i < mIns.Size(); i++) + { + if (mIns[i].mType == ASMIT_LDX && (mIns[i].mMode == ASMIM_ZERO_PAGE || mIns[i].mMode == ASMIM_ABSOLUTE) && !(mIns[i].mLive & LIVE_CPU_REG_Z)) + { + if (MoveLDXUp(i)) + changed = true; + } + else if (mIns[i].mType == ASMIT_LDY && (mIns[i].mMode == ASMIM_ZERO_PAGE || mIns[i].mMode == ASMIM_ABSOLUTE) && !(mIns[i].mLive & LIVE_CPU_REG_Z)) + { + if (MoveLDYUp(i)) + changed = true; + } + } + CheckLive(); + } +#endif + #if 1 // move ORA #imm up a shift chain to an LDA #imm @@ -37813,6 +38039,43 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 1].mMode = ASMIM_IMPLIED; progress = true; } + + else if (mIns[i + 0].ChangesAccuAndFlag() && mIns[i + 1].mType == ASMIT_TAX && !(mIns[i + 1].mLive & LIVE_CPU_REG_X)) + { + mIns[i + 0].mLive |= (mIns[i + 1].mLive & LIVE_CPU_REG_Z); + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + progress = true; + } + else if (mIns[i + 0].ChangesAccuAndFlag() && mIns[i + 1].mType == ASMIT_TAY && !(mIns[i + 1].mLive & LIVE_CPU_REG_Y)) + { + mIns[i + 0].mLive |= (mIns[i + 1].mLive & LIVE_CPU_REG_Z); + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + progress = true; + } + else if (mIns[i + 0].mType == ASMIT_LDX && mIns[i + 1].mType == ASMIT_CPX && !(mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_X))) + { + mIns[i + 0].mType = ASMIT_LDA; mIns[i + 0].mLive |= LIVE_CPU_REG_A; + mIns[i + 1].mType = ASMIT_CMP; + progress = true; + } + else if (mIns[i + 0].mType == ASMIT_LDY && mIns[i + 1].mType == ASMIT_CPY && !(mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Y))) + { + mIns[i + 0].mType = ASMIT_LDA; mIns[i + 0].mLive |= LIVE_CPU_REG_A; + mIns[i + 1].mType = ASMIT_CMP; + progress = true; + } + else if (mIns[i + 0].mType == ASMIT_LDX && mIns[i + 1].mType == ASMIT_STX && !(mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_X))) + { + mIns[i + 0].mType = ASMIT_LDA; mIns[i + 0].mLive |= LIVE_CPU_REG_A; + mIns[i + 1].mType = ASMIT_STA; + progress = true; + } + else if (mIns[i + 0].mType == ASMIT_LDY && mIns[i + 1].mType == ASMIT_STY && !(mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Y))) + { + mIns[i + 0].mType = ASMIT_LDA; mIns[i + 0].mLive |= LIVE_CPU_REG_A; + mIns[i + 1].mType = ASMIT_STA; + progress = true; + } #endif else if ( mIns[i + 0].mType == ASMIT_INC && mIns[i + 0].mMode == ASMIM_ZERO_PAGE && @@ -43264,7 +43527,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) { mInterProc = proc; - CheckFunc = !strcmp(mInterProc->mIdent->mString, "board_draw_item"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "interpret_expression"); int nblocks = proc->mBlocks.Size(); tblocks = new NativeCodeBasicBlock * [nblocks]; @@ -44448,9 +44711,11 @@ void NativeCodeProcedure::Optimize(void) ResetVisited(); if (mEntryBlock->OptimizeXYPairUsage()) changed = true; + ResetVisited(); + if (mEntryBlock->UntangleXYUsage()) + changed = true; } #endif - #if _DEBUG ResetVisited(); mEntryBlock->CheckBlocks(); @@ -44653,6 +44918,18 @@ void NativeCodeProcedure::Optimize(void) } } } + + if (step == 11) + { + if (changed) + swappedXY = false; + else if (!swappedXY) + { + ResetVisited(); + changed = mEntryBlock->LocalSwapXY(); + swappedXY = true; + } + } #endif #if _DEBUG ResetVisited(); @@ -44673,7 +44950,7 @@ void NativeCodeProcedure::Optimize(void) } #if 1 - if (!changed && step < 11) + if (!changed && step < 12) { cnt = 0; step++; diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index c015883..1061ce0 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -47,6 +47,7 @@ struct NativeRegisterDataSet void ResetZeroPage(int addr); void ResetZeroPageRange(int addr, int num); void ResetAbsolute(LinkerObject * linkerObject, int addr); + void ResetAbsoluteXY(LinkerObject* linkerObject, int addr); int FindAbsolute(LinkerObject* linkerObject, int addr); void ResetIndirect(int reg); void ResetX(void); @@ -407,6 +408,10 @@ public: bool MoveIndirectLoadStoreDown(int at); bool MoveIndirectLoadStoreDownY(int at); + bool MoveLDXUp(int at); + bool MoveLDYUp(int at); + + bool MoveIndirectLoadStoreUp(int at); bool MoveAbsoluteLoadStoreUp(int at); bool MoveLoadStoreOutOfXYRangeUp(int at); @@ -624,6 +629,8 @@ public: bool OptimizeXYPairUsage(void); bool CanGlobalSwapXY(void); bool GlobalSwapXY(void); + bool LocalSwapXY(void); + bool UntangleXYUsage(void); bool IsSimpleSubExpression(int at, NativeSimpleSubExpression & ex); bool PropagateCommonSubExpression(void);