diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index c3990a6..47a5b67 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -2800,7 +2800,7 @@ void InterInstruction::Disassemble(FILE* file) { if (this->mCode != IC_NONE) { - static char memchars[] = "NPLGFPITAZ"; + static char memchars[] = "NPLGFPITAZZ"; fprintf(file, "\t"); switch (this->mCode) @@ -3784,6 +3784,92 @@ void InterCodeBasicBlock::MarkAliasedLocalTemps(const GrowingIntArray& localTabl } } +bool InterCodeBasicBlock::PropagateNonLocalUsedConstTemps(void) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + int i = 0; + while (i < mInstructions.Size()) + { + InterInstruction* ins(mInstructions[i]); + if (ins->mCode == IC_CONSTANT && ins->mSingleAssignment) + { + int ttemp = ins->mDst.mTemp; + InterCodeBasicBlock* target = this; + while (target && !target->mLocalUsedTemps[ttemp]) + { + InterCodeBasicBlock* ttarget = nullptr; + + if (!target->mFalseJump) + ttarget = target->mTrueJump; + else if (!target->mFalseJump->mFalseJump && target->mFalseJump->mTrueJump == target->mTrueJump && !target->mFalseJump->mLocalUsedTemps[ttemp]) + ttarget = target->mTrueJump; + else if (!target->mTrueJump->mFalseJump && target->mTrueJump->mTrueJump == target->mFalseJump && !target->mTrueJump->mLocalUsedTemps[ttemp]) + ttarget = target->mFalseJump; + + while (ttarget && ttarget->mLoopHead) + { + if (ttarget->mFalseJump == ttarget && !ttarget->mLocalUsedTemps[ttemp]) + ttarget = ttarget->mTrueJump; + else if (ttarget->mTrueJump == ttarget && !ttarget->mLocalUsedTemps[ttemp]) + ttarget = ttarget->mFalseJump; + else + ttarget = nullptr; + } + + target = ttarget; + } + + if (target && this != target) + { + target->mInstructions.Insert(0, ins); + mInstructions.Remove(i); + changed = true; + } + else + i++; + } + else + i++; + } + + if (mTrueJump && mTrueJump->PropagateNonLocalUsedConstTemps()) + changed = true; + if (mFalseJump && mFalseJump->PropagateNonLocalUsedConstTemps()) + changed = 
true; + } + + return changed; +} + +void InterCodeBasicBlock::CollectLocalUsedTemps(int numTemps) +{ + if (!mVisited) + { + mVisited = true; + + mLocalUsedTemps.Reset(numTemps); + + for (int i = 0; i < mInstructions.Size(); i++) + { + InterInstruction* ins(mInstructions[i]); + + for (int j = 0; j < ins->mNumOperands; j++) + { + if (ins->mSrc[j].mTemp >= 0) + mLocalUsedTemps += ins->mSrc[j].mTemp; + } + } + + if (mTrueJump) mTrueJump->CollectLocalUsedTemps(numTemps); + if (mFalseJump) mFalseJump->CollectLocalUsedTemps(numTemps); + } +} + void InterCodeBasicBlock::CollectConstTemps(GrowingInstructionPtrArray& ctemps, NumberSet& assignedTemps) { int i; @@ -8018,6 +8104,39 @@ void InterCodeBasicBlock::PeepholeOptimization(void) i--; } #endif +#if 1 + // move indirect load/store pairs up + i = 0; + while (i + 1 < mInstructions.Size()) + { + if (mInstructions[i + 0]->mCode == IC_LOAD && mInstructions[i + 1]->mCode == IC_STORE && mInstructions[i + 1]->mSrc[0].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[0].mFinal) + { + if (mInstructions[i + 0]->mSrc[0].mMemory == IM_INDIRECT) + { + InterInstruction* lins(mInstructions[i + 0]); + InterInstruction* sins(mInstructions[i + 1]); + + int j = i; + while (j > 0 && + CanBypassLoadUp(lins, mInstructions[j - 1]) && + CanBypassStore(sins, mInstructions[j - 1])) + { + mInstructions[j + 1] = mInstructions[j - 1]; + j--; + } + + if (i != j) + { + mInstructions[j + 0] = lins; + mInstructions[j + 1] = sins; + } + } + } + + i++; + } + +#endif #if 1 i = 0; while (i < mInstructions.Size()) @@ -9009,6 +9128,7 @@ void InterCodeProcedure::Close(void) ResetVisited(); mEntryBlock->CompactInstructions(); + BuildDataFlowSets(); ResetVisited(); @@ -9078,12 +9198,19 @@ void InterCodeProcedure::Close(void) #endif BuildDataFlowSets(); - TempForwarding(); RemoveUnusedInstructions(); DisassembleDebug("Moved single path instructions"); + PropagateNonLocalUsedTemps(); + + BuildDataFlowSets(); + TempForwarding(); + 
RemoveUnusedInstructions(); + + DisassembleDebug("propagate non local used temps"); + #if 1 do { @@ -9438,13 +9565,20 @@ void InterCodeProcedure::BuildLoopPrefix(void) mEntryBlock->CollectEntries(); } +bool InterCodeProcedure::PropagateNonLocalUsedTemps(void) +{ + ResetVisited(); + mEntryBlock->CollectLocalUsedTemps(mTemporaries.Size()); + + ResetVisited(); + return mEntryBlock->PropagateNonLocalUsedConstTemps(); +} + bool InterCodeProcedure::GlobalConstantPropagation(void) { - NumberSet assignedTemps(mTemporaries.Size()); GrowingInstructionPtrArray ctemps(nullptr); - ResetVisited(); mEntryBlock->CollectConstTemps(ctemps, assignedTemps); diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 276d94a..aa098e6 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -326,6 +326,7 @@ public: bool mVisited, mInPath, mLoopHead, mChecked, mConditionBlockTrue, mUnreachable, mLoopPath; + NumberSet mLocalUsedTemps; NumberSet mLocalRequiredTemps, mLocalProvidedTemps; NumberSet mEntryRequiredTemps, mEntryProvidedTemps; NumberSet mExitRequiredTemps, mExitProvidedTemps; @@ -368,6 +369,8 @@ public: void CollectLocalAddressTemps(GrowingIntArray& localTable, GrowingIntArray& paramTable); void MarkAliasedLocalTemps(const GrowingIntArray& localTable, NumberSet& aliasedLocals, const GrowingIntArray& paramTable, NumberSet& aliasedParams); + void CollectLocalUsedTemps(int numTemps); + bool PropagateNonLocalUsedConstTemps(void); void CollectConstTemps(GrowingInstructionPtrArray& ctemps, NumberSet& assignedTemps); bool PropagateConstTemps(const GrowingInstructionPtrArray& ctemps); @@ -518,6 +521,7 @@ protected: void TempForwarding(void); void RemoveUnusedInstructions(void); bool GlobalConstantPropagation(void); + bool PropagateNonLocalUsedTemps(void); void BuildLoopPrefix(void); void SingleAssignmentForwarding(void); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 29b971e..22123a8 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ 
b/oscar64/NativeCodeGenerator.cpp @@ -724,6 +724,11 @@ bool NativeCodeInstruction::ReferencesXReg(void) const return ChangesXReg() || RequiresXReg(); } +bool NativeCodeInstruction::ReferencesZeroPage(int address) const +{ + return UsesZeroPage(address); +} + bool NativeCodeInstruction::ChangesZeroPage(int address) const { if (mMode == ASMIM_ZERO_PAGE && mAddress == address) @@ -8393,7 +8398,7 @@ void NativeCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const In } } -void NativeCodeBasicBlock::LoadEffectiveAddress(InterCodeProcedure* proc, const InterInstruction * ins, const InterInstruction* sins1, const InterInstruction* sins0) +void NativeCodeBasicBlock::LoadEffectiveAddress(InterCodeProcedure* proc, const InterInstruction * ins, const InterInstruction* sins1, const InterInstruction* sins0, bool addrvalid) { bool isub = false; int ireg = ins->mSrc[0].mTemp; @@ -8440,7 +8445,7 @@ void NativeCodeBasicBlock::LoadEffectiveAddress(InterCodeProcedure* proc, const // if the global variable is smaller than 256 bytes, we can safely ignore the upper byte? 
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE_ADDRESS, ins->mSrc[1].mIntConst, ins->mSrc[1].mLinkerObject, NCIF_UPPER)); #if 1 - if (ins->mSrc[1].mLinkerObject->mSize < 256 || ins->mSrc[0].IsUByte()) + if (ins->mSrc[1].mLinkerObject->mSize < 256 || ins->mSrc[0].IsUByte() || (addrvalid && ins->mSrc[1].mLinkerObject->mSize <= 256)) mIns.Push(NativeCodeInstruction(iop, ASMIM_IMMEDIATE, 0)); else #endif @@ -12173,6 +12178,37 @@ bool NativeCodeBasicBlock::MoveLoadStoreXUp(int at) return false; } +bool NativeCodeBasicBlock::MoveLoadAddImmStoreAbsXUp(int at) +{ + int j = at - 1, top = at; + while (j > 0) + { + if (mIns[j].ChangesXReg()) + break; + if (mIns[j].MayBeSameAddress(mIns[at + 3])) + break; + if (mIns[j].MayBeSameAddress(mIns[at + 0]) && mIns[j].ChangesAddress()) + break; + + if (!(mIns[j - 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C | LIVE_CPU_REG_Z))) + top = j; + j--; + } + + if (top < at) + { + mIns.Insert(top, mIns[at + 3]); mIns.Remove(at + 4); + mIns.Insert(top, mIns[at + 3]); mIns.Remove(at + 4); + mIns.Insert(top, mIns[at + 3]); mIns.Remove(at + 4); + mIns.Insert(top, mIns[at + 3]); mIns.Remove(at + 4); + + return true; + } + else + return false; +} + + bool NativeCodeBasicBlock::MoveLoadAddImmStoreUp(int at) { int j = at - 1; @@ -12807,6 +12843,38 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc } } + if (sz >= 2 && + mIns[0].mType == ASMIT_LDA && mIns[0].mMode == ASMIM_ZERO_PAGE && !(mIns[0].mLive & LIVE_MEM) && + mIns[1].mType == ASMIT_STA && mIns[1].mMode == ASMIM_ZERO_PAGE && !(mIns[1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))) + { + int i = mIns.Size() - 1; + while (i > 1 && !mIns[i].ReferencesZeroPage(mIns[1].mAddress) && !mIns[i].ReferencesZeroPage(mIns[0].mAddress)) + i--; + + if (i > 1) + { + i--; + + if (mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_ZERO_PAGE && mIns[i + 0].mAddress == mIns[1].mAddress && + mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && 
mIns[i + 1].mAddress == mIns[0].mAddress && !(mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))) + { + if (!prevBlock) + return OptimizeSimpleLoopInvariant(proc); + + prevBlock->mIns.Push(mIns[0]); + prevBlock->mIns.Push(mIns[1]); + + exitBlock->mIns.Insert(0, mIns[i + 0]); + exitBlock->mIns.Insert(1, mIns[i + 1]); + + mIns.Remove(i); mIns.Remove(i); + mIns.Remove(0); mIns.Remove(0); + return true; + } + } + + } + int ai = 0; while (ai < mIns.Size() && !mIns[ai].ChangesAccu()) @@ -13513,11 +13581,12 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) assert(mIns.Size() == 0 || mIns[0].mType != ASMIT_INV); } } -#if 1 - if (!changed) - changed = OptimizeSimpleLoopInvariant(proc, nullptr, nullptr); -#endif } + +#if 1 + if (!changed) + changed = OptimizeSimpleLoopInvariant(proc, nullptr, nullptr); +#endif } if (mTrueJump && mTrueJump->OptimizeSimpleLoop(proc)) @@ -14658,6 +14727,24 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass #endif +#if 1 + // move load - add # - store with absolute,x up as far as possible + // (only when A, Z and C are not live after the store) + + for (int i = 2; i + 3 < mIns.Size(); i++) + { + if ( + mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_ABSOLUTE_X && + mIns[i + 1].mType == ASMIT_CLC && + mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && + mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ABSOLUTE_X && !(mIns[i + 3].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z | LIVE_CPU_REG_C))) + { + if (MoveLoadAddImmStoreAbsXUp(i)) + changed = true; + } + } +#endif + #if 1 // move load - add ZP - store up to initial store // @@ -17377,7 +17464,23 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 4].mType = ASMIT_NOP; mIns[i + 4].mMode = ASMIM_IMPLIED; progress = true; } +#if 0 + else if ( + mIns[i + 0].mType == ASMIT_LDY && mIns[i + 0].mMode == ASMIM_ZERO_PAGE && + mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_ABSOLUTE_Y && + mIns[i + 
2].mType == ASMIT_LDY && mIns[i + 2].mMode == ASMIM_ZERO_PAGE && mIns[i + 2].mAddress != mIns[i + 0].mAddress && + mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ABSOLUTE_Y && + mIns[i + 4].mType == ASMIT_LDY && mIns[i + 4].mMode == ASMIM_ZERO_PAGE && mIns[i + 4].mAddress == mIns[i + 0].mAddress && !(mIns[i + 2].mLive & LIVE_CPU_REG_X)) + { + mIns[i + 1].mLive |= LIVE_CPU_REG_Y; + mIns[i + 2].mType = ASMIT_LDX; mIns[i + 2].mLive |= LIVE_CPU_REG_X; + mIns[i + 3].mMode = ASMIM_ABSOLUTE_X; mIns[i + 3].mLive |= LIVE_CPU_REG_Y; + mIns[i + 4].mType = ASMIT_NOP; mIns[i + 4].mMode = ASMIM_IMPLIED; + progress = true; + } +#endif } + #if 1 if (pass > 2 && i + 4 < mIns.Size()) { @@ -19190,7 +19293,7 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode iblock->mInstructions[i + 1]->mCode == IC_LEA && iblock->mInstructions[i + 1]->mSrc[1].mTemp == ins->mDst.mTemp && iblock->mInstructions[i + 1]->mSrc[1].mFinal) { - block->LoadEffectiveAddress(iproc, iblock->mInstructions[i + 1], ins, nullptr); + block->LoadEffectiveAddress(iproc, iblock->mInstructions[i + 1], ins, nullptr, false); i++; } else @@ -19219,7 +19322,7 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode case IC_UNARY_OPERATOR: if (i + 1 < iblock->mInstructions.Size() && ins->mOperator == IA_NEG && iblock->mInstructions[i + 1]->mCode == IC_LEA && iblock->mInstructions[i + 1]->mSrc[0].mTemp == ins->mDst.mTemp && iblock->mInstructions[i + 1]->mSrc[0].mFinal) { - block->LoadEffectiveAddress(iproc, iblock->mInstructions[i + 1], nullptr, ins); + block->LoadEffectiveAddress(iproc, iblock->mInstructions[i + 1], nullptr, ins, false); i++; } else @@ -19229,7 +19332,17 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode block->NumericConversion(iproc, this, ins); break; case IC_LEA: - block->LoadEffectiveAddress(iproc, ins, nullptr, nullptr); + { + bool avalid = false; + if (i + 1 < iblock->mInstructions.Size()) + { + if 
(iblock->mInstructions[i + 1]->mCode == IC_LOAD && iblock->mInstructions[i + 1]->mSrc[0].mTemp == ins->mDst.mTemp) + avalid = true; + else if (iblock->mInstructions[i + 1]->mCode == IC_STORE && iblock->mInstructions[i + 1]->mSrc[1].mTemp == ins->mDst.mTemp) + avalid = true; + } + block->LoadEffectiveAddress(iproc, ins, nullptr, nullptr, avalid); + } break; case IC_CONSTANT: block->LoadConstant(iproc, ins); diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index c4a2b1c..b1f602a 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -103,6 +103,8 @@ public: bool ChangesZeroPage(int address) const; bool UsesZeroPage(int address) const; + bool ReferencesZeroPage(int address) const; + bool ChangesGlobalMemory(void) const; bool SameEffectiveAddress(const NativeCodeInstruction& ins) const; bool MayBeChangedOnAddress(const NativeCodeInstruction& ins) const; @@ -189,7 +191,7 @@ public: NativeCodeBasicBlock* BinaryOperator(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins, const InterInstruction* sins1, const InterInstruction* sins0); void UnaryOperator(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins); void RelationalOperator(InterCodeProcedure* proc, const InterInstruction * ins, NativeCodeProcedure * nproc, NativeCodeBasicBlock* trueJump, NativeCodeBasicBlock * falseJump); - void LoadEffectiveAddress(InterCodeProcedure* proc, const InterInstruction * ins, const InterInstruction* sins1, const InterInstruction* sins0); + void LoadEffectiveAddress(InterCodeProcedure* proc, const InterInstruction * ins, const InterInstruction* sins1, const InterInstruction* sins0, bool addrvalid); void NumericConversion(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins); NativeCodeBasicBlock * CopyValue(InterCodeProcedure* proc, const InterInstruction * ins, NativeCodeProcedure* nproc); NativeCodeBasicBlock * StrcpyValue(InterCodeProcedure* 
proc, const InterInstruction* ins, NativeCodeProcedure* nproc); @@ -225,10 +227,14 @@ public: bool MoveIndirectLoadStoreUp(int at); bool MoveAbsoluteLoadStoreUp(int at); bool MoveLoadStoreOutOfXYRangeUp(int at); + + bool MoveLoadAddImmStoreAbsXUp(int at); + bool MoveLoadAddImmStoreUp(int at); bool MoveCLCLoadAddZPStoreUp(int at); bool MoveLoadAddZPStoreUp(int at); bool MoveLoadShiftRotateUp(int at); + bool MoveCLCLoadAddZPStoreDown(int at); bool FindDirectAddressSumY(int at, int reg, int& apos, int& breg); bool PatchDirectAddressSumY(int at, int reg, int apos, int breg);