From fc7bb2c37750d80efd023ce2a8b32e14c42b63ca Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sat, 18 Jun 2022 17:32:14 +0200 Subject: [PATCH] Optimize parameter handling in loops --- include/c64/joystick.c | 2 +- oscar64/InterCode.cpp | 396 ++++++++++++++++++++++++++------ oscar64/InterCode.h | 6 +- oscar64/NativeCodeGenerator.cpp | 206 +++++++++++------ oscar64/NativeCodeGenerator.h | 2 + samples/memmap/charsetload.d64 | Bin 174848 -> 174848 bytes 6 files changed, 473 insertions(+), 139 deletions(-) diff --git a/include/c64/joystick.c b/include/c64/joystick.c index 4fce85f..4939a74 100644 --- a/include/c64/joystick.c +++ b/include/c64/joystick.c @@ -5,7 +5,7 @@ bool joyb[2]; void joy_poll(char n) { - char b = ((char *)0xdc00)[n]; + char b = ((volatile char *)0xdc00)[n]; if (!(b & 1)) joyy[n] = -1; diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 9f92dd6..432eb75 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -380,6 +380,8 @@ static bool CollidingMem(const InterOperand& op1, const InterOperand& op2, const { if (op2.mMemory == IM_GLOBAL) return staticVars[op2.mVarIndex]->mAliased; + else if (op2.mMemory == IM_FPARAM) + return false; else return true; } @@ -387,6 +389,8 @@ static bool CollidingMem(const InterOperand& op1, const InterOperand& op2, const { if (op1.mMemory == IM_GLOBAL) return staticVars[op1.mVarIndex]->mAliased; + else if (op1.mMemory == IM_FPARAM) + return false; else return true; } @@ -1204,6 +1208,11 @@ void TempForwardingTable::Intersect(const TempForwardingTable& table) } } +int TempForwardingTable::Size(void) const +{ + return mAssoc.Size(); +} + void TempForwardingTable::SetSize(int size) { int i; @@ -2767,6 +2776,15 @@ bool InterInstruction::RemoveUnusedResultInstructions(InterInstruction* pre, Num changed = true; } + else if (mCode == IC_LOAD_TEMPORARY && mDst.mTemp == mSrc[0].mTemp) + { + mCode = IC_NONE; + mDst.mTemp = -1; + for (int i = 0; i < 
mNumOperands; i++) + mSrc[i].mTemp = -1; + + changed = true; + } else if (mDst.mTemp != -1) { if (!requiredTemps[mDst.mTemp] && mDst.mTemp >= 0) @@ -2793,15 +2811,6 @@ bool InterInstruction::RemoveUnusedResultInstructions(InterInstruction* pre, Num else requiredTemps -= mDst.mTemp; } - else if (mCode == IC_LOAD_TEMPORARY && mDst.mTemp == mSrc[0].mTemp) - { - mCode = IC_NONE; - mDst.mTemp = -1; - for (int i = 0; i < mNumOperands; i++) - mSrc[i].mTemp = -1; - - changed = true; - } for (int i = 0; i < mNumOperands; i++) { @@ -3710,7 +3719,7 @@ static bool IsInfiniteLoop(InterCodeBasicBlock* head, InterCodeBasicBlock* block return false; } -void InterCodeBasicBlock::GenerateTraces(bool expand) +void InterCodeBasicBlock::GenerateTraces(bool expand, bool compact) { int i; @@ -3760,12 +3769,31 @@ void InterCodeBasicBlock::GenerateTraces(bool expand) mFalseJump->mNumEntries++; } } +#if 1 + else if (compact && mTrueJump && !mFalseJump && mTrueJump->mInstructions.Size() == 1 && mTrueJump->mInstructions[0]->mCode == IC_BRANCH && mTrueJump->mFalseJump) + { + InterCodeBasicBlock* tj = mTrueJump; + + int ns = mInstructions.Size(); + + tj->mNumEntries--; + tj->mTrueJump->mNumEntries++; + tj->mFalseJump->mNumEntries++; + + mInstructions[ns - 1]->mCode = IC_BRANCH; + mInstructions[ns - 1]->mOperator = tj->mInstructions[0]->mOperator; + mInstructions[ns - 1]->mSrc[0].Forward(tj->mInstructions[0]->mSrc[0]); + + mTrueJump = tj->mTrueJump; + mFalseJump = tj->mFalseJump; + } +#endif else break; } - if (mTrueJump) mTrueJump->GenerateTraces(expand); - if (mFalseJump) mFalseJump->GenerateTraces(expand); + if (mTrueJump) mTrueJump->GenerateTraces(expand, compact); + if (mFalseJump) mFalseJump->GenerateTraces(expand, compact); mInPath = false; } @@ -6353,7 +6381,18 @@ void InterCodeBasicBlock::PerformTempForwarding(TempForwardingTable& forwardingT if (mLoopHead) { - localForwardingTable.Reset(); + if (mNumEntries == 2 && (mTrueJump == this || mFalseJump == this) && 
mLocalModifiedTemps.Size()) + { + assert(localForwardingTable.Size() == mLocalModifiedTemps.Size()); + + for (int i = 0; i < mLocalModifiedTemps.Size(); i++) + { + if (mLocalModifiedTemps[i]) + localForwardingTable.Destroy(i); + } + } + else + localForwardingTable.Reset(); } #if 0 else if (mNumEntries > 1) @@ -7550,10 +7589,46 @@ bool InterCodeBasicBlock::LoadStoreForwarding(const GrowingInstructionPtrArray& int i = 0; while (i < mLoadStoreInstructions.Size()) { - if (tvalue.IndexOf(mLoadStoreInstructions[i]) == -1) - mLoadStoreInstructions.Remove(i); + InterInstruction* ins(mLoadStoreInstructions[i]); + InterInstruction* nins = nullptr; + + int j = tvalue.IndexOf(ins); + if (j != -1) + nins = ins; else - i++; + { + if (ins->mCode == IC_LOAD) + { + j = 0; + while (j < tvalue.Size() && !SameMem(ins->mSrc[0], tvalue[j])) + j++; + if (j < tvalue.Size()) + { + if (tvalue[j]->mCode == IC_LOAD && tvalue[j]->mDst.IsEqual(ins->mDst)) + nins = ins; + else if (tvalue[j]->mCode == IC_STORE && tvalue[j]->mSrc[0].IsEqual(ins->mDst)) + nins = ins; + } + } + else if (ins->mCode == IC_STORE) + { + j = 0; + while (j < tvalue.Size() && !SameMem(ins->mSrc[1], tvalue[j])) + j++; + if (j < tvalue.Size()) + { + if (tvalue[j]->mCode == IC_LOAD && tvalue[j]->mDst.IsEqual(ins->mSrc[0])) + nins = tvalue[j]; + else if (tvalue[j]->mCode == IC_STORE && tvalue[j]->mSrc[0].IsEqual(ins->mSrc[0])) + nins = ins; + } + } + } + + if (nins) + mLoadStoreInstructions[i++] = nins; + else + mLoadStoreInstructions.Remove(i); } } @@ -8008,7 +8083,9 @@ bool InterCodeBasicBlock::MergeCommonPathInstructions(void) while (ti < mTrueJump->mInstructions.Size() && !changed) { InterInstruction* tins = mTrueJump->mInstructions[ti]; - if (tins->mCode != IC_BRANCH && tins->mCode != IC_JUMP && tins->mCode != IC_RELATIONAL_OPERATOR) + InterInstruction* nins = (ti + 1 < mTrueJump->mInstructions.Size()) ? 
mTrueJump->mInstructions[ti + 1] : nullptr; + + if (tins->mCode != IC_BRANCH && tins->mCode != IC_JUMP && !(nins && nins->mCode == IC_BRANCH && tins->mDst.mTemp == nins->mSrc[0].mTemp)) { int fi = 0; while (fi < mFalseJump->mInstructions.Size() && !tins->IsEqualSource(mFalseJump->mInstructions[fi])) @@ -8789,6 +8866,50 @@ void InterCodeBasicBlock::FollowJumps(void) } } +void InterCodeBasicBlock::BuildLoopSuffix(InterCodeProcedure* proc) +{ + if (!mVisited) + { + mVisited = true; + + if (mLoopHead && mNumEntries == 2 && mFalseJump) + { + if (mTrueJump == this && mFalseJump != this) + { + if (mFalseJump->mNumEntries > 1) + { + InterCodeBasicBlock* suffix = new InterCodeBasicBlock(); + proc->Append(suffix); + InterInstruction* jins = new InterInstruction(); + jins->mCode = IC_JUMP; + suffix->Append(jins); + suffix->Close(mFalseJump, nullptr); + mFalseJump = suffix; + suffix->mNumEntries = 1; + } + } + else if (mFalseJump == this && mTrueJump != this) + { + if (mTrueJump->mNumEntries > 1) + { + InterCodeBasicBlock* suffix = new InterCodeBasicBlock(); + proc->Append(suffix); + InterInstruction* jins = new InterInstruction(); + jins->mCode = IC_JUMP; + suffix->Append(jins); + suffix->Close(mTrueJump, nullptr); + mTrueJump = suffix; + suffix->mNumEntries = 1; + } + } + } + + if (mTrueJump) + mTrueJump->BuildLoopSuffix(proc); + if (mFalseJump) + mFalseJump->BuildLoopSuffix(proc); + } +} InterCodeBasicBlock* InterCodeBasicBlock::BuildLoopPrefix(InterCodeProcedure* proc) { @@ -8809,40 +8930,6 @@ InterCodeBasicBlock* InterCodeBasicBlock::BuildLoopPrefix(InterCodeProcedure* pr jins->mCode = IC_JUMP; mLoopPrefix->Append(jins); mLoopPrefix->Close(this, nullptr); - - if (mNumEntries == 2 && mFalseJump) - { - if (mTrueJump == this && mFalseJump != this) - { - if (mFalseJump->mNumEntries > 1) - { - InterCodeBasicBlock* suffix = new InterCodeBasicBlock(); - proc->Append(suffix); - InterInstruction* jins = new InterInstruction(); - jins->mCode = IC_JUMP; - suffix->Append(jins); - 
suffix->Close(mFalseJump, nullptr); - mFalseJump->mNumEntries--; - mFalseJump = suffix; - suffix->mNumEntries = 1; - } - } - else if (mFalseJump == this && mTrueJump != this) - { - if (mTrueJump->mNumEntries > 1) - { - InterCodeBasicBlock* suffix = new InterCodeBasicBlock(); - proc->Append(suffix); - InterInstruction* jins = new InterInstruction(); - jins->mCode = IC_JUMP; - suffix->Append(jins); - suffix->Close(mTrueJump, nullptr); - mTrueJump->mNumEntries--; - mTrueJump = suffix; - suffix->mNumEntries = 1; - } - } - } } } @@ -9276,6 +9363,8 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa #if 1 if (!hasCall) { + assert(this == mTrueJump && mFalseJump->mNumEntries == 1 || this == mFalseJump && mTrueJump->mNumEntries == 1); + // Check forwarding globals int i = 0; @@ -9284,7 +9373,7 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa InterInstruction* ins = mInstructions[i]; // A global load - if (ins->mCode == IC_LOAD && ins->mSrc[0].mTemp < 0 && ins->mSrc[0].mMemory == IM_GLOBAL) + if (ins->mCode == IC_LOAD && ins->mSrc[0].mTemp < 0 && (ins->mSrc[0].mMemory == IM_GLOBAL || ins->mSrc[0].mMemory == IM_FPARAM)) { // Find the last store that overlaps the load int j = mInstructions.Size() - 1; @@ -9312,6 +9401,8 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa k++; if (k == mInstructions.Size()) { + assert(!mEntryRequiredTemps[sins->mSrc[0].mTemp]); + // Move load before loop mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, ins); InterInstruction* nins = new InterInstruction(); @@ -9760,13 +9851,13 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa mInstructions.SetSize(j); - NumberSet requiredTemps(mTrueJump == this ? mFalseJump->mExitRequiredTemps : mTrueJump->mExitRequiredTemps); + NumberSet requiredTemps(mTrueJump == this ? 
mFalseJump->mEntryRequiredTemps : mTrueJump->mEntryRequiredTemps); for (int i = 0; i < mInstructions.Size(); i++) { InterInstruction* ins = mInstructions[i]; for(int j=0; jmNumOperands; j++) - if (ins->mSrc[j].mTemp >= 0) + if (ins->mSrc[j].mTemp >= 0 && ins->mDst.mTemp != ins->mSrc[j].mTemp) requiredTemps += ins->mSrc[j].mTemp; } @@ -9787,6 +9878,16 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa di++; } } + + int i = 0; + while (i < mInstructions.Size()) + { + InterInstruction* ins = mInstructions[i]; + if (!HasSideEffect(ins->mCode) && !ins->mVolatile && ins->mDst.mTemp >= 0 && !requiredTemps[ins->mDst.mTemp]) + mInstructions.Remove(i); + else + i++; + } } if (mTrueJump) @@ -10103,7 +10204,7 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati } #endif - bool changed; + bool changed = false; do { int j = 0; @@ -10272,6 +10373,28 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati changed = true; } #endif +#if 1 + else if ( + mInstructions[i + 0]->mCode == IC_LOAD_TEMPORARY && + mInstructions[i + 1]->mCode == IC_LEA && mInstructions[i + 1]->mSrc[0].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[0].mFinal + ) + { + mInstructions[i + 1]->mSrc[0].Forward(mInstructions[i + 0]->mSrc[0]); + mInstructions[i + 0]->mCode = IC_NONE; mInstructions[i + 0]->mNumOperands = 0; + changed = true; + } +#endif +#if 1 + else if ( + mInstructions[i + 0]->mCode == IC_LOAD_TEMPORARY && + mInstructions[i + 1]->mCode == IC_LOAD && mInstructions[i + 1]->mSrc[0].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[0].mFinal + ) + { + mInstructions[i + 1]->mSrc[0].Forward(mInstructions[i + 0]->mSrc[0]); + mInstructions[i + 0]->mCode = IC_NONE; mInstructions[i + 0]->mNumOperands = 0; + changed = true; + } +#endif #if 1 else if ( mInstructions[i + 1]->mCode == IC_LOAD_TEMPORARY && mExitRequiredTemps[mInstructions[i + 1]->mDst.mTemp] && @@ -10409,6 
+10532,7 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati changed = true; } #endif + #if 1 // Postincrement artifact if (mInstructions[i + 0]->mCode == IC_LOAD_TEMPORARY && mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && @@ -10435,6 +10559,30 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati #endif } + +#if 1 + if (i + 1 < mInstructions.Size()) + { + if ( + mInstructions[i + 0]->mCode == IC_LOAD_TEMPORARY && + mInstructions[i + 1]->mCode == IC_RELATIONAL_OPERATOR && mInstructions[i + 1]->mSrc[0].mTemp == mInstructions[i + 0]->mSrc[0].mTemp && mInstructions[i + 1]->mSrc[0].mFinal + ) + { + mInstructions[i + 1]->mSrc[0].mTemp = mInstructions[i + 0]->mDst.mTemp; + mInstructions[i + 1]->mSrc[0].mFinal = false; + changed = true; + } + else if ( + mInstructions[i + 0]->mCode == IC_LOAD_TEMPORARY && + mInstructions[i + 1]->mCode == IC_RELATIONAL_OPERATOR && mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mSrc[0].mTemp && mInstructions[i + 1]->mSrc[1].mFinal + ) + { + mInstructions[i + 1]->mSrc[1].mTemp = mInstructions[i + 0]->mDst.mTemp; + mInstructions[i + 1]->mSrc[1].mFinal = false; + changed = true; + } + } +#endif } } while (changed); @@ -10772,7 +10920,7 @@ void InterCodeProcedure::DisassembleDebug(const char* name) Disassemble(name); } -void InterCodeProcedure::BuildTraces(bool expand, bool dominators) +void InterCodeProcedure::BuildTraces(bool expand, bool dominators, bool compact) { // Count number of entries // @@ -10788,7 +10936,7 @@ void InterCodeProcedure::BuildTraces(bool expand, bool dominators) // Build traces // ResetVisited(); - mEntryBlock->GenerateTraces(expand); + mEntryBlock->GenerateTraces(expand, compact); ResetVisited(); for (int i = 0; i < mBlocks.Size(); i++) @@ -10827,6 +10975,9 @@ void InterCodeProcedure::BuildDataFlowSets(void) do { ResetVisited(); } while (mEntryBlock->BuildGlobalRequiredTempSet(totalRequired)); + + ResetVisited(); + 
mEntryBlock->CollectLocalUsedTemps(numTemps); } void InterCodeProcedure::RenameTemporaries(void) @@ -11075,6 +11226,8 @@ void InterCodeProcedure::Close(void) RenameTemporaries(); + BuildDataFlowSets(); + TempForwarding(); int numTemps = mTemporaries.Size(); @@ -11130,6 +11283,9 @@ void InterCodeProcedure::Close(void) } mTemporaries.SetSize(numTemps, true); + + BuildDataFlowSets(); + TempForwarding(); retries--; @@ -11225,6 +11381,8 @@ void InterCodeProcedure::Close(void) RenameTemporaries(); do { + BuildDataFlowSets(); + TempForwarding(); } while (GlobalConstantPropagation()); @@ -11240,9 +11398,6 @@ void InterCodeProcedure::Close(void) } - BuildLoopPrefix(); - DisassembleDebug("added dominators"); - ResetVisited(); mEntryBlock->CompactInstructions(); @@ -11266,6 +11421,9 @@ void InterCodeProcedure::Close(void) DisassembleDebug("Peephole optimized"); + BuildLoopPrefix(); + DisassembleDebug("added dominators"); + BuildDataFlowSets(); TempForwarding(); @@ -11307,9 +11465,6 @@ void InterCodeProcedure::Close(void) { BuildDataFlowSets(); - ResetVisited(); - mEntryBlock->CollectLocalUsedTemps(mTemporaries.Size()); - ResetVisited(); changed = mEntryBlock->PushSinglePathResultInstructions(); @@ -11343,6 +11498,8 @@ void InterCodeProcedure::Close(void) ResetVisited(); changed = mEntryBlock->MergeCommonPathInstructions(); + DisassembleDebug("Merged common path part"); + if (changed) { TempForwarding(); @@ -11536,9 +11693,6 @@ void InterCodeProcedure::Close(void) BuildDataFlowSets(); - ResetVisited(); - mEntryBlock->CollectLocalUsedTemps(mTemporaries.Size()); - ResetVisited(); mEntryBlock->ForwardDiamondMovedTemp(); DisassembleDebug("Diamond move forwarding"); @@ -11662,9 +11816,6 @@ void InterCodeProcedure::Close(void) #if 1 BuildDataFlowSets(); - ResetVisited(); - mEntryBlock->CollectLocalUsedTemps(mTemporaries.Size()); - ResetVisited(); mEntryBlock->ForwardDiamondMovedTemp(); DisassembleDebug("Diamond move forwarding 2"); @@ -11711,10 +11862,109 @@ void 
InterCodeProcedure::Close(void) TempForwarding(); RemoveUnusedInstructions(); - DisassembleDebug("Peephole optimized"); + DisassembleDebug("Global Constant Prop 1"); #endif +#if 1 + for (int i = 0; i < 4; i++) + { + ResetVisited(); + mEntryBlock->PeepholeOptimization(mModule->mGlobalVars); + + DisassembleDebug("Peephole Temp Check"); + + ReduceTemporaries(); + + MergeBasicBlocks(); + + BuildDataFlowSets(); + + TempForwarding(); + + BuildLoopPrefix(); + + BuildDataFlowSets(); + + DisassembleDebug("Checking Unused"); + + RemoveUnusedInstructions(); + + DisassembleDebug("Checked Unused"); + + BuildDataFlowSets(); + + RenameTemporaries(); + + BuildDataFlowSets(); + + TempForwarding(); +#if 1 + + BuildDataFlowSets(); + do { + TempForwarding(); + } while (GlobalConstantPropagation()); + + do { + GrowingInstructionPtrArray gipa(nullptr); + ResetVisited(); + changed = mEntryBlock->LoadStoreForwarding(gipa, mModule->mGlobalVars); + + DisassembleDebug("Load/Store forwardingX"); + + RemoveUnusedStoreInstructions(paramMemory); + + TempForwarding(); + RemoveUnusedInstructions(); + + DisassembleDebug("Load/Store forwarding"); + } while (changed); + + do + { + ResetVisited(); + mEntryBlock->CompactInstructions(); + + BuildDataFlowSets(); + + ResetVisited(); + changed = mEntryBlock->MergeCommonPathInstructions(); + + DisassembleDebug("Merged common path part"); + + if (changed) + { + TempForwarding(); + RemoveUnusedInstructions(); + + } + + } while (changed); + + DisassembleDebug("Merged common path instructions"); + +#if 1 + do + { + BuildDataFlowSets(); + + ResetVisited(); + changed = mEntryBlock->PushSinglePathResultInstructions(); + + DisassembleDebug("Pushed single path result"); + + } while (changed); +#endif + + TempForwarding(); + RemoveUnusedInstructions(); +#endif + + DisassembleDebug("Global Constant Prop 2"); + } +#endif + MapVariables(); DisassembleDebug("mapped variabled"); @@ -11726,7 +11976,7 @@ void InterCodeProcedure::Close(void) // Optimize for size 
MergeBasicBlocks(); - BuildTraces(false, false); + BuildTraces(false, false, true); DisassembleDebug("Final Merged basic blocks"); if (mSaveTempsLinkerObject && mTempSize > 16) @@ -12007,6 +12257,9 @@ void InterCodeProcedure::BuildLoopPrefix(void) for (int i = 0; i < mBlocks.Size(); i++) mBlocks[i]->mNumEntries = 0; mEntryBlock->CollectEntries(); + + ResetVisited(); + mEntryBlock->BuildLoopSuffix(this); } bool InterCodeProcedure::PropagateNonLocalUsedTemps(void) @@ -12123,6 +12376,7 @@ void InterCodeProcedure::ReduceTemporaries(void) callerSavedTemps = freeCallerSavedTemps; mTempOffset.SetSize(0); + mTempSizes.SetSize(0); for (int i = 0; i < mTemporaries.Size(); i++) { diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 7e92696..a681b23 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -212,6 +212,7 @@ public: void Intersect(const TempForwardingTable& table); + int Size(void) const; void SetSize(int size); void Reset(void); @@ -371,7 +372,7 @@ public: void CollectEntries(void); void CollectEntryBlocks(InterCodeBasicBlock* from); - void GenerateTraces(bool expand); + void GenerateTraces(bool expand, bool compact); void BuildDominatorTree(InterCodeBasicBlock * from); void LocalToTemp(int vindex, int temp); @@ -472,6 +473,7 @@ public: void InnerLoopOptimization(const NumberSet& aliasedParams); InterCodeBasicBlock* BuildLoopPrefix(InterCodeProcedure * proc); + void BuildLoopSuffix(InterCodeProcedure* proc); void SplitBranches(InterCodeProcedure* proc); void FollowJumps(void); @@ -543,7 +545,7 @@ public: void Disassemble(FILE* file); void Disassemble(const char* name, bool dumpSets = false); protected: - void BuildTraces(bool expand, bool dominators = true); + void BuildTraces(bool expand, bool dominators = true, bool compact = false); void BuildDataFlowSets(void); void RenameTemporaries(void); void TempForwarding(void); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index eb27d31..3f0843b 100644 --- 
a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -5759,7 +5759,7 @@ void NativeCodeBasicBlock::LoadValueToReg(InterCodeProcedure* proc, const InterI if (InterTypeSize[ins->mDst.mType] == 1) { mIns.Push(NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, index)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_INDIRECT_Y, areg)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_INDIRECT_Y, areg, nullptr, flags)); if (ainsl) { if (ainsl->mType == ASMIT_ADC) @@ -5779,7 +5779,7 @@ void NativeCodeBasicBlock::LoadValueToReg(InterCodeProcedure* proc, const InterI else if (InterTypeSize[ins->mDst.mType] == 2) { mIns.Push(NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, index)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_INDIRECT_Y, areg)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_INDIRECT_Y, areg, nullptr, flags)); if (ainsl) { if (ainsl->mType == ASMIT_ADC) @@ -5797,7 +5797,7 @@ void NativeCodeBasicBlock::LoadValueToReg(InterCodeProcedure* proc, const InterI mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, reg)); mIns.Push(NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, index + 1)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_INDIRECT_Y, areg)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_INDIRECT_Y, areg, nullptr, flags)); if (ainsh) mIns.Push(*ainsh); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, reg + 1)); if (reg == areg) @@ -5812,17 +5812,17 @@ void NativeCodeBasicBlock::LoadValueToReg(InterCodeProcedure* proc, const InterI else if (InterTypeSize[ins->mDst.mType] == 4) { mIns.Push(NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, index)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_INDIRECT_Y, areg)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_INDIRECT_Y, areg, nullptr, flags)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, reg)); mIns.Push(NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, index + 1)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, 
ASMIM_INDIRECT_Y, areg)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_INDIRECT_Y, areg, nullptr, flags)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, reg + 1)); mIns.Push(NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, index + 2)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_INDIRECT_Y, areg)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_INDIRECT_Y, areg, nullptr, flags)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, reg + 2)); mIns.Push(NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, index + 3)); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_INDIRECT_Y, areg)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_INDIRECT_Y, areg, nullptr, flags)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, reg + 3)); } } @@ -13280,6 +13280,7 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool } #endif +#if 1 if (mEntryBlocks.Size() == 1) { NativeCodeBasicBlock* eblock = mEntryBlocks[0]; @@ -13288,11 +13289,13 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool if (mIns[0].mType == ASMIT_ORA && mIns[0].mMode == ASMIM_IMMEDIATE && mIns[0].mAddress == 0 && eblock->mIns.Last().ChangesAccuAndFlag()) { eblock->mExitRequiredRegs += CPU_REG_Z; + mEntryRequiredRegs += CPU_REG_Z; mIns.Remove(0); changed = true; } } } +#endif #if 1 if (loops && mIns.Size() >= 1 && mEntryBlocks.Size() == 2) { @@ -13837,7 +13840,7 @@ bool NativeCodeBasicBlock::CheckForwardSumYPointer(const NativeCodeBasicBlock* b { NativeCodeInstruction& ins(mIns[at]); - if (ins.mMode == ASMIM_ZERO_PAGE && (ins.mAddress == reg || ins.mAddress == reg + 1 || ins.mAddress == index)) + if (ins.mMode == ASMIM_ZERO_PAGE && (ins.mAddress == reg || ins.mAddress == reg + 1)) return false; else if (ins.mMode == ASMIM_INDIRECT_Y && ins.mAddress == reg) { @@ -14786,6 +14789,23 @@ bool NativeCodeBasicBlock::MergeXYSameValue(int from) return false; } +int NativeCodeBasicBlock::RetrieveYValue(int at) 
const +{ + while (at > 0 && !mIns[at].ChangesYReg()) + at--; + if (mIns[at].mType == ASMIT_LDY && mIns[at].mMode == ASMIM_IMMEDIATE) + return mIns[at].mAddress; + else + return -1; +} + +void NativeCodeBasicBlock::InsertLoadYImmediate(int at, int val) +{ + while (at < mIns.Size() && !mIns[at].ReferencesYReg()) + at++; + mIns.Insert(at, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, val)); +} + bool NativeCodeBasicBlock::PatchGlobalAdressSumYByX(int at, int reg, const NativeCodeInstruction& ains, int addr) { int yindex = 0; @@ -14867,8 +14887,9 @@ bool NativeCodeBasicBlock::PatchDirectAddressSumY(int at, int reg, int apos, int if (mIns[last].mLive & LIVE_CPU_REG_Y) { - mIns.Insert(last + 1, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, yindex)); - mIns[last + 1].mLive |= CPU_REG_Y; + InsertLoadYImmediate(last + 1, yindex); +// mIns.Insert(last + 1, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, yindex)); +// mIns[last + 1].mLive |= CPU_REG_Y; } mIns[apos].mType = ASMIT_TAY; @@ -14934,8 +14955,9 @@ bool NativeCodeBasicBlock::PatchAddressSumY(int at, int reg, int apos, int breg, if (mIns[last].mLive & LIVE_CPU_REG_Y) { - mIns.Insert(last + 1, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, yindex)); - mIns[last + 1].mLive |= CPU_REG_Y; + InsertLoadYImmediate(last + 1, yindex); +// mIns.Insert(last + 1, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, yindex)); +// mIns[last + 1].mLive |= CPU_REG_Y; } for (int i = 0; i < 5; i++) @@ -15323,9 +15345,9 @@ bool NativeCodeBasicBlock::MoveLoadIndirectTempStoreUp(int at) mIns[j - 0].mAddress == mIns[at + 1].mAddress + 1 && mIns[j - 1].mAddress == mIns[j - 3].mAddress + 1) { + mIns[at + 0].mLive |= mIns[j].mLive; mIns[at + 1].mLive |= mIns[j].mLive; mIns[at + 2].mLive |= mIns[j].mLive; - mIns[at + 3].mLive |= mIns[j].mLive; mIns[at + 1].mAddress = mIns[j - 3].mAddress; mIns[at + 1].mLive |= LIVE_MEM; @@ -15364,9 +15386,9 @@ bool NativeCodeBasicBlock::MoveLoadIndirectTempStoreUp(int at) return false; } + mIns[at + 
0].mLive |= mIns[j].mLive; mIns[at + 1].mLive |= mIns[j].mLive; mIns[at + 2].mLive |= mIns[j].mLive; - mIns[at + 3].mLive |= mIns[j].mLive; mIns[at + 1].mAddress = addr; mIns[at + 1].mLive |= LIVE_MEM; @@ -20441,8 +20463,9 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } if (mIns[i + 0].mLive & LIVE_CPU_REG_Y) { - mIns.Insert(i + 1, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, 0)); - mIns[i + 1].mLive |= LIVE_CPU_REG_Y; + InsertLoadYImmediate(i + 1, 0); +// mIns.Insert(i + 1, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, 0)); +// mIns[i + 1].mLive |= LIVE_CPU_REG_Y; } mIns.Insert(i + 0, NativeCodeInstruction(ASMIT_LDY, ASMIM_ZERO_PAGE, ireg)); mIns[i + 0].mLive |= LIVE_CPU_REG_Y | LIVE_MEM; @@ -20464,13 +20487,17 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass { if (mIns[i + 0].mLive & LIVE_CPU_REG_Y) { - mIns.Insert(i + 1, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, 0)); - mIns[i + 1].mLive |= LIVE_CPU_REG_Y; if (mIns[i + 0].mLive & LIVE_CPU_REG_Z) { + mIns.Insert(i + 1, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, 0)); + mIns[i + 1].mLive |= LIVE_CPU_REG_Y; mIns.Insert(i + 2, NativeCodeInstruction(ASMIT_ORA, ASMIM_IMMEDIATE, 0)); mIns[i + 2].mLive |= LIVE_CPU_REG_Y | LIVE_CPU_REG_Z; } + else + { + InsertLoadYImmediate(i + 1, 0); + } } if (flags & LIVE_CPU_REG_Y) @@ -20557,8 +20584,9 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass #if 1 if (mIns[i + 0].mLive & LIVE_CPU_REG_Y) { - mIns.Insert(i + 1, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, 0)); - mIns[i + 1].mLive |= LIVE_CPU_REG_Y; + InsertLoadYImmediate(i + 1, 0); +// mIns.Insert(i + 1, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, 0)); +// mIns[i + 1].mLive |= LIVE_CPU_REG_Y; } mIns.Insert(i + 0, NativeCodeInstruction(ASMIT_LDY, ASMIM_ZERO_PAGE, ireg)); mIns[i + 0].mLive |= LIVE_CPU_REG_Y | LIVE_MEM; @@ -21012,13 +21040,17 @@ bool 
NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass { if (mIns[i + 1].mLive & LIVE_CPU_REG_Y) { - mIns.Insert(i + 2, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, mIns[i + 0].mAddress)); - mIns[i + 2].mLive |= LIVE_CPU_REG_Y; if (mIns[i + 1].mLive & LIVE_CPU_REG_Z) { + mIns.Insert(i + 2, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, mIns[i + 0].mAddress)); + mIns[i + 2].mLive |= LIVE_CPU_REG_Y; mIns.Insert(i + 3, NativeCodeInstruction(ASMIT_ORA, ASMIM_IMMEDIATE, 0)); mIns[i + 3].mLive |= LIVE_CPU_REG_Y | LIVE_CPU_REG_Z; } + else + { + InsertLoadYImmediate(i + 2, mIns[i + 0].mAddress); /* restore Y to mIns[i + 0].mAddress, matching the Z-live branch and the removed code (was hard-coded 0) */ + } } if (flags & LIVE_CPU_REG_Y) @@ -21180,8 +21212,9 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } if (mIns[i + 1].mLive & LIVE_CPU_REG_Y) { - mIns.Insert(i + 2, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, 0)); - mIns[i + 2].mLive |= LIVE_CPU_REG_Y; + InsertLoadYImmediate(i + 2, 0); +// mIns.Insert(i + 2, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, 0)); +// mIns[i + 2].mLive |= LIVE_CPU_REG_Y; } mIns.Insert(i + 0, NativeCodeInstruction(ASMIT_LDY, ASMIM_ZERO_PAGE, ireg)); mIns[i + 0].mLive |= LIVE_CPU_REG_Y | LIVE_MEM; @@ -21215,8 +21248,9 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } if (mIns[i + 1].mLive & LIVE_CPU_REG_Y) { - mIns.Insert(i + 2, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, yoffset)); - mIns[i + 2].mLive |= LIVE_CPU_REG_Y; + InsertLoadYImmediate(i + 2, yoffset); /* restore Y to yoffset as the replaced code did (was hard-coded 0) */ +// mIns.Insert(i + 2, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, yoffset)); +// mIns[i + 2].mLive |= LIVE_CPU_REG_Y; } mIns[i + 0].mMode = ASMIM_ZERO_PAGE; @@ -21302,7 +21336,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass else if ( mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_ZERO_PAGE && mIns[i + 2].mType == ASMIT_TAY && !(mIns[i + 2].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z)) && - !mIns[i + 1].ChangesAccu() && !mIns[i + 1].RequiresAccu() && 
!(mIns[i + 1].mLive & LIVE_CPU_REG_Y)) + !mIns[i + 1].ChangesAccu() && !mIns[i + 1].RequiresAccu() && !mIns[i + 1].RequiresYReg()) { mIns[i + 0].mType = ASMIT_LDY; mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; @@ -22135,8 +22169,9 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } if (mIns[i + 2].mLive & LIVE_CPU_REG_Y) { - mIns.Insert(i + 3, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, yoffset)); - mIns[i + 3].mLive |= LIVE_CPU_REG_Y; + InsertLoadYImmediate(i + 3, yoffset); +// mIns.Insert(i + 3, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, yoffset)); +// mIns[i + 3].mLive |= LIVE_CPU_REG_Y; } int ypos = i; @@ -22897,8 +22932,9 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } if (mIns[i + 3].mLive & LIVE_CPU_REG_Y) { - mIns.Insert(i + 4, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, 0)); - mIns[i + 4].mLive |= LIVE_CPU_REG_Y; + InsertLoadYImmediate(i + 4, 0); +// mIns.Insert(i + 4, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, 0)); +// mIns[i + 4].mLive |= LIVE_CPU_REG_Y; } mIns[i + 0].mMode = ASMIM_ZERO_PAGE; mIns[i + 0].mAddress = ireg; @@ -23066,8 +23102,9 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } if (mIns[i + 4].mLive & LIVE_CPU_REG_Y) { - mIns.Insert(i + 5, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, 0)); - mIns[i + 5].mLive |= LIVE_CPU_REG_Y; + InsertLoadYImmediate(i + 5, 0); +// mIns.Insert(i + 5, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, 0)); +// mIns[i + 5].mLive |= LIVE_CPU_REG_Y; } mIns[i + 0].mMode = ASMIM_ZERO_PAGE; mIns[i + 0].mAddress = ireg; @@ -23269,28 +23306,33 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE) { int n = 3; + if (mIns[i + 0].mFlags & NCIF_VOLATILE) + n = 1; if (mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z)) n--; - proc->ResetPatched(); - if 
(CheckSingleUseGlobalLoad(this, mIns[i + 1].mAddress, i + 2, mIns[i], n)) + if (n > 0) { proc->ResetPatched(); - if (PatchSingleUseGlobalLoad(this, mIns[i + 1].mAddress, i + 2, mIns[i])) + if (CheckSingleUseGlobalLoad(this, mIns[i + 1].mAddress, i + 2, mIns[i], n)) { - if (mIns[i + 0].mMode == ASMIM_ABSOLUTE_X) + proc->ResetPatched(); + if (PatchSingleUseGlobalLoad(this, mIns[i + 1].mAddress, i + 2, mIns[i])) { - mIns[i + 0].mLive |= LIVE_CPU_REG_X; - mIns[i + 1].mLive |= LIVE_CPU_REG_X; + if (mIns[i + 0].mMode == ASMIM_ABSOLUTE_X) + { + mIns[i + 0].mLive |= LIVE_CPU_REG_X; + mIns[i + 1].mLive |= LIVE_CPU_REG_X; + } + else if (mIns[i + 0].mMode == ASMIM_ABSOLUTE_Y) + { + mIns[i + 0].mLive |= LIVE_CPU_REG_Y; + mIns[i + 1].mLive |= LIVE_CPU_REG_Y; + } + progress = true; } - else if (mIns[i + 0].mMode == ASMIM_ABSOLUTE_Y) - { - mIns[i + 0].mLive |= LIVE_CPU_REG_Y; - mIns[i + 1].mLive |= LIVE_CPU_REG_Y; - } - progress = true; + CheckLive(); } - CheckLive(); } } else if ( @@ -23554,8 +23596,9 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 6].mType == ASMIT_STA && mIns[i + 6].mMode == ASMIM_ZERO_PAGE && mIns[i + 6].mAddress == mIns[i + 3].mAddress + 1 && !(mIns[i + 6].mLive & LIVE_CPU_REG_A)) { + int yval = RetrieveYValue(i); proc->ResetPatched(); - if (CheckForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 1].mAddress, mIns[i + 2].mAddress, i + 7, -1)) + if (CheckForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 1].mAddress, mIns[i + 2].mAddress, i + 7, yval)) { if (mIns[i + 3].mAddress == mIns[i + 1].mAddress) { @@ -23566,7 +23609,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } proc->ResetPatched(); - if (PatchForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 1].mAddress, mIns[i + 2].mAddress, i + 7, -1)) + if (PatchForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 1].mAddress, mIns[i + 2].mAddress, i + 7, yval)) progress = true; } } @@ -23582,19 +23625,20 @@ 
bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 6].mType == ASMIT_STA && mIns[i + 6].mMode == ASMIM_ZERO_PAGE && mIns[i + 6].mAddress == mIns[i + 3].mAddress + 1 && !(mIns[i + 6].mLive & LIVE_CPU_REG_A)) { + int yval = RetrieveYValue(i); proc->ResetPatched(); - if (CheckForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 2].mAddress, mIns[i + 0].mAddress, i + 7, -1)) + if (CheckForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 2].mAddress, mIns[i + 0].mAddress, i + 7, yval)) { if (mIns[i + 3].mAddress == mIns[i + 2].mAddress) { - for (int j = 0; j < 7; j++) + for (int j = 1; j < 7; j++) { mIns[i + j].mType = ASMIT_NOP; mIns[i + j].mMode = ASMIM_IMPLIED; } } proc->ResetPatched(); - if (PatchForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 2].mAddress, mIns[i + 0].mAddress, i + 7, -1)) + if (PatchForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 2].mAddress, mIns[i + 0].mAddress, i + 7, yval)) progress = true; } } @@ -23770,6 +23814,8 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } while (progress); + CheckLive(); + int sz = mIns.Size(); #if 1 if (sz >= 2 && @@ -23807,43 +23853,47 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass #if 1 else if (sz >= 2 && mIns[sz - 2].mType == ASMIT_LDA && mIns[sz - 2].mMode == ASMIM_IMMEDIATE && mIns[sz - 2].mAddress == 0 && - mIns[sz - 1].mType == ASMIT_ROL && mIns[sz - 1].mMode == ASMIM_IMPLIED && !(mIns[sz - 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C))) + mIns[sz - 1].mType == ASMIT_ROL && mIns[sz - 1].mMode == ASMIM_IMPLIED && !(mIns[sz - 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C)) && !mExitRequiredRegs[CPU_REG_Z]) { if (mBranch == ASMIT_BNE) { mBranch = ASMIT_BCS; mIns.SetSize(sz - 2); + sz -= 2; changed = true; } else if (mBranch == ASMIT_BEQ) { mBranch = ASMIT_BCC; mIns.SetSize(sz - 2); + sz -= 2; changed = true; } } else if (sz >= 3 && mIns[sz - 3].mType == ASMIT_LDA && mIns[sz - 
3].mMode == ASMIM_IMMEDIATE && mIns[sz - 3].mAddress == 0 && mIns[sz - 2].mType == ASMIT_ROL && mIns[sz - 2].mMode == ASMIM_IMPLIED && - mIns[sz - 1].mType == ASMIT_CMP && mIns[sz - 1].mMode == ASMIM_IMMEDIATE && mIns[sz - 1].mAddress == 0x0 && !(mIns[sz - 1].mLive & LIVE_CPU_REG_A)) + mIns[sz - 1].mType == ASMIT_CMP && mIns[sz - 1].mMode == ASMIM_IMMEDIATE && mIns[sz - 1].mAddress == 0x0 && !(mIns[sz - 1].mLive & LIVE_CPU_REG_A) && !mExitRequiredRegs[CPU_REG_Z]) { if (mBranch == ASMIT_BNE) { mBranch = ASMIT_BCS; mIns.SetSize(sz - 3); + sz -= 3; changed = true; } else if (mBranch == ASMIT_BEQ) { mBranch = ASMIT_BCC; mIns.SetSize(sz - 3); + sz -= 3; changed = true; } } else if (sz >= 2 && mIns[sz - 2].mType == ASMIT_EOR && mIns[sz - 2].mMode == ASMIM_IMMEDIATE && mIns[sz - 2].mAddress == 0x80 && - mIns[sz - 1].mType == ASMIT_CMP && mIns[sz - 1].mMode == ASMIM_IMMEDIATE && mIns[sz - 1].mAddress == 0x80 && !(mIns[sz - 1].mLive & LIVE_CPU_REG_A)) + mIns[sz - 1].mType == ASMIT_CMP && mIns[sz - 1].mMode == ASMIM_IMMEDIATE && mIns[sz - 1].mAddress == 0x80 && !(mIns[sz - 1].mLive & LIVE_CPU_REG_A) && !mExitRequiredRegs[CPU_REG_Z]) { if (mBranch == ASMIT_BNE && mTrueJump->mIns.Size() == 0 && mTrueJump->mBranch == ASMIT_BCC) { @@ -23865,6 +23915,8 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[sz - 2].mType = ASMIT_NOP; mIns[sz - 2].mMode = ASMIM_IMPLIED; mIns[sz - 1].mType = ASMIT_ORA; mIns[sz - 1].mAddress = 0; mIns[sz - 1].mLive |= LIVE_CPU_REG_Z; + + CheckLive(); } } @@ -23872,7 +23924,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[sz - 4].mType == ASMIT_EOR && mIns[sz - 4].mMode == ASMIM_IMMEDIATE && mIns[sz - 4].mAddress == 0x80 && mIns[sz - 3].mType == ASMIT_STA && mIns[sz - 3].mMode == ASMIM_ZERO_PAGE && mIns[sz - 2].mType == ASMIT_LDA && mIns[sz - 2].mMode == ASMIM_IMMEDIATE && mIns[sz - 2].mAddress == 0x80 && - mIns[sz - 1].mType == ASMIT_CMP && mIns[sz - 1].mMode == 
ASMIM_ZERO_PAGE && mIns[sz - 1].mAddress == mIns[sz - 3].mAddress && !(mIns[sz - 1].mLive & (LIVE_CPU_REG_A | LIVE_MEM))) + mIns[sz - 1].mType == ASMIT_CMP && mIns[sz - 1].mMode == ASMIM_ZERO_PAGE && mIns[sz - 1].mAddress == mIns[sz - 3].mAddress && !(mIns[sz - 1].mLive & (LIVE_CPU_REG_A | LIVE_MEM)) && !mExitRequiredRegs[CPU_REG_Z]) { if (mBranch == ASMIT_BNE && mTrueJump->mIns.Size() == 0 && mTrueJump->mBranch == ASMIT_BCC) { @@ -23896,12 +23948,14 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[sz - 3].mType = ASMIT_NOP; mIns[sz - 3].mMode = ASMIM_IMPLIED; mIns[sz - 2].mType = ASMIT_NOP; mIns[sz - 2].mMode = ASMIM_IMPLIED; mIns[sz - 1].mType = ASMIT_ORA; mIns[sz - 1].mMode = ASMIM_IMMEDIATE; mIns[sz - 1].mAddress = 0; mIns[sz - 1].mLive |= LIVE_CPU_REG_Z; + + CheckLive(); } } else if (sz >= 2 && mIns[sz - 2].mType == ASMIT_LDA && mIns[sz - 2].mMode == ASMIM_IMMEDIATE && mIns[sz - 2].mAddress == 0 && - mIns[sz - 1].mType == ASMIT_SBC && mIns[sz - 1].mMode == ASMIM_IMMEDIATE && mIns[sz - 1].mAddress == 0 && !(mIns[sz - 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C))) + mIns[sz - 1].mType == ASMIT_SBC && mIns[sz - 1].mMode == ASMIM_IMMEDIATE && mIns[sz - 1].mAddress == 0 && !(mIns[sz - 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C)) && !mExitRequiredRegs[CPU_REG_Z]) { if (mBranch == ASMIT_BNE) { @@ -23915,6 +23969,9 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns.SetSize(sz - 2); changed = true; } + + sz -= 2; + CheckLive(); } if (sz >= 1 && @@ -23931,16 +23988,20 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass changed = true; } #endif + + CheckLive(); + #if 1 if (mTrueJump && mFalseJump && !mTrueJump->mFalseJump && !mFalseJump->mFalseJump && mTrueJump->mTrueJump == mFalseJump->mTrueJump && mTrueJump->mIns.Size() == 1 && mFalseJump->mIns.Size() == 1 && mTrueJump->mIns[0].mType == ASMIT_LDA && mTrueJump->mIns[0].mMode == ASMIM_IMMEDIATE && 
mFalseJump->mIns[0].mType == ASMIT_LDA && mFalseJump->mIns[0].mMode == ASMIM_IMMEDIATE) { - if (mBranch == ASMIT_BCS && mTrueJump->mIns[0].mAddress == 1 && mFalseJump->mIns[0].mAddress == 0) + if (mBranch == ASMIT_BCS && mTrueJump->mIns[0].mAddress == 1 && mFalseJump->mIns[0].mAddress == 0 && !(mExitRequiredRegs[CPU_REG_C])) { mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED)); + mExitProvidedRegs += CPU_REG_A; mBranch = ASMIT_JMP; mTrueJump = mTrueJump->mTrueJump; mFalseJump = nullptr; @@ -23948,10 +24009,11 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass CheckLive(); } - else if (mBranch == ASMIT_BCC && mTrueJump->mIns[0].mAddress == 0 && mFalseJump->mIns[0].mAddress == 1) + else if (mBranch == ASMIT_BCC && mTrueJump->mIns[0].mAddress == 0 && mFalseJump->mIns[0].mAddress == 1 && !(mExitRequiredRegs[CPU_REG_C])) { mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED)); + mExitProvidedRegs += CPU_REG_A; mBranch = ASMIT_JMP; mTrueJump = mTrueJump->mTrueJump; mFalseJump = nullptr; @@ -23959,11 +24021,12 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass CheckLive(); } - else if (mBranch == ASMIT_BCS && mTrueJump->mIns[0].mAddress == 0 && mFalseJump->mIns[0].mAddress == 1) + else if (mBranch == ASMIT_BCS && mTrueJump->mIns[0].mAddress == 0 && mFalseJump->mIns[0].mAddress == 1 && !(mExitRequiredRegs[CPU_REG_C])) { mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED)); mIns.Push(NativeCodeInstruction(ASMIT_EOR, ASMIM_IMMEDIATE, 1)); + mExitProvidedRegs += CPU_REG_A; mBranch = ASMIT_JMP; mTrueJump = mTrueJump->mTrueJump; mFalseJump = nullptr; @@ -23971,11 +24034,12 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass CheckLive(); } - else if (mBranch == ASMIT_BCC 
&& mTrueJump->mIns[0].mAddress == 1 && mFalseJump->mIns[0].mAddress == 0) + else if (mBranch == ASMIT_BCC && mTrueJump->mIns[0].mAddress == 1 && mFalseJump->mIns[0].mAddress == 0 && !(mExitRequiredRegs[CPU_REG_C])) { mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED)); mIns.Push(NativeCodeInstruction(ASMIT_EOR, ASMIM_IMMEDIATE, 1)); + mExitProvidedRegs += CPU_REG_A; mBranch = ASMIT_JMP; mTrueJump = mTrueJump->mTrueJump; mFalseJump = nullptr; @@ -23990,6 +24054,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns.Insert(mIns.Size() - 1, NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); mIns[mIns.Size() - 1].mType = ASMIT_CMP; mIns[mIns.Size() - 1].mLive |= LIVE_CPU_REG_C; mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED)); + mExitProvidedRegs += CPU_REG_A; mBranch = ASMIT_JMP; mTrueJump = mTrueJump->mTrueJump; mFalseJump = nullptr; @@ -24002,6 +24067,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns.Insert(mIns.Size() - 1, NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); mIns[mIns.Size() - 1].mType = ASMIT_CMP; mIns[mIns.Size() - 1].mLive |= LIVE_CPU_REG_C; mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED)); + mExitProvidedRegs += CPU_REG_A; mBranch = ASMIT_JMP; mTrueJump = mTrueJump->mTrueJump; mFalseJump = nullptr; @@ -24012,6 +24078,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } } #endif + CheckLive(); #endif assert(mIndex == 1000 || mNumEntries == mEntryBlocks.Size()); @@ -24056,6 +24123,8 @@ void NativeCodeBasicBlock::CheckLive(void) for (int j = mIns.Size() - 1; j >= 0; j--) { + assert(mIns[j].mType != ASMIT_INV); + if (mIns[j].mType != ASMIT_NOP) { assert(!(live & ~mIns[j].mLive)); @@ -25037,6 +25106,7 @@ void NativeCodeProcedure::Optimize(void) changed = true; #endif + #if 1 if (step < 6) { @@ -25046,6 +25116,7 @@ void 
NativeCodeProcedure::Optimize(void) changed = true; } } + #if 1 if (step == 3) { @@ -25067,10 +25138,11 @@ void NativeCodeProcedure::Optimize(void) changed = true; } #endif +#if 1 ResetVisited(); if (mEntryBlock->MergeBasicBlocks()) changed = true; - +#endif ResetEntryBlocks(); ResetVisited(); mEntryBlock->CollectEntryBlocks(nullptr); @@ -25082,13 +25154,14 @@ void NativeCodeProcedure::Optimize(void) changed = true; } #endif + #if 1 if (step > 2 && !changed) { ResetVisited(); if (mEntryBlock->JoinTailCodeSequences(this, step > 3)) changed = true; - + ResetVisited(); if (mEntryBlock->PropagateSinglePath()) changed = true; @@ -25108,9 +25181,9 @@ void NativeCodeProcedure::Optimize(void) if (mEntryBlock->ReduceLocalYPressure()) changed = true; #endif - } #endif + #if 1 if (step == 4 || step == 5) { @@ -25262,12 +25335,12 @@ void NativeCodeProcedure::Optimize(void) #endif #endif - if (cnt > 200) { changed = false; mGenerator->mErrors->Error(mInterProc->mLocation, EWARN_OPTIMIZER_LOCKED, "Optimizer locked in infinite loop", mInterProc->mIdent); } + #if 1 if (!changed && step < 8) { @@ -25275,7 +25348,10 @@ void NativeCodeProcedure::Optimize(void) changed = true; } #endif + + cnt++; + } while (changed); #if 1 @@ -25650,7 +25726,7 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode } break; case IC_RELATIONAL_OPERATOR: - if (i + 1 < iblock->mInstructions.Size() && iblock->mInstructions[i + 1]->mCode == IC_BRANCH && iblock->mInstructions[i + 1]->mSrc[0].mFinal) + if (i + 1 < iblock->mInstructions.Size() && iblock->mInstructions[i + 1]->mCode == IC_BRANCH && iblock->mInstructions[i + 1]->mSrc[0].mTemp == ins->mDst.mTemp && iblock->mInstructions[i + 1]->mSrc[0].mFinal) { block->RelationalOperator(iproc, ins, this, CompileBlock(iproc, iblock->mTrueJump), CompileBlock(iproc, iblock->mFalseJump)); return; diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index cb9954f..25b9cce 100644 --- a/oscar64/NativeCodeGenerator.h 
+++ b/oscar64/NativeCodeGenerator.h @@ -303,6 +303,8 @@ public: bool JoinTAYARange(int from, int to); bool PatchGlobalAdressSumYByX(int at, int reg, const NativeCodeInstruction& ains, int addr); bool MergeXYSameValue(int from); + void InsertLoadYImmediate(int at, int val); + int RetrieveYValue(int at) const; bool ReverseReplaceTAX(int at); diff --git a/samples/memmap/charsetload.d64 b/samples/memmap/charsetload.d64 index ae07601a3ca0909d5ec04fc33e8048d5eda9efe4..17fafc41fe00c9b32f73b62b5cad94e1cfdf17d6 100644 GIT binary patch delta 544 zcmYj|KWGzS7{=fCyAZ?Oci$b+RM6_S{G zHB>W}n>AyWEG1ha7{gx+C@DJq*6*)ua(}ZWh=rA*7pz$`3NKkXnl&7wNVfV+faH}> z`B2sUni<`@fPZz_spursH#6 zdfj!-TpVB%Y?57Pw^@7ZG8g*}JnrNM`^+DoIBCIZ%7R-KLf9&Pm)>sg9H)D&Ktu<<-QuIe&%|^Z&tXX{rEx?rX&)~7!74bs zrOv8mUTmbXM|q%l+)0>E6^}2VG;gEQd&xiSC0aqp@_6fu#}~(*lr(c$9JO&kCR^hk z_t%m?Ws$Hky3gbO{Wj!noT-srqUZq#>vQs5&TK(4c&g_t-39_?j5@puPQPt_+I{Yd Tcm^M0Q*31L;Jqv^W=Z@n?<kis6if+*_69(plI zRueSPRme*VLW68*0!9`zp8Ua6gi-@Sj^d$Ky@Xis;zem1#JSAO_syIp(+*|Yq09>u zCVawL@*9^Y@MrMrQ0w`o*4pfrQ4|!)iGfc?tE}m%g&Hd*EY((*gsq4Ukh~Bm?X%Rv zYvx9-Lg5YnWZ>c8m@`sJSR?jmV+v8k6;96KFGmSBW-v^B_vLDy52o;dxbh?aYCMG- z;;}h;D`3)V6na>sIr@mM_8maIui8ESO+%7{2d6v#{s2+wxU}2-ABF^QxqrXj<2!xV z=yRAkILs#59J|79vYl>@IT+ddxTouEgYA(4CS^-rkUbQ|Vj54Jt`MIDaR%#g1>S8x zBYp#p7_go)V8K8L8%y8C_nqw+-A(!83Me}%ZG3E*-$>&r%=5K09#Zo1hqEnz6>P05 zWhH%)e@Wv2c|csvp4E%Q)ygOqO(cV7TKnDEuHQCXt^3*4DpODLvR>vf6Nmk+t(hbt z{Je